Example #1
import prism


def main(ctx, base_url, tenant_name, client_id, client_secret, refresh_token):
    """CLI for interacting with Workday’s Prism API"""

    # initialize the prism class with your credentials
    p = prism.Prism(base_url, tenant_name, client_id, client_secret,
                    refresh_token)

    # create the bearer token
    p.create_bearer_token()

    # store the prism object in the context
    ctx.obj = {"p": p}
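For context, the callback above is typically registered as a click group, so that ctx and the credential parameters are populated automatically. The sketch below shows one way this could be wired up, reusing the environment-variable names from Example #2; the group name, option wiring, and the ping subcommand are assumptions for illustration, not part of the original CLI.

import click
import prism


@click.group()
@click.option("--base_url", envvar="workday_base_url")
@click.option("--tenant_name", envvar="workday_tenant_name")
@click.option("--client_id", envvar="prism_client_id")
@click.option("--client_secret", envvar="prism_client_secret")
@click.option("--refresh_token", envvar="prism_refresh_token")
@click.pass_context
def cli(ctx, base_url, tenant_name, client_id, client_secret, refresh_token):
    """Hypothetical group callback mirroring main() above."""
    p = prism.Prism(base_url, tenant_name, client_id, client_secret, refresh_token)
    p.create_bearer_token()
    # store the client so subcommands can reuse it
    ctx.obj = {"p": p}


@cli.command()
@click.pass_context
def ping(ctx):
    """Hypothetical subcommand showing how the stored client is retrieved."""
    p = ctx.obj["p"]
    click.echo(f"Prism client ready: {p!r}")


if __name__ == "__main__":
    cli()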
Example #2
import os
import prism

# instantiate the Prism class
p = prism.Prism(
    os.getenv("workday_base_url"),
    os.getenv("workday_tenant_name"),
    os.getenv("prism_client_id"),
    os.getenv("prism_client_secret"),
    os.getenv("prism_refresh_token"),
)

# load schema for new table
schema = prism.load_schema("schema.json")

# create the table in Prism
table = prism.create_table(p, "Topic_Model_Predictions_BDS", schema["fields"])

# upload the file to the table
prism.upload_file(p,
                  "predictions.csv.gz",
                  table["id"],
                  operation="TruncateandInsert")
Example #3
import os
import warnings

import bert_score
import datasets
import pandas as pd
# note: this 'prism' is the Prism MT-evaluation metric (model_dir/lang API),
# not the Workday Prism client used in the examples above
import prism

# metric names accepted by get_scores()
__models__ = ['prism', 'bert_score', 'roberta_ft', 'bleu', 'bleurt']


def get_scores(modelname, datadir, outputdir=None, ref='ref'):
    """
    Use the specified model to score examples read from datadir. See README.md
    for the expected data format.

    Keyword arguments:
    modelname -- name of the metric to use (one of __models__)
    datadir -- path to the input data file (TSV)
    outputdir -- optional path to write the scored TSV to
    ref -- reference mode: 'ref', 'context_last', 'empty', 'multi_avg',
           'multi_max', or 'multi'
    """
    # check ref argument validity
    if ref not in [
            'ref', 'context_last', 'empty', 'multi_avg', 'multi_max', 'multi'
    ]:
        raise ValueError(
            "ref must be 'ref' or 'context_last' or 'empty' or 'multi_avg' or 'multi_max' or 'multi'."
        )
    if modelname not in __models__:
        raise ValueError(f"modelname must be one of {__models__}")
    # get scores
    if modelname == 'prism':
        model = prism.Prism(model_dir=os.environ['MODEL_DIR'], lang='en')
    elif modelname == 'bert_score':
        pass  # no model directory
    elif modelname == 'roberta_ft':
        pass  # no model directory
    elif modelname == 'bleu':
        model = datasets.load_metric("sacrebleu")
    elif modelname == 'bleurt':
        model = datasets.load_metric('bleurt', 'bleurt-large-512')
    else:
        warnings.warn('Model not listed.')

    # read in data
    data = pd.read_csv(datadir, sep='\t')
    # determine model inputs (for the multi_* / 'multi' modes, the reference
    # lists are built inside the model-specific branches below)
    if ref == 'ref':
        ref_list = data['reference_text'].astype(str).to_list()
    elif ref == 'context_last':
        ref_list = data['prompt_text'].astype(str).to_list()
    elif ref == 'empty':
        ref_list = [''] * len(data['candidate_text'])
    cand_list = data['candidate_text'].astype(str).to_list()

    # determine model and calculate scores
    score = []
    if modelname == 'prism':
        if ref == 'multi_avg' or ref == 'multi_max':
            # ref
            ref_list = data['reference_text'].astype(str).to_list()
            ref_score = [
                model.score([c], [r]) for c, r in zip(cand_list, ref_list)
            ]
            # context_last
            ref_list = data['prompt_text'].apply(
                lambda x: str(x).split('\n')[-1]).to_list()
            context_score = [
                model.score([c], [r]) for c, r in zip(cand_list, ref_list)
            ]
            # empty
            ref_list = [''] * len(data['candidate_text'])
            empty_score = [
                model.score([c], [r]) for c, r in zip(cand_list, ref_list)
            ]
        else:
            score = [
                model.score([c], [r]) for c, r in zip(cand_list, ref_list)
            ]
    elif modelname == 'bert_score':
        p, r, score = bert_score.score(cands=cand_list,
                                       refs=ref_list,
                                       lang='en',
                                       verbose=True)
    elif modelname == 'roberta_ft':
        p, r, score = bert_score.score(
            cands=cand_list,
            refs=ref_list,
            lang='en',
            verbose=True,
            model_type='../Chatbot_evaluation/models/roberta_ft',
            num_layers=10)
    elif modelname == 'bleu':
        if ref == 'multi':
            # ref
            ref_list = data['reference_text'].astype(str).to_list()
            # context_last
            context_list = data['prompt_text'].apply(
                lambda x: str(x).split('\n')[-1]).to_list()

            bs = [
                model.compute(predictions=[cand], references=[[ref, ctx]])
                for cand, ref, ctx in zip(cand_list, ref_list, context_list)
            ]
        else:
            bs = [
                model.compute(predictions=[c], references=[[r]])
                for c, r in zip(cand_list, ref_list)
            ]
        # sacrebleu returns a dict per pair; this keeps the brevity-penalty component
        score = [x['bp'] for x in bs]
    elif modelname == 'bleurt':
        preds = model.compute(predictions=cand_list, references=ref_list)
        score = preds['scores']

    # add scores to dataframe
    if modelname == 'prism' and (ref == 'multi_avg' or ref == 'multi_max'):
        data['ref_score'] = ref_score
        data['context_score'] = context_score
        data['empty_score'] = empty_score
        if ref == 'multi_avg':
            data['score'] = data[['ref_score', 'context_score',
                                  'empty_score']].mean(axis=1)
        elif ref == 'multi_max':
            data['score'] = data[['ref_score', 'context_score',
                                  'empty_score']].max(axis=1)
    else:
        data['score'] = score

    # write scores to output
    if outputdir is not None:
        data.to_csv(outputdir, sep='\t')
    return data
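Finally, a hypothetical invocation of get_scores above, assuming a tab-separated file with candidate_text and reference_text columns; the paths below are placeholders.

scored = get_scores(
    'bleu',
    'data/dialogue_eval.tsv',
    outputdir='scores/dialogue_eval_bleu.tsv',
    ref='ref',
)
print(scored[['candidate_text', 'reference_text', 'score']].head())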