def main(ctx, base_url, tenant_name, client_id, client_secret, refresh_token):
    """CLI for interacting with Workday’s Prism API"""

    # initialize the prism class with your credentials
    p = prism.Prism(base_url, tenant_name, client_id, client_secret, refresh_token)

    # create the bearer token
    p.create_bearer_token()

    # store the prism object in the context
    ctx.obj = {"p": p}
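The `ctx` parameter and the `ctx.obj` assignment suggest this `main` function is the group callback of a click-based CLI. Below is a minimal sketch of a subcommand that reuses the authenticated `Prism` object, assuming click is the framework and that `main` is decorated as a group elsewhere; the `upload` subcommand and its arguments are hypothetical and simply chain the same `prism` helper functions used in the next snippet.

import click
import prism

# hypothetical subcommand: reuse the Prism object stored in ctx.obj by main()
@main.command("upload")
@click.argument("schema_path")
@click.argument("table_name")
@click.argument("data_file")
@click.pass_context
def upload(ctx, schema_path, table_name, data_file):
    # retrieve the authenticated Prism client created by the group callback
    p = ctx.obj["p"]
    # load the table schema, create the table, and push the file into it
    schema = prism.load_schema(schema_path)
    table = prism.create_table(p, table_name, schema["fields"])
    prism.upload_file(p, data_file, table["id"], operation="TruncateandInsert")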
import os
import prism

# instantiate the Prism class
p = prism.Prism(
    os.getenv("workday_base_url"),
    os.getenv("workday_tenant_name"),
    os.getenv("prism_client_id"),
    os.getenv("prism_client_secret"),
    os.getenv("prism_refresh_token"),
)

# create the bearer token
p.create_bearer_token()

# load schema for new table
schema = prism.load_schema("schema.json")

# create the table in Prism
table = prism.create_table(p, "Topic_Model_Predictions_BDS", schema["fields"])

# upload the file to the table
prism.upload_file(p, "predictions.csv.gz", table["id"], operation="TruncateandInsert")
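Because the `Prism` object is built straight from `os.getenv` calls, any variable that is unset silently becomes `None` and the failure only surfaces later when the API is called. A small guard like the one below, using the same variable names as the script above, makes misconfiguration obvious up front (a sketch, not part of the library):

import os

# the same environment variable names used above; fail fast if any is missing
REQUIRED_VARS = [
    "workday_base_url",
    "workday_tenant_name",
    "prism_client_id",
    "prism_client_secret",
    "prism_refresh_token",
]

missing = [name for name in REQUIRED_VARS if not os.getenv(name)]
if missing:
    raise EnvironmentError("missing environment variables: " + ", ".join(missing))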
def get_scores(modelname, datadir, outputdir=None, ref='ref'):
    """
    Uses the specified model to score examples read from datadir.
    See README.md for the proper data format.

    Keyword arguments:
    modelname -- name of the model/metric to use
    datadir -- string path to the input data (tsv)
    outputdir -- optional string path to save the scored output
    ref -- optional string denoting which reference to score against:
           'ref', 'context_last', 'empty', 'multi_avg', 'multi_max', or 'multi'
    """
    # check ref argument validity
    if ref not in ['ref', 'context_last', 'empty', 'multi_avg', 'multi_max', 'multi']:
        raise ValueError(
            "ref must be 'ref' or 'context_last' or 'empty' or 'multi_avg' or 'multi_max' or 'multi'."
        )
    if modelname not in __models__:
        raise ValueError("model not listed")

    # load the requested model or metric
    if modelname == 'prism':
        model = prism.Prism(model_dir=os.environ['MODEL_DIR'], lang='en')
    elif modelname == 'bert_score':
        pass  # no model directory
    elif modelname == 'roberta_ft':
        pass  # no model directory
    elif modelname == 'bleu':
        model = datasets.load_metric("sacrebleu")
    elif modelname == 'bleurt':
        model = datasets.load_metric('bleurt', 'bleurt-large-512')
    else:
        warnings.warn('Model not listed.')

    # read in data
    data = pd.read_csv(datadir, sep='\t')

    # determine model inputs
    if ref == 'ref':
        ref_list = data['reference_text'].astype(str).to_list()
    elif ref == 'context_last':
        ref_list = data['prompt_text'].astype(str).to_list()
    elif ref == 'empty':
        ref_list = [''] * len(data['candidate_text'])
    cand_list = data['candidate_text'].astype(str).to_list()

    # determine model and calculate scores
    score = []
    if modelname == 'prism':
        if ref == 'multi_avg' or ref == 'multi_max':
            # score against the reference
            ref_list = data['reference_text'].astype(str).to_list()
            ref_score = [model.score([c], [r]) for c, r in zip(cand_list, ref_list)]
            # score against the last line of the context
            ref_list = data['prompt_text'].apply(lambda x: str(x).split('\n')[-1]).to_list()
            context_score = [model.score([c], [r]) for c, r in zip(cand_list, ref_list)]
            # score against an empty reference
            ref_list = [''] * len(data['candidate_text'])
            empty_score = [model.score([c], [r]) for c, r in zip(cand_list, ref_list)]
        else:
            score = [model.score([c], [r]) for c, r in zip(cand_list, ref_list)]
    elif modelname == 'bert_score':
        p, r, score = bert_score.score(cands=cand_list, refs=ref_list, lang='en', verbose=True)
    elif modelname == 'roberta_ft':
        p, r, score = bert_score.score(
            cands=cand_list,
            refs=ref_list,
            lang='en',
            verbose=True,
            model_type='../Chatbot_evaluation/models/roberta_ft',
            num_layers=10)
    elif modelname == 'bleu':
        if ref == 'multi':
            # reference
            ref_list = data['reference_text'].astype(str).to_list()
            # last line of the context
            context_list = data['prompt_text'].apply(lambda x: str(x).split('\n')[-1]).to_list()
            bs = [
                model.compute(predictions=[cand], references=[[ref, ctx]])
                for cand, ref, ctx in zip(cand_list, ref_list, context_list)
            ]
        else:
            bs = [
                model.compute(predictions=[c], references=[[r]])
                for c, r in zip(cand_list, ref_list)
            ]
        score = [x['bp'] for x in bs]
    elif modelname == 'bleurt':
        preds = model.compute(predictions=cand_list, references=ref_list)
        score = preds['scores']

    # add scores to dataframe
    if modelname == 'prism' and (ref == 'multi_avg' or ref == 'multi_max'):
        data['ref_score'] = ref_score
        data['context_score'] = context_score
        data['empty_score'] = empty_score
        if ref == 'multi_avg':
            data['score'] = data[['ref_score', 'context_score', 'empty_score']].mean(axis=1)
        elif ref == 'multi_max':
            data['score'] = data[['ref_score', 'context_score', 'empty_score']].max(axis=1)
    else:
        data['score'] = score

    # write scores to output
    if outputdir is not None:
        data.to_csv(outputdir, sep='\t')

    return data
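A minimal usage sketch of `get_scores` with the Prism metric: the file paths and the `MODEL_DIR` value are hypothetical, and the input TSV is assumed to contain the `candidate_text`, `reference_text`, and `prompt_text` columns that the function reads.

import os

# hypothetical location of the downloaded Prism checkpoint
os.environ["MODEL_DIR"] = "models/m39v1"

# score candidates against the reference, the last context line, and an empty
# reference, then average the three scores (ref='multi_avg')
scored = get_scores("prism", "data/test_set.tsv",
                    outputdir="data/test_set_scored.tsv", ref="multi_avg")
print(scored[["candidate_text", "score"]].head())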