def reload_model():
    """Swap the globally served model for the checkpoint named in the request.

    Reads the request parameters, resolves the requested checkpoint relative
    to the directory of the CLI-supplied checkpoint, reloads the globals
    ``model``/``tokenizer``/``checkpoint_fn``, and returns a success Response.
    Any failure is logged with its traceback and re-raised as InvalidUsage.
    """
    global model, tokenizer, checkpoint_fn
    try:
        request_start = time.time()
        logging.info('prediction requested')
        params = get_params()
        logger.debug(json.dumps(params, indent=2))
        # The requested checkpoint is looked up next to the checkpoint the
        # server was originally started with.
        base_checkpoint = args.model_checkpoint
        checkpoint_path = os.path.join(
            os.path.dirname(base_checkpoint), params['model_checkpoint'])
        model, tokenizer, checkpoint_fn = load_model(
            model_checkpoint=checkpoint_path,
            model_type=params['model_type'])
        response = Response(
            f'loaded model {params["model_checkpoint"]} successfully')
        logger.info("Time spent handling the request: %f" %
                    (time.time() - request_start))
    except Exception as e:
        # Log the full traceback, then surface a compact one-line summary
        # (exception type, message, line, file) to the API client.
        trace = traceback.format_exc()
        logger.error(trace)
        _, _, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        raise InvalidUsage('%s: %s @line %s in %s' %
                           (type(e).__name__, str(e), exc_tb.tb_lineno, fname))
    return response
def load():
    """Load and return the interaction model."""
    loaded_model = interact.load_model()
    return loaded_model
]) return res return _context_fetcher if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__file__) args = get_args() random.seed(args.seed) torch.random.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) model, tokenizer, checkpoint_fn = load_model( model_checkpoint=args.model_checkpoint, model_type=args.model) try: logger.info('create wikipedia context fetcher ...') context_fetcher = create_wikipedia_context_fetcher( wikipedia_file=args.wikipedia_dump, base_url=args.entity_linking_service_url) except IOError as e: logger.warning( 'could not create a context fetcher. Please provide a context with every request.' % args.spacy_model) context_fetcher = None logger.info('Starting the API') if args.deploy: logger.info('use deployment server')
def load():
    """Load the interaction model together with the candidate set.

    Returns a 2-tuple of (model, candidate set); the candidate set is read
    from the fixed relative path ``../data/candidate_set.txt``.
    """
    loaded_model = interact.load_model()
    candidates = load_candidate_set("../data/candidate_set.txt")
    return loaded_model, candidates
def pred_attacks(df,
                 model_path,
                 args,
                 output_clm,
                 context='title',
                 attacks_clm='premise_counter_premise_pairs',
                 post_clm='post',
                 rand_premise_idx=-1,
                 rand_premises_clm='',
                 baseline=False,
                 random=False):
    """Generate counter-attack predictions for every premise/counter pair in ``df``.

    For each row and each pair in ``row[attacks_clm]`` the argument context is
    encoded (title only, full post, or both, depending on ``context``), trimmed
    to 510 tokens, and passed to ``interact.sample_sequence``.

    Args:
        df: DataFrame with one post per row; must contain 'title',
            ``post_clm`` and ``attacks_clm`` columns.
        model_path: checkpoint path handed to ``interact.load_model``.
        args: generation arguments forwarded to sampling/trimming helpers.
        output_clm: column name the predictions are written into.
        context: 'title', 'full_post', or 'title+full_post' — which text
            forms the encoded argument.
        attacks_clm: column holding (premises, counter) pairs per post.
        rand_premise_idx: index into ``row[rand_premises_clm]`` when
            ``random`` is set; pass ``None`` to encode the whole column value.
        rand_premises_clm: column of premises used in ``random`` mode.
        baseline: if True, sample without any weak-premise conditioning.
        random: if True, condition on a premise from ``rand_premises_clm``
            instead of the pair's own premises.
            NOTE: this parameter shadows the stdlib ``random`` module inside
            the function body; kept for backward compatibility with callers.

    Returns:
        ``df`` with predictions stored in ``df[output_clm]`` as
        ``[[premises, predicted_counter], ...]`` per row.
    """
    model, tokenizer = interact.load_model(model_path)
    predictions = []
    for _, row in df.iterrows():  # row index is not needed
        post_attacks = []
        for premise_counter in row[attacks_clm]:
            # Build the encoded argument context for this post.
            if context == 'title+full_post':
                argument = [tokenizer.encode(row['title'])] + [
                    tokenizer.encode(sent) for sent in row[post_clm]
                ]
            else:
                argument = [tokenizer.encode(
                    row['title'])] if context == 'title' else [
                        tokenizer.encode(sent) for sent in row[post_clm]
                    ]
            # All modes trim the argument to 510 tokens before sampling.
            argument = trim_argument(argument, 510, args)
            if baseline:
                # Unconditioned generation: no weak premises supplied.
                pred_counter = interact.sample_sequence(argument, [],
                                                        tokenizer,
                                                        model,
                                                        args,
                                                        baseline=True)
            elif random:
                # Condition on a (randomly chosen) premise column instead of
                # the pair's own premises.
                if rand_premise_idx is not None:
                    weak_premise_encoded = [
                        tokenizer.encode(
                            row[rand_premises_clm][rand_premise_idx])
                    ]
                else:
                    weak_premise_encoded = [
                        tokenizer.encode(row[rand_premises_clm])
                    ]
                pred_counter = interact.sample_sequence(
                    argument, weak_premise_encoded, tokenizer, model, args)
            else:
                # Condition on the pair's own premises.
                weak_premise_encoded = [
                    tokenizer.encode(premise)
                    for premise in premise_counter[0]
                ]
                pred_counter = interact.sample_sequence(
                    argument, weak_premise_encoded, tokenizer, model, args)
            post_attacks.append([premise_counter[0], pred_counter])
        predictions.append(post_attacks)
    df[output_clm] = predictions
    return df
def perform_attacks_hua_df(df,
                           model_path,
                           args,
                           output_clm,
                           context,
                           weak_premise_clm,
                           weak_premise_idx=None,
                           baseline=False):
    """Generate one counter-attack prediction per row of a Hua-style DataFrame.

    Encodes the claim and/or post as context (per ``context``), conditions on
    the weak premises in ``row[weak_premise_clm]`` (unless ``baseline``),
    samples a counter via ``interact.sample_sequence`` and strips quoted
    source text from the prediction.

    Args:
        df: DataFrame with 'claim', 'post' and ``weak_premise_clm`` columns.
        model_path: checkpoint path handed to ``interact.load_model``.
        args: generation arguments; ``args.premise_extra`` reserves token
            budget for the encoded weak premises when trimming.
        output_clm: column name the predictions are written into.
        context: 'title', 'full_post', or 'title+full_post'.
        weak_premise_clm: column holding the weak premises (or, when
            ``weak_premise_idx`` is given, a list of premise lists).
        weak_premise_idx: optional index selecting one premise list from
            ``row[weak_premise_clm]``.
        baseline: if True, sample without weak-premise conditioning and strip
            the claim/post text from the output instead.

    Returns:
        ``df`` with one predicted counter string per row in
        ``df[output_clm]``.
    """
    model, tokenizer = interact.load_model(model_path)
    # NOTE: renamed from `pred_attacks` — the old local shadowed the sibling
    # function of that name in this module.
    predictions = []
    for _, row in df.iterrows():
        if baseline:
            argument = [row['claim']] + row['post']
            argument = [tokenizer.encode(sentence) for sentence in argument]
            argument = trim_argument(argument, 510, args)
            pred_counter = interact.sample_sequence(argument, [],
                                                    tokenizer,
                                                    model,
                                                    args,
                                                    baseline=True)
            # Remove quoted source text from the generated counter.
            pred_counter = pred_counter.replace(row['claim'].lower(), '')
            for sent in row['post']:
                pred_counter = pred_counter.replace(sent.lower(), '')
        else:
            if context == 'title+full_post':
                argument = [tokenizer.encode(row['claim'])] + [
                    tokenizer.encode(sent) for sent in row['post']
                ]
            else:
                argument = [tokenizer.encode(row['claim'])
                            ] if context == 'title' else [
                                tokenizer.encode(sent) for sent in row['post']
                            ]
            if weak_premise_idx is not None:
                weak_premise_encoded = [
                    tokenizer.encode(premise)
                    for premise in row[weak_premise_clm][weak_premise_idx]
                ]
            else:
                weak_premise_encoded = [
                    tokenizer.encode(premise)
                    for premise in row[weak_premise_clm]
                ]
            if args.premise_extra:
                # Reserve room in the 510-token budget for the premises that
                # will be appended to the input.
                argument = trim_argument(
                    argument,
                    510 - sum(len(p) for p in weak_premise_encoded), args)
            else:
                argument = trim_argument(argument, 510, args)
            pred_counter = interact.sample_sequence(argument,
                                                    weak_premise_encoded,
                                                    tokenizer, model, args)
            # Remove quoted premise text from the generated counter.
            # NOTE(review): when weak_premise_idx is not None this iterates
            # the outer list (lists, not strings) — looks suspicious, but
            # preserved as-is; confirm the column schema before changing.
            for p in row[weak_premise_clm]:
                pred_counter = pred_counter.replace(p.lower(), '')
        predictions.append(pred_counter)
    df[output_clm] = predictions
    return df