def __init__(self, model_name='bertttt.hdf5',
             model_path=current_directory_path + '/external_pretrained_models/',
             my_device=2):
    """Load a pretrained tagger checkpoint and map it onto a CUDA device.

    Args:
        model_name: file name of the saved tagger checkpoint.
        model_path: directory containing the checkpoint.
        my_device: CUDA device index to map the model onto. Defaults to 2,
            matching the previously hard-coded device, so existing callers
            are unaffected.

    Raises:
        RuntimeError: if the checkpoint cannot be loaded or moved to the GPU
            (the original exception is chained for diagnosis).
    """
    self.answ = "UNKNOWN ERROR"
    self.model_name = model_name
    self.model_path = model_path
    self.first_object = ''
    self.second_object = ''
    self.predicates = ''
    self.aspects = ''
    try:
        self.model = TaggerFactory.load(self.model_path + self.model_name, my_device)
        print("extract_objects_predicates gpu", self.model.gpu)
        self.model.cuda(device=my_device)
        self.model.gpu = my_device
    except Exception as e:
        # Chain the cause: a bare `except` previously hid whether this was
        # an OOM, a missing file, or a deserialization failure.
        raise RuntimeError("Can't map to gpu. Maybe it is OOM") from e
def __init__(self, my_device=6, model_name='Aurora.hdf5',
             model_path=current_directory_path + '/external_pretrained_models/'):
    """Load the 'Aurora' tagger checkpoint and map it onto a CUDA device.

    Args:
        my_device: CUDA device index for the model (default 6).
        model_name: file name of the saved tagger checkpoint.
        model_path: directory containing the checkpoint.

    Raises:
        RuntimeError: if the checkpoint cannot be loaded or moved to the GPU
            (the original exception is chained for diagnosis).
    """
    self.answ = "UNKNOWN ERROR"
    self.model_name = model_name
    self.model_path = model_path
    self.first_object = ''
    self.second_object = ''
    self.predicates = ''
    # We can't use a set because a span object is a dict and dicts are
    # unhashable. add_span keeps non-repeatability instead.
    self.spans = []
    try:
        self.model = TaggerFactory.load(self.model_path + self.model_name, my_device)
        self.model.cuda(device=my_device)
        self.model.gpu = my_device
        print("extract_objects_predicates gpu", self.model.gpu)
    except Exception as e:
        # Chain the cause: a bare `except` previously hid whether this was
        # an OOM, a missing file, or a deserialization failure.
        raise RuntimeError("Init extractor: can't map to gpu. Maybe it is OOM") from e
# Prepare output directories for validity-test artifacts and master data files.
makedir(os.path.join(args.save_dir,'Tests','Validity'))
makedir(os.path.join(args.save_dir,'Master-Data-Files'))

# Load text data as lists of lists of words (sequences) and corresponding list of lists of tags
data_io = DataIOFactory.create(args)
word_sequences_train, tag_sequences_train, word_sequences_dev, tag_sequences_dev, word_sequences_test, tag_sequences_test = data_io.read_train_dev_test(args)

# Load taggers: one black-box model and two prototype-based models,
# all saved as .hdf5 checkpoints under saved_models/.
print("Loading models...")
blackbox_load_name = 'attention'
prototype_load_name_1 = 'proto-p40-kmeans-push20'
prototype_load_name_2 = 'proto-fixed-p40'
blackbox_path = os.path.join('saved_models','%s.hdf5' % blackbox_load_name)
prototype_path_1 = os.path.join('saved_models','%s.hdf5' % prototype_load_name_1)
prototype_path_2 = os.path.join('saved_models','%s.hdf5' % prototype_load_name_2)
blackbox_tagger = TaggerFactory.load(blackbox_path, args.gpu)
prototype_tagger = TaggerFactory.load(prototype_path_1, args.gpu)
fixed_prototype_tagger = TaggerFactory.load(prototype_path_2, args.gpu)

# put taggers in dict
# NOTE(review): fixed_prototype_tagger is loaded above but not registered
# here — confirm whether its omission from name2tagger_dict is intentional.
name2tagger_dict = {
    'blackbox' : blackbox_tagger,
    'prototype' : prototype_tagger,
}

# all tags and predicted tags must be one of these
constrain_to_classes = ['pos','neg']

# tokenizer + word embedding object (Spacy object) for perturb_sentence() from anchors.anchors.utils
print("Loading spacy object...")
spacy_obj = en_core_web_lg.load()
# NOTE(review): the first three lines below are the tail of a call that
# opens above this chunk (construction of word_seq_indexer); comments here
# are legal inside the still-open parentheses.
    check_for_lowercase=args.check_for_lowercase,
    embeddings_dim=args.emb_dim,
    verbose=True)
# Build the word-embedding vocabulary from the embeddings file, restricted
# to words that actually occur in the datasets.
word_seq_indexer.load_items_from_embeddings_file_and_unique_words_list(
    emb_fn=args.emb_fn,
    emb_delimiter=args.emb_delimiter,
    emb_load_all=args.emb_load_all,
    unique_words_list=datasets_bank.unique_words_list)
# Cache the indexer to disk so later runs can skip the slow embedding load.
if args.word_seq_indexer is not None and not isfile(args.word_seq_indexer):
    torch.save(word_seq_indexer, args.word_seq_indexer)
# Tag_seq_indexer converts lists of lists of tags to lists of lists of integer indices and back
tag_seq_indexer = SeqIndexerTag(gpu=args.gpu)
tag_seq_indexer.load_items_from_tag_sequences(tag_sequences_train)
# Create or load pre-trained tagger
if args.load is None:
    tagger = TaggerFactory.create(args, word_seq_indexer, tag_seq_indexer, tag_sequences_train)
else:
    tagger = TaggerFactory.load(args.load, args.gpu)
# Create evaluator
evaluator = EvaluatorFactory.create(args)
# Create optimizer
optimizer, scheduler = OptimizerFactory.create(args, tagger)
# Prepare report and temporary variables for "save best" strategy
report = Report(args.report_fn, args,
                score_names=('train loss',
                             '%s-train' % args.evaluator,
                             '%s-dev' % args.evaluator,
                             '%s-test' % args.evaluator))
# Initialize training variables
iterations_num = floor(datasets_bank.train_data_num / args.batch_size)
best_dev_score = -1  # sentinel: any real dev score beats this on first epoch
# One entry per categorical feature: how many distinct category values it has.
n_values = [len(dataset.categorical_names[i]) for i in cat_names]
# NOTE(review): `categorical_features` and `n_values` were deprecated in
# scikit-learn 0.20 and removed in 0.22 — this code pins an older sklearn;
# confirm the project's pinned version before upgrading.
data_encoder = sklearn.preprocessing.OneHotEncoder(
    categorical_features=cat_names,
    n_values=n_values)
data_encoder.fit(dataset.data)
# add dataset info to data_encoder (carried along for later decoding/reporting)
data_encoder.feature_names = dataset.feature_names
data_encoder.categorical_names = dataset.categorical_names
# Tag_seq_indexer converts lists of lists of tags to lists of lists of integer indices and back
tag_seq_indexer = SeqIndexerTag(gpu=args.gpu)
tag_seq_indexer.load_items_from_tag_sequence(dataset.class_names)
# Create or load pre-trained tagger
if args.load_name is None:
    tagger = TaggerFactory.create(args, data_encoder, tag_seq_indexer)
    start_epoch = args.start_epoch
else:
    load_path = os.path.join('saved_models', '%s.hdf5' % args.load_name)
    print("Loading model from %s" % load_path)
    tagger = TaggerFactory.load(load_path, args.gpu)
    report_path = os.path.join('saved_models', '%s-report.txt' % args.load_name)
    start_epoch = args.start_epoch
# init proto model layers — only when the tagger was freshly created above,
# never when it was restored from a checkpoint.
if args.pretrained_model is not None and args.load_name is None:
    pretrained_path = os.path.join('saved_models', '%s.hdf5' % args.pretrained_model)
    tagger.initialize_from_pretrained(pretrained_path)
parser.add_argument('--dataset-sort', type=str2bool, default=False, help='Sort sequences by length for training.', nargs='?', choices=['yes', True, 'no (default)', False]) parser.add_argument('--save-data', type=str2bool, default=False, help='Save a new dataset split.') args = parser.parse_args() # Load tagger model load_path = os.path.join('saved_models', '%s.hdf5' % args.load_name) print("Loading model from %s" % load_path) tagger = TaggerFactory.load(load_path, args.gpu) # Create DataIO object data_io = DataIOFactory.create(args) # Load text data as lists of lists of words (sequences) and corresponding list of lists of tags data_io = DataIOFactory.create(args) dataset, X_train, Y_train, X_dev, Y_dev, X_test, Y_test = data_io.read_train_dev_test( args) # fit imputation models # import ipdb; ipdb.set_trace() # tagger.fit_imputation_models(dataset, counterfactual_method = 'conditional_expected_value') # sklearn baselines explainer = anchor_tabular.AnchorTabularExplainer(
# NOTE(review): the first lines below are the tail of a parser.add_argument
# call that opens above this chunk (the evaluator-selection flag).
    default='f1-connl',
    help='Evaluation method.',
    choices=['f1-connl', 'f1-alpha-match-10', 'f1-alpha-match-05',
             'f1-macro', 'f05-macro', 'token-acc'])
parser.add_argument('--gpu', '-g', type=int, default=0,
                    help='GPU device number, 0 by default, -1 means CPU.')
print('Start run_tagger.py.')
args = parser.parse_args()
# Load tagger model
tagger = TaggerFactory.load(args.load, args.gpu)
# Create DataIO object
data_io = DataIOFactory.create(args)
# Read data in CoNNL-2003 file format
word_sequences, targets_tag_sequences_test = \
    data_io.read_data(args.input)
# Create evaluator
evaluator = EvaluatorFactory.create(args)
# Get tags as sequences of strings
output_tag_sequences_test = tagger.predict_tags_from_words(word_sequences, batch_size=100)
# Score predictions against the gold tags.
test_score, test_msg = \
    evaluator.get_evaluation_score(targets_tag_sequences=targets_tag_sequences_test,
                                   outputs_tag_sequences=output_tag_sequences_test,
                                   word_sequences=word_sequences)
# Show the evaluation results
# NOTE(review): the first line below is the tail of the
# load_items_from_embeddings_file_and_unique_words_list(...) call that
# opens above this chunk.
    unique_words_list=datasets_bank.unique_words_list)
# Cache the word indexer to disk so later runs skip the slow embedding load.
if args.word_seq_indexer is not None and not isfile(args.word_seq_indexer):
    torch.save(word_seq_indexer, args.word_seq_indexer)
# Tag_seq_indexer converts lists of lists of tags to lists of lists of integer indices and back
tag_seq_indexer = SeqIndexerTag(gpu=args.gpu)
tag_seq_indexer.load_items_from_tag_sequences(tag_sequences_train)
# Create or load pre-trained tagger
if args.load is None:
    # Debug output for the ELMo configuration path.
    print("in main")
    print(args.isElmo)
    print(args.elmo_weights)
    print(args.elmo_options)
    tagger = TaggerFactory.create(args, word_seq_indexer, tag_seq_indexer,
                                  tag_sequences_train, args.isElmo,
                                  weight_file=args.elmo_weights,
                                  options_file=args.elmo_options)
else:
    tagger = TaggerFactory.load(args.load, args.gpu)
# Create evaluator
evaluator = EvaluatorFactory.create(args)
# Create optimizer
optimizer, scheduler = OptimizerFactory.create(args, tagger)
# Prepare report and temporary variables for "save best" strategy
report = Report(args.report_fn, args,
                score_names=('train loss',
                             '%s-train' % args.evaluator,
                             '%s-dev' % args.evaluator,
                             '%s-test' % args.evaluator))
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run simple pipeline')
    parser.add_argument(
        '--input',
        help=
        'input phrases as a list form, i.e [qwestion1, qwestion2, qwestion3]')
    args = parser.parse_args()
    # BUG FIX: previously the parsed --input value `x` was computed and then
    # ignored — a hard-coded sample list was tagged instead. Use the
    # user-supplied phrases; keep the samples as a fallback demo when no
    # --input is given (ast.literal_eval(None) would raise anyway).
    if args.input:
        x = ast.literal_eval(args.input)
    else:
        x = [
            'what is better amazon or itunes for showing',
            'what is better mouse or rat',
            'what is easier to make bread o pizza'
        ]
    words = create_sequence_from_sentence(x)
    model = TaggerFactory.load(PATH_TO_PRETRAINED + MODEL_NAME)
    tags = model.predict_tags_from_words(words)
    # Pair the first two detected objects of each sentence for comparison.
    objects_list = []
    for sentence_words, sentence_tags in zip(words, tags):
        objects = get_objects(sentence_words, sentence_tags)
        if len(objects) < 2:
            # `assert` is stripped under `python -O`; validate explicitly.
            raise ValueError("We have %d objects to compare" % len(objects))
        objects_list.append((objects[0], objects[1]))
    # Query the comparison service and generate advice for each object pair.
    for obj0, obj1 in objects_list:
        response = get_response(obj0, obj1, False)
        response_json = response.json()
        Merlin = diviner()
        Merlin.create_from_json(response_json)
        Merlin.generate_advice()