Example #1
0
 def __init__(self, model_name='bertttt.hdf5', model_path=current_directory_path + '/external_pretrained_models/'):
     """Load a pre-trained tagger model and map it onto GPU device 2.

     Args:
         model_name: file name of the serialized model inside ``model_path``.
         model_path: directory that holds the pre-trained models.

     Raises:
         RuntimeError: if loading or GPU mapping fails (e.g. out of memory);
             the original exception is chained for debugging.
     """
     self.answ = "UNKNOWN ERROR"
     self.model_name = model_name
     self.model_path = model_path
     self.first_object = ''
     self.second_object = ''
     self.predicates = ''
     self.aspects = ''
     try:
         # NOTE(review): GPU device index 2 is hard-coded here — confirm
         # this matches the deployment machine's device layout.
         self.model = TaggerFactory.load(self.model_path + self.model_name, 2)
         print("extract_objects_predicates gpu", self.model.gpu)
         self.model.cuda(device=2)
         self.model.gpu = 2
     except Exception as e:
         # A bare `except:` would also swallow KeyboardInterrupt/SystemExit
         # and hide the real failure; chain the cause instead.
         raise RuntimeError("Can't map to gpu. Maybe it is OOM") from e
Example #2
0
 def __init__(self,
              my_device=6,
              model_name='Aurora.hdf5',
              model_path=current_directory_path +
              '/external_pretrained_models/'):
     """Load a pre-trained tagger model and map it onto the given GPU.

     Args:
         my_device: CUDA device index to load and run the model on.
         model_name: file name of the serialized model inside ``model_path``.
         model_path: directory that holds the pre-trained models.

     Raises:
         RuntimeError: if loading or GPU mapping fails (e.g. out of memory);
             the original exception is chained for debugging.
     """
     self.answ = "UNKNOWN ERROR"
     self.model_name = model_name
     self.model_path = model_path
     self.first_object = ''
     self.second_object = ''
     self.predicates = ''
     # We can't use a set because a span object is a dict and dicts are
     # unhashable; add_span() keeps this list free of duplicates instead.
     self.spans = []
     try:
         self.model = TaggerFactory.load(self.model_path + self.model_name,
                                         my_device)
         self.model.cuda(device=my_device)
         self.model.gpu = my_device
         print("extract_objects_predicates gpu", self.model.gpu)
     except Exception as e:
         # A bare `except:` would also swallow KeyboardInterrupt/SystemExit
         # and hide the real failure; chain the cause instead.
         raise RuntimeError(
             "Init extractor: can't map to gpu. Maybe it is OOM") from e
Example #3
0
    # NOTE(review): fragment of a larger script — the enclosing function
    # header is cut off above this point.
    makedir(os.path.join(args.save_dir,'Tests','Validity'))
    makedir(os.path.join(args.save_dir,'Master-Data-Files'))

    # Load text data as lists of lists of words (sequences) and corresponding list of lists of tags
    data_io = DataIOFactory.create(args)
    word_sequences_train, tag_sequences_train, word_sequences_dev, tag_sequences_dev, word_sequences_test, tag_sequences_test = data_io.read_train_dev_test(args)

    # Load taggers
    # Three pre-trained taggers are read from saved_models/<name>.hdf5:
    # a black-box baseline and two prototype-based models.
    print("Loading models...")
    blackbox_load_name = 'attention'
    prototype_load_name_1 = 'proto-p40-kmeans-push20' 
    prototype_load_name_2 = 'proto-fixed-p40'
    blackbox_path = os.path.join('saved_models','%s.hdf5' % blackbox_load_name)    
    prototype_path_1 = os.path.join('saved_models','%s.hdf5' % prototype_load_name_1)
    prototype_path_2 = os.path.join('saved_models','%s.hdf5' % prototype_load_name_2)
    blackbox_tagger = TaggerFactory.load(blackbox_path, args.gpu)    
    prototype_tagger = TaggerFactory.load(prototype_path_1, args.gpu)    
    fixed_prototype_tagger = TaggerFactory.load(prototype_path_2, args.gpu)    

    # put taggers in dict
    # NOTE(review): fixed_prototype_tagger is loaded but not added to the
    # dict — confirm whether that is intentional or an omission.
    name2tagger_dict = {
        'blackbox' : blackbox_tagger,
        'prototype' : prototype_tagger,
    }

    # all tags and predicted tags must be one of these
    constrain_to_classes = ['pos','neg']    

    # tokenizer + word embedding object (Spacy object) for perturb_sentence() from anchors.anchors.utils
    print("Loading spacy object...")
    spacy_obj = en_core_web_lg.load()
         check_for_lowercase=args.check_for_lowercase,
         embeddings_dim=args.emb_dim,
         verbose=True)
     # Load pre-trained embedding vectors for every unique dataset word.
     word_seq_indexer.load_items_from_embeddings_file_and_unique_words_list(
         emb_fn=args.emb_fn,
         emb_delimiter=args.emb_delimiter,
         emb_load_all=args.emb_load_all,
         unique_words_list=datasets_bank.unique_words_list)
 # NOTE(review): this fragment begins mid-call — the SeqIndexerWord
 # construction it belongs to is cut off above; indentation levels differ
 # because two scrape chunks were spliced together here.
 # Cache the word indexer to disk so later runs can skip the embedding load.
 if args.word_seq_indexer is not None and not isfile(args.word_seq_indexer):
     torch.save(word_seq_indexer, args.word_seq_indexer)
 # Tag_seq_indexer converts lists of lists of tags to lists of lists of integer indices and back
 tag_seq_indexer = SeqIndexerTag(gpu=args.gpu)
 tag_seq_indexer.load_items_from_tag_sequences(tag_sequences_train)
 # Create or load pre-trained tagger
 if args.load is None:
     tagger = TaggerFactory.create(args, word_seq_indexer, tag_seq_indexer,
                                   tag_sequences_train)
 else:
     tagger = TaggerFactory.load(args.load, args.gpu)
 # Create evaluator
 evaluator = EvaluatorFactory.create(args)
 # Create optimizer
 optimizer, scheduler = OptimizerFactory.create(args, tagger)
 # Prepare report and temporary variables for "save best" strategy
 report = Report(args.report_fn,
                 args,
                 score_names=('train loss', '%s-train' % args.evaluator,
                              '%s-dev' % args.evaluator,
                              '%s-test' % args.evaluator))
 # Initialize training variables
 # Number of mini-batch iterations per epoch (floor: drops the remainder).
 iterations_num = floor(datasets_bank.train_data_num / args.batch_size)
 best_dev_score = -1
Example #5
0
    # NOTE(review): fragment — the enclosing function header is cut off
    # above; `cat_names`, `dataset` and `args` are defined there.
    n_values = [len(dataset.categorical_names[i]) for i in cat_names]
    # NOTE(review): `categorical_features` / `n_values` were removed from
    # sklearn.preprocessing.OneHotEncoder in scikit-learn 0.22 — this code
    # requires an older scikit-learn (or a port to ColumnTransformer).
    data_encoder = sklearn.preprocessing.OneHotEncoder(
        categorical_features=cat_names, n_values=n_values)
    data_encoder.fit(dataset.data)

    # add dataset info to data_encoder
    data_encoder.feature_names = dataset.feature_names
    data_encoder.categorical_names = dataset.categorical_names

    # Tag_seq_indexer converts lists of lists of tags to lists of lists of integer indices and back
    tag_seq_indexer = SeqIndexerTag(gpu=args.gpu)
    # NOTE(review): other call sites use load_items_from_tag_sequences
    # (plural) — confirm this singular method name exists.
    tag_seq_indexer.load_items_from_tag_sequence(dataset.class_names)

    # Create or load pre-trained tagger
    if args.load_name is None:
        tagger = TaggerFactory.create(args, data_encoder, tag_seq_indexer)
        start_epoch = args.start_epoch
    else:
        load_path = os.path.join('saved_models', '%s.hdf5' % args.load_name)
        print("Loading model from %s" % load_path)
        tagger = TaggerFactory.load(load_path, args.gpu)

        # NOTE(review): report_path is computed but not used within this
        # fragment — confirm it is consumed further down.
        report_path = os.path.join('saved_models',
                                   '%s-report.txt' % args.load_name)
        start_epoch = args.start_epoch

    # init proto model layers
    if args.pretrained_model is not None and args.load_name is None:
        pretrained_path = os.path.join('saved_models',
                                       '%s.hdf5' % args.pretrained_model)
        tagger.initialize_from_pretrained(pretrained_path)
Example #6
0
    # NOTE(review): fragment — `parser`, `str2bool` and the enclosing
    # function are defined above this view; the last statement below is
    # also cut off mid-call.
    parser.add_argument('--dataset-sort',
                        type=str2bool,
                        default=False,
                        help='Sort sequences by length for training.',
                        nargs='?',
                        choices=['yes', True, 'no (default)', False])
    parser.add_argument('--save-data',
                        type=str2bool,
                        default=False,
                        help='Save a new dataset split.')
    args = parser.parse_args()

    # Load tagger model
    load_path = os.path.join('saved_models', '%s.hdf5' % args.load_name)
    print("Loading model from %s" % load_path)
    tagger = TaggerFactory.load(load_path, args.gpu)

    # Create DataIO object
    # NOTE(review): DataIOFactory.create(args) is called again a few lines
    # below and shadows this binding — the first call looks redundant.
    data_io = DataIOFactory.create(args)

    # Load text data as lists of lists of words (sequences) and corresponding list of lists of tags
    data_io = DataIOFactory.create(args)
    dataset, X_train, Y_train, X_dev, Y_dev, X_test, Y_test = data_io.read_train_dev_test(
        args)

    # fit imputation models
    # import ipdb; ipdb.set_trace()
    # tagger.fit_imputation_models(dataset, counterfactual_method = 'conditional_expected_value')

    # sklearn baselines
    explainer = anchor_tabular.AnchorTabularExplainer(
Example #7
0
                     default='f1-connl',
                     help='Evaluation method.',
                     choices=[
                         'f1-connl', 'f1-alpha-match-10',
                         'f1-alpha-match-05', 'f1-macro', 'f05-macro',
                         'token-acc'
                     ])
 parser.add_argument('--gpu',
                     '-g',
                     type=int,
                     default=0,
                     help='GPU device number, 0 by default, -1 means CPU.')
 print('Start run_tagger.py.')
 args = parser.parse_args()
 # Load tagger model
 tagger = TaggerFactory.load(args.load, args.gpu)
 # Create DataIO object
 data_io = DataIOFactory.create(args)
 # Read data in CoNNL-2003 file format
 word_sequences, targets_tag_sequences_test = \
     data_io.read_data(args.input)
 # Create evaluator
 evaluator = EvaluatorFactory.create(args)
 # Get tags as sequences of strings
 # Predict in mini-batches of 100 sequences to bound GPU memory use.
 output_tag_sequences_test = tagger.predict_tags_from_words(word_sequences,
                                                            batch_size=100)
 # Score predictions against gold tags with the configured evaluator.
 test_score, test_msg = \
     evaluator.get_evaluation_score(targets_tag_sequences=targets_tag_sequences_test,
                                    outputs_tag_sequences=output_tag_sequences_test,
                                    word_sequences=word_sequences)
 # Show the evaluation results
Example #8
0
         unique_words_list=datasets_bank.unique_words_list)
 # NOTE(review): fragment begins mid-call — the statement the line above
 # belongs to is cut off before this view.
 # Cache the word indexer to disk so later runs can skip the embedding load.
 if args.word_seq_indexer is not None and not isfile(args.word_seq_indexer):
     torch.save(word_seq_indexer, args.word_seq_indexer)
 # Tag_seq_indexer converts lists of lists of tags to lists of lists of integer indices and back
 tag_seq_indexer = SeqIndexerTag(gpu=args.gpu)
 tag_seq_indexer.load_items_from_tag_sequences(tag_sequences_train)
 # Create or load pre-trained tagger
 if args.load is None:
     # Debug prints for the ELMo configuration — candidates for removal.
     print("in main")
     print(args.isElmo)
     print(args.elmo_weights)
     print(args.elmo_options)
     tagger = TaggerFactory.create(args,
                                   word_seq_indexer,
                                   tag_seq_indexer,
                                   tag_sequences_train,
                                   args.isElmo,
                                   weight_file=args.elmo_weights,
                                   options_file=args.elmo_options)
 else:
     tagger = TaggerFactory.load(args.load, args.gpu)
 # Create evaluator
 evaluator = EvaluatorFactory.create(args)
 # Create optimizer
 optimizer, scheduler = OptimizerFactory.create(args, tagger)
 # Prepare report and temporary variables for "save best" strategy
 report = Report(args.report_fn,
                 args,
                 score_names=('train loss', '%s-train' % args.evaluator,
                              '%s-dev' % args.evaluator,
                              '%s-test' % args.evaluator))
Example #9
0
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run simple pipeline')
    parser.add_argument(
        '--input',
        help=
        'input phrases as a list form, i.e [qwestion1, qwestion2, qwestion3]')

    args = parser.parse_args()

    # Demo sentences used when --input is not supplied.
    demo_sentences = [
        'what is better amazon or itunes for showing',
        'what is better mouse or rat', 'what is easier to make bread o pizza'
    ]
    # BUG FIX: previously the parsed --input was computed but never used
    # (the demo sentences were always tagged), and omitting --input crashed
    # because ast.literal_eval(None) raises.
    sentences = ast.literal_eval(args.input) if args.input else demo_sentences

    # Tokenize sentences and predict a tag per token with the pre-trained model.
    words = create_sequence_from_sentence(sentences)
    model = TaggerFactory.load(PATH_TO_PRETRAINED + MODEL_NAME)
    tags = model.predict_tags_from_words(words)

    # For each sentence, keep the first two detected objects to compare.
    objects_list = []
    for sentence_words, sentence_tags in zip(words, tags):
        objects = get_objects(sentence_words, sentence_tags)
        # Validate with an explicit exception: `assert` is stripped under -O.
        if len(objects) < 2:
            raise ValueError(
                "We have %d objects to compare" % (len(objects)))
        objects_list.append((objects[0], objects[1]))

    # Query the comparison service for each object pair and generate advice.
    for obj0, obj1 in objects_list:
        response = get_response(obj0, obj1, False)
        response_json = response.json()
        merlin = diviner()
        merlin.create_from_json(response_json)
        merlin.generate_advice()