def __init__(self, params, batcher, prepare=None):
    # parameters
    params = utils.dotdict(params)
    params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
    params.seed = 1111 if 'seed' not in params else params.seed
    params.batch_size = 128 if 'batch_size' not in params else params.batch_size
    params.nhid = 0 if 'nhid' not in params else params.nhid
    params.kfold = 5 if 'kfold' not in params else params.kfold

    if 'classifier' not in params or not params['classifier']:
        params.classifier = {'nhid': 0}

    assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

    self.params = params

    # batcher and prepare
    self.batcher = batcher
    self.prepare = prepare if prepare else lambda x, y: None

    self.list_tasks = ['CR', 'MR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC',
                       'MRPC', 'SICKRelatedness', 'SICKEntailment',
                       'STSBenchmark', 'SNLI', 'ImageCaptionRetrieval',
                       'STS12', 'STS13', 'STS14', 'STS15', 'STS16']
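# Minimal usage sketch for the constructor above, assuming the stock
# `senteval` package: callers supply `prepare` and `batcher` callbacks and
# then run `se.eval`. `load_word_vectors` is a hypothetical helper returning
# a {word: 300-d numpy vector} dict; everything else is the standard API.

import numpy as np
import senteval

def prepare(params, samples):
    # attach a {word: vector} lookup for the batcher (hypothetical helper)
    params.word_vec = load_word_vectors(samples)

def batcher(params, batch):
    # SentEval passes `batch` as a list of tokenized sentences (lists of
    # words); return one row per sentence: the mean of its known word vectors
    embeddings = []
    for sent in batch:
        vecs = [params.word_vec[w] for w in sent if w in params.word_vec]
        embeddings.append(np.mean(vecs, axis=0) if vecs else np.zeros(300))
    return np.vstack(embeddings)

se = senteval.engine.SE({'task_path': PATH_TO_DATA, 'usepytorch': True,
                         'kfold': 5}, batcher, prepare)
results = se.eval(['MR', 'STS14'])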
def __init__(self, params, batcher, prepare=None):
    # parameters
    params = utils.dotdict(params)
    params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
    params.seed = 1111 if 'seed' not in params else params.seed
    params.batch_size = 128 if 'batch_size' not in params else params.batch_size
    params.nhid = 0 if 'nhid' not in params else params.nhid
    params.kfold = 5 if 'kfold' not in params else params.kfold

    if 'classifier' not in params or not params['classifier']:
        params.classifier = {'nhid': 0}

    assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

    self.params = params

    # batcher and prepare
    self.batcher = batcher
    self.prepare = prepare if prepare else lambda x, y: None

    self.list_tasks = ['BioC', 'CitationSA', 'ClinicalSA', 'BioASQ', 'PICO',
                       'PubMed20K', 'RQE', 'ClinicalSTS', 'BIOSSES', 'MedNLI']
def __init__(self, params, batcher, prepare=None):
    # parameters
    params = utils.dotdict(params)
    params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
    params.seed = 1111 if 'seed' not in params else params.seed
    params.batch_size = 128 if 'batch_size' not in params else params.batch_size
    params.nhid = 0 if 'nhid' not in params else params.nhid
    params.kfold = 5 if 'kfold' not in params else params.kfold

    if 'classifier' not in params or not params['classifier']:
        params.classifier = {'nhid': 0}

    assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

    self.params = params

    # batcher and prepare
    self.batcher = batcher
    self.prepare = prepare if prepare else lambda x, y: None

    self.list_tasks = ['CR', 'MR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC',
                       'MRPC', 'SICKRelatedness', 'SICKEntailment',
                       'STSBenchmark', 'SNLI', 'ImageCaptionRetrieval',
                       'STS12', 'STS13', 'STS14', 'STS15', 'STS16',
                       'Length', 'WordContent', 'Depth', 'TopConstituents',
                       'BigramShift', 'Tense', 'SubjNumber', 'ObjNumber',
                       'OddManOut', 'CoordinationInversion',
                       'AmenitySimilarEvents']
def __init__(self, params, batcher, prepare=None):
    # parameters
    params = utils.dotdict(params)
    params.seed = 1111 if 'seed' not in params else params.seed
    params.batch_size = 128 if 'batch_size' not in params else params.batch_size

    self.params = params

    # batcher and prepare
    self.batcher = batcher
    self.prepare = prepare if prepare else lambda x, y: None

    self.list_tasks = ['STS12', 'STS13', 'STS14', 'STS15', 'STS16']
def apply_logician(s1, s2, is_list=False, sick_model=False):
    # is_list: if False, s1 and s2 are raw sentence strings;
    #          if True, s1 and s2 are lists of lists of words
    # sick_model: if True, use the SICK model for prediction;
    #             if False, use the SNLI model

    # Load InferSent model
    params_model = {'bsize': 64, 'word_emb_dim': 300, 'enc_lstm_dim': 2048,
                    'pool_type': 'max', 'dpout_model': 0.0, 'version': V}
    model = InferSent(params_model)
    model.load_state_dict(torch.load(MODEL_PATH))
    model.set_w2v_path(PATH_TO_W2V)

    params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}
    params_senteval['classifier'] = {'nhid': 0, 'optim': 'rmsprop',
                                     'batch_size': 128, 'tenacity': 3,
                                     'epoch_size': 2}
    params_senteval['infersent'] = model.cuda()

    if not is_list:
        s1 = convert_str2lst(s1)
        s2 = convert_str2lst(s2)

    samples = s1 + s2
    params_senteval['batch_size'] = min(128, len(s1))
    params_senteval = utils.dotdict(params_senteval)
    params_senteval.usepytorch = True

    prepare(params_senteval, samples)
    emb_s1 = batcher(params_senteval, s1)
    emb_s2 = batcher(params_senteval, s2)

    if sick_model:
        # SICK classifier features: [|u - v|, u * v]
        testF = np.c_[np.abs(emb_s1 - emb_s2), emb_s1 * emb_s2]
        cp = torch.load('./saved_sick.pth')
        print('[Contradiction Neutral Entailment]')
    else:
        # SNLI classifier features: [u, v, u * v, |u - v|]
        testF = np.c_[emb_s1, emb_s2, emb_s1 * emb_s2, np.abs(emb_s1 - emb_s2)]
        cp = torch.load('./saved_snli_augment_ordered.pth')
        print('[Entailment Neutral Contradiction]')

    inputdim = testF.shape[1]
    nclasses = 3
    clf = nn.Sequential(nn.Linear(inputdim, nclasses)).cuda()
    clf.load_state_dict(cp)

    testF = torch.FloatTensor(testF).cuda()
    out = clf(testF)
    probs = nn.Softmax(dim=1)(out)
    return probs
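# Call sketch, assuming convert_str2lst turns a raw sentence string into the
# tokenized list format the batcher expects: the function returns one softmax
# distribution over the three NLI classes per sentence pair, in the class
# order printed by the branch that was taken.

probs = apply_logician('A man is eating food.', 'A person is having a meal.')
# SNLI head by default; pass sick_model=True for the SICK classifier
sick_probs = apply_logician('A man is eating food.',
                            'A person is having a meal.', sick_model=True)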
def __init__(self, params, batcher, prepare=None):
    # parameters
    params = utils.dotdict(params)
    params.usepytorch = True if "usepytorch" not in params else params.usepytorch
    params.seed = 1111 if "seed" not in params else params.seed
    params.batch_size = 128 if "batch_size" not in params else params.batch_size
    params.nhid = 0 if "nhid" not in params else params.nhid
    params.kfold = 5 if "kfold" not in params else params.kfold

    if "classifier" not in params or not params["classifier"]:
        params.classifier = {"nhid": 0}

    if "nhid" not in params.classifier:
        raise ValueError(
            "Number of hidden units not set. Please set the number of "
            "hidden units in the classifier config.")

    self.params = params

    # batcher and prepare
    self.batcher = batcher
    self.prepare = prepare if prepare else lambda x, y: None

    self.list_tasks = [
        # "Length",
        # "EntityDistance",
        # "ArgumentOrder",
        # "EntityExistsBetweenHeadTail",
        # "EntityCountORGBetweenHeadTail",
        # "EntityCountPERBetweenHeadTail",
        # "EntityCountDATEBetweenHeadTail",
        # "EntityCountMISCBetweenHeadTail",
        # "EntityCountLOCBetweenHeadTail",
        # "PosTagHeadLeft",
        # "PosTagHeadRight",
        # "PosTagTailLeft",
        # "PosTagTailRight",
        # "ArgTypeHead",
        # "ArgTypeTail",
        # "TreeDepth",
        # "SDPTreeDepth",
        # "ArgumentHeadGrammaticalRole",
        # "ArgumentTailGrammaticalRole",
        # Kirk's new code
        "ArgumentAddGrammarRole_Head",
        "ArgumentAddGrammarRole_Tail",
        "ArgumentGrammarRole_ControlHead",
        "ArgumentGrammarRole_ControlTail",
    ]
def __init__(self, params, batcher, prepare=None):
    params = utils.dotdict(params)
    params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
    params.seed = 1111 if 'seed' not in params else params.seed
    params.batch_size = 128 if 'batch_size' not in params else params.batch_size
    params.nhid = 0 if 'nhid' not in params else params.nhid
    params.kfold = 5 if 'kfold' not in params else params.kfold

    if 'classifier' not in params or not params['classifier']:
        params.classifier = {'nhid': 0}

    assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

    self.params = params

    self.batcher = batcher
    self.prepare = prepare if prepare else lambda x, y: None

    self.list_tasks = ['WCCRS_HOTELS', 'WCCRS_MEDICINE', 'SICKRelatedness',
                       'SICKEntailment', '8TAGS']
def __init__(self, params, batcher, prepare=None):
    # parameters
    params = utils.dotdict(params)
    params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
    params.seed = 1111 if 'seed' not in params else params.seed
    params.batch_size = 128 if 'batch_size' not in params else params.batch_size
    params.nhid = 0 if 'nhid' not in params else params.nhid
    params.kfold = 5 if 'kfold' not in params else params.kfold

    if 'classifier' not in params or not params['classifier']:
        params.classifier = {'nhid': 0}

    assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

    self.params = params

    # batcher and prepare
    self.batcher = batcher
    self.prepare = prepare if prepare else lambda x, y: None

    self.list_tasks = ['SST2', 'SST3', 'MRPC', 'ReadabilityCl', 'TagCl',
                       'PoemsCl', 'ProzaCl', 'TREC', 'STS', 'SICK']
def __init__(self, params, batcher, prepare=None):
    # setting default parameters
    params = utils.dotdict(params)
    params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
    params.seed = 1111 if 'seed' not in params else params.seed
    params.batch_size = 128 if 'batch_size' not in params else params.batch_size
    params.nhid = 0 if 'nhid' not in params else params.nhid
    params.kfold = 5 if 'kfold' not in params else params.kfold

    if 'classifier' not in params or not params['classifier']:
        params.classifier = {'nhid': 0}

    assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

    self.params = params

    # set up bilinear projection, with learnable matrix W
    params.bilinear = False if 'bilinear' not in params else params.bilinear

    self.batcher = batcher
    self.prepare = prepare if prepare else lambda x, y: None

    # sanity check
    # assert params.classifier in ['LogReg', 'MLP']
    # if params.classifier == 'MLP':
    #     assert params.nhid > 0, 'When using an MLP, you need to set params.nhid > 0'
    # if not params.usepytorch and params.classifier == 'MLP':
    #     assert False, 'No MLP implemented in scikit-learn'

    self.list_tasks = ['CR', 'MR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC',
                       'MRPC', 'SICKRelatedness', 'SICKEntailment',
                       'STSBenchmark', 'SNLI', 'ImageCaptionRetrieval',
                       'STS12', 'STS13', 'STS14', 'STS15', 'STS16',
                       'DIS', 'PDTB', 'DAT', 'PDTB_EX', 'PDTB_IMEX',
                       'ABSA_CH', 'ABSA_SP', 'STS_SP']
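# The 'bilinear' flag above only records the option; a minimal sketch of a
# learnable bilinear projection with matrix W, as an illustration under
# assumed semantics (not necessarily the code this engine dispatches to):

import torch
import torch.nn as nn

class BilinearSim(nn.Module):
    """Scores a sentence pair as u^T W v with a learnable matrix W."""

    def __init__(self, dim):
        super().__init__()
        self.W = nn.Parameter(torch.eye(dim))  # starts as a plain dot product

    def forward(self, u, v):
        # u, v: (batch, dim); returns one score per pair
        return torch.einsum('bd,de,be->b', u, self.W, v)

scores = BilinearSim(300)(torch.randn(4, 300), torch.randn(4, 300))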
for norm in [True]:
    for sim_name in ['von_mises_correction_tic', 'von_mises_correction_aic']:
        print('Similarity: {0}'.format(sim_name))

        params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': True,
                           'kfold': 10, 'padding': '.'}
        # Word2Vec Google News does not have a '.' embedding
        if word_vec == 'word2vec_GN':
            params_senteval['padding'] = 'the'

        sim_params = dotdict({'similarity': sim_name,
                              'word_vec': word_vec,
                              'word_count_path': word_count_path,
                              'norm': norm})
        params_senteval['sim_params'] = sim_params

        se = senteval.engine.SE(params_senteval, batcher, prepare)
        result = se.eval(transfer_tasks)

        result_dict = {'param': dict(sim_params), 'eval': result}
        results.append(result_dict)

        with open(args.output_path, 'w') as f:
            json.dump(results, f)
# transfer_tasks = ['MR', 'CR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC', 'MRPC',
#                   'SICKEntailment', 'SICKRelatedness', 'STSBenchmark',
#                   'Length', 'WordContent', 'Depth', 'TopConstituents',
#                   'BigramShift', 'Tense', 'SubjNumber', 'ObjNumber']

import os

from embed import Embedder
from fairseq import options
import sentencepiece as spm
from senteval import utils

os.chdir(CODE_DIR)

parser = options.get_generation_parser(interactive=True)
options.add_embed_args(parser)
args = options.parse_args_and_arch(parser)

params = utils.dotdict(params_senteval)

sp = spm.SentencePieceProcessor()
sp.Load(args.sentencepiece)
embedder = Embedder(args)

params.batch_size = 32
params.sp = sp
params.embedder = embedder
params.encoder = args.eval_encoder
params.lang_emb = args.lang_emb

results = {}

s = ENLengthEval(PATH_TO_MY_DATA)
s.do_prepare(params, prepare)