def main():
    """Interactive loop: read '@'-separated event sequences from stdin and
    print top-k verb-type and argument-type predictions for each.
    """
    data_bin = '../run/seqVerbMC/data_subsrl_1sv_1sa_argtrim.bin'
    model_bin = './full_model/Roberta_BI/full_model_sptoken_ep121_a1.0_m1-0.1_m2-0.1.bin'

    data = Data()
    if os.path.exists(data_bin):
        # Cached binary dataset exists -- load it directly.
        data.load(data_bin)
        print("==ATTN== ", len(data.processes), " sequences.")
    else:
        # Build from the raw TSV, cache it, and dump a readable copy.
        data.load_tsv_plain(data_file)
        data.save(data_bin)
        data.dump_dataset_format('./dataset_seq.tsv', 'seq')
    # W/O n-1 gram

    M = torchpart()
    M.load(model_bin)
    # Dummy single-token calls; results discarded (presumably a warm-up -- TODO confirm).
    M.serve_verb([' '], data, limit_ids=None, topk=1)
    M.serve_arg([' '], data, limit_ids=None, topk=1)

    raw = input("Events split by @ (exit() to quit):")
    # serve_* parameters: verbs, sequences, true_ids, v2s, limit_ids
    while True:
        raw = raw.strip().lower()
        if raw[:4] == 'exit':
            exit()
        steps = raw.split('@')
        vtype = M.serve_verb(steps, data, limit_ids=None, topk=6)
        atype = M.serve_arg(steps, data, limit_ids=None, topk=6)
        print(vtype, atype)
        raw = input("Events split by @ (exit() to quit):")
def main():
    """Profile verb prediction on a persisted held-out test fold.

    Reads optional thresholds from ``sys.argv`` (verb-frequency threshold and
    sequence-length threshold), loads the cached dataset and model, selects the
    test rows listed in ``test_file``, and runs ``profile_test_verb`` at the
    requested thresholds plus two fixed reference settings.
    """
    # Default profiling thresholds; overridable from the command line.
    v_thres, l_thres = 50, 5
    if len(sys.argv) > 1:
        v_thres, l_thres = int(sys.argv[1]), int(sys.argv[2])

    data_file, data_bin, model_bin, test_file = (
        '/shared/corpora-tmp/wikihow/wikiHowSubsequences.tsv',
        '../run/seqVerbMC/data_subsrl_1sv_1sa_argtrim.bin',
        './seqSSmrl_subsrl/RobertaVerbMC/tmp_fold_ep151_a1.0_m1-0.1_m2-0.1.bin',
        '../process/recover_test_index_fold1.txt',
    )

    data = Data()
    if os.path.exists(data_bin):
        # Cached binary dataset exists -- load it directly.
        data.load(data_bin)
        print("==ATTN== ", len(data.processes), " sequences.")
    else:
        data.load_tsv_plain(data_file)
        data.save(data_bin)
    # W/O n-1 gram

    sequences = data.join_batch_sent(data.processes, begin='<s> ', sep=' </s> ')
    seq_len = np.array([len(x) for x in data.processes])

    # Invert the verb vocabulary so index -> surface form.
    r_verbs = {y: x for x, y in data.verb_vocab.items()}
    n_verbs = len(data.verb_vocab)  # was len([... for ... in items()]): same count, no throwaway list
    verbs = [r_verbs[x] for x in range(n_verbs)]
    vid = np.array(data.verb_id)
    # NOTE(review): unused in this single-fold path; kept for parity with other folds.
    true_senses = [data.v2s[verbs[x]] for x in vid]

    # Same inversion for the argument vocabulary.
    r_args = {y: x for x, y in data.arg_vocab.items()}
    n_args = len(data.arg_vocab)
    args = [r_args[x] for x in range(n_args)]
    aid = np.array(data.arg_id)
    # NOTE(review): unused here as well.
    true_arg_senses = [data.a2s[args[x]] for x in aid]

    # NOTE(review): the splitter and accumulators below are never used in this
    # script version (the fold indices are read from test_file instead).
    max_fold = 1
    rs = sklearn.model_selection.ShuffleSplit(n_splits=max_fold, test_size=0.1, random_state=777)
    avg_mrr, avg_hits1, avg_hits10 = [], [], []
    avg_mrra, avg_hits1a, avg_hits10a = [], [], []
    print(len(verbs), len(args))

    # Read persisted test-fold row indices; `with` closes the handle
    # (the original left the file open).
    with open(test_file) as f:
        test_index = np.array([int(x.strip()) for x in f])
    test_seq = [sequences[x] for x in test_index]
    test_vid = vid[test_index]
    test_aid = aid[test_index]

    M = torchpart()
    M.load(model_bin)
    # profile_test_verb(verbs, sequences, seq_lens, true_ids, v2s, v_thres, l_thres)
    M.profile_test_verb(verbs, test_seq, seq_len[test_index], test_vid, data.v2s, v_thres, l_thres)
    M.profile_test_verb(verbs, test_seq, seq_len[test_index], test_vid, data.v2s, 525, 2)
    M.profile_test_verb(verbs, test_seq, seq_len[test_index], test_vid, data.v2s, 425, 2)
def main():
    """Serve verb/arg type predictions.

    With no CLI arguments, runs a hard-coded 4-step demo sequence and prints
    the predictions. With two arguments (input path, output path), annotates
    each tab-separated sequence of the input file and writes the results.
    """
    data_bin = '../run/seqVerbMC/data_subsrl_1sv_1sa_argtrim.bin'
    model_bin = './seqSSmrl_subsrl/RobertaVerbMC/tmp_fold_ep151_a1.0_m1-0.1_m2-0.1.bin'

    data = Data()
    if os.path.exists(data_bin):
        # Cached binary dataset exists -- load it directly.
        data.load(data_bin)
        print("==ATTN== ", len(data.processes), " sequences.")
    else:
        # NOTE(review): data_file must be defined at module level for this branch.
        data.load_tsv_plain(data_file)
        data.save(data_bin)
    # W/O n-1 gram

    ifile = None
    if len(sys.argv) > 1:
        ifile, ofile = sys.argv[1], sys.argv[2]

    M = torchpart()
    M.load(model_bin)
    # serve_* parameters: verbs, sequences, true_ids, v2s, limit_ids
    if ifile is None:
        # Demo mode: a fixed flight-booking process.
        sequence = [
            'set locations and date',
            'search for tickets',
            'compare airfares',
            'purchase the ticket',
        ]
        vtype = M.serve_verb(sequence, data, limit_ids=None, topk=10)
        atype = M.serve_arg(sequence, data, limit_ids=None, topk=10)
        print(vtype, atype)
    else:
        # Batch mode: one tab-separated sequence per input line; echo the line
        # followed by its annotation. `with` on BOTH handles -- the original
        # never closed the input file.
        with open(ofile, 'w') as fp, open(ifile) as inp:
            for line in tqdm.tqdm(inp):
                sequence = line.strip().split('\t')
                vtype = M.serve_verb(sequence, data, limit_ids=None, topk=10)
                atype = M.serve_arg(sequence, data, limit_ids=None, topk=10)
                fp.write(line)
                fp.write('\t@@@\tVERB: ' + json.dumps(vtype) + '\tARG: ' + json.dumps(atype) + '\n')
### Read parameters ### if len(sys.argv) > 1: debugging = int(sys.argv[1][-1]) ### Model Initialization ### if not debugging: data_bin, model_bin = '../data/wikihow_process/data_subsrl_1sv_1sa_argtrim.bin', './full_model/full_model_sptoken_ep121_a1.0_m1-0.1_m2-0.1.bin' data = Data() if os.path.exists(data_bin): data.load(data_bin) print("==ATTN== ", len(data.processes), " sequences.") else: data.load_tsv_plain(data_file) data.save(data_bin) M = torchpart() M.load(model_bin) M.serve_verb([' '], data, limit_ids=None, topk=1), M.serve_arg([' '], data, limit_ids=None, topk=1) ### Function ### def process_json(sequence): sequence = sequence.split('@') if not debugging: vtype, atype = M.serve_verb(sequence, data, limit_ids=None, topk=6, return_emb=False), M.serve_arg( sequence,