Example #1
0
def main():
    """Interactively predict verb and argument types for typed event sequences.

    Loads the dataset from the cached binary when it exists (otherwise from
    the plain TSV, which is then cached), calls ``data.dump_dataset_format``
    to write ``./dataset_seq.tsv``, loads the model, and then repeatedly
    reads a line from stdin, splits it on ``@`` and prints the results of
    ``serve_verb`` / ``serve_arg`` for those events.

    The loop ends by raising ``SystemExit`` when the (stripped, lower-cased)
    input starts with ``exit`` or when stdin reaches end-of-file.
    """
    data_bin = '../run/seqVerbMC/data_subsrl_1sv_1sa_argtrim.bin'
    model_bin = './full_model/Roberta_BI/full_model_sptoken_ep121_a1.0_m1-0.1_m2-0.1.bin'
    prompt = "Events split by @ (exit() to quit):"

    data = Data()
    if os.path.exists(data_bin):
        data.load(data_bin)
        print("==ATTN== ", len(data.processes), " sequences.")
    else:
        # NOTE(review): data_file is not assigned anywhere in this function;
        # this branch only works if a module-level data_file exists -- confirm.
        data.load_tsv_plain(data_file)
        data.save(data_bin)

    data.dump_dataset_format('./dataset_seq.tsv', 'seq')

    # W/O n-1 gram
    M = torchpart()
    M.load(model_bin)
    # Run both serving paths once on a blank event before prompting; the
    # results are discarded (presumably a warm-up -- confirm).
    M.serve_verb([' '], data, limit_ids=None, topk=1)
    M.serve_arg([' '], data, limit_ids=None, topk=1)

    while True:
        try:
            sequence = input(prompt)
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input): quit cleanly
            # instead of dying with a traceback.
            raise SystemExit from None
        sequence = sequence.strip().lower()
        if sequence.startswith('exit'):
            # Raise SystemExit directly rather than calling exit(): that
            # helper is injected by the site module for interactive use and
            # is not guaranteed to exist in every interpreter setup.
            raise SystemExit
        events = sequence.split('@')
        vtype = M.serve_verb(events, data, limit_ids=None, topk=6)
        atype = M.serve_arg(events, data, limit_ids=None, topk=6)
        print(vtype, atype)
Example #2
0
def main():
    """Profile verb prediction on the test indices listed in ``test_file``.

    Command line (both optional, parsed as integers):
        ``sys.argv[1]`` overrides ``v_thres`` (default 50) and
        ``sys.argv[2]`` overrides ``l_thres`` (default 5).

    The dataset is loaded from the cached binary when it exists, otherwise
    from the plain TSV (and then cached). ``M.profile_test_verb`` is run
    three times: once with the configured thresholds and then with the fixed
    settings ``(525, 2)`` and ``(425, 2)``.
    """
    v_thres, l_thres = 50, 5
    # Each threshold can be overridden on its own, so passing only the first
    # argument no longer fails with an IndexError on sys.argv[2].
    if len(sys.argv) > 1:
        v_thres = int(sys.argv[1])
    if len(sys.argv) > 2:
        l_thres = int(sys.argv[2])

    #data_file, data_bin, model_bin, test_file = '/shared/corpora-tmp/wikihow/wikiHowSubsequences.tsv', '../run/seqVerbMC/data_subsrl_1sv_1sa_argtrim.bin', './seqSSmrl_subsrl/RobertaVerbMC/tmp_fold_ep151_a1.0_m1-0.1_m2-0.1.bin', './seqSSmrl_subsrl/RobertaVerbMC/test_fold_ep151_a1.0_m1-0.1_m2-0.1.txt'
    data_file = '/shared/corpora-tmp/wikihow/wikiHowSubsequences.tsv'
    data_bin = '../run/seqVerbMC/data_subsrl_1sv_1sa_argtrim.bin'
    model_bin = './seqSSmrl_subsrl/RobertaVerbMC/tmp_fold_ep151_a1.0_m1-0.1_m2-0.1.bin'
    test_file = '../process/recover_test_index_fold1.txt'

    data = Data()
    if os.path.exists(data_bin):
        data.load(data_bin)
        print("==ATTN== ", len(data.processes), " sequences.")
    else:
        data.load_tsv_plain(data_file)
        data.save(data_bin)

    # W/O n-1 gram
    sequences = data.join_batch_sent(data.processes,
                                     begin='<s> ',
                                     sep=' </s> ')
    seq_len = np.array([len(x) for x in data.processes])

    # Invert the vocabularies so that verbs[i] / args[i] is the vocab key
    # whose id is i; this requires the ids to be exactly 0..n-1.
    r_verbs = {idx: verb for verb, idx in data.verb_vocab.items()}
    verbs = [r_verbs[i] for i in range(len(data.verb_vocab))]
    vid = np.array(data.verb_id)

    r_args = {idx: arg for arg, idx in data.arg_vocab.items()}
    args = [r_args[i] for i in range(len(data.arg_vocab))]

    print(len(verbs), len(args))

    # One integer index per line; the context manager closes the file even
    # if a line fails to parse.
    with open(test_file) as fp:
        test_index = np.array([int(line.strip()) for line in fp])

    test_seq = [sequences[i] for i in test_index]
    test_vid = vid[test_index]

    M = torchpart()
    M.load(model_bin)
    #verbs, sequences, true_ids, v2s, limit_ids
    for v_t, l_t in ((v_thres, l_thres), (525, 2), (425, 2)):
        M.profile_test_verb(verbs, test_seq, seq_len[test_index], test_vid,
                            data.v2s, v_t, l_t)
Example #3
0
def main():
    """Predict verb/argument types for a demo sequence or for a whole file.

    Command line:
        no arguments            -- run the built-in four-step demo sequence
                                   and print the predictions.
        INPUT_FILE OUTPUT_FILE  -- treat every line of INPUT_FILE as one
                                   tab-separated event sequence and write
                                   the line followed by its JSON-encoded
                                   predictions to OUTPUT_FILE.

    Exits with a usage message when exactly one argument is given.
    """
    ifile = ofile = None
    if len(sys.argv) > 1:
        # Validate before the (potentially slow) data/model loading so a
        # missing OUTPUT_FILE is reported immediately instead of surfacing
        # later as an IndexError.
        if len(sys.argv) < 3:
            sys.exit('usage: ' + sys.argv[0] + ' [INPUT_FILE OUTPUT_FILE]')
        ifile, ofile = sys.argv[1], sys.argv[2]

    data_bin = '../run/seqVerbMC/data_subsrl_1sv_1sa_argtrim.bin'
    model_bin = './seqSSmrl_subsrl/RobertaVerbMC/tmp_fold_ep151_a1.0_m1-0.1_m2-0.1.bin'
    data = Data()
    if os.path.exists(data_bin):
        data.load(data_bin)
        print("==ATTN== ", len(data.processes), " sequences.")
    else:
        # NOTE(review): data_file is not assigned anywhere in this function;
        # this branch only works if a module-level data_file exists -- confirm.
        data.load_tsv_plain(data_file)
        data.save(data_bin)

    # W/O n-1 gram
    M = torchpart()
    M.load(model_bin)
    #verbs, sequences, true_ids, v2s, limit_ids
    if ifile is None:
        sequence = [
            'set locations and date', 'search for tickets', 'compare airfares',
            'purchase the ticket'
        ]
        vtype = M.serve_verb(sequence, data, limit_ids=None, topk=10)
        atype = M.serve_arg(sequence, data, limit_ids=None, topk=10)
        print(vtype, atype)
        return

    # Open the input first so that an unreadable INPUT_FILE does not leave a
    # truncated OUTPUT_FILE behind; both handles are closed on exit.
    with open(ifile) as fin, open(ofile, 'w') as fout:
        for line in tqdm.tqdm(fin):
            sequence = line.strip().split('\t')
            vtype = M.serve_verb(sequence, data, limit_ids=None, topk=10)
            atype = M.serve_arg(sequence, data, limit_ids=None, topk=10)
            # NOTE(review): `line` is written with its trailing newline, so
            # the '@@@' annotation starts on the following output line --
            # confirm downstream readers expect that layout.
            fout.write(line)
            fout.write('\t@@@\tVERB: ' + json.dumps(vtype) + '\tARG: ' +
                       json.dumps(atype) + '\n')
### Read parameters ###
# The last character of the first command-line argument is parsed as an
# integer and stored in `debugging`.
# NOTE(review): `debugging` is only assigned here when an argument is given;
# presumably a default is set earlier in the file -- confirm.
if len(sys.argv) > 1:
    debugging = int(sys.argv[1][-1])
### Model Initialization ###
# Data and model are only loaded when `debugging` is falsy.
if not debugging:
    data_bin = '../data/wikihow_process/data_subsrl_1sv_1sa_argtrim.bin'
    model_bin = './full_model/full_model_sptoken_ep121_a1.0_m1-0.1_m2-0.1.bin'
    data = Data()

    if not os.path.exists(data_bin):
        # No cached binary yet: parse the plain TSV and cache the result.
        # NOTE(review): data_file is not defined in the visible part of the
        # file -- confirm it exists at module level.
        data.load_tsv_plain(data_file)
        data.save(data_bin)
    else:
        data.load(data_bin)
        print("==ATTN== ", len(data.processes), " sequences.")

    M = torchpart()
    M.load(model_bin)
    # Call both serving paths once on a blank event; the results are
    # discarded (presumably a warm-up -- confirm).
    M.serve_verb([' '], data, limit_ids=None, topk=1)
    M.serve_arg([' '], data, limit_ids=None, topk=1)


### Function ###
def process_json(sequence):
    sequence = sequence.split('@')
    if not debugging:
        vtype, atype = M.serve_verb(sequence,
                                    data,
                                    limit_ids=None,
                                    topk=6,
                                    return_emb=False), M.serve_arg(
                                        sequence,