Exemple #1
0
# NOTE(review): T1 and T2 are not referenced in the visible part of this
# script; presumably they are thresholds consumed further down -- TODO confirm.
T1 = 0.01
T2 = 0.001

if __name__ == '__main__':
    # The output directory is the last command-line argument.  A copy of
    # settings.py is stored there alongside the dictionaries saved below.
    save_path = sys.argv[-1]
    shutil.copyfile('settings.py', '%s/settings.txt' % save_path)

    print("Preparing data...")
    # Load the train / validation / test triples.  Each file is opened in a
    # `with` block so its handle is closed once the triples have been read;
    # previously the io.open() results were never closed.
    # Assumes load_labeled_entities consumes the file before returning --
    # TODO confirm against batch.py.
    # Alternative inputs used previously: paths from sys.argv[1] / sys.argv[2],
    # or ../data/prescription-sparse2-{train,valid,test}.txt.
    with io.open("../data/yago-sparse-entity-train.txt") as train_file:
        lhs, rel, rhs = batch.load_labeled_entities(train_file)
    with io.open("../data/yago-sparse-entity-valid.txt") as valid_file:
        lhs_v, rel_v, rhs_v = batch.load_labeled_entities(valid_file)
    with io.open("../data/yago-sparse-entity-test.txt") as test_file:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(test_file)

    # Left hand side dictionaries, both character and entity level, built
    # from the training triples only.
    chardict, charcount = batch.build_char_dictionary(lhs)
    # One more than the number of dictionary entries; presumably the extra
    # slot is reserved (e.g. padding/unknown) -- TODO confirm.
    n_char = len(chardict) + 1
    batch.save_dictionary(chardict, charcount, '%s/dict.pkl' % save_path)

    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    batch.save_dictionary(lhs_dict, lhs_count, '%s/lhs_dict.pkl' % save_path)

    # build dictionary for relations
Exemple #2
0
# NOTE(review): T1 and T2 are not referenced in the visible part of this
# script; presumably they are thresholds consumed further down -- TODO confirm.
T1 = 0.01
T2 = 0.001

if __name__ == '__main__':
    # The output directory is the last command-line argument.  A copy of
    # settings.py is stored there alongside the dictionaries saved below.
    save_path = sys.argv[-1]
    shutil.copyfile('settings.py', '%s/settings.txt' % save_path)

    print("Preparing data...")
    # Load the train / validation / test triples.  Each file is opened in a
    # `with` block so its handle is closed once the triples have been read;
    # previously the io.open() results were never closed.
    # Assumes load_labeled_entities consumes the file before returning --
    # TODO confirm against batch.py.
    # Alternative inputs used previously: paths from sys.argv[1] / sys.argv[2],
    # ../data/prescription-freq-test.txt as the test set, or
    # ../data/prescription-sparse2-{train,valid,test}.txt (per the original
    # note, "sparse 2" uses a different train/valid/test ratio).
    with io.open("../data/yago-sparse-entity-train.txt") as train_file:
        lhs, rel, rhs = batch.load_labeled_entities(train_file)
    with io.open("../data/yago-sparse-entity-valid.txt") as valid_file:
        lhs_v, rel_v, rhs_v = batch.load_labeled_entities(valid_file)
    with io.open("../data/yago-sparse-entity-test.txt") as test_file:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(test_file)

    # Left hand side dictionaries, both character and entity level, built
    # from the training triples only.
    chardict, charcount = batch.build_char_dictionary(lhs)
    # One more than the number of dictionary entries; presumably the extra
    # slot is reserved (e.g. padding/unknown) -- TODO confirm.
    n_char = len(chardict) + 1
    batch.save_dictionary(chardict, charcount, '%s/dict.pkl' % save_path)

    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    batch.save_dictionary(lhs_dict, lhs_count, '%s/lhs_dict.pkl' % save_path)

    # Build and save the dictionary for relations.
    rel_dict, rel_count = batch.build_entity_dictionary(rel)
    batch.save_dictionary(rel_dict, rel_count, '%s/rel_dict.pkl' % save_path)
import time
import cPickle as pkl
import io

from collections import OrderedDict

import batch
from settings import N_BATCH, N_EPOCH, DISPF, SAVEF, VALF, WDIM
from model_nn import charLM                   # TODO change model
from model_nn import load_params_shared       # TODO change model

# Suffix of the checkpoint directory ("temp" + model) that is loaded below.
# Per the original note: "" or "_nn", where nn is the hybrid model.
# TODO change model
model = "_nn"

if __name__ == "__main__":
    # Frequency cut-off naming the rare-entity test file to evaluate on.
    # Parsed as an int so the file name matches the integer-formatted name
    # used when the file is generated, and so a non-numeric argument fails
    # here rather than as a missing-file error further down.
    max_freq = int(sys.argv[1])

    # Rebuild the dictionaries from the training triples to recover the
    # vocabulary sizes the model is constructed with.  Files are opened in
    # `with` blocks so the handles are closed after reading; previously the
    # io.open() results were never closed.
    # Assumes load_labeled_entities consumes the file before returning --
    # TODO confirm against batch.py.
    # Per the original note, "sparse 2" uses a different train/valid/test ratio.
    with io.open("../data/prescription-sparse2-train.txt") as train_file:
        lhs, rel, rhs = batch.load_labeled_entities(train_file)
    chardict, charcount = batch.build_char_dictionary(lhs)
    # One more than the number of dictionary entries; presumably the extra
    # slot is reserved (e.g. padding/unknown) -- TODO confirm.
    n_char = len(chardict) + 1
    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    rel_dict, rel_count = batch.build_entity_dictionary(rel)
    n_rel = len(rel_dict)
    rhs_dict, rhs_count = batch.build_entity_dictionary(rhs)
    n_rhs = len(rhs_dict)

    test_path = "../data/prescription-sparse2-rare-{}-test.txt".format(max_freq)
    with io.open(test_path) as test_file:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(test_file)
    test_iter = batch.Batch(lhs_s, rel_s, rhs_s, batch_size=N_BATCH)

    # Original author's note: emb_dim = WDIM by default.
    m = charLM(n_char, n_lhs + 1, n_rel, n_rhs)
    m.param = load_params_shared("temp{}/best_model.npz".format(model))
Exemple #4
0
import time
import cPickle as pkl
import io

from collections import OrderedDict

import batch
from settings import N_BATCH, N_EPOCH, DISPF, SAVEF, VALF, WDIM
from model_transe import charLM                   # TODO change model
from model_transe import load_params_shared       # TODO change model

# Suffix of the checkpoint directory ("temp" + model) that is loaded below.
# Per the original note: "" or "_nn", where nn is the hybrid model.
# TODO change model
model = "_tr"

if __name__ == "__main__":
    # Frequency cut-off naming the rare-entity test file to evaluate on.
    max_freq = int(sys.argv[1])

    # Rebuild the dictionaries from the training triples to recover the
    # vocabulary sizes the model is constructed with.  Files are opened in
    # `with` blocks so the handles are closed after reading; previously the
    # io.open() results were never closed.
    # Assumes load_labeled_entities consumes the file before returning --
    # TODO confirm against batch.py.
    # Per the original note, "sparse 2" uses a different train/valid/test ratio.
    with io.open("../data/prescription-sparse2-train.txt") as train_file:
        lhs, rel, rhs = batch.load_labeled_entities(train_file)
    chardict, charcount = batch.build_char_dictionary(lhs)
    # One more than the number of dictionary entries; presumably the extra
    # slot is reserved (e.g. padding/unknown) -- TODO confirm.
    n_char = len(chardict) + 1
    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    rel_dict, rel_count = batch.build_entity_dictionary(rel)
    n_rel = len(rel_dict)
    rhs_dict, rhs_count = batch.build_entity_dictionary(rhs)
    n_rhs = len(rhs_dict)

    test_path = "../data/prescription-sparse2-rare-{}-test.txt".format(max_freq)
    with io.open(test_path) as test_file:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(test_file)
    test_iter = batch.Batch(lhs_s, rel_s, rhs_s, batch_size=N_BATCH)

    # Original author's note: emb_dim = WDIM by default.
    m = charLM(n_char, n_lhs + 1, n_rel, n_rhs)
    m.param = load_params_shared("temp{}/best_model.npz".format(model))
Exemple #5
0
# NOTE(review): T1 and T2 are not referenced in the visible part of this
# script; presumably they are thresholds consumed further down -- TODO confirm.
T1 = 0.01
T2 = 0.001

if __name__ == '__main__':
    # The output directory is the last command-line argument.  A copy of
    # settings.py is stored there alongside the dictionaries saved below.
    save_path = sys.argv[-1]
    shutil.copyfile('settings.py', '%s/settings.txt' % save_path)

    print("Preparing data...")
    # Load the train / validation / test triples.  Each file is opened in a
    # `with` block so its handle is closed once the triples have been read;
    # previously the io.open() results were never closed.
    # Assumes load_labeled_entities consumes the file before returning --
    # TODO confirm against batch.py.
    # Alternative inputs used previously: paths from sys.argv[1] / sys.argv[2],
    # or ../data/prescription-sparse2-{train,valid,test}.txt.
    with io.open("../data/yago-sparse-entity-train.txt") as train_file:
        lhs, rel, rhs = batch.load_labeled_entities(train_file)
    with io.open("../data/yago-sparse-entity-valid.txt") as valid_file:
        lhs_v, rel_v, rhs_v = batch.load_labeled_entities(valid_file)
    with io.open("../data/yago-sparse-entity-test.txt") as test_file:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(test_file)

    # Left hand side dictionaries, both character and entity level, built
    # from the training triples only.
    chardict, charcount = batch.build_char_dictionary(lhs)
    # One more than the number of dictionary entries; presumably the extra
    # slot is reserved (e.g. padding/unknown) -- TODO confirm.
    n_char = len(chardict) + 1
    batch.save_dictionary(chardict, charcount, '%s/dict.pkl' % save_path)

    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    batch.save_dictionary(lhs_dict, lhs_count, '%s/lhs_dict.pkl' % save_path)

    # Build and save the dictionary for relations.
    rel_dict, rel_count = batch.build_entity_dictionary(rel)
    batch.save_dictionary(rel_dict, rel_count, '%s/rel_dict.pkl' % save_path)
# input: training set, validation set, test set
# output: the triples from the test and validation sets whose left hand side
# entity appears no more than max_freq times in the training set.

import sys
import io
import numpy as np
import batch

# Select the test and validation triples whose left hand side entity is rare
# in the training set and write them out as a single rare-entity test file.
if __name__ == "__main__":
    # A triple is kept when its lhs appears <= max_freq times in the training
    # set; an lhs that is absent from the training counts is kept as well.
    max_freq = 2

    # Files are opened in `with` blocks so the handles are closed after
    # reading; previously the io.open() results were never closed.
    # Assumes load_labeled_entities consumes the file before returning --
    # TODO confirm against batch.py.
    with io.open("../data/prescription-sparse2-train.txt") as train_file:
        lhs, rel, rhs = batch.load_labeled_entities(train_file)
    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)

    buf = []
    # Test triples first, then validation triples, so the output keeps the
    # same line order as before.
    for path in ("../data/prescription-sparse2-test.txt",
                 "../data/prescription-sparse2-valid.txt"):
        with io.open(path) as in_file:
            lhs, rel, rhs = batch.load_labeled_entities(in_file)
        # Assumes lhs / rel / rhs are parallel sequences of equal length.
        for head, relation, tail in zip(lhs, rel, rhs):
            if head not in lhs_count or lhs_count[head] <= max_freq:
                buf.append("{}\t{}\t{}\n".format(head, relation, tail))

    with open("../data/prescription-sparse2-rare-{}-test.txt".format(max_freq), "w") as f_out:
        f_out.writelines(buf)
Exemple #7
0
# input: training set, validation set, test set
# output: the triples from the test and validation sets whose left hand side
# entity appears no more than max_freq times in the training set.

import sys
import io
import numpy as np
import batch

# select the test and validation triples whose lhs is rare in the training set
if __name__ == "__main__":
    max_freq = 2  # selecting lhs that appear <= max_freq times in the training set
    lhs, rel, rhs = batch.load_labeled_entities(
        io.open("../data/prescription-sparse2-train.txt"))
    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)

    lhs, rel, rhs = batch.load_labeled_entities(
        io.open("../data/prescription-sparse2-test.txt"))
    buf = []
    for i in range(len(lhs)):
        if lhs[i] not in lhs_count or lhs_count[lhs[i]] <= max_freq:
            buf.append("{}\t{}\t{}\n".format(lhs[i], rel[i], rhs[i]))

    lhs, rel, rhs = batch.load_labeled_entities(
        io.open("../data/prescription-sparse2-valid.txt"))
    for i in range(len(lhs)):
        if lhs[i] not in lhs_count or lhs_count[lhs[i]] <= max_freq:
            buf.append("{}\t{}\t{}\n".format(lhs[i], rel[i], rhs[i]))

    with open("../data/prescription-sparse2-rare-{}-test.txt".format(max_freq),
              "w") as f_out: