コード例 #1
0
ファイル: task_notrain500.py プロジェクト: pblouw/memNN
def load_data(coref):
    """Load the MCTest-500 train, dev and test story sets.

    Files are looked up in the ``MCTest`` directory under the module-level
    ``path``. When ``coref`` is truthy, each split is unpickled from its
    ``.coref`` file; otherwise it is read with ``load_stories`` from the
    matching ``.tsv`` / ``.ans`` pair. Every split is passed through
    ``clean`` before being returned.

    Returns:
        A ``(train_stories, dev_stories, test_stories)`` tuple.
    """
    data_path = os.path.join(path, 'MCTest')

    # One row per split, in load order:
    # (pickled coref file, story file, answer file).
    split_files = [
        ('mc500.train.coref', 'mc500.train.tsv', 'mc500.train.ans'),
        ('mc500.dev.coref', 'mc500.dev.tsv', 'mc500.dev.ans'),
        ('mc500.test.coref', 'mc500.test.tsv', 'mc500.test.ans'),
    ]

    loaded = []
    for coref_name, tsv_name, ans_name in split_files:
        if coref:
            with open(os.path.join(data_path, coref_name), 'rb') as f:
                stories = pickle.load(f)
        else:
            stories = load_stories(
                os.path.join(data_path, tsv_name),
                os.path.join(data_path, ans_name))
        loaded.append(clean(stories))

    train_stories, dev_stories, test_stories = loaded
    return train_stories, dev_stories, test_stories
コード例 #2
0
ファイル: task_missing.py プロジェクト: pblouw/memNN
def load_data(coref):
    """Load the MCTest-160 train, dev and test story sets.

    Files are looked up in the ``MCTest`` directory under the module-level
    ``path``. With a falsy ``coref`` each split is read by ``load_stories``
    from its ``.tsv`` / ``.ans`` pair; with a truthy ``coref`` each split is
    unpickled from its ``.coref`` file. Every split is passed through
    ``clean`` before being returned.

    Returns:
        A ``(train_stories, dev_stories, test_stories)`` tuple.
    """
    data_path = os.path.join(path, "MCTest")

    def locate(filename):
        # Resolve a dataset file name inside the MCTest directory.
        return os.path.join(data_path, filename)

    if not coref:
        train_stories = clean(load_stories(locate("mc160.train.tsv"), locate("mc160.train.ans")))
        dev_stories = clean(load_stories(locate("mc160.dev.tsv"), locate("mc160.dev.ans")))
        test_stories = clean(load_stories(locate("mc160.test.tsv"), locate("mc160.test.ans")))
        return train_stories, dev_stories, test_stories

    def unpickle(filename):
        # Read one pickled split and filter it with clean().
        with open(locate(filename), "rb") as f:
            return clean(pickle.load(f))

    # Tuple elements are evaluated left to right: train, dev, then test.
    return (
        unpickle("mc160.train.coref"),
        unpickle("mc160.dev.coref"),
        unpickle("mc160.test.coref"),
    )
コード例 #3
0
ファイル: preproc_coref.py プロジェクト: pblouw/memNN
def process(path):
    """Run Stanford CoreNLP coreference over a story set and pickle the result.

    Stories are loaded from ``path + '.tsv'`` and ``path + '.ans'``. Each
    story's text is written to its own file in a temporary directory, CoreNLP
    is invoked once over all of them through ``-filelist``, and the
    ``<i>.txt.out`` file for each story is read back and handed to ``parse``
    together with the original text. The resulting ``Story`` objects are
    pickled (protocol 2) to ``path + '.coref'``.

    Raises:
        subprocess.CalledProcessError: if the ``java`` process exits with a
            non-zero status.
    """
    stories = load_stories(path + '.tsv', path + '.ans')
    processed = []
    filelist = []

    # Create the scratch directory *before* entering ``try``: if mkdtemp()
    # itself failed inside the block, the ``finally`` clause would raise a
    # NameError on the unbound ``tmpdir`` and mask the real error.
    tmpdir = mkdtemp()
    try:
        for i, story in enumerate(stories):
            infile = os.path.join(tmpdir, str(i) + '.txt')
            with open(infile, 'w') as f:
                f.write(story.text)

            filelist.append(infile)

        filelist_filename = os.path.join(tmpdir, 'filelist')
        with open(filelist_filename, 'w') as f:
            for name in filelist:
                f.write(name)
                # Text-mode files translate '\n' to the platform line ending
                # themselves; writing os.linesep here would yield '\r\r\n'
                # on Windows.
                f.write('\n')

        # NOTE(review): the '*' classpath is relative to the current working
        # directory, so this presumably has to be run from the directory
        # holding the CoreNLP jars -- confirm.
        subprocess.check_call([
            'java', '-cp', '*', '-Xmx2g',
            'edu.stanford.nlp.pipeline.StanfordCoreNLP',
            '-annotators', 'tokenize,ssplit,pos,lemma,ner,parse,dcoref',
            '-filelist', filelist_filename])

        for i, story in enumerate(stories):
            # NOTE(review): output is read from the current working directory
            # (not tmpdir) and is never removed afterwards; presumably CoreNLP
            # writes '<input name>.out' there by default -- confirm.
            outfile = str(i) + '.txt.out'
            with open(outfile) as f:
                # Strip surrounding whitespace and drop lines that end up empty.
                stripped = [raw.strip() for raw in f]
            lines = [line for line in stripped if line]

            processed.append(Story(parse(story.text, lines), story.queries))
    finally:
        shutil.rmtree(tmpdir)

    with open(path + '.coref', 'wb') as f:
        pickle.dump(processed, f, 2)
コード例 #4
0
ファイル: run_baseline.py プロジェクト: pblouw/memNN
"""
Runs a baseline test on a weakly supervised memory network.
"""
def clean(stories):
    """Return the stories that have more than one query, in input order."""
    kept = []
    for story in stories:
        if len(story.queries) <= 1:
            # Stories with zero or one query are discarded.
            continue
        kept.append(story)
    return kept

def compute_accuracy(stories, model):
    """Return the mean ``model.predict_answer`` score over ``stories``.

    Args:
        stories: A sized collection of stories (``len`` is called on it).
        model: Any object exposing ``predict_answer(story)`` that returns a
            number; presumably the score for that story -- confirm against
            the model class.

    Returns:
        The summed scores divided by the number of stories, as a float.
        An empty collection yields ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    count = len(stories)
    if count == 0:
        # Nothing to evaluate, e.g. when filtering removed every story.
        return 0.0
    total = sum(model.predict_answer(story) for story in stories)
    return float(total) / float(count)


# Load the three MCTest-160 splits (paths are relative to the current working
# directory) and keep only the stories that clean() lets through.
train_stories = clean(load_stories('MCTest/mc160.train.tsv','MCTest/mc160.train.ans'))
dev_stories = clean(load_stories('MCTest/mc160.dev.tsv','MCTest/mc160.dev.ans'))
test_stories = clean(load_stories('MCTest/mc160.test.tsv','MCTest/mc160.test.ans'))

# Concatenate every split (train, then test, then dev) for model construction.
all_stories = train_stories + test_stories + dev_stories

# Initialize with all stories to get the full vocabulary.
# NOTE(review): 300 and 256 are presumably dimensionality settings -- confirm
# against WeakMemoryNetwork's signature.
model = WeakMemoryNetwork(300, 256, all_stories, timetags=True, word2vec=False)

# Report the freshly constructed model's accuracy on the train and test splits
# before any training has happened.
print 'Training Accuracy prior to training: ', compute_accuracy(train_stories, model)
print 'Testing Accuracy prior to training: ', compute_accuracy(test_stories, model)

# Train for a certain number of epochs
count = 0
for i in range(30):
    for story in train_stories + dev_stories: