args.pairStartIndex) + "." + str(args.pairEndIndex) + ".txt"  #

    print("loading dictionary/embedding")
    dictionary = helper.load_object(args.save_path + 'gene_dictionary.p')
    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                                   args.word_vectors_file,
                                                   dictionary.word2idx)
    print("loading model")
    # print (args)
    model = SentenceClassifier(dictionary,
                               embeddings_index,
                               args,
                               select_method='max')
    if args.cuda:
        model = model.cuda()
    helper.load_model_states_from_checkpoint(
        model, args.save_path + 'model_best.pth.tar', 'state_dict', args.cuda)
    print('vocabulary size = ', len(dictionary))

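    # Gene Ontology annotations for the three sub-ontologies:
    # Biological Process (BP), Cellular Component (CC), Molecular Function (MF)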
    with open(args.goAnnotationFile + "goBP.cPickle", "rb") as fh:
        annotationBP = pickle.load(fh)
    with open(args.goAnnotationFile + "goCC.cPickle", "rb") as fh:
        annotationCC = pickle.load(fh)
    with open(args.goAnnotationFile + "goMF.cPickle", "rb") as fh:
        annotationMF = pickle.load(fh)
    with open(args.goAnnotationFile + "go3ontology.cPickle", "rb") as fh:
        annotationAll3 = pickle.load(fh)

    # load gene pairs to be tested: [[gene1, gene2], ...]
    f = open(args.pairs2test, 'r')
    pairs = []
    for line in f:
Example No. 2
            0, di + 1].data].data.cpu().numpy()[0]

    return gen_prob


if __name__ == "__main__":
    dictionary = helper.load_object(args.save_path + 'dictionary.p')
    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                                   args.word_vectors_file,
                                                   dictionary.word2idx)
    model = Seq2Seq(dictionary, embeddings_index, args)
    print(model)
    if args.cuda:
        model = model.cuda()
    helper.load_model_states_from_checkpoint(
        model, os.path.join(args.save_path, 'model_best.pth.tar'),
        'state_dict', args.cuda)
    print('model, embedding index and dictionary loaded.')
    model.eval()

    # load the test dataset
    test_corpus = data.Corpus(args.tokenize, args.max_query_length)
    test_corpus.parse(args.data + 'dev.txt', args.max_example, False)
    print('test set size = ', len(test_corpus.data))

    candidate_map = dict()
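    # each line of anchor_candidates.txt maps an anchor to its candidates,
    # with fields separated by ':::' (anchor first, candidates after)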
    with open('../data/anchor_candidates.txt', 'r') as f:
        for line in f:
            tokens = line.strip().split(':::')
            candidate_map[tokens[0]] = []
            for i in range(1, len(tokens)):