Example #1
0
def main():
    """Evaluate word-translation accuracy on a loanword dictionary and a
    supervised dictionary.

    Relies on module-level helpers defined elsewhere in this file:
    ``parser``, ``read_embeddings``, ``load_dictionary`` and
    ``get_word_translation_accuracy``.
    """
    args = parser.parse_args()

    en_word2id, en_emb = read_embeddings(args.en_embedding_file,
                                         n_max=args.vocab_size)
    ru_word2id, ru_emb = read_embeddings(args.ru_embedding_file,
                                         n_max=args.vocab_size)
    en_emb = torch.from_numpy(en_emb)
    ru_emb = torch.from_numpy(ru_emb)

    borrowed_pairs = load_dictionary(args.loanwords_file, en_word2id,
                                     ru_word2id)
    get_word_translation_accuracy(borrowed_pairs, en_word2id, en_emb,
                                  ru_word2id, ru_emb, args.knn_method)

    if args.flip_dict:
        # The supervised dictionary is stored ru->en here; load it that way
        # and swap the columns so the pairs line up en->ru like the loanwords.
        supervised_pairs = src.evaluation.word_translation.load_dictionary(
            args.supervised_dict_file, ru_word2id, en_word2id)
        # Index-based column swap replaces the per-row Python loop.
        supervised_pairs = supervised_pairs[:, [1, 0]]
    else:
        supervised_pairs = src.evaluation.word_translation.load_dictionary(
            args.supervised_dict_file, en_word2id, ru_word2id)
    # Single accuracy call shared by both branches (was duplicated).
    get_word_translation_accuracy(supervised_pairs, en_word2id, en_emb,
                                  ru_word2id, ru_emb, args.knn_method)
Example #2
0
    def __init__(self):
        """Build graph data, both GAN players, and BFS-trees for the task
        named by `config.app`."""
        utils.make_config_dirs(config)

        # read graph (plus any task-specific extras) for the configured task
        app = config.app
        if app == "link_prediction":
            self.graph = utils.read_edges(train_filename=config.train_filename,
                                          test_filename=config.test_filename)
            self.n_node = max(self.graph.keys()) + 1
        elif app == "node_classification":
            self.graph = utils.read_edges(train_filename=config.train_filename)
            self.n_node = max(self.graph.keys()) + 1
            self.n_classes, self.labels_matrix = utils.read_labels(
                filename=config.labels_filename, n_node=self.n_node)
        elif app == "recommendation":
            self.graph, self.rcmd = rcmd_util.read_edges(
                train_filename=config.rcmd_train_filename,
                test_filename=config.rcmd_test_filename)
            self.n_node = max(self.graph.keys()) + 1
        else:
            raise Exception("Unknown task: {}".format(config.app))

        # root nodes: every graph node, truncated to users for recommendation
        all_roots = sorted(self.graph.keys())
        if app == "recommendation":
            self.root_nodes = all_roots[:self.rcmd.user_max]
        else:
            self.root_nodes = all_roots

        # pre-trained embedding matrices initialise both GAN players
        init_d = utils.read_embeddings(filename=config.pretrain_emb_filename_d,
                                       n_node=self.n_node,
                                       n_embed=config.n_emb)
        init_g = utils.read_embeddings(filename=config.pretrain_emb_filename_g,
                                       n_node=self.n_node,
                                       n_embed=config.n_emb)
        self.discriminator = Discriminator(n_node=self.n_node,
                                           node_emd_init=init_d)
        self.generator = Generator(n_node=self.n_node,
                                   node_emd_init=init_g)

        # construct BFS-trees; recommendation additionally needs the rcmd data
        if app == "recommendation":
            self.BFS_trees = BFS_trees(self.root_nodes, self.graph,
                                       batch_num=config.cache_batch,
                                       app=app,
                                       rcmd=self.rcmd)
        else:
            self.BFS_trees = BFS_trees(self.root_nodes, self.graph,
                                       batch_num=config.cache_batch)
Example #3
0
    def __init__(self):
        """Load graph data and pretrained embeddings, build (or load cached)
        BFS-trees, and assemble the TensorFlow GAN model with an initialised
        session."""
        print("reading graphs...")
        self.n_node, self.graph = utils.read_edges(config.train_filename,
                                                   config.test_filename)
        self.root_nodes = list(range(self.n_node))

        print("reading initial embeddings...")
        self.node_embed_init_d = utils.read_embeddings(
            filename=config.pretrain_emb_filename_d,
            n_node=self.n_node,
            n_embed=config.n_emb)
        self.node_embed_init_g = utils.read_embeddings(
            filename=config.pretrain_emb_filename_g,
            n_node=self.n_node,
            n_embed=config.n_emb)

        # construct or read BFS-trees
        self.trees = None
        if os.path.isfile(config.cache_filename):
            print("reading BFS-trees from cache...")
            # `with` guarantees the handle is closed even if unpickling fails
            with open(config.cache_filename, 'rb') as pickle_file:
                self.trees = pickle.load(pickle_file)
        else:
            print("constructing BFS-trees...")
            if config.multi_processing:
                # NOTE(review): assumes construct_trees_with_mp fills in
                # self.trees as a side effect; otherwise None gets cached
                self.construct_trees_with_mp(self.root_nodes)
            else:
                self.trees = self.construct_trees(self.root_nodes)
            # Open for writing only after construction succeeds, so a failure
            # during tree building cannot leave a truncated/corrupt cache file.
            with open(config.cache_filename, 'wb') as pickle_file:
                pickle.dump(self.trees, pickle_file)

        print("building GAN model...")
        self.discriminator = None
        self.generator = None
        self.build_generator()
        self.build_discriminator()

        # path of the most recent checkpoint, if any (None when none saved)
        self.latest_checkpoint = tf.train.latest_checkpoint(config.model_log)
        self.saver = tf.train.Saver()

        # grow GPU memory on demand instead of reserving it all up front
        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True
        self.init_op = tf.group(tf.global_variables_initializer(),
                                tf.local_variables_initializer())
        self.sess = tf.Session(config=self.config)
        self.sess.run(self.init_op)
Example #4
0
 def __init__(self, embed_filename, labels_matrix, n_node, n_embed, n_classes):
     """Hold everything needed to evaluate node classification.

     The full embedding matrix is read from `embed_filename` eagerly.
     """
     # bookkeeping sizes and labels first, then the (possibly slow) file read
     self.n_node = n_node
     self.n_embed = n_embed
     self.n_classes = n_classes
     self.labels_matrix = labels_matrix
     # embedding file layout: node_id followed by an n_embed-dim vector per line
     self.embed_filename = embed_filename
     self.emd = utils.read_embeddings(embed_filename, n_node=n_node, n_embed=n_embed)
Example #5
0
def count_borrowed_words_in_embeddings(args):
    """Report how many loanword-pair words are covered by the embeddings.

    Args:
        args: parsed CLI namespace with loanwords_file, en_embedding_file,
            ru_embedding_file and vocab_size attributes.
    """
    # loading data
    borrowed_pairs = pandas.read_csv(args.loanwords_file)
    en_word2id, en_embeddings = read_embeddings(args.en_embedding_file,
                                                n_max=args.vocab_size)
    ru_word2id, ru_embeddings = read_embeddings(args.ru_embedding_file,
                                                n_max=args.vocab_size)
    # print() function form keeps this consistent with the rest of the file
    print("Number of embeddings loaded per language: ", args.vocab_size)

    # count how many words of each column appear in the embedding vocabulary
    en_words = borrowed_pairs["English"].tolist()
    ru_words = borrowed_pairs["Russian"].tolist()
    # bool is an int subclass, so summing membership tests counts the hits
    en_count = sum(word in en_word2id for word in en_words)
    print("Number of english words in vocab:", en_count)
    ru_count = sum(word in ru_word2id for word in ru_words)
    print("Number of russian words in vocab:", ru_count)
Example #6
0
 def __init__(self, embed_filename, test_filename, test_neg_filename, n_node, n_embed):
     """Hold everything needed to evaluate link prediction.

     The embedding matrix is read from `embed_filename` eagerly at
     construction time via utils.read_embeddings.

     Args:
         embed_filename: embedding file; each line: node_id, embeddings(dim: n_embed).
         test_filename: positive test edges; each line: node_id1, node_id2.
         test_neg_filename: negative test edges; each line: node_id1, node_id2.
         n_node: total number of nodes.
         n_embed: embedding dimensionality.
     """
     self.embed_filename = embed_filename  # each line: node_id, embeddings(dim: n_embed)
     self.test_filename = test_filename  # each line: node_id1, node_id2
     self.test_neg_filename = test_neg_filename  # each line: node_id1, node_id2
     self.n_node = n_node
     self.n_embed = n_embed
     self.emd = utils.read_embeddings(embed_filename, n_node=n_node, n_embed=n_embed)
Example #7
0
def main():
    """Print the first 50 (word, index) entries of an embedding file."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--embedding_file')
    args = parser.parse_args()

    # load only a small vocabulary prefix; enough for a quick sanity check
    word2id, embeddings = read_embeddings(args.embedding_file, n_max=50)
    # dict.items() replaces the Python-2-only iteritems(), and print() is the
    # function form used elsewhere in this file
    for word, index in word2id.items():
        print(word, index)
Example #8
0
    def __init__(self):
        """Load the graph and initial embeddings, build or load BFS-trees,
        then build the GAN model."""
        print("read graph")

        # n_node (e.g. 5242) is the node count; graph is a dict keyed by node id
        self.n_node, self.graph = utils.read_edges(config.train_filename, config.test_filename)
        self.root_nodes = list(range(self.n_node))

        print("reading initial embeddings ...")

        # n_node x n_emb matrices used to initialise each GAN player
        self.node_embed_init_d = utils.read_embeddings(filename=config.pretrain_emb_filename_d,
                                                       n_node=self.n_node,
                                                       n_embed=config.n_emb)

        self.node_embed_init_g = utils.read_embeddings(filename=config.pretrain_emb_filename_g,
                                                       n_node=self.n_node,
                                                       n_embed=config.n_emb)

        # construct the BFS-trees, or read them from the pickle cache if present
        self.trees = None
        if os.path.isfile(config.cache_filename):
            print("reading BFS-trees from cache ... ")
            # `with` guarantees the handle is closed even if unpickling fails
            with open(config.cache_filename, 'rb') as pickle_file:
                self.trees = pickle.load(pickle_file)
        else:
            print("constructing BFS-trees")  # fixed typo: was "constructiong"
            if config.multi_processing:
                # NOTE(review): assumes construct_trees_with_mp fills in
                # self.trees as a side effect; otherwise None gets cached
                self.construct_trees_with_mp(self.root_nodes)
            else:
                self.trees = self.construct_trees(self.root_nodes)
            # Open for writing only after construction succeeds, so a failure
            # during tree building cannot leave a truncated/corrupt cache file.
            with open(config.cache_filename, 'wb') as pickle_file:
                pickle.dump(self.trees, pickle_file)

        print("building GAN model...")

        self.discriminator = None
        self.generator = None
        self.build_generator()
        self.build_discriminator()
Example #9
0
    def __init__(self, embed_filename, n_node, n_embed, rcmd):
        """Prepare recommendation-evaluation state and precompute scores.

        Args:
            embed_filename: embedding file; each line: node_id, embeddings(dim: n_embed).
            n_node: total number of nodes.
            n_embed: embedding dimensionality.
            rcmd: recommendation data holder providing `watched` and `unwatched`.
        """
        # NOTE: the original assigned self.embed_filename twice; once is enough.
        self.embed_filename = embed_filename  # each line: node_id, embeddings(dim: n_embed)
        self.n_node = n_node
        self.n_embed = n_embed

        self.watched = rcmd.watched  # movies each user watched in the test set
        self.unwatched = rcmd.unwatched  # movies each user has not watched (rated) in the training set
        self.K = 1  # top-K cutoff used when scoring recommendations

        self.emd = utils.read_embeddings(self.embed_filename, self.n_node,
                                         self.n_embed)
        self.scores = self.get_movie_score()
Example #10
0
        作为替代,实际运行时使用的是 maximize mean(log(D(Z)))

        因此,对 -mean(log(D(Z))) 梯度下降即可
        '''
        l2_loss = lambda x: torch.sum(x * x) / 2 * config.lambda_gen
        prob = torch.clamp(input=prob, min=1e-5, max=1)
        #正则项
        regularization = l2_loss(self.node_embedding) + l2_loss(
            self.node_neighbor_embedding)
        _loss = -torch.mean(torch.log(prob) * reward) + regularization

        return _loss


if __name__ == "__main__":
    # Manual smoke test: build a Generator from the pretrained embeddings and
    # print every parameter.
    import sys, os
    sys.path.append("../..")  # make the project root importable
    os.chdir(sys.path[0])  # run relative to this script's own directory
    # imported only after the sys.path hack above so `src` resolves
    from src import utils
    n_node, graph = utils.read_edges(train_filename=config.train_filename,
                                     test_filename=config.test_filename)
    node_embed_init_g = utils.read_embeddings(
        filename=config.pretrain_emb_filename_g,
        n_node=n_node,
        n_embed=config.n_emb)
    generator = Generator(n_node=n_node, node_emd_init=node_embed_init_g)
    for p in generator.parameters():
        print(p.name)
        print(p)