def main():
    """Evaluate word-translation accuracy on loanword and supervised dictionaries.

    Loads English and Russian embeddings (capped at ``args.vocab_size``),
    scores the loanword dictionary, then scores the supervised dictionary.
    When ``args.flip_dict`` is set, the supervised dictionary file is stored
    ru->en and its columns are flipped to en->ru before scoring.
    """
    args = parser.parse_args()

    en_word2id, en_emb = read_embeddings(args.en_embedding_file,
                                         n_max=args.vocab_size)
    ru_word2id, ru_emb = read_embeddings(args.ru_embedding_file,
                                         n_max=args.vocab_size)
    en_emb = torch.from_numpy(en_emb)
    ru_emb = torch.from_numpy(ru_emb)

    borrowed_pairs = load_dictionary(args.loanwords_file, en_word2id, ru_word2id)
    get_word_translation_accuracy(borrowed_pairs, en_word2id, en_emb,
                                  ru_word2id, ru_emb, args.knn_method)

    if args.flip_dict:
        supervised_pairs = src.evaluation.word_translation.load_dictionary(
            args.supervised_dict_file, ru_word2id, en_word2id)
        # BUG FIX: the original per-row tuple swap
        #   pairs[i, 0], pairs[i, 1] = pairs[i, 1], pairs[i, 0]
        # is broken for torch tensors: basic integer indexing returns
        # storage-sharing views, so the second assignment reads the value
        # already overwritten by the first and both columns end up equal to
        # the original second column. Advanced (list) indexing copies, so
        # the column flip below swaps correctly.
        supervised_pairs = supervised_pairs[:, [1, 0]]
    else:
        supervised_pairs = src.evaluation.word_translation.load_dictionary(
            args.supervised_dict_file, en_word2id, ru_word2id)
    get_word_translation_accuracy(supervised_pairs, en_word2id, en_emb,
                                  ru_word2id, ru_emb, args.knn_method)
def __init__(self): utils.make_config_dirs(config) #read graph if config.app == "link_prediction": self.graph = utils.read_edges(train_filename=config.train_filename, test_filename=config.test_filename) self.n_node = max(list(self.graph.keys())) + 1 elif config.app == "node_classification": self.graph = utils.read_edges(train_filename=config.train_filename) self.n_node = max(list(self.graph.keys())) + 1 self.n_classes, self.labels_matrix = utils.read_labels( filename=config.labels_filename, n_node=self.n_node) elif config.app == "recommendation": self.graph, self.rcmd = rcmd_util.read_edges( train_filename=config.rcmd_train_filename, test_filename=config.rcmd_test_filename) self.n_node = max(list(self.graph.keys())) + 1 else: raise Exception("Unknown task: {}".format(config.app)) #read root nodes if config.app == "recommendation": self.root_nodes = sorted(list( self.graph.keys()))[:self.rcmd.user_max] else: self.root_nodes = sorted(list(self.graph.keys())) #read pre_emb matrix node_embed_init_d = utils.read_embeddings( filename=config.pretrain_emb_filename_d, n_node=self.n_node, n_embed=config.n_emb) node_embed_init_g = utils.read_embeddings( filename=config.pretrain_emb_filename_g, n_node=self.n_node, n_embed=config.n_emb) self.discriminator = Discriminator(n_node=self.n_node, node_emd_init=node_embed_init_d) self.generator = Generator(n_node=self.n_node, node_emd_init=node_embed_init_g) #construct BFS-tree if config.app == "recommendation": self.BFS_trees = BFS_trees(self.root_nodes, self.graph, batch_num=config.cache_batch, app=config.app, rcmd=self.rcmd) else: self.BFS_trees = BFS_trees(self.root_nodes, self.graph, batch_num=config.cache_batch)
def __init__(self):
    """Load the graph and pretrained embeddings, build or read cached
    BFS-trees, then construct the TensorFlow GAN model and session."""
    print("reading graphs...")
    self.n_node, self.graph = utils.read_edges(config.train_filename,
                                               config.test_filename)
    self.root_nodes = [i for i in range(self.n_node)]

    print("reading initial embeddings...")
    self.node_embed_init_d = utils.read_embeddings(
        filename=config.pretrain_emb_filename_d,
        n_node=self.n_node,
        n_embed=config.n_emb)
    self.node_embed_init_g = utils.read_embeddings(
        filename=config.pretrain_emb_filename_g,
        n_node=self.n_node,
        n_embed=config.n_emb)

    # construct or read BFS-trees
    self.trees = None
    if os.path.isfile(config.cache_filename):
        print("reading BFS-trees from cache...")
        # 'with' guarantees the handle is closed even if unpickling raises
        with open(config.cache_filename, 'rb') as pickle_file:
            self.trees = pickle.load(pickle_file)
    else:
        print("constructing BFS-trees...")
        if config.multi_processing:
            self.construct_trees_with_mp(self.root_nodes)
        else:
            self.trees = self.construct_trees(self.root_nodes)
        # open the cache only after construction succeeded, so a failure
        # while building trees cannot leave a truncated/empty cache file
        with open(config.cache_filename, 'wb') as pickle_file:
            pickle.dump(self.trees, pickle_file)

    print("building GAN model...")
    self.discriminator = None
    self.generator = None
    self.build_generator()
    self.build_discriminator()

    self.latest_checkpoint = tf.train.latest_checkpoint(config.model_log)
    self.saver = tf.train.Saver()
    self.config = tf.ConfigProto()
    self.config.gpu_options.allow_growth = True
    self.init_op = tf.group(tf.global_variables_initializer(),
                            tf.local_variables_initializer())
    self.sess = tf.Session(config=self.config)
    self.sess.run(self.init_op)
def __init__(self, embed_filename, labels_matrix, n_node, n_embed, n_classes):
    """Hold node-classification evaluation inputs and load the embeddings.

    The embedding file is expected to contain one node per line:
    a node_id followed by n_embed values.
    """
    self.embed_filename = embed_filename
    self.labels_matrix = labels_matrix
    self.n_node = n_node
    self.n_embed = n_embed
    self.n_classes = n_classes
    # materialize the embedding matrix up front
    self.emd = utils.read_embeddings(embed_filename,
                                     n_node=n_node,
                                     n_embed=n_embed)
def count_borrowed_words_in_embeddings(args): # loading data borrowed_pairs = pandas.read_csv(args.loanwords_file) en_word2id, en_embeddings = read_embeddings(args.en_embedding_file, n_max=args.vocab_size) ru_word2id, ru_embeddings = read_embeddings(args.ru_embedding_file, n_max=args.vocab_size) print "Number of embeddings loaded per language: ", args.vocab_size # find number of words in vocab en_words = borrowed_pairs["English"].tolist() ru_words = borrowed_pairs["Russian"].tolist() en_count = sum([1 if word in en_word2id else 0 for word in en_words]) print "Number of english words in vocab:", en_count ru_count = sum([1 if word in ru_word2id else 0 for word in ru_words]) print "Number of russian words in vocab:", ru_count
def __init__(self, embed_filename, test_filename, test_neg_filename, n_node,
             n_embed):
    """Hold link-prediction evaluation inputs and load the embedding matrix.

    Expected file formats (one record per line):
      * embed_filename:    node_id followed by n_embed values
      * test_filename:     node_id1 node_id2 (positive edges)
      * test_neg_filename: node_id1 node_id2 (negative edges)
    """
    self.embed_filename = embed_filename
    self.test_filename = test_filename
    self.test_neg_filename = test_neg_filename
    self.n_node = n_node
    self.n_embed = n_embed
    # load the embeddings immediately so evaluation can run directly
    self.emd = utils.read_embeddings(embed_filename,
                                     n_node=n_node,
                                     n_embed=n_embed)
def main(): parser = argparse.ArgumentParser() parser.add_argument('--embedding_file') args = parser.parse_args() word2id, embeddings = read_embeddings(args.embedding_file, n_max=50) for word, index in word2id.iteritems(): print word, index
def __init__(self):
    """Load the graph and pretrained embeddings, build or read cached
    BFS-trees, then build the generator and discriminator of the GAN."""
    print("read graph")
    # n_node is the node count (e.g. 5242); graph is an adjacency dict
    self.n_node, self.graph = utils.read_edges(config.train_filename,
                                               config.test_filename)
    self.root_nodes = [i for i in range(self.n_node)]

    print("reading initial embeddings ...")
    # n_node x n_emb matrices
    self.node_embed_init_d = utils.read_embeddings(
        filename=config.pretrain_emb_filename_d,
        n_node=self.n_node,
        n_embed=config.n_emb)
    self.node_embed_init_g = utils.read_embeddings(
        filename=config.pretrain_emb_filename_g,
        n_node=self.n_node,
        n_embed=config.n_emb)

    # construct or read BFS-trees
    self.trees = None
    if os.path.isfile(config.cache_filename):
        print("reading BFS-trees from cache ... ")
        # 'with' guarantees the handle is closed even if unpickling raises
        with open(config.cache_filename, 'rb') as pickle_file:
            self.trees = pickle.load(pickle_file)
    else:
        print("constructing BFS-trees")  # also fixes "constructiong" typo
        if config.multi_processing:
            self.construct_trees_with_mp(self.root_nodes)
        else:
            self.trees = self.construct_trees(self.root_nodes)
        # open the cache only after construction succeeded, so a failure
        # while building trees cannot leave a truncated/empty cache file
        with open(config.cache_filename, 'wb') as pickle_file:
            pickle.dump(self.trees, pickle_file)

    print("building GAN model...")
    self.discriminator = None
    self.generator = None
    self.build_generator()
    self.build_discriminator()
def __init__(self, embed_filename, n_node, n_embed, rcmd):
    """Load movie embeddings and precompute recommendation scores.

    Args:
        embed_filename: embedding file; each line is a node_id followed by
            n_embed values.
        n_node: number of nodes expected in the file.
        n_embed: embedding dimensionality.
        rcmd: recommendation data object providing watched/unwatched info.
    """
    # NOTE: the original assigned self.embed_filename twice; the duplicate
    # assignment has been removed.
    self.embed_filename = embed_filename
    self.n_node = n_node
    self.n_embed = n_embed
    # movies each user has watched in the test set
    self.watched = rcmd.watched
    # movies each user has not watched (rated) in the training set
    self.unwatched = rcmd.unwatched
    self.K = 1  # top-K recommendations to evaluate
    self.emd = utils.read_embeddings(self.embed_filename, self.n_node,
                                     self.n_embed)
    self.scores = self.get_movie_score()
        As an alternative, the implementation actually maximizes
        mean(log(D(Z))); doing gradient descent on -mean(log(D(Z)))
        achieves the same thing.
        '''
        l2_loss = lambda x: torch.sum(x * x) / 2 * config.lambda_gen
        prob = torch.clamp(input=prob, min=1e-5, max=1)
        # L2 regularization term
        regularization = l2_loss(self.node_embedding) + l2_loss(
            self.node_neighbor_embedding)
        _loss = -torch.mean(torch.log(prob) * reward) + regularization
        return _loss


if __name__ == "__main__":
    # Smoke test: build a Generator from the pretrained embeddings and
    # print all of its parameters.
    import sys, os
    sys.path.append("../..")
    os.chdir(sys.path[0])
    from src import utils
    n_node, graph = utils.read_edges(train_filename=config.train_filename,
                                     test_filename=config.test_filename)
    node_embed_init_g = utils.read_embeddings(
        filename=config.pretrain_emb_filename_g,
        n_node=n_node,
        n_embed=config.n_emb)
    generator = Generator(n_node=n_node, node_emd_init=node_embed_init_g)
    for p in generator.parameters():
        print(p.name)
        print(p)