import csv
import pickle
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# Graph, the encoder/decoder/aggregator classes, get_decoder, and the
# evaluate_* helpers used below come from this project's own modules.


def load_graph(info_dir, embed_dim=16):
    print "Loading adjacency info..."
    adj_lists = pickle.load(open(info_dir + "/adj_lists.pkl"))
    relations = pickle.load(open(info_dir + "/rels.pkl"))

    print "Loading feature data..."
    # Prepend a zero row so index 0 can serve as a dummy/padding entry;
    # all real node ids are shifted by +1 in the feature lookup below.
    post_feats = np.load(info_dir + "/post_feats.npy")
    post_feats = np.concatenate([np.zeros((1, 100)), post_feats])
    comment_feats = np.load(info_dir + "/comment_feats.npy")
    comment_feats = np.concatenate([np.zeros((1, 100)), comment_feats])

    num_users = len(set([node for rel, adj in adj_lists.iteritems()
                         for node in adj if rel[0] == "user"]))
    num_communities = len(set([node for rel, adj in adj_lists.iteritems()
                               for node in adj if rel[0] == "community"]))

    feature_modules = {
        # Posts and comments get fixed, pre-computed features; users,
        # communities, and types get trainable embeddings.
        "comment": nn.Embedding(comment_feats.shape[0], comment_feats.shape[1]),
        "post": nn.Embedding(post_feats.shape[0], post_feats.shape[1]),
        "user": nn.Embedding(num_users + 1, embed_dim),
        "community": nn.Embedding(num_communities + 1, embed_dim),
        "type": nn.Embedding(6, embed_dim)}
    # Freeze the pre-computed post/comment features.
    feature_modules["comment"].weight = nn.Parameter(
        torch.FloatTensor(comment_feats), requires_grad=False)
    feature_modules["post"].weight = nn.Parameter(
        torch.FloatTensor(post_feats), requires_grad=False)
    for mode in ["user", "community", "type"]:
        feature_modules[mode].weight.data.normal_(0, 1. / embed_dim)

    # Feature lookup: shift ids by +1 to skip the dummy row at index 0.
    features = lambda nodes, mode: feature_modules[mode](
        torch.autograd.Variable(torch.LongTensor(nodes) + 1))
    feature_dims = {mode: embed.weight.size()[1]
                    for mode, embed in feature_modules.iteritems()}
    graph = Graph(features, feature_dims, relations, adj_lists)
    return graph, feature_modules
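
# A minimal, self-contained sketch (not part of the original pipeline) of the
# pattern used above: wrapping fixed, pre-computed features in an nn.Embedding
# whose weight has requires_grad=False, so frozen features can be looked up
# exactly like trainable embeddings. The array and ids here are illustrative.
def _frozen_feature_lookup_demo():
    fake_feats = np.random.rand(5, 100)  # 5 nodes, 100-d pre-computed features
    # Row 0 is a dummy/padding row; real node ids are shifted by +1.
    table = np.concatenate([np.zeros((1, 100)), fake_feats])
    embed = nn.Embedding(table.shape[0], table.shape[1])
    embed.weight = nn.Parameter(torch.FloatTensor(table), requires_grad=False)
    nodes = [0, 3]  # original (unshifted) node ids
    vecs = embed(torch.autograd.Variable(torch.LongTensor(nodes) + 1))
    print vecs.size()  # (2, 100)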
def load_graph(data_dir, embed_dim, graph_data_path="/graph_data.pkl"):
    '''
    Given embed_dim, load graph data from file and construct a Graph() object.
    Return:
        graph: a Graph() object
        feature_modules: a dict of embedding matrices keyed by node type,
            each embedding matrix of shape [num_ent_by_type + 2, embed_dim]
        node_maps: a dict()
            key: type, 5 types: function, sideeffects, protein, disease, drug
            value: dict():
                key: global node id
                value: local node id for this type
    '''
    # rels: a dict() of all triple templates
    #     key: domain entity type
    #     value: a list of tuples (range entity type, predicate)
    # adj_lists: a dict about the edges in the KG
    #     key: triple template, e.g. ('drug', 'psoriatic_arthritis', 'drug')
    #     value: a defaultdict over all edge instances of that template
    #         key: the head entity id
    #         value: a set of tail entity ids
    # node_maps: a dict() about node types
    #     key: type, 5 types: function, sideeffects, protein, disease, drug
    #     value: a list of node ids
    rels, adj_lists, node_maps, rid2inverse = pickle.load(
        open(data_dir + graph_data_path, "rb"))

    # Re-index node_maps so each global node id maps to a local id per type.
    node_maps = {m: {n: i for i, n in enumerate(id_list)}
                 for m, id_list in node_maps.iteritems()}
    # Reserve -1 as a dummy node id for every type; the +1 shift below sends
    # it to embedding row 0.
    for m in node_maps:
        node_maps[m][-1] = -1

    feature_dims = {m: embed_dim for m in rels}
    if embed_dim > 0:
        # Initialize an embedding matrix for each node type,
        # shape [num_ent_by_type + 2, embed_dim].
        feature_modules = {m: torch.nn.Embedding(len(node_maps[m]) + 1, embed_dim)
                           for m in rels}
        for mode in rels:
            # Embedding initialization: normal distribution.
            feature_modules[mode].weight.data.normal_(0, 1. / embed_dim)

        # features(nodes, mode): an embedding lookup function.
        #     nodes: a list of global node ids, all of type `mode`
        #     mode: node type
        #     return: embedding vectors, shape [num_node, embed_dim]
        features = lambda nodes, mode: feature_modules[mode](
            torch.autograd.Variable(
                torch.LongTensor([node_maps[mode][n] for n in nodes]) + 1))
    else:
        feature_modules = None
        features = None
    graph = Graph(features, feature_dims, rels, adj_lists,
                  rid2inverse=rid2inverse)
    return graph, feature_modules, node_maps
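
# Illustrative toy values (not from the real graph_data.pkl, which holds the
# full biomedical KG) showing the shapes of the pickled structures documented
# above: rels maps a domain type to its (range type, predicate) templates, and
# adj_lists maps a full triple template to head id -> set of tail ids.
_example_rels = {
    "drug": [("disease", "treats"), ("protein", "targets")],
    "disease": [("drug", "treats_inv")],
}
_example_adj_lists = {
    ("drug", "treats", "disease"): {101: set([7, 9]), 102: set([7])},
}
_example_node_maps = {"drug": [101, 102], "disease": [7, 9]}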
def load_graph(data_dir, embed_dim):
    rels, adj_lists, node_maps = pickle.load(
        open(data_dir + "/graph_data.pkl", "rb"))
    node_maps = {m: {n: i for i, n in enumerate(id_list)}
                 for m, id_list in node_maps.iteritems()}
    for m in node_maps:
        node_maps[m][-1] = -1
    feature_dims = {m: embed_dim for m in rels}
    feature_modules = {m: torch.nn.Embedding(len(node_maps[m]) + 1, embed_dim)
                       for m in rels}
    for mode in rels:
        feature_modules[mode].weight.data.normal_(0, 1. / embed_dim)
    features = lambda nodes, mode: feature_modules[mode](
        torch.autograd.Variable(
            torch.LongTensor([node_maps[mode][n] for n in nodes]) + 1))
    graph = Graph(features, feature_dims, rels, adj_lists)
    return graph, feature_modules, node_maps
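
# A small worked example (hypothetical global ids) of the re-indexing used in
# both loaders above: global ids become contiguous local ids per type, the
# dummy id -1 maps to local id -1, and the +1 shift in the feature lambda
# sends the dummy to embedding row 0 and real nodes to rows 1..N.
def _node_map_demo():
    id_list = [101, 102, 205]
    node_map = {n: i for i, n in enumerate(id_list)}
    node_map[-1] = -1  # dummy node
    rows = torch.LongTensor([node_map[n] for n in [101, -1, 205]]) + 1
    print rows  # 1, 0, 3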
def train(feature_dim, lr, model, batch_size, max_batches, tol, max_path_len):
    relations, adj_lists, node_maps = pickle.load(
        open("/dfs/scratch0/netquery/cancer.pkl"))
    # Drop this relation because it doesn't have enough data.
    relations['disease'].remove(('disease', '0'))
    del adj_lists[('disease', '0', 'disease')]
    for rel1 in relations:
        for rel2 in relations[rel1]:
            print rel1, rel2, len(adj_lists[(rel1, rel2[1], rel2[0])])

    # Add a dummy node per mode.
    for mode in node_maps:
        node_maps[mode][-1] = len(node_maps[mode])

    feature_dims = {mode: feature_dim for mode in relations}
    feature_modules = {
        mode: nn.EmbeddingBag(len(node_maps[mode]), feature_dim)
        for mode in relations
    }
    for feature_module in feature_modules.values():
        feature_module.weight.data.normal_(0, 1. / np.sqrt(feature_dim))

    cuda = True
    if cuda:
        features = lambda nodes, mode, offset: feature_modules[mode].forward(
            Variable(torch.LongTensor(
                [node_maps[mode][node] for node in nodes])).cuda(),
            Variable(torch.LongTensor(offset)).cuda())
    else:
        features = lambda nodes, mode, offset: feature_modules[mode].forward(
            Variable(torch.LongTensor(
                [node_maps[mode][node] for node in nodes])),
            Variable(torch.LongTensor(offset)))
    graph = Graph(features, feature_dims, relations, adj_lists)

    # 90/10 train/test split per relation; test edges are removed from
    # the graph.
    edges = graph.get_all_edges_byrel()
    train_edges = {rel: edge_list[:int(0.9 * len(edge_list))]
                   for rel, edge_list in edges.iteritems()}
    test_edges = {rel: edge_list[int(0.9 * len(edge_list)):]
                  for rel, edge_list in edges.iteritems()}
    graph.remove_edges(
        [e for edge_list in test_edges.values() for e in edge_list])

    direct_enc = DirectEncoder(graph.features, feature_modules)
    dec = BilinearPathDecoder(graph.relations, feature_dims)
    enc_dec = PathEncoderDecoder(graph, direct_enc, dec)
    if cuda:
        enc_dec.cuda()
    optimizer = optim.SGD(enc_dec.parameters(), lr=lr, momentum=0.000)

    start = time.time()
    print "{:d} training edges".format(
        sum([len(rel_edges) for rel_edges in train_edges.values()]))

    # Phase 1: train on single edges.
    losses = []
    ema_loss = None
    for i in range(max_batches):
        rel = graph.sample_relation()
        random.shuffle(train_edges[rel])
        edges = train_edges[rel][:batch_size]
        if len(edges) == 0:
            continue
        optimizer.zero_grad()
        # Hide the batch edges from the graph while scoring them so the
        # model cannot trivially read off the answer.
        enc_dec.graph.remove_edges(edges)
        loss = enc_dec.margin_loss([edge[0] for edge in edges],
                                   [edge[1] for edge in edges], [rel])
        enc_dec.graph.add_edges(edges)
        losses.append(loss.data[0])
        if ema_loss is None:
            ema_loss = loss.data[0]
        else:
            ema_loss = 0.99 * ema_loss + 0.01 * loss.data[0]
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print i, ema_loss
        if i > 2000 and i % 100 == 0:
            # Convergence check: mean loss over the two most recent
            # 1000-batch windows.
            conv = np.mean(losses[i - 2000:i - 1000]) - np.mean(
                losses[i - 1000:i])
            print "conv", conv
            if conv < tol:
                break

    print "AUC:", evaluate_edge_auc(test_edges, graph, enc_dec)

    # Phase 2: train on sampled metapaths.
    ema_loss = None
    optimizer = optim.SGD(enc_dec.parameters(), lr=lr, momentum=0.000)
    for i in range(max_batches):
        rels = graph.sample_metapath()
        nodes1, nodes2 = zip(
            *[graph.sample_path_with_rels(rels) for _ in range(batch_size)])
        optimizer.zero_grad()
        loss = enc_dec.margin_loss(nodes1, nodes2, rels)
        losses.append(loss.data[0])
        if ema_loss is None:
            ema_loss = loss.data[0]
        else:
            ema_loss = 0.99 * ema_loss + 0.01 * loss.data[0]
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print i, ema_loss
        if i % 5000 == 0:
            print "AUC:", evaluate_edge_auc(test_edges, graph, enc_dec)

    total = time.time() - start
    print "Time:", total
    print "Converged after:", i
    print "Per example:", total / batch_size / float(i)
    print "AUC:", evaluate_edge_auc(test_edges, graph, enc_dec)
def train(feature_dim, lr, model, batch_size, max_batches, tol, cuda, results,
          decoder, opt, agg):
    # Load the data.
    # relations, adj_lists, node_maps = pickle.load(open("/dfs/scratch0/netquery/cancer.pkl"))
    relations, adj_lists, node_maps = pickle.load(open("cancer.pkl"))
    # Delete this relation because it doesn't have enough data.
    relations['disease'].remove(('disease', '0'))
    del adj_lists[('disease', '0', 'disease')]

    # Add a dummy node (messy hack for now).
    for mode in node_maps:
        node_maps[mode][-1] = len(node_maps[mode])

    # Set the feature dimensions to be equal for all modes.
    feature_dims = {mode: feature_dim for mode in relations}

    # The feature modules for all nodes are embedding lookups.
    feature_modules = {
        mode: nn.Embedding(len(node_maps[mode]), feature_dim)
        for mode in relations
    }

    # Define the feature function that maps nodes to features.
    if cuda:
        features = lambda nodes, mode: feature_modules[mode].forward(
            Variable(torch.LongTensor(
                [node_maps[mode][node] for node in nodes])).cuda())
    else:
        features = lambda nodes, mode: feature_modules[mode].forward(
            Variable(torch.LongTensor(
                [node_maps[mode][node] for node in nodes])))

    # Give reasonable initialization to features.
    for feature_module in feature_modules.values():
        feature_module.weight.data.normal_(0, 1. / np.sqrt(feature_dim))

    # Build the graph.
    graph = Graph(features, feature_dims, relations, adj_lists)

    # Get mapping from relation -> list of edges for that relation.
    edges = graph.get_all_edges_byrel()

    # Separate into train and test sets; test edges are removed from
    # the graph.
    train_edges = {rel: edge_list[:int(0.9 * len(edge_list))]
                   for rel, edge_list in edges.iteritems()}
    test_edges = {rel: edge_list[int(0.9 * len(edge_list)):]
                  for rel, edge_list in edges.iteritems()}
    graph.remove_edges(
        [e for edge_list in test_edges.values() for e in edge_list])

    # For simplicity the embedding and hidden dimensions are equal.
    out_dims = {mode: feature_dim for mode in graph.relations}

    # Define the encoder: either direct or based on single-step convolution.
    if model == "direct":
        enc = DirectEncoder(graph.features, feature_modules)
        dec = get_decoder(graph, feature_dims, decoder)
        enc_dec = EdgeEncoderDecoder(graph, enc, dec)
    else:
        if agg == "mean":
            aggregator = MeanAggregator(graph.features)
        elif agg == "pool":
            aggregator = PoolAggregator(graph.features, graph.feature_dims)
        enc = Encoder(graph.features,
                      graph.feature_dims,
                      out_dims,
                      graph.relations,
                      graph.adj_lists,
                      concat=True,
                      feature_modules=feature_modules,
                      cuda=cuda,
                      aggregator=aggregator)
        dec = get_decoder(graph, enc.out_dims, decoder)
        enc_dec = EdgeEncoderDecoder(graph, enc, dec)
    if cuda:
        enc_dec.cuda()

    if opt == "sgd":
        optimizer = optim.SGD(enc_dec.parameters(), lr=lr, momentum=0.000)
    elif opt == "sgd-momentum":
        optimizer = optim.SGD(enc_dec.parameters(), lr=lr, momentum=0.9)
    elif opt == "adam":
        optimizer = optim.Adam(enc_dec.parameters(), lr=lr)

    # Main training loop.
    start = time.time()
    ema_loss = None
    conv = -1
    print "{:d} training edges".format(
        sum([len(rel_edges) for rel_edges in train_edges.values()]))
    losses = []
    for i in range(max_batches):
        rel = graph.sample_relation()
        random.shuffle(train_edges[rel])
        edges = train_edges[rel][:batch_size]
        if len(edges) == 0:
            continue
        optimizer.zero_grad()
        # Hide the batch edges from the graph while scoring them.
        graph.remove_edges(edges)
        loss = enc_dec.margin_loss([edge[0] for edge in edges],
                                   [edge[1] for edge in edges], rel)
        graph.add_edges(edges)
        losses.append(loss.data[0])
        if ema_loss is None:
            ema_loss = loss.data[0]
        else:
            ema_loss = 0.99 * ema_loss + 0.01 * loss.data[0]
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print i, ema_loss
        if i > 2000 and i % 100 == 0:
            conv = np.mean(losses[i - 2000:i - 1000]) - np.mean(
                losses[i - 1000:i])
            print "conv", conv
            if conv < tol:
                break

    total = time.time() - start
    test_auc = evaluate_edge_auc(test_edges, graph, enc_dec)
    test_loss = evaluate_edge_margin(test_edges, graph, enc_dec)
    with open(results, "a") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([
            str(lr),
            str(batch_size),
            str(total),
            str(i),
            str(total / batch_size / float(i)),
            str(test_auc),
            str(test_loss),
            str(ema_loss),
            str(conv)
        ])
    print "Time:", total
    print "Converged after:", i
    print "Per example:", total / batch_size / float(i)
    print "AUC:", test_auc
    print "Loss:", test_loss
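
# The training loops in this file repeat the same bookkeeping: an exponential
# moving average of the loss, and a convergence test that compares the mean
# loss over the two most recent 1000-batch windows. A hedged sketch of that
# logic factored into helpers (the helper names are ours, not from the
# original code):
def _update_ema(ema_loss, loss_val, decay=0.99):
    # EMA with the same 0.99/0.01 weighting used in the loops above.
    if ema_loss is None:
        return loss_val
    return decay * ema_loss + (1 - decay) * loss_val


def _has_converged(losses, i, tol, window=1000):
    # Only meaningful once there are two full windows of history, and only
    # checked every 100 batches, mirroring the loops above.
    if i <= 2 * window or i % 100 != 0:
        return False, None
    conv = np.mean(losses[i - 2 * window:i - window]) - np.mean(
        losses[i - window:i])
    return conv < tol, conv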
def load_graph(info_dir, embed_dim=16, cuda=False):
    print "Loading adjacency info..."
    adj_lists = pickle.load(open(info_dir + "/adj_lists.pkl"))
    relations = pickle.load(open(info_dir + "/rels.pkl"))
    post_words = pickle.load(open(info_dir + "/post_words.pkl"))

    num_users = len(set([node for rel, adj in adj_lists.iteritems()
                         for node in adj if rel[0] == "user"]))
    num_communities = len(set([node for rel, adj in adj_lists.iteritems()
                               for node in adj if rel[0] == "community"]))
    num_words = len(set([w for words in post_words.values() for w in words]))
    post_words = {post: torch.LongTensor([w for w in post_words[post]])
                  for post in post_words}

    feature_modules = {
        # A post is represented by aggregating its word embeddings
        # (EmbeddingBag); users and communities get one embedding each.
        "post": nn.EmbeddingBag(num_words, embed_dim),
        "user": nn.Embedding(num_users + 1, embed_dim),
        "community": nn.Embedding(num_communities + 1, embed_dim),
    }
    for mode in feature_modules:
        feature_modules[mode].weight.data.normal_(0, 1. / embed_dim)

    if not cuda:
        def _feature_func(nodes, mode):
            if mode != "post":
                return feature_modules[mode](
                    torch.autograd.Variable(torch.LongTensor(nodes) + 1))
            else:
                # EmbeddingBag takes one flat word-id tensor plus the offset
                # at which each post's word list starts.
                offsets = np.concatenate(
                    ([0], np.cumsum(
                        [post_words[post].size()[0] for post in nodes[:-1]])))
                return feature_modules[mode](
                    torch.autograd.Variable(
                        torch.cat([post_words[post] for post in nodes])),
                    torch.autograd.Variable(torch.LongTensor(offsets)))
    else:
        def _feature_func(nodes, mode):
            if mode != "post":
                return feature_modules[mode](
                    torch.autograd.Variable(
                        torch.LongTensor(nodes) + 1).cuda())
            else:
                offsets = np.concatenate(
                    ([0], np.cumsum(
                        [post_words[post].size()[0] for post in nodes[:-1]])))
                return feature_modules[mode](
                    torch.autograd.Variable(
                        torch.cat([post_words[post] for post in nodes])).cuda(),
                    torch.autograd.Variable(torch.LongTensor(offsets)).cuda())

    feature_dims = {mode: embed.weight.size()[1]
                    for mode, embed in feature_modules.iteritems()}
    graph = Graph(_feature_func, feature_dims, relations, adj_lists)
    return graph, feature_modules
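
# A small, self-contained demo (toy word ids, illustrative only) of the
# EmbeddingBag call used above: all posts' word ids are concatenated into one
# flat tensor, and `offsets` marks where each post begins, so each output row
# aggregates one post's word embeddings.
def _embedding_bag_demo():
    bag = nn.EmbeddingBag(10, 4)  # vocabulary of 10 words, 4-d embeddings
    post_words = {"p1": torch.LongTensor([1, 2, 3]),
                  "p2": torch.LongTensor([4, 5])}
    nodes = ["p1", "p2"]
    # offsets = [0, 3]: p1's words occupy positions 0-2, p2's start at 3.
    offsets = np.concatenate(
        ([0], np.cumsum([post_words[p].size()[0] for p in nodes[:-1]])))
    out = bag(torch.autograd.Variable(
                  torch.cat([post_words[p] for p in nodes])),
              torch.autograd.Variable(torch.LongTensor(offsets)))
    print out.size()  # (2, 4): one aggregated vector per post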
def train(feature_dim, lr_edge, lr_metapath, lr_int, model, batch_size,
          max_batches, tol, max_path_len, cuda, results, decoder, opt, agg):
    relations, adj_lists, node_maps = pickle.load(
        open("/dfs/scratch0/netquery/cancer.pkl"))
    # relations, adj_lists, node_maps = pickle.load(open("cancer.pkl"))
    relations['disease'].remove(('disease', '0'))
    del adj_lists[('disease', '0', 'disease')]
    for rel1 in relations:
        for rel2 in relations[rel1]:
            print rel1, rel2, len(adj_lists[(rel1, rel2[1], rel2[0])])
    for mode in node_maps:
        node_maps[mode][-1] = len(node_maps[mode])

    feature_dims = {mode: feature_dim for mode in relations}
    feature_modules = {
        mode: nn.Embedding(len(node_maps[mode]), feature_dim)
        for mode in relations
    }
    for feature_module in feature_modules.values():
        feature_module.weight.data.normal_(0, 1. / feature_dim)

    if cuda:
        features = lambda nodes, mode: feature_modules[mode].forward(
            Variable(torch.LongTensor(
                [node_maps[mode][node] for node in nodes])).cuda())
    else:
        features = lambda nodes, mode: feature_modules[mode].forward(
            Variable(torch.LongTensor(
                [node_maps[mode][node] for node in nodes])))
    graph = Graph(features, feature_dims, relations, adj_lists)

    # Create chains and intersections on the entire graph.
    cancer_chains, cancer_neg_chains = graph.create_chains_byrels()
    cancer_pos_ints, cancer_neg_ints = graph.create_intersections_byrels()

    # Create all edges, metapaths, and intersections.
    edges = {}
    metapaths = {}
    for rel in cancer_chains:
        if len(rel) == 1:
            edges[rel[0]] = [(node1, node2, rel[0])
                             for node1 in cancer_chains[rel]
                             for node2 in cancer_chains[rel][node1]]
        elif len(rel) in [2, 3]:
            metapaths[rel] = [(node1, entry[-1], rel)
                              for node1 in cancer_chains[rel]
                              for entry in cancer_chains[rel][node1]]
    for edge_list in edges.values():
        random.shuffle(edge_list)
    for metapath_list in metapaths.values():
        random.shuffle(metapath_list)

    pos_ints = {}
    for rel in cancer_pos_ints:
        if len(rel) == 3:
            pos_ints[rel] = [
                (node1, node2, node3, target)
                for (node1, node2, node3) in cancer_pos_ints[rel]
                for target in cancer_pos_ints[rel][(node1, node2, node3)]
            ]
        else:
            pos_ints[rel] = [
                (node1, node2, target)
                for (node1, node2) in cancer_pos_ints[rel]
                for target in cancer_pos_ints[rel][(node1, node2)]
            ]

    # Get test edges and remove them from the graph.
    train_edges = {rel: edge_list[:int(0.9 * len(edge_list))]
                   for rel, edge_list in edges.iteritems()}
    test_edges = {rel: edge_list[int(0.9 * len(edge_list)):]
                  for rel, edge_list in edges.iteritems()}
    graph.remove_edges(
        [e for edge_list in test_edges.values() for e in edge_list])

    # Create TRAIN chains and intersections from the train graph
    # (test edges removed).
    train_cancer_chains, train_cancer_neg_chains = graph.create_chains_byrels()
    train_cancer_pos_ints, train_cancer_neg_ints = \
        graph.create_intersections_byrels()

    # Create TRAINING metapaths and intersections from the train graph.
    train_metapaths = {}
    for rel in train_cancer_chains:
        if len(rel) in [2, 3]:
            train_metapaths[rel] = [
                (node1, entry[-1], rel)
                for node1 in train_cancer_chains[rel]
                for entry in train_cancer_chains[rel][node1]
            ]
    train_pos_ints = {}
    for rel in train_cancer_pos_ints:
        if len(rel) == 3:
            train_pos_ints[rel] = [
                (node1, node2, node3, target)
                for (node1, node2, node3) in train_cancer_pos_ints[rel]
                for target in train_cancer_pos_ints[rel][(node1, node2, node3)]
            ]
        else:
            train_pos_ints[rel] = [
                (node1, node2, target)
                for (node1, node2) in train_cancer_pos_ints[rel]
                for target in train_cancer_pos_ints[rel][(node1, node2)]
            ]

    # Create test metapaths and test intersections by removing the training
    # metapaths and intersections from the full sets, respectively.
    test_metapaths = {rel: list(set(metapaths[rel]) - set(train_metapaths[rel]))
                      for rel in metapaths}
    test_ints = {rel: list(set(pos_ints[rel]) - set(train_pos_ints[rel]))
                 for rel in pos_ints}
    '''
    with open("train_edges.pkl", "wb") as f:
        pickle.dump(train_edges, f)
    with open("test_edges.pkl", "wb") as f:
        pickle.dump(test_edges, f)
    with open("train_metapaths.pkl", "wb") as f:
        pickle.dump(train_metapaths, f)
    with open("test_metapaths.pkl", "wb") as f:
        pickle.dump(test_metapaths, f)
    with open("train_ints.pkl", "wb") as f:
        pickle.dump(train_pos_ints, f)
    with open("test_ints.pkl", "wb") as f:
        pickle.dump(test_ints, f)
    '''

    # For simplicity the embedding and hidden dimensions are equal.
    out_dims = {mode: feature_dim for mode in graph.relations}
    if model == "direct":
        enc = DirectEncoder(graph.features, feature_modules)
        dec = get_decoder(graph, feature_dims, decoder)
    else:
        if agg == "mean":
            aggregator = FastMeanAggregator(graph.features)
        elif agg == "pool":
            aggregator = FastPoolAggregator(graph.features, graph.feature_dims)
        enc = Encoder(graph.features,
                      graph.feature_dims,
                      out_dims,
                      graph.relations,
                      graph.adj_lists,
                      concat=True,
                      feature_modules=feature_modules,
                      cuda=cuda,
                      aggregator=aggregator)
        dec = get_decoder(graph, enc.out_dims, decoder)
    inter_dec = MinIntersection(feature_dims.keys(), feature_dims,
                                feature_dims)
    combined_enc_dec = LogCombinedEncoderDecoder(graph, enc, dec, inter_dec)
    if cuda:
        combined_enc_dec.cuda()

    print "Checking eval functions"
    beg_int_auc = evaluate_intersect_auc(test_ints, cancer_neg_ints, graph,
                                         combined_enc_dec, True)
    # beg_int_loss = evaluate_intersect_margin(test_ints, cancer_neg_ints, graph, combined_enc_dec, False)
    beg_path_auc = evaluate_metapath_auc(test_metapaths, cancer_neg_chains,
                                         graph, combined_enc_dec,
                                         batch_size=batch_size)
    beg_edge_auc = evaluate_edge_auc(test_edges, cancer_neg_chains, graph,
                                     combined_enc_dec)
    # beg_edge_loss = evaluate_edge_margin(test_edges, cancer_neg_chains, graph, combined_enc_dec)
    # beg_path_loss = evaluate_metapath_margin(test_metapaths, cancer_neg_chains, graph, combined_enc_dec)
    print beg_edge_auc, beg_path_auc  # , beg_int_auc, beg_edge_loss, beg_path_loss, beg_int_loss

    # Phase 1: train on edges.
    losses = []
    ema_loss = None
    if opt == "sgd":
        optimizer = optim.SGD(combined_enc_dec.parameters(), lr=lr_edge,
                              momentum=0.000)
    elif opt == "sgd-momentum":
        optimizer = optim.SGD(combined_enc_dec.parameters(), lr=lr_edge,
                              momentum=0.9)
    elif opt == "adam":
        optimizer = optim.Adam(combined_enc_dec.parameters(), lr=lr_edge)
    conv = -1
    for i in range(max_batches):
        rel = graph.sample_relation()
        start = random.randint(0, max(0, len(train_edges[rel]) - batch_size))
        edges = train_edges[rel][start:start + batch_size]
        if len(edges) == 0:
            continue
        optimizer.zero_grad()
        # combined_enc_dec.graph.remove_edges(edges)
        neg_nodes = [
            random.choice(train_cancer_neg_chains[(rel, )][e[0]])
            for e in edges
        ]
        loss = combined_enc_dec.margin_loss([edge[0] for edge in edges],
                                            [edge[1] for edge in edges],
                                            [rel], "path", neg_nodes)
        # combined_enc_dec.graph.add_edges(edges)
        losses.append(loss.data[0])
        if ema_loss is None:
            ema_loss = loss.data[0]
        else:
            ema_loss = 0.99 * ema_loss + 0.01 * loss.data[0]
        loss.backward()
        torch.nn.utils.clip_grad_norm(combined_enc_dec.parameters(), 0.00001)
        optimizer.step()
        if i % 100 == 0:
            print i, ema_loss
        if i > 2000 and i % 100 == 0:
            conv = np.mean(losses[i - 2000:i - 1000]) - np.mean(
                losses[i - 1000:i])
            print "conv", conv
            if conv < tol:
                break

    print "After training on edges:"
    print combined_enc_dec.edge_dec.mats
    train1_edge_auc = evaluate_edge_auc(test_edges, cancer_neg_chains, graph,
                                        combined_enc_dec)
    # train1_edge_loss = evaluate_edge_margin(test_edges, cancer_neg_chains, graph, combined_enc_dec)
    # train1_path_loss = evaluate_metapath_margin(test_metapaths, cancer_neg_chains, graph, combined_enc_dec)
    # train1_path_auc = evaluate_metapath_auc(test_metapaths, cancer_neg_chains, graph, combined_enc_dec, batch_size=batch_size)
    train1_int_auc = evaluate_intersect_auc(test_ints, cancer_neg_ints, graph,
                                            combined_enc_dec, True)
    # train1_int_loss = evaluate_intersect_margin(test_ints, cancer_neg_ints, graph, combined_enc_dec, False)
    print train1_edge_auc, train1_int_auc  # , train1_edge_loss, train1_path_loss, train1_int_loss

    """
    # Phase 2 (currently disabled): train on metapaths.
    losses = []
    ema_loss = None
    if opt == "sgd":
        optimizer = optim.SGD(combined_enc_dec.parameters(), lr=lr_metapath, momentum=0.000)
    elif opt == "sgd-momentum":
        optimizer = optim.SGD(combined_enc_dec.parameters(), lr=lr_metapath, momentum=0.9)
    elif opt == "adam":
        optimizer = optim.Adam(combined_enc_dec.parameters(), lr=lr_metapath)
    conv = -1
    for i in range(max_batches):
        while True:
            rels = graph.sample_metapath()
            if len(train_metapaths[rels]) > 0:
                break
        start = random.randint(0, max(0, len(train_metapaths[rels]) - batch_size))
        edges = train_metapaths[rels][start:start + batch_size]
        nodes1 = [edge[0] for edge in edges]
        nodes2 = [edge[1] for edge in edges]
        neg_nodes = [random.choice(train_cancer_neg_chains[rels][e[0]]) for e in edges]
        optimizer.zero_grad()
        loss = combined_enc_dec.margin_loss(nodes1, nodes2, rels, "path", neg_nodes)
        losses.append(loss.data[0])
        if ema_loss is None:
            ema_loss = loss.data[0]
        else:
            ema_loss = 0.99 * ema_loss + 0.01 * loss.data[0]
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print i, ema_loss
        if i > 2000 and i % 100 == 0:
            conv = np.mean(losses[i - 2000:i - 1000]) - np.mean(losses[i - 1000:i])
            print "conv", conv
            if conv < tol:
                break
        if i % 5000 == 0:
            print "AUC:", evaluate_edge_auc(test_edges, cancer_neg_chains, graph, combined_enc_dec)

    print "After training on metapaths:"
    train2_edge_auc = evaluate_edge_auc(test_edges, cancer_neg_chains, graph, combined_enc_dec)
    # train2_edge_loss = evaluate_edge_margin(test_edges, cancer_neg_chains, graph, combined_enc_dec)
    # train2_path_loss = evaluate_metapath_margin(test_metapaths, cancer_neg_chains, graph, combined_enc_dec)
    train2_path_auc = evaluate_metapath_auc(test_metapaths, cancer_neg_chains, graph, combined_enc_dec, batch_size=batch_size)
    # train2_int_auc = evaluate_intersect_auc(test_ints, cancer_neg_ints, graph, combined_enc_dec, False)
    # train2_int_loss = evaluate_intersect_margin(test_ints, cancer_neg_ints, graph, combined_enc_dec, False)
    print train2_edge_auc, train2_path_auc  # , train2_int_auc, train2_edge_loss, train2_path_loss, train2_int_loss
    """

    # Phase 3: train the intersection decoder. Note: plain SGD updates only
    # the intersection decoder's parameters; the other optimizers update
    # the full model.
    losses = []
    ema_loss = None
    if opt == "sgd":
        optimizer = optim.SGD(combined_enc_dec.inter_dec.parameters(),
                              lr=lr_int, momentum=0.000)
    elif opt == "sgd-momentum":
        optimizer = optim.SGD(combined_enc_dec.parameters(), lr=lr_int,
                              momentum=0.9)
    elif opt == "adam":
        optimizer = optim.Adam(combined_enc_dec.parameters(), lr=lr_int)
    conv = -1
    for i in range(max_batches):
        # Keep sampling until we hit an intersection type with examples.
        while True:
            rels = graph.sample_intersection()
            random.shuffle(train_pos_ints[rels])
            samples = train_pos_ints[rels][:batch_size]
            if len(samples) > 0:
                break
        query_nodes1 = [edge[0] for edge in samples]
        query_nodes2 = [edge[1] for edge in samples]
        if len(rels) == 3:
            query_nodes3 = [edge[2] for edge in samples]
            target_nodes = [edge[3] for edge in samples]
            neg_nodes = [
                random.choice(train_cancer_neg_ints[rels][
                    (query_nodes1[j], query_nodes2[j], query_nodes3[j])])
                for j in range(len(samples))
            ]
        else:
            query_nodes3 = []
            target_nodes = [edge[2] for edge in samples]
            neg_nodes = [
                random.choice(train_cancer_neg_ints[rels][
                    (query_nodes1[j], query_nodes2[j])])
                for j in range(len(samples))
            ]
        optimizer.zero_grad()
        loss = combined_enc_dec.margin_loss(query_nodes1, query_nodes2, rels,
                                            "intersect", neg_nodes,
                                            target_nodes, query_nodes3)
        losses.append(loss.data[0])
        if ema_loss is None:
            ema_loss = loss.data[0]
        else:
            ema_loss = 0.99 * ema_loss + 0.01 * loss.data[0]
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print i, ema_loss
        if i > 2000 and i % 100 == 0:
            conv = np.mean(losses[i - 2000:i - 1000]) - np.mean(
                losses[i - 1000:i])
            print "conv", conv
            if conv < tol:
                break
        if i % 5000 == 0:
            print "AUC:", evaluate_edge_auc(test_edges, cancer_neg_chains,
                                            graph, combined_enc_dec)
            print "Intersection AUC:", evaluate_intersect_auc(
                test_ints, cancer_neg_ints, graph, combined_enc_dec, True)

    print "After training on intersections:"
    train3_edge_auc = evaluate_edge_auc(test_edges, cancer_neg_chains, graph,
                                        combined_enc_dec)
    # train3_edge_loss = evaluate_edge_margin(test_edges, cancer_neg_chains, graph, combined_enc_dec)
    # train3_path_loss = evaluate_metapath_margin(test_metapaths, cancer_neg_chains, graph, combined_enc_dec)
    # train3_path_auc = evaluate_metapath_auc(test_metapaths, cancer_neg_chains, graph, combined_enc_dec, batch_size=batch_size)
    train3_int_auc = evaluate_intersect_auc(test_ints, cancer_neg_ints, graph,
                                            combined_enc_dec, True)
    train3_int_auc_train = evaluate_intersect_auc(train_pos_ints,
                                                  train_cancer_neg_ints,
                                                  graph, combined_enc_dec,
                                                  True)
    # train3_int_loss = evaluate_intersect_margin(test_ints, cancer_neg_ints, graph, combined_enc_dec, True)
    print train3_edge_auc, train3_int_auc, train3_int_auc_train  # , train3_edge_loss, train3_path_loss, train3_int_loss

    with open(results, "a") as csvfile:
        writer = csv.writer(csvfile)
        # Only log metrics that are actually computed above; the metapath
        # phase (and its train2_* metrics) is currently disabled.
        writer.writerow([
            str(lr_edge),
            str(lr_metapath),
            str(beg_edge_auc),
            str(beg_path_auc),
            str(train1_edge_auc),
            str(train1_int_auc),
            str(train3_edge_auc),
            str(train3_int_auc)
        ])
def train(feature_dim, lr, model, batch_size, max_batches, tol, max_path_len,
          cuda, results, decoder, opt, agg):
    # relations, adj_lists, node_maps = pickle.load(open("/dfs/scratch0/netquery/cancer.pkl"))
    relations, adj_lists, node_maps = pickle.load(open("cancer.pkl"))
    relations['disease'].remove(('disease', '0'))
    del adj_lists[('disease', '0', 'disease')]
    for rel1 in relations:
        for rel2 in relations[rel1]:
            print rel1, rel2, len(adj_lists[(rel1, rel2[1], rel2[0])])
    for mode in node_maps:
        node_maps[mode][-1] = len(node_maps[mode])

    feature_dims = {mode: feature_dim for mode in relations}
    feature_modules = {
        mode: nn.Embedding(len(node_maps[mode]), feature_dim)
        for mode in relations
    }
    for feature_module in feature_modules.values():
        feature_module.weight.data.normal_(0, 1. / np.sqrt(feature_dim))
    if cuda:
        features = lambda nodes, mode: feature_modules[mode].forward(
            Variable(torch.LongTensor(
                [node_maps[mode][node] for node in nodes])).cuda())
    else:
        features = lambda nodes, mode: feature_modules[mode].forward(
            Variable(torch.LongTensor(
                [node_maps[mode][node] for node in nodes])))
    graph = Graph(features, feature_dims, relations, adj_lists)

    # cancer_chains = graph.create_chains_byrels()
    # cancer_pos_ints, cancer_neg_ints = graph.create_intersections_byrels(cancer_chains)

    # 90/10 train/test split for metapaths and edges; test edges are
    # removed from the graph.
    metapaths = graph.get_all_metapaths_byrel()
    train_metapaths = {rel: metapath_list[:int(0.9 * len(metapath_list))]
                       for rel, metapath_list in metapaths.iteritems()}
    test_metapaths = {rel: metapath_list[int(0.9 * len(metapath_list)):]
                      for rel, metapath_list in metapaths.iteritems()}
    edges = graph.get_all_edges_byrel()
    train_edges = {rel: edge_list[:int(0.9 * len(edge_list))]
                   for rel, edge_list in edges.iteritems()}
    test_edges = {rel: edge_list[int(0.9 * len(edge_list)):]
                  for rel, edge_list in edges.iteritems()}
    graph.remove_edges(
        [e for edge_list in test_edges.values() for e in edge_list])

    # For simplicity the embedding and hidden dimensions are equal.
    out_dims = {mode: feature_dim for mode in graph.relations}
    if model == "direct":
        enc = DirectEncoder(graph.features, feature_modules)
        dec = get_decoder(graph, feature_dims, decoder)
    else:
        if agg == "mean":
            aggregator = FastMeanAggregator(graph.features)
        elif agg == "pool":
            aggregator = FastPoolAggregator(graph.features, graph.feature_dims)
        enc = Encoder(graph.features,
                      graph.feature_dims,
                      out_dims,
                      graph.relations,
                      graph.adj_lists,
                      concat=True,
                      feature_modules=feature_modules,
                      cuda=cuda,
                      aggregator=aggregator)
        dec = get_decoder(graph, enc.out_dims, decoder)
    enc_dec = MetapathEncoderDecoder(graph, enc, dec)
    if cuda:
        enc_dec.cuda()

    if opt == "sgd":
        optimizer = optim.SGD(enc_dec.parameters(), lr=lr, momentum=0.000)
    elif opt == "sgd-momentum":
        optimizer = optim.SGD(enc_dec.parameters(), lr=lr, momentum=0.9)
    elif opt == "adam":
        optimizer = optim.Adam(enc_dec.parameters(), lr=lr)

    start = time.time()
    print "{:d} training edges".format(
        sum([len(rel_edges) for rel_edges in train_edges.values()]))

    # Phase 1: train on single edges.
    losses = []
    ema_loss = None
    conv = -1
    for i in range(max_batches):
        rel = graph.sample_relation()
        random.shuffle(train_edges[rel])
        edges = train_edges[rel][:batch_size]
        if len(edges) == 0:
            continue
        optimizer.zero_grad()
        enc_dec.graph.remove_edges(edges)
        loss = enc_dec.margin_loss([edge[0] for edge in edges],
                                   [edge[1] for edge in edges], [rel])
        enc_dec.graph.add_edges(edges)
        losses.append(loss.data[0])
        if ema_loss is None:
            ema_loss = loss.data[0]
        else:
            ema_loss = 0.99 * ema_loss + 0.01 * loss.data[0]
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print i, ema_loss
        if i > 2000 and i % 100 == 0:
            conv = np.mean(losses[i - 2000:i - 1000]) - np.mean(
                losses[i - 1000:i])
            print "conv", conv
            if conv < tol:
                break

    old_edge_auc = evaluate_edge_auc(test_edges, graph, enc_dec)
    print "AUC:", old_edge_auc
    old_edge_loss = evaluate_edge_margin(test_edges, graph, enc_dec)
    old_path_loss = evaluate_metapath_margin(test_metapaths, graph, enc_dec)
    old_path_auc = evaluate_metapath_auc(test_metapaths, graph, enc_dec,
                                         batch_size=batch_size)
    print "Metapath auc:", old_path_auc
    print "Metapath margin:", old_path_loss

    # Phase 2: train on metapaths.
    ema_loss = None
    if opt == "sgd":
        optimizer = optim.SGD(enc_dec.parameters(), lr=lr, momentum=0.000)
    elif opt == "sgd-momentum":
        optimizer = optim.SGD(enc_dec.parameters(), lr=lr, momentum=0.9)
    elif opt == "adam":
        optimizer = optim.Adam(enc_dec.parameters(), lr=lr)
    for i in range(max_batches):
        # Keep sampling until we hit a metapath type with training examples.
        while True:
            rels = graph.sample_metapath()
            random.shuffle(train_metapaths[rels])
            edges = train_metapaths[rels][:batch_size]
            if len(edges) > 0:
                break
        nodes1 = [edge[0] for edge in edges]
        nodes2 = [edge[1] for edge in edges]
        optimizer.zero_grad()
        loss = enc_dec.margin_loss(nodes1, nodes2, rels)
        losses.append(loss.data[0])
        if ema_loss is None:
            ema_loss = loss.data[0]
        else:
            ema_loss = 0.99 * ema_loss + 0.01 * loss.data[0]
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print i, ema_loss
        if i > 2000 and i % 100 == 0:
            conv = np.mean(losses[i - 2000:i - 1000]) - np.mean(
                losses[i - 1000:i])
            print "conv", conv
            if conv < tol:
                break
        if i % 5000 == 0:
            print "AUC:", evaluate_edge_auc(test_edges, graph, enc_dec)

    total = time.time() - start
    test_auc = evaluate_edge_auc(test_edges, graph, enc_dec)
    test_loss = evaluate_edge_margin(test_edges, graph, enc_dec)
    path_auc = evaluate_metapath_auc(test_metapaths, graph, enc_dec,
                                     batch_size=batch_size)
    path_loss = evaluate_metapath_margin(test_metapaths, graph, enc_dec)
    with open(results, "a") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([
            str(lr),
            str(batch_size),
            str(total),
            str(i),
            str(total / batch_size / float(i)),
            str(old_edge_auc),
            str(old_edge_loss),
            str(test_auc),
            str(test_loss),
            str(ema_loss),
            str(conv),
            str(old_path_loss),
            str(old_path_auc),
            str(path_loss),
            str(path_auc)
        ])
    print "Time:", total
    print "Converged after:", i
    print "Per example:", total / batch_size / float(i)
    print "AUC:", test_auc
    print "Loss:", test_loss
    print "Metapath auc:", path_auc
    print "Metapath margin:", path_loss
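
# A hedged example of invoking the trainer above; every value here is a
# placeholder (the accepted decoder/opt/agg strings must match what
# get_decoder and the branches above expect), and "results.csv" is simply
# the file to which the metrics row is appended.
# train(feature_dim=16, lr=0.01, model="direct", batch_size=512,
#       max_batches=100000, tol=0.0001, max_path_len=3, cuda=False,
#       results="results.csv", decoder="bilinear", opt="adam", agg="mean")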