def runtest(g_train_bases, ml, validation=True):
    model.eval()

    n_users = len(ml.users.index)
    n_items = len(ml.movies.index)

    g_prior = g.edge_subgraph(g_train_bases, preserve_nodes=True)
    g_prior.copy_from_parent()

    # Pre-compute the representations of users and items
    hs = []
    with torch.no_grad():
        with tqdm.trange(n_users + n_items) as tq:
            for node_id in tq:
                nodeset = cuda(torch.LongTensor([node_id]))
                h = forward(model, g_prior, nodeset, False)
                hs.append(h)
    h = torch.cat(hs, 0)

    rr = []
    with torch.no_grad():
        with tqdm.trange(n_users) as tq:
            for u_nid in tq:
                # For each user, exclude the items appearing in
                # (1) the training set, and
                # (2) either the validation set when testing, or the test set
                #     when validating.
                uid = ml.user_ids[u_nid]
                pids_exclude = ml.ratings[
                    (ml.ratings['user_id'] == uid) &
                    (ml.ratings['train'] | ml.ratings['test' if validation else 'valid'])
                    ]['movie_id'].values
                pids_candidate = ml.ratings[
                    (ml.ratings['user_id'] == uid) &
                    ml.ratings['valid' if validation else 'test']
                    ]['movie_id'].values
                pids = np.setdiff1d(ml.movie_ids, pids_exclude)
                p_nids = np.array([ml.movie_ids_invmap[pid] for pid in pids])
                p_nids_candidate = np.array(
                    [ml.movie_ids_invmap[pid] for pid in pids_candidate])

                # Compute scores of the remaining items, rank them, then compute the MRR.
                dst = torch.from_numpy(p_nids) + n_users
                src = torch.zeros_like(dst).fill_(u_nid)
                h_dst = h[dst]
                h_src = h[src]

                score = (h_src * h_dst).sum(1)
                score_sort_idx = score.sort(descending=True)[1].cpu().numpy()

                rank_map = {v: i for i, v in enumerate(p_nids[score_sort_idx])}
                rank_candidates = np.array(
                    [rank_map[p_nid] for p_nid in p_nids_candidate])
                rank = 1 / (rank_candidates + 1)
                rr.append(rank.mean())
                tq.set_postfix({'rank': rank.mean()})

    return np.array(rr)
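# A tiny self-contained sketch (illustrative only; the item IDs and scores are
# made up, and this helper is not part of the training pipeline) of the ranking
# logic used in runtest above: sort candidate item IDs by score, map each ID to
# its rank, and average the reciprocal ranks of the held-out items.
def _mrr_toy_example():
    p_nids = np.array([10, 11, 12, 13])
    score = np.array([0.2, 0.9, 0.1, 0.5])
    order = np.argsort(-score)                   # ranking: 11, 13, 10, 12
    rank_map = {v: i for i, v in enumerate(p_nids[order])}
    # Suppose items 13 and 12 are the held-out positives: ranks 1 and 3
    # (0-based), so the MRR is (1/2 + 1/4) / 2 = 0.375.
    return np.mean([1 / (rank_map[p] + 1) for p in (13, 12)])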
def runtrain(g_train_bases, g_train_pairs, train):
    global opt
    if train:
        model.train()
    else:
        model.eval()

    g_prior = g.edge_subgraph(g_train_bases, preserve_nodes=True)
    g_prior.copy_from_parent()

    # Generate shuffled batches of training pairs
    edge_batches = g_train_pairs[torch.randperm(g_train_pairs.shape[0])].split(batch_size)

    with tqdm.tqdm(edge_batches) as tq:
        sum_loss = 0
        sum_acc = 0
        count = 0
        for batch_id, batch in enumerate(tq):
            count += batch.shape[0]
            # Get source (user) and destination (item) nodes, as well as negative items
            src, dst = g.find_edges(batch)
            dst_neg = []
            for i in range(len(dst)):
                dst_neg.append(np.random.randint(
                    len(ml.user_ids), len(ml.user_ids) + len(ml.movie_ids), n_negs))
            dst_neg = torch.LongTensor(dst_neg)
            dst = dst.view(-1, 1).expand_as(dst_neg).flatten()
            src = src.view(-1, 1).expand_as(dst_neg).flatten()
            dst_neg = dst_neg.flatten()

            # Keep only nodes that have neighbors in the prior graph to aggregate from
            mask = (g_prior.in_degrees(dst_neg) > 0) & \
                   (g_prior.in_degrees(dst) > 0) & \
                   (g_prior.in_degrees(src) > 0)
            src = src[mask]
            dst = dst[mask]
            dst_neg = dst_neg[mask]
            if len(src) == 0:
                continue

            nodeset = cuda(torch.cat([src, dst, dst_neg]))
            src_size, dst_size, dst_neg_size = \
                src.shape[0], dst.shape[0], dst_neg.shape[0]

            # Get representations and compute losses
            h_src, h_dst, h_dst_neg = (
                forward(model, g_prior, nodeset, train)
                .split([src_size, dst_size, dst_neg_size]))
            diff = (h_src * (h_dst_neg - h_dst)).sum(1)
            loss = loss_func[args.loss](diff)
            acc = (diff < 0).sum()
            assert loss.item() == loss.item()   # guard against NaN loss

            grad_sqr_norm = 0
            if train:
                opt.zero_grad()
                loss.backward()
                for name, p in model.named_parameters():
                    assert (p.grad != p.grad).sum() == 0   # guard against NaN gradients
                    grad_sqr_norm += p.grad.norm().item() ** 2
                opt.step()

            sum_loss += loss.item()
            sum_acc += acc.item() / n_negs
            avg_loss = sum_loss / (batch_id + 1)
            avg_acc = sum_acc / count
            tq.set_postfix({'loss': '%.6f' % loss.item(),
                            'avg_loss': '%.3f' % avg_loss,
                            'avg_acc': '%.3f' % avg_acc,
                            'grad_norm': '%.6f' % np.sqrt(grad_sqr_norm)})

    return avg_loss, avg_acc
def runtest(g_prior_edges, epoch, validation=True):
    model.eval()

    period = 1
    offset = epoch % period
    n_users = len(db.authors.index)
    n_items = len(db.papers.index)

    g_prior_src, g_prior_dst = g.find_edges(g_prior_edges)
    g_prior = DGLGraph()
    g_prior.add_nodes(g.number_of_nodes())
    g_prior.add_edges(g_prior_src, g_prior_dst)
    g_prior.ndata.update({k: cuda(v) for k, v in g.ndata.items()})

    user_offset = 0
    hs = []
    with torch.no_grad():
        with tqdm.trange(offset, n_users + n_items, period) as tq:
            for node_id in tq:
                if user_offset == 0 and node_id >= n_items:
                    user_offset = node_id
                nodeset = cuda(torch.LongTensor([node_id]))
                h = forward(model, g_prior, nodeset, False)
                hs.append(h)
    h = torch.cat(hs, 0)

    rr = []
    with torch.no_grad():
        with tqdm.trange(user_offset, n_items + n_users, period) as tq:
            for u_nid in tq:
                # uid = db.user_ids[u_nid]
                uid = u_nid
                uhid = (u_nid - offset) // period
                pids_exclude = db.links[
                    (db.links['idx_A'] == uid) &
                    (db.links['train'] | db.links['test' if validation else 'valid'])
                    ]['idx_P'].values
                pids_candidate = db.links[
                    (db.links['idx_A'] == uid) &
                    db.links['valid' if validation else 'test']
                    ]['idx_P'].values
                pids = np.setdiff1d(range(len(db.paper_ids_map)), pids_exclude)
                hids = id_remap(pids, offset, period)
                hids_candidate = id_remap(pids_candidate, offset, period)

                dst = torch.from_numpy(hids)
                src = torch.zeros_like(dst).fill_(uhid)
                h_dst = h[dst]
                h_src = h[src]

                score = (h_src * h_dst).sum(1)
                score_sort_idx = score.sort(descending=True)[1].cpu().numpy()

                rank_map = {v: i for i, v in enumerate(hids[score_sort_idx])}
                rank_candidates = np.array([rank_map[p_nid] for p_nid in hids_candidate])
                rank = (1 / (rank_candidates + 1) if len(rank_candidates) != 0
                        else np.array([1 / len(score_sort_idx)]))
                rr.append(rank.mean())
                tq.set_postfix({'rank': rank.mean()})

    return np.array(rr)
batch_size = 256
margin = 0.9
n_negs = args.n_negs
hard_neg_prob = args.hard_neg_prob

loss_func = {
    'hinge': lambda diff: (diff + margin).clamp(min=0).mean(),
    'bpr': lambda diff: (1 - torch.sigmoid(-diff)).mean(),
    }

model = cuda(PinSage(
    g.number_of_nodes(),
    [n_hidden] * (n_layers + 1),
    20,
    0.5,
    10,
    use_feature=args.use_feature,
    G=g,
    ))
opt = getattr(torch.optim, args.opt)(model.parameters(), lr=args.lr)


def forward(model, g_prior, nodeset, train=True):
    if train:
        return model(g_prior, nodeset)
    else:
        with torch.no_grad():
            return model(g_prior, nodeset)
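# A quick sanity check (illustrative only; the values are made up and this
# helper is not part of the training pipeline) of the two loss functions
# above.  diff is the per-pair score difference s_neg - s_pos computed in
# runtrain; the more negative it is, the better the positive item is ranked,
# and both losses shrink accordingly.
def _loss_toy_example():
    diff = torch.tensor([-2.0, -0.5, 0.3])
    hinge = loss_func['hinge'](diff)    # mean(max(0, diff + margin))
    bpr = loss_func['bpr'](diff)        # mean(1 - sigmoid(s_pos - s_neg))
    return hinge.item(), bpr.item()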
def runtrain(g_prior_edges, g_train_edges, train):
    global opt
    if train:
        model.train()
    else:
        model.eval()

    g_prior_src, g_prior_dst = g.find_edges(g_prior_edges)
    g_prior = DGLGraph()
    g_prior.add_nodes(g.number_of_nodes())
    g_prior.add_edges(g_prior_src, g_prior_dst)
    g_prior.ndata.update({k: cuda(v) for k, v in g.ndata.items()})
    edge_batches = g_train_edges[torch.randperm(g_train_edges.shape[0])].split(batch_size)

    with tqdm.tqdm(edge_batches) as tq:
        sum_loss = 0
        sum_acc = 0
        count = 0
        for batch_id, batch in enumerate(tq):
            count += batch.shape[0]
            src, dst = g.find_edges(batch)
            # Sample n_negs negative items per positive pair.  With probability
            # hard_neg_prob, draw hard negatives from the item's neighbor list,
            # excluding items already linked to the source node.
            dst_neg = []
            for i in range(len(dst)):
                if np.random.rand() < args.hard_neg_prob:
                    nb = torch.LongTensor(neighbors[dst[i].item()])
                    mask = ~(g.has_edges_between(nb, src[i].item()).byte())
                    dst_neg.append(np.random.choice(nb[mask].numpy(), n_negs))
                else:
                    dst_neg.append(np.random.randint(0, len(db.papers), n_negs))
            dst_neg = torch.LongTensor(dst_neg)
            dst = dst.view(-1, 1).expand_as(dst_neg).flatten()
            src = src.view(-1, 1).expand_as(dst_neg).flatten()
            dst_neg = dst_neg.flatten()

            # Keep only nodes that have neighbors in the prior graph to aggregate from
            mask = (g_prior.in_degrees(dst_neg) > 0) & \
                   (g_prior.in_degrees(dst) > 0) & \
                   (g_prior.in_degrees(src) > 0)
            src = src[mask]
            dst = dst[mask]
            dst_neg = dst_neg[mask]
            if len(src) == 0:
                continue

            nodeset = cuda(torch.cat([src, dst, dst_neg]))
            src_size, dst_size, dst_neg_size = \
                src.shape[0], dst.shape[0], dst_neg.shape[0]

            h_src, h_dst, h_dst_neg = (
                forward(model, g_prior, nodeset, train)
                .split([src_size, dst_size, dst_neg_size]))

            diff = (h_src * (h_dst_neg - h_dst)).sum(1)
            loss = loss_func[args.loss](diff)
            acc = (diff < 0).sum()
            assert loss.item() == loss.item()   # guard against NaN loss

            grad_sqr_norm = 0
            if train:
                opt.zero_grad()
                loss.backward()
                for name, p in model.named_parameters():
                    assert (p.grad != p.grad).sum() == 0   # guard against NaN gradients
                    grad_sqr_norm += p.grad.norm().item() ** 2
                opt.step()

            sum_loss += loss.item()
            sum_acc += acc.item() / n_negs
            avg_loss = sum_loss / (batch_id + 1)
            avg_acc = sum_acc / count
            tq.set_postfix({'loss': '%.6f' % loss.item(),
                            'avg_loss': '%.3f' % avg_loss,
                            'avg_acc': '%.3f' % avg_acc,
                            'grad_norm': '%.6f' % np.sqrt(grad_sqr_norm)})

    return avg_loss, avg_acc
if 'venue' in g.ndata.keys():
    emb['venue'] = nn.Embedding(
        g.ndata['venue'].max().item() + 1, in_features, padding_idx=0)
emb['fos'] = nn.Sequential(
    nn.Linear(300, in_features),
    nn.LeakyReLU(),
    )

model = cuda(PinSage(
    g.number_of_nodes(),
    [n_hidden] * (n_layers + 1),
    20,
    0.5,
    20,
    emb=emb,
    G=g,
    zero_h=args.zero_h,
    ))
opt = getattr(torch.optim, args.opt)(model.parameters(), lr=args.lr)
sched = torch.optim.lr_scheduler.LambdaLR(opt, sched_lambda[args.sched])


def forward(model, g_prior, nodeset, train=True):
    if train:
        return model(g_prior, nodeset)
    else:
        with torch.no_grad():
            return model(g_prior, nodeset)
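# A minimal driver sketch showing how the pieces above could be wired together.
# It is an assumption rather than part of the original script: the edge-ID
# tensors g_prior_edges (message-passing edges) and g_train_edges (supervision
# pairs), and the epoch count, must come from the actual data pipeline.
def training_loop(g_prior_edges, g_train_edges, n_epochs):
    best_mrr = 0
    for epoch in range(n_epochs):
        avg_loss, avg_acc = runtrain(g_prior_edges, g_train_edges, True)
        valid_mrr = runtest(g_prior_edges, epoch, validation=True).mean()
        print('Epoch %d: loss %.4f, acc %.4f, validation MRR %.4f' %
              (epoch, avg_loss, avg_acc, valid_mrr))
        sched.step()
        best_mrr = max(best_mrr, valid_mrr)
    # Evaluate once on the test split after training.
    test_mrr = runtest(g_prior_edges, n_epochs, validation=False).mean()
    return best_mrr, test_mrr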
def testing(pre_graph_edges, epoch, validation=True):
    model.eval()

    period = 1
    offset = epoch % period
    number_of_users = len(dataset.authors.index)
    number_of_items = len(dataset.papers.index)

    pre_graph_source, pre_graph_destination = graph.find_edges(pre_graph_edges)
    pre_graph = DGLGraph()
    pre_graph.add_nodes(graph.number_of_nodes())
    pre_graph.add_edges(pre_graph_source, pre_graph_destination)
    pre_graph.ndata.update({k: cuda(v) for k, v in graph.ndata.items()})

    user_offset = 0
    hiddenrepresentationlist = []
    with torch.no_grad():
        with tqdm.trange(offset, number_of_users + number_of_items, period) as tq:
            for node_id in tq:
                if user_offset == 0 and node_id >= number_of_items:
                    user_offset = node_id
                nodeset = cuda(torch.LongTensor([node_id]))
                hiddenrepresentation = forward(model, pre_graph, nodeset, False)
                hiddenrepresentationlist.append(hiddenrepresentation)
    hiddenrepresentation = torch.cat(hiddenrepresentationlist, 0)

    rankinglist = []
    with torch.no_grad():
        with tqdm.trange(user_offset, number_of_items + number_of_users, period) as tq:
            for u_nid in tq:
                # userid = dataset.user_ids[u_nid]
                userid = u_nid
                uhid = (u_nid - offset) // period
                paper_ids_excluded = dataset.links[
                    (dataset.links['idx_A'] == userid) &
                    (dataset.links['train'] |
                     dataset.links['test' if validation else 'valid'])
                    ]['idx_P'].values
                paper_ids_candidate = dataset.links[
                    (dataset.links['idx_A'] == userid) &
                    dataset.links['valid' if validation else 'test']
                    ]['idx_P'].values
                paper_ids = np.setdiff1d(range(len(dataset.paper_ids_map)),
                                         paper_ids_excluded)
                hidden_representation_ids = nodeidremap(paper_ids, offset, period)
                hidden_ids_candidate = nodeidremap(paper_ids_candidate, offset, period)

                destination = torch.from_numpy(hidden_representation_ids).long()
                source = torch.zeros_like(destination).fill_(uhid)
                hidden_destination = hiddenrepresentation[destination]
                hidden_source = hiddenrepresentation[source]

                score = (hidden_source * hidden_destination).sum(1)
                score_sort_idx = score.sort(descending=True)[1].cpu().numpy()

                rank_map = {
                    v: i for i, v in enumerate(hidden_representation_ids[score_sort_idx])
                    }
                rank_candidates = np.array(
                    [rank_map[p_nid] for p_nid in hidden_ids_candidate])
                rank = (1 / (rank_candidates + 1) if len(rank_candidates) != 0
                        else np.array([1 / len(score_sort_idx)]))
                rankinglist.append(rank.mean())
                tq.set_postfix({'rank': rank.mean()})

    return np.array(rankinglist)
def train_batches(pre_graph_edges, train_graph_edges, train):
    global learning_option
    if train:
        model.train()
    else:
        model.eval()

    pre_graph_source, pre_graph_destination = graph.find_edges(pre_graph_edges)
    pre_graph = DGLGraph()
    pre_graph.add_nodes(graph.number_of_nodes())
    pre_graph.add_edges(pre_graph_source, pre_graph_destination)
    pre_graph.ndata.update({k: cuda(v) for k, v in graph.ndata.items()})
    edge_batches = train_graph_edges[torch.randperm(
        train_graph_edges.shape[0])].split(batch_size)

    with tqdm.tqdm(edge_batches) as tq:
        loss_num = 0
        acc_num = 0
        count = 0
        for batch_id, batch in enumerate(tq):
            count += batch.shape[0]
            source, destination = graph.find_edges(batch)
            destination_negatives = []
            for i in range(len(destination)):
                if np.random.rand() < args.hard_neg_prob:
                    neighbor = torch.LongTensor(neighbors[destination[i].item()])
                    mask = ~(graph.has_edges_between(neighbor, source[i].item()).byte())
                    destination_negatives.append(
                        np.random.choice(neighbor[mask].numpy(), negative_samples))
                else:
                    destination_negatives.append(
                        np.random.randint(0, len(dataset.papers), negative_samples))
            destination_negatives = torch.LongTensor(destination_negatives)
            destination = destination.view(-1, 1).expand_as(destination_negatives).flatten()
            source = source.view(-1, 1).expand_as(destination_negatives).flatten()
            destination_negatives = destination_negatives.flatten()

            mask = (pre_graph.in_degrees(destination_negatives) > 0) & \
                   (pre_graph.in_degrees(destination) > 0) & \
                   (pre_graph.in_degrees(source) > 0)
            source = source[mask]
            destination = destination[mask]
            destination_negatives = destination_negatives[mask]
            if len(source) == 0:
                continue

            nodeset = cuda(torch.cat([source, destination, destination_negatives]))
            source_size, destination_size, negative_destination_size = \
                source.shape[0], destination.shape[0], destination_negatives.shape[0]

            hidden_source, hidden_destination, negative_hidden_destination = (
                forward(model, pre_graph, nodeset, train).split(
                    [source_size, destination_size, negative_destination_size]))

            difference = (
                hidden_source * (negative_hidden_destination - hidden_destination)).sum(1)
            loss = lossfunction[args.loss](difference)
            accuracy = (difference < 0).sum()
            assert loss.item() == loss.item()   # guard against NaN loss

            grad_sqr_norm = 0
            if train:
                learning_option.zero_grad()
                loss.backward()
                for name, p in model.named_parameters():
                    assert (p.grad != p.grad).sum() == 0   # guard against NaN gradients
                    grad_sqr_norm += p.grad.norm().item() ** 2
                learning_option.step()

            loss_num += loss.item()
            acc_num += accuracy.item() / negative_samples
            avg_loss = loss_num / (batch_id + 1)
            average_accuracy = acc_num / count
            tq.set_postfix({
                'loss': '%.6f' % loss.item(),
                'avg_loss': '%.3f' % avg_loss,
                'average_accuracy': '%.3f' % average_accuracy,
                'grad_norm': '%.6f' % np.sqrt(grad_sqr_norm)
                })

    return avg_loss, average_accuracy
embeddings['year'] = nn.Embedding(
    graph.ndata['year'].max().item() + 1, inputfeatures, padding_idx=0)
if 'venue' in graph.ndata.keys():
    embeddings['venue'] = nn.Embedding(
        graph.ndata['venue'].max().item() + 1, inputfeatures, padding_idx=0)
embeddings['fos'] = nn.Sequential(
    nn.Linear(300, inputfeatures),
    nn.LeakyReLU(),
    )

model = cuda(
    PinSage(graph.number_of_nodes(),
            [hidden_number] * (layer_number + 1),
            20, 0.5, 20,
            emb=embeddings,
            G=graph,
            zero_h=args.zero_h))
learning_option = getattr(torch.optim, args.learning_option)(model.parameters(), lr=args.lr)
pre_set = torch.optim.lr_scheduler.LambdaLR(learning_option, parameters[args.pre_set])


def forward(model, pre_graph, nodeset, train=True):
    if train:
        return model(pre_graph, nodeset)
    else:
        with torch.no_grad():
            return model(pre_graph, nodeset)