예제 #1
0
파일: main.py 프로젝트: jjhu94/dgl-1
def runtest(g_train_bases, ml, validation=True):
    """Compute the mean reciprocal rank (MRR) of held-out items per user.

    The model is switched to eval mode and an embedding is computed for
    every node, one node at a time, on the subgraph of the global graph
    ``g`` induced by ``g_train_bases``.  Each user is then scored (dot
    product) against every item that is not excluded, and the reciprocal
    ranks of that user's held-out items are averaged.

    Parameters
    ----------
    g_train_bases
        Edge IDs of ``g`` that make up the graph the model is run on.
    ml
        Dataset object.  This function reads ``users``, ``movies``,
        ``ratings``, ``user_ids``, ``movie_ids`` and ``movie_ids_invmap``.
    validation : bool
        When true, items flagged ``'valid'`` are ranked and items flagged
        ``'train'`` or ``'test'`` are excluded; when false, ``'test'``
        items are ranked and ``'train'``/``'valid'`` items are excluded.

    Returns
    -------
    numpy.ndarray
        One mean reciprocal rank per user, ordered by user node ID.
    """
    model.eval()

    user_count = len(ml.users.index)
    item_count = len(ml.movies.index)

    # Name of the ratings column that is ranked, and of the one that is
    # hidden from the ranking together with the training ratings.
    if validation:
        held_out, hidden = 'valid', 'test'
    else:
        held_out, hidden = 'test', 'valid'

    prior_graph = g.edge_subgraph(g_train_bases, preserve_nodes=True)
    prior_graph.copy_from_parent()

    # Pre-compute the representation of every user and item node.
    with torch.no_grad(), tqdm.trange(user_count + item_count) as progress:
        per_node = [
            forward(model, prior_graph, cuda(torch.LongTensor([nid])), False)
            for nid in progress
        ]
    embeddings = torch.cat(per_node, 0)

    reciprocal_ranks = []
    with torch.no_grad(), tqdm.trange(user_count) as progress:
        for user_nid in progress:
            by_user = ml.ratings['user_id'] == ml.user_ids[user_nid]
            already_known = ml.ratings['train'] | ml.ratings[hidden]
            excluded_pids = ml.ratings[by_user & already_known]['movie_id'].values
            target_pids = ml.ratings[by_user & ml.ratings[held_out]]['movie_id'].values

            pool_pids = np.setdiff1d(ml.movie_ids, excluded_pids)
            pool_nids = np.array([ml.movie_ids_invmap[pid] for pid in pool_pids])
            target_nids = np.array([ml.movie_ids_invmap[pid] for pid in target_pids])

            # Item rows are addressed at their item index plus the number
            # of users in the concatenated embedding matrix.
            item_index = torch.from_numpy(pool_nids) + user_count
            user_index = torch.zeros_like(item_index).fill_(user_nid)
            score = (embeddings[user_index] * embeddings[item_index]).sum(1)
            _, order = score.sort(descending=True)
            order = order.cpu().numpy()

            # Map each pooled item to its 0-based position in the ranking.
            position_of = {nid: pos for pos, nid in enumerate(pool_nids[order])}
            positions = np.array([position_of[nid] for nid in target_nids])
            user_mrr = (1 / (positions + 1)).mean()
            reciprocal_ranks.append(user_mrr)
            progress.set_postfix({'rank': user_mrr})

    return np.array(reciprocal_ranks)
예제 #2
0
파일: main.py 프로젝트: jjhu94/dgl-1
def runtrain(g_train_bases, g_train_pairs, train):
    """Run one pass over ``g_train_pairs`` and report average loss/accuracy.

    The model sees only the subgraph of the global graph ``g`` induced by
    ``g_train_bases``.  The pair edges are shuffled, split into batches of
    ``batch_size`` and, for every (user, item) pair, ``n_negs`` negative
    items are drawn uniformly from the item node-ID range.

    Parameters
    ----------
    g_train_bases
        Edge IDs of ``g`` forming the graph the model is run on.
    g_train_pairs
        Edge IDs of ``g`` whose endpoints are the positive pairs.
    train : bool
        When true the model is put in train mode and the optimizer ``opt``
        is stepped on every batch; otherwise the model is put in eval mode
        and only the metrics are computed.

    Returns
    -------
    tuple
        ``(avg_loss, avg_acc)`` as of the last processed batch.  Both names
        are only assigned after a batch survives the degree filter, so the
        function raises ``UnboundLocalError`` if every batch is skipped.
    """
    global opt
    switch_mode = model.train if train else model.eval
    switch_mode()

    prior_graph = g.edge_subgraph(g_train_bases, preserve_nodes=True)
    prior_graph.copy_from_parent()

    # Shuffle the training pairs and cut them into batches.
    shuffled = g_train_pairs[torch.randperm(g_train_pairs.shape[0])]

    with tqdm.tqdm(shuffled.split(batch_size)) as progress:
        loss_total = 0
        acc_total = 0
        pairs_seen = 0
        for batch_id, batch in enumerate(progress):
            pairs_seen += batch.shape[0]

            # Users are the edge sources, items the edge destinations.
            users, items = g.find_edges(batch)

            # Negative items are drawn from the node-ID range that follows
            # the user IDs.
            first_item = len(ml.user_ids)
            past_last_item = len(ml.user_ids) + len(ml.movie_ids)
            negatives = torch.LongTensor([
                np.random.randint(first_item, past_last_item, n_negs)
                for _ in range(len(items))
            ])
            items = items.view(-1, 1).expand_as(negatives).flatten()
            users = users.view(-1, 1).expand_as(negatives).flatten()
            negatives = negatives.flatten()

            # Keep only triples whose three nodes all have a positive
            # in-degree in the prior graph.
            keep = prior_graph.in_degrees(negatives) > 0
            keep = keep & (prior_graph.in_degrees(items) > 0)
            keep = keep & (prior_graph.in_degrees(users) > 0)
            users, items, negatives = users[keep], items[keep], negatives[keep]
            if len(users) == 0:
                continue

            # One forward pass for all three node groups, split afterwards.
            nodeset = cuda(torch.cat([users, items, negatives]))
            group_sizes = [users.shape[0], items.shape[0], negatives.shape[0]]
            h_user, h_item, h_negative = (
                forward(model, prior_graph, nodeset, train).split(group_sizes))

            # diff < 0 means the positive item scored higher than the negative.
            diff = (h_user * (h_negative - h_item)).sum(1)
            loss = loss_func[args.loss](diff)
            correct = (diff < 0).sum()
            assert loss.item() == loss.item()  # fails only when the loss is NaN

            grad_sqr_norm = 0
            if train:
                opt.zero_grad()
                loss.backward()
                for _, param in model.named_parameters():
                    # x != x is true only for NaN entries.
                    assert (param.grad != param.grad).sum() == 0
                    grad_sqr_norm += param.grad.norm().item() ** 2
                opt.step()

            loss_total += loss.item()
            acc_total += correct.item() / n_negs
            avg_loss = loss_total / (batch_id + 1)
            avg_acc = acc_total / pairs_seen
            progress.set_postfix({'loss': '%.6f' % loss.item(),
                                  'avg_loss': '%.3f' % avg_loss,
                                  'avg_acc': '%.3f' % avg_acc,
                                  'grad_norm': '%.6f' % np.sqrt(grad_sqr_norm)})

    return avg_loss, avg_acc
예제 #3
0
파일: main.py 프로젝트: AAAEEEE/P2R-GCN
def runtest(g_prior_edges, epoch, validation=True):
    """Compute the mean reciprocal rank (MRR) of held-out papers per author.

    A fresh ``DGLGraph`` is built from the edges ``g_prior_edges`` of the
    global graph ``g`` (node data copied through ``cuda``), embeddings are
    computed one node at a time, and every user node is scored (dot
    product) against all papers that are not excluded.

    Parameters
    ----------
    g_prior_edges
        Edge IDs of ``g`` that make up the graph the model is run on.
    epoch : int
        Only used as ``epoch % period``; ``period`` is fixed to 1 here, so
        the resulting offset is always 0.
    validation : bool
        When true, links flagged ``'valid'`` are ranked and links flagged
        ``'train'`` or ``'test'`` are excluded; when false the roles of
        ``'valid'`` and ``'test'`` are swapped.

    Returns
    -------
    numpy.ndarray
        One mean reciprocal rank per evaluated user node.  A user without
        held-out links contributes ``1 / (number of ranked papers)``.
    """
    model.eval()
    stride = 1
    phase = epoch % stride
    user_count = len(db.authors.index)
    item_count = len(db.papers.index)

    if validation:
        held_out, hidden = 'valid', 'test'
    else:
        held_out, hidden = 'test', 'valid'

    # Rebuild the prior graph: same node set as g, only the given edges.
    endpoints = g.find_edges(g_prior_edges)
    prior_graph = DGLGraph()
    prior_graph.add_nodes(g.number_of_nodes())
    prior_graph.add_edges(*endpoints)
    prior_graph.ndata.update(
        {key: cuda(value) for key, value in g.ndata.items()})

    # The first visited node ID that is >= the number of items is taken as
    # the start of the user range.
    first_user_nid = 0
    per_node = []
    with torch.no_grad(), \
            tqdm.trange(phase, user_count + item_count, stride) as progress:
        for nid in progress:
            if first_user_nid == 0 and nid >= item_count:
                first_user_nid = nid
            per_node.append(
                forward(model, prior_graph, cuda(torch.LongTensor([nid])), False))
    embeddings = torch.cat(per_node, 0)

    reciprocal_ranks = []
    with torch.no_grad(), \
            tqdm.trange(first_user_nid, item_count + user_count, stride) as progress:
        for user_nid in progress:
            # The node ID is compared directly against the 'idx_A' column.
            user_row = (user_nid - phase) // stride

            by_user = db.links['idx_A'] == user_nid
            already_known = db.links['train'] | db.links[hidden]
            excluded_pids = db.links[by_user & already_known]['idx_P'].values
            target_pids = db.links[by_user & db.links[held_out]]['idx_P'].values

            pool_pids = np.setdiff1d(range(len(db.paper_ids_map)), excluded_pids)

            pool_rows = id_remap(pool_pids, phase, stride)
            target_rows = id_remap(target_pids, phase, stride)

            item_index = torch.from_numpy(pool_rows)
            user_index = torch.zeros_like(item_index).fill_(user_row)
            score = (embeddings[user_index] * embeddings[item_index]).sum(1)
            _, order = score.sort(descending=True)
            order = order.cpu().numpy()

            # Map each pooled paper to its 0-based position in the ranking.
            position_of = {row: pos for pos, row in enumerate(pool_rows[order])}
            positions = np.array([position_of[row] for row in target_rows])
            if len(positions) != 0:
                reciprocal = 1 / (positions + 1)
            else:
                # No held-out paper for this user: fall back to the worst
                # possible reciprocal rank.
                reciprocal = np.array([1 / len(order)])
            user_mrr = reciprocal.mean()
            reciprocal_ranks.append(user_mrr)
            progress.set_postfix({'rank': user_mrr})

    return np.array(reciprocal_ranks)
예제 #4
0
파일: main.py 프로젝트: jjhu94/dgl-1
# Number of training edges per minibatch.
batch_size = 256
# Margin added to the score difference by the hinge loss below.
margin = 0.9

# Number of negative items sampled for each positive pair.
n_negs = args.n_negs
# Taken from the command line; not referenced by the code in this view.
hard_neg_prob = args.hard_neg_prob

# Loss functions keyed by name.  Each one receives `diff` and reduces it
# to a scalar with mean().
# NOTE(review): callers appear to pass diff = negative score - positive
# score, so smaller is better -- confirm against the training loop.
loss_func = {
        # max(diff + margin, 0), averaged.
        'hinge': lambda diff: (diff + margin).clamp(min=0).mean(),
        # 1 - sigmoid(-diff), averaged.
        'bpr': lambda diff: (1 - torch.sigmoid(-diff)).mean(),
        }

# Build the model and move it through cuda().
# NOTE(review): the meaning of the positional arguments 20, 0.5 and 10 is
# defined by the PinSage constructor, which is not visible here -- confirm
# before changing them.
model = cuda(PinSage(
    g.number_of_nodes(),
    [n_hidden] * (n_layers + 1),
    20,
    0.5,
    10,
    use_feature=args.use_feature,
    G=g,
    ))
# The optimizer class is looked up by name in torch.optim.
opt = getattr(torch.optim, args.opt)(model.parameters(), lr=args.lr)


def forward(model, g_prior, nodeset, train=True):
    """Return ``model(g_prior, nodeset)``.

    When ``train`` is false the call is wrapped in ``torch.no_grad()``;
    when it is true the current gradient mode is left untouched.
    """
    if not train:
        with torch.no_grad():
            return model(g_prior, nodeset)
    return model(g_prior, nodeset)

예제 #5
0
파일: main.py 프로젝트: AAAEEEE/P2R-GCN
def runtrain(g_prior_edges, g_train_edges, train):
    """Run one pass over ``g_train_edges`` and report average loss/accuracy.

    A fresh ``DGLGraph`` is built from the edges ``g_prior_edges`` of the
    global graph ``g``.  The training edges are shuffled and split into
    batches of ``batch_size``.  For every positive (source, destination)
    pair, ``n_negs`` negatives are drawn: with probability
    ``args.hard_neg_prob`` from the entries of ``neighbors`` for the
    destination that have no edge to the source, otherwise uniformly from
    ``[0, len(db.papers))``.

    Parameters
    ----------
    g_prior_edges
        Edge IDs of ``g`` forming the graph the model is run on.
    g_train_edges
        Edge IDs of ``g`` whose endpoints are the positive pairs.
    train : bool
        When true the model is put in train mode and the optimizer ``opt``
        is stepped on every batch; otherwise the model is put in eval mode
        and only the metrics are computed.

    Returns
    -------
    tuple
        ``(avg_loss, avg_acc)`` as of the last processed batch.  Both names
        are only assigned after a batch survives the degree filter, so the
        function raises ``UnboundLocalError`` if every batch is skipped.
    """
    global opt
    switch_mode = model.train if train else model.eval
    switch_mode()

    # Rebuild the prior graph: same node set as g, only the given edges.
    endpoints = g.find_edges(g_prior_edges)
    prior_graph = DGLGraph()
    prior_graph.add_nodes(g.number_of_nodes())
    prior_graph.add_edges(*endpoints)
    prior_graph.ndata.update(
        {key: cuda(value) for key, value in g.ndata.items()})

    shuffled = g_train_edges[torch.randperm(g_train_edges.shape[0])]

    with tqdm.tqdm(shuffled.split(batch_size)) as progress:
        loss_total = 0
        acc_total = 0
        pairs_seen = 0
        for batch_id, batch in enumerate(progress):
            pairs_seen += batch.shape[0]
            users, items = g.find_edges(batch)

            negatives = []
            for user, item in zip(users.tolist(), items.tolist()):
                if np.random.rand() < args.hard_neg_prob:
                    # Hard negatives: neighbours of the positive item that
                    # are not connected to the user.
                    # NOTE(review): `~` on a uint8 tensor is a bitwise NOT
                    # in recent PyTorch releases -- confirm this mask acts
                    # as a logical NOT on the targeted version.
                    candidates = torch.LongTensor(neighbors[item])
                    unlinked = ~(g.has_edges_between(candidates, user).byte())
                    negatives.append(
                        np.random.choice(candidates[unlinked].numpy(), n_negs))
                else:
                    negatives.append(
                        np.random.randint(0, len(db.papers), n_negs))

            negatives = torch.LongTensor(negatives)
            items = items.view(-1, 1).expand_as(negatives).flatten()
            users = users.view(-1, 1).expand_as(negatives).flatten()
            negatives = negatives.flatten()

            # Keep only triples whose three nodes all have a positive
            # in-degree in the prior graph.
            keep = prior_graph.in_degrees(negatives) > 0
            keep = keep & (prior_graph.in_degrees(items) > 0)
            keep = keep & (prior_graph.in_degrees(users) > 0)
            users, items, negatives = users[keep], items[keep], negatives[keep]
            if len(users) == 0:
                continue

            # One forward pass for all three node groups, split afterwards.
            nodeset = cuda(torch.cat([users, items, negatives]))
            group_sizes = [users.shape[0], items.shape[0], negatives.shape[0]]
            h_user, h_item, h_negative = (
                forward(model, prior_graph, nodeset, train).split(group_sizes))

            # diff < 0 means the positive item scored higher than the negative.
            diff = (h_user * (h_negative - h_item)).sum(1)
            loss = loss_func[args.loss](diff)
            correct = (diff < 0).sum()
            assert loss.item() == loss.item()  # fails only when the loss is NaN

            grad_sqr_norm = 0
            if train:
                opt.zero_grad()
                loss.backward()
                for _, param in model.named_parameters():
                    # x != x is true only for NaN entries.
                    assert (param.grad != param.grad).sum() == 0
                    grad_sqr_norm += param.grad.norm().item() ** 2
                opt.step()

            loss_total += loss.item()
            acc_total += correct.item() / n_negs
            avg_loss = loss_total / (batch_id + 1)
            avg_acc = acc_total / pairs_seen
            progress.set_postfix({'loss': '%.6f' % loss.item(),
                                  'avg_loss': '%.3f' % avg_loss,
                                  'avg_acc': '%.3f' % avg_acc,
                                  'grad_norm': '%.6f' % np.sqrt(grad_sqr_norm)})

    return avg_loss, avg_acc
예제 #6
0
파일: main.py 프로젝트: AAAEEEE/P2R-GCN
# `emb` is created before this point (outside this view) and maps a node
# feature name to the module that embeds it.

# Optional 'venue' feature: an embedding table with one row per value in
# 0..max, where index 0 is the padding index.
if 'venue' in g.ndata.keys():
    emb['venue'] = nn.Embedding(
        g.ndata['venue'].max().item() + 1,
        in_features,
        padding_idx=0
            )
# 'fos' feature: a linear projection from 300 inputs to `in_features`
# followed by LeakyReLU.
# NOTE(review): presumably g.ndata['fos'] holds 300-dimensional vectors --
# confirm against the data loader.
emb['fos'] = nn.Sequential(
    nn.Linear(300, in_features),
    nn.LeakyReLU(),
    )

# Build the model and move it through cuda().
# NOTE(review): the meaning of the positional arguments 20, 0.5 and 20 is
# defined by the PinSage constructor, which is not visible here -- confirm
# before changing them.
model = cuda(PinSage(
    g.number_of_nodes(),
    [n_hidden] * (n_layers + 1),
    20,
    0.5,
    20,
    emb=emb,
    G=g,
    zero_h=args.zero_h
    ))
# The optimizer class is looked up by name in torch.optim.
opt = getattr(torch.optim, args.opt)(model.parameters(), lr=args.lr)
# Learning-rate schedule: the multiplier function is selected from
# `sched_lambda` by name.
sched = torch.optim.lr_scheduler.LambdaLR(opt, sched_lambda[args.sched])


def forward(model, g_prior, nodeset, train=True):
    """Return ``model(g_prior, nodeset)``.

    When ``train`` is false the call is wrapped in ``torch.no_grad()``;
    when it is true the current gradient mode is left untouched.
    """
    if not train:
        with torch.no_grad():
            return model(g_prior, nodeset)
    return model(g_prior, nodeset)
def testing(pre_graph_edges, epoch, validation=True):
    """Compute the mean reciprocal rank (MRR) of held-out papers per author.

    A fresh ``DGLGraph`` is built from the edges ``pre_graph_edges`` of the
    global ``graph`` (node data copied through ``cuda``), embeddings are
    computed one node at a time, and every user node is scored (dot
    product) against all papers that are not excluded.

    Parameters
    ----------
    pre_graph_edges
        Edge IDs of ``graph`` that make up the graph the model is run on.
    epoch : int
        Only used as ``epoch % period``; ``period`` is fixed to 1 here, so
        the resulting offset is always 0.
    validation : bool
        When true, links flagged ``'valid'`` are ranked and links flagged
        ``'train'`` or ``'test'`` are excluded; when false the roles of
        ``'valid'`` and ``'test'`` are swapped.

    Returns
    -------
    numpy.ndarray
        One mean reciprocal rank per evaluated user node.  A user without
        held-out links contributes ``1 / (number of ranked papers)``.
    """
    model.eval()
    period = 1
    offset = epoch % period
    user_count = len(dataset.authors.index)
    item_count = len(dataset.papers.index)

    if validation:
        held_out, hidden = 'valid', 'test'
    else:
        held_out, hidden = 'test', 'valid'

    # Rebuild the prior graph: same node set as `graph`, only the given edges.
    endpoints = graph.find_edges(pre_graph_edges)
    pre_graph = DGLGraph()
    pre_graph.add_nodes(graph.number_of_nodes())
    pre_graph.add_edges(*endpoints)
    pre_graph.ndata.update(
        {key: cuda(value) for key, value in graph.ndata.items()})

    # The first visited node ID that is >= the number of items is taken as
    # the start of the user range.
    first_user_nid = 0
    per_node = []
    with torch.no_grad(), \
            tqdm.trange(offset, user_count + item_count, period) as progress:
        for nid in progress:
            if first_user_nid == 0 and nid >= item_count:
                first_user_nid = nid
            per_node.append(
                forward(model, pre_graph, cuda(torch.LongTensor([nid])), False))
    embeddings = torch.cat(per_node, 0)

    reciprocal_ranks = []
    with torch.no_grad(), \
            tqdm.trange(first_user_nid, item_count + user_count, period) as progress:
        for user_nid in progress:
            # The node ID is compared directly against the 'idx_A' column.
            user_row = (user_nid - offset) // period

            links = dataset.links
            by_user = links['idx_A'] == user_nid
            already_known = links['train'] | links[hidden]
            excluded_pids = links[by_user & already_known]['idx_P'].values
            target_pids = links[by_user & links[held_out]]['idx_P'].values

            pool_pids = np.setdiff1d(range(len(dataset.paper_ids_map)),
                                     excluded_pids)

            # Dividing by 1.0 turns the remapped IDs into floats; they are
            # cast back to long for indexing below.
            pool_rows = nodeidremap(pool_pids, offset, period) / 1.0
            target_rows = nodeidremap(target_pids, offset, period)

            item_index = torch.from_numpy(pool_rows).type(torch.long)
            user_index = torch.zeros_like(item_index).fill_(user_row)
            score = (embeddings[user_index] * embeddings[item_index]).sum(1)
            _, order = score.sort(descending=True)
            order = order.cpu().numpy()

            # Map each pooled paper to its 0-based position in the ranking.
            position_of = {row: pos for pos, row in enumerate(pool_rows[order])}
            positions = np.array([position_of[row] for row in target_rows])
            if len(positions) != 0:
                reciprocal = 1 / (positions + 1)
            else:
                # No held-out paper for this user: fall back to the worst
                # possible reciprocal rank.
                reciprocal = np.array([1 / len(order)])
            user_mrr = reciprocal.mean()
            reciprocal_ranks.append(user_mrr)
            progress.set_postfix({'rank': user_mrr})

    return np.array(reciprocal_ranks)
def train_batches(pre_graph_edges, train_graph_edges, train):
    """Run one pass over ``train_graph_edges`` and report loss/accuracy.

    A fresh ``DGLGraph`` is built from the edges ``pre_graph_edges`` of the
    global ``graph``.  The training edges are shuffled and split into
    batches of ``batch_size``.  For every positive (source, destination)
    pair, ``negative_samples`` negatives are drawn: with probability
    ``args.hard_neg_prob`` from the entries of ``neighbors`` for the
    destination that have no edge to the source, otherwise uniformly from
    ``[0, len(dataset.papers))``.

    Parameters
    ----------
    pre_graph_edges
        Edge IDs of ``graph`` forming the graph the model is run on.
    train_graph_edges
        Edge IDs of ``graph`` whose endpoints are the positive pairs.
    train : bool
        When true the model is put in train mode and ``learning_option``
        (the optimizer) is stepped on every batch; otherwise the model is
        put in eval mode and only the metrics are computed.

    Returns
    -------
    tuple
        ``(avg_loss, average_accuracy)`` as of the last processed batch.
        ``avg_loss`` is the summed loss divided by the number of batches
        visited so far; ``average_accuracy`` is the accumulated accuracy
        divided by the cumulative number of positive pairs seen.  Both
        names are only assigned after a batch survives the degree filter,
        so ``UnboundLocalError`` is raised if every batch is skipped.
    """
    global learning_option
    if train:
        model.train()
    else:
        model.eval()

    # Rebuild the prior graph: same node set as `graph`, only the given edges.
    pre_graph_source, pre_graph_destination = graph.find_edges(pre_graph_edges)
    pre_graph = DGLGraph()
    pre_graph.add_nodes(graph.number_of_nodes())
    pre_graph.add_edges(pre_graph_source, pre_graph_destination)
    pre_graph.ndata.update({k: cuda(v) for k, v in graph.ndata.items()})
    edge_batches = train_graph_edges[torch.randperm(
        train_graph_edges.shape[0])].split(batch_size)

    with tqdm.tqdm(edge_batches) as tq:
        loss_num = 0
        acc_num = 0
        # Cumulative number of positive pairs.  This must not share a name
        # with any loop variable: a `for` target stays bound after its loop
        # and would silently replace the running total.
        pair_count = 0
        for batch_id, batch in enumerate(tq):
            pair_count += batch.shape[0]
            source, destination = graph.find_edges(batch)

            destination_negatives = []
            for user, item in zip(source.tolist(), destination.tolist()):
                if np.random.rand() < args.hard_neg_prob:
                    # Hard negatives: neighbours of the positive item that
                    # are not connected to the user.
                    # NOTE(review): `~` on a uint8 tensor is a bitwise NOT
                    # in recent PyTorch releases -- confirm this mask acts
                    # as a logical NOT on the targeted version.
                    neighbor = torch.LongTensor(neighbors[item])
                    mask = ~(graph.has_edges_between(neighbor, user).byte())
                    destination_negatives.append(
                        np.random.choice(neighbor[mask].numpy(),
                                         negative_samples))
                else:
                    destination_negatives.append(
                        np.random.randint(0, len(dataset.papers),
                                          negative_samples))

            destination_negatives = torch.LongTensor(destination_negatives)
            destination = destination.view(
                -1, 1).expand_as(destination_negatives).flatten()
            source = source.view(
                -1, 1).expand_as(destination_negatives).flatten()
            destination_negatives = destination_negatives.flatten()

            # Keep only triples whose three nodes all have a positive
            # in-degree in the prior graph.
            mask = (pre_graph.in_degrees(destination_negatives) > 0) & \
                   (pre_graph.in_degrees(destination) > 0) & \
                   (pre_graph.in_degrees(source) > 0)
            source = source[mask]
            destination = destination[mask]
            destination_negatives = destination_negatives[mask]
            if len(source) == 0:
                continue

            # One forward pass for all three node groups, split afterwards.
            nodeset = cuda(
                torch.cat([source, destination, destination_negatives]))
            source_size, destination_size, negative_destination_size = \
                source.shape[0], destination.shape[0], \
                destination_negatives.shape[0]

            hidden_source, hidden_destination, negative_hidden_destination = (
                forward(model, pre_graph, nodeset, train).split(
                    [source_size, destination_size,
                     negative_destination_size]))

            # difference < 0 means the positive scored higher than the negative.
            difference = (
                hidden_source *
                (negative_hidden_destination - hidden_destination)).sum(1)
            loss = lossfunction[args.loss](difference)
            accuracy = (difference < 0).sum()
            assert loss.item() == loss.item()  # fails only when the loss is NaN

            grad_sqr_norm = 0
            if train:
                learning_option.zero_grad()
                loss.backward()
                for name, p in model.named_parameters():
                    # x != x is true only for NaN entries.
                    assert (p.grad != p.grad).sum() == 0
                    grad_sqr_norm += p.grad.norm().item()**2
                learning_option.step()

            loss_num += loss.item()
            acc_num += accuracy.item() / negative_samples
            avg_loss = loss_num / (batch_id + 1)
            average_accuracy = acc_num / pair_count
            tq.set_postfix({
                'loss': '%.6f' % loss.item(),
                'avg_loss': '%.3f' % avg_loss,
                'average_accuracy': '%.3f' % average_accuracy,
                'grad_norm': '%.6f' % np.sqrt(grad_sqr_norm)
            })

    return avg_loss, average_accuracy
# `embeddings` is created before this point (outside this view) and maps a
# node feature name to the module that embeds it.

# 'year' feature: an embedding table with one row per value in 0..max,
# where index 0 is the padding index.
embeddings['year'] = nn.Embedding(graph.ndata['year'].max().item() + 1,
                                  inputfeatures,
                                  padding_idx=0)
# Optional 'venue' feature, embedded the same way when present.
if 'venue' in graph.ndata.keys():
    embeddings['venue'] = nn.Embedding(graph.ndata['venue'].max().item() + 1,
                                       inputfeatures,
                                       padding_idx=0)
# 'fos' feature: a linear projection from 300 inputs to `inputfeatures`
# followed by LeakyReLU.
# NOTE(review): presumably graph.ndata['fos'] holds 300-dimensional
# vectors -- confirm against the data loader.
embeddings['fos'] = nn.Sequential(
    nn.Linear(300, inputfeatures),
    nn.LeakyReLU(),
)

# Build the model and move it through cuda().
# NOTE(review): the meaning of the positional arguments 20, 0.5 and 20 is
# defined by the PinSage constructor, which is not visible here -- confirm
# before changing them.
model = cuda(
    PinSage(graph.number_of_nodes(), [hidden_number] * (layer_number + 1),
            20,
            0.5,
            20,
            emb=embeddings,
            G=graph,
            zero_h=args.zero_h))
# Despite its name, `learning_option` is the optimizer instance: its class
# is looked up by name in torch.optim.
learning_option = getattr(torch.optim,
                          args.learning_option)(model.parameters(), lr=args.lr)
# `pre_set` is the learning-rate scheduler; its multiplier function is
# selected from `parameters` by name.
pre_set = torch.optim.lr_scheduler.LambdaLR(learning_option,
                                            parameters[args.pre_set])


def forward(model, pre_graph, nodeset, train=True):
    """Return ``model(pre_graph, nodeset)``.

    When ``train`` is false the call is wrapped in ``torch.no_grad()``;
    when it is true the current gradient mode is left untouched.
    """
    if not train:
        with torch.no_grad():
            return model(pre_graph, nodeset)
    return model(pre_graph, nodeset)