Example #1
def main(args):
    args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
    set_random_seed()

    # Interchangeable with other datasets
    if args['dataset'] == 'Tox21':
        from dgl.data.chem import Tox21
        dataset = Tox21()

    trainset, valset, testset = split_dataset(dataset, args['train_val_test_split'])
    train_loader = DataLoader(trainset, batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs_for_classification)
    val_loader = DataLoader(valset, batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs_for_classification)
    test_loader = DataLoader(testset, batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs_for_classification)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        # Interchangeable with other models
        if args['model'] == 'GCN':
            model = model_zoo.chem.GCNClassifier(in_feats=args['in_feats'],
                                                 gcn_hidden_feats=args['gcn_hidden_feats'],
                                                 classifier_hidden_feats=args['classifier_hidden_feats'],
                                                 n_tasks=dataset.n_tasks)
        elif args['model'] == 'GAT':
            model = model_zoo.chem.GATClassifier(in_feats=args['in_feats'],
                                                 gat_hidden_feats=args['gat_hidden_feats'],
                                                 num_heads=args['num_heads'],
                                                 classifier_hidden_feats=args['classifier_hidden_feats'],
                                                 n_tasks=dataset.n_tasks)

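        # Per-task positive-class weights (dataset.task_pos_weights) offset label imbalance in the multi-task BCE loss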
        loss_criterion = BCEWithLogitsLoss(pos_weight=dataset.task_pos_weights.to(args['device']),
                                           reduction='none')
        optimizer = Adam(model.parameters(), lr=args['lr'])
        stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)

        # Validation and early stop
        val_roc_auc = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_roc_auc, model)
        print('epoch {:d}/{:d}, validation roc-auc score {:.4f}, best validation roc-auc score {:.4f}'.format(
            epoch + 1, args['num_epochs'], val_roc_auc, stopper.best_score))
        if early_stop:
            break

    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_roc_auc = run_an_eval_epoch(args, model, test_loader)
    print('test roc-auc score {:.4f}'.format(test_roc_auc))
Example #2
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args['dataset'])
    dev = torch.device("cuda:0" if args['gpu'] >= 0 else "cpu")

    features = features.to(dev)
    labels = labels.to(dev)
    train_mask = train_mask.to(dev)
    val_mask = val_mask.to(dev)
    test_mask = test_mask.to(dev)

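    # HAN over two meta-paths composed from the 'pa'/'ap' and 'pf'/'fp' edge types (presumably paper-author-paper and paper-field-paper)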
    model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(dev)

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print(
            'Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
            'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.
            format(epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                   val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.
          format(test_loss.item(), test_micro_f1, test_macro_f1))
Example #3
def main(args):
    args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
    set_random_seed()

    # Interchangeable with other datasets
    train_set, val_set, test_set = load_dataset_for_regression(args)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    if test_set is not None:
        test_loader = DataLoader(dataset=test_set,
                                 batch_size=args['batch_size'],
                                 collate_fn=collate_molgraphs)

    if args['model'] == 'MPNN':
        model = model_zoo.chem.MPNNModel(node_input_dim=args['node_in_feats'],
                                         edge_input_dim=args['edge_in_feats'],
                                         output_dim=args['output_dim'])
    elif args['model'] == 'SCHNET':
        model = model_zoo.chem.SchNet(norm=args['norm'],
                                      output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    elif args['model'] == 'MGCN':
        model = model_zoo.chem.MGCNModel(norm=args['norm'],
                                         output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    model.to(args['device'])

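    # Per-element MSE (reduction='none' leaves masking/averaging to the training loop); early stopping treats lower validation scores as better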
    loss_fn = nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if test_set is not None:
        stopper.load_checkpoint(model)
        test_score = run_an_eval_epoch(args, model, test_loader)
        print('test {} {:.4f}'.format(args['metric_name'], test_score))
Example #4
def main(args):
    args['device'] = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])

    train_set, val_set, test_set = load_dataset_for_regression(args)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            shuffle=True,
                            collate_fn=collate_molgraphs)
    if test_set is not None:
        test_loader = DataLoader(dataset=test_set,
                                 batch_size=args['batch_size'],
                                 collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        model = load_model(args)
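        # SchNet and MGCN take the training-set mean/std of the targets, typically used to normalize/denormalize predictions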
        if args['model'] in ['SCHNET', 'MGCN']:
            model.set_mean_std(train_set.mean, train_set.std, args['device'])
        loss_fn = nn.MSELoss(reduction='none')
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args['lr'],
                                     weight_decay=args['weight_decay'])
        stopper = EarlyStopping(mode='lower', patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))

        if early_stop:
            break

    if test_set is not None:
        if not args['pre_trained']:
            stopper.load_checkpoint(model)
        test_score = run_an_eval_epoch(args, model, test_loader)
        print('test {} {:.4f}'.format(args['metric_name'], test_score))
Example #5
def main(args):
    args['device'] = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])

    # Interchangeable with other datasets
    dataset, train_set, val_set, test_set = load_dataset_for_classification(
        args)
    train_loader = DataLoader(train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(test_set,
                             batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
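        # The number of output tasks is dataset-dependent (e.g. 12 toxicity tasks for Tox21)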
        args['n_tasks'] = dataset.n_tasks
        model = load_model(args)
        loss_criterion = BCEWithLogitsLoss(
            pos_weight=dataset.task_pos_weights.to(args['device']),
            reduction='none')
        optimizer = Adam(model.parameters(), lr=args['lr'])
        stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion,
                          optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('test {} {:.4f}'.format(args['metric_name'], test_score))
Example #6
def train(gpu, args):
    rank = args.nr * args.gpus + gpu
    print(rank)
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=args.world_size,
                            rank=rank)
    device = torch.device('cuda:{}'.format(gpu)) if torch.cuda.is_available() \
        else torch.device("cpu")

    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args.dataset)

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    print(train_mask.size())
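    # Partition the training mask, labels and features into contiguous chunks; each process keeps the chunk indexed by its rank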
    train_mask = th.split(train_mask,
                          math.ceil(len(train_mask) / args.gpus))[rank]
    labels = th.split(labels, math.ceil(len(labels) / args.gpus))[rank]
    features = th.split(features, math.ceil(len(features) / args.gpus))[rank]
    #g = th.split(g, math.ceil(len(g) / args.gpus))[rank]
    print(train_mask.size(), labels.size(), features.size(), len(g))
    print(type(g))
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    if args.hetero:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args.hidden_units,
                    out_size=num_classes,
                    num_heads=args.num_heads,
                    dropout=args.dropout).to(device)

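        # DistributedDataParallel synchronizes gradients across processes during backward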
        model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        g = g.to(device)
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args.hidden_units,
                    out_size=num_classes,
                    num_heads=args.num_heads,
                    dropout=args.dropout).to(device)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        g = [graph.to(device) for graph in g]

    stopper = EarlyStopping(patience=args.patience)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    for epoch in range(args.num_epochs):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print(
            'Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
            'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.
            format(epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                   val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.
          format(test_loss.item(), test_micro_f1, test_macro_f1))
Example #7
def main(args):
    # If args['hetero'] is True, g will be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features = features.to(args['device'])
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

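    # The heterogeneous variant takes one graph plus meta-path descriptions; the homogeneous variant takes one graph per meta-path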
    if args['hetero']:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
            epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
Example #8
def main(args):
    # If args['hetero'] is True, g will be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    args_academic = read_args()
    data = dataprocess_han.input_data_han(args_academic)
    #g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    #val_mask, test_mask = load_data(args['dataset'])
    features = torch.tensor(data.a_text_embed, dtype=torch.float32)
    labels = torch.tensor(data.a_class)

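    # One author-author graph per meta-path, named after its relation: co-authorship (APA), venue attendance (APVPA) and reference (APPA)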
    APA_g = dgl.graph(data.APA_matrix, ntype='author', etype='coauthor')
    APVPA_g = dgl.graph(data.APVPA_matrix, ntype='author', etype='attendance')
    APPA_g = dgl.graph(data.APPA_matrix, ntype='author', etype='reference')

    #g = [APA_g, APPA_g]
    g = [APA_g, APVPA_g, APPA_g]

    num_classes = 4
    features = features.to(args['device'])
    labels = labels.to(args['device'])

    #if args['hetero']:
    #from model_hetero import HAN
    #model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
    #in_size=features.shape[1],
    #hidden_size=args['hidden_units'],
    #out_size=num_classes,
    #num_heads=args['num_heads'],
    #dropout=args['dropout']).to(args['device'])
    #else:
    model = HAN(num_meta_paths=len(g),
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    model.load_state_dict(torch.load("./model_para.pt"))

    for epoch in range(args['num_epochs']):

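        # Re-sample a fresh random split over all authors every epoch; the test split also serves as the validation set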
        X = [[i] for i in range(args_academic.A_n)]
        train_X, test_X, _, _ = train_test_split(X, X, test_size=0.8)  #
        train_X, test_X, _, _ = train_test_split(train_X,
                                                 train_X,
                                                 test_size=0.2)  #

        train_mask = get_binary_mask(args_academic.A_n, train_X)
        test_mask = get_binary_mask(args_academic.A_n, test_X)

        #train_mask = torch.tensor(data.train_mask)
        #test_mask = torch.tensor(data.test_mask)
        val_mask = test_mask
        train_mask = train_mask.to(args['device'])
        val_mask = val_mask.to(args['device'])
        test_mask = test_mask.to(args['device'])
        model.train()
        logits, _ = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print(
            'Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
            'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.
            format(epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                   val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    model.eval()
    _, embedding = model(g, features)
    embed_file = open("./node_embedding.txt", "w")
    for k in range(embedding.shape[0]):
        embed_file.write('a' + str(k) + " ")
        for l in range(embedding.shape[1] - 1):
            embed_file.write(str(embedding[k][l].item()) + " ")
        embed_file.write(str(embedding[k][-1].item()) + "\n")
    embed_file.close()
    #test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    #print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
    #test_loss.item(), test_micro_f1, test_macro_f1))
    torch.save(model.state_dict(), "./model_para.pt")
Example #9
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_imdb_raw()

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features_m, features_a, features_d = features

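    # Only movies keep their raw features; actors and directors get 10-dimensional zero placeholders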
    features_a = torch.zeros(features_a.shape[0], 10)
    features_d = torch.zeros(features_d.shape[0], 10)

    features_m = features_m.to(args['device'])
    features_a = features_a.to(args['device'])
    features_d = features_d.to(args['device'])

    features = {'movie': features_m, 'actor': features_a, 'director': features_d}

    in_size = {'actor': features_a.shape[1], 'movie': features_m.shape[1], 'director': features_d.shape[1]}

    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

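    # HMSG combines movie-actor-movie and movie-director-movie meta-paths with the direct actor-to-movie and director-to-movie relations, aggregated with attention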
    model = HMSG(meta_paths=[['ma', 'am'], ['md', 'dm'], ['am'], ['dm']],
                 in_size=in_size,
                 hidden_size=args['hidden_units'],
                 out_size=num_classes,
                 aggre_type='attention',
                 num_heads=args['num_heads'],
                 dropout=args['dropout']).to(args['device'])
    g = g.to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        z, logits = model(g, features)

        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1, z = evaluate(model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
             'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
           epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1, z = evaluate(model, g, features, labels, test_mask, loss_fcn)

    emd_imdb, label_imdb = z[test_mask], labels[test_mask]
    np.savetxt('./out/emd_imdb.txt',emd_imdb.cpu())
    np.savetxt('./out/label_imdb.txt', np.array(label_imdb.cpu(), dtype=np.int32))

    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
Example #10
def main(args):
    g, features, train_val_test_pos_user_item, train_val_test_neg_user_item = \
        load_amazon_raw()

    user_feats, item_feats = features
    # user_feats = torch.zeros(user_feats.shape[0], 100)
    # item_feats = torch.zeros(item_feats.shape[0], 100)
    user_feats = user_feats.to(args['device'])
    item_feats = item_feats.to(args['device'])
    features = {'user': user_feats, 'item': item_feats}

    in_size = {'user': user_feats.shape[1], 'item': item_feats.shape[1]}

    train_pos_user_item = train_val_test_pos_user_item['train_pos_user_item']
    val_pos_user_item = train_val_test_pos_user_item['val_pos_user_item']
    test_pos_user_item = train_val_test_pos_user_item['test_pos_user_item']

    train_neg_user_item = train_val_test_neg_user_item['train_neg_user_item']
    val_neg_user_item = train_val_test_neg_user_item['val_neg_user_item']
    test_neg_user_item = train_val_test_neg_user_item['test_neg_user_item']

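    # Shuffle the negative user-item pairs and keep as many as there are positives, so each split is balanced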
    np.random.shuffle(train_neg_user_item)
    train_neg_user_item = train_neg_user_item[:train_pos_user_item.shape[0]]
    np.random.shuffle(val_neg_user_item)
    val_neg_user_item = val_neg_user_item[:val_pos_user_item.shape[0]]
    np.random.shuffle(test_neg_user_item)
    test_neg_user_item = test_neg_user_item[:test_pos_user_item.shape[0]]

    y_true_test = np.array([1] * len(test_pos_user_item) +
                           [0] * len(test_neg_user_item))

    auc_list = []
    ap_list = []

    model = HMSG(
        meta_paths=[['ui', 'iu'], ['iu', 'ui'], ['ui'], ['iu']],  #
        in_size=in_size,
        hidden_size=args['hidden_units'],
        aggre_type='mean',
        num_heads=args['num_heads'],
        dropout=args['dropout']).to(args['device'])
    g = g.to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        embeddings = model(g, features)
        user_embed, item_embed = embeddings['user'], embeddings['item']

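        # Score each (user, item) pair with a dot product, computed as a batched (1 x d) @ (d x 1) matmul; negative scores are negated so the log-sigmoid loss pushes them down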
        pos_embedding_user = user_embed[train_pos_user_item[:, 0]].view(
            -1, 1, user_embed.shape[1])
        pos_embedding_item = item_embed[train_pos_user_item[:, 1]].view(
            -1, item_embed.shape[1], 1)
        neg_embedding_user = user_embed[train_neg_user_item[:, 0]].view(
            -1, 1, user_embed.shape[1])
        neg_embedding_item = item_embed[train_neg_user_item[:, 1]].view(
            -1, item_embed.shape[1], 1)

        pos_out = torch.bmm(pos_embedding_user,
                            pos_embedding_item)  #.view(-1, 5)
        # pos_out = torch.mean(pos_out, dim=1)
        neg_out = -torch.bmm(neg_embedding_user,
                             neg_embedding_item)  #.view(-1, 5)
        # neg_out = torch.sum(neg_out, dim=1)

        train_loss = -torch.mean(F.logsigmoid(pos_out) + F.logsigmoid(neg_out))
        # print(train_loss.item())
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            embeddings = model(g, features)
            user_embed, item_embed = embeddings['user'], embeddings['item']

            pos_embedding_user = user_embed[val_pos_user_item[:, 0]].view(
                -1, 1, user_embed.shape[1])
            pos_embedding_item = item_embed[val_pos_user_item[:, 1]].view(
                -1, item_embed.shape[1], 1)
            neg_embedding_user = user_embed[val_neg_user_item[:, 0]].view(
                -1, 1, user_embed.shape[1])
            neg_embedding_item = item_embed[val_neg_user_item[:, 1]].view(
                -1, item_embed.shape[1], 1)

            pos_out = torch.bmm(pos_embedding_user, pos_embedding_item)
            neg_out = -torch.bmm(neg_embedding_user, neg_embedding_item)
            val_loss = -torch.mean(
                F.logsigmoid(pos_out) + F.logsigmoid(neg_out))

        early_stop = stopper.step(val_loss.data.item(), model)
        if early_stop:
            break

    stopper.load_checkpoint(model)
    model.eval()

    with torch.no_grad():
        embeddings = model(g, features)
        user_embed, item_embed = embeddings['user'], embeddings['item']

        pos_embedding_user = user_embed[test_pos_user_item[:, 0]].view(
            -1, 1, user_embed.shape[1])
        pos_embedding_item = item_embed[test_pos_user_item[:, 1]].view(
            -1, item_embed.shape[1], 1)
        neg_embedding_user = user_embed[test_neg_user_item[:, 0]].view(
            -1, 1, user_embed.shape[1])
        neg_embedding_item = item_embed[test_neg_user_item[:, 1]].view(
            -1, item_embed.shape[1], 1)

        pos_out = torch.bmm(pos_embedding_user, pos_embedding_item).flatten()
        neg_out = torch.bmm(neg_embedding_user, neg_embedding_item).flatten()

        pos_proba = torch.sigmoid(pos_out)
        neg_proba = torch.sigmoid(neg_out)
        y_proba_test = list(pos_proba.cpu().numpy()) + list(
            neg_proba.cpu().numpy())
    auc = roc_auc_score(y_true_test, y_proba_test)
    ap = average_precision_score(y_true_test, y_proba_test)
    print('--------------Link Prediction Test--------------')
    print('AUC = {:.4f}'.format(auc))
    print('AP = {:.4f}'.format(ap))
    np.save('./out/res.npy', [auc, ap])
Example #11
def main(args):
    # acm data
    if args['dataset'] == 'ACMRaw':
        from utils import load_data
        g, features, labels, n_classes, train_nid, val_nid, test_nid, train_mask, \
        val_mask, test_mask = load_data('ACMRaw')
        metapath_list = [['pa', 'ap'], ['pf', 'fp']]
    else:
        raise NotImplementedError('Unsupported dataset {}'.format(
            args['dataset']))

    # Should we set a different number of neighbors for each meta-path based graph?
    num_neighbors = args['num_neighbors']
    han_sampler = HANSampler(g, metapath_list, num_neighbors)
    # Create PyTorch DataLoader for constructing blocks
    dataloader = DataLoader(dataset=train_nid,
                            batch_size=args['batch_size'],
                            collate_fn=han_sampler.sample_blocks,
                            shuffle=True,
                            drop_last=False,
                            num_workers=4)

    model = HAN(num_metapath=len(metapath_list),
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=n_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(args['device'])

    total_params = sum(p.numel() for p in model.parameters())
    print("total_params: {:d}".format(total_params))
    total_trainable_params = sum(p.numel() for p in model.parameters()
                                 if p.requires_grad)
    print("total trainable params: {:d}".format(total_trainable_params))

    stopper = EarlyStopping(patience=args['patience'])
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

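    # Mini-batch training: each step consumes one sampled block (message flow graph) per meta-path around the seed nodes, with features gathered by load_subtensors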
    for epoch in range(args['num_epochs']):
        model.train()
        for step, (seeds, blocks) in enumerate(dataloader):
            h_list = load_subtensors(blocks, features)
            blocks = [block.to(args['device']) for block in blocks]
            hs = [h.to(args['device']) for h in h_list]

            logits = model(blocks, hs)
            loss = loss_fn(logits,
                           labels[numpy.asarray(seeds)].to(args['device']))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print info in each batch
            train_acc, train_micro_f1, train_macro_f1 = score(
                logits, labels[numpy.asarray(seeds)])
            print(
                "Epoch {:d} | loss: {:.4f} | train_acc: {:.4f} | train_micro_f1: {:.4f} | train_macro_f1: {:.4f}"
                .format(epoch + 1, loss, train_acc, train_micro_f1,
                        train_macro_f1))
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, metapath_list, num_neighbors, features, labels, val_nid,
            loss_fn, args['batch_size'])
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print(
            'Epoch {:d} | Val loss {:.4f} | Val Accuracy {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'
            .format(epoch + 1, val_loss.item(), val_acc, val_micro_f1,
                    val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, metapath_list, num_neighbors, features, labels, test_nid,
        loss_fn, args['batch_size'])
    print(
        'Test loss {:.4f} | Test Accuracy {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'
        .format(test_loss.item(), test_acc, test_micro_f1, test_macro_f1))