Example #1
File: main.py Project: hsack6/AGATE
def main(opt):
    # The three boolean flags presumably select the split (train / valid / test); all False loads the full dataset.
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize,
                                   shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize,
                                  shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node

    net = STGCN(opt, kernel_size=2, n_blocks=1,
                state_dim_bottleneck=opt.state_dim,
                annotation_dim_bottleneck=opt.annotation_dim)
    net.double()  # cast parameters to float64
    print(net)

    criterion = nn.MSELoss()
    #criterion = nn.CosineSimilarity(dim=-1, eps=1e-6)

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch': list(range(1, len(train_loss_ls) + 1)),
                       'train_loss': train_loss_ls,
                       'valid_loss': valid_loss_ls,
                       'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    # Restore the checkpoint written by EarlyStopping, then run inference on the full dataset.
    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
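
The train, valid, test, and inference helpers called above are defined elsewhere in main.py and are not part of this excerpt. A minimal sketch of what such a train step could look like, assuming each batch yields an (inputs, targets) pair and using the same criterion and optimizer as above; the body is illustrative, not the project's actual implementation:

def train(epoch, dataloader, net, criterion, optimizer, opt):
    net.train()
    total_loss = 0.0
    for inputs, targets in dataloader:  # batch layout is an assumption
        if opt.cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        loss = criterion(net(inputs), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(dataloader)
    print('Epoch {}: train loss {:.6f}'.format(epoch, avg_loss))
    return avg_loss
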
Example #2
        {
            'num_of_time_conv_filters1': 32,
            'num_of_time_conv_filters2': 64,
            'K_t': 3,
            'num_of_cheb_filters': 32,
            'K': 1,
            'cheb_polys': cheb_polys
        },
        {
            'num_of_time_conv_filters1': 32,
            'num_of_time_conv_filters2': 128,
            'K_t': 3,
            'num_of_cheb_filters': 32,
            'K': 1,
            'cheb_polys': cheb_polys
        }
    ]
    net = STGCN(backbones, 128)
    net.initialize(ctx=ctx)

    loss_function = gluon.loss.L2Loss()

    trainer = Trainer(net.collect_params(), optimizer, {'learning_rate': learning_rate})
    training_dataloader = gluon.data.DataLoader(
        gluon.data.ArrayDataset(training_data_norm, training_target),
        batch_size=batch_size, shuffle=True)
    validation_dataloader = gluon.data.DataLoader(
        gluon.data.ArrayDataset(val_data_norm, val_target),
        batch_size=batch_size, shuffle=False)
    testing_dataloader = gluon.data.DataLoader(
        gluon.data.ArrayDataset(testing_data_norm, testing_target),
        batch_size=batch_size, shuffle=False)

    if not os.path.exists('stgcn_params'):
        os.mkdir('stgcn_params')

    train_loss_list, val_loss_list, test_loss_list = train_model(net, training_dataloader, validation_dataloader, testing_dataloader)
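
train_model is likewise defined outside this excerpt. A minimal sketch of a Gluon training loop with the same signature, assuming the module-level ctx, loss_function, and trainer created above; the epoch count, checkpoint naming, and evaluate helper are illustrative:

from mxnet import autograd, nd

def evaluate(net, dataloader):
    # average L2 loss over a dataloader, without recording gradients
    loss_sum, num_batches = 0.0, 0
    for x, y in dataloader:
        x, y = x.as_in_context(ctx), y.as_in_context(ctx)
        loss_sum += nd.mean(loss_function(net(x), y)).asscalar()
        num_batches += 1
    return loss_sum / num_batches

def train_model(net, training_dataloader, validation_dataloader, testing_dataloader, epochs=100):
    train_loss_list, val_loss_list, test_loss_list = [], [], []
    for epoch in range(epochs):
        epoch_loss, num_batches = 0.0, 0
        for x, y in training_dataloader:
            x, y = x.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                loss = loss_function(net(x), y)
            loss.backward()
            trainer.step(x.shape[0])  # normalize gradients by the batch size
            epoch_loss += nd.mean(loss).asscalar()
            num_batches += 1
        train_loss_list.append(epoch_loss / num_batches)
        val_loss_list.append(evaluate(net, validation_dataloader))
        test_loss_list.append(evaluate(net, testing_dataloader))
        net.save_parameters('stgcn_params/epoch_{}.params'.format(epoch))
    return train_loss_list, val_loss_list, test_loss_list
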
Example #3
def main(dataset):
    batch_size = 1024
    graph = Graph(dataset)
    if dataset == 'meituan':
        data_size = 6
        has_user_region = True
    else:
        data_size = 5
        has_user_region = False
    g = graph.g
    g.readonly()
    embeddings = graph.embeddings
    num_nodes = graph.g.number_of_nodes()
    model = Recommender(STGCN(num_nodes, 64, 27, 2, None, embeddings))
    model.cuda()
    opt = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=0)
    model.train()
    print('train neighbors')
    for i in range(30):
        total_loss = 0
        g.apply_edges(edge_func1)
        neighbors = graph.neighbors
        neg_neighbors = np.random.choice(range(num_nodes), neighbors.shape[0], replace=True).reshape(-1, 1)
        neighbors_data = np.concatenate((neighbors, neg_neighbors), axis=1)
        data = torch.from_numpy(neighbors_data).cuda()
        seed_nodes = data.reshape(-1)
        batches = data.split(batch_size)
        sampler = dgl.contrib.sampling.NeighborSampler(
            g,
            batch_size * 3,
            5,
            2,
            seed_nodes=seed_nodes,
            num_workers=11)
        count = 0
        for batch, nf in zip(batches, sampler):
            nf.copy_from_parent(ctx=device)
            batch_nid = nf.map_from_parent_nid(-1, batch.reshape(-1), True)
            batch_nid = batch_nid.reshape(-1, 3).cuda()
            loss = model.train_region(nf, batch_nid)
            opt.zero_grad()
            loss.backward()
            total_loss += loss.item()
            opt.step()
            count += 1
        print('loss', total_loss / count)

    for epoch in range(300):
        model.train()
        begin = time.time()
        total_loss = 0
        count = 0
        for t in range(0, 24, 2):
            g.apply_edges(partial(edge_func, t))
            pos = graph.time_train[t]
            neg_pois = graph.pid2rid[['p', 'r']].sample(n=pos.shape[0], replace=True).to_numpy(copy=True)
            data = np.concatenate((pos, neg_pois), axis=1)
            data = data.astype(np.int64)  # keep the cast result: node ids must be integral before torch.from_numpy
            data = torch.from_numpy(data).cuda()
            seed_nodes = data.reshape(-1)
            batches = data.split(batch_size)
            sampler = dgl.contrib.sampling.NeighborSampler(
                g,
                batch_size * data_size,
                5,
                2,
                seed_nodes=seed_nodes,
                transition_prob='final_weight',
                prefetch=False,
                num_workers=11)
            for batch, nf in zip(batches, sampler):
                nf.copy_from_parent(ctx=device)
                batch_nid = nf.map_from_parent_nid(-1, batch.reshape(-1), True)
                batch_nid = batch_nid.reshape(-1, data_size).cuda()
                loss = model(nf, batch_nid, has_user_region=has_user_region)
                opt.zero_grad()
                loss.backward()
                total_loss += loss.item()
                opt.step()
                count += 1
        print('epoch:{}, loss:{}, time:{}'.format(epoch, total_loss / count, time.time() - begin))
        if epoch % 20 == 0 and epoch != 0:
            model.eval()
            with torch.no_grad():
                recallk(graph, model, has_user_region=has_user_region)
    model.eval()
    with torch.no_grad():
        recallk(graph, model, has_user_region=has_user_region)