Example #1
import torch as th
import torch.nn as nn
from torch.utils.data import DataLoader

# Dataset, collate function, and models are assumed to follow the
# Tencent Alchemy baseline layout (Alchemy_dataset.py, sch.py, mgcn.py).
from Alchemy_dataset import TencentAlchemyDataset, batcher
from sch import SchNetModel
from mgcn import MGCNModel


def train(model="sch", epochs=80, device=th.device("cpu")):
    alchemy_dataset = TencentAlchemyDataset()
    alchemy_loader = DataLoader(dataset=alchemy_dataset,
                                batch_size=20,
                                collate_fn=batcher(device),
                                shuffle=False,
                                num_workers=0)

    if model == "sch":
        model = SchNetModel(norm=True, output_dim=12)
    elif model == "mgcn":
        model = MGCNModel(norm=True, output_dim=12)

    model.set_mean_std(alchemy_dataset.mean, alchemy_dataset.std, device)
    model.to(device)

    loss_fn = nn.MSELoss()
    MAE_fn = nn.L1Loss()
    optimizer = th.optim.Adam(model.parameters(), lr=0.0001)

    for epoch in range(epochs):

        w_loss, w_mae = 0, 0
        model.train()

        for idx, batch in enumerate(alchemy_loader):

            res = model(batch.graph)
            loss = loss_fn(res, batch.label)
            mae = MAE_fn(res, batch.label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            w_mae += mae.detach().item()
            w_loss += loss.detach().item()

        w_mae /= idx + 1
        w_loss /= idx + 1
        print("Epoch {:2d}, loss: {:.7f}, mae: {:.7f}".format(
            epoch, w_loss, w_mae))
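A hypothetical driver for the train() function above; the device selection and argument values are illustrative, not from the source:

if __name__ == "__main__":
    # prefer a GPU when one is available (illustrative default)
    dev = th.device("cuda:0" if th.cuda.is_available() else "cpu")
    train(model="sch", epochs=80, device=dev)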
Example #2
File: gin.py  Project: zaixizhang/Alchemy
import torch
import torch.nn as nn
import torch.nn.functional as F

from Alchemy_dataset import TencentAlchemyDataset
from torch_geometric.nn import GINConv, Set2Set
from torch_geometric.data import DataLoader

import pandas as pd

train_dataset = TencentAlchemyDataset(root='data-bin', mode='dev').shuffle()
valid_dataset = TencentAlchemyDataset(root='data-bin', mode='valid')

valid_loader = DataLoader(valid_dataset, batch_size=64)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)


class GIN(torch.nn.Module):
    def __init__(self,
                 node_input_dim=15,
                 output_dim=12,
                 node_hidden_dim=64,
                 num_step_prop=6,
                 num_step_set2set=6):
        super(GIN, self).__init__()
        self.num_step_prop = num_step_prop
        self.lin0 = nn.Linear(node_input_dim, node_hidden_dim)
        self.mlps = torch.nn.ModuleList()
        self.convs = torch.nn.ModuleList()
        for i in range(num_step_prop):
            self.mlps.append(
                nn.Sequential(nn.Linear(node_hidden_dim, node_hidden_dim),
                              nn.ReLU(),
                              nn.Linear(node_hidden_dim, node_hidden_dim)))
            self.convs.append(GINConv(self.mlps[i]))
        # readout and output layers (completed per the standard GIN baseline)
        self.set2set = Set2Set(node_hidden_dim, processing_steps=num_step_set2set)
        self.lin1 = nn.Linear(2 * node_hidden_dim, node_hidden_dim)
        self.lin2 = nn.Linear(node_hidden_dim, output_dim)

    def forward(self, data):
        out = F.relu(self.lin0(data.x))
        for conv in self.convs:
            out = F.relu(conv(out, data.edge_index))
        out = self.set2set(out, data.batch)  # graph-level readout
        out = F.relu(self.lin1(out))
        return self.lin2(out)
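gin.py's own training loop is not shown on this page; the following is a minimal sketch of fitting the GIN above with the loaders already defined, where the Adam optimizer, learning rate, and MSE loss are assumptions rather than the project's settings:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GIN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

model.train()
for data in train_loader:                # torch_geometric batches
    data = data.to(device)
    optimizer.zero_grad()
    loss = loss_fn(model(data), data.y)  # data.y: the 12 regression targets
    loss.backward()
    optimizer.step()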
Example #3

def eval(model="sch",
         epochs=80,
         device=th.device("cpu"),
         train_dataset='',
         eval_dataset='',
         epoch=1):
    print("start")
    epoch = int(epoch)
    test_dataset = TencentAlchemyDataset()
    test_dir = './'
    test_file = train_dataset + '_' + eval_dataset + "_cross.csv"
    test_dataset.mode = "Train"
    test_dataset.transform = None
    test_dataset.file_path = test_file
    test_dataset._load()

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=10,
        collate_fn=batcher(),
        shuffle=False,
        num_workers=0,
    )

    if model == "sch":
        model = SchNetModel(norm=False, output_dim=1)
    elif model == "mgcn":
        model = MGCNModel(norm=False, output_dim=1)
    elif model == "MPNN":
        model = MPNNModel(output_dim=1)
    print(model)
    # if model.name in ["MGCN", "SchNet"]:
    #     model.set_mean_std(mean, std, device)
    model.load_state_dict(
        th.load('./' + train_dataset + "/model_" + str(epoch)))
    model.to(device)
    model.eval()  # inference only: disable training-mode behaviour

    loss_fn = nn.MSELoss()
    MAE_fn = nn.L1Loss()
    # optimizer = th.optim.Adam(model.parameters(), lr=0.0001)

    val_loss, val_mae = 0, 0
    res_file = open(
        train_dataset + '_' + eval_dataset + str(epoch) + "_crossres.txt", 'w')
    for jdx, batch in enumerate(test_loader):
        batch.graph.to(device)
        batch.label = batch.label.to(device)

        res = model(batch.graph)
        res_np = res.cpu().detach().numpy()
        label_np = batch.label.cpu().detach().numpy()
        for i in range(len(res_np)):
            res_file.write(str(res_np[i][0]) + '\t')
            res_file.write(str(label_np[i][0]) + '\n')

        loss = loss_fn(res, batch.label)
        mae = MAE_fn(res, batch.label)

        # optimizer.zero_grad()
        # loss.backward()
        # optimizer.step()

        val_mae += mae.detach().item()
        val_loss += loss.detach().item()
    res_file.close()
    val_mae /= jdx + 1
    val_loss /= jdx + 1
    print("Epoch {:2d}, val_loss: {:.7f}, val_mae: {:.7f}".format(
        epoch, val_loss, val_mae))
    print("test_dataset.mean= %s" % (test_dataset.mean))
    print("test_dataset.std= %s" % (test_dataset.std))
Example #4
def train(model="sch",
          epochs=80,
          device=th.device("cpu"),
          dataset='',
          save=''):
    print("start")
    train_dir = "./"
    train_file = dataset + "_train.csv"
    alchemy_dataset = TencentAlchemyDataset()
    alchemy_dataset.mode = "Train"
    alchemy_dataset.transform = None
    alchemy_dataset.file_path = train_file
    alchemy_dataset._load()

    test_dataset = TencentAlchemyDataset()
    test_dir = train_dir
    test_file = dataset + "_valid.csv"
    test_dataset.mode = "Train"
    test_dataset.transform = None
    test_dataset.file_path = test_file
    test_dataset._load()

    alchemy_loader = DataLoader(
        dataset=alchemy_dataset,
        batch_size=10,
        collate_fn=batcher(),
        shuffle=False,
        num_workers=0,
    )
    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=10,
        collate_fn=batcher(),
        shuffle=False,
        num_workers=0,
    )

    if model == "sch":
        model = SchNetModel(norm=False, output_dim=1)
    elif model == "mgcn":
        model = MGCNModel(norm=False, output_dim=1)
    print(model)
    # if model.name in ["MGCN", "SchNet"]:
    #     model.set_mean_std(alchemy_dataset.mean, alchemy_dataset.std, device)
    model.to(device)
    # print("test_dataset.mean= %s" % (alchemy_dataset.mean))
    # print("test_dataset.std= %s" % (alchemy_dataset.std))

    loss_fn = nn.MSELoss()
    MAE_fn = nn.L1Loss()
    optimizer = th.optim.Adam(model.parameters(), lr=0.0001)
    scheduler = th.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                        mode='min',
                                                        factor=0.9,
                                                        patience=10,
                                                        threshold=0.0000001,
                                                        threshold_mode='rel',
                                                        cooldown=0,
                                                        min_lr=0.000001,
                                                        eps=1e-08,
                                                        verbose=False)

    for epoch in range(epochs):

        w_loss, w_mae = 0, 0
        model.train()

        for idx, batch in enumerate(alchemy_loader):
            batch.graph.to(device)
            batch.label = batch.label.to(device)

            res = model(batch.graph)
            loss = loss_fn(res, batch.label)
            mae = MAE_fn(res, batch.label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            w_mae += mae.detach().item()
            w_loss += loss.detach().item()
        w_mae /= idx + 1
        w_loss /= idx + 1
        scheduler.step(w_mae)

        print("Epoch {:2d}, loss: {:.7f}, mae: {:.7f}".format(
            epoch, w_loss, w_mae))

        model.eval()  # evaluation mode for the validation pass
        val_loss, val_mae = 0, 0
        for jdx, batch in enumerate(test_loader):
            batch.graph.to(device)
            batch.label = batch.label.to(device)

            res = model(batch.graph)
            loss = loss_fn(res, batch.label)
            mae = MAE_fn(res, batch.label)

            # optimizer.zero_grad()
            # mae.backward()
            # optimizer.step()

            val_mae += mae.detach().item()
            val_loss += loss.detach().item()
        val_mae /= jdx + 1
        val_loss /= jdx + 1
        print("Epoch {:2d}, val_loss: {:.7f}, val_mae: {:.7f}".format(
            epoch, val_loss, val_mae))

        if epoch % 200 == 0:
            th.save(model.state_dict(), save + "/model_" + str(epoch))
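A hypothetical entry point for this variant; "delaney" is a placeholder dataset prefix, and save must name an existing directory. Note that with the default epochs=80 the "epoch % 200" guard saves only the epoch-0 checkpoint, so a longer run is assumed:

# expects delaney_train.csv and delaney_valid.csv next to the script
train(model="mgcn", epochs=400, device=th.device("cpu"),
      dataset="delaney", save="./delaney")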
Example #5
def train(model="sch", epochs=80, device=th.device("cpu"), dataset=''):
    print("start")
    train_dir = "./"
    train_file = "train_smi.csv"
    alchemy_dataset = TencentAlchemyDataset()
    alchemy_dataset.mode = "Train"
    alchemy_dataset.transform = None
    alchemy_dataset.file_path = train_file
    alchemy_dataset._load()

    test_dataset = TencentAlchemyDataset()
    test_dir = train_dir
    test_file = "val_smi.csv"
    test_dataset.mode = "Train"
    test_dataset.transform = None
    test_dataset.file_path = test_file
    test_dataset._load()

    alchemy_loader = DataLoader(
        dataset=alchemy_dataset,
        batch_size=10,
        collate_fn=batcher(),
        shuffle=False,
        num_workers=0,
    )
    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=10,
        collate_fn=batcher(),
        shuffle=False,
        num_workers=0,
    )

    if model == "sch":
        model = SchNetModel(norm=False, output_dim=1)
    elif model == "mgcn":
        model = MGCNModel(norm=False, output_dim=1)
    elif model == "MPNN":
        model = MPNNModel(output_dim=1)
    print(model)
    # if model.name in ["MGCN", "SchNet"]:
    #     model.set_mean_std(alchemy_dataset.mean, alchemy_dataset.std, device)
    model.to(device)
    # print("test_dataset.mean= %s" % (alchemy_dataset.mean))
    # print("test_dataset.std= %s" % (alchemy_dataset.std))

    loss_fn = nn.MSELoss()
    MAE_fn = nn.L1Loss()
    optimizer = th.optim.Adam(model.parameters(), lr=0.0001)

    for epoch in range(epochs):

        w_loss, w_mae = 0, 0
        model.train()

        for idx, batch in enumerate(alchemy_loader):
            batch.graph.to(device)
            batch.label = batch.label.to(device)

            res = model(batch.graph)
            loss = loss_fn(res, batch.label)
            mae = MAE_fn(res, batch.label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            w_mae += mae.detach().item()
            w_loss += loss.detach().item()
        w_mae /= idx + 1
        w_loss /= idx + 1

        print("Epoch {:2d}, loss: {:.7f}, mae: {:.7f}".format(
            epoch, w_loss, w_mae))

        model.eval()  # evaluation mode for the validation pass
        val_loss, val_mae = 0, 0
        if epoch % 50 == 0:
            res_file = open('val_results_%s.txt' % epoch, 'w')
        for jdx, batch in enumerate(test_loader):
            batch.graph.to(device)
            batch.label = batch.label.to(device)

            res = model(batch.graph)
            loss = loss_fn(res, batch.label)
            mae = MAE_fn(res, batch.label)

            # validation must not update the weights
            # optimizer.zero_grad()
            # mae.backward()
            # optimizer.step()

            val_mae += mae.detach().item()
            val_loss += loss.detach().item()

            res_np = res.cpu().detach().numpy()
            label_np = batch.label.cpu().detach().numpy()

            if epoch % 50 == 0:
                for i in range(len(res_np)):
                    res_file.write(str(res_np[i][0]) + '\t')
                    res_file.write(str(label_np[i][0]) + '\n')

        if epoch % 50 == 0:
            res_file.close()
        val_mae /= jdx + 1
        val_loss /= jdx + 1
        print("Epoch {:2d}, val_loss: {:.7f}, val_mae: {:.7f}".format(
            epoch, val_loss, val_mae))

        if epoch % 50 == 0:
            th.save(model.state_dict(),
                    './' + dataset + "/model_" + str(epoch))
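A sketch of restoring one of the checkpoints saved above for later inference; the dataset name and epoch are placeholders:

dataset = "delaney"  # placeholder: must match the directory used during training
model = SchNetModel(norm=False, output_dim=1)
model.load_state_dict(th.load('./' + dataset + "/model_50"))
model.eval()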