Beispiel #1
0
def transform_setup(graph_u=False,
                    graph_gcn=False,
                    rotation=180,
                    samplePoints=1024,
                    mesh=False,
                    node_translation=0.01):
    """Build the ``(transform, pretransform)`` pair for one model setup.

    Exactly one of three pipelines is selected:

    * neither ``graph_u`` nor ``graph_gcn``: point sampling + random rotation;
    * ``graph_u`` truthy: normalise, centre, sample, rotate, then a k-NN
      graph with ``k=graph_u``;
    * ``graph_gcn`` truthy: either a mesh-based graph (``mesh`` truthy) or a
      sampled k-NN graph with ``k=graph_gcn`` (``mesh`` falsy).

    Args:
        graph_u: Falsy to disable, otherwise the ``k`` of the k-NN graph.
        graph_gcn: Falsy to disable, otherwise the ``k`` of the k-NN graph
            (only used when ``mesh`` is falsy).
        rotation: Either a ``(degrees, axis)`` pair, or a bare number of
            degrees, in which case the rotation is about axis 0.
        samplePoints: Number of points passed to ``T.SamplePoints``.
        mesh: Truthy to build the graph from mesh faces in the
            ``graph_gcn`` setup. Any truthy value (including
            ``"extraFeatures"``) yields the same pipeline.
        node_translation: Currently unused; kept for interface compatibility.

    Returns:
        Tuple ``(transform, pretransform)`` of ``T.Compose`` objects.
    """
    # Callers pass ``(degrees, axis)``, but the scalar default is not
    # subscriptable; treat a bare number as "degrees about axis 0".
    try:
        degrees, axis = rotation[0], rotation[1]
    except TypeError:
        degrees, axis = rotation, 0

    # The pre-transform is the same for every setup.
    pretransform = T.Compose([T.NormalizeScale(), T.Center()])

    if not graph_u and not graph_gcn:
        # Default transformation: point sampling and rotating
        # (scale normalization and centering happen in the pre-transform).
        transform = T.Compose([
            T.SamplePoints(samplePoints),
            T.RandomRotate(degrees, axis)
        ])
        print("pointnet rotation {}".format(rotation))
    elif graph_u:
        transform = T.Compose([
            T.NormalizeScale(),
            T.Center(),
            T.SamplePoints(samplePoints, True, True),
            T.RandomRotate(degrees, axis),
            T.KNNGraph(k=graph_u)
        ])
    else:
        # Only reachable when graph_gcn is truthy.
        if mesh:
            # Both the "extraFeatures" and the plain mesh variant used the
            # very same pipeline, so a single branch suffices.
            transform = T.Compose([
                T.RandomRotate(degrees, axis),
                T.GenerateMeshNormals(),
                T.FaceToEdge(True),
                T.Distance(norm=True),
                T.TargetIndegree(cat=True)
            ])
        else:
            transform = T.Compose([
                T.SamplePoints(samplePoints, True, True),
                T.KNNGraph(k=graph_gcn),
                T.Distance(norm=True)
            ])
            print("no mesh")
        print("Rotation {}".format(rotation))
        print("Meshing {}".format(mesh))

    return transform, pretransform
Beispiel #2
0
def load_dataset(path, specify_target):
    """Return cached ``(train, valid, test)`` QM9 splits with transforms set.

    The splits are cached in ``path + 'train_valid_test.ckpt'``. If the cache
    is missing, the dataset is loaded, shuffled, split 80/10/10 (test first,
    then validation, remainder training), saved, and the function calls
    itself again so that the result always comes from the cache file.

    Args:
        path: Dataset root; also the prefix of the cache file name (it is
            concatenated as-is, so it should end with a path separator).
        specify_target: Index applied to ``data.y`` to pick the target.

    Returns:
        Tuple ``(train, valid, test)`` of datasets.
    """
    class SpecifyTarget(object):
        """Reduce ``data.y`` to the requested target, flattened to 1-D."""

        def __call__(self, data):
            picked = data.y[specify_target]
            data.y = picked.view(-1)
            return data

    transform = T.Compose([SpecifyTarget(), Complete(), T.Distance(norm=True)])

    print('Check split dataset...')
    save_path = path + 'train_valid_test.ckpt'
    if not os.path.isfile(save_path):
        # No cache yet: create the split, store it, then reload via recursion.
        print('Load dataset...')
        dataset = QM9Dataset(root=path).shuffle()

        print('Split the dataset...')
        fold = len(dataset) // 10
        test_dataset = dataset[:fold]
        valid_dataset = dataset[fold:fold * 2]
        train_dataset = dataset[fold * 2:]
        total = len(train_dataset) + len(valid_dataset) + len(test_dataset)
        assert total == len(dataset)

        print('Save dataset...')
        torch.save([train_dataset, valid_dataset, test_dataset], save_path)
        return load_dataset(path, specify_target)

    # Cache hit: the transform is attached after loading rather than stored.
    trn, val, test = torch.load(save_path)
    for split in (trn, val, test):
        split.transform = transform
    return trn, val, test
Beispiel #3
0
    def __init__(self):
        """Locate (and if needed create) the QM9 data and init the base class.

        The data lives in ``../../data/QM9`` relative to this file. When the
        directory does not exist, ``QM9(path)`` is instantiated first.
        """
        dataset = "QM9"
        here = osp.dirname(osp.realpath(__file__))
        path = osp.join(here, "../..", "data", dataset)

        target = 0

        class MyTransform(object):
            """Keep only column ``target`` of ``data.y``."""

            def __call__(self, data):
                # Specify target.
                data.y = data.y[:, target]
                return data

        class Complete(object):
            """Replace the edge set by all ordered node pairs (no self loops).

            Existing edge attributes are scattered into the dense
            ``num_nodes * num_nodes`` layout; new edges get zeros.
            """

            def __call__(self, data):
                n = data.num_nodes
                node_ids = torch.arange(n,
                                        dtype=torch.long,
                                        device=data.edge_index.device)
                # row = 0,0,..,0,1,1,..   col = 0,1,..,n-1,0,1,..
                row = node_ids.view(-1, 1).repeat(1, n).view(-1)
                col = node_ids.repeat(n)
                full_index = torch.stack([row, col], dim=0)

                full_attr = None
                if data.edge_attr is not None:
                    # Flat position of every original edge in the dense layout.
                    flat = data.edge_index[0] * n + data.edge_index[1]
                    shape = list(data.edge_attr.size())
                    shape[0] = n * n
                    full_attr = data.edge_attr.new_zeros(shape)
                    full_attr[flat] = data.edge_attr

                full_index, full_attr = remove_self_loops(
                    full_index, full_attr)
                data.edge_attr = full_attr
                data.edge_index = full_index
                return data

        # NOTE(review): this pipeline is built but never handed to the base
        # class below -- confirm whether it was meant to be passed as
        # ``transform``.
        transform = T.Compose(
            [MyTransform(), Complete(),
             T.Distance(norm=False)])
        if not osp.exists(path):
            QM9(path)
        super(QM9Dataset, self).__init__(path)
Beispiel #4
0
            out = out.squeeze(0)

        out = self.set2set(out, data.batch)
        out = F.relu(self.lin1(out))
        out = self.lin2(out)
        return out


# Accumulators for the repeated experiment (filled outside this excerpt).
results = []
results_log = []
# Repeat the experiment five times; every repetition reloads QM9 and draws a
# fresh shuffle and split.
for _ in range(5):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                    '1t-QM9')
    # Complete() followed by un-normalised distances is passed as `transform`.
    dataset = QM9(path,
                  transform=T.Compose([Complete(),
                                       T.Distance(norm=False)]))
    # Keep only the first 12 target columns.
    dataset.data.y = dataset.data.y[:, 0:12]
    dataset = dataset.shuffle()

    # Size of the test and of the validation split (10% each).
    tenpercent = int(len(dataset) * 0.1)
    print("###")
    # Standardise each target column with statistics of the whole dataset
    # (train, validation and test together).
    mean = dataset.data.y.mean(dim=0, keepdim=True)
    std = dataset.data.y.std(dim=0, keepdim=True)
    dataset.data.y = (dataset.data.y - mean) / std
    # `device` is defined outside this excerpt.
    mean, std = mean.to(device), std.to(device)

    print("###")
    # First 10% -> test, next 10% -> validation, remainder -> training.
    test_dataset = dataset[:tenpercent].shuffle()
    val_dataset = dataset[tenpercent:2 * tenpercent].shuffle()
    train_dataset = dataset[2 * tenpercent:].shuffle()
Beispiel #5
0
def run(prop="h**o", gpuid="0", epoch=500, dataset="t2", size=100000):
    """Train an MPNN on a Tencent Alchemy subset and log per-epoch MAE.

    Args:
        prop: Property name forwarded to ``TencentAlchemyDataset`` and used
            in the task / log-file name.
        gpuid: CUDA device index; the model runs on ``"cuda:<gpuid>"``.
        epoch: Number of training epochs.
        dataset: Dataset identifier forwarded to ``TencentAlchemyDataset``.
        size: Number of shuffled samples to use. The last 10000 become the
            test set, the 10000 before them the validation set and the rest
            the training set, so ``size`` should exceed 20000.

    The per-epoch training loss, train/val/test MAE, and finally the best
    validation MAE with its test MAE, are written to ``./logs/<task>.log``
    and to the console.
    """
    import os

    # set logger
    task_name = "MPNN_%s_%s_%s" % (dataset, prop,
                                   datetime.now().strftime("%m%d_%H%M%S"))
    logname = "./logs/%s.log" % (task_name)
    # FileHandler opens its file immediately, so the directory must exist.
    os.makedirs("./logs", exist_ok=True)
    log = logging.getLogger(task_name)
    log.setLevel(logging.INFO)
    fmt = "%(asctime)-s %(levelname)s %(filename)s %(message)s"
    datefmt = "%Y-%m-%d %H:%M:%S"
    handler = logging.FileHandler(filename=logname)  # output to file
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter(fmt, datefmt))
    log.addHandler(handler)
    chler = logging.StreamHandler()  # print to console
    chler.setFormatter(logging.Formatter(fmt, datefmt))
    chler.setLevel(logging.INFO)
    log.addHandler(chler)
    log.info("Experiment of model: %s, dataset size: %d" % (task_name, size))

    device = torch.device("cuda:%s" % (gpuid))
    transform = T.Compose([Complete(), T.Distance(norm=False)])
    dataset = TencentAlchemyDataset(root='./tdata/',
                                    mode='dev',
                                    dataset=dataset,
                                    prop=prop,
                                    transform=transform).shuffle()
    dataset = dataset[:size]
    trainset = dataset[:size - 20000]
    valset = dataset[size - 20000:size - 10000]
    testset = dataset[size - 10000:]
    train_loader = DataLoader(trainset, batch_size=64)
    val_loader = DataLoader(valset, batch_size=64)
    test_loader = DataLoader(testset, batch_size=64)
    model = MPNN(node_input_dim=trainset.num_features).to(device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)

    def mean_batch_mae(loader):
        """Average of the per-batch L1 losses over ``loader``, no gradients."""
        total = 0
        with torch.no_grad():
            for batch in loader:
                batch = batch.to(device)
                total += F.l1_loss(model(batch), batch.y).item()
        return total / len(loader)

    st = time.time()
    # Defined up front so the final summary works even if no epoch improves
    # on the initial (infinite) validation score or no epoch runs at all.
    ed = st
    best_valid = float("inf")
    related_test = float("nan")
    for it in range(epoch):

        # train
        model.train()
        # Reset the accumulators every epoch; otherwise the previous epoch's
        # loss and test MAE leak into this epoch's training statistics.
        loss_all = 0
        mae = 0
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            y_model = model(data)
            loss = F.mse_loss(y_model, data.y)
            mae += F.l1_loss(y_model, data.y).item()
            loss.backward()
            loss_all += loss.item() * data.num_graphs
            optimizer.step()
        # loss_all is weighted by graphs per batch, so average per sample.
        loss = loss_all / len(train_loader.dataset)
        train_score = mae / len(train_loader)

        # Evaluate in eval mode and without building autograd graphs.
        model.eval()
        valid_score = mean_batch_mae(val_loader)
        test_score = mean_batch_mae(test_loader)

        log.info("Epoch {:2d}, train loss {:.7f}, test loss no, \
                 train mae {:.7f}, val mae {:.7f}, test mae {:.7f}".format(
            it, loss, train_score, valid_score, test_score))

        if valid_score < best_valid:
            best_valid = valid_score
            related_test = test_score
            ed = time.time()

    log.info(
        "Best val mae: {:.7f}  Related test mae: {:.7f}  Time cost: {:.0f}".
        format(best_valid, related_test, ed - st))
Beispiel #6
0
            edge_attr = data.edge_attr.new_zeros(size)
            edge_attr[idx] = data.edge_attr

        edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
        data.edge_attr = edge_attr
        data.edge_index = edge_index

        return data


# Accumulators for the repeated experiment (filled outside this excerpt).
results = []
results_log = []
# Repeat the experiment five times; every repetition reloads QM9 and draws a
# fresh shuffle and split.
for _ in range(5):

    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'datasets', 'QM9')
    dataset = QM9(path, transform=T.Compose([Complete(), T.Distance(norm=False)]))
    # Keep only the first 12 target columns.
    dataset.data.y = dataset.data.y[:, 0:12]

    dataset = dataset.shuffle()

    tenpercent = int(len(dataset) * 0.1)
    # Standardise each target column with statistics of the whole dataset
    # (train, validation and test together).
    mean = dataset.data.y.mean(dim=0, keepdim=True)
    std = dataset.data.y.std(dim=0, keepdim=True)
    dataset.data.y = (dataset.data.y - mean) / std
    # `device` is defined outside this excerpt.
    mean, std = mean.to(device), std.to(device)

    # NOTE(review): same value as computed a few lines above; redundant.
    tenpercent = int(len(dataset) * 0.1)
    # First 10% -> test, next 10% -> validation, remainder -> training.
    test_dataset = dataset[:tenpercent].shuffle()
    val_dataset = dataset[tenpercent:2 * tenpercent].shuffle()
    train_dataset = dataset[2 * tenpercent:].shuffle()
Beispiel #7
0
    'lr_scheduler_patience': 5,

    'parallel': False,
    'cuda_devices': [gpu],  # works when parallel=True
    'early_stop_patience': -1,  # -1 for no early stop
}


class SpecifyTarget(object):
    """Transform that reduces ``data.y`` to the task chosen in ``option``."""

    def __call__(self, data):
        # ``option['task']`` indexes the target; the result is flattened.
        chosen = data.y[option['task']]
        data.y = chosen.view(-1)
        return data


print('Load Dataset...')
# Per-sample pipeline: select the target, apply Complete(), then add
# normalised distances.
transform = T.Compose([SpecifyTarget(), Complete(), T.Distance(norm=True)])
path = '../qm9_dataset'
dataset = QM9Dataset(root=path, transform=transform).shuffle()

print('Split the dataset...')
# First tenth -> test, second tenth -> validation, remainder -> training.
one_tenth = len(dataset) // 10
test_dataset = dataset[: one_tenth]
valid_dataset = dataset[one_tenth: one_tenth * 2]
train_dataset = dataset[one_tenth * 2:]
# Sanity check: the three splits cover the dataset exactly once.
assert len(train_dataset) + len(valid_dataset) + len(test_dataset) == len(dataset)

print('Training init...')
model = BGNN()
# `option` is the configuration dict defined above this excerpt.
trainer = Trainer(option, model, \
                  train_dataset, valid_dataset, test_dataset)
trainer.train()
# Remaining command-line options (`parser` is created above this excerpt).
parser.add_argument('--num_steps', type=int, default=10)
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--batch_size', type=int, default=512)
parser.add_argument('--pre_epochs', type=int, default=15)
parser.add_argument('--epochs', type=int, default=15)
parser.add_argument('--runs', type=int, default=20)
parser.add_argument('--test_samples', type=int, default=100)
args = parser.parse_args()

# pre_filter1 keeps every non-empty sample; pre_filter2 additionally drops
# samples whose name starts with '2007'.
pre_filter1 = lambda d: d.num_nodes > 0  # noqa
pre_filter2 = lambda d: d.num_nodes > 0 and d.name[:4] != '2007'  # noqa

# Build a graph per sample: Delaunay triangulation -> edges -> edge features
# (distances if --isotropic is set, Cartesian offsets otherwise).
transform = T.Compose([
    T.Delaunay(),
    T.FaceToEdge(),
    T.Distance() if args.isotropic else T.Cartesian(),
])

path = osp.join('..', 'data', 'PascalVOC-WILLOW')
pretrain_datasets = []
# One pair dataset per category; 'car' and 'motorbike' use the stricter
# filter (pre_filter2).
for category in PascalVOC.categories:
    dataset = PascalVOC(path,
                        category,
                        train=True,
                        transform=transform,
                        pre_filter=pre_filter2
                        if category in ['car', 'motorbike'] else pre_filter1)
    pretrain_datasets += [ValidPairDataset(dataset, dataset, sample=True)]
pretrain_dataset = torch.utils.data.ConcatDataset(pretrain_datasets)
pretrain_loader = DataLoader(pretrain_dataset,
                             args.batch_size,
Beispiel #9
0
def main():
    """Train and evaluate ``InfoGraphSemi`` on one QM9 target.

    Parses command-line options, loads and shuffles QM9, standardises the
    selected target column, and splits the data: the first 10000 samples are
    the test set, the next 10000 the validation set, then ``--train-size``
    labelled training samples. Everything after the first 20000 samples is
    also used as the unsupervised training set.

    The model with the lowest validation MAE is checkpointed to
    ``model_<start time>.pt`` and reloaded for the final test evaluation.
    Training stops early once the validation MAE has not improved for more
    than ``--patience`` epochs.
    """
    np.random.seed(0)
    torch.manual_seed(0)
    # --------------------- PARSE ARGS -----------------------
    parser = argparse.ArgumentParser()

    parser.add_argument("--train-size", type=int, default=5000)
    parser.add_argument("--target",
                        type=int,
                        choices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
                        default=0)
    parser.add_argument("--batch-size", type=int, default=20)
    parser.add_argument("--num-epoch", type=int, default=500)
    parser.add_argument("--lr", type=float, default=0.001)
    parser.add_argument("--weight-decay", type=float, default=0.0)
    parser.add_argument("--encoder-hidden-dim", type=int, default=64)
    parser.add_argument("--lamda", type=float, default=0.001)
    parser.add_argument("--patience", type=int, default=30)

    args = parser.parse_args()

    print("- Args ----------------------")
    for k, v in vars(args).items():
        print(" - {}={}".format(k, v))
    print("-----------------------------")

    # --------------------- LOAD DATASET ---------------------
    print("Loading dataset...")
    dataset = QM9(QM9_DATASET_PATH,
                  pre_transform=T.Compose([Complete(),
                                           T.Distance(norm=False)]),
                  transform=TargetLabelSelection(args.target)).shuffle()

    # Standardise the selected target column using the whole dataset; `std`
    # is passed to evaluate() later on.
    mean = dataset.data.y[:, args.target].mean().item()
    std = dataset.data.y[:, args.target].std().item()
    dataset.data.y[:,
                   args.target] = (dataset.data.y[:, args.target] - mean) / std

    test_dataset = dataset[:10000]
    val_dataset = dataset[10000:20000]
    train_dataset = dataset[20000:20000 + args.train_size]

    test_loader = DataLoader(test_dataset, batch_size=args.batch_size)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)

    # The unsupervised set is everything outside test/val and therefore
    # includes the labelled training samples.
    unsup_train_dataset = dataset[20000:]
    unsup_train_loader = DataLoader(unsup_train_dataset,
                                    batch_size=args.batch_size,
                                    shuffle=True)

    print("- Dataset -------------------")
    print(" - # train: {:,}".format(len(train_dataset)))
    print(" - # val: {:,}".format(len(val_dataset)))
    print(" - # test: {:,}".format(len(test_dataset)))
    print(" - # train (unsup.): {:,}".format(len(unsup_train_dataset)))
    print("-----------------------------")

    # --------------------- TRAIN MODEL ----------------------
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = InfoGraphSemi(dataset.num_features,
                          args.encoder_hidden_dim).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     factor=0.7,
                                                     patience=5,
                                                     min_lr=0.000001)

    # Validation error of the untrained model, reported as "epoch 0".
    val_error = evaluate(model, val_loader, std, device)
    print("| Epoch: {:3} | Val MAE: {:10.4f} |".format(0, val_error))
    print("Starting training...")

    start_time = time.time()
    checkpoint_path = "model_{}.pt".format(start_time)
    min_val_error = None
    min_val_epoch = 0
    for epoch in range(1, args.num_epoch + 1):
        train_loss = train(model, train_loader, unsup_train_loader, optimizer,
                           args.lamda, device)
        val_error = evaluate(model, val_loader, std, device)
        scheduler.step(val_error)

        # Checkpoint whenever the validation error reaches a new minimum.
        if min_val_error is None or val_error < min_val_error:
            min_val_error = val_error
            min_val_epoch = epoch
            torch.save(model.state_dict(), checkpoint_path)

        lr = scheduler.optimizer.param_groups[0]['lr']
        elapsed_time = datetime.timedelta(seconds=int(time.time() -
                                                      start_time))
        print(
            "| Epoch: {:3} | time: {} | lr: {:7f} | Train loss: {:8.4f} | Val MAE: {:8.4f} |{}"
            .format(epoch, elapsed_time, lr, train_loss, val_error,
                    " *" if min_val_epoch == epoch else ""))

        if epoch - min_val_epoch > args.patience:
            print("Early stopping...")
            break
    print("Training finished!")

    print("Evaluating on test set...")
    if min_val_error is None:
        # No epoch ran (e.g. --num-epoch 0), so no checkpoint exists: report
        # the untrained model together with its pre-training validation error.
        min_val_error = val_error
    else:
        model.load_state_dict(torch.load(checkpoint_path))
    test_error = evaluate(model, test_loader, std, device)
    print("| Val MAE: {:8.4f} | Test MAE: {:8.4f} |".format(
        min_val_error, test_error))
Beispiel #10
0
        return data.num_nodes > 6  # Remove graphs with less than 6 nodes.


class MyPreTransform(object):
    """Run ``ConnectedThreeMalkin`` on the first five feature columns only.

    The full node-feature matrix is put back afterwards, so only whatever
    else the inner transform attaches to ``data`` is kept.
    """

    def __call__(self, data):
        full_features = data.x
        # Temporarily expose only the leading five columns.
        data.x = full_features[:, :5]
        inner = ConnectedThreeMalkin()
        data = inner(data)
        data.x = full_features
        return data


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                '1-23-QssM9')
# MyFilter / MyPreTransform are passed as pre_filter / pre_transform;
# un-normalised distances are added via `transform`.
dataset = QM9(path,
              transform=T.Compose([T.Distance(norm=False)]),
              pre_transform=MyPreTransform(),
              pre_filter=MyFilter())
# Keep only the first 12 target columns.
dataset.data.y = dataset.data.y[:, 0:12]

# Relabel the iso types as consecutive ids: element [1] of torch.unique(...,
# sorted=True, return_inverse=True) is the index of each value in the sorted
# unique list. Then one-hot encode them as floats.
dataset.data.iso_type_3 = torch.unique(dataset.data.iso_type_3, True, True)[1]
num_i_3 = dataset.data.iso_type_3.max().item() + 1
dataset.data.iso_type_3 = F.one_hot(dataset.data.iso_type_3,
                                    num_classes=num_i_3).to(torch.float)

#gfggg


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
class MyTransform(object):
    """Transform that keeps a single target column of ``data.y``."""

    def __call__(self, data):
        # Specify target: 0 = mu. ``args.target`` may arrive as a string.
        column = int(args.target)
        data.y = data.y[:, column]
        return data


parser = argparse.ArgumentParser()
# No `type=` is given, so a value from the command line arrives as a string;
# it is converted with int() wherever it is used.
parser.add_argument('--target', default=0)
args = parser.parse_args()
target = int(args.target)

print('---- Target: {} ----'.format(target))

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', '1-QM9')
dataset = QM9(path, transform=T.Compose([MyTransform(), T.Distance()]))

dataset = dataset.shuffle()

# Normalize targets to mean = 0 and std = 1.
# The statistics are taken per column from rows [tenpercent:] of the stored
# target matrix.
# NOTE(review): confirm that `dataset.data.y` follows the shuffled order;
# otherwise these rows are not "everything except the test split".
tenpercent = int(len(dataset) * 0.1)
mean = dataset.data.y[tenpercent:].mean(dim=0)
std = dataset.data.y[tenpercent:].std(dim=0)
dataset.data.y = (dataset.data.y - mean) / std

# First 10% -> test, next 10% -> validation, remainder -> training.
test_dataset = dataset[:tenpercent]
val_dataset = dataset[tenpercent:2 * tenpercent]
train_dataset = dataset[2 * tenpercent:]
# Only the training loader reshuffles between epochs.
test_loader = DataLoader(test_dataset, batch_size=64)
val_loader = DataLoader(val_dataset, batch_size=64)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    def __init__(self,
                 landmark='IP',
                 batch_size=64,
                 num_point_samples=2048,
                 patch_size=8.):
        """Set up the patch-based data module for one landmark.

        Args:
            landmark: Landmark name; used in the data root directory and as
                the prefix of the ``_X`` / ``_Y`` / ``_Z`` label columns.
            batch_size: Forwarded to the parent constructor.
            num_point_samples: Number of points drawn by ``SamplePoints`` in
                the per-sample transform.
            patch_size: Forwarded to ``ExtractGeodesicPatch`` in the
                pre-transform.

        Besides calling the parent constructor, this sets ``self.prep_dir``
        and ``self.sample_weights``.
        """
        root = '../data/PatchBasedCompleteDentalDataModule_{}/'.format(
            landmark)

        # Directory handed to InvertibleCompose as its `store_directory`.
        self.prep_dir = os.path.join(root, 'prep_parameters')
        # Column names of the predicted and of the ground-truth coordinates.
        labels = [landmark + '_X', landmark + '_Y', landmark + '_Z']
        gt_labels = [
            'gt_' + landmark + '_X', 'gt_' + landmark + '_Y',
            'gt_' + landmark + '_Z'
        ]

        # read ids used in training
        training_ids = pd.read_csv(os.path.join(
            root, 'normalised_train_data_landmarks.csv'),
                                   header=0,
                                   index_col=0)['StudyID']

        # read predicted landmarks for training examples
        # (all files matching 'v0*.csv' in the root are concatenated, then
        # restricted to the training ids)
        landmarks = pd.concat([
            pd.read_csv(f, header=0, index_col=0)
            for f in glob.glob(os.path.join(root, 'v0*.csv'))
        ])
        landmarks = landmarks[landmarks['StudyID'].isin(training_ids.values)]

        # exactly one prediction row is expected per training id
        assert len(landmarks) == len(training_ids)

        # get ground truth and predicted labels
        gt_landmarks = landmarks[gt_labels].astype(float)
        pred_landmarks = landmarks[labels].astype(float)

        # sample weights are proportional to the prediction error (sum of the
        # absolute per-coordinate differences), since large errors are less
        # likely to occur
        self.sample_weights = torch.from_numpy(
            (pred_landmarks - gt_landmarks.values).abs().sum(1).values)

        # setup preprocessing steps, load them if they were executed before
        pre_transform = local_transforms.InvertibleCompose(
            [
                # convert mesh faces to graph edges
                ptg_transforms.FaceToEdge(remove_faces=False),

                # add edge_attr containing relative euclidean distance
                ptg_transforms.Distance(norm=False, cat=False),
                # extract patch around the point stored under 'patch_center'
                local_transforms.ExtractGeodesicPatch(patch_size,
                                                      key='patch_center'),

                # normalise scale (invertible)
                local_transforms.NormalizeScale(invertible=True),

                # save vertex features
                local_transforms.LabelCloner('x', 'continuous_curvature'),
                ptg_transforms.GenerateMeshNormals(),

                # remove labels used in preprocessing
                local_transforms.LabelCloner('pos', 'mesh_vert'),
                local_transforms.LabelCleaner(['edge_index', 'edge_attr'])
            ],
            invertible=True,
            skip_non_invertible=True,
            store_directory=self.prep_dir).load_parameters()

        # Method defined elsewhere in this class; forwarded to the parent.
        pre_process = self.set_patch_centers

        # Per-sample transform: clone the saved curvature back into 'x',
        # sample points, merge 'norm', z-normalise 'x', and finally drop the
        # helper labels.
        transform = ptg_transforms.Compose([
            local_transforms.LabelCloner('continuous_curvature', 'x'),
            local_transforms.SamplePoints(num_point_samples,
                                          remove_faces=False,
                                          include_normals=True,
                                          include_features=True),
            local_transforms.MergeLabels('norm'),
            local_transforms.ZNormalise('x'),
            local_transforms.LabelCleaner([
                'mesh_vert', 'norm', 'continuous_curvature', 'patch_center',
                'face', 'mesh_norm', 'mesh_color'
            ])
        ])

        super().__init__(root,
                         batch_size=batch_size,
                         pre_process=pre_process,
                         transform=transform,
                         labels=labels,
                         pre_transform=pre_transform)
Beispiel #13
0
if __name__ == '__main__':
    seed_everything()
    # Project-local modules, imported only when run as a script.
    from model import Net
    from arguments import arg_parse
    args = arg_parse()

    # Run configuration: some values come from the command line, the rest
    # are fixed here.
    target = args.target
    dim = 64
    epochs = 500
    batch_size = 20
    lamda = args.lamda
    use_unsup_loss = args.use_unsup_loss
    separate_encoder = args.separate_encoder

    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'QM9')
    transform = T.Compose([MyTransform(), Complete(), T.Distance(norm=False)])
    dataset = QM9(path, transform=transform).shuffle()
    print('num_features : {}\n'.format(dataset.num_features))

    # Normalize targets to mean = 0 and std = 1.
    # Only the selected target column is standardised, using statistics of
    # the whole dataset.
    mean = dataset.data.y[:, target].mean().item()
    std = dataset.data.y[:, target].std().item()
    dataset.data.y[:, target] = (dataset.data.y[:, target] - mean) / std

    # print(type(dataset[0]))
    # print(type(dataset.data.x)) #tensor
    # print(type(dataset.data.y)) #tensor
    # input()

    # Split datasets.
    # The first 10000 shuffled samples form the test set.
    test_dataset = dataset[:10000]
Beispiel #14
0
    def __call__(self, data):
        # True only for graphs with more than 6 nodes, i.e. graphs with 6 or
        # fewer nodes are rejected when this is used as a pre_filter.
        return data.num_nodes > 6


class MyPreTransform(object):
    """Run ``TwoMalkin`` on the first five feature columns only.

    The full node-feature matrix is put back afterwards, so only whatever
    else the inner transform attaches to ``data`` is kept.
    """

    def __call__(self, data):
        full_features = data.x
        # Temporarily expose only the leading five columns.
        data.x = full_features[:, :5]
        inner = TwoMalkin()
        data = inner(data)
        data.x = full_features
        return data


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', '1-2-QM9')
# MyFilter / MyPreTransform are passed as pre_filter / pre_transform;
# T.Distance() (default arguments) is applied via `transform`.
dataset = QM9(path,
              transform=T.Compose([T.Distance()]),
              pre_transform=MyPreTransform(),
              pre_filter=MyFilter())

# Keep only the first 12 target columns.
dataset.data.y = dataset.data.y[:, 0:12]

# Relabel the iso types as consecutive ids: element [1] of torch.unique(...,
# sorted=True, return_inverse=True) is the index of each value in the sorted
# unique list. Then one-hot encode them as floats.
dataset.data.iso_type_2 = torch.unique(dataset.data.iso_type_2, True, True)[1]
num_i_2 = dataset.data.iso_type_2.max().item() + 1
dataset.data.iso_type_2 = F.one_hot(dataset.data.iso_type_2,
                                    num_classes=num_i_2).to(torch.float)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        M_in, M_out = dataset.num_features, 32
Beispiel #15
0
 def __init__(self, config):
     """Store ``config`` and initialise the base dataset from ``config["src"]``.

     The base class receives a transform made of ``Complete()`` followed by
     un-normalised ``T.Distance``.
     """
     pipeline = T.Compose([Complete(), T.Distance(norm=False)])
     self.config = config
     root = config["src"]
     return super(QM9Dataset, self).__init__(root, transform=pipeline)
def graph_transform_2():
    """Return a composed transform: Distance, Cartesian, then BiPolar.

    Every step is created with ``norm=False`` and ``cat=True``.
    """
    steps = [
        transforms_G.Distance(norm=False, cat=True),
        transforms_G.Cartesian(norm=False, cat=True),
        BiPolar(norm=False, cat=True),
    ]
    return transforms_G.Compose(steps)
    def __init__(self, norm=True, cat=True):
        """Remember the options and create the two helper transforms.

        Args:
            norm: Stored on the instance and forwarded to ``Distance``.
            cat: Stored on the instance; both helpers are created with
                ``cat=False`` regardless of this value.
        """
        self.norm, self.cat = norm, cat

        point_pair = transforms.PointPairFeatures(cat=False)
        edge_length = transforms.Distance(norm=norm, cat=False)
        self.ppf = point_pair
        self.distance = edge_length
Beispiel #18
0
from torch_geometric.datasets import PascalVOCKeypoints as PascalVOC
import torch_geometric.transforms as T
from torch_geometric.data import DataLoader
from torch_geometric.data import (InMemoryDataset, Data, download_url,
                                  extract_tar)

from pascalVOCSingle import PascalVOCSingleObject
import torch_geometric.transforms as T

from dgmc.utils import ValidPairDataset

# Keep only samples that have at least one position entry.
pre_filter = lambda data: data.pos.size(0) > 0  # noqa
# Build a graph per sample: Delaunay triangulation -> edges -> edge features.
# The condition is hard-coded to False, so T.Cartesian() is always used.
transform = T.Compose([
    T.Delaunay(),
    T.FaceToEdge(),
    T.Distance() if False else T.Cartesian(),
])

train_datasets = []
test_datasets = []
path = osp.join('..', 'data', 'PascalVOC')
# One train and one test pair dataset per category; 'chair' is skipped.
for category in PascalVOCSingleObject.categories:
        if category != 'chair':
            dataset_single = PascalVOCSingleObject(path, category, train=True, transform=transform,
                                pre_filter=pre_filter)
            train_datasets += [ValidPairDataset(dataset_single, dataset_single, sample=True)]
            dataset = PascalVOCSingleObject(path, category, train=False, transform=transform,
                        pre_filter=pre_filter)
            test_datasets += [ValidPairDataset(dataset, dataset, sample=True)]
train_dataset = torch.utils.data.ConcatDataset(train_datasets)
train_loader = DataLoader(train_dataset, 16, shuffle=True,
Beispiel #19
0
# The GPU index is hard-coded to 3; falls back to the CPU without CUDA.
device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')

parser = argparse.ArgumentParser()
parser.add_argument('--target', type=int, default=0)
parser.add_argument('--dim', type=int, default=64)
args = parser.parse_args()


class MyTransform:
    """Transform that keeps column ``args.target`` of ``data.y``."""

    def __call__(self, data):
        data.y = data.y[:, args.target]
        return data


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'datasets', 'QM9')
transform = T.Compose([MyTransform(), T.Distance()])
dataset = QM9(path, transform=transform).shuffle()

# Normalize targets to mean=0 and std=1
# (only the selected column, using statistics of the whole dataset)
mean = dataset.data.y[:, args.target].mean().item()
std = dataset.data.y[:, args.target].std().item()
dataset.data.y[:, args.target] = (dataset.data.y[:, args.target] - mean) / std

# dataset split: first 10% -> test, next 10% -> validation, rest -> training
tenpercent = int(len(dataset) * 0.1)
test_dataset = dataset[:tenpercent]
val_dataset = dataset[tenpercent:2 * tenpercent]
train_dataset = dataset[2 * tenpercent:]

test_loader = DataLoader(test_dataset, batch_size=256)
val_loader = DataLoader(val_dataset, batch_size=256)
Beispiel #20
0
def main(target=0, dim=64, prefix='mu', seed=42):
    """Train ``Net`` on one QM9 target and log validation / test MAE.

    Args:
        target: Index of the target column to learn.
        dim: Second argument of the ``Net`` constructor.
        prefix: Base name of the log file, ``log/<prefix>.log``.
        seed: Seed passed to ``torch.manual_seed``.

    The train/validation/test split is read from index files in the
    ``processed`` sub-directory of the data path. The test error is only
    re-evaluated in epochs where the validation error is at least as good as
    the best one so far. Training runs for a fixed 300 epochs.
    """
    # Set the random seed
    torch.manual_seed(seed)

    # Set up logging: make sure ./log exists and start from a fresh file.
    log_file = 'log/' + prefix + '.log'
    try:
        os.mkdir('log')
    except FileExistsError:
        pass
    try:
        os.remove(log_file)
    except FileNotFoundError:
        pass
    logging.basicConfig(format='%(message)s',
                        filename=log_file,
                        level=logging.DEBUG)

    # Create the data set
    logging.info(
        'Loading the QM9 dataset.\n target: %i, prefix for log files: %s' %
        (target, prefix))
    here = osp.dirname(osp.realpath(__file__))
    path = osp.join(here, '../../data/qm9/')
    pipeline = T.Compose(
        [MyTransform(target), Complete(), T.Distance(norm=False)])
    dataset = GraphQM9(path, transform=pipeline)

    # Normalize targets to mean = 0 and std = 1 (all columns at once); only
    # the statistics of the selected column are kept afterwards.
    logging.info('Normalizing the data set.')
    raw_targets = dataset.data.y
    col_mean = raw_targets.mean(dim=0, keepdim=True)
    col_std = raw_targets.std(dim=0, keepdim=True)
    dataset.data.y = (raw_targets - col_mean) / col_std
    mean = col_mean[:, target].item()
    std = col_std[:, target].item()
    logging.info(' mean: {:.7f}; standard dev.: {:.7f}'.format(mean, std))

    # Load the indices for the split
    logging.info('Loading split from ' + path + '/processed')
    split_dir = path + '/processed/'
    test_indices = np.loadtxt(split_dir + 'processed_test.dat', dtype=int)
    vali_indices = np.loadtxt(split_dir + 'processed_valid.dat', dtype=int)
    train_indices = np.loadtxt(split_dir + 'processed_train.dat', dtype=int)

    test_dataset = dataset[test_indices.tolist()]
    val_dataset = dataset[vali_indices.tolist()]
    train_dataset = dataset[train_indices.tolist()]

    split_sizes = (len(train_indices), len(vali_indices), len(test_indices))
    logging.info(
        ' training: %i molecules, validation: %i molecules, test: %i molecules.'
        % split_sizes)

    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net(dataset.num_features, dim).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.7,
                                                           patience=5,
                                                           min_lr=0.00001)

    # Count the trainable parameters.
    trainable = [p for p in model.parameters() if p.requires_grad]
    params = sum([np.prod(p.size()) for p in trainable])
    logging.info('Number of trainable parameters: %i' % params)

    # TRAINING
    num_epochs = 300
    logging.info('Starting the training with %i epochs.' % (num_epochs))
    best_val_error = None
    for epoch in range(1, num_epochs + 1):
        # Learning rate in effect during this epoch (read before stepping).
        lr = scheduler.optimizer.param_groups[0]['lr']

        model.train()

        # Calculate the loss (sample-weighted sum over all batches)
        loss_all = 0
        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            batch_loss = F.mse_loss(model(batch), batch.y)
            batch_loss.backward()
            loss_all += batch_loss.item() * batch.num_graphs
            optimizer.step()
        loss = loss_all / len(train_loader.dataset)

        # Calculate the validation error
        val_error = test(model, val_loader, device, std)

        scheduler.step(val_error)

        # Calculate the test error only when validation did not get worse
        improved = best_val_error is None or val_error <= best_val_error
        if improved:
            test_error = test(model, test_loader, device, std)
            best_val_error = val_error

        logging.info(
            'Epoch: {:03d}, LR: {:7f}, Loss: {:.7f}, Validation MAE: {:.7f}, '
            'Test MAE: {:.7f}'.format(epoch, lr, loss, val_error, test_error))

    logging.info(
        '---------------------------------------------------------------------'
    )
    logging.info(
        'Best validation MAE: {:.7f}, corresp. test MAE: {:.7f}'.format(
            best_val_error, test_error))