Example 1
def main():

    global args, best_acc1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath
    subset = args.subSet

    print('Prepare files')

    train_classes, train_ids = read_cxl(os.path.join(root, subset,
                                                     'train.cxl'))
    test_classes, test_ids = read_cxl(os.path.join(root, subset, 'test.cxl'))
    valid_classes, valid_ids = read_cxl(
        os.path.join(root, subset, 'validation.cxl'))

    class_list = list(set(train_classes + test_classes))
    num_classes = len(class_list)
    data_train = datasets.LETTER(root, subset, train_ids, train_classes,
                                 class_list)
    data_valid = datasets.LETTER(root, subset, valid_ids, valid_classes,
                                 class_list)
    data_test = datasets.LETTER(root, subset, test_ids, test_classes,
                                class_list)

    # Define model and optimizer
    print('Define model')
    # Select one graph
    g_tuple, l = data_train[0]
    g, h_t, e = g_tuple

    # TODO: needs attention
    print('\tStatistics')
    stat_dict = {}
    # stat_dict = datasets.utils.get_graph_stats(data_train, ['edge_labels'])
    stat_dict['edge_labels'] = [1]

    # Data Loader
    train_loader = torch.utils.data.DataLoader(
        data_train,
        batch_size=args.batch_size,
        shuffle=True,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        data_valid,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        data_test,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)

    print('\tCreate model')
    model = MpnnIntNet([len(h_t[0]), len(list(e.values())[0])], [5, 15, 15],
                       [10, 20, 20],
                       num_classes,
                       type='classification')

    print('Optimizer')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    criterion = nn.NLLLoss()

    evaluation = utils.accuracy

    print('Logger')
    logger = Logger(args.logPath)

    lr_step = (args.lr - args.lr * args.lr_decay) / (
        args.epochs * args.schedule[1] - args.epochs * args.schedule[0])
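    # lr_step: constant per-epoch decrement taking the learning rate
    # linearly from args.lr down to args.lr * args.lr_decay over the window
    # between epochs * schedule[0] and epochs * schedule[1].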

    # load the best checkpoint, if one is available, before training
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {}; accuracy {})".format(
                best_model_file, checkpoint['epoch'], best_acc1))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    print('Check cuda')
    if args.cuda:
        print('\t* Cuda')
        model = model.cuda()
        criterion = criterion.cuda()

    # Epoch for loop
    for epoch in range(0, args.epochs):

        if (args.epochs * args.schedule[0] < epoch <
                args.epochs * args.schedule[1]):
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation,
              logger)

        # evaluate on the validation set
        acc1 = validate(valid_loader, model, criterion, evaluation, logger)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            },
            is_best=is_best,
            directory=args.resume)

        # Logger step
        logger.log_value('learning_rate', args.lr).step()

    # load the best checkpoint and evaluate it on the test set
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {}; accuracy {})".format(
                best_model_file, checkpoint['epoch'], best_acc1))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    # For testing
    validate(test_loader, model, criterion, evaluation)
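
A note on the checkpointing used throughout these examples: utils.save_checkpoint is not shown. A minimal sketch of what it is assumed to do, writing the latest state to disk and copying it to model_best.pth when the run improved:

import os
import shutil
import torch

def save_checkpoint(state, is_best, directory):
    # Persist the latest training state; keep a separate copy of the best one.
    if not os.path.isdir(directory):
        os.makedirs(directory)
    checkpoint_file = os.path.join(directory, 'checkpoint.pth')
    best_model_file = os.path.join(directory, 'model_best.pth')
    torch.save(state, checkpoint_file)
    if is_best:
        shutil.copyfile(checkpoint_file, best_model_file)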
Example 2
def main():

    global args, best_acc1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath

    print('Prepare files')
    files = [
        f for f in os.listdir(root) if os.path.isfile(os.path.join(root, f))
    ]

    idx = np.random.permutation(len(files))
    idx = idx.tolist()

    valid_ids = [files[i] for i in idx[0:10000]]
    test_ids = [files[i] for i in idx[10000:20000]]
    train_ids = [files[i] for i in idx[20000:]]

    data_train = datasets.Qm9(root, train_ids)
    data_valid = datasets.Qm9(root, valid_ids)
    data_test = datasets.Qm9(root, test_ids)

    # Define model and optimizer
    print('Define model')
    # Select one graph
    g_tuple, l = data_train[0]
    g, h_t, e = g_tuple

    print('\tStatistics')
    # stat_dict = datasets.utils.get_graph_stats(data_valid, ['degrees', 'target_mean', 'target_std', 'edge_labels'])

    stat_dict = {}

    stat_dict['degrees'] = [1, 2, 3, 4]
    stat_dict['target_mean'] = np.array([
        2.71802732e+00, 7.51685080e+01, -2.40259300e-01, 1.09503300e-02,
        2.51209430e-01, 1.18997445e+03, 1.48493130e-01, -4.11609491e+02,
        -4.11601022e+02, -4.11600078e+02, -4.11642909e+02, 3.15894998e+01
    ])
    stat_dict['target_std'] = np.array([
        1.58422291e+00, 8.29443552e+00, 2.23854977e-02, 4.71030547e-02,
        4.77156393e-02, 2.80754665e+02, 3.37238236e-02, 3.97717205e+01,
        3.97715029e+01, 3.97715029e+01, 3.97722334e+01, 4.09458852e+00
    ])
    stat_dict['edge_labels'] = [1, 2, 3, 4]

    data_train.set_target_transform(lambda x: datasets.utils.normalize_data(
        x, stat_dict['target_mean'], stat_dict['target_std']))
    data_valid.set_target_transform(lambda x: datasets.utils.normalize_data(
        x, stat_dict['target_mean'], stat_dict['target_std']))
    data_test.set_target_transform(lambda x: datasets.utils.normalize_data(
        x, stat_dict['target_mean'], stat_dict['target_std']))

    # Data Loader
    train_loader = torch.utils.data.DataLoader(
        data_train,
        batch_size=args.batch_size,
        shuffle=True,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        data_valid,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        data_test,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)

    print('\tCreate model')
    model = MpnnDuvenaud(stat_dict['degrees'],
                         [len(h_t[0]), len(list(e.values())[0])], [5, 15, 15],
                         30,
                         len(l),
                         type='regression')

    print('Optimizer')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    criterion = nn.MSELoss()

    def evaluation(output, target):
        # Mean relative absolute error: |output - target| / |target|, averaged.
        return torch.mean(torch.abs(output - target) / torch.abs(target))

    print('Logger')
    logger = Logger(args.logPath)

    lr_step = (args.lr - args.lr * args.lr_decay) / (
        args.epochs * args.schedule[1] - args.epochs * args.schedule[0])

    # load the best checkpoint, if one is available, before training
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(
                best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    print('Check cuda')
    if args.cuda:
        print('\t* Cuda')
        model = model.cuda()
        criterion = criterion.cuda()

    # Epoch for loop
    for epoch in range(0, args.epochs):

        if (args.epochs * args.schedule[0] < epoch <
                args.epochs * args.schedule[1]):
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation,
              logger)

        # evaluate on the validation set
        acc1 = validate(valid_loader, model, criterion, evaluation, logger)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            },
            is_best=is_best,
            directory=args.resume)

        # Logger step
        logger.log_value('learning_rate', args.lr).step()

    # load the best checkpoint and evaluate it on the test set
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(
                best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    # For testing
    validate(test_loader, model, criterion, evaluation)
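
datasets.utils.normalize_data is likewise not shown; a minimal sketch of the standardization it is assumed to perform on each target vector, given the precomputed mean and std above:

import numpy as np

def normalize_data(x, mean, std):
    # Standardize element-wise: zero mean, unit variance per target dimension.
    return (np.asarray(x) - mean) / std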
Example 3
def main():
    global args
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath

    print('Prepare files')

    label_file = 'labels.txt'
    list_file = 'graphs.txt'
    with open(os.path.join(root, label_file), 'r') as f:
        l = f.read()
        classes = [int(float(s) > 0.5)
                   for s in l.split()]  # binarize labels at the 0.5 threshold
        # (in practice this maps them all to 1)
        # print(set(classes))
        unique, counts = np.unique(np.array(classes), return_counts=True)
        print(dict(zip(unique, counts)))
    with open(os.path.join(root, list_file), 'r') as f:

        files = [s + '.pkl' for s in f.read().splitlines()]

    train_ids, train_classes, valid_ids, valid_classes, test_ids, test_classes = divide_datasets(
        files, classes)

    #shuffle here
    c = list(zip(train_ids, train_classes))

    random.shuffle(c)

    train_ids, train_classes = zip(*c)

    data_train = PrGr(root, train_ids, train_classes)
    print(data_train[0])
    print(len(data_train))
    data_valid = PrGr(root, valid_ids, valid_classes)
    data_test = PrGr(root, test_ids, test_classes)
    print(len(data_test))
    # Define model and optimizer
    print('Define model')
    # Select one graph
    g_tuple, l = data_train[6]
    g, h_t, e = g_tuple

    print('\tStatistics')
    stat_dict = datasets.utils.get_graph_stats(data_train, ['degrees'])

    # Data Loader
    train_loader = torch.utils.data.DataLoader(
        data_train,
        batch_size=args.batch_size,
        shuffle=False,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        data_valid,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        data_test,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    criterion = nn.NLLLoss()
    evaluation = utils.accuracy
    print('\tCreate model')
    num_classes = 2
    print(stat_dict['degrees'])
    logger = Logger(args.logPath)

    model = torch.load('test.pth')
    print(model)
    return
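    # NOTE: the early return above makes everything below unreachable; it
    # looks like leftover debugging code for inspecting the loaded model.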
    # print(model.r.learn_modules[0].fcs[3]) #penultimate layer
    model.eval()
    acc1 = validate_with_output(train_loader, model, criterion, evaluation,
                                logger)
    print(acc1)
    print(train_classes)
Example 4
def main():
    global args, best_er1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    for tgt_idx, tgt in enumerate(dataset_targets[args.dataset]):
        print("Training a model for {}".format(tgt))

        # Load data
        root = args.dataset_path if args.dataset_path else dataset_paths[
            args.dataset]
        task_type = args.dataset_type if args.dataset_type else dataset_types[
            args.dataset]
        if args.resume:
            resume_dir = args.resume.format(dataset=args.dataset,
                                            model=args.model,
                                            layers=args.layers,
                                            feature=tgt)
        #end if
        Model_Class = model_dict[args.model]

        print("Preparing dataset")
        node_features, edge_features, target_features, task_type, train_loader, valid_loader, test_loader = read_dataset(
            args.dataset, root, args.batch_size, args.prefetch)

        # Define model and optimizer

        print('\tCreate model')
        hidden_state_size = args.hidden
        model = Model_Class(node_features=node_features,
                            edge_features=edge_features,
                            target_features=1,
                            hidden_features=hidden_state_size,
                            num_layers=args.layers,
                            dropout=0.5,
                            type=task_type,
                            s2s_processing_steps=args.s2s)
        print("#Parameters: {param_count}".format(
            param_count=count_params(model)))

        print('Optimizer')
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        criterion, evaluation, metric_name, metric_compare, metric_best = get_metric_by_task_type(
            task_type, target_features)

        print('Logger')
        logger = Logger(
            args.log_path.format(dataset=args.dataset,
                                 model=args.model,
                                 layers=args.layers,
                                 feature=tgt))

        lr_step = (args.lr - args.lr * args.lr_decay) / (
            args.epochs * args.schedule[1] - args.epochs * args.schedule[0])

        # load the best checkpoint, if one is available, before training
        if args.resume:
            checkpoint_dir = resume_dir
            best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
            if not os.path.isdir(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            if os.path.isfile(best_model_file):
                print("=> loading best model '{}'".format(best_model_file))
                checkpoint = torch.load(best_model_file)
                args.start_epoch = checkpoint['epoch']
                best_er1 = checkpoint['best_er1']
                model.load_state_dict(checkpoint['state_dict'])
                if args.cuda:
                    model.cuda()
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded best model '{}' (epoch {})".format(
                    best_model_file, checkpoint['epoch']))
            else:
                print("=> no best model found at '{}'".format(best_model_file))

        print('Check cuda')
        if args.cuda:
            print('\t* Cuda')
            model = model.cuda()
            criterion = criterion.cuda()

        # Epoch for loop
        for epoch in range(0, args.epochs):
            try:
                if (args.epochs * args.schedule[0] < epoch <
                        args.epochs * args.schedule[1]):
                    args.lr -= lr_step
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = args.lr
                #end if

                # train for one epoch
                train(train_loader,
                      model,
                      criterion,
                      optimizer,
                      epoch,
                      evaluation,
                      logger,
                      target_range=(tgt_idx, ),
                      tgt_name=tgt,
                      metric_name=metric_name,
                      cuda=args.cuda,
                      log_interval=args.log_interval)

                # evaluate on the validation set
                er1 = validate(valid_loader,
                               model,
                               criterion,
                               evaluation,
                               logger,
                               target_range=(tgt_idx, ),
                               tgt_name=tgt,
                               metric_name=metric_name,
                               cuda=args.cuda,
                               log_interval=args.log_interval)

                is_best = metric_compare(er1, best_er1)
                best_er1 = metric_best(er1, best_er1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_er1': best_er1,
                        'optimizer': optimizer.state_dict(),
                    },
                    is_best=is_best,
                    directory=resume_dir)

                # Logger step
                logger.log_value('learning_rate', args.lr).step()
            except KeyboardInterrupt:
                break
            #end try
        #end for

        # load the best checkpoint and evaluate it on the test set
        if args.resume:
            checkpoint_dir = resume_dir
            best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
            if not os.path.isdir(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            if os.path.isfile(best_model_file):
                print("=> loading best model '{}'".format(best_model_file))
                checkpoint = torch.load(best_model_file)
                args.start_epoch = checkpoint['epoch']
                best_er1 = checkpoint['best_er1']
                model.load_state_dict(checkpoint['state_dict'])
                if args.cuda:
                    model.cuda()
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded best model '{}' (epoch {})".format(
                    best_model_file, checkpoint['epoch']))
            else:
                print("=> no best model found at '{}'".format(best_model_file))
            #end if
        #end if

        # (For testing)
        validate(test_loader,
                 model,
                 criterion,
                 evaluation,
                 target_range=(tgt_idx, ),
                 tgt_name=tgt,
                 metric_name=metric_name,
                 cuda=args.cuda,
                 log_interval=args.log_interval)
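
get_metric_by_task_type is not shown in these examples. A minimal sketch of the two comparison helpers it is assumed to return (metric_compare decides whether a new score beats the best so far, metric_best keeps the better of the two); the name make_metric_helpers is hypothetical:

def make_metric_helpers(task_type):
    # Higher is better for classification accuracy; lower is better for
    # regression error, matching how er1/best_er1 are updated above.
    if task_type == 'classification':
        return (lambda new, best: new > best), max
    return (lambda new, best: new < best), min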
Example 5
def main():

    global args, best_er1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath

    print('Prepare files')
    files = [
        f for f in os.listdir(root) if os.path.isfile(os.path.join(root, f))
    ]

    idx = np.random.permutation(len(files))
    idx = idx.tolist()

    valid_ids = [files[i] for i in idx[0:10000]]
    test_ids = [files[i] for i in idx[10000:20000]]
    train_ids = [files[i] for i in idx[20000:]]

    data_train = datasets.Qm9(root,
                              train_ids,
                              edge_transform=utils.qm9_edges,
                              e_representation='raw_distance')
    data_valid = datasets.Qm9(root,
                              valid_ids,
                              edge_transform=utils.qm9_edges,
                              e_representation='raw_distance')
    data_test = datasets.Qm9(root,
                             test_ids,
                             edge_transform=utils.qm9_edges,
                             e_representation='raw_distance')

    # Define model and optimizer
    print('Define model')
    # Select one graph
    g_tuple, l = data_train[0]
    g, h_t, e = g_tuple

    print('\tStatistics')
    stat_dict = datasets.utils.get_graph_stats(data_valid,
                                               ['target_mean', 'target_std'])

    data_train.set_target_transform(lambda x: datasets.utils.normalize_data(
        x, stat_dict['target_mean'], stat_dict['target_std']))
    data_valid.set_target_transform(lambda x: datasets.utils.normalize_data(
        x, stat_dict['target_mean'], stat_dict['target_std']))
    data_test.set_target_transform(lambda x: datasets.utils.normalize_data(
        x, stat_dict['target_mean'], stat_dict['target_std']))

    # Data Loader
    train_loader = torch.utils.data.DataLoader(
        data_train,
        batch_size=args.batch_size,
        shuffle=True,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        data_valid,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        data_test,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)

    print('\tCreate model')
    in_n = [len(h_t[0]), len(list(e.values())[0])]
    hidden_state_size = 73
    message_size = 73
    n_layers = 3
    l_target = len(l)
    type = 'regression'
    model = MPNN(in_n,
                 hidden_state_size,
                 message_size,
                 n_layers,
                 l_target,
                 type=type)
    del in_n, hidden_state_size, message_size, n_layers, l_target, type

    print('Optimizer')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    criterion = nn.MSELoss()

    def evaluation(output, target):
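        # Relative mean absolute error, used as the validation error er1.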
        return torch.mean(torch.abs(output - target) / torch.abs(target))

    print('Logger')
    logger = Logger(args.logPath)

    lr_step = (args.lr - args.lr * args.lr_decay) / (
        args.epochs * args.schedule[1] - args.epochs * args.schedule[0])

    # load the best checkpoint, if one is available, before training
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_er1 = checkpoint['best_er1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(
                best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    print('Check cuda')
    if args.cuda:
        print('\t* Cuda')
        model = model.cuda()
        criterion = criterion.cuda()

    # Epoch for loop
    for epoch in range(0, args.epochs):

        if (args.epochs * args.schedule[0] < epoch <
                args.epochs * args.schedule[1]):
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation,
              logger)

        # evaluate on the validation set
        er1 = validate(valid_loader, model, criterion, evaluation, logger)

        is_best = er1 < best_er1
        best_er1 = min(er1, best_er1)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_er1': best_er1,
                'optimizer': optimizer.state_dict(),
            },
            is_best=is_best,
            directory=args.resume)

        # Logger step
        logger.log_value('learning_rate', args.lr).step()

    # load the best checkpoint and evaluate it on the test set
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_er1 = checkpoint['best_er1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.cuda:
                model.cuda()
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(
                best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    # For testing
    validate(test_loader, model, criterion, evaluation)
Example 6
def adjust_learning_rate(optimizer, epoch):
    """Updates the learning rate given an schedule and a gamma parameter.
    """
    if epoch in args.schedule:
        args.learning_rate *= args.gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.learning_rate

if __name__ == '__main__':
    # Parse options
    args = Options().parse()

    # Check cuda
    args.cuda = args.ngpu > 0 and torch.cuda.is_available()
    
    # Check Test and load
    if args.test and args.load is None:
        raise Exception('Cannot test without loading a model.')

    if not args.test:
        print('Initialize logger')
        log_dir = '{}{}_run-batchSize_{}/'.format(
            args.log,
            len(glob.glob(args.log + '*_run-batchSize_{}'.format(args.batch_size))),
            args.batch_size)

        # Create Logger
        logger = Logger(log_dir, force=True)

    main()
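
The hand-rolled step schedule in adjust_learning_rate (multiply the learning rate by gamma at each milestone epoch in args.schedule) matches what PyTorch's built-in MultiStepLR provides. A minimal, self-contained sketch with a stand-in optimizer:

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR

params = [torch.zeros(1, requires_grad=True)]
optimizer = SGD(params, lr=0.1)
scheduler = MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)
for epoch in range(90):
    optimizer.step()    # stands in for one training epoch
    scheduler.step()    # lr: 0.1 -> 0.01 at epoch 30 -> 0.001 at epoch 60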

Example 7
def main():
    global args
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath

    print('Prepare files')

    label_file = 'labels.txt'
    list_file = 'graphs.txt'
    with open(os.path.join(root, label_file), 'r') as f:
        l = f.read()
        classes = [int(float(s) > 0.5)
                   for s in l.split()]  # binarize labels at the 0.5 threshold
        # (in practice this maps them all to 1)
        # print(set(classes))
        unique, counts = np.unique(np.array(classes), return_counts=True)
        print(dict(zip(unique, counts)))
    with open(os.path.join(root, list_file), 'r') as f:

        files = [s + '.pkl' for s in f.read().splitlines()]

    train_ids, train_classes, valid_ids, valid_classes, test_ids, test_classes = divide_datasets(
        files, classes)

    #shuffle here
    c = list(zip(train_ids, train_classes))

    random.shuffle(c)

    train_ids, train_classes = zip(*c)

    data_train = PrGr(root, train_ids, train_classes)
    print(data_train[0])
    print(len(data_train))
    data_valid = PrGr(root, valid_ids, valid_classes)
    data_test = PrGr(root, test_ids, test_classes)
    print(len(data_test))
    # Define model and optimizer
    print('Define model')
    # Select one graph
    g_tuple, l = data_train[6]
    g, h_t, e = g_tuple

    print('\tStatistics')
    stat_dict = datasets.utils.get_graph_stats(data_train, ['degrees'])

    # Data Loader
    train_loader = torch.utils.data.DataLoader(
        data_train,
        batch_size=args.batch_size,
        shuffle=False,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        data_valid,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        data_test,
        batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch,
        pin_memory=True)

    print('\tCreate model')
    num_classes = 2
    print(stat_dict['degrees'])
    model = MpnnDuvenaud(stat_dict['degrees'],
                         [len(h_t[0]), len(list(e.values())[0])], [7, 3, 5],
                         11,
                         num_classes,
                         type='classification')

    print('Check cuda')

    print('Optimizer')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    criterion = nn.NLLLoss()

    evaluation = utils.accuracy

    print('Logger')
    logger = Logger(args.logPath)

    lr_step = (args.lr - args.lr * args.lr_decay) / (
        args.epochs * args.schedule[1] - args.epochs * args.schedule[0])

    # load the best checkpoint, if one is available, before training
    # (resume logic disabled below)
    best_acc1 = 0
    # if args.resume:
    #     checkpoint_dir = args.resume
    #     best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
    #     if not os.path.isdir(checkpoint_dir):
    #         os.makedirs(checkpoint_dir)
    #     if os.path.isfile(best_model_file):
    #         print("=> loading best model '{}'".format(best_model_file))
    #         checkpoint = torch.load(best_model_file)
    #         args.start_epoch = checkpoint['epoch']
    #         best_acc1 = checkpoint['best_acc1']
    #         model.load_state_dict(checkpoint['state_dict'])
    #         optimizer.load_state_dict(checkpoint['optimizer'])
    #         print("=> loaded best model '{}' (epoch {}; accuracy {})".format(best_model_file, checkpoint['epoch'],
    #                                                                          best_acc1))
    #     else:
    #         print("=> no best model found at '{}'".format(best_model_file))

    print('Check cuda')
    if args.cuda:
        print('\t* Cuda')
        model = model.cuda()
        criterion = criterion.cuda()

    # Epoch for loop
    for epoch in range(0, args.epochs):

        if (args.epochs * args.schedule[0] < epoch <
                args.epochs * args.schedule[1]):
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation,
              logger)

        # evaluate on the validation set
        acc1 = validate(valid_loader, model, criterion, evaluation, logger)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            },
            is_best=is_best,
            directory=args.resume)

        # Logger step
        logger.log_value('learning_rate', args.lr).step()

    # load the best checkpoint and evaluate it on the test set
    # if args.resume:
    #     checkpoint_dir = args.resume
    #     best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
    #     if not os.path.isdir(checkpoint_dir):
    #         os.makedirs(checkpoint_dir)
    #     if os.path.isfile(best_model_file):
    #         print("=> loading best model '{}'".format(best_model_file))
    #         checkpoint = torch.load(best_model_file)
    #         args.start_epoch = checkpoint['epoch']
    #         best_acc1 = checkpoint['best_acc1']
    #         model.load_state_dict(checkpoint['state_dict'])
    #         optimizer.load_state_dict(checkpoint['optimizer'])
    #         print("=> loaded best model '{}' (epoch {}; accuracy {})".format(best_model_file, checkpoint['epoch'],
    #                                                                          best_acc1))
    #     else:
    #         print("=> no best model found at '{}'".format(best_model_file))

    # For testing
    validate(test_loader, model, criterion, evaluation)
    torch.save(model, 'test.pth')
    print(train_classes)
    print(valid_classes)
    print(test_classes)
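
One caveat on the line torch.save(model, 'test.pth') above: it pickles the entire module, so loading it back (as Example 3 does with torch.load('test.pth')) requires the exact class definition to be importable. Saving the state_dict, as the checkpoint code in the other examples does, is the more portable pattern; a minimal sketch with a stand-in model:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)                           # stand-in for MpnnDuvenaud
torch.save(model.state_dict(), 'test_state.pth')  # weights only
restored = nn.Linear(4, 2)                        # rebuild the architecture
restored.load_state_dict(torch.load('test_state.pth'))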