Example no. 1
def runExperiment():
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    # Raw mode: score the real training images directly; otherwise load the
    # previously created samples from disk and evaluate those.
    if cfg['raw']:
        data_loader = make_data_loader(dataset)['train']
        metric = Metric()
        img, label = [], []
        for i, input in enumerate(data_loader):
            input = collate(input)
            img.append(input['img'])
            label.append(input['label'])
        img = torch.cat(img, dim=0)
        label = torch.cat(label, dim=0)
        output = {'img': img, 'label': label}
        evaluation = metric.evaluate(cfg['metric_name']['test'], None, output)
        dbi_result = evaluation['DBI']
        print('Davies-Bouldin Index ({}): {}'.format(cfg['data_name'],
                                                     dbi_result))
        save(dbi_result,
             './output/result/dbi_created_{}.npy'.format(cfg['data_name']),
             mode='numpy')
    else:
        created = np.load('./output/npy/created_{}.npy'.format(
            cfg['model_tag']),
                          allow_pickle=True)
        test(created)
    return
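
Example 1 scores the training images with the Davies-Bouldin Index through the repo's Metric wrapper. For reference, a minimal self-contained sketch of the same metric via scikit-learn, with random arrays standing in for the collated images and labels:

# Reference DBI computation (illustrative stand-in data, not the repo's Metric).
import numpy as np
from sklearn.metrics import davies_bouldin_score

X = np.random.randn(100, 8)              # stand-in for flattened image features
labels = np.random.randint(0, 4, 100)    # stand-in for the collated 'label' tensor
print(davies_bouldin_score(X, labels))   # lower is better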

Example no. 2

def runExperiment():
    cfg['batch_size']['train'] = cfg['batch_size']['test']
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset)
    model = eval('models.{}(model_rate=cfg["global_model_rate"]).to(cfg["device"])'
                 .format(cfg['model_name']))
    last_epoch, data_split, label_split, model, _, _, _ = resume(
        model, cfg['model_tag'], load_tag='best', strict=False)
    current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
    logger_path = 'output/runs/test_{}_{}'.format(cfg['model_tag'],
                                                  current_time)
    test_logger = Logger(logger_path)
    test_logger.safe(True)
    test(dataset['test'], model, test_logger, last_epoch)
    test_logger.safe(False)
    # Reload the checkpoint only to recover the training logger for the result file.
    _, _, _, _, _, _, train_logger = resume(model,
                                            cfg['model_tag'],
                                            load_tag='checkpoint',
                                            strict=False)
    save_result = {
        'cfg': cfg,
        'epoch': last_epoch,
        'logger': {
            'train': train_logger,
            'test': test_logger
        }
    }
    save(save_result, './output/result/{}.pt'.format(cfg['model_tag']))
    return

Example no. 3

def runExperiment():
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    # Raw mode: score the real training images as a reference; otherwise load
    # the generated samples from disk and evaluate those.
    if cfg['raw']:
        data_loader = make_data_loader(dataset)['train']
        metric = Metric()
        img = []
        for i, input in enumerate(data_loader):
            input = collate(input)
            img.append(input['img'])
        img = torch.cat(img, dim=0)
        output = {'img': img}
        evaluation = metric.evaluate(cfg['metric_name']['test'], None, output)
        is_result, fid_result = evaluation['InceptionScore'], evaluation['FID']
        print('Inception Score ({}): {}'.format(cfg['data_name'], is_result))
        print('FID ({}): {}'.format(cfg['data_name'], fid_result))
        save(is_result,
             './output/result/is_generated_{}.npy'.format(cfg['data_name']),
             mode='numpy')
        save(fid_result,
             './output/result/fid_generated_{}.npy'.format(cfg['data_name']),
             mode='numpy')
    else:
        generated = np.load('./output/npy/generated_{}.npy'.format(
            cfg['model_tag']),
                            allow_pickle=True)
        test(generated)
    return
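
Example 3 reports Inception Score and FID for generated samples via the same Metric wrapper. As a reference for the first metric, a self-contained sketch of the Inception Score formula, exp(E_x KL(p(y|x) || p(y))), computed from softmax outputs (this is only the formula, not the repo's implementation):

import numpy as np

def inception_score(probs, eps=1e-12):
    # probs: (N, num_classes) softmax outputs of a classifier on generated images.
    marginal = probs.mean(axis=0, keepdims=True)                        # p(y)
    kl = (probs * (np.log(probs + eps) - np.log(marginal + eps))).sum(axis=1)
    return float(np.exp(kl.mean()))

probs = np.full((8, 10), 0.1)  # uniform predictions: the degenerate worst case
print(inception_score(probs))  # -> 1.0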

Example no. 4

def runExperiment():
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    data_loader = make_data_loader(dataset)
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    optimizer = make_optimizer(model)
    scheduler = make_scheduler(optimizer)
    # resume_mode 1: continue training from the checkpoint; 2: reload weights
    # but restart logging; otherwise start from scratch.
    if cfg['resume_mode'] == 1:
        last_epoch, model, optimizer, scheduler, logger = resume(
            model, cfg['model_tag'], optimizer, scheduler)
    elif cfg['resume_mode'] == 2:
        last_epoch = 1
        _, model, _, _, _ = resume(model, cfg['model_tag'])
        current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
        logger_path = 'output/runs/{}_{}'.format(cfg['model_tag'],
                                                 current_time)
        logger = Logger(logger_path)
    else:
        last_epoch = 1
        current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
        logger_path = 'output/runs/train_{}_{}'.format(cfg['model_tag'],
                                                       current_time)
        logger = Logger(logger_path)
    # Wrap the model in DataParallel when training across multiple devices.
    if cfg['world_size'] > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(cfg['world_size'])))
    for epoch in range(last_epoch, cfg['num_epochs'] + 1):
        logger.safe(True)
        train(data_loader['train'], model, optimizer, logger, epoch)
        # Note: this example evaluates on the training split.
        test(data_loader['train'], model, logger, epoch)
        if cfg['scheduler_name'] == 'ReduceLROnPlateau':
            scheduler.step(
                metrics=logger.mean['test/{}'.format(cfg['pivot_metric'])])
        else:
            scheduler.step()
        logger.safe(False)
        model_state_dict = (model.module.state_dict()
                            if cfg['world_size'] > 1 else model.state_dict())
        save_result = {
            'cfg': cfg,
            'epoch': epoch + 1,
            'model_dict': model_state_dict,
            'optimizer_dict': optimizer.state_dict(),
            'scheduler_dict': scheduler.state_dict(),
            'logger': logger
        }
        save(save_result,
             './output/model/{}_checkpoint.pt'.format(cfg['model_tag']))
        # Snapshot the checkpoint whenever the (lower-is-better) pivot metric improves.
        if cfg['pivot'] > logger.mean['test/{}'.format(cfg['pivot_metric'])]:
            cfg['pivot'] = logger.mean['test/{}'.format(cfg['pivot_metric'])]
            shutil.copy(
                './output/model/{}_checkpoint.pt'.format(cfg['model_tag']),
                './output/model/{}_best.pt'.format(cfg['model_tag']))
        logger.reset()
    logger.safe(False)
    return
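
The loop above saves a {cfg, epoch, model_dict, optimizer_dict, scheduler_dict, logger} checkpoint each epoch and copies it to a *_best.pt file when the pivot metric improves; resume() is the repo's loader for these files. A minimal save/restore round trip with plain torch calls (stand-in model and path):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
# Save with the same dict layout as the checkpoint above (path is illustrative).
torch.save({'epoch': 5,
            'model_dict': model.state_dict(),
            'optimizer_dict': optimizer.state_dict(),
            'scheduler_dict': scheduler.state_dict()},
           '/tmp/example_checkpoint.pt')
# Restore.
checkpoint = torch.load('/tmp/example_checkpoint.pt')
model.load_state_dict(checkpoint['model_dict'])
optimizer.load_state_dict(checkpoint['optimizer_dict'])
scheduler.load_state_dict(checkpoint['scheduler_dict'])
last_epoch = checkpoint['epoch']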

Example no. 5

def runExperiment():
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    _, model, _, _, _ = resume(model, cfg['model_tag'], load_tag='best')
    transit(model)
    return

Example no. 6
def runExperiment():
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset)
    data_loader = make_data_loader(dataset)
    model = eval('models.{}(model_rate=cfg["global_model_rate"]).to(cfg["device"])'.format(cfg['model_name']))
    summary = summarize(data_loader['train'], model)
    content, total = parse_summary(summary)
    print(content)
    save_result = total
    save_tag = '{}_{}_{}'.format(cfg['data_name'], cfg['model_name'], cfg['model_mode'][0])
    save(save_result, './output/result/{}.pt'.format(save_tag))
    return
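
summarize() and parse_summary() are repo utilities that trace the model over a data loader; the saved total presumably includes parameter counts. A generic trainable-parameter count for comparison (not the repo's summarize()):

import torch

def count_parameters(model):
    # Count trainable parameters only.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(count_parameters(torch.nn.Linear(10, 5)))  # 10*5 weights + 5 biases = 55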

Example no. 7
def runExperiment():
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    data_loader = make_data_loader(dataset)
    if 'pixelcnn' in cfg['model_name']:
        ae = eval('models.{}().to(cfg["device"])'.format(cfg['ae_name']))
    else:
        ae = None
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    summary = summarize(data_loader['train'], model, ae)
    content = parse_summary(summary)
    print(content)
    return

Example no. 8

def runExperiment():
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset)
    model = eval('models.{}(model_rate=cfg["global_model_rate"]).to(cfg["device"])'.format(cfg['model_name']))
    optimizer = make_optimizer(model, cfg['lr'])
    scheduler = make_scheduler(optimizer)
    if cfg['resume_mode'] == 1:
        last_epoch, data_split, label_split, model, optimizer, scheduler, logger = resume(model, cfg['model_tag'],
                                                                                          optimizer, scheduler)
    elif cfg['resume_mode'] == 2:
        last_epoch = 1
        _, data_split, label_split, model, _, _, _ = resume(model, cfg['model_tag'])
        current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
        logger_path = 'output/runs/{}_{}'.format(cfg['model_tag'], current_time)
        logger = Logger(logger_path)
    else:
        last_epoch = 1
        data_split, label_split = split_dataset(dataset, cfg['num_users'], cfg['data_split_mode'])
        current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
        logger_path = 'output/runs/train_{}_{}'.format(cfg['model_tag'], current_time)
        logger = Logger(logger_path)
    if data_split is None:
        data_split, label_split = split_dataset(dataset, cfg['num_users'], cfg['data_split_mode'])
    global_parameters = model.state_dict()
    federation = Federation(global_parameters, cfg['model_rate'], label_split)
    for epoch in range(last_epoch, cfg['num_epochs']['global'] + 1):
        logger.safe(True)
        train(dataset['train'], data_split['train'], label_split, federation, model, optimizer, logger, epoch)
        test_model = stats(dataset['train'], model)
        test(dataset['test'], data_split['test'], label_split, test_model, logger, epoch)
        if cfg['scheduler_name'] == 'ReduceLROnPlateau':
            scheduler.step(metrics=logger.mean['train/{}'.format(cfg['pivot_metric'])])
        else:
            scheduler.step()
        logger.safe(False)
        model_state_dict = model.state_dict()
        save_result = {
            'cfg': cfg, 'epoch': epoch + 1, 'data_split': data_split, 'label_split': label_split,
            'model_dict': model_state_dict, 'optimizer_dict': optimizer.state_dict(),
            'scheduler_dict': scheduler.state_dict(), 'logger': logger}
        save(save_result, './output/model/{}_checkpoint.pt'.format(cfg['model_tag']))
        # Snapshot the checkpoint whenever the (higher-is-better) pivot metric improves.
        if cfg['pivot'] < logger.mean['test/{}'.format(cfg['pivot_metric'])]:
            cfg['pivot'] = logger.mean['test/{}'.format(cfg['pivot_metric'])]
            shutil.copy('./output/model/{}_checkpoint.pt'.format(cfg['model_tag']),
                        './output/model/{}_best.pt'.format(cfg['model_tag']))
        logger.reset()
    logger.safe(False)
    return
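
Federation is the repo's aggregator: it hands out sub-models at the clients' model_rates and merges their updates each global epoch. For orientation, a generic FedAvg sketch that uniformly averages client state dicts (this omits the heterogeneous-rate handling the repo adds):

import copy
import torch

def fedavg(client_states):
    # Average each parameter tensor across client state dicts of equal shape.
    new_state = copy.deepcopy(client_states[0])
    for k in new_state:
        new_state[k] = torch.stack([c[k].float() for c in client_states]).mean(dim=0)
    return new_state

clients = [torch.nn.Linear(3, 1).state_dict() for _ in range(4)]
averaged = fedavg(clients)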

Example no. 9
def runExperiment(model_tag):
    seed = int(model_tag.split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    config.PARAM['randomGen'] = np.random.RandomState(seed)
    dataset = {'test': fetch_dataset(data_name=config.PARAM['data_name']['test'])['test']}
    data_loader = split_dataset(dataset, data_size=config.PARAM['data_size'], batch_size=config.PARAM['batch_size'],
                                radomGen=config.PARAM['randomGen'])
    model = eval('models.{}().to(config.PARAM["device"])'.format(config.PARAM['model_name']))
    best = load('./output/model/{}_best.pkl'.format(model_tag))
    model.load_state_dict(best['model_dict'])
    result = test(data_loader['test'], model)
    save(result, './output/result/{}.pkl'.format(model_tag))
    return

Example no. 10
def runExperiment():
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    if 'pixelcnn' in cfg['model_name']:
        ae = eval('models.{}().to(cfg["device"])'.format(cfg['ae_name']))
        _, ae, _, _, _ = resume(ae, cfg['ae_tag'], load_tag='best')
    else:
        ae = None
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    _, model, _, _, _ = resume(model, cfg['model_tag'], load_tag='best')
    generate(model, ae)
    return

Example no. 11

def runExperiment():
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    data_loader = make_data_loader(dataset)
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    load_tag = 'best'
    last_epoch, model, _, _, _ = resume(model, cfg['model_tag'], load_tag=load_tag)
    logger_path = 'output/runs/test_{}_{}'.format(cfg['model_tag'], datetime.datetime.now().strftime('%b%d_%H-%M-%S'))
    logger = Logger(logger_path)
    logger.safe(True)
    test(data_loader['train'], model, logger, last_epoch)
    logger.safe(False)
    save_result = {'cfg': cfg, 'epoch': last_epoch, 'logger': logger}
    save(save_result, './output/result/{}.pt'.format(cfg['model_tag']))
    return

Example no. 12

def runExperiment(model_tag):
    model_tag_list = model_tag.split('_')
    seed = int(model_tag_list[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    randomGen = np.random.RandomState(seed)
    dataset = {
        'test':
        fetch_dataset(data_name=config.PARAM['data_name']['test'])['test']
    }
    data_loader = split_dataset(dataset,
                                data_size=config.PARAM['data_size'],
                                batch_size=config.PARAM['batch_size'],
                                radomGen=randomGen)
    model = eval('models.{}().to(config.PARAM["device"])'.format(config.PARAM['model_name']))
    logger = Logger('runs/{}'.format(model_tag))
    print(config.PARAM)
    test(data_loader['test'], model, logger)
    return

Example no. 13
import data as data_module
import model as model_module

data = data_module.Data()
model = model_module.Model()

print("fetching data...")
mydata = data.fetch_dataset()
print("dropping correlated features...")
data.drop_correlated(mydata)
print("removing outliers...")
data.remove_outliers(mydata)
print("encoding categorical features...")
mydata = data.encode_features(mydata)
print("splitting data into train/test sets...")
train, test = data.train_split(mydata)
print("upsampling minority class...")
train = data.upsample_minority(train)
print("splitting predictor/target features...")
X_train, y_train, X_test, y_test = data.target_split(train, test)
print("scaling datasets...")
X_train, X_test = data.scale(X_train, X_test)
print("performing dimensionality reduction...")
X_train, X_test = data.reduce_dimension(X_train, X_test)
X_train = data.to_df(data=X_train)
X_test = data.to_df(data=X_test)
y_train = data.to_df(data=y_train)
y_test = data.to_df(data=y_test)
print("Modelling using logistic regression...")
logistic_reg = model.train_logistic_classifier(X_train, y_train)
print("Modelling using xgboost classifier...")
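
The snippet is truncated after announcing the XGBoost step. A hypothetical continuation, assuming a helper shaped like the logistic-regression one above (the function name and hyperparameters are illustrative, not the author's code):

from xgboost import XGBClassifier

def train_xgboost_classifier(X_train, y_train):
    # Hypothetical counterpart to model.train_logistic_classifier above.
    clf = XGBClassifier(n_estimators=200, max_depth=4, eval_metric='logloss')
    clf.fit(X_train, y_train)
    return clf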

Example no. 14
def runExperiment():
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    data_loader = make_data_loader(dataset)
    test(data_loader['train'])
    return

Example no. 15
def FID(img):
    # Frechet Inception Distance between the generated batch `img` and the
    # real training set fetched below.
    with torch.no_grad():
        batch_size = 32
        cfg['batch_size']['train'] = batch_size
        dataset = fetch_dataset(cfg['data_name'], cfg['subset'], verbose=False)
        real_data_loader = make_data_loader(dataset)['train']
        generated_data_loader = DataLoader(img, batch_size=batch_size)
        if cfg['data_name'] in ['COIL100', 'Omniglot']:
            # Use a dataset-specific classifier (rather than Inception-v3) to
            # extract features for these datasets.
            model = models.classifier().to(cfg['device'])
            model_tag = ['0', cfg['data_name'], cfg['subset'], 'classifier']
            model_tag = '_'.join(filter(None, model_tag))
            checkpoint = load(
                './metrics_tf/res/classifier/{}_best.pt'.format(model_tag))
            model.load_state_dict(checkpoint['model_dict'])
            model.train(False)
            real_feature = []
            for i, input in enumerate(real_data_loader):
                input = collate(input)
                input = to_device(input, cfg['device'])
                real_feature_i = model.feature(input)
                real_feature.append(real_feature_i.cpu().numpy())
            real_feature = np.concatenate(real_feature, axis=0)
            generated_feature = []
            for i, input in enumerate(generated_data_loader):
                input = {
                    'img': input,
                    'label': input.new_zeros(input.size(0)).long()
                }
                input = to_device(input, cfg['device'])
                generated_feature_i = model.feature(input)
                generated_feature.append(generated_feature_i.cpu().numpy())
            generated_feature = np.concatenate(generated_feature, axis=0)
        else:
            model = inception_v3(pretrained=True,
                                 transform_input=False).to(cfg['device'])
            up = nn.Upsample(size=(299, 299),
                             mode='bilinear',
                             align_corners=False)
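            # Chain the Inception-v3 blocks up to global average pooling into a
            # 2048-d feature extractor; images are upsampled to 299x299 first.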
            model.feature = nn.Sequential(*[
                up, model.Conv2d_1a_3x3, model.Conv2d_2a_3x3,
                model.Conv2d_2b_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2), model.Conv2d_3b_1x1,
                model.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2), model.Mixed_5b,
                model.Mixed_5c, model.Mixed_5d, model.Mixed_6a, model.Mixed_6b,
                model.Mixed_6c, model.Mixed_6d, model.Mixed_6e, model.Mixed_7a,
                model.Mixed_7b, model.Mixed_7c,
                nn.AdaptiveAvgPool2d(1),
                nn.Flatten()
            ])
            model.train(False)
            real_feature = []
            for i, input in enumerate(real_data_loader):
                input = collate(input)
                input = to_device(input, cfg['device'])
                real_feature_i = model.feature(input['img'])
                real_feature.append(real_feature_i.cpu().numpy())
            real_feature = np.concatenate(real_feature, axis=0)
            generated_feature = []
            for i, input in enumerate(generated_data_loader):
                input = to_device(input, cfg['device'])
                generated_feature_i = model.feature(input)
                generated_feature.append(generated_feature_i.cpu().numpy())
            generated_feature = np.concatenate(generated_feature, axis=0)
        mu1 = np.mean(real_feature, axis=0)
        sigma1 = np.cov(real_feature, rowvar=False)
        mu2 = np.mean(generated_feature, axis=0)
        sigma2 = np.cov(generated_feature, rowvar=False)
        mu1 = np.atleast_1d(mu1)
        mu2 = np.atleast_1d(mu2)
        sigma1 = np.atleast_2d(sigma1)
        sigma2 = np.atleast_2d(sigma2)
        assert mu1.shape == mu2.shape, "Real and generated mean vectors have different lengths"
        assert sigma1.shape == sigma2.shape, "Real and generated covariances have different dimensions"
        diff = mu1 - mu2
        # product might be almost singular
        covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
        if not np.isfinite(covmean).all():
            offset = np.eye(sigma1.shape[0]) * 1e-6
            covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
        # numerical error might give slight imaginary component
        if np.iscomplexobj(covmean):
            if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
                m = np.max(np.abs(covmean.imag))
                raise ValueError("Imaginary component {}".format(m))
            covmean = covmean.real
        tr_covmean = np.trace(covmean)
        fid = diff.dot(diff) + np.trace(sigma1) + np.trace(
            sigma2) - 2 * tr_covmean
        fid = fid.item()
    return fid
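
The closing lines implement the Frechet distance d^2 = |mu1 - mu2|^2 + Tr(sigma1 + sigma2 - 2*sqrtm(sigma1*sigma2)). A self-contained sanity check of just that formula, separate from the feature extraction above:

import numpy as np
from scipy import linalg

def frechet_distance(mu1, sigma1, mu2, sigma2):
    # Frechet distance between two Gaussians; identical inputs give ~0.
    diff = mu1 - mu2
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * np.trace(covmean)

mu, sigma = np.zeros(4), np.eye(4)
print(frechet_distance(mu, sigma, mu, sigma))  # ~0.0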