def runExperiment():
    """Evaluate created samples with the Davies-Bouldin Index (DBI)."""
    # The leading token of the model tag encodes the experiment seed.
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    if cfg['raw']:
        # Score the raw training data itself to get a reference DBI value.
        data_loader = make_data_loader(dataset)['train']
        metric = Metric()
        images, labels = [], []
        for batch in data_loader:
            batch = collate(batch)
            images.append(batch['img'])
            labels.append(batch['label'])
        output = {'img': torch.cat(images, dim=0), 'label': torch.cat(labels, dim=0)}
        evaluation = metric.evaluate(cfg['metric_name']['test'], None, output)
        dbi_result = evaluation['DBI']
        print('Davies-Bouldin Index ({}): {}'.format(cfg['data_name'], dbi_result))
        save(dbi_result, './output/result/dbi_created_{}.npy'.format(cfg['data_name']), mode='numpy')
    else:
        # Otherwise score previously created samples stored on disk.
        created = np.load('./output/npy/created_{}.npy'.format(cfg['model_tag']), allow_pickle=True)
        test(created)
    return
def runExperiment():
    """Resume the best global model and run a logged test pass.

    Saves a result bundle containing the config, the resumed epoch, and
    both the recovered training logger and the fresh test logger.
    """
    # Evaluate with the same batch size used for testing.
    cfg['batch_size']['train'] = cfg['batch_size']['test']
    # The leading token of the model tag encodes the RNG seed.
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset)
    # Fix: the original eval string chained .to(cfg["device"]) twice;
    # a single device move is sufficient (the second was a redundant no-op).
    model = eval(
        'models.{}(model_rate=cfg["global_model_rate"]).to(cfg["device"])'
        .format(cfg['model_name']))
    last_epoch, data_split, label_split, model, _, _, _ = resume(
        model, cfg['model_tag'], load_tag='best', strict=False)
    current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
    logger_path = 'output/runs/test_{}_{}'.format(cfg['model_tag'], current_time)
    test_logger = Logger(logger_path)
    test_logger.safe(True)
    test(dataset['test'], model, test_logger, last_epoch)
    test_logger.safe(False)
    # Recover the training history from the checkpoint so the saved result
    # bundles both the train and test logs.
    _, _, _, _, _, _, train_logger = resume(model, cfg['model_tag'],
                                            load_tag='checkpoint', strict=False)
    save_result = {
        'cfg': cfg,
        'epoch': last_epoch,
        'logger': {
            'train': train_logger,
            'test': test_logger
        }
    }
    save(save_result, './output/result/{}.pt'.format(cfg['model_tag']))
    return
def runExperiment():
    """Evaluate generated samples with Inception Score and FID."""
    # The leading token of the model tag encodes the experiment seed.
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    if cfg['raw']:
        # Score the raw training images to establish reference metric values.
        data_loader = make_data_loader(dataset)['train']
        metric = Metric()
        images = []
        for batch in data_loader:
            batch = collate(batch)
            images.append(batch['img'])
        output = {'img': torch.cat(images, dim=0)}
        evaluation = metric.evaluate(cfg['metric_name']['test'], None, output)
        is_result = evaluation['InceptionScore']
        fid_result = evaluation['FID']
        print('Inception Score ({}): {}'.format(cfg['data_name'], is_result))
        print('FID ({}): {}'.format(cfg['data_name'], fid_result))
        save(is_result, './output/result/is_generated_{}.npy'.format(cfg['data_name']), mode='numpy')
        save(fid_result, './output/result/fid_generated_{}.npy'.format(cfg['data_name']), mode='numpy')
    else:
        # Otherwise score previously generated samples stored on disk.
        generated = np.load('./output/npy/generated_{}.npy'.format(cfg['model_tag']), allow_pickle=True)
        test(generated)
    return
def runExperiment():
    """Standard (non-federated) training loop: train, evaluate, and checkpoint each epoch."""
    # The leading token of the model tag encodes the RNG seed.
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    data_loader = make_data_loader(dataset)
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    optimizer = make_optimizer(model)
    scheduler = make_scheduler(optimizer)
    if cfg['resume_mode'] == 1:
        # Full resume: restore optimizer, scheduler, and logger state too.
        last_epoch, model, optimizer, scheduler, logger = resume(
            model, cfg['model_tag'], optimizer, scheduler)
    elif cfg['resume_mode'] == 2:
        # Warm start: reuse weights only; training restarts from epoch 1
        # with a fresh logger.
        last_epoch = 1
        _, model, _, _, _ = resume(model, cfg['model_tag'])
        current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
        logger_path = 'output/runs/{}_{}'.format(cfg['model_tag'], current_time)
        logger = Logger(logger_path)
    else:
        # Fresh run: new logger under output/runs/train_*.
        last_epoch = 1
        current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
        logger_path = 'output/runs/train_{}_{}'.format(cfg['model_tag'],
                                                       current_time)
        logger = Logger(logger_path)
    if cfg['world_size'] > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(
            cfg['world_size'])))
    for epoch in range(last_epoch, cfg['num_epochs'] + 1):
        logger.safe(True)
        train(data_loader['train'], model, optimizer, logger, epoch)
        # NOTE(review): evaluation runs on the 'train' loader, not 'test' —
        # confirm this is intended and not a copy-paste slip.
        test(data_loader['train'], model, logger, epoch)
        if cfg['scheduler_name'] == 'ReduceLROnPlateau':
            scheduler.step(
                metrics=logger.mean['test/{}'.format(cfg['pivot_metric'])])
        else:
            scheduler.step()
        logger.safe(False)
        # Unwrap DataParallel before serializing weights.
        model_state_dict = model.module.state_dict(
        ) if cfg['world_size'] > 1 else model.state_dict()
        save_result = {
            'cfg': cfg,
            'epoch': epoch + 1,
            'model_dict': model_state_dict,
            'optimizer_dict': optimizer.state_dict(),
            'scheduler_dict': scheduler.state_dict(),
            'logger': logger
        }
        save(save_result,
             './output/model/{}_checkpoint.pt'.format(cfg['model_tag']))
        # Track the minimum of the pivot metric: a smaller value replaces
        # cfg['pivot'] and promotes the checkpoint to *_best.pt.
        if cfg['pivot'] > logger.mean['test/{}'.format(cfg['pivot_metric'])]:
            cfg['pivot'] = logger.mean['test/{}'.format(cfg['pivot_metric'])]
            shutil.copy(
                './output/model/{}_checkpoint.pt'.format(cfg['model_tag']),
                './output/model/{}_best.pt'.format(cfg['model_tag']))
        logger.reset()
    logger.safe(False)
    return
def runExperiment():
    """Restore the best model checkpoint and hand it to transit()."""
    # Seed CPU and CUDA RNGs from the model-tag prefix for reproducibility.
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    # resume() returns (epoch, model, ...); only the model is needed here.
    resumed = resume(model, cfg['model_tag'], load_tag='best')
    model = resumed[1]
    transit(model)
    return
def runExperiment():
    """Summarize the global-rate model on the training loader and save the totals."""
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset)
    data_loader = make_data_loader(dataset)
    model = eval('models.{}(model_rate=cfg["global_model_rate"]).to(cfg["device"])'.format(cfg['model_name']))
    # parse_summary yields a printable report and the aggregate totals.
    content, total = parse_summary(summarize(data_loader['train'], model))
    print(content)
    save_tag = '{}_{}_{}'.format(cfg['data_name'], cfg['model_name'], cfg['model_mode'][0])
    save(total, './output/result/{}.pt'.format(save_tag))
    return
def runExperiment():
    """Print a model summary; pixelcnn models also receive their autoencoder."""
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    data_loader = make_data_loader(dataset)
    # Only pixelcnn-style models need the companion autoencoder for the summary.
    if 'pixelcnn' in cfg['model_name']:
        ae = eval('models.{}().to(cfg["device"])'.format(cfg['ae_name']))
    else:
        ae = None
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    report = parse_summary(summarize(data_loader['train'], model, ae))
    print(report)
    return
def runExperiment():
    """Federated training loop: split data across users, train via Federation, checkpoint each global epoch."""
    # The leading token of the model tag encodes the RNG seed.
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset)
    model = eval('models.{}(model_rate=cfg["global_model_rate"]).to(cfg["device"])'.format(cfg['model_name']))
    optimizer = make_optimizer(model, cfg['lr'])
    scheduler = make_scheduler(optimizer)
    if cfg['resume_mode'] == 1:
        # Full resume: restore splits, optimizer, scheduler, and logger.
        last_epoch, data_split, label_split, model, optimizer, scheduler, logger = resume(model, cfg['model_tag'], optimizer, scheduler)
    elif cfg['resume_mode'] == 2:
        # Warm start: reuse weights and splits only; restart from epoch 1.
        last_epoch = 1
        _, data_split, label_split, model, _, _, _ = resume(model, cfg['model_tag'])
        current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
        logger_path = 'output/runs/{}_{}'.format(cfg['model_tag'], current_time)
        logger = Logger(logger_path)
    else:
        # Fresh run: create new per-user data/label splits.
        last_epoch = 1
        data_split, label_split = split_dataset(dataset, cfg['num_users'], cfg['data_split_mode'])
        current_time = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
        logger_path = 'output/runs/train_{}_{}'.format(cfg['model_tag'], current_time)
        logger = Logger(logger_path)
    # Guard against checkpoints saved without split information.
    if data_split is None:
        data_split, label_split = split_dataset(dataset, cfg['num_users'], cfg['data_split_mode'])
    global_parameters = model.state_dict()
    federation = Federation(global_parameters, cfg['model_rate'], label_split)
    for epoch in range(last_epoch, cfg['num_epochs']['global'] + 1):
        logger.safe(True)
        train(dataset['train'], data_split['train'], label_split, federation, model, optimizer, logger, epoch)
        # stats() builds the evaluation model from the training data
        # (presumably refreshing e.g. normalization statistics — confirm).
        test_model = stats(dataset['train'], model)
        test(dataset['test'], data_split['test'], label_split, test_model, logger, epoch)
        if cfg['scheduler_name'] == 'ReduceLROnPlateau':
            # NOTE(review): the plateau scheduler steps on the *train* pivot
            # metric while best-model selection uses the *test* metric below.
            scheduler.step(metrics=logger.mean['train/{}'.format(cfg['pivot_metric'])])
        else:
            scheduler.step()
        logger.safe(False)
        model_state_dict = model.state_dict()
        save_result = {
            'cfg': cfg,
            'epoch': epoch + 1,
            'data_split': data_split,
            'label_split': label_split,
            'model_dict': model_state_dict,
            'optimizer_dict': optimizer.state_dict(),
            'scheduler_dict': scheduler.state_dict(),
            'logger': logger}
        save(save_result, './output/model/{}_checkpoint.pt'.format(cfg['model_tag']))
        # Track the maximum of the pivot metric: a larger value replaces
        # cfg['pivot'] and promotes the checkpoint to *_best.pt.
        if cfg['pivot'] < logger.mean['test/{}'.format(cfg['pivot_metric'])]:
            cfg['pivot'] = logger.mean['test/{}'.format(cfg['pivot_metric'])]
            shutil.copy('./output/model/{}_checkpoint.pt'.format(cfg['model_tag']),
                        './output/model/{}_best.pt'.format(cfg['model_tag']))
        logger.reset()
    logger.safe(False)
    return
def runExperiment(model_tag):
    """Evaluate the best checkpoint identified by ``model_tag`` and save its result."""
    # The tag prefix is the seed; it also drives the shared RandomState.
    seed = int(model_tag.split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    config.PARAM['randomGen'] = np.random.RandomState(seed)
    dataset = {'test': fetch_dataset(data_name=config.PARAM['data_name']['test'])['test']}
    # NOTE(review): 'radomGen' is spelled to match split_dataset's keyword —
    # confirm against that signature before renaming.
    data_loader = split_dataset(dataset,
                                data_size=config.PARAM['data_size'],
                                batch_size=config.PARAM['batch_size'],
                                radomGen=config.PARAM['randomGen'])
    model = eval('models.{}().to(config.PARAM["device"])'.format(config.PARAM['model_name']))
    checkpoint = load('./output/model/{}_best.pkl'.format(model_tag))
    model.load_state_dict(checkpoint['model_dict'])
    test_result = test(data_loader['test'], model)
    save(test_result, './output/result/{}.pkl'.format(model_tag))
    return
def runExperiment():
    """Restore the best generator (plus autoencoder for pixelcnn models) and generate samples."""
    # The leading token of the model tag encodes the RNG seed.
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    ae = None
    if 'pixelcnn' in cfg['model_name']:
        # pixelcnn variants also need their best autoencoder checkpoint.
        ae = eval('models.{}().to(cfg["device"])'.format(cfg['ae_name']))
        ae = resume(ae, cfg['ae_tag'], load_tag='best')[1]
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    model = resume(model, cfg['model_tag'], load_tag='best')[1]
    generate(model, ae)
    return
def runExperiment():
    """Resume the best checkpoint, run a logged evaluation pass, and save the result."""
    # The leading token of the model tag encodes the RNG seed.
    seed = int(cfg['model_tag'].split('_')[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    data_loader = make_data_loader(dataset)
    model = eval('models.{}().to(cfg["device"])'.format(cfg['model_name']))
    last_epoch, model, _, _, _ = resume(model, cfg['model_tag'], load_tag='best')
    timestamp = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
    logger = Logger('output/runs/test_{}_{}'.format(cfg['model_tag'], timestamp))
    logger.safe(True)
    # NOTE(review): evaluation runs on the 'train' loader — confirm intended.
    test(data_loader['train'], model, logger, last_epoch)
    logger.safe(False)
    save_result = {'cfg': cfg, 'epoch': last_epoch, 'logger': logger}
    save(save_result, './output/result/{}.pt'.format(cfg['model_tag']))
    return
def runExperiment(model_tag):
    """Run a logged test pass for the model identified by ``model_tag``."""
    # The tag prefix is the seed for all RNGs used here.
    parts = model_tag.split('_')
    seed = int(parts[0])
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    randomGen = np.random.RandomState(seed)
    dataset = {
        'test': fetch_dataset(data_name=config.PARAM['data_name']['test'])['test']
    }
    # NOTE(review): 'radomGen' is spelled to match split_dataset's keyword —
    # confirm against that signature before renaming.
    data_loader = split_dataset(dataset,
                                data_size=config.PARAM['data_size'],
                                batch_size=config.PARAM['batch_size'],
                                radomGen=randomGen)
    # NOTE(review): relies on a module-level `device`, and no checkpoint is
    # loaded before testing — confirm both are intended.
    model = eval('models.{}().to(device)'.format(config.PARAM['model_name']))
    logger = Logger('runs/{}'.format(model_tag))
    print(config.PARAM)
    test(data_loader['test'], model, logger)
    return
# Pipeline script: fetch and clean the dataset, split it, then train classifiers.
import data
import model

# NOTE(review): these assignments shadow the imported `data` and `model`
# modules with instances; the modules are unreachable after this point.
data = data.Data()
model = model.Model()
print("fetching data...")
mydata = data.fetch_dataset()
print("dropping correlated features...")
data.drop_correlated(mydata)
print("removing outliers...")
data.remove_outliers(mydata)
print("encoding categorical features...")
mydata = data.encode_features(mydata)
print("spliting data into train/test sets...")
train, test = data.train_split(mydata)
print("up sampling...")
# Up-sampling is applied to the training split only.
train = data.upsample_minority(train)
print("spliting predictor/target features...")
X_train, y_train, X_test, y_test = data.target_split(train, test)
print("scaling datasets...")
X_train, X_test = data.scale(X_train, X_test)
print("performing dimensionality reduction...")
X_train, X_test = data.reduce_dimension(X_train, X_test)
# Convert the transformed arrays back to DataFrames.
X_train = data.to_df(data=X_train)
X_test = data.to_df(data=X_test)
y_train = data.to_df(data=y_train)
y_test = data.to_df(data=y_test)
print("Modelling using logistic regression...")
logistic_reg = model.train_logistic_classifier(X_train, y_train)
# NOTE(review): the xgboost training call that should follow this print is
# not visible in this chunk — confirm it exists downstream.
print("Modelling using xgboost classifier...")
def runExperiment():
    """Fetch and preprocess the dataset, then run test() on the training loader."""
    dataset = fetch_dataset(cfg['data_name'], cfg['subset'])
    process_dataset(dataset['train'])
    loaders = make_data_loader(dataset)
    test(loaders['train'])
    return
def FID(img):
    """Compute the Frechet Inception Distance between real data and ``img``.

    ``img`` is wrapped directly in a DataLoader, so it is presumably a tensor
    or dataset of generated images — TODO confirm with callers.
    Returns the FID as a Python float.
    """
    with torch.no_grad():
        batch_size = 32
        cfg['batch_size']['train'] = batch_size
        dataset = fetch_dataset(cfg['data_name'], cfg['subset'], verbose=False)
        real_data_loader = make_data_loader(dataset)['train']
        generated_data_loader = DataLoader(img, batch_size=batch_size)
        if cfg['data_name'] in ['COIL100', 'Omniglot']:
            # These datasets use a project-trained classifier as the feature
            # extractor instead of ImageNet Inception.
            model = models.classifier().to(cfg['device'])
            model_tag = ['0', cfg['data_name'], cfg['subset'], 'classifier']
            model_tag = '_'.join(filter(None, model_tag))
            checkpoint = load(
                './metrics_tf/res/classifier/{}_best.pt'.format(model_tag))
            model.load_state_dict(checkpoint['model_dict'])
            model.train(False)
            real_feature = []
            for i, input in enumerate(real_data_loader):
                input = collate(input)
                input = to_device(input, cfg['device'])
                real_feature_i = model.feature(input)
                real_feature.append(real_feature_i.cpu().numpy())
            real_feature = np.concatenate(real_feature, axis=0)
            generated_feature = []
            for i, input in enumerate(generated_data_loader):
                # Generated batches carry no labels; supply dummy zeros so the
                # classifier's expected input dict shape is satisfied.
                input = {
                    'img': input,
                    'label': input.new_zeros(input.size(0)).long()
                }
                input = to_device(input, cfg['device'])
                generated_feature_i = model.feature(input)
                generated_feature.append(generated_feature_i.cpu().numpy())
            generated_feature = np.concatenate(generated_feature, axis=0)
        else:
            # Standard FID: pooled features from a pretrained Inception-v3,
            # with inputs upsampled to the 299x299 resolution it expects.
            model = inception_v3(pretrained=True,
                                 transform_input=False).to(cfg['device'])
            up = nn.Upsample(size=(299, 299), mode='bilinear',
                             align_corners=False)
            model.feature = nn.Sequential(*[
                up, model.Conv2d_1a_3x3, model.Conv2d_2a_3x3,
                model.Conv2d_2b_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2), model.Conv2d_3b_1x1,
                model.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2), model.Mixed_5b,
                model.Mixed_5c, model.Mixed_5d, model.Mixed_6a, model.Mixed_6b,
                model.Mixed_6c, model.Mixed_6d, model.Mixed_6e, model.Mixed_7a,
                model.Mixed_7b, model.Mixed_7c,
                nn.AdaptiveAvgPool2d(1), nn.Flatten()
            ])
            model.train(False)
            real_feature = []
            for i, input in enumerate(real_data_loader):
                input = collate(input)
                input = to_device(input, cfg['device'])
                real_feature_i = model.feature(input['img'])
                real_feature.append(real_feature_i.cpu().numpy())
            real_feature = np.concatenate(real_feature, axis=0)
            generated_feature = []
            for i, input in enumerate(generated_data_loader):
                input = to_device(input, cfg['device'])
                generated_feature_i = model.feature(input)
                generated_feature.append(generated_feature_i.cpu().numpy())
            generated_feature = np.concatenate(generated_feature, axis=0)
        # Fit Gaussians to both feature sets and apply the closed-form FID:
        # ||mu1 - mu2||^2 + Tr(S1 + S2 - 2*sqrt(S1*S2)).
        mu1 = np.mean(real_feature, axis=0)
        sigma1 = np.cov(real_feature, rowvar=False)
        mu2 = np.mean(generated_feature, axis=0)
        sigma2 = np.cov(generated_feature, rowvar=False)
        mu1 = np.atleast_1d(mu1)
        mu2 = np.atleast_1d(mu2)
        sigma1 = np.atleast_2d(sigma1)
        sigma2 = np.atleast_2d(sigma2)
        assert mu1.shape == mu2.shape, "Training and test mean vectors have different lengths"
        assert sigma1.shape == sigma2.shape, "Training and test covariances have different dimensions"
        diff = mu1 - mu2
        # product might be almost singular
        covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
        if not np.isfinite(covmean).all():
            # Regularize with a small diagonal offset and retry the sqrtm.
            offset = np.eye(sigma1.shape[0]) * 1e-6
            covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
        # numerical error might give slight imaginary component
        if np.iscomplexobj(covmean):
            if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
                m = np.max(np.abs(covmean.imag))
                raise ValueError("Imaginary component {}".format(m))
            covmean = covmean.real
        tr_covmean = np.trace(covmean)
        fid = diff.dot(diff) + np.trace(sigma1) + np.trace(
            sigma2) - 2 * tr_covmean
        fid = fid.item()
        return fid