def fill_files(self, remaining=None, root=None):
    """Populate self.files_folder with a tree of folders and files.

    Writes copies of self.teststring1 into files, three sub-folders and
    four files per folder, descending one level per pass, until roughly
    `remaining` bytes have been written.

    remaining -- byte budget; defaults to self.options.size decoded
                 from its human-readable form (e.g. "2MB").
    root      -- folder to start from; defaults to self.files_folder.
    """
    log.trace("fill_files")
    if not remaining:
        # First time in... decode the configured size into a byte count.
        remaining, _, _ = utils.from_readable_form(self.options.size)
        root = self.files_folder
    # Renamed from `list`: never shadow the builtin.
    folders = [root]
    done = False
    while not done:
        next_level = []
        for folder in folders:
            for dname in ["dir1", "dir2", "dir3"]:
                path = os.path.join(folder, dname)
                utils.makedirs(path)
                next_level.append(path)
            for fname in ["f1.avi", "f2.mp3", "f3.exe", "f4.txt"]:
                path = os.path.join(folder, fname)
                with open(path, "w") as f:
                    f.write(self.teststring1)
                remaining -= len(self.teststring1)
                if remaining < 0:
                    done = True
                    break
            if done:
                # Bug fix: previously only the file loop was broken out of,
                # so every remaining folder at this level was still filled
                # even after the byte budget was exhausted.
                break
        folders = next_level
    return
def get(self, src, dest):
    """Fetch a single file from the remote location.

    `src` must exist and must be a file on the remote system.

    If `dest` ends in os.sep (the path component separator - '/' on
    linux), the folder is created if missing and the final file name
    becomes dest/basename(src). Otherwise `dest` is taken as the full
    target file name and its parent folder is created as required.

    Returns the actual destination file name.
    """
    if not self.connected:
        self.connect()
    if dest[-1] == os.sep:
        target_folder = dest
        dest = os.path.join(target_folder, os.path.basename(src))
    else:
        target_folder = os.path.split(dest)[0]
    utils.makedirs(target_folder)
    attempts = 0
    while True:
        try:
            self._get(src, dest)
            break
        except Exception as e:
            # Treat the failure as transient: cycle the connection
            # and retry, giving up after const.Retries attempts.
            self.disconnect()
            self.connect()
            attempts += 1
            if attempts > const.Retries:
                raise e
    return dest
def _make_dir(self, folder):
    """Create `folder` (interpreted relative to self.root) and verify it.

    utils.makedirs won't fail if the folder already exists; an Exception
    is raised when the folder still does not exist afterwards.
    """
    if folder not in ("", ".", "/"):
        target = utils.join_paths(self.root, folder)
    else:
        # An empty/ambiguous path means the store root itself.
        target = self.root
    utils.makedirs(target)
    if not os.path.isdir(target):
        raise Exception("Unable to build folder")
def __init__(self):
    # Make sure the folder exists. An exception is fatal.
    utils.makedirs(const.DataDir)
    # We permit a 'selected' entry to be kept; fs = current file/dir.
    self.sel_fs = None           # currently selected file/dir entry
    self.sel_fs_path = None      # path of the selected entry
    self.cur_run_id = None       # id of the run currently being viewed
    self.cur_store = None        # store currently being viewed
    self.sel_cache = {}          # cache of selections
    self.fs_saved_cache = []     # saved file/dir cache entries
def testChanges(self):
    """Verify point-in-time restores of a changing file.

    Sequence:
      t=0 full backup        - file does not exist
      t=1 incremental backup - file exists with contents "1"
      t=2 incremental backup - file changed to "2"
    Then restore at each time (and at None = most recent) and check that
    exactly the right version of the file comes back.

    Fixes: removed a dead `pass` statement left at the top of the method,
    and renamed the local `file` which shadowed the builtin.
    """
    changer = os.path.join(self.files_folder, "changer")
    restore_file = os.path.join(self.restore_folder, changer[1:])
    # t=0 - file does not exist
    b = Run("testbackup", const.FullBackup, self.options)
    b.run()
    # Make sure we have ticked to another second since the start of the last backup.
    while datetime.now() - b.start_time < timedelta(seconds=1):
        time.sleep(0.01)
    # t=1 - file exists
    with open(changer, "w") as f:
        f.write("1")
    b = Run("testbackup", const.IncrBackup, self.options)
    b.run()
    # Make sure we have ticked to another second since the start of the last backup.
    while datetime.now() - b.start_time < timedelta(seconds=1):
        time.sleep(0.01)
    # t=2 - file changed
    with open(changer, "w") as f:
        f.write("2")
    b = Run("testbackup", const.IncrBackup, self.options)
    b.run()
    # Get the start times of the three runs from the database.
    runs = self.db.runs("testbackup")
    t0 = runs[0].start_time
    t1 = runs[1].start_time
    t2 = runs[2].start_time
    for t, exists, contents in [(t0, False, None), (t1, True, "1"),
                                (t2, True, "2"), (None, True, "2")]:
        # Attempt to restore most recent of ALL files.
        # This tests the default restore.
        r = Restore(self.restore_folder, [self.files_folder], t, self.options)
        r.run()
        if exists:
            with open(restore_file, "r") as f:
                self.assertEqual(f.read(), contents)
        else:
            self.assertFalse(os.path.exists(restore_file))
        # clean the restore area for the next iteration
        shutil.rmtree(self.restore_folder)
        utils.makedirs(self.restore_folder)
def test7bitFilenames(self):
    """Back up and restore files whose names use low-ASCII (7 bit) characters."""
    strange_folder = os.path.join(self.files_folder, "strange")
    utils.makedirs(strange_folder)
    for start in xrange(1, 117, 10):
        # Build a name from 10 consecutive character codes, skipping '/'.
        chars = [chr(code) for code in xrange(start, start + 10)
                 if chr(code) != "/"]
        path = os.path.join(strange_folder, "".join(chars))
        with open(path, "w") as fh:
            fh.write(os.urandom(100))
    self.backup_restore_compare()
def testUnicodeFilenames(self):
    """Back up and restore files whose names contain non-ASCII unicode."""
    # Clean out the ordinary files first.
    shutil.rmtree(self.files_folder)
    utils.makedirs(self.files_folder)
    unicode_folder = os.path.join(unicode(self.files_folder), u"unicode")
    utils.makedirs(unicode_folder)
    for start in xrange(1000, 1200, 10):
        # Build a name from 10 consecutive code points, skipping u'/'.
        chars = [unichr(cp) for cp in xrange(start, start + 10)
                 if unichr(cp) != u"/"]
        path = os.path.join(unicode_folder, u"".join(chars))
        with open(path, "w") as fh:
            fh.write(os.urandom(10))
    self.backup_restore_compare()
def setUp(self):
    # Build a fully isolated fixture for each test: fresh config and DB,
    # a temp folder tree, a small FolderStore, and a Backup definition
    # pointing at generated test files.
    self.config = Config.get_config()
    self.db = DB()
    self.db.check_upgrade()
    self.mark_db_ids()
    # Scratch areas: files/ (data to back up), store/ (backup target),
    # restore/ (restore destination).
    self.test_folder = tempfile.mkdtemp()
    self.files_folder = os.path.join(self.test_folder, "files")
    self.store_folder = os.path.join(self.test_folder, "store")
    self.restore_folder = os.path.join(self.test_folder, "restore")
    utils.makedirs(self.files_folder)
    utils.makedirs(self.store_folder)
    utils.makedirs(self.restore_folder)
    utils.build_file_structure(self.files_folder, 50 * const.Kilobyte,
                               500 * const.Kilobyte)
    # Build a store object (dont save config)
    # Note the careful size selection - we want backups to overflow the FolderStore.
    self.store = FolderStore("teststore", "2MB", True, self.store_folder)
    self.config.storage[self.store.name] = self.store
    # Build the backup object (dont save config)
    self.backup = Backup("testbackup")
    self.backup.include_folders = [self.files_folder]
    self.backup.store = self.store.name
    self.backup.notify_msg = False
    self.include_packages = True
    self.config.backups[self.backup.name] = self.backup
    # build an options object for use with the backup
    self.options = BlankClass()
    self.options.dry_run = False
    self.options.message = False
    self.options.email = False
    self.options.shutdown = False
    self.options.norecurse = False
    # Use a known passphrase for the test run; restored in tearDown
    # (presumably - TODO confirm teardown restores self.old_pass).
    self.old_pass = self.config.data_passphrase
    self.config.data_passphrase = "banana"
parser.add_argument('--nworkers', type=int, default=2) #parser.add_argument('--nworkers', type=int, default=2) #parser.add_argument('--nworkers', type=int, default=1) parser.add_argument('--print-freq', help='Print progress every so iterations', type=int, default=20) parser.add_argument('--vis-freq', help='Visualize progress every so iterations', type=int, default=500) args = parser.parse_args() # Random seed if args.seed is None: args.seed = np.random.randint(100000) # logger utils.makedirs(args.save) logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__)) logger.info(args) device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') if device.type == 'cuda': logger.info('Found {} CUDA devices.'.format(torch.cuda.device_count())) for i in range(torch.cuda.device_count()): props = torch.cuda.get_device_properties(i) logger.info('{} \t Memory: {:.2f}GB'.format(props.name, props.total_memory / (1024**3))) else: logger.info('WARNING: Using device {}'.format(device)) np.random.seed(args.seed) torch.manual_seed(args.seed)
def main(conf):
    """Train a LightGBM model as configured under conf['lightgbm'];
    dump the model, a submission file and a feature-importance report
    into the configured dump directory."""
    dump_dir = conf['lightgbm']['dump']['dir']
    makedirs(dump_dir)
    # Persist the effective configuration alongside the results.
    write_config(conf, join_path(dump_dir, 'application.conf'), 'hocon')
    write_config(conf, join_path(dump_dir, 'application.json'), 'json')
    logging.getLogger().addHandler(
        logging.FileHandler(join_path(dump_dir, 'application.log')))
    logging.info('Kaggle Talking Data')

    label = conf['lightgbm']['label']
    features = conf['lightgbm']['features']
    categorical_features = conf['lightgbm']['categorical_features']
    logging.info('Label: %s', label)
    logging.info('Features: %s', features)
    logging.info('Categorical features: %s', categorical_features)

    # Load the training frame (label column + feature columns) by index.
    data_dir = abspath(conf['lightgbm']['data']['dir'])
    dfc = DataFrameCols(data_dir)
    train_index_name = conf['lightgbm']['data']['train']['index']
    train_index = dfc.load_index(train_index_name)
    df = dfc.load_df(columns=[label] + features, index=train_index)

    if conf['lightgbm']['valid_size'] > 0:
        # Hold out a validation split for evaluation during training.
        train_df, valid_df = train_test_split(
            df, test_size=conf['lightgbm']['valid_size'])
        train_dataset = lgb.Dataset(data=train_df[features].values,
                                    label=train_df[label].values,
                                    feature_name=features,
                                    categorical_feature=categorical_features)
        valid_dataset = lgb.Dataset(data=valid_df[features].values,
                                    label=valid_df[label].values,
                                    feature_name=features,
                                    categorical_feature=categorical_features)
        # Free the split frames before training to reduce peak memory.
        del train_df
        del valid_df
        gc.collect()
    else:
        train_dataset = lgb.Dataset(data=df[features].values,
                                    label=df[label].values,
                                    feature_name=features,
                                    categorical_feature=categorical_features)
        valid_dataset = None

    params = conf['lightgbm']['params']
    options = conf['lightgbm']['options']
    model = train_lightgbm(params, train_dataset, valid_dataset, **options)
    model.save_model(join_path(dump_dir, 'model.bin'))
    del train_dataset
    del valid_dataset
    gc.collect()

    # load model
    # model = lgb.Booster(model_file=join_path(dump_dir, 'model.bin'))
    # train_label = train_df[label].values
    # train_pred = model.predict(train_df[features])
    # train_quality = quality(train_label, train_pred)
    # logging.info('Train quality: %s', train_quality)
    #
    # valid_label = valid_df[label].values
    # valid_pred = model.predict(valid_df[features])
    # valid_quality = quality(valid_label, valid_pred)
    # logging.info('Valid quality: %s', valid_quality)

    # Score the test set and write the submission, sorted by click_id.
    test_index_name = conf['lightgbm']['data']['test']['index']
    test_index = dfc.load_index(test_index_name)
    test_df = dfc.load_df(columns=features + ['click_id_submission'],
                          index=test_index)
    test_df['is_attributed'] = model.predict(test_df[features])
    test_df = test_df[['click_id_submission', 'is_attributed'
                       ]].rename(columns={'click_id_submission': 'click_id'})
    test_df.sort_values(by='click_id', inplace=True)
    test_df.to_csv(join_path(dump_dir, 'submission.csv'),
                   header=True,
                   index=False)

    # Feature importance report: split counts plus normalized gain share.
    gain = model.feature_importance('gain')
    ft = pd.DataFrame({
        'feature': model.feature_name(),
        'split': model.feature_importance('split'),
        'gain': 100 * gain / gain.sum()
    }).sort_values('gain', ascending=False)
    ft.to_csv(join_path(dump_dir, 'feature_strength.csv'),
              header=True,
              index=False,
              sep='\t')
if experimentID is None: # Make a new experiment ID experimentID = int(SystemRandom().random() * 100000) ckpt_path = os.path.join(args.save, "experiment_" + str(experimentID) + '.ckpt') start = time.time() print("Sampling dataset of {} training examples".format(args.n)) input_command = sys.argv ind = [i for i in range(len(input_command)) if input_command[i] == "--load"] if len(ind) == 1: ind = ind[0] input_command = input_command[:ind] + input_command[(ind + 2):] input_command = " ".join(input_command) utils.makedirs("results/") ################################################################## data_obj = parse_datasets(args, device) input_dim = data_obj["input_dim"] classif_per_tp = False if ("classif_per_tp" in data_obj): # do classification per time point rather than on a time series as a whole classif_per_tp = data_obj["classif_per_tp"] if args.classif and (args.dataset == "hopper" or args.dataset == "periodic"): raise Exception("Classification task is not available for MuJoCo and 1d datasets") n_labels = 1 if args.classif:
Learning the optimal transport map (between Gaussians) via CP-Flow (comparing to IAF) """ import gc from scipy import linalg import numpy as np import matplotlib import matplotlib.pyplot as plt import torch from lib.flows import SequentialFlow, DeepConvexFlow, LinearIAF from lib.icnn import ICNN3 from lib import distributions from data.toy_data import Gaussian as ToyData from lib.utils import makedirs makedirs('figures/OT') def savefig(fn): plt.savefig(f'figures/OT/{fn}') batch_size_train = 128 batch_size_test = 64 dimx = 2 if dimx == 2: m = np.array([1.5, 1.0]) C = np.array([[0.9, -0.75], [-0.75, 0.9]]) # fixed for visualization else: m = None C = None
# Progress/visualization cadence for the training loop.
parser.add_argument('--print-freq',
                    help='Print progress every so iterations',
                    type=int,
                    default=20)
parser.add_argument('--vis-freq',
                    help='Visualize progress every so iterations',
                    type=int,
                    default=500)
args = parser.parse_args()

# Random seed: draw one when not supplied, so the run is still
# reproducible from the args recorded in the log below.
if args.seed is None:
    args.seed = np.random.randint(100000)

# logger: mirror output into a file under args.save and snapshot this script.
utils.makedirs(args.save)
logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'),
                          filepath=os.path.abspath(__file__))
logger.info(args)

# Pick the first CUDA device when available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    logger.info('Found {} CUDA devices.'.format(torch.cuda.device_count()))
    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        logger.info('{} \t Memory: {:.2f}GB'.format(
            props.name, props.total_memory / (1024**3)))
else:
    logger.info('WARNING: Using device {}'.format(device))
def main(conf):
    """Train one linear model per enabled section of conf['linear'].

    For each section: (optionally cached) vectorizer + feature matrix,
    cross-validated training, then train/test prediction dumps to the
    section's dump directory.
    """
    logging.info('Loading train dataset')
    train_df = load_train_df(conf['dataset_raw'])
    logging.info('Loading test dataset')
    test_df = load_test_df(conf['dataset_raw'])

    # Per-class sample weights from config.
    class_weight = {int(c['class']): c['weight'] for c in conf['weights']}

    for w, cnf in conf['linear'].iteritems():
        if not cnf.get_bool('enabled', True):
            continue
        if w == 'dataset':
            # 'dataset' is a config key, not a model section.
            continue
        logging.info('Start training linear model: %s', w)

        dump_dir = cnf.get('dump.dir') or '.'
        makedirs(dump_dir)
        config_file = join_path(dump_dir, 'application.conf')
        dump_config(conf, config_file)
        vectorizer_file = join_path(dump_dir, 'vectorizer.pkl')
        quality_file = join_path(dump_dir, 'quality.json')

        y = train_df[FieldsTrain.is_duplicate]

        if cnf['dump.cache.enabled']:
            # Cached path: reuse vectorizer and feature-matrix dumps when
            # present, recompute and save them otherwise.
            logging.info('Loading vectorizer')
            try:
                vectorizer = joblib.load(vectorizer_file)
            except:
                logging.info('Unable to load vectorizer')
                vectorizer = None
            if vectorizer is None:
                logging.info('Training vectorizer')
                vectorizer = train_vectorizer(train_df, **cnf['vectorizer'])
                nf = len(vectorizer.vocabulary_)
                logging.info('Feature count: %d', nf)
                logging.info('Dumping vectorizer')
                joblib.dump(vectorizer, vectorizer_file)
            features_cache_file = join_path(dump_dir, cnf['dump.cache.train'])
            logging.info('Loading cached train feature matrix from %s',
                         features_cache_file)
            X = load_feature_matrix(features_cache_file)
            if X is None:
                logging.info('Unable to load cached train feature matrix')
                logging.info('Computing train feature matrix')
                X = compute_feature_matrix(train_df, vectorizer,
                                           combine=cnf['combine'])
                logging.info('Writing train feature matrix to %s',
                             features_cache_file)
                save_feature_matrix(X, features_cache_file)
        else:
            logging.info('Training vectorizer')
            vectorizer = train_vectorizer(train_df, **cnf['vectorizer'])
            X = compute_feature_matrix(train_df, vectorizer,
                                       combine=cnf['combine'])
            nf = len(vectorizer.vocabulary_)
            logging.info('Feature count: %d', nf)

        logging.info('Training feature matrix: %s', X.shape)
        # Cross-validated training; `quality` holds fold metrics and dump
        # paths, `predictions` the out-of-fold train predictions.
        quality, predictions = train(X, y, skfold(), class_weight,
                                     dump_dir=dump_dir, **cnf['model'])
        with open(quality_file, 'w') as qfh:
            json.dump(quality, qfh)

        logging.info('Writing train set to disk')
        train_df[FieldsTrain.linear] = predictions
        train_df[[
            FieldsTrain.id, FieldsTrain.is_duplicate, FieldsTrain.linear
        ]].to_csv(join_path(dump_dir, 'train.csv'), index=False)

        if cnf['dump.cache.enabled']:
            features_cache_file = join_path(dump_dir, cnf['dump.cache.test'])
            logging.info('Loading cached test feature matrix from %s',
                         features_cache_file)
            X = load_feature_matrix(features_cache_file)
            if X is None:
                logging.info('Unable to load cached test feature matrix')
                logging.info('Computing test feature matrix')
                X = compute_feature_matrix(test_df, vectorizer,
                                           combine=cnf['combine'])
                logging.info('Writing test feature matrix to cache')
                save_feature_matrix(X, features_cache_file)
        else:
            logging.info('Computing test feature matrix')
            X = compute_feature_matrix(test_df, vectorizer,
                                       combine=cnf['combine'])

        logging.info(
            'Computing test predictions as average logit of cross-validation models'
        )
        # Average the logit of each fold model's probability.
        test_df[FieldsTest.linear_cv] = np.zeros(X.shape[0])
        for fold in quality['folds']:
            f = joblib.load(fold['dump'])
            p = logit(f.predict_proba(X)[:, 1])
            test_df[FieldsTest.linear_cv] = test_df[FieldsTest.linear_cv] + p
        test_df[FieldsTest.linear_cv] = test_df[FieldsTest.linear_cv] / len(
            quality['folds'])

        logging.info('Computing test predictions with full model')
        f = joblib.load(quality['full']['unweighted']['dump'])
        p = logit(f.predict_proba(X)[:, 1])
        test_df[FieldsTest.linear_full] = p

        logging.info('Computing test predictions with full weighted model')
        f = joblib.load(quality['full']['weighted']['dump'])
        p = logit(f.predict_proba(X)[:, 1])
        test_df[FieldsTest.linear_full_weighted] = p

        logging.info('Writing test set to disk')
        test_df[[
            FieldsTest.test_id, FieldsTest.linear_cv, FieldsTest.linear_full,
            FieldsTest.linear_full_weighted
        ]].to_csv(join_path(dump_dir, 'test.csv'), index=False)
def main(conf):
    """Assemble configured feature groups into train/test frames, then
    load a previously trained XGBoost model and write a submission.

    NOTE(review): as shown, no training happens here - the model is read
    from model.bin in the dump directory; confirm a separate step
    produces it.
    """
    dump_dir = conf['xgboost.dump.dir']
    makedirs(dump_dir)
    dump_config_file = join_path(dump_dir, 'application.conf')
    dump_config(conf, dump_config_file)

    logging.info('Loading train dataset')
    train_df = load_train_df(conf['xgboost.dataset'])
    logging.info('Loading test dataset')
    test_df = load_test_df(conf['xgboost.dataset'])

    # Join each configured feature group's train/test CSV dumps onto the
    # main frames, collecting the ordered feature list as we go.
    logging.info('Loading features')
    features = []
    for group, cnf in conf['features'].iteritems():
        logging.info('Loading features group: %s', group)
        features_dump_dir = cnf['dump']
        train_features_file = join_path(features_dump_dir, 'train.csv')
        test_features_file = join_path(features_dump_dir, 'test.csv')
        train_features = pd.read_csv(train_features_file)
        test_features = pd.read_csv(test_features_file)
        for fcnf in cnf['features']:
            feature = fcnf['feature']
            features.append(feature)
            # Source column names may differ between train and test dumps.
            train_col = fcnf.get('train_col', feature)
            test_col = fcnf.get('test_col', feature)
            train_df[feature] = train_features[train_col]
            test_df[feature] = test_features[test_col]

    feature_map_file = join_path(dump_dir, 'xgb.fmap')
    create_feature_map(features, feature_map_file)

    # Augment the training data with question-pair order flipped, per the
    # configured column pairs (symmetry augmentation).
    train_df_flipped = train_df.copy()
    for flip in conf['flip']:
        train_df_flipped[flip[0]] = train_df[[flip[1]]]
        train_df_flipped[flip[1]] = train_df[[flip[0]]]
    train_df = pd.concat([train_df, train_df_flipped],
                         axis=0,
                         ignore_index=True)
    logging.info('Train dataset: %s', train_df.shape)

    y = train_df[[FieldsTrain.is_duplicate]].values.flatten()
    logging.info('Train dataset CTR: %s', y.sum() / len(y))
    # Per-class sample weights from config.
    class_weight = {int(c['class']): c['weight'] for c in conf['weights']}
    w = np.vectorize(class_weight.get)(y)
    logging.info('Train dataset weighted CTR: %s', sum(y * w) / sum(w))

    q1 = train_df[Fields.question1].values
    q2 = train_df[Fields.question2].values
    # Drop identifier/text/label columns, leaving only feature columns.
    train_df.drop([
        FieldsTrain.id, FieldsTrain.qid1, FieldsTrain.qid2,
        FieldsTrain.question1, FieldsTrain.question2,
        FieldsTrain.is_duplicate
    ],
                  axis=1,
                  inplace=True)

    logging.info('Computing test predictions')
    test_ids = test_df[[FieldsTest.test_id]]
    test_df.drop(
        [FieldsTest.test_id, FieldsTest.question1, FieldsTest.question2],
        axis=1,
        inplace=True)
    dtest = xgb.DMatrix(test_df.values)
    model = xgb.Booster({'nthread': 4})
    model.load_model(join_path(dump_dir, 'model.bin'))
    p_test = model.predict(dtest)

    logging.info('Writing submission file')
    submission_file = join_path(dump_dir, 'submission.csv')
    submission(submission_file, test_ids, p_test)
def main(conf):
    """Compute word2vec-based pair features (WMD, vector distances,
    skew/kurtosis) for train and test sets and dump them to CSV."""
    dump_dir = conf['word2vec']['dump']['dir']
    makedirs(dump_dir)

    logging.warning('Loading train dataset')
    train_df = load_train_df(conf['word2vec']['dataset'])
    logging.warning('Loading test dataset')
    test_df = load_test_df(conf['word2vec']['dataset'])

    # Two copies of the embeddings: raw, and L2-normalized (init_sims).
    logging.warning('Loading embeddings')
    embeddings_dir = conf['word2vec']['embeddings']['dir']
    embeddings_file = join_path(embeddings_dir,
                                conf['word2vec']['embeddings']['file'])
    w2v = gensim.models.KeyedVectors.load_word2vec_format(embeddings_file,
                                                          binary=True)
    w2v_norm = gensim.models.KeyedVectors.load_word2vec_format(embeddings_file,
                                                               binary=True)
    w2v_norm.init_sims(replace=True)
    processor = Word2Vec(w2v, w2v_norm)

    # processor.features returns one value per feature column, in this order.
    logging.warning('Computing train features')
    train_df[Fields.w2v_wmd], \
    train_df[Fields.w2v_wmd_norm], \
    train_df[Fields.w2v_cos], \
    train_df[Fields.w2v_city], \
    train_df[Fields.w2v_jacc], \
    train_df[Fields.w2v_canb], \
    train_df[Fields.w2v_eucl], \
    train_df[Fields.w2v_mink], \
    train_df[Fields.w2v_bray], \
    train_df[Fields.w2v_skew_q1], \
    train_df[Fields.w2v_skew_q2], \
    train_df[Fields.w2v_kurt_q1], \
    train_df[Fields.w2v_kurt_q2] = \
        zip(*train_df.progress_apply(lambda r: processor.features(r['question1'], r['question2']), axis=1))

    # Log each feature's individual discriminative power on the train set.
    for feature in [f for f in dir(Fields()) if f.startswith('w2v')]:
        logging.warning(
            'Feature %s AUC=%s', feature,
            roc_auc_score(train_df[FieldsTrain.is_duplicate],
                          train_df[feature]))

    logging.warning('Writing train feature dump')
    train_df.drop([
        Fields.question1, Fields.question2, FieldsTrain.qid1, FieldsTrain.qid2
    ],
                  axis=1,
                  inplace=True)
    train_df.to_csv(join_path(dump_dir, 'train.csv'), index=False)

    logging.warning('Computing test features')
    test_df[Fields.w2v_wmd], \
    test_df[Fields.w2v_wmd_norm], \
    test_df[Fields.w2v_cos], \
    test_df[Fields.w2v_city], \
    test_df[Fields.w2v_jacc], \
    test_df[Fields.w2v_canb], \
    test_df[Fields.w2v_eucl], \
    test_df[Fields.w2v_mink], \
    test_df[Fields.w2v_bray], \
    test_df[Fields.w2v_skew_q1], \
    test_df[Fields.w2v_skew_q2], \
    test_df[Fields.w2v_kurt_q1], \
    test_df[Fields.w2v_kurt_q2] = \
        zip(*test_df.progress_apply(lambda r: processor.features(r['question1'], r['question2']), axis=1))

    logging.warning('Writing test feature dump')
    test_df.drop([Fields.question1, Fields.question2], axis=1, inplace=True)
    test_df.to_csv(join_path(dump_dir, 'test.csv'), index=False)
def train_it(
        Model,
        Data_obj,
        args,
        file_name,
        ExperimentID,
        #Trainwriter,
        Validationwriter,
        input_command,
        Devices):
    """
    Train one model replica per device, with periodic validation,
    checkpointing, and metric logging (TensorBoard writers + wandb).

    parameters:
        Model,            # List of Models
        Data_obj,         # List of Data_objects which live on different devices
        args,
        file_name,
        ExperimentID,     # List of IDs
        Validationwriter, # List of TF writers
        input_command,
        Devices           # List of devices

    Returns (train_res, test_res, best_test_accuracy, best_test_acc_step).
    """
    # Per-device bookkeeping: checkpoint paths, best scores, loggers, optimizers.
    Ckpt_path = []
    Top_ckpt_path = []
    Best_test_acc = []
    Best_test_acc_step = []
    Logger = []
    Optimizer = []
    otherOptimizer = []   # unused here - kept for interface/debugging parity
    ODEOptimizer = []     # unused here - kept for interface/debugging parity

    for i, device in enumerate(Devices):
        Ckpt_path.append(
            os.path.join(args.save,
                         "experiment_" + str(ExperimentID[i]) + '.ckpt'))
        Top_ckpt_path.append(
            os.path.join(
                args.save,
                "experiment_" + str(ExperimentID[i]) + '_topscore.ckpt'))
        Best_test_acc.append(0)
        Best_test_acc_step.append(0)
        log_path = "logs/" + file_name + "_" + str(ExperimentID[i]) + ".log"
        if not os.path.exists("logs/"):
            utils.makedirs("logs/")
        Logger.append(
            utils.get_logger(logpath=log_path,
                             filepath=os.path.abspath(__file__)))
        Logger[i].info(input_command)
        Optimizer.append(
            get_optimizer(args.optimizer, args.lr, Model[i].parameters()))

    num_batches = Data_obj[0]["n_train_batches"]
    labels = Data_obj[0]["dataset_obj"].label_list

    #create empty lists for results and similar
    num_gpus = len(Devices)
    train_res = [None] * num_gpus
    batch_dict = [None] * num_gpus
    test_res = [None] * num_gpus
    label_dict = [None] * num_gpus

    # empty result placeholder so the progress bar can read an accuracy
    # before the first validation pass has run
    somedict = {}
    test_res = [somedict]
    test_res[0]["accuracy"] = float(0)

    # Verbosity 1/2 gets a tqdm progress bar; otherwise a plain range.
    if args.v == 1 or args.v == 2:
        pbar = tqdm(range(1, num_batches * (args.niters) + 1),
                    position=0,
                    leave=True,
                    ncols=160)
    else:
        pbar = range(1, num_batches * (args.niters) + 1)

    for itr in pbar:
        for i, device in enumerate(Devices):
            Optimizer[i].zero_grad()
        for i, device in enumerate(Devices):
            # default decay_rate = 0.999, lowest= args.lr/10 # original
            # decay_rate = 0.9995, lowest = args.lr / 50 # new
            utils.update_learning_rate(Optimizer[i],
                                       decay_rate=args.lrdecay,
                                       lowest=args.lr / 1000)

        # KL warm-up: small constant coefficient for the first epochs,
        # then an exponential ramp towards 1.
        wait_until_kl_inc = 10
        if itr // num_batches < wait_until_kl_inc:
            kl_coef = 0.01
        else:
            kl_coef = (1 - 0.99**(itr // num_batches - wait_until_kl_inc))

        # Forward/backward/step, one replica per device.
        for i, device in enumerate(Devices):
            batch_dict[i] = utils.get_next_batch(
                Data_obj[i]["train_dataloader"])
        for i, device in enumerate(Devices):
            train_res[i] = Model[i].compute_all_losses(batch_dict[i],
                                                       n_traj_samples=3,
                                                       kl_coef=kl_coef)
        for i, device in enumerate(Devices):
            train_res[i]["loss"].backward()
        for i, device in enumerate(Devices):
            Optimizer[i].step()

        n_iters_to_viz = 0.333
        if args.dataset == "swisscrop":
            n_iters_to_viz /= 20

        # Periodic validation, checkpointing and metric logging.
        if (itr != 0) and (itr % args.val_freq) == 0:
            with torch.no_grad():
                # Calculate labels and loss on test data
                for i, device in enumerate(Devices):
                    test_res[i], label_dict[i] = compute_loss_all_batches(
                        Model[i],
                        Data_obj[i]["test_dataloader"],
                        args,
                        n_batches=Data_obj[i]["n_test_batches"],
                        experimentID=ExperimentID[i],
                        device=Devices[i],
                        n_traj_samples=3,
                        kl_coef=kl_coef)

                for i, device in enumerate(Devices):
                    #make confusion matrix
                    cm, conf_fig = plot_confusion_matrix(
                        label_dict[0]["correct_labels"],
                        label_dict[0]["predict_labels"],
                        Data_obj[0]["dataset_obj"].label_list,
                        tensor_name='dev/cm')
                    Validationwriter[i].add_figure(
                        "Validation_Confusionmatrix", conf_fig,
                        itr * args.batch_size)

                    # prepare GT labels and predictions
                    # NOTE(review): 'ref' comes from predictions and 'pred'
                    # from ground-truth labels here - names look swapped;
                    # confirm against the metric semantics.
                    y_ref_train = torch.argmax(
                        train_res[0]['label_predictions'],
                        dim=2).squeeze().cpu()
                    y_pred_train = torch.argmax(batch_dict[0]['labels'],
                                                dim=1).cpu()
                    y_ref = label_dict[0]["correct_labels"].cpu()
                    y_pred = label_dict[0]["predict_labels"]

                    # prepare GT labels and predictions
                    # (duplicated block kept as-is from the original)
                    y_ref_train = torch.argmax(
                        train_res[0]['label_predictions'],
                        dim=2).squeeze().cpu()
                    y_pred_train = torch.argmax(batch_dict[0]['labels'],
                                                dim=1).cpu()
                    y_ref = label_dict[0]["correct_labels"].cpu()
                    y_pred = label_dict[0]["predict_labels"]

                    #Make checkpoint
                    torch.save(
                        {
                            'args': args,
                            'state_dict': Model[i].state_dict(),
                        }, Ckpt_path[i])
                    # Keep a separate "top score" checkpoint for the best
                    # validation accuracy seen so far.
                    if test_res[i]["accuracy"] > Best_test_acc[i]:
                        Best_test_acc[i] = test_res[i]["accuracy"]
                        Best_test_acc_step[i] = itr * args.batch_size
                        torch.save(
                            {
                                'args': args,
                                'state_dict': Model[i].state_dict(),
                                'cm': cm
                            }, Top_ckpt_path[i])

                    #utils.plot_confusion_matrix2(y_ref, y_pred, Data_obj[0]["dataset_obj"].label_list, ExperimentID[i])

                    # Save trajectory here
                    #if not test_res[i]["PCA_traj"] is None:
                    #    with open( os.path.join('vis', 'traj_dict' + str(ExperimentID[i]) + '.pickle' ), 'wb') as handle:
                    #        pickle.dump(test_res[i]["PCA_traj"], handle, protocol=pickle.HIGHEST_PROTOCOL)

                    # make PCA visualization (currently disabled: both
                    # branches leave PCA_fig as None)
                    if "PCA_traj" in test_res[0]:
                        #PCA_fig = get_pca_fig(test_res[0]["PCA_traj"]["PCA_trajs1"])
                        PCA_fig = None
                    else:
                        PCA_fig = None

                    # Collect train/validation metrics for wandb.
                    logdict = {
                        'Classification_accuracy/train':
                        train_res[i]["accuracy"],
                        'Classification_accuracy/validation':
                        test_res[i]["accuracy"],
                        'Classification_accuracy/validation_peak':
                        Best_test_acc[i],
                        'Classification_accuracy/validation_peak_step':
                        Best_test_acc_step[i],
                        'loss/train':
                        train_res[i]["loss"].detach(),
                        'loss/validation':
                        test_res[i]["loss"].detach(),
                        'Other_metrics/train_cm':
                        sklearn_cm(y_ref_train, y_pred_train),
                        'Other_metrics/train_precision':
                        precision_score(y_ref_train, y_pred_train,
                                        average='macro'),
                        'Other_metrics/train_recall':
                        recall_score(y_ref_train, y_pred_train,
                                     average='macro'),
                        'Other_metrics/train_f1':
                        f1_score(y_ref_train, y_pred_train, average='macro'),
                        'Other_metrics/train_kappa':
                        cohen_kappa_score(y_ref_train, y_pred_train),
                        'Other_metrics/validation_cm':
                        sklearn_cm(y_ref, y_pred),
                        'Other_metrics/validation_precision':
                        precision_score(y_ref, y_pred, average='macro'),
                        'Other_metrics/validation_recall':
                        recall_score(y_ref, y_pred, average='macro'),
                        'Other_metrics/validation_f1':
                        f1_score(y_ref, y_pred, average='macro'),
                        'Other_metrics/validation_kappa':
                        cohen_kappa_score(y_ref, y_pred),
                    }

                    if "PCA_traj" in test_res[0]:
                        pass
                        #logdict['Visualization/latent_trajectory'] = wandb.Image( get_pca_fig(test_res[0]["PCA_traj"]) )

                    wandb.log(logdict, step=itr * args.batch_size)
                    # wandb.sklearn.plot_confusion_matrix(y_ref, y_pred, labels)

                    # Write training loss and accuracy after every batch
                    # (Only recommanded for debugging)
                    fine_train_writer = False
                    if fine_train_writer:
                        if "loss" in train_res[i]:
                            Validationwriter[i].add_scalar(
                                'loss/train', train_res[i]["loss"].detach(),
                                itr * args.batch_size)
                        if "accuracy" in train_res[i]:
                            Validationwriter[i].add_scalar(
                                'Classification_accuracy/train',
                                train_res[i]["accuracy"],
                                itr * args.batch_size)

                #update progressbar
                if args.v == 2:
                    pbar.set_description(
                        "Train Ac: {:.3f} % | Test Ac: {:.3f} %, Peak Test Ac.: {:.3f} % (at {} batches) |"
                        .format(train_res[0]["accuracy"] * 100,
                                test_res[0]["accuracy"] * 100,
                                Best_test_acc[i] * 100,
                                Best_test_acc_step[0] // args.batch_size))

                #empty all training variables
                #train_res = [None] * num_gpus
                batch_dict = [None] * num_gpus
                #test_res = [None] * num_gpus
                label_dict = [None] * num_gpus

    print(Best_test_acc[0], " at step ", Best_test_acc_step[0])
    return train_res, test_res, Best_test_acc[0], Best_test_acc_step[0]
def main(conf):
    """Compute SVD-residual features for question pairs.

    Vectorizes the stacked question texts, computes a truncated SVD of
    the train matrix, then for each question measures how much of its
    TF vector lies outside the top-k singular subspace (residual norms,
    plus euclidean/cosine distances between the two residuals), for
    both train and test sets.
    """
    dump_dir = conf['svdres.dump.dir']
    makedirs(dump_dir)
    dump_config_file = join_path(dump_dir, 'application.conf')
    dump_config(conf, dump_config_file)

    logging.info('Loading train dataset')
    train_df = load_train_df(conf['svdres.dataset'])

    # Vectorizer: reuse cached dump when possible.
    vectorizer_file = join_path(dump_dir, 'vectorizer.pkl')
    try:
        logging.info('Loading vectorizer dump')
        vectorizer = joblib.load(vectorizer_file)
    except:
        logging.info('Loading vectorizer dump failed')
        logging.info('Traininig vectorizer')
        vectorizer = train_vectorizer(train_df, **conf['svdres.vectorizer'])
        logging.info('Writing vectorizer dump')
        joblib.dump(vectorizer, vectorizer_file)

    # Train feature matrix: question1 rows stacked on top of question2 rows.
    features_file = join_path(dump_dir, 'features_train.npz')
    logging.info('Loading cached train feature matrix from %s', features_file)
    X = load_feature_matrix(features_file)
    if X is None:
        logging.info('Unable to load cached train feature matrix')
        logging.info('Computing train feature matrix')
        X = compute_feature_matrix(train_df, vectorizer, combine='stack')
        logging.info('Writing train feature matrix to %s', features_file)
        save_feature_matrix(X, features_file)

    # Truncated SVD of X; cached as singular values + right singular vectors.
    logging.info('Loading SVD decomposition')
    k = conf['svdres.svd'].get_int('k')
    singular_values_file = join_path(dump_dir, 'singular_values.txt')
    singular_vectors_file = join_path(dump_dir, 'singular_vectors.npz')
    try:
        S = np.loadtxt(singular_values_file)
        VT = np.load(singular_vectors_file)['VT']
        # The cache is only valid for the configured rank k.
        assert k == len(S)
    except:
        logging.info('Loading SVD decomposition failed')
        logging.info('Computing SVD decomposition')
        S, VT = compute_svd(X.asfptype(), **conf['svdres.svd'])
        logging.info('Writing singular values to file')
        np.savetxt(singular_values_file, S)
        np.savez(singular_vectors_file, VT=VT)

    logging.info('Train matrix %s', X.shape)
    logging.info('Computing train SVD residuals')
    # First half of the rows is question1, second half is question2.
    # NOTE(review): integer-style '/' division relied upon for slicing -
    # Python 2 semantics; would need '//' under Python 3.
    L = X.shape[0] / 2
    Xq1 = X[:L, :]
    Xq2 = X[L:, :]
    # Process in batches: residual R = X·VT'·VT - X (projection onto the
    # top-k subspace minus the original), then per-row norms and
    # pairwise distances between the q1/q2 residuals.
    start = 0
    batch = 100
    eucl = np.zeros(Xq1.shape[0])
    cos = np.zeros(Xq1.shape[0])
    q1res = np.zeros(Xq1.shape[0])
    q2res = np.zeros(Xq1.shape[0])
    while start < Xq1.shape[0]:
        finish = min(start + batch, Xq1.shape[0])
        Xq1_batch = Xq1[start:finish, :]
        nq1 = (Xq1_batch.multiply(Xq1_batch)).sum(axis=1).flatten()
        Rq1 = safe_sparse_dot(Xq1_batch, VT.transpose()).dot(VT) - Xq1_batch
        nrq1 = np.sum(np.multiply(Rq1, Rq1), axis=1).flatten()
        Xq2_batch = Xq2[start:finish, :]
        nq2 = (Xq2_batch.multiply(Xq2_batch)).sum(axis=1).flatten()
        Rq2 = safe_sparse_dot(Xq2_batch, VT.transpose()).dot(VT) - Xq2_batch
        nrq2 = np.sum(np.multiply(Rq2, Rq2), axis=1).flatten()
        # Relative residual norms (residual length / original length).
        q1res[start:finish] = np.sqrt(nrq1) / np.sqrt(nq1)
        q2res[start:finish] = np.sqrt(nrq2) / np.sqrt(nq2)
        eucl[start:finish] = euclidean(Rq1, Rq2).flatten()
        cos[start:finish] = cosine(Rq1, Rq2).flatten()
        start = finish

    train_df['svd_res_q1'] = q1res
    train_df['svd_res_q2'] = q2res
    train_df['svd_res_eucl'] = eucl
    train_df['svd_res_cos'] = cos
    train_df[[
        FieldsTrain.id, FieldsTrain.is_duplicate, 'svd_res_q1', 'svd_res_q2',
        'svd_res_eucl', 'svd_res_cos'
    ]].to_csv(join_path(dump_dir, 'train.csv'), index=False)

    logging.info('Loading test dataset')
    # NOTE(review): key is 'svddist.dataset' while every other key in this
    # function uses the 'svdres.' prefix - looks like a copy-paste typo;
    # confirm against the config file before changing.
    test_df = load_test_df(conf['svddist.dataset'])

    logging.info('Computing test features')
    X = compute_feature_matrix(test_df, vectorizer, combine='stack')

    logging.info('Computing train SVD residuals')
    # Same batched residual computation as above, applied to the test matrix.
    L = X.shape[0] / 2
    Xq1 = X[:L, :]
    Xq2 = X[L:, :]
    start = 0
    batch = 100
    eucl = np.zeros(Xq1.shape[0])
    cos = np.zeros(Xq1.shape[0])
    q1res = np.zeros(Xq1.shape[0])
    q2res = np.zeros(Xq1.shape[0])
    while start < Xq1.shape[0]:
        finish = min(start + batch, Xq1.shape[0])
        Xq1_batch = Xq1[start:finish, :]
        nq1 = (Xq1_batch.multiply(Xq1_batch)).sum(axis=1).flatten()
        Rq1 = safe_sparse_dot(Xq1_batch, VT.transpose()).dot(VT) - Xq1_batch
        nrq1 = np.sum(np.multiply(Rq1, Rq1), axis=1).flatten()
        Xq2_batch = Xq2[start:finish, :]
        nq2 = (Xq2_batch.multiply(Xq2_batch)).sum(axis=1).flatten()
        Rq2 = safe_sparse_dot(Xq2_batch, VT.transpose()).dot(VT) - Xq2_batch
        nrq2 = np.sum(np.multiply(Rq2, Rq2), axis=1).flatten()
        q1res[start:finish] = np.sqrt(nrq1) / np.sqrt(nq1)
        q2res[start:finish] = np.sqrt(nrq2) / np.sqrt(nq2)
        eucl[start:finish] = euclidean(Rq1, Rq2).flatten()
        cos[start:finish] = cosine(Rq1, Rq2).flatten()
        start = finish

    test_df['svd_res_q1'] = q1res
    test_df['svd_res_q2'] = q2res
    test_df['svd_res_eucl'] = eucl
    test_df['svd_res_cos'] = cos
    logging.info('Writing test dataset')
    test_df[[
        FieldsTest.test_id, 'svd_res_q1', 'svd_res_q2', 'svd_res_eucl',
        'svd_res_cos'
    ]].to_csv(join_path(dump_dir, 'test.csv'), index=False)
def run(args, kwargs):
    """Train and evaluate a (flow-)VAE on FashionMNIST image/text pairs.

    Builds a snapshot directory name from the flow configuration, constructs
    the model selected by ``args.flow``, then trains with a warmup-annealed
    ELBO over three modality combinations (joint, image-only, text-only) and
    checkpoints the best model by test loss.

    NOTE(review): relies on module-level names (utils, VAE, CNFVAE, elbo_loss,
    override_divergence_fn, AverageMeter, save_checkpoint, FashionMNIST,
    transforms, optim, Variable) imported elsewhere in this file.
    """
    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    # Encode the flow configuration and a timestamp into the snapshot dir name.
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')
    snapshots_path = os.path.join(args.out_dir, 'vae_' + args.dataset + '_')
    snap_dir = snapshots_path + args.flow
    if args.flow != 'no_flow':
        snap_dir += '_' + 'num_flows_' + str(args.num_flows)
    if args.flow == 'orthogonal':
        snap_dir = snap_dir + '_num_vectors_' + str(args.num_ortho_vecs)
    elif args.flow == 'orthogonalH':
        snap_dir = snap_dir + '_num_householder_' + str(args.num_householder)
    elif args.flow == 'iaf':
        snap_dir = snap_dir + '_madehsize_' + str(args.made_h_size)
    elif args.flow == 'permutation':
        snap_dir = snap_dir + '_' + 'kernelsize_' + str(args.kernel_size)
    elif args.flow == 'mixed':
        snap_dir = snap_dir + '_' + 'num_householder_' + str(
            args.num_householder)
    elif args.flow == 'cnf_rank':
        snap_dir = snap_dir + '_rank_' + str(
            args.rank) + '_' + args.dims + '_num_blocks_' + str(
                args.num_blocks)
    elif 'cnf' in args.flow:
        snap_dir = snap_dir + '_' + args.dims + '_num_blocks_' + str(
            args.num_blocks)
    if args.retrain_encoder:
        snap_dir = snap_dir + '_retrain-encoder_'
    elif args.evaluate:
        snap_dir = snap_dir + '_evaluate_'
    snap_dir = snap_dir + '__' + args.model_signature + '/'
    args.snap_dir = snap_dir
    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    # logger
    utils.makedirs(args.snap_dir)
    logger = utils.get_logger(logpath=os.path.join(args.snap_dir, 'logs'),
                              filepath=os.path.abspath(__file__))
    logger.info(args)

    # SAVING: persist the full argument namespace next to the checkpoints.
    torch.save(args, snap_dir + args.flow + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    #train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)
    args.dynamic_binarization = False
    args.input_type = 'binary'
    # NOTE(review): `transform` is built but the loaders below use a plain
    # ToTensor() instead — presumably intentional; confirm before removing.
    transform = transforms.Compose([
        transforms.Grayscale(1),
        transforms.Resize((28, 28), interpolation=2),
        transforms.ToTensor()
        #transforms.Normalize((0.5,), (0.5,))
    ])
    args.input_size = [1, 28, 28]
    train_loader = torch.utils.data.DataLoader(FashionMNIST(
        './data', train=True, download=True, transform=transforms.ToTensor()),
                                               batch_size=args.batch_size,
                                               shuffle=True)
    N_mini_batches = len(train_loader)
    test_loader = torch.utils.data.DataLoader(FashionMNIST(
        './data', train=False, download=True,
        transform=transforms.ToTensor()),
                                              batch_size=args.batch_size,
                                              shuffle=False)

    if not args.evaluate:
        # ==============================================================================================================
        # SELECT MODEL
        # ==============================================================================================================
        # flow parameters and architecture choice are passed on to model through args
        if args.flow == 'no_flow':
            model = VAE.VAE(args)
        elif args.flow == 'planar':
            model = VAE.PlanarVAE(args)
        elif args.flow == 'iaf':
            model = VAE.IAFVAE(args)
        elif args.flow == 'orthogonal':
            model = VAE.OrthogonalSylvesterVAE(args)
        elif args.flow == 'householder':
            model = VAE.HouseholderSylvesterVAE(args)
        elif args.flow == 'triangular':
            model = VAE.TriangularSylvesterVAE(args)
        elif args.flow == 'cnf':
            model = CNFVAE.CNFVAE(args)
        elif args.flow == 'cnf_bias':
            model = CNFVAE.AmortizedBiasCNFVAE(args)
        elif args.flow == 'cnf_hyper':
            model = CNFVAE.HypernetCNFVAE(args)
        elif args.flow == 'cnf_lyper':
            model = CNFVAE.LypernetCNFVAE(args)
        elif args.flow == 'cnf_rank':
            model = CNFVAE.AmortizedLowRankCNFVAE(args)
        else:
            raise ValueError('Invalid flow choice')

        if args.retrain_encoder:
            # Copy only the decoder ('p_x') weights from a previous model.
            logger.info(f"Initializing decoder from {args.model_path}")
            dec_model = torch.load(args.model_path)
            dec_sd = {}
            for k, v in dec_model.state_dict().items():
                if 'p_x' in k:
                    dec_sd[k] = v
            model.load_state_dict(dec_sd, strict=False)

        if args.cuda:
            logger.info("Model on GPU")
            model.cuda()
        logger.info(model)

        if args.retrain_encoder:
            # Freeze the decoder: optimize everything except 'p_x' parameters.
            parameters = []
            logger.info('Optimizing over:')
            for name, param in model.named_parameters():
                if 'p_x' not in name:
                    logger.info(name)
                    parameters.append(param)
        else:
            parameters = model.parameters()

        #optimizer = optim.Adamax(parameters, lr=args.learning_rate, eps=1.e-7)
        optimizer = optim.Adamax(parameters, args.learning_rate, eps=1.e-7)

        # ==============================================================================================================
        # TRAINING AND EVALUATION
        # ==============================================================================================================
        def train(epoch):
            # One epoch over the paired loader with KL annealing + beta warmup.
            override_divergence_fn(model, "approximate")
            beta = min([(epoch * 1.) / max([args.warmup, 1.]), args.max_beta])
            model.train()
            train_loss_meter = AverageMeter()
            # NOTE: is_paired is 1 if the example is paired
            for batch_idx, (image, text) in enumerate(train_loader):
                if epoch < args.annealing_epochs:
                    # compute the KL annealing factor for the current mini-batch in the current epoch
                    annealing_factor = (
                        float(batch_idx + (epoch - 1) * N_mini_batches + 1) /
                        float(args.annealing_epochs * N_mini_batches))
                else:
                    # by default the KL annealing factor is unity
                    annealing_factor = 1.0
                if args.cuda:
                    image = image.cuda()
                    text = text.cuda()
                image = Variable(image)
                text = Variable(text)
                batch_size = len(image)
                # refresh the optimizer
                optimizer.zero_grad()
                # pass data through model
                recon_image_1, recon_text_1, mu_1, logvar_1, logj1, z01, zk1 = model(
                    image, text)
                recon_image_2, recon_text_2, mu_2, logvar_2, logj2, z02, zk2 = model(
                    image)
                recon_image_3, recon_text_3, mu_3, logvar_3, logj3, z03, zk3 = model(
                    text=text)
                # compute ELBO for each data combo
                joint_loss, rec1_1, rec1_2, kl_1 = elbo_loss(
                    recon_image_1,
                    image,
                    recon_text_1,
                    text,
                    mu_1,
                    logvar_1,
                    z01,
                    zk1,
                    logj1,
                    args,
                    lambda_image=1.0,
                    lambda_text=10.0,
                    annealing_factor=annealing_factor,
                    beta=beta)
                image_loss, rec1_2, rec2_2, kl_2 = elbo_loss(
                    recon_image_2,
                    image,
                    None,
                    None,
                    mu_2,
                    logvar_2,
                    z02,
                    zk2,
                    logj2,
                    args,
                    lambda_image=1.0,
                    lambda_text=10.0,
                    annealing_factor=annealing_factor,
                    beta=beta)
                text_loss, rec1, rec2, kl = elbo_loss(
                    None,
                    None,
                    recon_text_3,
                    text,
                    mu_3,
                    logvar_3,
                    z03,
                    zk3,
                    logj3,
                    args,
                    lambda_image=1.0,
                    lambda_text=10.0,
                    annealing_factor=annealing_factor,
                    beta=beta)
                #print("TEXT", r, "TEXTLOSS",text_loss, image_loss.shape, image_loss)
                # joint_loss  # these are the 3 losses, one per modality
                # combination; but I'll test with 2 (translated comment)
                train_loss = joint_loss + image_loss + text_loss
                train_loss_meter.update(train_loss.item(), batch_size)
                # compute gradients and take step
                train_loss.backward()
                optimizer.step()
                if batch_idx % args.log_interval == 0:
                    print(
                        'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAnnealing-Factor: {:.3f}'
                        .format(epoch, batch_idx * len(image),
                                len(train_loader.dataset),
                                100. * batch_idx / len(train_loader),
                                train_loss_meter.avg, annealing_factor))
            print('====> Epoch: {}\tLoss: {:.4f}'.format(
                epoch, train_loss_meter.avg))

        def test(epoch):
            # Evaluate the same three ELBO combinations with a brute-force
            # divergence estimate; returns the average combined test loss.
            model.eval()
            beta = min([(epoch * 1.) / max([args.warmup, 1.]), args.max_beta])
            image_loss_meter = AverageMeter()
            text_loss_meter = AverageMeter()
            test_loss_meter = AverageMeter()
            override_divergence_fn(model, "brute_force")
            for batch_idx, (image, text) in enumerate(test_loader):
                if args.cuda:
                    image = image.cuda()
                    text = text.cuda()
                # NOTE(review): `volatile=True` is a pre-0.4 PyTorch idiom;
                # it is ignored on modern versions (use torch.no_grad()).
                image = Variable(image, volatile=True)
                text = Variable(text, volatile=True)
                batch_size = len(image)
                recon_image_1, recon_text_1, mu_1, logvar_1, logj1, z01, zk1 = model(
                    image, text)
                recon_image_2, recon_text_2, mu_2, logvar_2, logj2, z02, zk2 = model(
                    image)
                recon_image_3, recon_text_3, mu_3, logvar_3, logj3, z03, zk3 = model(
                    text=text)
                # compute ELBO for each data combo
                joint_loss, rec1, rec2, kl = elbo_loss(recon_image_1, image,
                                                       recon_text_1, text,
                                                       mu_1, logvar_1, z01,
                                                       zk1, logj1, args)
                image_loss_meter.update(rec1.mean().item(), batch_size)
                text_loss_meter.update(rec2.mean().item(), batch_size)
                image_loss, rec1, rec2, kl = elbo_loss(recon_image_2, image,
                                                       None, None, mu_2,
                                                       logvar_2, z02, zk2,
                                                       logj2, args)
                image_loss_meter.update(rec1.mean().item(), batch_size)
                text_loss, rec1, rec2, kl = elbo_loss(None, None,
                                                      recon_text_3, text,
                                                      mu_3, logvar_3, z03,
                                                      zk3, logj3, args)
                text_loss_meter.update(rec2.mean().item(), batch_size)
                test_loss = joint_loss + image_loss + text_loss
                test_loss_meter.update(test_loss.item(), batch_size)
            print('====> Test image loss: {:.4f}'.format(image_loss_meter.avg))
            print('====> Test text loss: {:.4f}'.format(text_loss_meter.avg))
            print('====> Test Loss: {:.4f}'.format(test_loss_meter.avg))
            return test_loss_meter.avg

        best_loss = sys.maxsize
        for epoch in range(1, args.epochs + 1):
            train(epoch)
            #print ("Test")
            test_loss = test(epoch)
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            # save the best model and current model
            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'args': args,
                    'best_loss': best_loss,
                    'n_latents': args.z_size,
                    'optimizer': optimizer.state_dict(),
                }, is_best, folder='./trained_models')
def main(args):
    """Build, (optionally) load, train, and plot a tabular CNF model.

    Side effects: creates ``args.save``, mutates ``args`` (time_scale, data,
    timepoints, int_tps, snap settings), trains or loads a checkpoint, and
    plots output for 2-D data.
    """
    # logger
    print(args.no_display_loss)
    utils.makedirs(args.save)
    logger = utils.get_logger(
        logpath=os.path.join(args.save, "logs"),
        filepath=os.path.abspath(__file__),
        # FIX: was `~args.no_display_loss` — bitwise NOT of a bool yields
        # -2 or -1, both truthy, so display could never be turned off.
        displaying=not args.no_display_loss,
    )
    if args.layer_type == "blend":
        logger.info("!! Setting time_scale from None to 1.0 for Blend layers.")
        args.time_scale = 1.0
    logger.info(args)

    device = torch.device(
        "cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu"
    )
    if args.use_cpu:
        device = torch.device("cpu")

    args.data = dataset.SCData.factory(args.dataset, args.max_dim)
    args.timepoints = args.data.get_unique_times()
    # Use maximum timepoint to establish integration_times
    # as some timepoints may be left out for validation etc.
    args.int_tps = (np.arange(max(args.timepoints) + 1) + 1.0) * args.time_scale

    regularization_fns, regularization_coeffs = create_regularization_fns(args)
    model = build_model_tabular(args, args.data.get_shape()[0], regularization_fns).to(
        device
    )

    # FIX: growth_model was unbound (NameError) when use_growth is False or
    # when the leaveout timepoint has no growth model available.
    growth_model = None
    if args.use_growth:
        growth_model_path = None
        if args.leaveout_timepoint == -1:
            growth_model_path = (
                "../data/externel/growth_model_v2.ckpt"
            )
        elif args.leaveout_timepoint in [1, 2, 3]:
            assert args.max_dim == 5
            growth_model_path = (
                "../data/growth/model_%d" % args.leaveout_timepoint
            )
        else:
            print("WARNING: Cannot use growth with this timepoint")
        if growth_model_path is not None:
            growth_model = torch.load(growth_model_path, map_location=device)

    if args.spectral_norm:
        add_spectral_norm(model)
    set_cnf_options(args, model)

    if args.test:
        state_dict = torch.load(args.save + "/checkpt.pth", map_location=device)
        model.load_state_dict(state_dict["state_dict"])
        # if "growth_state_dict" not in state_dict:
        #    print("error growth model note in save")
        #    growth_model = None
        # else:
        #    checkpt = torch.load(args.save + "/checkpt.pth", map_location=device)
        #    growth_model.load_state_dict(checkpt["growth_state_dict"])
        # TODO can we load the arguments from the save?
        # eval_utils.generate_samples(
        #    device, args, model, growth_model, timepoint=args.leaveout_timepoint
        # )
        # with torch.no_grad():
        #    evaluate(device, args, model, growth_model)
        # exit()
    else:
        logger.info(model)
        n_param = count_parameters(model)
        logger.info("Number of trainable parameters: {}".format(n_param))
        train(
            device,
            args,
            model,
            growth_model,
            regularization_coeffs,
            regularization_fns,
            logger,
        )

    if args.data.data.shape[1] == 2:
        plot_output(device, args, model)
def trainAE(net,
            train_loader,
            val_loader,
            saveDir,
            sStartTime,
            argType=torch.float32,
            device=torch.device('cpu')):
    """Train an autoencoder, then attach encoding normalization stats.

    :param net: AutoEncoder
    :param train_loader: MNIST loader of training data
    :param val_loader: MNIST loader of validation data
    :param saveDir: string, path
    :param sStartTime: string, start time
    :param argType: torch type
    :param device: torch device
    :return: the trained net, with ``net.mu`` / ``net.std`` set to the
        per-dimension mean/std of the encoded training set, and the best
        (by validation loss) parameters saved to a checkpoint file.
    """
    print("training auto_encoder")
    cvt = lambda x: x.type(argType).to(device, non_blocking=True)
    utils.makedirs(saveDir)
    # specify loss function
    criterion = nn.MSELoss()
    # specify optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    best_loss = float('inf')
    bestParams = None
    # number of epochs to train the model
    n_epochs = 600
    for epoch in range(1, n_epochs + 1):
        # train the encoder-decoder
        net.train()
        train_loss = 0.0
        for data in train_loader:
            # _ stands in for labels, here
            images, _ = data
            # flatten images
            images = images.view(images.size(0), -1)
            images = cvt(images)
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, images)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * images.size(0)

        # validate the encoder-decoder
        # BUG FIX: the original called loss.backward() and optimizer.step()
        # in this loop, i.e. it trained on the validation set. Validation is
        # now gradient-free and read-only.
        net.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in val_loader:
                images, _ = data
                images = images.view(images.size(0), -1)
                images = cvt(images)
                outputs = net(images)
                loss = criterion(outputs, images)
                val_loss += loss.item() * images.size(0)

        # print avg training statistics...different batch_sizes will scale these differnetly
        train_loss = train_loss / len(train_loader)
        val_loss = val_loss / len(val_loader)
        print('Epoch: {} \tTraining Loss: {:.6f} \t Validation Loss: {:.6f}'.
              format(epoch, train_loss, val_loss))

        # save best set of parameters
        if val_loss < best_loss:
            best_loss = val_loss
            bestParams = net.state_dict()

        # plot
        if epoch % 20 == 0:
            net.eval()
            sSavePath = os.path.join(
                saveDir, 'figs',
                sStartTime + '_autoencoder{:d}.png'.format(epoch))
            xRecreate = net(images)
            plotAutoEnc(images, xRecreate, sSavePath)

        # shrink step size
        if epoch % 150 == 0:
            for p in optimizer.param_groups:
                p['lr'] /= 10.0
                print("lr: ", p['lr'])

    d = net.d
    # compute mean and std for normalization
    mu = torch.zeros((1, d), dtype=argType, device=device)
    musqrd = torch.zeros((1, d), dtype=argType, device=device)
    totImages = 0
    net.load_state_dict(bestParams)
    i = 0
    net.eval()
    with torch.no_grad():
        for data in train_loader:
            # _ stands in for labels, here
            images, _ = data
            images = images.view(images.size(0), -1)
            images = cvt(images)
            outputs = net.encode(images)
            nImages = outputs.shape[0]
            totImages += nImages
            # NOTE: batch means are averaged over batches below (divide by i),
            # so the last partial batch is slightly over-weighted.
            mu += torch.mean(outputs, dim=0, keepdims=True)  # *nImages
            musqrd += torch.mean(outputs**2, dim=0, keepdims=True)  # *nImages
            # check quality
            if i == 0:
                sSavePath = os.path.join(saveDir, 'figs',
                                         sStartTime + '_autoencoder.png')
                outputs = (net.encode(images) - 2.34) / 0.005
                xRecreate = net.decode(outputs * 0.005 + 2.34)
                plotAutoEnc(images, xRecreate, sSavePath)
                sSavePath = os.path.join(saveDir, 'figs',
                                         sStartTime + '_noise_autoencoder.png')
                xRecreate = net.decode(outputs + 1.0 * torch.randn_like(outputs))
                plotAutoEnc(images, xRecreate, sSavePath)
            i += 1

    mu = mu / i
    musqrd = musqrd / i
    # std = sqrt(E[x^2] - E[x]^2); abs() guards tiny negative rounding.
    std = torch.sqrt(torch.abs(mu**2 - musqrd))
    mu.requires_grad = False
    std.requires_grad = False
    net.mu = mu
    net.std = std
    torch.save({
        'state_dict': net.state_dict(),
    }, os.path.join(saveDir, sStartTime + '_autoenc_checkpt.pth'))
    return net
# -*- coding: utf-8 -*- """ CP-Flow on toy conditional distributions """ import gc import matplotlib.pyplot as plt import seaborn as sns import numpy as np import torch from lib.flows import SequentialFlow, DeepConvexFlow, ActNorm from lib.icnn import PICNN as PICNN from data.toy_data import OneDMixtureOfGaussians as ToyData from lib.utils import makedirs makedirs('figures/toy/cond_MoG/') def savefig(fn): plt.savefig(f'figures/toy/cond_MoG/{fn}') torch.set_default_dtype(torch.float64) batch_size_train = 128 batch_size_test = 64 # noinspection PyUnresolvedReferences train_loader = torch.utils.data.DataLoader(ToyData(50000), batch_size=batch_size_train, shuffle=True)
def run(args, kwargs):
    """Train/evaluate a (flow-)VAE with per-epoch-adjusted ODE tolerances.

    Builds a snapshot directory from solver/tolerance settings, selects the
    model by ``args.flow``, trains with early stopping on validation loss,
    then evaluates the best checkpoint on validation and test sets.

    NOTE(review): depends on module-level names defined elsewhere in this
    file (utils, VAE, CNFVAE, load_dataset, update_tolerances,
    decay_factors, set_cnf_options, train, evaluate, plot_training_curve).
    """
    # ================================================================================================================
    # SNAPSHOTS
    # ================================================================================================================
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    # Encode the solver/tolerance configuration in the snapshot path so
    # runs with different settings never collide.
    if args.automatic_saving == True:
        path = '{}/{}/{}/{}/{}/{}/{}/{}/{}/'.format(args.solver, args.dataset,
                                                    args.layer_type, args.atol,
                                                    args.rtol, args.atol_start,
                                                    args.rtol_start,
                                                    args.warmup_steps,
                                                    args.manual_seed)
    else:
        path = 'test/'
    args.snap_dir = os.path.join(args.out_dir, path)
    if not os.path.exists(args.snap_dir):
        os.makedirs(args.snap_dir)

    # logger
    utils.makedirs(args.snap_dir)
    logger = utils.get_logger(logpath=os.path.join(args.snap_dir, 'logs'),
                              filepath=os.path.abspath(__file__))
    logger.info(args)

    # SAVING: persist the argument namespace alongside checkpoints.
    torch.save(args, args.snap_dir + 'config.config')

    # ================================================================================================================
    # LOAD DATA
    # ================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    if not args.evaluate:
        # Meters for forward/backward ODE function evaluations (CNF only).
        nfef_meter = utils.AverageMeter()
        nfeb_meter = utils.AverageMeter()

        # ==============================================================================================================
        # SELECT MODEL
        # ==============================================================================================================
        # flow parameters and architecture choice are passed on to model through args
        if args.flow == 'no_flow':
            model = VAE.VAE(args)
        elif args.flow == 'planar':
            model = VAE.PlanarVAE(args)
        elif args.flow == 'iaf':
            model = VAE.IAFVAE(args)
        elif args.flow == 'orthogonal':
            model = VAE.OrthogonalSylvesterVAE(args)
        elif args.flow == 'householder':
            model = VAE.HouseholderSylvesterVAE(args)
        elif args.flow == 'triangular':
            model = VAE.TriangularSylvesterVAE(args)
        elif args.flow == 'cnf':
            model = CNFVAE.CNFVAE(args)
        elif args.flow == 'cnf_bias':
            model = CNFVAE.AmortizedBiasCNFVAE(args)
        elif args.flow == 'cnf_hyper':
            model = CNFVAE.HypernetCNFVAE(args)
        elif args.flow == 'cnf_lyper':
            model = CNFVAE.LypernetCNFVAE(args)
        elif args.flow == 'cnf_rank':
            model = CNFVAE.AmortizedLowRankCNFVAE(args)
        else:
            raise ValueError('Invalid flow choice')

        if args.retrain_encoder:
            # Initialize only the decoder ('p_x') weights from a saved model.
            logger.info(f"Initializing decoder from {args.model_path}")
            dec_model = torch.load(args.model_path)
            dec_sd = {}
            for k, v in dec_model.state_dict().items():
                if 'p_x' in k:
                    dec_sd[k] = v
            model.load_state_dict(dec_sd, strict=False)

        if args.cuda:
            logger.info("Model on GPU")
            model.cuda()

        logger.info(model)
        logger.info("Number of trainable parameters: {}".format(
            count_parameters(model)))

        if args.retrain_encoder:
            # Keep the decoder frozen: optimize all non-'p_x' parameters.
            parameters = []
            logger.info('Optimizing over:')
            for name, param in model.named_parameters():
                if 'p_x' not in name:
                    logger.info(name)
                    parameters.append(param)
        else:
            parameters = model.parameters()

        optimizer = optim.Adamax(parameters, lr=args.learning_rate, eps=1.e-7)

        # ==============================================================================================================
        # TRAINING
        # ==============================================================================================================
        train_loss = []
        val_loss = []

        # for early stopping
        best_loss = np.inf
        best_bpd = np.inf
        e = 0
        epoch = 0

        train_times = []

        for epoch in range(1, args.epochs + 1):
            # Re-derive ODE solver tolerances for this epoch and push them
            # into the CNF blocks before training.
            atol, rtol = update_tolerances(args, epoch, decay_factors)
            print(atol)
            set_cnf_options(args, atol, rtol, model)

            t_start = time.time()
            if 'cnf' not in args.flow:
                tr_loss = train(epoch, train_loader, model, optimizer, args,
                                logger)
            else:
                tr_loss, nfef_meter, nfeb_meter = train(
                    epoch, train_loader, model, optimizer, args, logger,
                    nfef_meter, nfeb_meter)
            train_loss.append(tr_loss)
            train_times.append(time.time() - t_start)
            logger.info('One training epoch took %.2f seconds' %
                        (time.time() - t_start))

            v_loss, v_bpd = evaluate(val_loader, model, args, logger,
                                     epoch=epoch)
            val_loss.append(v_loss)

            # early-stopping: reset patience counter on improvement,
            # otherwise count (only after warmup) and stop when exceeded.
            if v_loss < best_loss:
                e = 0
                best_loss = v_loss
                if args.input_type != 'binary':
                    best_bpd = v_bpd
                logger.info('->model saved<-')
                torch.save(model, args.snap_dir + 'model.model')
                # torch.save(model, snap_dir + args.flow + '_' + args.architecture + '.model')
            elif (args.early_stopping_epochs > 0) and (epoch >= args.warmup):
                e += 1
                if e > args.early_stopping_epochs:
                    break

            if args.input_type == 'binary':
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f})\n'.format(
                        e, args.early_stopping_epochs, best_loss))
            else:
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f}, bpd {:.4f})\n'
                    .format(e, args.early_stopping_epochs, best_loss,
                            best_bpd))

            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

        train_loss = np.hstack(train_loss)
        val_loss = np.array(val_loss)
        plot_training_curve(train_loss,
                            val_loss,
                            fname=args.snap_dir + '/training_curve.pdf')

        # training time per epoch
        train_times = np.array(train_times)
        mean_train_time = np.mean(train_times)
        std_train_time = np.std(train_times, ddof=1)
        logger.info('Average train time per epoch: %.2f +/- %.2f' %
                    (mean_train_time, std_train_time))

        # ==============================================================================================================
        # EVALUATION
        # ==============================================================================================================
        logger.info(args)
        logger.info('Stopped after %d epochs' % epoch)
        logger.info('Average train time per epoch: %.2f +/- %.2f' %
                    (mean_train_time, std_train_time))

        final_model = torch.load(args.snap_dir + 'model.model')
        validation_loss, validation_bpd = evaluate(val_loader, final_model,
                                                   args, logger)
    else:
        validation_loss = "N/A"
        validation_bpd = "N/A"
        logger.info(f"Loading model from {args.model_path}")
        final_model = torch.load(args.model_path)

    test_loss, test_bpd = evaluate(test_loader,
                                   final_model,
                                   args,
                                   logger,
                                   testing=True)

    logger.info(
        'FINAL EVALUATION ON VALIDATION SET. ELBO (VAL): {:.4f}'.format(
            validation_loss))
type=float, default=5., help="We subsample points in the interval [0, args.max_tp]") parser.add_argument('--noise-weight', type=float, default=0.01, help="Noise amplitude for generated traejctories") parser.add_argument('--gpu', type=int, default=0, help="GPU") args = parser.parse_args() device = torch.device( "cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu") file_name = os.path.basename(__file__)[:-3] utils.makedirs(args.save) ##################################################################################################### if __name__ == '__main__': torch.manual_seed(args.random_seed) np.random.seed(args.random_seed) experimentID = args.load if experimentID is None: # Make a new experiment ID experimentID = int(SystemRandom().random() * 100000) ckpt_path = os.path.join(args.save, "experiment_" + str(experimentID) + '.ckpt') start = time.time()
def main(conf):
    """SVD-residual feature pipeline for the question-pair dataset.

    Fits (or loads cached) TF-IDF-style vectorizer and a truncated SVD of
    the stacked question matrix, then writes per-question SVD residual
    norms plus euclidean/cosine distances for train and test CSVs.
    """
    dump_dir = conf['svddist.dump.dir']
    makedirs(dump_dir)
    dump_config_file = join_path(dump_dir, 'application.conf')
    dump_config(conf, dump_config_file)

    logging.info('Loading train dataset')
    train_df = load_train_df(conf['svddist.dataset'])

    vectorizer_file = join_path(dump_dir, 'vectorizer.pkl')
    try:
        logging.info('Loading vectorizer dump')
        vectorizer = joblib.load(vectorizer_file)
    # FIX: was a bare `except:`, which also swallows KeyboardInterrupt
    # and SystemExit; cache-miss fallback should only catch real errors.
    except Exception:
        logging.info('Loading vectorizer dump failed')
        logging.info('Traininig vectorizer')
        vectorizer = train_vectorizer(train_df, **conf['svddist.vectorizer'])
        logging.info('Writing vectorizer dump')
        joblib.dump(vectorizer, vectorizer_file)

    features_file = join_path(dump_dir, 'features_train.npz')
    logging.info('Loading cached train feature matrix from %s', features_file)
    X = load_feature_matrix(features_file)
    if X is None:
        logging.info('Unable to load cached train feature matrix')
        logging.info('Computing train feature matrix')
        X = compute_feature_matrix(train_df, vectorizer, combine='stack')
        logging.info('Writing train feature matrix to %s', features_file)
        save_feature_matrix(X, features_file)

    logging.info('Loading SVD decomposition')
    k = conf['svddist.svd'].get_int('k')
    singular_values_file = join_path(dump_dir, 'singular_values.txt')
    singular_vectors_file = join_path(dump_dir, 'singular_vectors.npz')
    try:
        S = np.loadtxt(singular_values_file)
        VT = np.load(singular_vectors_file)['VT']
        # cached decomposition must match the configured rank
        assert k == len(S)
    # FIX: bare `except:` narrowed; AssertionError is still caught here,
    # triggering recomputation when the cached rank differs.
    except Exception:
        logging.info('Loading SVD decomposition failed')
        logging.info('Computing SVD decomposition')
        S, VT = compute_svd(X.asfptype(), **conf['svddist.svd'])
        logging.info('Writing singular values to file')
        np.savetxt(singular_values_file, S)
        np.savez(singular_vectors_file, VT=VT)

    logging.info('Computing train SVD features')
    # Scale by sqrt(n) so projected features have comparable variance.
    Sinv = np.diag(1. / S) * np.sqrt(X.shape[0])
    U = X.dot(VT.transpose().dot(Sinv))
    logging.info('Train feature matrix dimensions: %s', U.shape)

    logging.info('Symmetrizing input features')
    # The matrix stacks q1 rows on top of q2 rows — split back apart.
    Uq1, Uq2 = np.vsplit(U, 2)
    del U

    logging.info('Computing euclidean')
    train_df['svd_eucl'] = euclidean(Uq1, Uq2)
    logging.info('Computing cosine')
    train_df['svd_cosine'] = cosine(Uq1, Uq2)
    del Uq1, Uq2

    train_df[[
        FieldsTrain.id, FieldsTrain.is_duplicate, 'svd_eucl', 'svd_cosine'
    ]].to_csv(join_path(dump_dir, 'train.csv'), index=False)

    logging.info('Loading test dataset')
    test_df = load_test_df(conf['svddist.dataset'])

    logging.info('Computing test features')
    X = compute_feature_matrix(test_df, vectorizer, combine='stack')

    logging.info('Computing test SVD features')
    U = X.dot(VT.transpose().dot(Sinv))

    logging.info('Symmetrizing input features')
    Uq1, Uq2 = np.vsplit(U, 2)
    del U

    logging.info('Computing test euclidean')
    test_df['svd_eucl'] = euclidean(Uq1, Uq2)
    logging.info('Computing test cosine')
    test_df['svd_cosine'] = cosine(Uq1, Uq2)
    del Uq1, Uq2

    logging.info('Writing test dataset')
    test_df[[
        FieldsTest.test_id, 'svd_eucl', 'svd_cosine'
    ]].to_csv(join_path(dump_dir, 'test.csv'), index=False)
def main(conf):
    """Train one libffm model per pre-built CV fold and predict all splits.

    Expects the libffm-format fold files and ``folds.pkl`` to already exist
    in the dump directory (the generation steps below are kept commented
    out). Shells out to the ``ffm-train`` / ``ffm-predict`` binaries and
    logs per-fold train/validation AUC.

    Side effects: changes the process working directory to ``dump_dir``.
    """
    dump_dir = abspath(conf['libffm']['dump']['dir'])
    makedirs(dump_dir)
    data_dir = abspath(conf['libffm']['data']['dir'])
    dfc = DataFrameCols(data_dir)

    target = 'is_attributed'
    fields = {'ip': 0, 'app': 1, 'device': 2, 'os': 3, 'channel': 4}
    # Per-field index offsets so feature ids from different fields
    # don't collide in the libffm feature space.
    shifts = {
        'ip': 0,
        'app': 364779,
        'device': 365548,
        'os': 369776,
        'channel': 370733
    }

    # 1) write test data
    # logging.info('Writing test data in libffm format')
    # df = dfc.load_df(columns=['id', target] + list(fields.keys()))
    # df = df[df[target] == -1]
    # df[target] = 0  # do we need this?
    # df = write_libffm_data(df, target, fields, shifts)
    test_fname = join_path(dump_dir, 'test.txt')
    # df[['data']].to_csv(test_fname, header=False, index=False, quoting=csv.QUOTE_NONE)
    # del df
    # gc.collect()
    # exit()

    # 2) write training folds
    # logging.info('Writing k-fold training data')
    # df = dfc.load_df(columns=['id', target] + list(fields.keys()))
    # df = df[df[target] >= 0]
    # df = write_libffm_data(df, target, fields, shifts)
    #
    # folds = []
    # skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1337)
    # for fold_idx, valid_idx in skf.split(df['id'].values, df[target].values):
    #     folds.append((fold_idx, valid_idx))
    #
    # with open(join_path(dump_dir, 'folds.pkl'), 'wb') as f:
    #     pickle.dump(folds, f)
    #
    # for j_fold, (fold_idx, valid_idx) in enumerate(folds):
    #     logging.info('Writing fold %d in libffm format', j_fold)
    #     train_fname = join_path(dump_dir, 'train_fold_%d.txt' % j_fold)
    #     df.loc[fold_idx, ['data']].to_csv(train_fname, header=False, index=False, quoting=csv.QUOTE_NONE)
    #     valid_fname = join_path(dump_dir, 'valid_fold_%d.txt' % j_fold)
    #     df.loc[valid_idx, ['data']].to_csv(valid_fname, header=False, index=False, quoting=csv.QUOTE_NONE)
    #
    # del df
    # gc.collect()
    # exit()

    # Only labels are needed here (features are read from the fold files).
    df = dfc.load_df(columns=['id', target])
    df = df[df[target] >= 0]

    with open(join_path(dump_dir, 'folds.pkl'), 'rb') as f:
        folds = pickle.load(f)

    chdir(dump_dir)

    for j_fold, (fold_idx, valid_idx) in enumerate(folds):
        logging.info('Training on fold %d', j_fold)
        train_fname = join_path(dump_dir, 'train_fold_%d.txt' % j_fold)
        valid_fname = join_path(dump_dir, 'valid_fold_%d.txt' % j_fold)
        model_fname = join_path(dump_dir, 'model_%d.bin' % j_fold)
        # Train with early stopping against the fold's validation file (-p).
        proc = subprocess.run([
            'ffm-train', '-p', valid_fname, '-l',
            str(conf['libffm']['options']['lambda']), '-k',
            str(conf['libffm']['options']['factor']), '-r',
            str(conf['libffm']['options']['learning_rate']), '-t',
            str(conf['libffm']['options']['num_iter']), train_fname,
            model_fname
        ],
                              stdout=subprocess.PIPE,
                              check=True)
        logging.info('Running command %s', ' '.join(proc.args))
        logging.info('Process return code %d', proc.returncode)
        logging.info(proc.stdout.decode('utf-8'))

        train_pred_file = join_path(dump_dir, 'train_pred_%d.txt' % j_fold)
        proc = subprocess.run(
            ['ffm-predict', train_fname, model_fname, train_pred_file],
            stdout=subprocess.PIPE,
            check=True)
        logging.info('Running command %s', ' '.join(proc.args))
        logging.info('Process return code %d', proc.returncode)
        # ffm-predict writes one probability per line.
        with open(train_pred_file, 'r') as f:
            p_train = np.array([float(s) for s in f.readlines()],
                               dtype=np.float32)
        auc_train = roc_auc_score(df.loc[fold_idx, target].values, p_train)

        valid_pred_file = join_path(dump_dir, 'valid_pred_%d.txt' % j_fold)
        proc = subprocess.run(
            ['ffm-predict', valid_fname, model_fname, valid_pred_file],
            stdout=subprocess.PIPE,
            check=True)
        logging.info('Running command %s', ' '.join(proc.args))
        logging.info('Process return code %d', proc.returncode)
        with open(valid_pred_file, 'r') as f:
            p_valid = np.array([float(s) for s in f.readlines()],
                               dtype=np.float32)
        auc_valid = roc_auc_score(df.loc[valid_idx, target].values, p_valid)
        logging.info('Fold quality: auc_train=%f auc_valid=%f', auc_train,
                     auc_valid)

        test_pred_file = join_path(dump_dir, 'test_pred_%d.txt' % j_fold)
        proc = subprocess.run(
            ['ffm-predict', test_fname, model_fname, test_pred_file],
            stdout=subprocess.PIPE,
            check=True)
        logging.info('Running command %s', ' '.join(proc.args))
        logging.info('Process return code %d', proc.returncode)
def visualize(epoch, model, gmm, itr, real_imgs, global_itr):
    """Save visualization images for template-based stain/color transfer.

    Runs the flow + GMM on a template batch (``real_imgs``) and on a test
    batch, converts the test batch's color distribution toward the template
    statistics, and writes template/test/converted/cluster-map PNGs under
    ``args.save/imgs``.

    NOTE(review): uses module-level globals (args, test_loader, device,
    input_size, rescale, imgtf, Image, random) — assumed defined elsewhere
    in this file.
    """
    print("Starting Visualisation")
    model.eval()
    gmm.eval()
    utils.makedirs(os.path.join(args.save, 'imgs'))
    for x_test, y_test in test_loader:
        # x_test = x_test[0,...].unsqueeze(0)
        # y_test = y_test[0,...].unsqueeze(0)
        x_test = x_test.to(device)

        ### TEMPLATES ###
        # Use the density channel of the template images, replicated per
        # cluster, as conditioning input.
        D = real_imgs[:, 0, ...].unsqueeze(1)
        D = rescale(D)  # Scale to [0,1] interval
        D = D.repeat(1, args.nclusters, 1, 1)
        x = real_imgs
        with torch.no_grad():
            if isinstance(model, torch.nn.DataParallel):
                z_logp = model.module(D.view(-1, *input_size[1:]),
                                      0,
                                      classify=False)
            else:
                z_logp = model(D.view(-1, *input_size[1:]), 0, classify=False)
            z, delta_logp = z_logp
            if isinstance(gmm, torch.nn.DataParallel):
                logpz, params = gmm.module(
                    z.view(-1, args.nclusters, args.imagesize,
                           args.imagesize), x.permute(0, 2, 3, 1))
            else:
                logpz, params = gmm(
                    z.view(-1, args.nclusters, args.imagesize,
                           args.imagesize), x.permute(0, 2, 3, 1))
        # Template GMM statistics, reshaped to (clusters, 1, channels).
        mu_tmpl, std_tmpl, gamma = params
        mu_tmpl = mu_tmpl.cpu().numpy()
        std_tmpl = std_tmpl.cpu().numpy()
        gamma = gamma.cpu().numpy()
        mu_tmpl = mu_tmpl[..., np.newaxis]
        std_tmpl = std_tmpl[..., np.newaxis]
        mu_tmpl = np.swapaxes(mu_tmpl, 0, 1)  # (3,4,1) -> (4,3,1)
        mu_tmpl = np.swapaxes(mu_tmpl, 1, 2)  # (4,3,1) -> (4,1,3)
        std_tmpl = np.swapaxes(std_tmpl, 0, 1)  # (3,4,1) -> (4,3,1)
        std_tmpl = np.swapaxes(std_tmpl, 1, 2)  # (4,3,1) -> (4,1,3)

        ### DEPLOY ###
        # Repeat the same pipeline on the test batch to get its statistics.
        D = x_test[:, 0, ...].unsqueeze(1)
        D = rescale(D)  # Scale to [0,1] interval
        D = D.repeat(1, args.nclusters, 1, 1)
        with torch.no_grad():
            if isinstance(model, torch.nn.DataParallel):
                z_logp = model.module(D.view(-1, *input_size[1:]),
                                      0,
                                      classify=False)
            else:
                z_logp = model(D.view(-1, *input_size[1:]), 0, classify=False)
            z, delta_logp = z_logp
            if isinstance(gmm, torch.nn.DataParallel):
                logpz, params = gmm.module(
                    z.view(-1, args.nclusters, args.imagesize,
                           args.imagesize), x_test.permute(0, 2, 3, 1))
            else:
                logpz, params = gmm(
                    z.view(-1, args.nclusters, args.imagesize,
                           args.imagesize), x_test.permute(0, 2, 3, 1))
        mu, std, pi = params
        mu = mu.cpu().numpy()
        std = std.cpu().numpy()
        pi = pi.cpu().numpy()
        mu = mu[..., np.newaxis]
        std = std[..., np.newaxis]
        mu = np.swapaxes(mu, 0, 1)  # (3,4,1) -> (4,3,1)
        mu = np.swapaxes(mu, 1, 2)  # (4,3,1) -> (4,1,3)
        std = np.swapaxes(std, 0, 1)  # (3,4,1) -> (4,3,1)
        std = np.swapaxes(std, 1, 2)  # (4,3,1) -> (4,1,3)

        # Move channels last (N,C,H,W) -> (N,H,W,C) for the numpy transform.
        X_hsd = np.swapaxes(x_test.cpu().numpy(), 1, 2)
        X_hsd = np.swapaxes(X_hsd, 2, 3)
        X_conv = imgtf.image_dist_transform(X_hsd, mu, std, pi, mu_tmpl,
                                            std_tmpl, args)

        # save a random image from the batch
        im_no = random.randint(0, args.batchsize - 1)
        im_tmpl = real_imgs[im_no, ...].cpu().numpy()
        im_tmpl = np.swapaxes(im_tmpl, 0, 1)
        im_tmpl = np.swapaxes(im_tmpl, 1, -1)
        im_tmpl = imgtf.HSD2RGB_Numpy(im_tmpl)
        im_tmpl = (im_tmpl * 255).astype('uint8')
        im_tmpl = Image.fromarray(im_tmpl)
        im_tmpl.save(
            os.path.join(args.save, 'imgs', f'im_tmpl_{global_itr}.png'))

        im_test = x_test[im_no, ...].cpu().numpy()
        im_test = np.swapaxes(im_test, 0, 1)
        im_test = np.swapaxes(im_test, 1, -1)
        im_test = imgtf.HSD2RGB_Numpy(im_test)
        im_test = (im_test * 255).astype('uint8')
        im_test = Image.fromarray(im_test)
        im_test.save(
            os.path.join(args.save, 'imgs', f'im_test_{global_itr}.png'))

        im_D = D[0, 0, ...].cpu().numpy()
        im_D = (im_D * 255).astype('uint8')
        im_D = Image.fromarray(im_D, 'L')
        im_D.save(os.path.join(args.save, 'imgs', f'im_D_{global_itr}.png'))

        im_conv = X_conv[im_no, ...].reshape(args.imagesize, args.imagesize,
                                             3)
        im_conv = Image.fromarray(im_conv)
        im_conv.save(
            os.path.join(args.save, 'imgs', f'im_conv_{global_itr}.png'))

        # gamma: color-code the argmax cluster assignment of the template.
        ClsLbl = np.argmax(gamma, axis=-1)
        ClsLbl = ClsLbl.astype('float32')
        ColorTable = [[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 255, 0],
                      [0, 255, 255], [255, 0, 255]]
        colors = np.array(ColorTable, dtype='float32')
        Msk = np.tile(np.expand_dims(ClsLbl, axis=-1), (1, 1, 1, 3))
        for k in range(0, args.nclusters):
            # 1 x 256 x 256 x 1     1 x 3
            ClrTmpl = np.einsum('anmd,df->anmf',
                                np.expand_dims(np.ones_like(ClsLbl), axis=3),
                                np.reshape(colors[k, ...], [1, 3]))
            # ClrTmpl = 1 x 256 x 256 x 3
            Msk = np.where(np.equal(Msk, k), ClrTmpl, Msk)
        im_gamma = Msk[0].astype('uint8')
        im_gamma = Image.fromarray(im_gamma)
        im_gamma.save(
            os.path.join(args.save, 'imgs', f'im_gamma_{global_itr}.png'))

        # pi: same color-coding for the test batch's cluster assignment.
        ClsLbl = np.argmax(pi, axis=-1)
        ClsLbl = ClsLbl.astype('float32')
        ColorTable = [[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 255, 0],
                      [0, 255, 255], [255, 0, 255]]
        colors = np.array(ColorTable, dtype='float32')
        Msk = np.tile(np.expand_dims(ClsLbl, axis=-1), (1, 1, 1, 3))
        for k in range(0, args.nclusters):
            # 1 x 256 x 256 x 1     1 x 3
            ClrTmpl = np.einsum('anmd,df->anmf',
                                np.expand_dims(np.ones_like(ClsLbl), axis=3),
                                np.reshape(colors[k, ...], [1, 3]))
            # ClrTmpl = 1 x 256 x 256 x 3
            Msk = np.where(np.equal(Msk, k), ClrTmpl, Msk)
        im_gamma = Msk[0].astype('uint8')
        im_gamma = Image.fromarray(im_gamma)
        im_gamma.save(
            os.path.join(args.save, 'imgs', f'im_pi_{global_itr}.png'))

        # NOTE(review): returning inside the loop means only the first test
        # batch is visualized — appears intentional; confirm.
        model.train()
        gmm.train()
        return
def main(conf):
    """Train one SVD feature model per configured section.

    For every entry under ``conf['svd']`` (except the shared ``dataset``
    key): load or fit a text vectorizer, build sparse feature matrices for
    the train and test sets (cached as ``.npz`` dumps), compute a truncated
    SVD, project both sets onto the singular vectors, and write the
    resulting dense features to ``train.csv`` / ``test.csv`` under the
    section's ``dump.dir``.

    Args:
        conf: hierarchical config object (supports item access, ``get`` and
            ``get_int``) describing the dataset, vectorizers and SVD models.
    """
    logging.info('Loading train dataset')
    train_df = load_train_df(conf['svd.dataset'])

    logging.info('Loading test dataset')
    test_df = load_test_df(conf['svd.dataset'])

    # .items() instead of the Python-2-only .iteritems().
    for f, cnf in conf['svd'].items():
        if f == 'dataset':
            continue
        if not cnf.get('enabled', True):
            continue

        logging.info('Start traning SVD model %s', f)
        dump_dir = cnf['dump.dir']
        makedirs(dump_dir)
        logging.info('Dump %s', dump_dir)

        vectorizer_file = join_path(dump_dir, 'vectorizer.pkl')
        try:
            logging.info('Loading vectorizer dump')
            vectorizer = joblib.load(vectorizer_file)
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C / SystemExit are
            # not swallowed. Any load failure (missing/corrupt dump) falls
            # back to retraining the vectorizer from scratch.
            logging.info('Loading vectorizer dump failed')
            logging.info('Traininig vectorizer: %s', cnf['vectorizer'])
            vectorizer = train_vectorizer(train_df, **cnf['vectorizer'])
            logging.info('Writing vectorizer dump')
            joblib.dump(vectorizer, vectorizer_file)

        train_features_matrix_file = join_path(dump_dir, 'train_features.npz')
        logging.info('Loading train features matrix')
        X = load_feature_matrix(train_features_matrix_file)
        if X is None:
            logging.info('Loading train feature matrix failed')
            logging.info('Computing train feature matrix')
            X = compute_feature_matrix(train_df, vectorizer, combine=cnf.get('model.transform', None))
            logging.info('Writing train feature matrix dump')
            save_feature_matrix(X, train_features_matrix_file)

        logging.info('Computing SVD decomposition')
        ksvd = cnf['model'].get_int('k')
        S, VT = compute_svd(X.asfptype(), **cnf['model'])
        # Whitening-style projection: scale each component by 1/sigma,
        # then by sqrt(n) to keep feature variance comparable across k.
        Sinv = np.diag(1. / S) * np.sqrt(X.shape[0])
        logging.info('Singular values %s', S)

        logging.info('Computing train SVD features')
        U = X.dot(VT.transpose()).dot(Sinv)
        logging.info('Train features variance: %s', np.var(U, axis=0))

        # List comprehensions instead of map(): under Python 3 the previous
        # map() objects were single-use iterators, so building features_q1
        # exhausted ``features`` and features_q2 came out empty, and
        # ``+`` / ``.append`` on map objects raised TypeError.
        features = [f + '_%d' % i for i in range(U.shape[1])]
        if cnf.get('model.transform', None) == 'stack':
            # Stacked mode: first train_df.shape[0] rows of U are the q1
            # projections, the remainder are q2.
            features_q1 = [s + '_q1' for s in features]
            features_q2 = [s + '_q2' for s in features]
            features = features_q1 + features_q2
            train_features_df_q1 = pd.DataFrame(U[:train_df.shape[0], :], columns=features_q1)
            train_features_df_q2 = pd.DataFrame(U[train_df.shape[0]:, :], columns=features_q2)
            train_df = pd.concat([train_df, train_features_df_q1, train_features_df_q2], axis=1)
            train_df['svd_dist_eucl'] = train_df.apply(lambda r: compute_svd_distance_eucl(r, f, ksvd), axis=1)
            features.append('svd_dist_eucl')
        else:
            train_features_df = pd.DataFrame(U, columns=features)
            train_df = pd.concat([train_df, train_features_df], axis=1)

        for feature in features:
            logging.info('Feature %s AUC=%s', feature,
                         roc_auc_score(train_df[FieldsTrain.is_duplicate], train_df[feature]))

        logging.info('Writing train features dump')
        train_file = join_path(dump_dir, 'train.csv')
        train_df[[FieldsTrain.id, FieldsTrain.is_duplicate] + features].to_csv(train_file, index=False)

        test_features_matrix_file = join_path(dump_dir, 'test_features.npz')
        logging.info('Loading test features matrix')
        X = load_feature_matrix(test_features_matrix_file)
        if X is None:
            logging.info('Loading test feature matrix failed')
            logging.info('Computing test feature matrix')
            X = compute_feature_matrix(test_df, vectorizer, combine=cnf.get('model.transform', None))
            logging.info('Writing test feature matrix dump')
            save_feature_matrix(X, test_features_matrix_file)

        # Project test set with the train-fitted basis (VT) and scaling.
        U = X.dot(VT.transpose()).dot(Sinv)
        logging.info('Test features variance: %s', np.var(U, axis=0))

        logging.info('Computing test SVD features')
        if cnf.get('model.transform', None) == 'stack':
            logging.info('Computing q1 test SVD features')
            test_features_df_q1 = pd.DataFrame(U[:test_df.shape[0], :], columns=features_q1)
            test_df = pd.concat([test_df, test_features_df_q1], axis=1)
            del test_features_df_q1
            logging.info('Computing q2 test SVD features')
            test_features_df_q2 = pd.DataFrame(U[test_df.shape[0]:, :], columns=features_q2)
            test_df = pd.concat([test_df, test_features_df_q2], axis=1)
            del test_features_df_q2
            logging.info('Computing svd distances')
            test_df['svd_dist_eucl'] = test_df.apply(lambda r: compute_svd_distance_eucl(r, f, ksvd), axis=1)
        else:
            test_features_df = pd.DataFrame(U, columns=features)
            test_df = pd.concat([test_df, test_features_df], axis=1)

        logging.info('Writing test features dump')
        test_file = join_path(dump_dir, 'test.csv')
        test_df[[FieldsTest.test_id] + features].to_csv(test_file, index=False)
if args.extrap == "True": print("Running extrap mode" + "-" * 80) args.mode = "extrap" elif args.extrap == "False": print("Running interp mode" + "-" * 80) args.mode = "interp" ##################################################################################################### if __name__ == '__main__': torch.manual_seed(args.random_seed) np.random.seed(args.random_seed) ############ Saving Path and Preload. file_name = os.path.basename(__file__)[:-3] # run_models utils.makedirs(args.save) utils.makedirs(args.save_graph) experimentID = args.load if experimentID is None: # Make a new experiment ID experimentID = int(SystemRandom().random() * 100000) ############ Loading Data print("Loading dataset: " + args.dataset) dataloader = ParseData(args.dataset, suffix=args.suffix, mode=args.mode, args=args) test_encoder, test_decoder, test_graph, test_batch = dataloader.load_data( sample_percent=args.sample_percent_test,
def setUp(self):
    """Build a throwaway backup environment for each test.

    Creates a temp tree (files/store/restore), seeds it with two small
    files, registers a FolderStore and an encrypted Backup in the shared
    config, and builds a blank options object. Raises if the required
    ~/.vault test configuration file is missing.
    """
    self.config = Config.get_config()
    self.db = DB()
    self.db.check_upgrade()
    # Record current DB ids so tests can detect rows added during the run
    # (see mark_db_ids elsewhere in this class — TODO confirm).
    self.mark_db_ids()

    # Fresh temp workspace per test; sub-folders for source files, the
    # store backend, and restore targets.
    self.test_folder = tempfile.mkdtemp()
    self.files_folder = os.path.join(self.test_folder, "files")
    self.store_folder = os.path.join(self.test_folder, "store")
    self.restore_folder = os.path.join(self.test_folder, "restore")
    utils.makedirs(self.files_folder)
    utils.makedirs(self.store_folder)
    utils.makedirs(self.restore_folder)

    # Build the base set of files
    with open(os.path.join(self.files_folder, "base"), "w") as f:
        f.write("base")
    with open(os.path.join(self.files_folder, "incr"), "w") as f:
        f.write("0")

    # External credentials/config for the store backends; fail fast if
    # the developer has not set it up.
    config_file = os.path.expanduser("~/.vault")
    if not os.path.exists(config_file):
        raise Exception("Vault test configuration file (~/.vault) does not exist")
    # NOTE(review): ``ConfigParser`` module name is the Python 2 spelling
    # (``configparser`` in Python 3) — presumably imported/aliased at file
    # top; verify against the imports.
    self.store_config = ConfigParser.RawConfigParser()
    self.store_config.read(config_file)

    # FOLDER STORE — local-folder backend with a 50MB quota.
    self.store = FolderStore("teststore", "50MB", True, self.store_folder)

    # DROPBOX STORE (alternative backend, kept for manual testing)
    # self.login = self.store_config.get("DropBox", "login")
    # self.password = self.store_config.get("DropBox", "password")
    # self.folder = self.store_config.get("DropBox", "folder")
    # self.app_key = self.store_config.get("DropBox", "app_key")
    # self.app_secret_key = self.store_config.get("DropBox", "app_secret_key")
    # self.store = DropBoxStore("teststore", 0, False, self.folder, self.login, self.password,
    #                           self.app_key, self.app_secret_key)

    # S3 STORE (alternative backend, kept for manual testing)
    # self.key = self.store_config.get("Amazon", "aws_access_key_id")
    # self.secret_key = self.store_config.get("Amazon", "aws_secret_access_key")
    # self.bucket = self.store_config.get("Amazon", "bucket")
    # self.store = S3Store("teststore", 0, False, bucket=self.bucket, key=self.key, secret_key=self.secret_key)

    # Now record the existance of this store
    self.config.storage[self.store.name] = self.store

    # Build the backup object (dont save config)
    self.backup = Backup("testbackup")
    self.backup.include_folders = [self.files_folder]
    self.backup.store = self.store.name
    self.backup.notify_msg = False
    # Force encryption on, remembering the old passphrase so it can be
    # restored on teardown.
    self.old_pass = self.config.data_passphrase
    self.config.data_passphrase = "goofy"
    self.backup.encrypt = True
    self.config.backups[self.backup.name] = self.backup

    # build an options object for use with the backup
    self.options = BlankClass()
    self.options.dry_run = False
    self.options.message = False
    self.options.email = False
    self.options.shutdown = False
    self.options.norecurse = False

    # How many backup/restore cycles the tests will run.
    self.cycles = 20
def main(rank, world_size, args):
    """Per-process entry point for distributed (DDP) flow training.

    Sets up the process group, builds data loaders and a MultiscaleFlow
    model wrapped in DDP, optionally resumes from a checkpoint, then runs
    the train/validate loop, saving figures and checkpoints from rank 0.

    Args:
        rank: this process's rank within the group.
        world_size: total number of processes.
        args: parsed command-line namespace (hyperparameters and paths).
    """
    setup(rank, world_size, args.port)

    # setup logger — only rank 0 creates the save dir and a real logger;
    # mprint is a no-op on all other ranks.
    if rank == 0:
        utils.makedirs(args.save)
        logger = utils.get_logger(os.path.join(args.save, "logs"))

    def mprint(msg):
        # Rank-0-only print helper.
        if rank == 0:
            logger.info(msg)

    mprint(args)
    device = torch.device(f'cuda:{rank}' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cuda':
        mprint('Found {} CUDA devices.'.format(torch.cuda.device_count()))
        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)
            mprint('{} \t Memory: {:.2f}GB'.format(props.name, props.total_memory / (1024**3)))
    else:
        mprint('WARNING: Using device {}'.format(device))

    # Per-rank seeding so each worker draws different noise.
    np.random.seed(args.seed + rank)
    torch.manual_seed(args.seed + rank)
    if device.type == 'cuda':
        torch.cuda.manual_seed(args.seed + rank)

    mprint('Loading dataset {}'.format(args.data))
    # Dataset and hyperparameters
    if args.data == 'cifar10':
        im_dim = 3
        transform_train = transforms.Compose([
            transforms.Resize(args.imagesize),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            add_noise if args.add_noise else identity,
        ])
        transform_test = transforms.Compose([
            transforms.Resize(args.imagesize),
            transforms.ToTensor(),
            add_noise if args.add_noise else identity,
        ])
        init_layer = flows.LogitTransform(0.05)
        # NOTE(review): this branch is selected by args.data == 'cifar10'
        # but actually loads SVHN (vdsets.SVHN) — looks like a leftover
        # from an experiment swap; confirm which dataset is intended.
        train_set = vdsets.SVHN(args.dataroot, download=True, split="train", transform=transform_train)
        sampler = torch.utils.data.distributed.DistributedSampler(train_set)
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batchsize,
            sampler=sampler,
        )
        test_loader = torch.utils.data.DataLoader(
            vdsets.SVHN(args.dataroot, download=True, split="test", transform=transform_test),
            batch_size=args.val_batchsize,
            shuffle=False,
        )
    elif args.data == 'mnist':
        im_dim = 1
        init_layer = flows.LogitTransform(1e-6)
        train_set = datasets.MNIST(
            args.dataroot,
            train=True,
            transform=transforms.Compose([
                transforms.Resize(args.imagesize),
                transforms.ToTensor(),
                add_noise if args.add_noise else identity,
            ]))
        sampler = torch.utils.data.distributed.DistributedSampler(train_set)
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batchsize,
            sampler=sampler,
        )
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(args.dataroot,
                           train=False,
                           transform=transforms.Compose([
                               transforms.Resize(args.imagesize),
                               transforms.ToTensor(),
                               add_noise if args.add_noise else identity,
                           ])),
            batch_size=args.val_batchsize,
            shuffle=False,
        )
    else:
        raise Exception(f'dataset not one of mnist / cifar10, got {args.data}')
    mprint('Dataset loaded.')
    mprint('Creating model.')

    input_size = (args.batchsize, im_dim, args.imagesize, args.imagesize)

    model = MultiscaleFlow(
        input_size,
        block_fn=partial(cpflow_block_fn,
                         block_type=args.block_type,
                         dimh=args.dimh,
                         num_hidden_layers=args.num_hidden_layers,
                         icnn_version=args.icnn,
                         num_pooling=args.num_pooling),
        n_blocks=list(map(int, args.nblocks.split('-'))),
        factor_out=args.factor_out,
        init_layer=init_layer,
        actnorm=args.actnorm,
        fc_end=args.fc_end,
        glow=args.glow,
    )
    model.to(device)
    model = DDP(model, device_ids=[rank], find_unused_parameters=True)
    ema = utils.ExponentialMovingAverage(model)
    mprint(model)
    mprint('EMA: {}'.format(ema))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.99), weight_decay=args.wd)

    # Saving and resuming: an existing most_recent.pth takes precedence
    # over --resume.
    best_test_bpd = math.inf
    begin_epoch = 0
    most_recent_path = os.path.join(args.save, 'models', 'most_recent.pth')
    checkpt_exists = os.path.exists(most_recent_path)
    if checkpt_exists:
        mprint(f"Resuming from {most_recent_path}")
        # deal with data-dependent initialization like actnorm.
        with torch.no_grad():
            x = torch.rand(8, *input_size[1:]).to(device)
            model(x)
        checkpt = torch.load(most_recent_path)
        begin_epoch = checkpt["epoch"] + 1
        model.module.load_state_dict(checkpt["state_dict"])
        ema.set(checkpt['ema'])
        optimizer.load_state_dict(checkpt["opt_state_dict"])
    elif args.resume:
        mprint(f"Resuming from {args.resume}")
        # deal with data-dependent initialization like actnorm.
        with torch.no_grad():
            x = torch.rand(8, *input_size[1:]).to(device)
            model(x)
        checkpt = torch.load(args.resume)
        begin_epoch = checkpt["epoch"] + 1
        model.module.load_state_dict(checkpt["state_dict"])
        ema.set(checkpt['ema'])
        optimizer.load_state_dict(checkpt["opt_state_dict"])
    mprint(optimizer)

    # Running averages for throughput/diagnostics.
    batch_time = utils.RunningAverageMeter(0.97)
    bpd_meter = utils.RunningAverageMeter(0.97)
    gnorm_meter = utils.RunningAverageMeter(0.97)
    cg_meter = utils.RunningAverageMeter(0.97)
    hnorm_meter = utils.RunningAverageMeter(0.97)

    update_lr(optimizer, 0, args)

    # for visualization: a fixed data batch and a fixed latent draw.
    fixed_x = next(iter(train_loader))[0][:8].to(device)
    fixed_z = torch.randn(8, im_dim * args.imagesize * args.imagesize).to(fixed_x)
    if rank == 0:
        utils.makedirs(os.path.join(args.save, 'figs'))
        # visualize(model, fixed_x, fixed_z, os.path.join(args.save, 'figs', 'init.png'))

    for epoch in range(begin_epoch, args.nepochs):
        # Re-shuffle the distributed sampler each epoch.
        sampler.set_epoch(epoch)
        flows.CG_ITERS_TRACER.clear()
        flows.HESS_NORM_TRACER.clear()
        mprint('Current LR {}'.format(optimizer.param_groups[0]['lr']))
        train(epoch, train_loader, model, optimizer, bpd_meter, gnorm_meter, cg_meter,
              hnorm_meter, batch_time, ema, device, mprint, world_size, args)
        val_time, test_bpd = validate(epoch, model, test_loader, ema, device)
        mprint('Epoch: [{0}]\tTime {1:.2f} | Test bits/dim {test_bpd:.4f}'.format(
            epoch, val_time, test_bpd=test_bpd))

        if rank == 0:
            utils.makedirs(os.path.join(args.save, 'figs'))
            visualize(model, fixed_x, fixed_z, os.path.join(args.save, 'figs', f'{epoch}.png'))
            utils.makedirs(os.path.join(args.save, "models"))
        # NOTE(review): unlike most_recent.pth below, this best-model save
        # is NOT guarded by rank == 0, so every rank writes the same file —
        # confirm whether the guard was intended here too.
        if test_bpd < best_test_bpd:
            best_test_bpd = test_bpd
            torch.save(
                {
                    'epoch': epoch,
                    'state_dict': model.module.state_dict(),
                    'opt_state_dict': optimizer.state_dict(),
                    'args': args,
                    'ema': ema,
                    'test_bpd': test_bpd,
                }, os.path.join(args.save, 'models', 'best_model.pth'))
        if rank == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'state_dict': model.module.state_dict(),
                    'opt_state_dict': optimizer.state_dict(),
                    'args': args,
                    'ema': ema,
                    'test_bpd': test_bpd,
                }, os.path.join(args.save, 'models', 'most_recent.pth'))
    cleanup()
print(model) print("Number of trainable parameters: {}".format(count_parameters(model))) model.eval() p_samples = toy_data.inf_train_gen(args.data, batch_size=800**2) with torch.no_grad(): sample_fn, density_fn = get_transforms(model) plt.figure(figsize=(10, 10)) ax = ax = plt.gca() viz_flow.plt_samples(p_samples, ax, npts=800) plt.subplots_adjust(left=0, right=1, top=1, bottom=0) fig_filename = os.path.join(args.save, 'figs', 'true_samples.jpg') utils.makedirs(os.path.dirname(fig_filename)) plt.savefig(fig_filename) plt.close() plt.figure(figsize=(10, 10)) ax = ax = plt.gca() viz_flow.plt_flow_density(standard_normal_logprob, density_fn, ax, npts=800, memory=200, device=device) plt.subplots_adjust(left=0, right=1, top=1, bottom=0) fig_filename = os.path.join(args.save, 'figs', 'model_density.jpg') utils.makedirs(os.path.dirname(fig_filename)) plt.savefig(fig_filename)
def run(args, kwargs):
    """Train (or evaluate) a flow-based VAE chosen by ``args.flow``.

    Builds a timestamped snapshot directory, selects the VAE variant,
    trains with early stopping (saving the best model), then evaluates the
    final model on the test set. With ``args.evaluate`` set, skips training
    and loads the model from ``args.model_path`` instead.

    Args:
        args: parsed command-line namespace; mutated (snap_dir,
            model_signature, and by load_dataset).
        kwargs: extra keyword arguments forwarded to load_dataset.
    """
    # ------------------------------------------------------------------
    # SNAPSHOTS: build a unique, self-describing output directory name
    # from the flow type, its hyperparameters and a timestamp.
    # ------------------------------------------------------------------
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    snapshots_path = os.path.join(args.out_dir, 'vae_' + args.dataset + '_')
    snap_dir = snapshots_path + args.flow

    if args.flow != 'no_flow':
        snap_dir += '_' + 'num_flows_' + str(args.num_flows)

    # Flow-specific hyperparameters baked into the directory name.
    if args.flow == 'orthogonal':
        snap_dir = snap_dir + '_num_vectors_' + str(args.num_ortho_vecs)
    elif args.flow == 'orthogonalH':
        snap_dir = snap_dir + '_num_householder_' + str(args.num_householder)
    elif args.flow == 'iaf':
        snap_dir = snap_dir + '_madehsize_' + str(args.made_h_size)
    elif args.flow == 'permutation':
        snap_dir = snap_dir + '_' + 'kernelsize_' + str(args.kernel_size)
    elif args.flow == 'mixed':
        snap_dir = snap_dir + '_' + 'num_householder_' + str(args.num_householder)
    elif args.flow == 'cnf_rank':
        snap_dir = snap_dir + '_rank_' + str(args.rank) + '_' + args.dims + '_num_blocks_' + str(args.num_blocks)
    elif 'cnf' in args.flow:
        snap_dir = snap_dir + '_' + args.dims + '_num_blocks_' + str(args.num_blocks)

    if args.retrain_encoder:
        snap_dir = snap_dir + '_retrain-encoder_'
    elif args.evaluate:
        snap_dir = snap_dir + '_evaluate_'

    snap_dir = snap_dir + '__' + args.model_signature + '/'

    args.snap_dir = snap_dir

    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    # logger
    utils.makedirs(args.snap_dir)
    logger = utils.get_logger(logpath=os.path.join(args.snap_dir, 'logs'),
                              filepath=os.path.abspath(__file__))
    logger.info(args)

    # SAVING: persist the full arg namespace next to the model.
    torch.save(args, snap_dir + args.flow + '.config')

    # ------------------------------------------------------------------
    # LOAD DATA (load_dataset may also update args, e.g. input shapes)
    # ------------------------------------------------------------------
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    if not args.evaluate:
        # --------------------------------------------------------------
        # SELECT MODEL
        # --------------------------------------------------------------
        # flow parameters and architecture choice are passed on to model through args
        if args.flow == 'no_flow':
            model = VAE.VAE(args)
        elif args.flow == 'planar':
            model = VAE.PlanarVAE(args)
        elif args.flow == 'iaf':
            model = VAE.IAFVAE(args)
        elif args.flow == 'orthogonal':
            model = VAE.OrthogonalSylvesterVAE(args)
        elif args.flow == 'householder':
            model = VAE.HouseholderSylvesterVAE(args)
        elif args.flow == 'triangular':
            model = VAE.TriangularSylvesterVAE(args)
        elif args.flow == 'cnf':
            model = CNFVAE.CNFVAE(args)
        elif args.flow == 'cnf_bias':
            model = CNFVAE.AmortizedBiasCNFVAE(args)
        elif args.flow == 'cnf_hyper':
            model = CNFVAE.HypernetCNFVAE(args)
        elif args.flow == 'cnf_lyper':
            model = CNFVAE.LypernetCNFVAE(args)
        elif args.flow == 'cnf_rank':
            model = CNFVAE.AmortizedLowRankCNFVAE(args)
        else:
            raise ValueError('Invalid flow choice')

        if args.retrain_encoder:
            # Initialize only the decoder ('p_x' params) from a previous
            # model; the encoder is trained from scratch.
            logger.info(f"Initializing decoder from {args.model_path}")
            dec_model = torch.load(args.model_path)
            dec_sd = {}
            for k, v in dec_model.state_dict().items():
                if 'p_x' in k:
                    dec_sd[k] = v
            model.load_state_dict(dec_sd, strict=False)

        if args.cuda:
            logger.info("Model on GPU")
            model.cuda()

        logger.info(model)

        if args.retrain_encoder:
            # Freeze the decoder by only optimizing non-'p_x' parameters.
            parameters = []
            logger.info('Optimizing over:')
            for name, param in model.named_parameters():
                if 'p_x' not in name:
                    logger.info(name)
                    parameters.append(param)
        else:
            parameters = model.parameters()

        optimizer = optim.Adamax(parameters, lr=args.learning_rate, eps=1.e-7)

        # --------------------------------------------------------------
        # TRAINING with early stopping on validation loss
        # --------------------------------------------------------------
        train_loss = []
        val_loss = []

        # for early stopping
        best_loss = np.inf
        best_bpd = np.inf
        e = 0  # epochs since last improvement
        epoch = 0

        train_times = []

        for epoch in range(1, args.epochs + 1):
            t_start = time.time()
            tr_loss = train(epoch, train_loader, model, optimizer, args, logger)
            train_loss.append(tr_loss)
            train_times.append(time.time() - t_start)
            logger.info('One training epoch took %.2f seconds' % (time.time() - t_start))

            v_loss, v_bpd = evaluate(val_loader, model, args, logger, epoch=epoch)
            val_loss.append(v_loss)

            # early-stopping: save whenever validation improves; only
            # count non-improving epochs after the warmup period.
            if v_loss < best_loss:
                e = 0
                best_loss = v_loss
                if args.input_type != 'binary':
                    best_bpd = v_bpd
                logger.info('->model saved<-')
                torch.save(model, snap_dir + args.flow + '.model')
                # torch.save(model, snap_dir + args.flow + '_' + args.architecture + '.model')
            elif (args.early_stopping_epochs > 0) and (epoch >= args.warmup):
                e += 1
                if e > args.early_stopping_epochs:
                    break

            if args.input_type == 'binary':
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f})\n'.format(e, args.early_stopping_epochs, best_loss)
                )
            else:
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f}, bpd {:.4f})\n'.
                    format(e, args.early_stopping_epochs, best_loss, best_bpd)
                )

            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

        train_loss = np.hstack(train_loss)
        val_loss = np.array(val_loss)

        plot_training_curve(train_loss, val_loss, fname=snap_dir + '/training_curve_%s.pdf' % args.flow)

        # training time per epoch
        train_times = np.array(train_times)
        mean_train_time = np.mean(train_times)
        std_train_time = np.std(train_times, ddof=1)
        logger.info('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

        # --------------------------------------------------------------
        # EVALUATION: reload the best-saved model and score it
        # --------------------------------------------------------------
        logger.info(args)
        logger.info('Stopped after %d epochs' % epoch)
        logger.info('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

        final_model = torch.load(snap_dir + args.flow + '.model')
        validation_loss, validation_bpd = evaluate(val_loader, final_model, args, logger)
    else:
        # Evaluation-only mode: no training metrics are available.
        validation_loss = "N/A"
        validation_bpd = "N/A"
        logger.info(f"Loading model from {args.model_path}")
        final_model = torch.load(args.model_path)

    test_loss, test_bpd = evaluate(test_loader, final_model, args, logger)
def train():
    """Train a 1-D tabular CNF, periodically validating and plotting.

    Reads module-level globals (``args``, ``device``, ``logger``). Saves
    the best checkpoint (by test loss) to ``args.save/checkpt.pth`` and
    density-comparison figures to ``args.save/figs``.
    """
    # 1-D model (input dimension 1).
    model = build_model_tabular(args, 1).to(device)
    set_cnf_options(args, model)

    logger.info(model)
    logger.info("Number of trainable parameters: {}".format(
        count_parameters(model)))

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

    # Exponentially-decayed running averages of the diagnostics.
    time_meter = utils.RunningAverageMeter(0.93)
    loss_meter = utils.RunningAverageMeter(0.93)
    nfef_meter = utils.RunningAverageMeter(0.93)
    nfeb_meter = utils.RunningAverageMeter(0.93)
    tt_meter = utils.RunningAverageMeter(0.93)

    end = time.time()
    best_loss = float('inf')
    model.train()
    for itr in range(1, args.niters + 1):
        optimizer.zero_grad()

        loss = compute_loss(args, model)
        loss_meter.update(loss.item())

        total_time = count_total_time(model)
        # ODE function evaluations before backward = forward-pass NFE.
        nfe_forward = count_nfe(model)

        loss.backward()
        optimizer.step()

        # NFE counter keeps accumulating; the difference is backward NFE.
        nfe_total = count_nfe(model)
        nfe_backward = nfe_total - nfe_forward
        nfef_meter.update(nfe_forward)
        nfeb_meter.update(nfe_backward)

        time_meter.update(time.time() - end)
        tt_meter.update(total_time)

        log_message = (
            'Iter {:04d} | Time {:.4f}({:.4f}) | Loss {:.6f}({:.6f}) | NFE Forward {:.0f}({:.1f})'
            ' | NFE Backward {:.0f}({:.1f}) | CNF Time {:.4f}({:.4f})'.format(
                itr, time_meter.val, time_meter.avg, loss_meter.val,
                loss_meter.avg, nfef_meter.val, nfef_meter.avg,
                nfeb_meter.val, nfeb_meter.avg, tt_meter.val, tt_meter.avg))
        logger.info(log_message)

        # Periodic validation; checkpoint only on improvement.
        if itr % args.val_freq == 0 or itr == args.niters:
            with torch.no_grad():
                model.eval()
                test_loss = compute_loss(args, model, batch_size=args.test_batch_size)
                test_nfe = count_nfe(model)
                log_message = '[TEST] Iter {:04d} | Test Loss {:.6f} | NFE {:.0f}'.format(
                    itr, test_loss, test_nfe)
                logger.info(log_message)

                if test_loss.item() < best_loss:
                    best_loss = test_loss.item()
                    utils.makedirs(args.save)
                    torch.save(
                        {
                            'args': args,
                            'state_dict': model.state_dict(),
                        }, os.path.join(args.save, 'checkpt.pth'))
                model.train()

        # Periodic visualization: true vs. model density on [-10, 10].
        if itr % args.viz_freq == 0:
            with torch.no_grad():
                model.eval()

                # NOTE(review): xx is created on CPU and never moved to
                # `device` — presumably fine for CPU runs; confirm this
                # doesn't break when the model lives on GPU.
                xx = torch.linspace(-10, 10, 10000).view(-1, 1)
                true_p = data_density(xx)
                plt.plot(xx.view(-1).cpu().numpy(),
                         true_p.view(-1).exp().cpu().numpy(),
                         label='True')
                true_p = model_density(xx, model)
                plt.plot(xx.view(-1).cpu().numpy(),
                         true_p.view(-1).exp().cpu().numpy(),
                         label='Model')
                utils.makedirs(os.path.join(args.save, 'figs'))
                plt.savefig(
                    os.path.join(args.save, 'figs', '{:06d}.jpg'.format(itr)))
                plt.close()
                model.train()

        end = time.time()

    logger.info('Training has finished.')
def main():
    """End-to-end (optionally distributed) CNF image-model training.

    Builds the model and data loaders from module-level ``args``, logs
    per-iteration metrics to CSV (rank 0 / ``write_log`` only), checkpoints
    every epoch, and periodically validates and renders samples. With
    ``args.validate`` set, runs a single evaluation pass and exits.
    """
    #os.system('shutdown -c') # cancel previous shutdown command

    if write_log:
        utils.makedirs(args.save)
        logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'),
                                  filepath=os.path.abspath(__file__))
        logger.info(args)

        # Persist the full arg namespace for reproducibility.
        args_file_path = os.path.join(args.save, 'args.yaml')
        with open(args_file_path, 'w') as f:
            yaml.dump(vars(args), f, default_flow_style=False)

    if args.distributed:
        if write_log: logger.info('Distributed initializing process group')
        torch.cuda.set_device(args.local_rank)
        distributed.init_process_group(backend=args.dist_backend,
                                       init_method=args.dist_url,
                                       world_size=dist_utils.env_world_size(),
                                       rank=env_rank())
        assert (dist_utils.env_world_size() == distributed.get_world_size())
        if write_log:
            logger.info("Distributed: success (%d/%d)" %
                        (args.local_rank, distributed.get_world_size()))

    # get deivce
    # NOTE(review): the CUDA device selection is commented out and the
    # device is hard-coded to CPU, while the distributed branch above
    # still calls torch.cuda — confirm this combination is intentional.
    # device = torch.device("cuda:%d"%torch.cuda.current_device() if torch.cuda.is_available() else "cpu")
    device = "cpu"
    cvt = lambda x: x.type(torch.float32).to(device, non_blocking=True)

    # load dataset
    train_loader, test_loader, data_shape = get_dataset(args)

    trainlog = os.path.join(args.save, 'training.csv')
    testlog = os.path.join(args.save, 'test.csv')

    traincolumns = [
        'itr', 'wall', 'itr_time', 'loss', 'bpd', 'fe', 'total_time',
        'grad_norm'
    ]
    testcolumns = [
        'wall', 'epoch', 'eval_time', 'bpd', 'fe', 'total_time',
        'transport_cost'
    ]

    # build model
    regularization_fns, regularization_coeffs = create_regularization_fns(args)
    model = create_model(args, data_shape, regularization_fns)
    # model = model.cuda()

    if args.distributed:
        model = dist_utils.DDP(model,
                               device_ids=[args.local_rank],
                               output_device=args.local_rank)

    traincolumns = append_regularization_keys_header(traincolumns,
                                                     regularization_fns)

    # Fresh run: write CSV headers once (resumed runs append instead).
    if not args.resume and write_log:
        with open(trainlog, 'w') as f:
            csvlogger = csv.DictWriter(f, traincolumns)
            csvlogger.writeheader()
        with open(testlog, 'w') as f:
            csvlogger = csv.DictWriter(f, testcolumns)
            csvlogger.writeheader()

    set_cnf_options(args, model)

    if write_log: logger.info(model)
    if write_log:
        logger.info("Number of trainable parameters: {}".format(
            count_parameters(model)))
    if write_log:
        logger.info('Iters per train epoch: {}'.format(len(train_loader)))
    if write_log: logger.info('Iters per test: {}'.format(len(test_loader)))

    # optimizer
    if args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.weight_decay,
                              momentum=0.9,
                              nesterov=False)

    # restore parameters
    if args.resume is not None:
        checkpt = torch.load(
            args.resume,
            map_location=lambda storage, loc: storage.cuda(args.local_rank))
        model.load_state_dict(checkpt["state_dict"])
        if "optim_state_dict" in checkpt.keys():
            optimizer.load_state_dict(checkpt["optim_state_dict"])
            # Manually move optimizer state to device.
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = cvt(v)

    # For visualization.
    if write_log:
        fixed_z = cvt(torch.randn(min(args.test_batch_size, 100), *data_shape))

    if write_log:
        time_meter = utils.RunningAverageMeter(0.97)
        bpd_meter = utils.RunningAverageMeter(0.97)
        loss_meter = utils.RunningAverageMeter(0.97)
        steps_meter = utils.RunningAverageMeter(0.97)
        grad_meter = utils.RunningAverageMeter(0.97)
        tt_meter = utils.RunningAverageMeter(0.97)

    if not args.resume:
        best_loss = float("inf")
        itr = 0
        wall_clock = 0.
        begin_epoch = 1
    else:
        # Recover counters from the previous run's CSV logs.
        chkdir = os.path.dirname(args.resume)
        tedf = pd.read_csv(os.path.join(chkdir, 'test.csv'))
        trdf = pd.read_csv(os.path.join(chkdir, 'training.csv'))
        wall_clock = trdf['wall'].to_numpy()[-1]
        itr = trdf['itr'].to_numpy()[-1]
        best_loss = tedf['bpd'].min()
        begin_epoch = int(tedf['epoch'].to_numpy()[-1] + 1)  # not exactly correct

    if args.distributed:
        if write_log: logger.info('Syncing machines before training')
        dist_utils.sum_tensor(torch.tensor([1.0]).float().cuda())

    for epoch in range(begin_epoch, args.num_epochs + 1):
        if not args.validate:
            model.train()
            with open(trainlog, 'a') as f:
                if write_log:
                    csvlogger = csv.DictWriter(f, traincolumns)

                for _, (x, y) in enumerate(train_loader):
                    start = time.time()
                    update_lr(optimizer, itr)
                    optimizer.zero_grad()

                    # cast data and move to device
                    x = add_noise(cvt(x), nbits=args.nbits)
                    #x = x.clamp_(min=0, max=1)

                    # compute loss
                    bpd, (x, z), reg_states = compute_bits_per_dim(x, model)
                    if np.isnan(bpd.data.item()):
                        raise ValueError('model returned nan during training')
                    elif np.isinf(bpd.data.item()):
                        raise ValueError('model returned inf during training')

                    loss = bpd
                    if regularization_coeffs:
                        reg_loss = sum(reg_state * coeff
                                       for reg_state, coeff in zip(
                                           reg_states, regularization_coeffs)
                                       if coeff != 0)
                        loss = loss + reg_loss
                    total_time = count_total_time(model)

                    loss.backward()
                    nfe_opt = count_nfe(model)
                    if write_log: steps_meter.update(nfe_opt)
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        model.parameters(), args.max_grad_norm)
                    optimizer.step()

                    itr_time = time.time() - start
                    wall_clock += itr_time

                    batch_size = x.size(0)
                    # All-reduce metrics across workers; the leading 1.
                    # sums to the number of GPUs for averaging below.
                    metrics = torch.tensor([
                        1., batch_size,
                        loss.item(),
                        bpd.item(), nfe_opt, grad_norm, *reg_states
                    ]).float()

                    rv = tuple(torch.tensor(0.) for r in reg_states)

                    total_gpus, batch_total, r_loss, r_bpd, r_nfe, r_grad_norm, *rv = dist_utils.sum_tensor(
                        metrics).cpu().numpy()

                    if write_log:
                        time_meter.update(itr_time)
                        bpd_meter.update(r_bpd / total_gpus)
                        loss_meter.update(r_loss / total_gpus)
                        grad_meter.update(r_grad_norm / total_gpus)
                        tt_meter.update(total_time)

                        fmt = '{:.4f}'
                        logdict = {
                            'itr': itr,
                            'wall': fmt.format(wall_clock),
                            'itr_time': fmt.format(itr_time),
                            'loss': fmt.format(r_loss / total_gpus),
                            'bpd': fmt.format(r_bpd / total_gpus),
                            'total_time': fmt.format(total_time),
                            'fe': r_nfe / total_gpus,
                            'grad_norm': fmt.format(r_grad_norm / total_gpus),
                        }
                        if regularization_coeffs:
                            rv = tuple(v_ / total_gpus for v_ in rv)
                            logdict = append_regularization_csv_dict(
                                logdict, regularization_fns, rv)
                        csvlogger.writerow(logdict)

                        if itr % args.log_freq == 0:
                            log_message = (
                                "Itr {:06d} | Wall {:.3e}({:.2f}) | "
                                "Time/Itr {:.2f}({:.2f}) | BPD {:.2f}({:.2f}) | "
                                "Loss {:.2f}({:.2f}) | "
                                "FE {:.0f}({:.0f}) | Grad Norm {:.3e}({:.3e}) | "
                                "TT {:.2f}({:.2f})".format(
                                    itr, wall_clock, wall_clock / (itr + 1),
                                    time_meter.val, time_meter.avg,
                                    bpd_meter.val, bpd_meter.avg,
                                    loss_meter.val, loss_meter.avg,
                                    steps_meter.val, steps_meter.avg,
                                    grad_meter.val, grad_meter.avg,
                                    tt_meter.val, tt_meter.avg))
                            if regularization_coeffs:
                                log_message = append_regularization_to_log(
                                    log_message, regularization_fns, rv)
                            logger.info(log_message)

                    itr += 1

        # compute test loss
        model.eval()
        if args.local_rank == 0:
            utils.makedirs(args.save)
            torch.save(
                {
                    "args":
                    args,
                    "state_dict":
                    model.module.state_dict()
                    if torch.cuda.is_available() else model.state_dict(),
                    "optim_state_dict":
                    optimizer.state_dict(),
                    "fixed_z":
                    fixed_z.cpu()
                }, os.path.join(args.save, "checkpt.pth"))

        if epoch % args.val_freq == 0 or args.validate:
            with open(testlog, 'a') as f:
                if write_log:
                    csvlogger = csv.DictWriter(f, testcolumns)
                with torch.no_grad():
                    start = time.time()
                    if write_log: logger.info("validating...")

                    # Incremental means over the test set.
                    lossmean = 0.
                    meandist = 0.
                    steps = 0
                    tt = 0.
                    for i, (x, y) in enumerate(test_loader):
                        sh = x.shape
                        x = shift(cvt(x), nbits=args.nbits)
                        loss, (x, z), _ = compute_bits_per_dim(x, model)
                        # Mean squared displacement between input and
                        # latent — used as a transport-cost proxy.
                        dist = (x.view(x.size(0), -1) -
                                z).pow(2).mean(dim=-1).mean()
                        meandist = i / (i + 1) * dist + meandist / (i + 1)
                        lossmean = i / (i + 1) * lossmean + loss / (i + 1)

                        tt = i / (i + 1) * tt + count_total_time(model) / (i + 1)
                        steps = i / (i + 1) * steps + count_nfe(model) / (i + 1)

                    loss = lossmean.item()
                    metrics = torch.tensor([1., loss, meandist, steps]).float()

                    total_gpus, r_bpd, r_mdist, r_steps = dist_utils.sum_tensor(
                        metrics).cpu().numpy()
                    eval_time = time.time() - start

                    if write_log:
                        fmt = '{:.4f}'
                        logdict = {
                            'epoch': epoch,
                            'eval_time': fmt.format(eval_time),
                            'bpd': fmt.format(r_bpd / total_gpus),
                            'wall': fmt.format(wall_clock),
                            'total_time': fmt.format(tt),
                            'transport_cost': fmt.format(r_mdist / total_gpus),
                            'fe': '{:.2f}'.format(r_steps / total_gpus)
                        }
                        csvlogger.writerow(logdict)
                        logger.info(
                            "Epoch {:04d} | Time {:.4f}, Bit/dim {:.4f}, Steps {:.4f}, TT {:.2f}, Transport Cost {:.2e}"
                            .format(epoch, eval_time, r_bpd / total_gpus,
                                    r_steps / total_gpus, tt,
                                    r_mdist / total_gpus))
                    loss = r_bpd / total_gpus

                    # Best checkpoint = copy of the per-epoch checkpoint.
                    if loss < best_loss and args.local_rank == 0:
                        best_loss = loss
                        shutil.copyfile(os.path.join(args.save, "checkpt.pth"),
                                        os.path.join(args.save, "best.pth"))

            # visualize samples and density
            if write_log:
                with torch.no_grad():
                    fig_filename = os.path.join(args.save, "figs",
                                                "{:04d}.jpg".format(epoch))
                    utils.makedirs(os.path.dirname(fig_filename))
                    # Decode the fixed latent batch into image samples.
                    generated_samples, _, _ = model(fixed_z, reverse=True)
                    generated_samples = generated_samples.view(-1, *data_shape)
                    nb = int(np.ceil(np.sqrt(float(fixed_z.size(0)))))
                    save_image(unshift(generated_samples, nbits=args.nbits),
                               fig_filename,
                               nrow=nb)
            if args.validate:
                break
parser.add_argument('--weight-decay', type=float, default=1e-5) parser.add_argument('--annealing-iters', type=int, default=0) parser.add_argument('--save', type=str, default='experiments/') parser.add_argument('--viz_freq', type=int, default=1000) parser.add_argument('--val_freq', type=int, default=1000) parser.add_argument('--log_freq', type=int, default=1000) parser.add_argument('--seed', type=int, default=0) parser.add_argument('--gpu', type=int, default=0) args = parser.parse_args() # logger utils.makedirs(args.save) logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__)) logger.info(args) device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu') print('') print(device) print(device.type) print('') np.random.seed(args.seed) torch.manual_seed(args.seed)
def main(conf):
    """Train an XGBoost model on precomputed features and write all artifacts.

    Reads train/test datasets and per-group feature CSVs described by ``conf``,
    trains via ``train_xgboost``, then dumps into ``conf['xgboost.dump.dir']``:
    the config copy, model dump/binary, top errors, training progress, feature
    importance scores, and the test-set submission file.

    NOTE: uses ``dict.iteritems()`` — this module is Python 2 code.

    Parameters
    ----------
    conf : mapping
        Config with keys 'xgboost.dump.dir', 'xgboost.dataset',
        'xgboost.param', 'features', 'flip', 'weights' (schema inferred from
        usage below — confirm against the actual config files).
    """
    # Dump directory: all outputs of this run live here, including a copy of
    # the config for provenance.
    dump_dir = conf['xgboost.dump.dir']
    makedirs(dump_dir)
    dump_config_file = join_path(dump_dir, 'application.conf')
    dump_config(conf, dump_config_file)

    logging.info('Loading train dataset')
    train_df = load_train_df(conf['xgboost.dataset'])

    logging.info('Loading test dataset')
    test_df = load_test_df(conf['xgboost.dataset'])

    logging.info('Loading features')
    # Join each configured feature column (from per-group CSV dumps) onto the
    # train/test frames. Column names may differ per split via
    # 'train_col'/'test_col'; the feature name itself is the default.
    # NOTE(review): relies on the feature CSVs being row-aligned with the
    # train/test dataframes — confirm upstream guarantees this.
    features = []
    for group, cnf in conf['features'].iteritems():
        logging.info('Loading features group: %s', group)
        features_dump_dir = cnf['dump']
        train_features_file = join_path(features_dump_dir, 'train.csv')
        test_features_file = join_path(features_dump_dir, 'test.csv')
        train_features = pd.read_csv(train_features_file)
        test_features = pd.read_csv(test_features_file)
        for fcnf in cnf['features']:
            feature = fcnf['feature']
            features.append(feature)
            train_col = fcnf.get('train_col', feature)
            test_col = fcnf.get('test_col', feature)
            train_df[feature] = train_features[train_col]
            test_df[feature] = test_features[test_col]

    # Feature map file is needed by xgboost for named dumps/importances.
    feature_map_file = join_path(dump_dir, 'xgb.fmap')
    create_feature_map(features, feature_map_file)

    # Augment the training set with "flipped" rows: for each configured pair
    # of columns, swap their values (presumably question1/question2-symmetric
    # features — confirm against conf['flip']).
    train_df_flipped = train_df.copy()
    for flip in conf['flip']:
        train_df_flipped[flip[0]] = train_df[[flip[1]]]
        train_df_flipped[flip[1]] = train_df[[flip[0]]]
    train_df = pd.concat([train_df, train_df_flipped], axis=0, ignore_index=True)
    logging.info('Train dataset: %s', train_df.shape)

    # Labels and per-sample weights (class-weight lookup vectorized over y).
    y = train_df[[FieldsTrain.is_duplicate]].values.flatten()
    logging.info('Train dataset CTR: %s', y.sum() / len(y))

    class_weight = {int(c['class']): c['weight'] for c in conf['weights']}
    w = np.vectorize(class_weight.get)(y)
    logging.info('Train dataset weighted CTR: %s', sum(y * w) / sum(w))

    # Keep raw question texts before dropping them — used below to render the
    # top-error report with human-readable content.
    q1 = train_df[Fields.question1].values
    q2 = train_df[Fields.question2].values

    # Drop identifier/text/label columns so only feature columns remain in X.
    train_df.drop([
        FieldsTrain.id,
        FieldsTrain.qid1,
        FieldsTrain.qid2,
        FieldsTrain.question1,
        FieldsTrain.question2,
        FieldsTrain.is_duplicate], axis=1, inplace=True)
    X = train_df.values

    logging.info('Training XGBoost model')
    model, progress, quality = train_xgboost(X, y, w, **conf['xgboost.param'])

    logging.info('Writing model dump')
    model_dump_file = join_path(dump_dir, 'model_dump.txt')
    model.dump_model(model_dump_file, fmap=feature_map_file, with_stats=True)
    model_file = join_path(dump_dir, 'model.bin')
    model.save_model(model_file)

    logging.info('Writing quality')
    # plot_quality(quality, dump_dir)

    logging.info('Writing top errors')
    # Error report columns: true label y, predicted p, both questions, split.
    # type_i = false positives (true label 0), type_ii = false negatives
    # (true label 1) — inferred from the 0/1 written per section; confirm
    # against train_xgboost's 'quality' structure.
    errors_file = join_path(dump_dir, 'errors.csv')
    with open(errors_file, 'w') as fh:
        fh.write('y,p,question1,question2,sample\n')
        for e in quality['errors']['train']['type_i']:
            fh.write('%d,%s,%s,%s,%s\n' % (0, e[0], q1[e[1]], q2[e[1]], 'train'))
        for e in quality['errors']['train']['type_ii']:
            fh.write('%d,%s,%s,%s,%s\n' % (1, e[0], q1[e[1]], q2[e[1]], 'train'))
        for e in quality['errors']['valid']['type_i']:
            fh.write('%d,%s,%s,%s,%s\n' % (0, e[0], q1[e[1]], q2[e[1]], 'valid'))
        for e in quality['errors']['valid']['type_ii']:
            fh.write('%d,%s,%s,%s,%s\n' % (1, e[0], q1[e[1]], q2[e[1]], 'valid'))

    logging.info('Writing progress file')
    # plot_progress(progress, dump_dir)
    progress_file = join_path(dump_dir, 'progress.json')
    with open(progress_file, 'w') as fh:
        json.dump(progress, fh)

    logging.info('Writing feature scores')
    # Three importance views from xgboost: split count, average gain, cover.
    score_weight = model.get_score(fmap=feature_map_file, importance_type='weight')
    score_gain = model.get_score(fmap=feature_map_file, importance_type='gain')
    score_cover = model.get_score(fmap=feature_map_file, importance_type='cover')
    # Split-value histograms are computed per feature but never written out.
    split_histograms = dict()
    for f in features:
        split_histograms[f] = model.get_split_value_histogram(f, fmap=feature_map_file)
    scores = pd.DataFrame([score_weight, score_gain, score_cover]).transpose()
    scores.index.name = 'feature'
    scores.rename(columns={0: 'weight', 1: 'gain', 2: 'cover'}, inplace=True)
    # Normalize split counts to fractions so 'weight' sums to 1.
    weight_total = scores['weight'].sum()
    scores['weight'] = scores['weight'] / weight_total
    scores.sort_values(by='weight', ascending=False, inplace=True)
    scores.to_csv(join_path(dump_dir, 'feature_scores.csv'))

    logging.info('Computing test predictions')
    test_ids = test_df[[FieldsTest.test_id]]
    test_df.drop([FieldsTest.test_id, FieldsTest.question1, FieldsTest.question2], axis=1, inplace=True)
    dtest = xgb.DMatrix(test_df.values)
    p_test = model.predict(dtest)

    logging.info('Writing submission file')
    submission_file = join_path(dump_dir, 'submission.csv')
    submission(submission_file, test_ids, p_test)
def run(args, logger, train_loader, validation_loader, data_shape):
    """Train a Householder-Sylvester-flow VAE, validating and checkpointing.

    Builds the model and Adam optimizer, optionally resumes from a
    checkpoint, then runs the epoch loop: per-batch input assembly (with
    label channels appended when ``args.heterogen``), backprop on the binary
    VAE loss with KL weight ``beta`` annealed toward 1, periodic validation,
    best-loss checkpointing to ``args.save``, and final image dumps via
    ``evaluation``.

    Parameters
    ----------
    args : namespace-like config (lr, beta, num_epochs, data, heterogen,
        batch_size, log_freq, val_freq, evaluate, resume, save, ...).
    logger : logger for progress messages.
    train_loader, validation_loader : batch iterables; for ``args.data ==
        'piv'`` batches are dicts with 'ComImages'/'AllGenDetails', otherwise
        (x, y) pairs.
    data_shape : shape descriptor forwarded to the model and evaluation.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = module.HouseholderSylvesterVAE(args, data_shape)
    # model = module.OrthogonalSylvesterVAE(args, data_shape)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # Cut the LR by 5x after 5 epochs without improvement of the stepped loss.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.2, patience=5, min_lr=1e-8)
    start_epoch = 0

    # restore parameters
    if args.resume is not None:
        checkpt = torch.load(args.resume,
                             map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpt["state_dict"])
        optimizer.load_state_dict(checkpt["optim_state_dict"])
        # NOTE: the checkpoint's args replace the caller-supplied args from
        # here on.
        args = checkpt["args"]
        start_epoch = checkpt["epoch"] + 1
        logger.info("Resuming at epoch {} with args {}.".format(
            start_epoch, args))

    time_meter = utils.RunningAverageMeter(0.97)  # unused below
    beta = args.beta  # KL annealing weight, increased per epoch (capped at 1)
    train_loader_break = 500000
    break_train = int(train_loader_break / args.batch_size)  # unused below
    break_training = 50  # max number of validation batches per evaluation
    best_loss = float("inf")
    itr = 0
    for epoch in range(start_epoch, args.num_epochs):
        logger.info('Epoch: {}/{} \tBeta: {}'.format(epoch, args.num_epochs,
                                                     beta))
        model.train()
        num_data = 0
        end = time.time()
        for idx_count, data in enumerate(train_loader):
            # if idx_count > break_training:
            #     break
            # ---- Assemble the input tensor x for this batch. When
            # args.heterogen, the conditioning labels are broadcast into
            # extra constant image channels appended to the raw input. ----
            if args.data == 'piv':
                x_, y_ = data['ComImages'], data['AllGenDetails']
                if args.heterogen:
                    # 2 image channels + 2 label channels (u/v displacement).
                    x = torch.zeros([x_.size(0), 4, 32, 32])
                    x[:, :2, :, :] = x_
                    for idx in range(x_.size(0)):
                        u_vector = torch.zeros([1, 32, 32])
                        # Maps the label into a constant channel; the /20
                        # scale suggests labels lie in roughly [-20, 20] —
                        # TODO confirm.
                        u_vector.fill_(y_[idx][0] / 20 * 0.5 + 0.5)
                        v_vector = torch.zeros([1, 32, 32])
                        v_vector.fill_(y_[idx][1] / 20 * 0.5 + 0.5)
                        x[idx, 2, :, :] = u_vector
                        x[idx, 3, :, :] = v_vector
                else:
                    x = x_
                    y = y_
            elif args.data == 'mnist' and args.heterogen:
                # 1 image channel + 1 label channel filled with label/10.
                x_, y_ = data
                x = torch.zeros([x_.size(0), 2, 28, 28])
                x[:, :1, :, :] = x_
                for idx in range(x_.size(0)):
                    labels = torch.zeros([1, 28, 28])
                    labels.fill_(y_[idx] / 10)
                    x[idx, 1, :, :] = labels
            elif args.data == 'cifar10' and args.heterogen:
                # 3 image channels + 1 label channel with the raw class id.
                x_, y_ = data
                x = torch.zeros([x_.size(0), 4, 32, 32])
                x[:, :3, :, :] = x_
                for idx in range(x_.size(0)):
                    labels = torch.zeros([1, 32, 32])
                    labels.fill_(y_[idx])
                    x[idx, 3, :, :] = labels
            else:
                x, y = data
            x = x.to(device)

            start = time.time()
            optimizer.zero_grad()
            recon_images, z_mu, z_var, ldj, z0, z_k = model(x)
            loss, rec, kl = loss_function.binary_loss_function(
                recon_images, x, z_mu, z_var, z0, z_k, ldj, beta)
            loss.backward()
            optimizer.step()

            rec = rec.item()
            kl = kl.item()
            num_data += len(x)
            batch_time = time.time() - end
            end = time.time()
            if itr % args.log_freq == 0:
                log_message = (
                    "Epoch {:03d} | [{:5d}/{:5d} ({:2.0f}%)] | Time {:.3f} | Loss: {:11.6f} |"
                    "rec:{:11.6f} | kl: {:11.6f}".format(
                        epoch, num_data, len(train_loader.sampler),
                        100. * idx_count / len(train_loader), batch_time,
                        loss.item(), rec, kl))
                logger.info(log_message)
            itr += 1

        # NOTE(review): the scheduler is stepped with the LAST batch's loss
        # only, not an epoch average — confirm this is intended.
        scheduler.step(loss.item())

        # Evaluate and save model
        if args.evaluate:
            if epoch % args.val_freq == 0:
                model.eval()
                with torch.no_grad():
                    start = time.time()
                    logger.info("validating...")
                    losses_vec_recon_images = []
                    losses_vec_images_recon_images = []
                    losses = []
                    # Validation caps at break_training batches.
                    for _, (data) in enumerate(validation_loader):
                        if _ > break_training:
                            break
                        # Same input assembly as the training loop above.
                        if args.data == 'piv':
                            x_, y_ = data['ComImages'], data['AllGenDetails']
                            if args.heterogen:
                                x = torch.zeros([x_.size(0), 4, 32, 32])
                                x[:, :2, :, :] = x_
                                for idx in range(x_.size(0)):
                                    u_vector = torch.zeros([1, 32, 32])
                                    u_vector.fill_(y_[idx][0] / 20 * 0.5 + 0.5)
                                    v_vector = torch.zeros([1, 32, 32])
                                    v_vector.fill_(y_[idx][1] / 20 * 0.5 + 0.5)
                                    x[idx, 2, :, :] = u_vector
                                    x[idx, 3, :, :] = v_vector
                            else:
                                x = x_
                                y = y_
                        elif args.data == 'mnist' and args.heterogen:
                            x_, y_ = data
                            x = torch.zeros([x_.size(0), 2, 28, 28])
                            x[:, :1, :, :] = x_
                            for idx in range(x_.size(0)):
                                labels = torch.zeros([1, 28, 28])
                                labels.fill_(y_[idx] / 10)
                                x[idx, 1, :, :] = labels
                        elif args.data == 'cifar10' and args.heterogen:
                            x_, y_ = data
                            x = torch.zeros([x_.size(0), 4, 32, 32])
                            x[:, :3, :, :] = x_
                            for idx in range(x_.size(0)):
                                labels = torch.zeros([1, 32, 32])
                                labels.fill_(y_[idx])
                                x[idx, 3, :, :] = labels
                        else:
                            x, y = data
                        x = x.to(device)

                        recon_images, z_mu, z_var, ldj, z0, z_k = model(x)
                        loss, rec, kl = loss_function.binary_loss_function(
                            recon_images, x, z_mu, z_var, z0, z_k, ldj, beta)
                        losses.append(loss.item())
                        # Extra reconstruction metric via a pretrained resnet;
                        # only for 'piv' without label channels (y is bound
                        # only on that branch above).
                        if args.data == "piv" and args.heterogen == False:
                            loss_vec_recon_images, loss_vec_images_recon_images = resnet_pretrained.run(
                                args, logger, recon_images, x, y, data_shape)
                            losses_vec_recon_images.append(
                                loss_vec_recon_images.item())
                            losses_vec_images_recon_images.append(
                                loss_vec_images_recon_images.item())

                    if args.data == "piv" and args.heterogen == False:
                        logger.info(
                            "Loss vector reconstructed images {}, Loss vector images reconstructed images {}"
                            .format(np.mean(losses_vec_recon_images),
                                    np.mean(losses_vec_images_recon_images)))

                    loss = np.mean(losses)
                    logger.info(
                        "Epoch {:04d} | Time {:.4f} | Loss {:.4f}".format(
                            epoch, time.time() - start, loss))
                    # Checkpoint only when the validation loss improves.
                    if loss < best_loss:
                        best_loss = loss
                        utils.makedirs(args.save)
                        torch.save(
                            {
                                "args": args,
                                "epoch": epoch,
                                "state_dict": model.state_dict(),
                                "optim_state_dict": optimizer.state_dict(),
                            }, os.path.join(args.save, "checkpt.pth"))
                        logger.info("Saving model at epoch {}.".format(epoch))

        # Anneal the KL weight towards 1 (0.01 per epoch).
        if beta < 1:
            beta += 0.01

    # Evaluation
    evaluation.save_recon_images(args, model, validation_loader, data_shape,
                                 logger)
    evaluation.save_fixed_z_image(args, model, data_shape, logger)
def train(args, model, growth_model):
    """Train the CNF ``model`` (the growth model stays frozen in eval mode).

    Runs ``args.niters`` optimizer steps of ``compute_loss``, optionally
    adding regularization from the module-level ``regularization_coeffs``.
    Every ``args.val_freq`` iterations the test loss is computed and, on
    improvement, a checkpoint (model + growth model state) is written to
    ``args.save``; every ``args.viz_freq`` iterations per-timepoint transform
    figures are saved under ``args.save/figs``.

    Relies on module-level globals: ``logger``, ``regularization_coeffs``,
    ``regularization_fns``, ``device``, ``timepoints``, ``int_tps``,
    ``viz_sampler``, ``compute_loss`` and the NFE/time counters.

    Parameters
    ----------
    args : namespace-like config (lr, weight_decay, niters, spectral_norm,
        val_freq, viz_freq, save, ...).
    model : the CNF being optimized.
    growth_model : auxiliary model used inside ``compute_loss``; not updated
        here (its optimizer and regularization are commented out).
    """
    logger.info(model)
    logger.info("Number of trainable parameters: {}".format(
        count_parameters(model)))

    # Only the main model is optimized; joint optimization with the growth
    # model is commented out.
    #optimizer = optim.Adam(set(model.parameters()) | set(growth_model.parameters()),
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    #growth_optimizer = optim.Adam(growth_model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Exponential running averages for timing, loss and NFE diagnostics.
    time_meter = utils.RunningAverageMeter(0.93)
    loss_meter = utils.RunningAverageMeter(0.93)
    nfef_meter = utils.RunningAverageMeter(0.93)
    nfeb_meter = utils.RunningAverageMeter(0.93)
    tt_meter = utils.RunningAverageMeter(0.93)

    end = time.time()
    best_loss = float('inf')
    model.train()
    growth_model.eval()  # frozen throughout this loop
    for itr in range(1, args.niters + 1):
        optimizer.zero_grad()
        #growth_optimizer.zero_grad()

        ### Train
        if args.spectral_norm: spectral_norm_power_iteration(model, 1)
        #if args.spectral_norm: spectral_norm_power_iteration(growth_model, 1)

        loss = compute_loss(args, model, growth_model)
        loss_meter.update(loss.item())

        if len(regularization_coeffs) > 0:
            # Only regularize on the last timepoint
            reg_states = get_regularization(model, regularization_coeffs)
            reg_loss = sum(
                reg_state * coeff for reg_state, coeff in zip(reg_states, regularization_coeffs) if coeff != 0
            )
            loss = loss + reg_loss
        #if len(growth_regularization_coeffs) > 0:
        #    growth_reg_states = get_regularization(growth_model, growth_regularization_coeffs)
        #    reg_loss = sum(
        #        reg_state * coeff for reg_state, coeff in zip(growth_reg_states, growth_regularization_coeffs) if coeff != 0
        #    )
        #    loss2 = loss2 + reg_loss

        # Snapshot ODE-solver stats before backward so the forward NFE can be
        # separated from the backward NFE below.
        total_time = count_total_time(model)
        nfe_forward = count_nfe(model)

        loss.backward()
        #loss2.backward()
        optimizer.step()
        #growth_optimizer.step()

        ### Eval
        nfe_total = count_nfe(model)
        nfe_backward = nfe_total - nfe_forward
        nfef_meter.update(nfe_forward)
        nfeb_meter.update(nfe_backward)
        time_meter.update(time.time() - end)
        tt_meter.update(total_time)

        log_message = (
            'Iter {:04d} | Time {:.4f}({:.4f}) | Loss {:.6f}({:.6f}) | NFE Forward {:.0f}({:.1f})'
            ' | NFE Backward {:.0f}({:.1f}) | CNF Time {:.4f}({:.4f})'.format(
                itr, time_meter.val, time_meter.avg, loss_meter.val,
                loss_meter.avg, nfef_meter.val, nfef_meter.avg,
                nfeb_meter.val, nfeb_meter.avg, tt_meter.val, tt_meter.avg
            )
        )
        if len(regularization_coeffs) > 0:
            # reg_states is guaranteed to exist here: same guard as above.
            log_message = append_regularization_to_log(
                log_message, regularization_fns, reg_states)
        logger.info(log_message)

        # Periodic validation; checkpoint only when the test loss improves.
        if itr % args.val_freq == 0 or itr == args.niters:
            with torch.no_grad():
                model.eval()
                growth_model.eval()
                test_loss = compute_loss(args, model, growth_model)
                test_nfe = count_nfe(model)
                log_message = '[TEST] Iter {:04d} | Test Loss {:.6f} | NFE {:.0f}'.format(itr, test_loss, test_nfe)
                logger.info(log_message)

                if test_loss.item() < best_loss:
                    best_loss = test_loss.item()
                    utils.makedirs(args.save)
                    torch.save({
                        'args': args,
                        'state_dict': model.state_dict(),
                        'growth_state_dict': growth_model.state_dict(),
                    }, os.path.join(args.save, 'checkpt.pth'))
                model.train()

        # Periodic visualization: one figure per timepoint, transforming
        # samples through the flow integrated up to that timepoint.
        if itr % args.viz_freq == 0:
            with torch.no_grad():
                model.eval()
                for i, tp in enumerate(timepoints):
                    p_samples = viz_sampler(tp)
                    sample_fn, density_fn = get_transforms(model, int_tps[:i+1])
                    #growth_sample_fn, growth_density_fn = get_transforms(growth_model, int_tps[:i+1])
                    plt.figure(figsize=(9, 3))
                    visualize_transform(
                        p_samples, torch.randn, standard_normal_logprob,
                        transform=sample_fn, inverse_transform=density_fn,
                        samples=True, npts=100, device=device
                    )
                    fig_filename = os.path.join(
                        args.save, 'figs', '{:04d}_{:01d}.jpg'.format(itr, i))
                    utils.makedirs(os.path.dirname(fig_filename))
                    plt.savefig(fig_filename)
                    plt.close()
                    #visualize_transform(
                    #    p_samples, torch.rand, uniform_logprob, transform=growth_sample_fn,
                    #    inverse_transform=growth_density_fn,
                    #    samples=True, npts=800, device=device
                    #)
                    #fig_filename = os.path.join(args.save, 'growth_figs', '{:04d}_{:01d}.jpg'.format(itr, i))
                    #utils.makedirs(os.path.dirname(fig_filename))
                    #plt.savefig(fig_filename)
                    #plt.close()
                model.train()

        # Disabled growth-model visualization (kept as a string literal).
        """
        if itr % args.viz_freq_growth == 0:
            with torch.no_grad():
                growth_model.eval()
                # Visualize growth transform
                growth_filename = os.path.join(args.save, 'growth', '{:04d}.jpg'.format(itr))
                utils.makedirs(os.path.dirname(growth_filename))
                visualize_growth(growth_model, data, labels, npts=200, device=device)
                plt.savefig(growth_filename)
                plt.close()
                growth_model.train()
        """

        # Reset the wall-clock reference for the next iteration's timing.
        end = time.time()

    logger.info('Training has finished.')