def main():
    """Entry point: evaluate the thermo estimator on the configured test
    datasets and publish results to the RMG tests database and a text file.
    """
    args = parseCommandLineArguments()

    # Metadata identifying the RMG-Py / RMG-database revisions under test.
    meta_dict = {
        "rmgpy_branch": args.rmgpy_branch,
        "rmgdb_branch": args.rmgdb_branch,
        "rmgpy_sha": args.rmgpy_sha,
        "rmgdb_sha": args.rmgdb_sha,
    }

    dataset_file = args.datasets[0]

    # Connect to the RMG tests database.
    auth_info = get_RTD_authentication_info()
    rtdi = RMGTestsDatabaseInterface(*auth_info)
    rtd = getattr(rtdi.client, 'rmg_tests')
    thermo_val_table = getattr(rtd, 'thermo_val_table')

    # Evaluate performance on every test table listed in the dataset file.
    # (evaluate_performance reads the dataset file itself, so the previous
    # redundant get_datasets() call here was dropped.)
    performance_dict = evaluate_performance(dataset_file, model_kernel='GA')

    # Push results to the database.
    save_results_in_database(thermo_val_table, meta_dict, performance_dict)

    # Save a plain-text summary next to the dataset file.
    validation_summary_path = os.path.join(os.path.dirname(dataset_file),
                                           'validation_summary.txt')
    save_results_in_file(performance_dict, validation_summary_path)
def setup_data_loaders(args):
    """Build the training and validation data loaders from CLI arguments.

    Returns a tuple ``(train_loaders, val_loader)``.
    """
    # Per-stage augmentation pipelines.
    transforms_train, transforms_val = get_transforms(
        crop_size=args.crop_size,
        shorter_side=args.shorter_side,
        low_scale=args.low_scale,
        high_scale=args.high_scale,
        img_mean=args.img_mean,
        img_std=args.img_std,
        img_scale=args.img_scale,
        ignore_label=args.ignore_label,
        num_stages=args.num_stages,
        augmentations_type=args.augmentations_type,
        dataset_type=args.dataset_type,
    )
    # Datasets with those pipelines attached.
    sets_train, set_val = get_datasets(
        train_dir=args.train_dir,
        val_dir=args.val_dir,
        train_list_path=args.train_list_path,
        val_list_path=args.val_list_path,
        train_transforms=transforms_train,
        val_transforms=transforms_val,
        masks_names=("segm", ),
        dataset_type=args.dataset_type,
        stage_names=args.stage_names,
        train_download=args.train_download,
        val_download=args.val_download,
    )
    # Batched loaders wrapping the datasets.
    loaders_train, loader_val = dt.data.get_loaders(
        train_batch_size=args.train_batch_size,
        val_batch_size=args.val_batch_size,
        train_set=sets_train,
        val_set=set_val,
        num_stages=args.num_stages,
    )
    return loaders_train, loader_val
def evaluate_performance(dataset_file, model_kernel='GA'):
    """Evaluate the thermo estimator against every test table listed in
    ``dataset_file``.

    For each (db, collection) pair the per-species predictions are written to
    a CSV next to ``dataset_file`` and the mean absolute H298 error
    (kcal/mol) is returned in a dict keyed by ``(db_name, collection_name)``.
    """
    # Test table names come from the dataset file itself.
    test_tables = get_datasets(dataset_file)

    # Model instantiation.
    model = ThermoEstimator(kernel_type=model_kernel)

    performance_dict = {}
    for _, db_name, collection_name in test_tables:
        data = get_data(db_name, collection_name)
        spec_labels = []
        H298s_true = []
        H298s_pred = []
        comments = []
        for db_mol in data:
            smiles_in = str(db_mol["SMILES_input"])
            H298_true = float(db_mol["Hf298(kcal/mol)"])  # unit: kcal/mol
            thermo = model.predict_thermo(smiles_in)
            # value_si is J/mol; 4184 J/kcal converts to kcal/mol.
            H298_pred = thermo.H298.value_si / 4184.0

            spec_labels.append(smiles_in)
            H298s_true.append(H298_true)
            H298s_pred.append(H298_pred)
            comments.append(thermo.comment)

        # Assemble per-species results.  Direct column assignment avoids
        # re-aligning Series on a potentially duplicated SMILES index.
        test_df = pd.DataFrame(index=spec_labels)
        test_df['SMILES'] = test_df.index
        test_df['H298_pred(kcal/mol)'] = H298s_pred
        test_df['H298_true(kcal/mol)'] = H298s_true
        test_df['H298_diff(kcal/mol)'] = (
            test_df['H298_pred(kcal/mol)'] -
            test_df['H298_true(kcal/mol)']).abs()
        test_df['Comments'] = comments

        # Save test_df for future reference and possible comparison.
        test_df_save_path = os.path.join(
            os.path.dirname(dataset_file),
            'test_df_{0}_{1}.csv'.format(db_name, collection_name))
        test_df.to_csv(test_df_save_path, index=False)

        # Mean absolute error for this table (same value as the previous
        # describe()['mean'] on the diff column).
        performance_dict[(db_name, collection_name)] = \
            test_df['H298_diff(kcal/mol)'].mean()
    return performance_dict
def main():
    """Train the selected sk-style model on the chosen dataset and write
    per-split evaluation results under the session checkpoint directory.
    """
    parser = get_parser()
    args = parser.parse_args()

    # Load model. model_options defined in models/__init__.py
    model = sk_model_options[args.model](args.choice, args.freq_floor)

    # Load data.
    data_path = data_paths[args.dataset]
    train_set, dev_set, test_set = get_datasets(model.batch_size,
                                                data_path,
                                                model.preprocess_inputs,
                                                sk=True)

    # Fix: use print() calls instead of Python-2-only print statements so
    # the script also runs under Python 3 (output is unchanged).
    print('training...')
    train(model, train_set)
    print('done training.')

    truth_file = os.path.join(data_path, 'truth.jsonl')
    mkdir(os.path.join(CKPT, args.sess_name))
    results_dir = os.path.join(CKPT, args.sess_name, 'results')
    mkdir(results_dir)

    print('evaluating...')
    evaluate(model, train_set, results_dir, 'train', truth_file)
    evaluate(model, dev_set, results_dir, 'dev', truth_file)
    evaluate(model, test_set, results_dir, 'test', truth_file)
    print('done evaluating.')
def simple_train(model, batch_size):
    """Run one epoch of SGD training and return the memory usage measured
    right after the first batch.

    Args:
        model: a torch module already placed on its device.
        batch_size: batch size passed to the project's get_datasets().

    Returns:
        The value of utils.get_memory_usage() captured on the first batch.
    """
    num_epochs = 1
    train_loader, _ = get_datasets(batch_size)

    # Fix: the loss function was constructed twice; once is enough.
    loss_fn = nn.CrossEntropyLoss().cuda()
    model.train()
    optimizer = optim.SGD(model.parameters(), lr=0.001)

    for _ in range(num_epochs):
        for batch_idx, (inputs, labels) in enumerate(train_loader):
            # Forward pass.  NOTE(review): "cuda:1" is hard-coded here —
            # confirm it matches the device `model` actually lives on.
            optimizer.zero_grad()
            outputs = model(inputs.to("cuda:1"))
            # Backward pass on whatever device the outputs landed on.
            labels = labels.to(outputs.device)
            loss_fn(outputs, labels).backward()
            optimizer.step()
            if batch_idx == 0:
                # Sample memory after the first step, once gradients and
                # optimizer state are allocated.
                memory = utils.get_memory_usage()
    # NOTE(review): `memory` is unbound if the loader is empty — this
    # assumes a non-empty training set.
    return memory
def read_data(args):
    """Return the (template, source) pair from the first test sample.

    Per the original comments both clouds are Nx3 torch.Tensors.
    """
    print("You can modify the code to read the point clouds.")
    splits = data.get_datasets(args)
    test_split = splits[1]
    template, source, _ = test_split[0]
    return template, source
def test_nll_estimation(data_path, device, embedding_size, hidden_size,
                        latent_size, num_layers, word_dropout, freebits,
                        model_save_path, batch_size_valid, saved_model_file,
                        num_samples, **kwargs):
    """Estimate test-set perplexity, NLL and KL for a saved SentenceVAE.

    Rebuilds the model with the given hyper-parameters, loads the weights
    from ``saved_model_file`` and runs importance-sampled estimates over the
    test split.

    Returns:
        (loss, kl, ppl) — approximate NLL, approximate KL, and perplexity.
    """
    start_time = datetime.now()
    train_data, val_data, test_data = get_datasets(data_path)
    device = torch.device(device)

    vocab_size = train_data.tokenizer.vocab_size
    padding_index = train_data.tokenizer.pad_token_id
    model = SentenceVAE(
        vocab_size=vocab_size,
        embedding_size=embedding_size,
        hidden_size=hidden_size,
        latent_size=latent_size,
        num_layers=num_layers,
        word_dropout_probability=word_dropout,
        unk_token_idx=train_data.tokenizer.unk_token_id,
        # Freebits value is the lambda value as described in Kingma et al.
        freebits=freebits,
        model_save_path=model_save_path)
    model.load_from(saved_model_file)
    model.to(device)

    test_loader = DataLoader(test_data,
                             batch_size=batch_size_valid,
                             shuffle=False,
                             collate_fn=padded_collate)
    # (Removed an unused `epoch_start_time` local; overall timing below uses
    # `start_time`.)
    try:
        ppl = perplexity(model, data_loader=test_loader, device=device,
                         num_samples=num_samples)
        loss, kl, _ = approximate_nll(model=model, data_loader=test_loader,
                                      device=device,
                                      padding_index=padding_index,
                                      num_samples=num_samples)
    except KeyboardInterrupt:
        # Drop into a debugger on manual interrupt.  NOTE(review): if the
        # interrupt fires before the assignments above, `loss`/`kl`/`ppl`
        # are unbound past this point.
        print("Manually stopped current epoch")
        __import__('pdb').set_trace()

    print("Approximate NLL:")
    print(loss)
    print("Approximate KL:")
    print(kl)
    print("Testing took {}".format(datetime.now() - start_time))
    return loss, kl, ppl
def main():
    """Load a saved SentenceVAE checkpoint, sample from it, and report the
    evaluation loss on the validation split."""
    data_path = '../Data/Dataset'
    train_data, val_data, test_data = get_datasets(data_path)
    tokenizer = train_data.tokenizer
    print("Lengths: ", len(train_data), len(val_data), len(test_data))

    # Despite the name, this loader wraps the *validation* split.
    test_loader = DataLoader(val_data,
                             batch_size=32,
                             shuffle=False,
                             collate_fn=padded_collate)

    model = SentenceVAE(
        vocab_size=tokenizer.vocab_size,
        embedding_size=300,
        hidden_size=256,
        latent_size=16,
        num_layers=1,
        word_dropout_probability=1.0,
        unk_token_idx=tokenizer.unk_token_id,
        # Freebits value is the lambda value as described in Kingma et al.
        freebits=0,
    )

    # Other checkpoints tried previously: 1vanilla.pt, 3word_dropout.pt,
    # 5freebits_dropout.pt, 6freebits_worddropout_mdr.pt
    checkpoint_path = Path('models') / Path('A.pt')
    model.load_from(checkpoint_path)

    # Draw a couple of samples from the prior (result unused beyond the call).
    sentence = sample_sentence(model, tokenizer, number=2)

    test_loss = evaluate(model, test_loader, torch.device('cpu'),
                         padding_index=0, print_every=50)
    print("Test loss: ", test_loss)
def main(args):
    """Seeded train/evaluate loop: one training pass plus validation and
    test evaluation per epoch, with experiment logging via xp."""
    set_seed(args)
    train_set, val_set, test_set = get_datasets(args)
    opt = get_optimizer(args)
    objective = get_objective(args, opt.hparams)
    xp = get_xp(args, opt)

    for epoch in range(args.epochs):
        xp.Epoch.update(1).log()
        train(objective, opt, train_set, xp, args, epoch)
        test(objective, opt, val_set, xp, args, epoch)
        test(objective, opt, test_set, xp, args, epoch)

    print_total_time(xp)
def main():
    """Train and periodically test/checkpoint a model according to a config
    file, using NVIDIA Apex AMP mixed precision."""
    # config
    args = parse_args()
    cnfg = utils.parse_config(args.config)
    # data
    tr_loader, valid_loader, tst_loader = get_datasets(
        cnfg['data']['dir'], cnfg['data']['batch_size'])
    # initialization
    utils.set_seed(cnfg['seed'])
    device = torch.device('cuda:0') if cnfg['gpu'] is None else torch.device(
        cnfg['gpu'])
    logger = Logger(cnfg)
    model = utils.get_model(cnfg['model']).to(device)
    criterion = nn.CrossEntropyLoss()
    opt = torch.optim.SGD(model.parameters(),
                          lr=cnfg['train']['lr'],
                          momentum=cnfg['train']['momentum'],
                          weight_decay=cnfg['train']['weight_decay'])
    amp_args = dict(opt_level=cnfg['opt']['level'],
                    loss_scale=cnfg['opt']['loss_scale'],
                    verbosity=False)
    # NOTE(review): this compares against '02' (zero-two); Apex opt levels
    # are conventionally 'O0'..'O3' (letter O) — confirm the config uses the
    # same spelling, otherwise master_weights is never applied.
    if cnfg['opt']['level'] == '02':
        amp_args['master_weights'] = cnfg['opt']['store']
    model, opt = amp.initialize(model, opt, **amp_args)
    # Scheduler horizon is total optimizer steps: epochs * batches per epoch.
    scheduler = utils.get_scheduler(opt, cnfg['train'],
                                    cnfg['train']['epochs'] * len(tr_loader))
    # train+test
    # NOTE(review): valid_loader is unpacked above but never used here —
    # per-epoch testing runs on tst_loader only.
    for epoch in range(cnfg['train']['epochs']):
        train(epoch, model, criterion, opt, scheduler, tr_loader, device,
              logger, cnfg['train']['lr_scheduler'])
        # testing
        test(epoch, model, tst_loader, criterion, device, logger)
        # save a checkpoint every cnfg['save']['epochs'] epochs (skipping
        # epoch 0)
        if (epoch + 1) % cnfg['save']['epochs'] == 0 and epoch > 0:
            pth = 'models/' + cnfg['logger']['project'] + '_' \
                + cnfg['logger']['run'] + '_' + str(epoch) + '.pth'
            utils.save_model(model, cnfg, epoch, pth)
            logger.log_model(pth)
def main():
    """Data-throughput benchmark: stream the cosmo dataset once through
    eager TF and report total time and samples/second."""
    args = parse_args()

    # Session setup.
    session_config = tf.compat.v1.ConfigProto(
        inter_op_parallelism_threads=args.inter_threads,
        intra_op_parallelism_threads=args.intra_threads)
    tf.compat.v1.enable_eager_execution(config=session_config)

    # Not running distributed — a single-rank placeholder.
    dist = SimpleNamespace(rank=0, size=1, local_rank=0, local_size=1)

    # Load the dataset.
    data = get_datasets(name='cosmo',
                        data_dir=args.data_dir,
                        sample_shape=[128, 128, 128, 4],
                        n_train=args.n_samples,
                        n_valid=0,
                        batch_size=args.batch_size,
                        n_epochs=args.n_epochs,
                        apply_log=True,
                        shard=False,
                        dist=dist)
    pprint.pprint(data)

    # Time one full pass, applying a trivial reduction per batch so the
    # data is actually materialized.
    start_time = time.perf_counter()
    for x, y in data['train_dataset']:
        tf.math.reduce_sum(x)
        tf.math.reduce_sum(y)
    duration = time.perf_counter() - start_time

    print('Total time: %.4f s' % duration)
    print('Throughput: %.4f samples/s' % (args.n_samples / duration))
    print('All done!')
# Script preamble: load the clustered dialogue datasets and a pickled HMM,
# plus the dataframe to analyse.
import sys
import pickle
from functools import partial
from pprint import pprint

import numpy as np
import pandas as pd
from sklearn.utils import check_random_state
from hmmlearn import hmm
from hmmlearn.utils import normalize

from data import get_datasets
from sshmm import (_do_mstep, split_state_startprob, split_state_transmat,
                   split_state_emission, entropy)

# Show long cluster/state strings in full when printing dataframes.
pd.options.display.max_colwidth = 150

topk_cluster = 30
train_dataset, dev_dataset, vocab, cnt = get_datasets(
    "./data/kmedoids_agent_150", topk_cluster)
# Invert the vocab so cluster ids map back to their labels.
vocab = {v: k for k, v in vocab.items()}
print('vocab size = ', len(vocab))

# Fix: `sys` and `pickle` are used below but were never imported.
model_path = sys.argv[1]
df_path = sys.argv[2]
with open(model_path, "rb") as f:
    model = pickle.load(f)
df = pd.read_csv(df_path)

xs = list(iter(dev_dataset))
x_lens = [len(x) for x in xs]
sample_len = 25
def main():
    """Main function.

    End-to-end CosmoFlow training driver: initializes (optionally
    distributed) workers, configures the TF session and mixed precision,
    loads the data, builds or resumes the model, trains with Keras
    callbacks, and emits MLPerf / Weights & Biases logging when requested.
    """
    # Initialization
    args = parse_args()
    dist = init_workers(args.distributed)
    config = load_config(args)
    os.makedirs(config['output_dir'], exist_ok=True)
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i size %i local_rank %i local_size %i',
                 dist.rank, dist.size, dist.local_rank, dist.local_size)
    if dist.rank == 0:
        logging.info('Configuration: %s', config)

    # Setup MLPerf logging (the logger is created wherever --mlperf is set;
    # events are only emitted from rank 0)
    if args.mlperf:
        mllogger = configure_mllogger(config['output_dir'])
    if dist.rank == 0 and args.mlperf:
        mllogger.event(key=mllog.constants.CACHE_CLEAR)
        mllogger.start(key=mllog.constants.INIT_START)

    # Initialize Weights & Biases logging (rank 0 only)
    if args.wandb and dist.rank == 0:
        import wandb
        wandb.init(project='cosmoflow', name=args.run_tag, id=args.run_tag,
                   config=config, resume=args.run_tag)

    # Device and session configuration
    gpu = dist.local_rank if args.rank_gpu else None
    if gpu is not None:
        logging.info('Taking gpu %i', gpu)
    configure_session(gpu=gpu,
                      intra_threads=args.intra_threads,
                      inter_threads=args.inter_threads,
                      kmp_blocktime=args.kmp_blocktime,
                      kmp_affinity=args.kmp_affinity,
                      omp_num_threads=args.omp_num_threads)

    # Mixed precision
    if args.amp:
        logging.info('Enabling mixed float16 precision')
        # Suggested bug workaround from https://github.com/tensorflow/tensorflow/issues/38516
        if tf.__version__.startswith('2.2.'):
            from tensorflow.python.keras.mixed_precision.experimental import device_compatibility_check
            device_compatibility_check.log_device_compatibility_check = lambda policy_name, skip_local: None
        tf.keras.mixed_precision.experimental.set_policy('mixed_float16')
        # TF 2.3
        #tf.keras.mixed_precision.set_global_policy('mixed_float16')

    # Start MLPerf logging
    if dist.rank == 0 and args.mlperf:
        log_submission_info(**config.get('mlperf', {}))
        mllogger.end(key=mllog.constants.INIT_STOP)
        mllogger.start(key=mllog.constants.RUN_START)

    # Load the data
    data_config = config['data']
    if dist.rank == 0:
        logging.info('Loading data')
    datasets = get_datasets(dist=dist, **data_config)
    logging.debug('Datasets: %s', datasets)

    # Construct or reload the model
    if dist.rank == 0:
        logging.info('Building the model')
    train_config = config['train']
    initial_epoch = 0
    checkpoint_format = os.path.join(config['output_dir'],
                                     'checkpoint-{epoch:03d}.h5')
    if args.resume and os.path.exists(checkpoint_format.format(epoch=1)):
        # Reload model from last checkpoint
        initial_epoch, model = reload_last_checkpoint(
            checkpoint_format, data_config['n_epochs'],
            distributed=args.distributed)
    else:
        # Build a new model
        model = get_model(**config['model'])
        # Configure the optimizer
        opt = get_optimizer(distributed=args.distributed,
                            **config['optimizer'])
        # Compile the model
        model.compile(optimizer=opt, loss=train_config['loss'],
                      metrics=train_config['metrics'])

    if dist.rank == 0:
        model.summary()

    # Save configuration to output directory
    if dist.rank == 0:
        config['n_ranks'] = dist.size
        save_config(config)

    # Prepare the callbacks
    if dist.rank == 0:
        logging.info('Preparing callbacks')
    callbacks = []
    if args.distributed:
        # Broadcast initial variable states from rank 0 to all processes.
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))
        # Average metrics across workers
        callbacks.append(hvd.callbacks.MetricAverageCallback())

    # Learning rate decay schedule (scaled by the global batch size)
    if 'lr_schedule' in config:
        global_batch_size = data_config['batch_size'] * dist.size
        callbacks.append(tf.keras.callbacks.LearningRateScheduler(
            get_lr_schedule(global_batch_size=global_batch_size,
                            **config['lr_schedule'])))

    # Timing
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Checkpointing and logging from rank 0 only
    if dist.rank == 0:
        callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format))
        callbacks.append(tf.keras.callbacks.CSVLogger(
            os.path.join(config['output_dir'], 'history.csv'),
            append=args.resume))
        if args.tensorboard:
            callbacks.append(tf.keras.callbacks.TensorBoard(
                os.path.join(config['output_dir'], 'tensorboard')))
        if args.mlperf:
            callbacks.append(MLPerfLoggingCallback())
        if args.wandb:
            callbacks.append(wandb.keras.WandbCallback())

    # Early stopping
    patience = train_config.get('early_stopping_patience', None)
    if patience is not None:
        callbacks.append(tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', min_delta=1e-5, patience=patience, verbose=1))

    # Stopping at specified target
    # NOTE(review): appended even when target_mae is None — assumes
    # StopAtTargetCallback treats None as "never stop"; confirm.
    target_mae = train_config.get('target_mae', None)
    callbacks.append(StopAtTargetCallback(target_max=target_mae))

    if dist.rank == 0:
        logging.debug('Callbacks: %s', callbacks)

    # Train the model
    if dist.rank == 0:
        logging.info('Beginning training')
    fit_verbose = 1 if (args.verbose and dist.rank == 0) else 2
    model.fit(datasets['train_dataset'],
              steps_per_epoch=datasets['n_train_steps'],
              epochs=data_config['n_epochs'],
              validation_data=datasets['valid_dataset'],
              validation_steps=datasets['n_valid_steps'],
              callbacks=callbacks,
              initial_epoch=initial_epoch,
              verbose=fit_verbose)

    # Stop MLPerf timer
    if dist.rank == 0 and args.mlperf:
        mllogger.end(key=mllog.constants.RUN_STOP,
                     metadata={'status': 'success'})

    # Print training summary
    if dist.rank == 0:
        print_training_summary(config['output_dir'], args.print_fom)

    # Print GPU memory - not supported in TF 2.2?
    #if gpu is not None:
    #    device = tf.config.list_physical_devices('GPU')[gpu]
    #    #print(tf.config.experimental.get_memory_usage(device))
    #    #print(tf.config.experimental.get_memory_info(device))

    # Finalize
    if dist.rank == 0:
        logging.info('All done!')
def main(): """Main function""" # Initialization args = parse_args() rank, local_rank, n_ranks = init_workers(args.distributed) config = load_config(args.config, output_dir=args.output_dir, data_config=args.data_config) os.makedirs(config['output_dir'], exist_ok=True) config_logging(verbose=args.verbose) logging.info('Initialized rank %i local_rank %i size %i', rank, local_rank, n_ranks) if rank == 0: logging.info('Configuration: %s', config) # Device and session configuration gpu = local_rank if args.rank_gpu else None configure_session(gpu=gpu, **config.get('device', {})) # Load the data data_config = config['data'] if rank == 0: logging.info('Loading data') datasets = get_datasets(rank=rank, n_ranks=n_ranks, **data_config) logging.debug('Datasets: %s', datasets) # Construct or reload the model if rank == 0: logging.info('Building the model') initial_epoch = 0 checkpoint_format = os.path.join(config['output_dir'], 'checkpoint-{epoch:03d}.h5') if args.resume: # Reload model from last checkpoint initial_epoch, model = reload_last_checkpoint( checkpoint_format, data_config['n_epochs'], distributed=args.distributed) else: # Build a new model model = get_model(**config['model']) # Configure the optimizer opt = get_optimizer(n_ranks=n_ranks, distributed=args.distributed, **config['optimizer']) # Compile the model train_config = config['train'] model.compile(optimizer=opt, loss=train_config['loss'], metrics=train_config['metrics']) if rank == 0: model.summary() # Save configuration to output directory if rank == 0: data_config['n_train'] = datasets['n_train'] data_config['n_valid'] = datasets['n_valid'] save_config(config) # Prepare the callbacks if rank == 0: logging.info('Preparing callbacks') callbacks = [] if args.distributed: # Broadcast initial variable states from rank 0 to all processes. 
callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0)) # Average metrics across workers callbacks.append(hvd.callbacks.MetricAverageCallback()) # Learning rate warmup train_config = config['train'] warmup_epochs = train_config.get('lr_warmup_epochs', 0) callbacks.append( hvd.callbacks.LearningRateWarmupCallback( warmup_epochs=warmup_epochs, verbose=1)) # Learning rate decay schedule lr_schedule = train_config.get('lr_schedule', {}) if rank == 0: logging.info('Adding LR decay schedule: %s', lr_schedule) callbacks.append( tf.keras.callbacks.LearningRateScheduler( schedule=lambda epoch, lr: lr * lr_schedule.get(epoch, 1))) # Timing timing_callback = TimingCallback() callbacks.append(timing_callback) # Checkpointing and CSV logging from rank 0 only if rank == 0: callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format)) callbacks.append( tf.keras.callbacks.CSVLogger(os.path.join(config['output_dir'], 'history.csv'), append=args.resume)) if rank == 0: logging.debug('Callbacks: %s', callbacks) # Train the model if rank == 0: logging.info('Beginning training') fit_verbose = 1 if (args.verbose and rank == 0) else 2 model.fit(datasets['train_dataset'], steps_per_epoch=datasets['n_train_steps'], epochs=data_config['n_epochs'], validation_data=datasets['valid_dataset'], validation_steps=datasets['n_valid_steps'], callbacks=callbacks, initial_epoch=initial_epoch, verbose=fit_verbose) # Print training summary if rank == 0: print_training_summary(config['output_dir']) # Finalize if rank == 0: logging.info('All done!')
model = get_model(rho_length_in=rho_length_in, **config['data_and_model'], **config['model']) rank=0 n_ranks=1 # Configure optimizer # opt = get_optimizer(n_ranks=n_ranks, distributed=False, # **config['optimizer']) # Compile the model model.compile(loss=train_config['loss'], optimizer=config['optimizer']['name'],#opt metrics=train_config['metrics']) train_gen, valid_gen = get_datasets(batch_size=train_config['batch_size'], **config['data_and_model'],**config['data']) steps_per_epoch = len(train_gen) // n_ranks # Timing callbacks = [] timing_callback = TimingCallback() callbacks.append(timing_callback) callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss',patience=5)) callbacks.append(keras.callbacks.ModelCheckpoint(filepath=os.path.join(output_dir, output_file_name), monitor='val_mean_absolute_error', save_best_only=True, verbose=1)) history = model.fit_generator(train_gen,
import torch
from models import HMM
from data import get_datasets, read_config
from training import Trainer

# Generate datasets from text file
path = "data"
N = 128
config = read_config(N, path)
train_dataset, valid_dataset = get_datasets(config)
checkpoint_path = "."

# Model and trainer setup; resume from any checkpoint in the working dir.
model = HMM(config=config)
num_epochs = 10
trainer = Trainer(model, config, lr=0.003)
trainer.load_checkpoint(checkpoint_path)

# Train/evaluate loop with a checkpoint after every epoch.
for epoch_idx in range(num_epochs):
    print("========= Epoch %d of %d =========" % (epoch_idx + 1, num_epochs))
    train_loss = trainer.train(train_dataset)
    valid_loss = trainer.test(valid_dataset)
    trainer.save_checkpoint(epoch_idx, checkpoint_path)
    print("========= Results: epoch %d of %d =========" %
          (epoch_idx + 1, num_epochs))
    print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss))
from pybrain.utilities import percentError from pybrain.tools.shortcuts import buildNetwork from pybrain.supervised.trainers import BackpropTrainer from pybrain.structure.modules import SoftmaxLayer from pybrain.tools.customxml.networkreader import NetworkReader from pybrain.tools.customxml.networkwriter import NetworkWriter import os import data train_type = "symbol" hidden_units = 10 xmldir = "xml/" networkname = train_type + "-" + str(hidden_units) + ".xml" training_set, test_set = data.get_datasets("pics/resized/", dstype=train_type) training_set._convertToOneOfMany() test_set._convertToOneOfMany() print("Test type: '{}'".format(train_type)) print("Number of training patterns:", len(training_set)) print("Number of test patterns:", len(test_set)) print("Input and output dimensions:", training_set.indim, training_set.outdim) print("Number of hidden units:", hidden_units) print() print("First sample (input, target, class):") print(training_set['input'][0], training_set['target'][0], training_set['class'][0]) print() network = buildNetwork(training_set.indim,
def main(): """Main function""" # Initialization args = parse_args() rank, local_rank, n_ranks = init_workers(args.distributed) # Load configuration config = load_config(args.config) # Configure logging config_logging(verbose=args.verbose) logging.info('Initialized rank %i local_rank %i size %i', rank, local_rank, n_ranks) # Device configuration configure_session(gpu=local_rank, **config.get('device', {})) # Load the data train_data, valid_data = get_datasets(rank=rank, n_ranks=n_ranks, **config['data']) if rank == 0: logging.info(train_data) logging.info(valid_data) # Construct the model and optimizer model = get_model(**config['model']) optimizer = get_optimizer(n_ranks=n_ranks, **config['optimizer']) train_config = config['train'] # Custom metrics for pixel accuracy and IoU metrics = [PixelAccuracy(), PixelIoU(name='iou', num_classes=3)] # Compile the model model.compile(loss=train_config['loss'], optimizer=optimizer, metrics=metrics) # Print a model summary if rank == 0: model.summary() # Prepare the callbacks callbacks = [] if args.distributed: # Broadcast initial variable states from rank 0 to all processes. 
callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0)) # Average metrics across workers callbacks.append(hvd.callbacks.MetricAverageCallback()) # Learning rate warmup warmup_epochs = train_config.get('lr_warmup_epochs', 0) callbacks.append(hvd.callbacks.LearningRateWarmupCallback( warmup_epochs=warmup_epochs, verbose=1)) # Timing timing_callback = TimingCallback() callbacks.append(timing_callback) # Checkpointing and CSV logging from rank 0 only #if rank == 0: # callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format)) # callbacks.append(tf.keras.callbacks.CSVLogger( # os.path.join(config['output_dir'], 'history.csv'), append=args.resume)) if rank == 0: logging.debug('Callbacks: %s', callbacks) # Train the model verbosity = 2 if rank==0 or args.verbose else 0 history = model.fit(train_data, validation_data=valid_data, epochs=train_config['n_epochs'], callbacks=callbacks, verbose=verbosity) # All done if rank == 0: logging.info('All done!')
def main(): """Main function""" # Initialization args = parse_args() dist = init_workers(args.distributed) config = load_config(args) os.makedirs(config['output_dir'], exist_ok=True) config_logging(verbose=args.verbose) logging.info('Initialized rank %i size %i local_rank %i local_size %i', dist.rank, dist.size, dist.local_rank, dist.local_size) if dist.rank == 0: logging.info('Configuration: %s', config) # Device and session configuration gpu = dist.local_rank if args.rank_gpu else None if gpu is not None: logging.info('Taking gpu %i', gpu) configure_session(gpu=gpu, intra_threads=args.intra_threads, inter_threads=args.inter_threads, kmp_blocktime=args.kmp_blocktime, kmp_affinity=args.kmp_affinity, omp_num_threads=args.omp_num_threads) # Load the data data_config = config['data'] if dist.rank == 0: logging.info('Loading data') datasets = get_datasets(dist=dist, **data_config) logging.debug('Datasets: %s', datasets) # Construct or reload the model if dist.rank == 0: logging.info('Building the model') train_config = config['train'] initial_epoch = 0 checkpoint_format = os.path.join(config['output_dir'], 'checkpoint-{epoch:03d}.h5') if args.resume and os.path.exists(checkpoint_format.format(epoch=1)): # Reload model from last checkpoint initial_epoch, model = reload_last_checkpoint( checkpoint_format, data_config['n_epochs'], distributed=args.distributed) else: # Build a new model model = get_model(**config['model']) # Configure the optimizer opt = get_optimizer(distributed=args.distributed, **config['optimizer']) # Compile the model model.compile(optimizer=opt, loss=train_config['loss'], metrics=train_config['metrics']) if dist.rank == 0: model.summary() # Save configuration to output directory if dist.rank == 0: config['n_ranks'] = dist.size save_config(config) # Prepare the callbacks if dist.rank == 0: logging.info('Preparing callbacks') callbacks = [] if args.distributed: # Broadcast initial variable states from rank 0 to all processes. 
callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0)) # Average metrics across workers callbacks.append(hvd.callbacks.MetricAverageCallback()) # Learning rate decay schedule if 'lr_schedule' in config: global_batch_size = data_config['batch_size'] * dist.size callbacks.append( tf.keras.callbacks.LearningRateScheduler( get_lr_schedule(global_batch_size=global_batch_size, **config['lr_schedule']))) # Timing timing_callback = TimingCallback() callbacks.append(timing_callback) # Checkpointing and logging from rank 0 only if dist.rank == 0: callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format)) callbacks.append( tf.keras.callbacks.CSVLogger(os.path.join(config['output_dir'], 'history.csv'), append=args.resume)) if args.tensorboard: callbacks.append( tf.keras.callbacks.TensorBoard( os.path.join(config['output_dir'], 'tensorboard'))) # Early stopping patience = config.get('early_stopping_patience', None) if patience is not None: callbacks.append( tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=patience, verbose=1)) if dist.rank == 0: logging.debug('Callbacks: %s', callbacks) # Train the model if dist.rank == 0: logging.info('Beginning training') fit_verbose = 1 if (args.verbose and dist.rank == 0) else 2 model.fit(datasets['train_dataset'], steps_per_epoch=datasets['n_train_steps'], epochs=data_config['n_epochs'], validation_data=datasets['valid_dataset'], validation_steps=datasets['n_valid_steps'], callbacks=callbacks, initial_epoch=initial_epoch, verbose=fit_verbose) # Print training summary if dist.rank == 0: print_training_summary(config['output_dir'], args.print_fom) # Finalize if dist.rank == 0: logging.info('All done!')
def train(
        data_path,
        device,
        num_epochs,
        batch_size_train,
        batch_size_valid,
        learning_rate,
        num_layers,
        embedding_size,
        hidden_size,
        latent_size,
        word_dropout,
        print_every,
        save_every,
        tensorboard_logging,
        model_save_path,
        early_stopping_patience,
        freebits,
        MDR,
        # losses_save_path,
        args=None,
):
    """Train a SentenceVAE with optional Minimum Desired Rate (MDR)
    constraint, early stopping on validation loss, and loss-curve pickling.

    When ``MDR`` is not None a Lagrangian multiplier with its own RMSprop
    optimizer enforces the minimum rate via train_one_epoch_MDR; otherwise
    plain train_one_epoch is used.  Per-epoch NLL/KL lists are pickled to
    ``model_save_path`` at the end.
    """
    start_time = datetime.now()
    train_data, val_data, test_data = get_datasets(data_path)
    device = torch.device(device)

    vocab_size = train_data.tokenizer.vocab_size
    padding_index = train_data.tokenizer.pad_token_id
    model = SentenceVAE(
        vocab_size=vocab_size,
        embedding_size=embedding_size,
        hidden_size=hidden_size,
        latent_size=latent_size,
        num_layers=num_layers,
        word_dropout_probability=word_dropout,
        unk_token_idx=train_data.tokenizer.unk_token_id,
        # Freebits value is the lambda value as described in Kingma et al.
        freebits=freebits,
        model_save_path=model_save_path)
    # Lagrangian is constructed (and moved to device) even when MDR is None;
    # its optimizer is only created when the constraint is active.
    lagrangian = Lagrangian(MDR)
    model.to(device)
    lagrangian.to(device)
    if MDR is not None:
        ### Define lagrangian parameter and optimizers
        lagrangian_optimizer = RMSprop(
            lagrangian.parameters(),
            lr=learning_rate)  # TODO: Move this to other scope and use args.lr
    optimizer = Adam(model.parameters(), lr=learning_rate)

    train_loader = DataLoader(train_data,
                              batch_size=batch_size_train,
                              shuffle=True,
                              collate_fn=padded_collate)
    val_loader = DataLoader(val_data,
                            batch_size=batch_size_valid,
                            shuffle=False,
                            collate_fn=padded_collate)

    iterations = 0
    patience = 0
    best_val_loss = torch.tensor(np.inf, device=device)
    best_model = None
    for epoch in range(num_epochs):
        epoch_start_time = datetime.now()
        try:
            # Fresh NLL/KL accumulators each epoch; the epoch trainers
            # append into these via loss_lists.
            nll_list = []
            kl_list = []
            lists = (nll_list, kl_list)
            if MDR is None:
                iterations = train_one_epoch(model, optimizer, train_loader,
                                             device,
                                             iter_start=iterations,
                                             padding_index=padding_index,
                                             save_every=save_every,
                                             print_every=print_every,
                                             loss_lists=lists)
            else:
                # NOTE(review): unlike the branch above, this call does not
                # forward print_every — confirm train_one_epoch_MDR's
                # default is the intended behavior.
                iterations = train_one_epoch_MDR(model, lagrangian,
                                                 lagrangian_optimizer,
                                                 optimizer, train_loader,
                                                 device,
                                                 iter_start=iterations,
                                                 padding_index=padding_index,
                                                 save_every=save_every,
                                                 minimum_rate=MDR,
                                                 loss_lists=lists)
        except KeyboardInterrupt:
            # Drop into a debugger on manual interrupt, then continue with
            # validation for this (partial) epoch.
            print("Manually stopped current epoch")
            __import__('pdb').set_trace()
        print("Training this epoch took {}".format(datetime.now() -
                                                   epoch_start_time))

        print("Validation phase:")
        val_loss, ppl = evaluate(model, val_loader, device,
                                 padding_index=padding_index,
                                 print_every=print_every)
        # Early stopping bookkeeping: reset patience on improvement, stop
        # after early_stopping_patience non-improving epochs.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.saved_model_files[-1]
            patience = 0
        else:
            patience += 1
            if patience >= early_stopping_patience:
                print("EARLY STOPPING")
                break
        print(
            f"###############################################################")
        print(
            f"Epoch {epoch} finished, validation loss: {val_loss}, ppl: {ppl}")
        print(
            f"###############################################################")
        print("Current epoch training took {}".format(datetime.now() -
                                                      epoch_start_time))

    # Persist the last epoch's loss curves.
    # NOTE(review): `lists` and `iterations` come from the loop body — this
    # assumes num_epochs >= 1, otherwise `lists` is unbound here.
    losses_file_name = f"MDR{MDR}-freebits{freebits}-word_dropout{word_dropout}-print_every{print_every}-iterations{iterations}"
    save_losses_path = Path(model_save_path) / losses_file_name
    with open(save_losses_path, 'wb') as file:
        print("Saving losses..")
        pickle.dump((lists, print_every, args), file)

    print("Training took {}".format(datetime.now() - start_time))
    print(f"Best validation loss: {best_val_loss}")
    print(f"Best model: {best_model}")
import h5py import matplotlib.pyplot as plt import matplotlib.image as mpimg import scipy import tensorflow as tf from data import get_datasets import random from tensorflow.keras import layers import time from tensorflow.keras import datasets, layers, models, applications os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) X_train, X_dev, X_test, Y_train, Y_dev, Y_test = get_datasets() print('datasets retrieved') print('train:', len(X_train)) print('dev:', len(X_dev)) print('test:', len(X_test)) #define model model = tf.keras.Sequential() model.add( layers.Conv2D(8, (3, 3), activation='relu', input_shape=(48, 48, 3), data_format='channels_last')) model.add( layers.MaxPooling2D((2, 2), (2, 2),
"""
# NOTE(review): the triple-quote above closes a module docstring that opens
# before this chunk of the file.
import sys
sys.dont_write_bytecode = True

from config import base_config
from optimizer import get_optimizer
from loss import get_loss_function
from model import get_model
from metrics import get_metrics_lst
from callback import get_callbacks
from trainer import Trainer
from data import get_datasets

if __name__ == "__main__":
    # Assemble the run configuration first: metrics, optimizer, loss and
    # callbacks are attached to the base config object before anything is
    # built from it.  Order matters: get_callbacks(config) reads the fields
    # set on the lines above it.
    config = base_config()
    config.METRICS_LST = get_metrics_lst()
    config.OPTIMIZER = get_optimizer()
    config.LOSS_FUNC = get_loss_function()
    config.CALLBACK_LST = get_callbacks(config)
    config.display()

    # Build model and datasets from the finished config, then compile the
    # trainer and run the full training loop.
    model = get_model(config)
    datasets = get_datasets(config)
    trainer = Trainer(datasets, model, config)
    trainer._compile()
    trainer.train()
from utils import get_device, load_json, get_writer import argparse from statistics import mean parser = argparse.ArgumentParser() parser.add_argument("-d", "--device", type=int, help="gpu id") parser.add_argument("-n", "--log", type=str, help="name of log folder") parser.add_argument("-p", "--hparams", type=str, help="hparams config file") opts = parser.parse_args() # Get CUDA/CPU device device = get_device(opts.device) print('Loading data..') hparams = load_json('./configs', opts.hparams) dataset_a, dataset_b = get_datasets(**hparams['dataset']) loader_a = DataLoader(dataset_a, **hparams['loading']) loader_b = DataLoader(dataset_b, **hparams['loading']) model = TravelGAN(hparams['model'], device=device) writer, monitor = get_writer(opts.log) print('Start training..') for epoch in range(hparams['n_epochs']): # Run one epoch dis_losses, gen_losses = [], [] for x_a, x_b in zip(loader_a, loader_b): # Loading on device x_a = x_a.to(device, non_blocking=True) x_b = x_b.to(device, non_blocking=True) # Calculate losses and update weights
def main(args):
    """Meta-train a U-Net segmentation model with MAML-style adaptation.

    Creates a timestamped output folder (when ``args.output_folder`` is
    set) and dumps the run configuration there, builds meta train/val
    dataloaders, trains for ``args.num_epochs`` epochs with validation
    every ``args.val_step_size`` epochs, checkpoints the best model, and
    finally saves loss/accuracy/IoU curves plus a results JSON.

    Fix vs. original: ``save_model`` is reset to False at the start of
    every validation step.  Previously it kept its value from an earlier
    epoch, so a stale True could re-save a model that was not the best.

    NOTE(review): ``augment``, ``download_data``, ``loss_function`` and
    ``bce_dice_focal`` are read from the enclosing module scope — confirm
    they are defined there.  Also note that the results/plot section below
    uses ``output_folder`` unconditionally, which raises NameError when
    ``args.output_folder`` is None (behavior preserved from the original).
    """
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    device = torch.device(
        'cuda' if args.use_cuda and torch.cuda.is_available() else 'cpu')

    # Create output folder
    if (args.output_folder is not None):
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)
            logging.debug('Creating folder `{0}`'.format(args.output_folder))

        output_folder = os.path.join(args.output_folder,
                                     time.strftime('%Y-%m-%d_%H%M%S'))
        os.makedirs(output_folder)
        logging.debug('Creating folder `{0}`'.format(output_folder))

        args.datafolder = os.path.abspath(args.datafolder)
        args.model_path = os.path.abspath(
            os.path.join(output_folder, 'model.th'))

        # Save the configuration in a config.json file
        with open(os.path.join(output_folder, 'config.json'), 'w') as f:
            json.dump(vars(args), f, indent=2)
        logging.info('Saving configuration file in `{0}`'.format(
            os.path.abspath(os.path.join(output_folder, 'config.json'))))

    # Get datasets and load into meta learning format
    meta_train_dataset, meta_val_dataset, _ = get_datasets(
        args.dataset,
        args.datafolder,
        args.num_ways,
        args.num_shots,
        args.num_shots_test,
        augment=augment,
        fold=args.fold,
        download=download_data)

    meta_train_dataloader = BatchMetaDataLoader(meta_train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=True)
    meta_val_dataloader = BatchMetaDataLoader(meta_val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.num_workers,
                                             pin_memory=True)

    # Define model
    model = Unet(device=device, feature_scale=args.feature_scale)
    model = model.to(device)
    print(f'Using device: {device}')

    # Define the (outer-loop) meta optimizer
    meta_optimizer = torch.optim.Adam(model.parameters(), lr=args.meta_lr)

    # Define meta learner
    metalearner = ModelAgnosticMetaLearning(
        model,
        meta_optimizer,
        first_order=args.first_order,
        num_adaptation_steps=args.num_adaption_steps,
        step_size=args.step_size,
        learn_step_size=False,
        loss_function=loss_function,
        device=device)

    best_value = None

    # Training loop
    epoch_desc = 'Epoch {{0: <{0}d}}'.format(
        1 + int(math.log10(args.num_epochs)))
    train_losses = []
    val_losses = []
    train_ious = []
    train_accuracies = []
    val_accuracies = []
    val_ious = []
    start_time = time.time()

    for epoch in range(args.num_epochs):
        print('start epoch ', epoch + 1)
        print('start train---------------------------------------------------')
        train_loss, train_accuracy, train_iou = metalearner.train(
            meta_train_dataloader,
            max_batches=args.num_batches,
            verbose=args.verbose,
            desc='Training',
            leave=False)
        print(f'\n train accuracy: {train_accuracy}, train loss: {train_loss}')
        print('end train---------------------------------------------------')
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        train_ious.append(train_iou)

        # Evaluate in given intervals
        if epoch % args.val_step_size == 0:
            print(
                'start evaluate-------------------------------------------------'
            )
            results = metalearner.evaluate(meta_val_dataloader,
                                           max_batches=args.num_batches,
                                           verbose=args.verbose,
                                           desc=epoch_desc.format(epoch + 1),
                                           is_test=False)
            val_acc = results['accuracy']
            val_loss = results['mean_outer_loss']
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            val_ious.append(results['iou'])
            print(
                f'\n validation accuracy: {val_acc}, validation loss: {val_loss}'
            )
            print(
                'end evaluate-------------------------------------------------'
            )

            # Save best model.  Fixed: reset the flag every validation step;
            # it used to carry its value over from previous epochs.
            save_model = False
            if 'accuracies_after' in results:
                if (best_value is None) or (best_value <
                                            results['accuracies_after']):
                    best_value = results['accuracies_after']
                    save_model = True
            elif (best_value is None) or (best_value >
                                          results['mean_outer_loss']):
                best_value = results['mean_outer_loss']
                save_model = True

            if save_model and (args.output_folder is not None):
                with open(args.model_path, 'wb') as f:
                    torch.save(model.state_dict(), f)

        print('end epoch ', epoch + 1)

    elapsed_time = time.time() - start_time
    print('Finished after ',
          time.strftime('%H:%M:%S', time.gmtime(elapsed_time)))

    # Collect all curves and timing into one results dict.
    r = {}
    r['train_losses'] = train_losses
    r['train_accuracies'] = train_accuracies
    r['train_ious'] = train_ious
    r['val_losses'] = val_losses
    r['val_accuracies'] = val_accuracies
    r['val_ious'] = val_ious
    r['time'] = time.strftime('%H:%M:%S', time.gmtime(elapsed_time))

    with open(os.path.join(output_folder, 'train_results.json'), 'w') as g:
        json.dump(r, g)
    logging.info('Saving results dict in `{0}`'.format(
        os.path.abspath(os.path.join(output_folder, 'train_results.json'))))

    # Plot results
    plot_errors(args.num_epochs,
                train_losses,
                val_losses,
                val_step_size=args.val_step_size,
                output_folder=output_folder,
                save=True,
                bce_dice_focal=bce_dice_focal)
    plot_accuracy(args.num_epochs,
                  train_accuracies,
                  val_accuracies,
                  val_step_size=args.val_step_size,
                  output_folder=output_folder,
                  save=True)
    plot_iou(args.num_epochs,
             train_ious,
             val_ious,
             val_step_size=args.val_step_size,
             output_folder=output_folder,
             save=True)

    if hasattr(meta_train_dataset, 'close'):
        meta_train_dataset.close()
        meta_val_dataset.close()
def main(args):
    """Train an RNN language model, early-stop via Ctrl-C, test best model.

    Builds train/val/test DataLoaders, trains with Adam for ``args.epochs``
    epochs, saves the full model object to ``args.save`` whenever the
    validation loss improves, then reloads the best checkpoint and reports
    test loss.  KeyboardInterrupt aborts training and jumps straight to
    testing.
    """
    # If passed through command line, check if CUDA available and use GPU if possible
    if args.cuda:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print('Device: {}'.format(device))
    else:
        device = torch.device('cpu')
        print('Device: {}'.format(device))

    # Prerequisites for training
    train_data, val_data, test_data = get_datasets()

    # Build model
    vocab_size = train_data.tokenizer.vocab_size
    model = RNNLM(ntoken = vocab_size,
                  ninp = args.emsize,
                  nhid = args.nhid,
                  nlayers = args.nlayers,
                  dropout = args.dropout).to(device)

    # NOTE(review): shuffle=False for the training loader — presumably
    # deliberate (reproducibility?), but confirm it is not an oversight.
    train_loader = DataLoader(
        train_data,
        batch_size = args.batch_size,
        shuffle = False,
        collate_fn = padded_collate,
        num_workers = 1
    )
    val_loader = DataLoader(
        val_data,
        batch_size = args.eval_batch_size,
        shuffle = False,
        collate_fn = padded_collate,
        num_workers = 1
    )
    test_loader = DataLoader(
        test_data,
        batch_size = args.eval_batch_size,
        shuffle = False,
        collate_fn = padded_collate,
        num_workers = 1
    )
    # Till here
    # Note: len(loader) counts batches, not individual samples.
    print('Split sizes | Train: {} | Val: {} | Test: {} |'.format(len(train_loader), len(val_loader), len(test_loader)))

    optimizer = Adam(model.parameters(), lr = args.lr)
    print(model)

    # store best validation loss
    best_val_loss = None

    # Use Ctrl + C to break out of training at any time
    try:
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            train(model, train_data, train_loader, args, device, optimizer, epoch)
            val_loss = evaluate(val_loader, val_data, device, model)
            print('-' * 89)
            print('| End of epoch {:3d} | Time: {:5.2f} | Validation loss: {:5.2f} |'.format(epoch, (time.time() - epoch_start_time), val_loss))
            print('-' * 89)
            # Checkpoint whenever validation loss improves (saves the whole
            # model object, not just a state_dict).
            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)
                best_val_loss = val_loss
    except KeyboardInterrupt:
        print('-' * 89)
        print('Terminating training early.')

    # Load best model
    with open(args.save, 'rb') as f:
        model = torch.load(f)
        # Ensure rnn parameters are a continuous chunk of memory
        model.rnn.flatten_parameters()

    test_loss = evaluate(test_loader, test_data, device, model)
    print('=' * 89)
    print('|End of training and testing. | Test loss {:5.2f}'.format(test_loss))
    print('=' * 89)
def main():
    """Distributed (Horovod) Keras training entry point.

    Loads a YAML-style config, builds the data generators, model and
    optimizer, trains with LR warmup/decay callbacks, and — from rank 0
    only — checkpoints each epoch, saves the history to an .npz file and
    optionally drops into an IPython shell.
    """
    # Initialization
    args = parse_args()
    rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    config = load_config(args.config)
    train_config = config['training']
    output_dir = os.path.expandvars(config['output_dir'])
    checkpoint_format = os.path.join(output_dir, 'checkpoints',
                                     'checkpoint-{epoch}.h5')
    # Only rank 0 touches the filesystem to avoid races between workers.
    if rank==0:
        os.makedirs(output_dir, exist_ok=True)

    # Logging
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    if args.show_config:
        logging.info('Command line config: %s', args)
    if rank == 0:
        logging.info('Job configuration: %s', config)
        logging.info('Saving job outputs to %s', output_dir)

    # Configure session
    device_config = config.get('device', {})
    configure_session(**device_config)

    # Load the data
    train_gen, valid_gen = get_datasets(batch_size=train_config['batch_size'],
                                        **config['data'])

    # Build the model
    model = get_model(**config['model'])
    # Configure optimizer (wrapped for distributed gradient averaging)
    opt = get_optimizer(n_ranks=n_ranks,
                        dist_wrapper=hvd.DistributedOptimizer,
                        **config['optimizer'])
    # Compile the model
    model.compile(loss=train_config['loss'], optimizer=opt,
                  metrics=train_config['metrics'])
    if rank == 0:
        model.summary()

    # Prepare the training callbacks
    # NOTE(review): the Horovod warmup/schedule callbacks below are appended
    # unconditionally, even when args.distributed is false — confirm that
    # hvd is initialized in the single-process case too.
    callbacks = get_basic_callbacks(args.distributed)

    # Learning rate warmup
    warmup_epochs = train_config.get('lr_warmup_epochs', 0)
    callbacks.append(hvd.callbacks.LearningRateWarmupCallback(
        warmup_epochs=warmup_epochs, verbose=1))

    # Learning rate decay schedule
    for lr_schedule in train_config.get('lr_schedule', []):
        if rank == 0:
            logging.info('Adding LR schedule: %s', lr_schedule)
        callbacks.append(hvd.callbacks.LearningRateScheduleCallback(**lr_schedule))

    # Checkpoint only from rank 0
    if rank == 0:
        os.makedirs(os.path.dirname(checkpoint_format), exist_ok=True)
        callbacks.append(keras.callbacks.ModelCheckpoint(checkpoint_format))

    # Timing callback
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Train the model.  Each rank covers a 1/n_ranks share of the batches
    # per epoch (at least one).
    train_steps_per_epoch = max([len(train_gen) // n_ranks, 1])
    valid_steps_per_epoch = max([len(valid_gen) // n_ranks, 1])
    history = model.fit_generator(train_gen,
                                  epochs=train_config['n_epochs'],
                                  steps_per_epoch=train_steps_per_epoch,
                                  validation_data=valid_gen,
                                  validation_steps=valid_steps_per_epoch,
                                  callbacks=callbacks,
                                  workers=4,
                                  verbose=2 if rank==0 else 0)

    # Save training history
    if rank == 0:
        # Print some best-found metrics
        if 'val_acc' in history.history.keys():
            logging.info('Best validation accuracy: %.3f',
                         max(history.history['val_acc']))
        if 'val_top_k_categorical_accuracy' in history.history.keys():
            logging.info('Best top-5 validation accuracy: %.3f',
                         max(history.history['val_top_k_categorical_accuracy']))
        logging.info('Average time per epoch: %.3f s',
                     np.mean(timing_callback.times))
        np.savez(os.path.join(output_dir, 'history'),
                 n_ranks=n_ranks, **history.history)

    # Drop to IPython interactive shell
    if args.interactive and (rank == 0):
        logging.info('Starting IPython interactive session')
        import IPython
        IPython.embed()

    if rank == 0:
        logging.info('All done!')
def train_confidnet(self, convnet_path, epoch=100, epoch_to_restore=0):
    """Train a ConfidNet confidence head on top of a frozen convnet.

    The convnet weights at ``convnet_path`` are loaded and the network is
    kept in eval mode; only ConfidNet's parameters are optimized.  The
    confidence output is regressed (MSE) onto the softmax probability of
    the true class (the TCP target), per Corbière et al.'s ConfidNet setup.

    Args:
        convnet_path: path to the trained convnet state_dict.
        epoch: number of epochs to run.
        epoch_to_restore: epoch-number offset when resuming; epoch
            numbering (logging/checkpoints) continues from here.

    Returns:
        The trained ConfidNet module.
    """
    train, val = get_datasets(self.dataset, self.train_val_split, self.seed)
    train_loader = DataLoader(train, **self.loader_kwargs)
    val_loader = DataLoader(val, **self.loader_kwargs)

    # Frozen backbone: weights loaded once, eval mode, never optimized.
    conv_net = self.convnet(**self.convnet_kwargs).to(self.device)
    conv_net.load_state_dict(torch.load(convnet_path))
    conv_net.eval()

    confid_net = ConfidNet(**self.confidnet_kwargs).to(self.device)
    optimizer = Adam(confid_net.parameters(), **self.optimizer_kwargs)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)
    criterion = torch.nn.MSELoss()
    writer = SummaryWriter(self.log_dir)

    for e in range(epoch_to_restore + 1, epoch + epoch_to_restore + 1):
        confid_net.train()
        history_train = {"loss": [], "metric": []}
        train_histograms = ConfidenceHistograms()
        for idx_batch, (imgs, label) in enumerate(train_loader):
            imgs, label = imgs.to(self.device), label.to(self.device)
            # No gradients through the frozen convnet; only its outputs
            # (logits + encoded features) are needed.
            with torch.no_grad():
                pred, encoded = conv_net(imgs)
            confid_net.zero_grad()
            confidence = confid_net(encoded)
            # TCP target: softmax probability assigned to the true class.
            loss = criterion(
                confidence,
                F.softmax(pred, dim=1).gather(1, label.unsqueeze(1)))
            loss.backward()
            optimizer.step()

            train_histograms.step(label, pred, confidence.detach())
            history_train["loss"].append(loss.detach().item())
            # step counts samples seen so far, capped at the dataset size
            # (last batch may be smaller than batch_size).
            step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                       len(train))
            sys.stdout.write("Training : "
                             f"Epoch {e}/{epoch + epoch_to_restore}; "
                             f"Step {step}/{len(train)}; "
                             f"Loss {loss.detach().item()};\r")
            sys.stdout.flush()
        print()
        print()

        confid_net.eval()
        history_val = {"loss": [], "metric": []}
        val_histograms = ConfidenceHistograms()
        for idx_batch, (imgs, label) in enumerate(val_loader):
            imgs, label = imgs.to(self.device), label.to(self.device)
            # Entire validation forward pass is gradient-free.
            with torch.no_grad():
                pred, encoded = conv_net(imgs)
                confidence = confid_net(encoded)
                val_histograms.step(label, pred, confidence)
                loss = criterion(
                    confidence,
                    F.softmax(pred, dim=1).gather(1, label.unsqueeze(1)))
            history_val["loss"].append(loss.detach().item())
            step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                       len(val))
            sys.stdout.write("Validation : "
                             f"Epoch {e}/{epoch + epoch_to_restore}; "
                             f"Step {step}/{len(val)}; "
                             f"Loss {loss.detach().item()};\r")
            sys.stdout.flush()
        # scheduler.step(np.mean(history_val["loss"]))

        # TensorBoard: MCP/TCP histograms and mean losses for this epoch.
        train_mcp_hist, train_tcp_hist = train_histograms.get_histograms()
        writer.add_figure("ConfidNet/train/MCP", train_mcp_hist, e)
        writer.add_figure("ConfidNet/train/TCP", train_tcp_hist, e)
        val_mcp_hist, val_tcp_hist = val_histograms.get_histograms()
        writer.add_figure("ConfidNet/val/MCP", val_mcp_hist, e)
        writer.add_figure("ConfidNet/val/TCP", val_tcp_hist, e)
        writer.add_scalars(
            'ConfidNet/Loss', {
                "train": np.mean(history_train["loss"]),
                "val": np.mean(history_val["loss"])
            }, e)

        print(f"\n\n[*] Finished epoch {e};\n\n"
              "Train :\n"
              f"\tLoss : {np.mean(history_train['loss'])}\n"
              "Test :\n"
              f"\tLoss : {np.mean(history_val['loss'])}\n\n\n")

        # Periodic checkpoint of the confidence head only.
        if e % self.model_checkpoint == 0:
            filename = str(self.model_filename).format(model="ConfidNet",
                                                       epoch=e)
            torch.save(confid_net.state_dict(), filename)

    return confid_net
parser.add_argument("--data", type = Path, default = Path("data/alignments/BLAT_ECOLX_hmmerbit_plmc_n5_m30_f50_t0.2_r24-286_id100_b105.a2m"), help = "Fasta input file of sequences.") parser.add_argument("--data_sheet", type = str, default = "BLAT_ECOLX_Ranganathan2015", help = "Protein family data sheet in mutation_data.pickle.") parser.add_argument("--metric_column", type = str, default = "2500", help = "Metric column of sheet used for Spearman's Rho calculation.") parser.add_argument("--ensemble_count", type = int, default = 2000, help = "How many samples of the model to use for evaluation as an ensemble.") parser.add_argument("--results_dir", type = Path, default = Path(f"results_{datetime.now().strftime('%Y-%m-%dT%H_%M_%S')}"), help = "Directory to save results to.") with torch.no_grad(): args = parser.parse_args() print("Arguments given:") for arg, value in args.__dict__.items(): print(f" {arg}: {value}") print("") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") protein_dataset, *_ = get_datasets(args.data, device, 0.8) print('Data loaded') wt, *_ = protein_dataset[0] size = len(wt) * NUM_TOKENS # load model model = VAE([size, 1500, 1500, 30, 100, 2000, size], NUM_TOKENS, use_dictionary = False).to(device) try: model.load_state_dict(torch.load(args.results_dir / Path("model.torch"), map_location=device)["state_dict"]) except FileNotFoundError: pass cor = mutation_effect_prediction(model, args.data, args.data_sheet, args.metric_column, device, args.ensemble_count, args.results_dir)
def train_convnet(self, epoch=100, epoch_to_restore=0):
    """Train the base convnet classifier with periodic checkpointing.

    Trains with Adam + cross-entropy on the train split, evaluates loss
    and accuracy on the validation split every epoch, and logs both to
    TensorBoard.  A checkpoint is written every ``self.model_checkpoint``
    epochs.

    Args:
        epoch: number of epochs to run.
        epoch_to_restore: epoch-number offset when resuming; epoch
            numbering (logging/checkpoints) continues from here.

    Returns:
        Path of the last checkpoint written, or None if no epoch number
        was divisible by ``self.model_checkpoint``.  (Fixed: the original
        raised NameError on ``return filename`` in that case.)
    """
    train, val = get_datasets(self.dataset, self.train_val_split, self.seed)
    train_loader = DataLoader(train, **self.loader_kwargs)
    val_loader = DataLoader(val, **self.loader_kwargs)

    net = self.convnet(**self.convnet_kwargs).to(self.device)
    optimizer = Adam(net.parameters())
    criterion = torch.nn.CrossEntropyLoss()
    writer = SummaryWriter(self.log_dir)

    # Fixed: bind up-front so the return below cannot raise NameError when
    # no checkpoint epoch is ever reached.
    filename = None

    for e in range(epoch_to_restore + 1, epoch + epoch_to_restore + 1):
        net.train()
        history_train = {"loss": [], "metric": []}
        for idx_batch, (imgs, label) in enumerate(train_loader):
            imgs, label = imgs.to(self.device), label.to(self.device)
            net.zero_grad()
            pred, _ = net(imgs)
            loss = criterion(pred, label)
            loss.backward()
            optimizer.step()

            history_train["loss"].append(loss.detach().item())
            pred = F.softmax(pred.detach(), dim=1).argmax(1)
            score = accuracy_score(label.cpu(), pred.cpu())
            history_train["metric"].append(score)
            # step counts samples seen so far, capped at the dataset size
            # (last batch may be smaller than batch_size).
            step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                       len(train))
            sys.stdout.write("Training : "
                             f"Epoch {e}/{epoch + epoch_to_restore}; "
                             f"Step {step}/{len(train)}; "
                             f"Loss {loss.detach().item()}; "
                             f"Score {score}\r")
            sys.stdout.flush()
        print()
        print()

        net.eval()
        history_val = {"loss": [], "metric": []}
        for idx_batch, (imgs, label) in enumerate(val_loader):
            imgs, label = imgs.to(self.device), label.to(self.device)
            # Gradient-free validation forward pass.
            with torch.no_grad():
                pred, _ = net(imgs)
                loss = criterion(pred, label).detach().item()
            history_val["loss"].append(loss)
            pred = F.softmax(pred.detach(), dim=1).argmax(1)
            score = accuracy_score(label.cpu(), pred.cpu())
            history_val["metric"].append(score)
            step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                       len(val))
            sys.stdout.write("Validation : "
                             f"Epoch {e}/{epoch + epoch_to_restore}; "
                             f"Step {step}/{len(val)}; "
                             f"Loss {loss}; "
                             f"Score {score}\r")
            sys.stdout.flush()

        # TensorBoard: mean loss and accuracy per epoch for both splits.
        writer.add_scalars(
            'ConvNet/Loss', {
                "train": np.mean(history_train["loss"]),
                "val": np.mean(history_val["loss"])
            }, e)
        writer.add_scalars(
            'ConvNet/Accuracy', {
                "train": np.mean(history_train["metric"]),
                "val": np.mean(history_val["metric"])
            }, e)

        print(f"\n\n[*] Finished epoch {e};\n\n"
              "Train :\n"
              f"\tLoss : {np.mean(history_train['loss'])}\n"
              f"\tAccuracy : {np.mean(history_train['metric'])}\n"
              "Test :\n"
              f"\tLoss : {np.mean(history_val['loss'])}\n"
              f"\tAccuracy : {np.mean(history_val['metric'])}\n\n\n")

        # Periodic checkpoint of the classifier.
        if e % self.model_checkpoint == 0:
            filename = str(self.model_filename).format(model="ConvNet",
                                                       epoch=e)
            torch.save(net.state_dict(), filename)

    return filename
from pybrain.utilities import percentError from pybrain.tools.shortcuts import buildNetwork from pybrain.supervised.trainers import BackpropTrainer from pybrain.structure.modules import SoftmaxLayer from pybrain.tools.customxml.networkreader import NetworkReader from pybrain.tools.customxml.networkwriter import NetworkWriter import os import data train_type = "symbol" hidden_units = 10 xmldir = "xml/" networkname = train_type + "-" + str(hidden_units) + ".xml" training_set, test_set = data.get_datasets("pics/resized/", dstype = train_type) training_set._convertToOneOfMany() test_set._convertToOneOfMany() print("Test type: '{}'".format(train_type)) print("Number of training patterns:", len(training_set)) print("Number of test patterns:", len(test_set)) print("Input and output dimensions:", training_set.indim, training_set.outdim) print("Number of hidden units:", hidden_units) print() print("First sample (input, target, class):") print(training_set['input'][0], training_set['target'][0], training_set['class'][0]) print() network = buildNetwork(training_set.indim, hidden_units, training_set.outdim, outclass=SoftmaxLayer) trainer = BackpropTrainer(network, dataset = training_set)
def main(): args = get_arguments() # expriment name if not args.exp_name: args.exp_name = '_'.join([args.dataset, args.model]) print("# Experiment: ", args.exp_name) # output folder output_folder = os.path.join(args.output_root, args.dataset, args.exp_name) os.makedirs(output_folder, exist_ok=True) print("# Output path: ", output_folder) # visdom global plotter if args.use_visdom: logging_folder = os.path.join(args.logging_root, args.dataset, args.exp_name) os.makedirs(logging_folder, exist_ok=True) plotter = utils.VisdomLinePlotter(env_name=args.exp_name, logging_path=os.path.join(logging_folder, 'vis.log')) print("# Visdom path: ", logging_folder) # dataset print("# Load datasets") train_datasets, val_datasets, test_datasets = get_datasets(args.dataset, args.dataset_folder, args.batch_size) num_classes = train_datasets[0].num_classes vocab = set(train_datasets[0].vocab) vocab = vocab.union(set(val_datasets[0].vocab)) vocab = vocab.union(set(test_datasets[0].vocab)) # pre-trained word2vec print("# Load pre-trained word2vec") pretrained_word2vec_cache = os.path.join(os.path.dirname(args.w2v_file), args.dataset + '_w2v.pkl') if os.path.isfile(pretrained_word2vec_cache): with open(pretrained_word2vec_cache, 'rb') as f: pretrained_word2vec = pickle.load(f) else: pretrained_word2vec = PretrainedWord2Vec(vocab, args.w2v_file) with open(pretrained_word2vec_cache, 'wb') as f: pickle.dump(pretrained_word2vec, f) # train print("# Start training") for cv, (train_dataset, val_dataset, test_dataset) in enumerate(zip(train_datasets, val_datasets, test_datasets)): # fix random seed utils.fix_random_seed(seed=const.RANDOM_SEED) # model cnn = get_model(args.model, num_classes, pretrained_word2vec) if torch.cuda.is_available(): cnn.cuda() # dataloader train_loader = DataLoader(train_dataset, args.batch_size, shuffle=True, collate_fn=sentence_collate_fn) val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=sentence_collate_fn) test_loader = 
DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=sentence_collate_fn) # optimizer optim = Adadelta(cnn.parameters(), rho=0.95, eps=1e-6) # criterion criterion = CrossEntropyLoss() # training if plotter: plotter.set_cv(cv) output_path = os.path.join(output_folder, 'cv_%d_best.pkl' % cv) train(args.num_epochs, cnn, train_loader, optim, criterion, val_loader, output_path) # evaluation utils.load_model(output_path, cnn) find_most_similar_words(cnn) accuracy = eval(cnn, test_loader) print('cross_val:', cv, '\taccuracy:', accuracy)