Example 1
def main():

    args = parseCommandLineArguments()

    rmgpy_branch = args.rmgpy_branch
    rmgdb_branch = args.rmgdb_branch
    rmgpy_sha = args.rmgpy_sha
    rmgdb_sha = args.rmgdb_sha
    meta_dict = {
        "rmgpy_branch": rmgpy_branch,
        "rmgdb_branch": rmgdb_branch,
        "rmgpy_sha": rmgpy_sha,
        "rmgdb_sha": rmgdb_sha
    }

    dataset_file = args.datasets[0]
    test_tables = get_datasets(dataset_file)

    # connect to database
    auth_info = get_RTD_authentication_info()
    rtdi = RMGTestsDatabaseInterface(*auth_info)
    rtd = getattr(rtdi.client, 'rmg_tests')
    thermo_val_table = getattr(rtd, 'thermo_val_table')

    # evaluate model performance on each test dataset
    performance_dict = evaluate_performance(dataset_file, model_kernel='GA')
    # push to database
    save_results_in_database(thermo_val_table, meta_dict, performance_dict)

    # save to txt file
    validation_summary_path = os.path.join(os.path.dirname(dataset_file),
                                           'validation_summary.txt')
    save_results_in_file(performance_dict, validation_summary_path)
Example 2
def setup_data_loaders(args):
    train_transforms, val_transforms = get_transforms(
        crop_size=args.crop_size,
        shorter_side=args.shorter_side,
        low_scale=args.low_scale,
        high_scale=args.high_scale,
        img_mean=args.img_mean,
        img_std=args.img_std,
        img_scale=args.img_scale,
        ignore_label=args.ignore_label,
        num_stages=args.num_stages,
        augmentations_type=args.augmentations_type,
        dataset_type=args.dataset_type,
    )
    train_sets, val_set = get_datasets(
        train_dir=args.train_dir,
        val_dir=args.val_dir,
        train_list_path=args.train_list_path,
        val_list_path=args.val_list_path,
        train_transforms=train_transforms,
        val_transforms=val_transforms,
        masks_names=("segm", ),
        dataset_type=args.dataset_type,
        stage_names=args.stage_names,
        train_download=args.train_download,
        val_download=args.val_download,
    )
    train_loaders, val_loader = dt.data.get_loaders(
        train_batch_size=args.train_batch_size,
        val_batch_size=args.val_batch_size,
        train_set=train_sets,
        val_set=val_set,
        num_stages=args.num_stages,
    )
    return train_loaders, val_loader
Example 3
def evaluate_performance(dataset_file, model_kernel='GA'):

    # get a list of test table names
    # from files or input
    test_tables = get_datasets(dataset_file)

    # model instantiation
    model = ThermoEstimator(kernel_type=model_kernel)

    # start test evaluation
    performance_dict = {}
    for _, db_name, collection_name in test_tables:

        data = get_data(db_name, collection_name)

        spec_labels = []
        spec_dict = {}
        H298s_true = []
        H298s_pred = []

        comments = []

        for db_mol in data:
            smiles_in = str(db_mol["SMILES_input"])
            H298_true = float(db_mol["Hf298(kcal/mol)"])  # unit: kcal/mol

            thermo = model.predict_thermo(smiles_in)
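            # convert H298 from SI units (J/mol) to kcal/mol (1 kcal = 4184 J)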
            H298_pred = thermo.H298.value_si / 4184.0

            spec_labels.append(smiles_in)
            H298s_true.append(H298_true)
            H298s_pred.append(H298_pred)
            comments.append(thermo.comment)

        # create pandas dataframe
        test_df = pd.DataFrame(index=spec_labels)
        test_df['SMILES'] = test_df.index

        test_df['H298_pred(kcal/mol)'] = pd.Series(H298s_pred,
                                                   index=test_df.index)
        test_df['H298_true(kcal/mol)'] = pd.Series(H298s_true,
                                                   index=test_df.index)

        diff = abs(test_df['H298_pred(kcal/mol)'] -
                   test_df['H298_true(kcal/mol)'])
        test_df['H298_diff(kcal/mol)'] = pd.Series(diff, index=test_df.index)
        test_df['Comments'] = pd.Series(comments, index=test_df.index)

        # save test_df for future reference and possible comparison
        test_df_save_path = os.path.join(
            os.path.dirname(dataset_file),
            'test_df_{0}_{1}.csv'.format(db_name, collection_name))
        with open(test_df_save_path, 'w') as fout:
            test_df.to_csv(fout, index=False)

        performance_dict[(
            db_name, collection_name
        )] = test_df['H298_diff(kcal/mol)'].describe()['mean']

    return performance_dict
Example 4
def main():
    parser = get_parser()
    args = parser.parse_args()

    # load model.  model_options defined in models/__init__.py
    model = sk_model_options[args.model](args.choice, args.freq_floor)

    # load data
    data_path = data_paths[args.dataset]
    train_set, dev_set, test_set = get_datasets(model.batch_size,
                                                data_path,
                                                model.preprocess_inputs,
                                                sk=True)

    print('training...')
    train(model, train_set)
    print('done training.')

    truth_file = os.path.join(data_path, 'truth.jsonl')
    mkdir(os.path.join(CKPT, args.sess_name))
    results_dir = os.path.join(CKPT, args.sess_name, 'results')
    mkdir(results_dir)
    print('evaluating...')
    evaluate(model, train_set, results_dir, 'train', truth_file)
    evaluate(model, dev_set, results_dir, 'dev', truth_file)
    evaluate(model, test_set, results_dir, 'test', truth_file)
    print('done evaluating.')
Example 5
def simple_train(model, batch_size):
    num_epochs = 1

    train_loader, _ = get_datasets(batch_size)
    loss_fn = nn.CrossEntropyLoss().cuda()

    model.train()
    optimizer = optim.SGD(model.parameters(), lr=0.001)

    for _ in range(num_epochs):
        for batch_idx, (inputs, labels) in enumerate(train_loader):
            # run forward pass
            optimizer.zero_grad()
            outputs = model(inputs.to("cuda:1"))

            # run backward pass
            labels = labels.to(outputs.device)
            loss_fn(outputs, labels).backward()
            optimizer.step()

            if batch_idx == 0:
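                # capture memory usage once, after the first training step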
                memory = utils.get_memory_usage()

    return memory
Example 6
def read_data(args):
	# template:		Nx3 (torch.Tensor)
	# source: 		Nx3 (torch.Tensor)
	
	print("You can modify the code to read the point clouds.")
	trainset, testset = data.get_datasets(args)
	template, source, _ = testset[0]
	return template, source
Example 7
def test_nll_estimation(data_path, device, embedding_size, hidden_size,
                        latent_size, num_layers, word_dropout, freebits,
                        model_save_path, batch_size_valid, saved_model_file,
                        num_samples, **kwargs):

    start_time = datetime.now()

    train_data, val_data, test_data = get_datasets(data_path)
    device = torch.device(device)
    vocab_size = train_data.tokenizer.vocab_size
    padding_index = train_data.tokenizer.pad_token_id

    model = SentenceVAE(
        vocab_size=vocab_size,
        embedding_size=embedding_size,
        hidden_size=hidden_size,
        latent_size=latent_size,
        num_layers=num_layers,
        word_dropout_probability=word_dropout,
        unk_token_idx=train_data.tokenizer.unk_token_id,
        freebits=freebits,  # freebits is the lambda value described in Kingma et al.
        model_save_path=model_save_path)

    model.load_from(saved_model_file)
    model.to(device)

    test_loader = DataLoader(test_data,
                             batch_size=batch_size_valid,
                             shuffle=False,
                             collate_fn=padded_collate)

    epoch_start_time = datetime.now()
    try:
        ppl = perplexity(model,
                         data_loader=test_loader,
                         device=device,
                         num_samples=num_samples)
        loss, kl, _ = approximate_nll(model=model,
                                      data_loader=test_loader,
                                      device=device,
                                      padding_index=padding_index,
                                      num_samples=num_samples)

    except KeyboardInterrupt:
        print("Manually stopped current epoch")
        __import__('pdb').set_trace()

    print("Approximate NLL:")
    print(loss)

    print("Approximate KL:")
    print(kl)

    print("Testing took {}".format(datetime.now() - start_time))
    return loss, kl, ppl
Example 8
def main():

    data_path = '../Data/Dataset'
    train_data, val_data, test_data = get_datasets(data_path)
    tokenizer = train_data.tokenizer

    print("Lengths: ", len(train_data), len(val_data), len(test_data))

    test_loader = DataLoader(val_data,
                             batch_size=32,
                             shuffle=False,
                             collate_fn=padded_collate)

    model = SentenceVAE(
        vocab_size=tokenizer.vocab_size,
        embedding_size=300,
        hidden_size=256,
        latent_size=16,
        num_layers=1,  #1,
        word_dropout_probability=1.0,
        unk_token_idx=tokenizer.unk_token_id,
        freebits=0,  # freebits is the lambda value described in Kingma et al.
    )

    # model_load_name = Path('1vanilla.pt')
    # model_load_name = Path('3word_dropout.pt')
    # model_load_name = Path('5freebits_dropout.pt')
    # model_load_name = Path('6freebits_worddropout_mdr.pt')
    model_load_name = Path('A.pt')

    models_path = Path('models')

    model_load_path = models_path / model_load_name

    model.load_from(model_load_path)
    # print(model.state_dict)

    sentence = sample_sentence(model, tokenizer, number=2)
    # print(sentences)

    # model.to(torch.device('cuda'))
    test_loss = evaluate(model,
                         test_loader,
                         torch.device('cpu'),
                         padding_index=0,
                         print_every=50)
    print("Test loss: ", test_loss)
Example 9
def main(args):

    set_seed(args)

    dataset_train, dataset_val, dataset_test = get_datasets(args)
    optimizer = get_optimizer(args)
    obj = get_objective(args, optimizer.hparams)
    xp = get_xp(args, optimizer)

    for i in range(args.epochs):
        xp.Epoch.update(1).log()

        train(obj, optimizer, dataset_train, xp, args, i)
        test(obj, optimizer, dataset_val, xp, args, i)

    test(obj, optimizer, dataset_test, xp, args, i)  # final evaluation on the held-out test set
    print_total_time(xp)
Example 10
def main():
    # config
    args = parse_args()
    cnfg = utils.parse_config(args.config)
    # data
    tr_loader, valid_loader, tst_loader = get_datasets(
        cnfg['data']['dir'], cnfg['data']['batch_size'])
    # initialization
    utils.set_seed(cnfg['seed'])
    device = torch.device('cuda:0') if cnfg['gpu'] is None else torch.device(
        cnfg['gpu'])

    logger = Logger(cnfg)
    model = utils.get_model(cnfg['model']).to(device)
    criterion = nn.CrossEntropyLoss()
    opt = torch.optim.SGD(model.parameters(),
                          lr=cnfg['train']['lr'],
                          momentum=cnfg['train']['momentum'],
                          weight_decay=cnfg['train']['weight_decay'])
    amp_args = dict(opt_level=cnfg['opt']['level'],
                    loss_scale=cnfg['opt']['loss_scale'],
                    verbosity=False)
    if cnfg['opt']['level'] == 'O2':  # Apex opt_level is the letter "O" (e.g. 'O2'), not zero
        amp_args['master_weights'] = cnfg['opt']['store']
    model, opt = amp.initialize(model, opt, **amp_args)
    scheduler = utils.get_scheduler(opt, cnfg['train'],
                                    cnfg['train']['epochs'] * len(tr_loader))
    # train+test
    for epoch in range(cnfg['train']['epochs']):
        train(epoch, model, criterion, opt, scheduler, tr_loader, device,
              logger, cnfg['train']['lr_scheduler'])
        # testing
        test(epoch, model, tst_loader, criterion, device, logger)
        # save
        if (epoch + 1) % cnfg['save']['epochs'] == 0 and epoch > 0:
            pth = 'models/' + cnfg['logger']['project'] + '_' \
                + cnfg['logger']['run'] + '_' + str(epoch) + '.pth'
            utils.save_model(model, cnfg, epoch, pth)
            logger.log_model(pth)
Example 11
def main():
    # Parse command line arguments
    args = parse_args()

    # Session setup
    tf.compat.v1.enable_eager_execution(
        config=tf.compat.v1.ConfigProto(
            inter_op_parallelism_threads=args.inter_threads,
            intra_op_parallelism_threads=args.intra_threads))

    # Not running distributed
    dist = SimpleNamespace(rank=0, size=1, local_rank=0, local_size=1)

    # Load the dataset
    data = get_datasets(name='cosmo',
                        data_dir=args.data_dir,
                        sample_shape=[128, 128, 128, 4],
                        n_train=args.n_samples,
                        n_valid=0,
                        batch_size=args.batch_size,
                        n_epochs=args.n_epochs,
                        apply_log=True,
                        shard=False,
                        dist=dist)

    pprint.pprint(data)

    start_time = time.perf_counter()
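    # stream the training dataset once, doing a trivial reduction on each batch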
    for x, y in data['train_dataset']:
        # Perform a simple operation
        tf.math.reduce_sum(x)
        tf.math.reduce_sum(y)
    duration = time.perf_counter() - start_time

    print('Total time: %.4f s' % duration)
    print('Throughput: %.4f samples/s' % (args.n_samples / duration))

    print('All done!')
Example 12
import pickle
import sys

import numpy as np
import pandas as pd
from sklearn.utils import check_random_state
from functools import partial
from pprint import pprint

from hmmlearn import hmm
from hmmlearn.utils import normalize

from data import get_datasets
from sshmm import _do_mstep, split_state_startprob, split_state_transmat, split_state_emission, entropy

pd.options.display.max_colwidth = 150
topk_cluster = 30

train_dataset, dev_dataset, vocab, cnt = get_datasets(
    "./data/kmedoids_agent_150", topk_cluster)
vocab = {v: k for k, v in vocab.items()}
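# vocab now maps id -> token, so cluster ids can be decoded back to symbols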
print('vocab size = ', len(vocab))

model_path = sys.argv[1]
df_path = sys.argv[2]

with open(model_path, "rb") as f:
    model = pickle.load(f)

df = pd.read_csv(df_path)

xs = list(iter(dev_dataset))
x_lens = [len(x) for x in xs]

sample_len = 25
Example 13
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    dist = init_workers(args.distributed)
    config = load_config(args)
    os.makedirs(config['output_dir'], exist_ok=True)
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i size %i local_rank %i local_size %i',
                 dist.rank, dist.size, dist.local_rank, dist.local_size)
    if dist.rank == 0:
        logging.info('Configuration: %s', config)

    # Setup MLPerf logging
    if args.mlperf:
        mllogger = configure_mllogger(config['output_dir'])
    if dist.rank == 0 and args.mlperf:
        mllogger.event(key=mllog.constants.CACHE_CLEAR)
        mllogger.start(key=mllog.constants.INIT_START)

    # Initialize Weights & Biases logging
    if args.wandb and dist.rank == 0:
        import wandb
        wandb.init(project='cosmoflow',
                   name=args.run_tag,
                   id=args.run_tag,
                   config=config,
                   resume=args.run_tag)

    # Device and session configuration
    gpu = dist.local_rank if args.rank_gpu else None
    if gpu is not None:
        logging.info('Taking gpu %i', gpu)
    configure_session(gpu=gpu,
                      intra_threads=args.intra_threads,
                      inter_threads=args.inter_threads,
                      kmp_blocktime=args.kmp_blocktime,
                      kmp_affinity=args.kmp_affinity,
                      omp_num_threads=args.omp_num_threads)

    # Mixed precision
    if args.amp:
        logging.info('Enabling mixed float16 precision')

        # Suggested bug workaround from https://github.com/tensorflow/tensorflow/issues/38516
        if tf.__version__.startswith('2.2.'):
            from tensorflow.python.keras.mixed_precision.experimental import device_compatibility_check
            device_compatibility_check.log_device_compatibility_check = lambda policy_name, skip_local: None
        tf.keras.mixed_precision.experimental.set_policy('mixed_float16')
        # TF 2.3
        #tf.keras.mixed_precision.set_global_policy('mixed_float16')

    # Start MLPerf logging
    if dist.rank == 0 and args.mlperf:
        log_submission_info(**config.get('mlperf', {}))
        mllogger.end(key=mllog.constants.INIT_STOP)
        mllogger.start(key=mllog.constants.RUN_START)

    # Load the data
    data_config = config['data']
    if dist.rank == 0:
        logging.info('Loading data')
    datasets = get_datasets(dist=dist, **data_config)
    logging.debug('Datasets: %s', datasets)

    # Construct or reload the model
    if dist.rank == 0:
        logging.info('Building the model')
    train_config = config['train']
    initial_epoch = 0
    checkpoint_format = os.path.join(config['output_dir'],
                                     'checkpoint-{epoch:03d}.h5')
    if args.resume and os.path.exists(checkpoint_format.format(epoch=1)):
        # Reload model from last checkpoint
        initial_epoch, model = reload_last_checkpoint(
            checkpoint_format,
            data_config['n_epochs'],
            distributed=args.distributed)
    else:
        # Build a new model
        model = get_model(**config['model'])
        # Configure the optimizer
        opt = get_optimizer(distributed=args.distributed,
                            **config['optimizer'])
        # Compile the model
        model.compile(optimizer=opt,
                      loss=train_config['loss'],
                      metrics=train_config['metrics'])

    if dist.rank == 0:
        model.summary()

    # Save configuration to output directory
    if dist.rank == 0:
        config['n_ranks'] = dist.size
        save_config(config)

    # Prepare the callbacks
    if dist.rank == 0:
        logging.info('Preparing callbacks')
    callbacks = []
    if args.distributed:

        # Broadcast initial variable states from rank 0 to all processes.
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))

        # Average metrics across workers
        callbacks.append(hvd.callbacks.MetricAverageCallback())

    # Learning rate decay schedule
    if 'lr_schedule' in config:
        global_batch_size = data_config['batch_size'] * dist.size
        callbacks.append(
            tf.keras.callbacks.LearningRateScheduler(
                get_lr_schedule(global_batch_size=global_batch_size,
                                **config['lr_schedule'])))

    # Timing
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Checkpointing and logging from rank 0 only
    if dist.rank == 0:
        callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format))
        callbacks.append(
            tf.keras.callbacks.CSVLogger(os.path.join(config['output_dir'],
                                                      'history.csv'),
                                         append=args.resume))
        if args.tensorboard:
            callbacks.append(
                tf.keras.callbacks.TensorBoard(
                    os.path.join(config['output_dir'], 'tensorboard')))
        if args.mlperf:
            callbacks.append(MLPerfLoggingCallback())
        if args.wandb:
            callbacks.append(wandb.keras.WandbCallback())

    # Early stopping
    patience = train_config.get('early_stopping_patience', None)
    if patience is not None:
        callbacks.append(
            tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                             min_delta=1e-5,
                                             patience=patience,
                                             verbose=1))

    # Stopping at specified target
    target_mae = train_config.get('target_mae', None)
    callbacks.append(StopAtTargetCallback(target_max=target_mae))

    if dist.rank == 0:
        logging.debug('Callbacks: %s', callbacks)

    # Train the model
    if dist.rank == 0:
        logging.info('Beginning training')
    fit_verbose = 1 if (args.verbose and dist.rank == 0) else 2
    model.fit(datasets['train_dataset'],
              steps_per_epoch=datasets['n_train_steps'],
              epochs=data_config['n_epochs'],
              validation_data=datasets['valid_dataset'],
              validation_steps=datasets['n_valid_steps'],
              callbacks=callbacks,
              initial_epoch=initial_epoch,
              verbose=fit_verbose)

    # Stop MLPerf timer
    if dist.rank == 0 and args.mlperf:
        mllogger.end(key=mllog.constants.RUN_STOP,
                     metadata={'status': 'success'})

    # Print training summary
    if dist.rank == 0:
        print_training_summary(config['output_dir'], args.print_fom)

    # Print GPU memory - not supported in TF 2.2?
    #if gpu is not None:
    #    device = tf.config.list_physical_devices('GPU')[gpu]
    #    #print(tf.config.experimental.get_memory_usage(device))
    #    #print(tf.config.experimental.get_memory_info(device))

    # Finalize
    if dist.rank == 0:
        logging.info('All done!')
Example 14
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    rank, local_rank, n_ranks = init_workers(args.distributed)
    config = load_config(args.config,
                         output_dir=args.output_dir,
                         data_config=args.data_config)

    os.makedirs(config['output_dir'], exist_ok=True)
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i local_rank %i size %i', rank, local_rank,
                 n_ranks)
    if rank == 0:
        logging.info('Configuration: %s', config)

    # Device and session configuration
    gpu = local_rank if args.rank_gpu else None
    configure_session(gpu=gpu, **config.get('device', {}))

    # Load the data
    data_config = config['data']
    if rank == 0:
        logging.info('Loading data')
    datasets = get_datasets(rank=rank, n_ranks=n_ranks, **data_config)
    logging.debug('Datasets: %s', datasets)

    # Construct or reload the model
    if rank == 0:
        logging.info('Building the model')
    initial_epoch = 0
    checkpoint_format = os.path.join(config['output_dir'],
                                     'checkpoint-{epoch:03d}.h5')
    if args.resume:
        # Reload model from last checkpoint
        initial_epoch, model = reload_last_checkpoint(
            checkpoint_format,
            data_config['n_epochs'],
            distributed=args.distributed)
    else:
        # Build a new model
        model = get_model(**config['model'])
        # Configure the optimizer
        opt = get_optimizer(n_ranks=n_ranks,
                            distributed=args.distributed,
                            **config['optimizer'])
        # Compile the model
        train_config = config['train']
        model.compile(optimizer=opt,
                      loss=train_config['loss'],
                      metrics=train_config['metrics'])

    if rank == 0:
        model.summary()

    # Save configuration to output directory
    if rank == 0:
        data_config['n_train'] = datasets['n_train']
        data_config['n_valid'] = datasets['n_valid']
        save_config(config)

    # Prepare the callbacks
    if rank == 0:
        logging.info('Preparing callbacks')
    callbacks = []
    if args.distributed:

        # Broadcast initial variable states from rank 0 to all processes.
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))

        # Average metrics across workers
        callbacks.append(hvd.callbacks.MetricAverageCallback())

        # Learning rate warmup
        train_config = config['train']
        warmup_epochs = train_config.get('lr_warmup_epochs', 0)
        callbacks.append(
            hvd.callbacks.LearningRateWarmupCallback(
                warmup_epochs=warmup_epochs, verbose=1))

    # Learning rate decay schedule
    lr_schedule = train_config.get('lr_schedule', {})
    if rank == 0:
        logging.info('Adding LR decay schedule: %s', lr_schedule)
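    # multiply the current LR by the factor configured for this epoch (defaults to 1)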
    callbacks.append(
        tf.keras.callbacks.LearningRateScheduler(
            schedule=lambda epoch, lr: lr * lr_schedule.get(epoch, 1)))

    # Timing
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Checkpointing and CSV logging from rank 0 only
    if rank == 0:
        callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format))
        callbacks.append(
            tf.keras.callbacks.CSVLogger(os.path.join(config['output_dir'],
                                                      'history.csv'),
                                         append=args.resume))

    if rank == 0:
        logging.debug('Callbacks: %s', callbacks)

    # Train the model
    if rank == 0:
        logging.info('Beginning training')
    fit_verbose = 1 if (args.verbose and rank == 0) else 2
    model.fit(datasets['train_dataset'],
              steps_per_epoch=datasets['n_train_steps'],
              epochs=data_config['n_epochs'],
              validation_data=datasets['valid_dataset'],
              validation_steps=datasets['n_valid_steps'],
              callbacks=callbacks,
              initial_epoch=initial_epoch,
              verbose=fit_verbose)

    # Print training summary
    if rank == 0:
        print_training_summary(config['output_dir'])

    # Finalize
    if rank == 0:
        logging.info('All done!')
Example 15
model = get_model(rho_length_in=rho_length_in, 
                  **config['data_and_model'],
                  **config['model'])

rank = 0
n_ranks = 1

# Configure optimizer
# opt = get_optimizer(n_ranks=n_ranks, distributed=False,
#                     **config['optimizer'])

# Compile the model
model.compile(loss=train_config['loss'], optimizer=config['optimizer']['name'],#opt
              metrics=train_config['metrics'])
train_gen, valid_gen = get_datasets(batch_size=train_config['batch_size'],
                                    **config['data_and_model'],**config['data'])

steps_per_epoch = len(train_gen) // n_ranks

# Timing
callbacks = []
timing_callback = TimingCallback()
callbacks.append(timing_callback)
callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss',patience=5))

callbacks.append(keras.callbacks.ModelCheckpoint(filepath=os.path.join(output_dir, output_file_name),
                                                 monitor='val_mean_absolute_error',
                                                 save_best_only=True,
                                                 verbose=1))

history = model.fit_generator(train_gen,
Example 16
import torch
from models import HMM
from data import get_datasets, read_config
from training import Trainer

# Generate datasets from text file
path = "data"
N = 128
config = read_config(N,path)
train_dataset, valid_dataset = get_datasets(config)
checkpoint_path = "."

# Initialize model
model = HMM(config=config)

# Train the model
num_epochs = 10
trainer = Trainer(model, config, lr=0.003)
trainer.load_checkpoint(checkpoint_path)

for epoch in range(num_epochs):
	print("========= Epoch %d of %d =========" % (epoch+1, num_epochs))
	train_loss = trainer.train(train_dataset)
	valid_loss = trainer.test(valid_dataset)
	trainer.save_checkpoint(epoch, checkpoint_path)

	print("========= Results: epoch %d of %d =========" % (epoch+1, num_epochs))
	print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss) )


Example 17
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.customxml.networkreader import NetworkReader
from pybrain.tools.customxml.networkwriter import NetworkWriter
import os
import data

train_type = "symbol"
hidden_units = 10

xmldir = "xml/"
networkname = train_type + "-" + str(hidden_units) + ".xml"

training_set, test_set = data.get_datasets("pics/resized/", dstype=train_type)
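# one-hot encode the integer class targets (needed for a softmax output layer)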
training_set._convertToOneOfMany()
test_set._convertToOneOfMany()

print("Test type: '{}'".format(train_type))
print("Number of training patterns:", len(training_set))
print("Number of test patterns:", len(test_set))
print("Input and output dimensions:", training_set.indim, training_set.outdim)
print("Number of hidden units:", hidden_units)
print()
print("First sample (input, target, class):")
print(training_set['input'][0], training_set['target'][0],
      training_set['class'][0])
print()

network = buildNetwork(training_set.indim,
Example 18
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    rank, local_rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    config = load_config(args.config)

    # Configure logging
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i local_rank %i size %i',
                 rank, local_rank, n_ranks)

    # Device configuration
    configure_session(gpu=local_rank, **config.get('device', {}))

    # Load the data
    train_data, valid_data = get_datasets(rank=rank, n_ranks=n_ranks,
                                          **config['data'])
    if rank == 0:
        logging.info(train_data)
        logging.info(valid_data)

    # Construct the model and optimizer
    model = get_model(**config['model'])
    optimizer = get_optimizer(n_ranks=n_ranks, **config['optimizer'])
    train_config = config['train']

    # Custom metrics for pixel accuracy and IoU
    metrics = [PixelAccuracy(), PixelIoU(name='iou', num_classes=3)]

    # Compile the model
    model.compile(loss=train_config['loss'], optimizer=optimizer,
                  metrics=metrics)

    # Print a model summary
    if rank == 0:
        model.summary()

    # Prepare the callbacks
    callbacks = []

    if args.distributed:

        # Broadcast initial variable states from rank 0 to all processes.
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))

        # Average metrics across workers
        callbacks.append(hvd.callbacks.MetricAverageCallback())

        # Learning rate warmup
        warmup_epochs = train_config.get('lr_warmup_epochs', 0)
        callbacks.append(hvd.callbacks.LearningRateWarmupCallback(
            warmup_epochs=warmup_epochs, verbose=1))

    # Timing
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Checkpointing and CSV logging from rank 0 only
    #if rank == 0:
    #    callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format))
    #    callbacks.append(tf.keras.callbacks.CSVLogger(
    #        os.path.join(config['output_dir'], 'history.csv'), append=args.resume))

    if rank == 0:
        logging.debug('Callbacks: %s', callbacks)

    # Train the model
    verbosity = 2 if rank==0 or args.verbose else 0
    history = model.fit(train_data,
                        validation_data=valid_data,
                        epochs=train_config['n_epochs'],
                        callbacks=callbacks,
                        verbose=verbosity)

    # All done
    if rank == 0:
        logging.info('All done!')
Example 19
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    dist = init_workers(args.distributed)
    config = load_config(args)
    os.makedirs(config['output_dir'], exist_ok=True)
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i size %i local_rank %i local_size %i',
                 dist.rank, dist.size, dist.local_rank, dist.local_size)
    if dist.rank == 0:
        logging.info('Configuration: %s', config)

    # Device and session configuration
    gpu = dist.local_rank if args.rank_gpu else None
    if gpu is not None:
        logging.info('Taking gpu %i', gpu)
    configure_session(gpu=gpu,
                      intra_threads=args.intra_threads,
                      inter_threads=args.inter_threads,
                      kmp_blocktime=args.kmp_blocktime,
                      kmp_affinity=args.kmp_affinity,
                      omp_num_threads=args.omp_num_threads)

    # Load the data
    data_config = config['data']
    if dist.rank == 0:
        logging.info('Loading data')
    datasets = get_datasets(dist=dist, **data_config)
    logging.debug('Datasets: %s', datasets)

    # Construct or reload the model
    if dist.rank == 0:
        logging.info('Building the model')
    train_config = config['train']
    initial_epoch = 0
    checkpoint_format = os.path.join(config['output_dir'],
                                     'checkpoint-{epoch:03d}.h5')
    if args.resume and os.path.exists(checkpoint_format.format(epoch=1)):
        # Reload model from last checkpoint
        initial_epoch, model = reload_last_checkpoint(
            checkpoint_format,
            data_config['n_epochs'],
            distributed=args.distributed)
    else:
        # Build a new model
        model = get_model(**config['model'])
        # Configure the optimizer
        opt = get_optimizer(distributed=args.distributed,
                            **config['optimizer'])
        # Compile the model
        model.compile(optimizer=opt,
                      loss=train_config['loss'],
                      metrics=train_config['metrics'])

    if dist.rank == 0:
        model.summary()

    # Save configuration to output directory
    if dist.rank == 0:
        config['n_ranks'] = dist.size
        save_config(config)

    # Prepare the callbacks
    if dist.rank == 0:
        logging.info('Preparing callbacks')
    callbacks = []
    if args.distributed:

        # Broadcast initial variable states from rank 0 to all processes.
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))

        # Average metrics across workers
        callbacks.append(hvd.callbacks.MetricAverageCallback())

    # Learning rate decay schedule
    if 'lr_schedule' in config:
        global_batch_size = data_config['batch_size'] * dist.size
        callbacks.append(
            tf.keras.callbacks.LearningRateScheduler(
                get_lr_schedule(global_batch_size=global_batch_size,
                                **config['lr_schedule'])))

    # Timing
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Checkpointing and logging from rank 0 only
    if dist.rank == 0:
        callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format))
        callbacks.append(
            tf.keras.callbacks.CSVLogger(os.path.join(config['output_dir'],
                                                      'history.csv'),
                                         append=args.resume))
        if args.tensorboard:
            callbacks.append(
                tf.keras.callbacks.TensorBoard(
                    os.path.join(config['output_dir'], 'tensorboard')))

    # Early stopping
    patience = config.get('early_stopping_patience', None)
    if patience is not None:
        callbacks.append(
            tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                             min_delta=1e-5,
                                             patience=patience,
                                             verbose=1))

    if dist.rank == 0:
        logging.debug('Callbacks: %s', callbacks)

    # Train the model
    if dist.rank == 0:
        logging.info('Beginning training')
    fit_verbose = 1 if (args.verbose and dist.rank == 0) else 2
    model.fit(datasets['train_dataset'],
              steps_per_epoch=datasets['n_train_steps'],
              epochs=data_config['n_epochs'],
              validation_data=datasets['valid_dataset'],
              validation_steps=datasets['n_valid_steps'],
              callbacks=callbacks,
              initial_epoch=initial_epoch,
              verbose=fit_verbose)

    # Print training summary
    if dist.rank == 0:
        print_training_summary(config['output_dir'], args.print_fom)

    # Finalize
    if dist.rank == 0:
        logging.info('All done!')
Example 20
def train(
    data_path,
    device,
    num_epochs,
    batch_size_train,
    batch_size_valid,
    learning_rate,
    num_layers,
    embedding_size,
    hidden_size,
    latent_size,
    word_dropout,
    print_every,
    save_every,
    tensorboard_logging,
    model_save_path,
    early_stopping_patience,
    freebits,
    MDR,
    # losses_save_path,
    args=None,
):

    start_time = datetime.now()

    train_data, val_data, test_data = get_datasets(data_path)
    device = torch.device(device)
    vocab_size = train_data.tokenizer.vocab_size
    padding_index = train_data.tokenizer.pad_token_id

    model = SentenceVAE(
        vocab_size=vocab_size,
        embedding_size=embedding_size,
        hidden_size=hidden_size,
        latent_size=latent_size,
        num_layers=num_layers,
        word_dropout_probability=word_dropout,
        unk_token_idx=train_data.tokenizer.unk_token_id,
        freebits=freebits,  # freebits is the lambda value described in Kingma et al.
        model_save_path=model_save_path)
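    # Lagrange multiplier used to enforce the minimum desired rate (MDR) constraint on the KL term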
    lagrangian = Lagrangian(MDR)

    model.to(device)
    lagrangian.to(device)

    if MDR is not None:
        ### Define lagrangian parameter and optimizers
        lagrangian_optimizer = RMSprop(
            lagrangian.parameters(),
            lr=learning_rate)  # TODO: Move this to other scope and use args.lr
    optimizer = Adam(model.parameters(), lr=learning_rate)

    train_loader = DataLoader(train_data,
                              batch_size=batch_size_train,
                              shuffle=True,
                              collate_fn=padded_collate)

    val_loader = DataLoader(val_data,
                            batch_size=batch_size_valid,
                            shuffle=False,
                            collate_fn=padded_collate)

    iterations = 0
    patience = 0
    best_val_loss = torch.tensor(np.inf, device=device)
    best_model = None
    for epoch in range(num_epochs):

        epoch_start_time = datetime.now()
        try:
            nll_list = []
            kl_list = []
            lists = (nll_list, kl_list)

            if MDR is None:
                iterations = train_one_epoch(model,
                                             optimizer,
                                             train_loader,
                                             device,
                                             iter_start=iterations,
                                             padding_index=padding_index,
                                             save_every=save_every,
                                             print_every=print_every,
                                             loss_lists=lists)
            else:
                iterations = train_one_epoch_MDR(model,
                                                 lagrangian,
                                                 lagrangian_optimizer,
                                                 optimizer,
                                                 train_loader,
                                                 device,
                                                 iter_start=iterations,
                                                 padding_index=padding_index,
                                                 save_every=save_every,
                                                 minimum_rate=MDR,
                                                 loss_lists=lists)

        except KeyboardInterrupt:
            print("Manually stopped current epoch")
            __import__('pdb').set_trace()

        print("Training this epoch took {}".format(datetime.now() -
                                                   epoch_start_time))

        print("Validation phase:")
        val_loss, ppl = evaluate(model,
                                 val_loader,
                                 device,
                                 padding_index=padding_index,
                                 print_every=print_every)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.saved_model_files[-1]
            patience = 0
        else:
            patience += 1
            if patience >= early_stopping_patience:
                print("EARLY STOPPING")
                break

        print("###############################################################")
        print(f"Epoch {epoch} finished, validation loss: {val_loss}, ppl: {ppl}")
        print("###############################################################")
        print("Current epoch training took {}".format(datetime.now() -
                                                      epoch_start_time))

        losses_file_name = f"MDR{MDR}-freebits{freebits}-word_dropout{word_dropout}-print_every{print_every}-iterations{iterations}"
        save_losses_path = Path(model_save_path) / losses_file_name
        with open(save_losses_path, 'wb') as file:
            print("Saving losses..")
            pickle.dump((lists, print_every, args), file)

    print("Training took {}".format(datetime.now() - start_time))
    print(f"Best validation loss: {best_val_loss}")
    print(f"Best model: {best_model}")
Example 21
import os

import h5py
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scipy
import tensorflow as tf
from data import get_datasets
import random
from tensorflow.keras import layers
import time
from tensorflow.keras import datasets, layers, models, applications

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

X_train, X_dev, X_test, Y_train, Y_dev, Y_test = get_datasets()
print('datasets retrieved')
print('train:', len(X_train))
print('dev:', len(X_dev))
print('test:', len(X_test))

#define model

model = tf.keras.Sequential()
model.add(
    layers.Conv2D(8, (3, 3),
                  activation='relu',
                  input_shape=(48, 48, 3),
                  data_format='channels_last'))
model.add(
    layers.MaxPooling2D((2, 2), (2, 2),
Example 22
import sys
sys.dont_write_bytecode = True

from config import base_config

from optimizer import get_optimizer
from loss import get_loss_function
from model import get_model
from metrics import get_metrics_lst
from callback import get_callbacks
from trainer import Trainer
from data import get_datasets

if __name__ == "__main__":

    config = base_config()
    config.METRICS_LST = get_metrics_lst()
    config.OPTIMIZER = get_optimizer()
    config.LOSS_FUNC = get_loss_function()
    config.CALLBACK_LST = get_callbacks(config)

    config.display()

    model = get_model(config)
    datasets = get_datasets(config)
    trainer = Trainer(datasets, model, config)

    trainer._compile()
    trainer.train()
Example 23
import argparse
from statistics import mean

from torch.utils.data import DataLoader

# NOTE: the project-local module paths below are assumed; adjust them to the repo layout
from data import get_datasets
from model import TravelGAN
from utils import get_device, load_json, get_writer

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--device", type=int, help="gpu id")
parser.add_argument("-n", "--log", type=str, help="name of log folder")
parser.add_argument("-p", "--hparams", type=str, help="hparams config file")
opts = parser.parse_args()

# Get CUDA/CPU device
device = get_device(opts.device)

print('Loading data..')
hparams = load_json('./configs', opts.hparams)
dataset_a, dataset_b = get_datasets(**hparams['dataset'])
loader_a = DataLoader(dataset_a, **hparams['loading'])
loader_b = DataLoader(dataset_b, **hparams['loading'])
model = TravelGAN(hparams['model'], device=device)
writer, monitor = get_writer(opts.log)

print('Start training..')
for epoch in range(hparams['n_epochs']):
    # Run one epoch
    dis_losses, gen_losses = [], []
    for x_a, x_b in zip(loader_a, loader_b):
        # Loading on device
        x_a = x_a.to(device, non_blocking=True)
        x_b = x_b.to(device, non_blocking=True)

        # Calculate losses and update weights
Example 24
def main(args):

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    device = torch.device(
        'cuda' if args.use_cuda and torch.cuda.is_available() else 'cpu')

    # Create output folder
    if (args.output_folder is not None):
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)
            logging.debug('Creating folder `{0}`'.format(args.output_folder))

        output_folder = os.path.join(args.output_folder,
                                     time.strftime('%Y-%m-%d_%H%M%S'))
        os.makedirs(output_folder)
        logging.debug('Creating folder `{0}`'.format(output_folder))

        args.datafolder = os.path.abspath(args.datafolder)
        args.model_path = os.path.abspath(
            os.path.join(output_folder, 'model.th'))

        # Save the configuration in a config.json file
        with open(os.path.join(output_folder, 'config.json'), 'w') as f:
            json.dump(vars(args), f, indent=2)
        logging.info('Saving configuration file in `{0}`'.format(
            os.path.abspath(os.path.join(output_folder, 'config.json'))))

    # Get datasets and load into meta learning format
    meta_train_dataset, meta_val_dataset, _ = get_datasets(
        args.dataset,
        args.datafolder,
        args.num_ways,
        args.num_shots,
        args.num_shots_test,
        augment=augment,
        fold=args.fold,
        download=download_data)

    meta_train_dataloader = BatchMetaDataLoader(meta_train_dataset,
                                                batch_size=args.batch_size,
                                                shuffle=True,
                                                num_workers=args.num_workers,
                                                pin_memory=True)

    meta_val_dataloader = BatchMetaDataLoader(meta_val_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              pin_memory=True)

    # Define model
    model = Unet(device=device, feature_scale=args.feature_scale)
    model = model.to(device)
    print(f'Using device: {device}')

    # Define optimizer
    meta_optimizer = torch.optim.Adam(model.parameters(),
                                      lr=args.meta_lr)  #, weight_decay=1e-5)
    #meta_optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, momentum = 0.99)

    # Define meta learner
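    # MAML: inner-loop adaptation on each task, outer-loop update of the shared weights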
    metalearner = ModelAgnosticMetaLearning(
        model,
        meta_optimizer,
        first_order=args.first_order,
        num_adaptation_steps=args.num_adaption_steps,
        step_size=args.step_size,
        learn_step_size=False,
        loss_function=loss_function,
        device=device)

    best_value = None

    # Training loop
    epoch_desc = 'Epoch {{0: <{0}d}}'.format(1 +
                                             int(math.log10(args.num_epochs)))
    train_losses = []
    val_losses = []
    train_ious = []
    train_accuracies = []
    val_accuracies = []
    val_ious = []

    start_time = time.time()

    for epoch in range(args.num_epochs):
        print('start epoch ', epoch + 1)
        print('start train---------------------------------------------------')
        train_loss, train_accuracy, train_iou = metalearner.train(
            meta_train_dataloader,
            max_batches=args.num_batches,
            verbose=args.verbose,
            desc='Training',
            leave=False)
        print(f'\n train accuracy: {train_accuracy}, train loss: {train_loss}')
        print('end train---------------------------------------------------')
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        train_ious.append(train_iou)

        # Evaluate in given intervals
        if epoch % args.val_step_size == 0:
            print(
                'start evaluate-------------------------------------------------'
            )
            results = metalearner.evaluate(meta_val_dataloader,
                                           max_batches=args.num_batches,
                                           verbose=args.verbose,
                                           desc=epoch_desc.format(epoch + 1),
                                           is_test=False)
            val_acc = results['accuracy']
            val_loss = results['mean_outer_loss']
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            val_ious.append(results['iou'])
            print(
                f'\n validation accuracy: {val_acc}, validation loss: {val_loss}'
            )
            print(
                'end evaluate-------------------------------------------------'
            )

            # Save best model
            save_model = False
            if 'accuracies_after' in results:
                if (best_value is None) or (best_value <
                                            results['accuracies_after']):
                    best_value = results['accuracies_after']
                    save_model = True
            elif (best_value is None) or (best_value >
                                          results['mean_outer_loss']):
                best_value = results['mean_outer_loss']
                save_model = True
            else:
                save_model = False

            if save_model and (args.output_folder is not None):
                with open(args.model_path, 'wb') as f:
                    torch.save(model.state_dict(), f)

        print('end epoch ', epoch + 1)

    elapsed_time = time.time() - start_time
    print('Finished after ',
          time.strftime('%H:%M:%S', time.gmtime(elapsed_time)))

    r = {}
    r['train_losses'] = train_losses
    r['train_accuracies'] = train_accuracies
    r['train_ious'] = train_ious
    r['val_losses'] = val_losses
    r['val_accuracies'] = val_accuracies
    r['val_ious'] = val_ious
    r['time'] = time.strftime('%H:%M:%S', time.gmtime(elapsed_time))
    with open(os.path.join(output_folder, 'train_results.json'), 'w') as g:
        json.dump(r, g)
        logging.info('Saving results dict in `{0}`'.format(
            os.path.abspath(os.path.join(output_folder,
                                         'train_results.json'))))

    # Plot results
    plot_errors(args.num_epochs,
                train_losses,
                val_losses,
                val_step_size=args.val_step_size,
                output_folder=output_folder,
                save=True,
                bce_dice_focal=bce_dice_focal)
    plot_accuracy(args.num_epochs,
                  train_accuracies,
                  val_accuracies,
                  val_step_size=args.val_step_size,
                  output_folder=output_folder,
                  save=True)
    plot_iou(args.num_epochs,
             train_ious,
             val_ious,
             val_step_size=args.val_step_size,
             output_folder=output_folder,
             save=True)

    if hasattr(meta_train_dataset, 'close'):
        meta_train_dataset.close()
        meta_val_dataset.close()
Example 25
def main(args):

    # If passed through command line, check if CUDA available and use GPU if possible
    if args.cuda:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print('Device: {}'.format(device))
    else:
        device = torch.device('cpu')
        print('Device: {}'.format(device))

    # Prerequisites for training
    train_data, val_data, test_data = get_datasets()

    # Build model
    vocab_size = train_data.tokenizer.vocab_size
    model = RNNLM(ntoken = vocab_size, ninp = args.emsize, nhid = args.nhid,
                    nlayers = args.nlayers, dropout = args.dropout).to(device)

    train_loader = DataLoader(
        train_data, batch_size = args.batch_size, shuffle = False,
        collate_fn = padded_collate, num_workers = 1
    )

    val_loader = DataLoader(
        val_data, batch_size = args.eval_batch_size, shuffle = False,
        collate_fn = padded_collate, num_workers = 1
    )

    test_loader = DataLoader(
        test_data, batch_size = args.eval_batch_size, shuffle = False,
        collate_fn = padded_collate, num_workers = 1
    )

    # Till here

    print('Split sizes | Train: {} | Val: {} | Test: {} |'.format(len(train_loader), len(val_loader),
                                            len(test_loader)))

    optimizer = Adam(model.parameters(), lr = args.lr)
    print(model)

    # store best validation loss
    best_val_loss = None

    # Use Ctrl + C to break out of training at any time
    try:
        for epoch in range(1, args.epochs + 1):

            epoch_start_time = time.time()

            train(model, train_data, train_loader, args, device, optimizer, epoch)

            val_loss = evaluate(val_loader, val_data, device, model)

            print('-' * 89)

            print('| End of epoch {:3d} | Time: {:5.2f} | Validation loss: {:5.2f} |'.format(epoch,
             (time.time() - epoch_start_time), val_loss))

            print('-' * 89)

            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)

                best_val_loss = val_loss
    except KeyboardInterrupt:
        print('-' * 89)
        print('Terminating training early.')



    # Load best model
    with open(args.save, 'rb') as f:
        model = torch.load(f)

        # Ensure rnn parameters are a continuous chunk of memory
        model.rnn.flatten_parameters()

    test_loss = evaluate(test_loader, test_data, device, model)

    print('=' * 89)
    print('|End of training and testing. | Test loss {:5.2f}'.format(test_loss))
    print('=' * 89)
Example 26
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    config = load_config(args.config)
    train_config = config['training']
    output_dir = os.path.expandvars(config['output_dir'])
    checkpoint_format = os.path.join(output_dir, 'checkpoints',
                                     'checkpoint-{epoch}.h5')
    if rank==0:
        os.makedirs(output_dir, exist_ok=True)

    # Logging
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    if args.show_config:
        logging.info('Command line config: %s', args)
    if rank == 0:
        logging.info('Job configuration: %s', config)
        logging.info('Saving job outputs to %s', output_dir)

    # Configure session
    device_config = config.get('device', {})
    configure_session(**device_config)

    # Load the data
    train_gen, valid_gen = get_datasets(batch_size=train_config['batch_size'],
                                        **config['data'])

    # Build the model
    model = get_model(**config['model'])
    # Configure optimizer
    opt = get_optimizer(n_ranks=n_ranks, dist_wrapper=hvd.DistributedOptimizer, **config['optimizer'])
    # Compile the model
    model.compile(loss=train_config['loss'], optimizer=opt,
                  metrics=train_config['metrics'])
    if rank == 0:
        model.summary()

    # Prepare the training callbacks
    callbacks = get_basic_callbacks(args.distributed)

    # Learning rate warmup
    warmup_epochs = train_config.get('lr_warmup_epochs', 0)
    callbacks.append(hvd.callbacks.LearningRateWarmupCallback(
                     warmup_epochs=warmup_epochs, verbose=1))

    # Learning rate decay schedule
    for lr_schedule in train_config.get('lr_schedule', []):
        if rank == 0:
            logging.info('Adding LR schedule: %s', lr_schedule)
        callbacks.append(hvd.callbacks.LearningRateScheduleCallback(**lr_schedule))

    # Checkpoint only from rank 0
    if rank == 0:
        os.makedirs(os.path.dirname(checkpoint_format), exist_ok=True)
        callbacks.append(keras.callbacks.ModelCheckpoint(checkpoint_format))
        
    # Timing callback
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Train the model
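    # shard the batches evenly across ranks, keeping at least one step per epoch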
    train_steps_per_epoch = max([len(train_gen) // n_ranks, 1])
    valid_steps_per_epoch = max([len(valid_gen) // n_ranks, 1])
    history = model.fit_generator(train_gen,
                                  epochs=train_config['n_epochs'],
                                  steps_per_epoch=train_steps_per_epoch,
                                  validation_data=valid_gen,
                                  validation_steps=valid_steps_per_epoch,
                                  callbacks=callbacks,
                                  workers=4, verbose=2 if rank == 0 else 0)

    # Save training history
    if rank == 0:
        # Print some best-found metrics
        if 'val_acc' in history.history.keys():
            logging.info('Best validation accuracy: %.3f',
                         max(history.history['val_acc']))
        if 'val_top_k_categorical_accuracy' in history.history.keys():
            logging.info('Best top-5 validation accuracy: %.3f',
                         max(history.history['val_top_k_categorical_accuracy']))
        logging.info('Average time per epoch: %.3f s',
                     np.mean(timing_callback.times))
        np.savez(os.path.join(output_dir, 'history'),
                 n_ranks=n_ranks, **history.history)

    # Drop to IPython interactive shell
    if args.interactive and (rank == 0):
        logging.info('Starting IPython interactive session')
        import IPython
        IPython.embed()

    if rank == 0:
        logging.info('All done!')
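`init_workers` is project-specific and not shown here. Given the Horovod callbacks and `hvd.DistributedOptimizer` used above, it plausibly wraps Horovod initialization; here is a minimal sketch under that assumption (the single-process fallback is also an assumption).

import horovod.keras as hvd

def init_workers(distributed=False):
    # Hypothetical helper: returns (rank, n_ranks) for this process.
    if distributed:
        hvd.init()
        return hvd.rank(), hvd.size()
    # Assumed fallback for non-distributed runs
    return 0, 1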
Example 27
    def train_confidnet(self, convnet_path, epoch=100, epoch_to_restore=0):
        train, val = get_datasets(self.dataset, self.train_val_split,
                                  self.seed)
        train_loader = DataLoader(train, **self.loader_kwargs)
        val_loader = DataLoader(val, **self.loader_kwargs)

        conv_net = self.convnet(**self.convnet_kwargs).to(self.device)
        conv_net.load_state_dict(torch.load(convnet_path))
        conv_net.eval()
        confid_net = ConfidNet(**self.confidnet_kwargs).to(self.device)
        optimizer = Adam(confid_net.parameters(), **self.optimizer_kwargs)
        # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)
        criterion = torch.nn.MSELoss()
        writer = SummaryWriter(self.log_dir)

        for e in range(epoch_to_restore + 1, epoch + epoch_to_restore + 1):
            confid_net.train()
            history_train = {"loss": [], "metric": []}
            train_histograms = ConfidenceHistograms()
            for idx_batch, (imgs, label) in enumerate(train_loader):
                imgs, label = imgs.to(self.device), label.to(self.device)
                with torch.no_grad():
                    pred, encoded = conv_net(imgs)

                confid_net.zero_grad()
                confidence = confid_net(encoded)
                loss = criterion(
                    confidence,
                    F.softmax(pred, dim=1).gather(1, label.unsqueeze(1)))
                loss.backward()
                optimizer.step()
                train_histograms.step(label, pred, confidence.detach())
                history_train["loss"].append(loss.detach().item())

                step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                           len(train))
                sys.stdout.write("Training : "
                                 f"Epoch {e}/{epoch + epoch_to_restore}; "
                                 f"Step {step}/{len(train)}; "
                                 f"Loss {loss.detach().item()};\r")
                sys.stdout.flush()
            print()
            print()

            confid_net.eval()
            history_val = {"loss": [], "metric": []}
            val_histograms = ConfidenceHistograms()
            for idx_batch, (imgs, label) in enumerate(val_loader):
                imgs, label = imgs.to(self.device), label.to(self.device)

                with torch.no_grad():
                    pred, encoded = conv_net(imgs)
                    confidence = confid_net(encoded)
                val_histograms.step(label, pred, confidence)
                loss = criterion(
                    confidence,
                    F.softmax(pred, dim=1).gather(1, label.unsqueeze(1)))
                history_val["loss"].append(loss.detach().item())

                step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                           len(val))
                sys.stdout.write("Validation : "
                                 f"Epoch {e}/{epoch + epoch_to_restore}; "
                                 f"Step {step}/{len(val)}; "
                                 f"Loss {loss.detach().item()};\r")
                sys.stdout.flush()

            # scheduler.step(np.mean(history_val["loss"]))
            train_mcp_hist, train_tcp_hist = train_histograms.get_histograms()
            writer.add_figure("ConfidNet/train/MCP", train_mcp_hist, e)
            writer.add_figure("ConfidNet/train/TCP", train_tcp_hist, e)
            val_mcp_hist, val_tcp_hist = val_histograms.get_histograms()
            writer.add_figure("ConfidNet/val/MCP", val_mcp_hist, e)
            writer.add_figure("ConfidNet/val/TCP", val_tcp_hist, e)
            writer.add_scalars(
                'ConfidNet/Loss', {
                    "train": np.mean(history_train["loss"]),
                    "val": np.mean(history_val["loss"])
                }, e)

            print(f"\n\n[*] Finished epoch {e};\n\n"
                  "Train :\n"
                  f"\tLoss : {np.mean(history_train['loss'])}\n"
                  "Test :\n"
                  f"\tLoss : {np.mean(history_val['loss'])}\n\n\n")

            if e % self.model_checkpoint == 0:
                filename = str(self.model_filename).format(model="ConfidNet",
                                                           epoch=e)
                torch.save(confid_net.state_dict(), filename)

        return confid_net
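The regression target in the loss above is the softmax probability assigned to the true class (the TCP tracked in the histograms). Here is a small standalone example of that gather pattern, using made-up logits and labels:

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.5, -1.0],
                       [0.1, 1.2, 0.3]])   # (batch=2, num_classes=3)
labels = torch.tensor([0, 2])              # true class index per sample

# Probability of the true class for each sample, shape (2, 1);
# ConfidNet is trained to predict these values.
tcp = F.softmax(logits, dim=1).gather(1, labels.unsqueeze(1))
print(tcp)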
Example 28
    parser.add_argument("--data", type = Path, default = Path("data/alignments/BLAT_ECOLX_hmmerbit_plmc_n5_m30_f50_t0.2_r24-286_id100_b105.a2m"), help = "Fasta input file of sequences.")
    parser.add_argument("--data_sheet", type = str, default = "BLAT_ECOLX_Ranganathan2015", help = "Protein family data sheet in mutation_data.pickle.")
    parser.add_argument("--metric_column", type = str, default = "2500", help = "Metric column of sheet used for Spearman's Rho calculation.")
    parser.add_argument("--ensemble_count", type = int, default = 2000, help = "How many samples of the model to use for evaluation as an ensemble.")
    parser.add_argument("--results_dir", type = Path, default = Path(f"results_{datetime.now().strftime('%Y-%m-%dT%H_%M_%S')}"), help = "Directory to save results to.")

    with torch.no_grad():
        args = parser.parse_args()

        print("Arguments given:")
        for arg, value in args.__dict__.items():
            print(f"  {arg}: {value}")
        print("")

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        protein_dataset, *_ = get_datasets(args.data, device, 0.8)
        print('Data loaded')

        wt, *_ = protein_dataset[0]
        size = len(wt) * NUM_TOKENS

        # load model
        model = VAE([size, 1500, 1500, 30, 100, 2000, size], NUM_TOKENS, use_dictionary = False).to(device)

        try:
            model.load_state_dict(torch.load(args.results_dir / Path("model.torch"), map_location=device)["state_dict"])
        except FileNotFoundError:
            # No saved weights were found; continue with the freshly initialised model
            pass

        cor = mutation_effect_prediction(model, args.data, args.data_sheet, args.metric_column, device, args.ensemble_count, args.results_dir)
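The example ends after computing `cor` without reporting it. A possible continuation is sketched below, assuming `cor` is a scalar Spearman correlation and that saving it next to the model is acceptable; neither step is part of the original.

        # Hypothetical continuation (not in the original): report and persist
        # the Spearman correlation computed above.
        print(f"Spearman's rho over {args.ensemble_count} ensemble samples: {cor:.4f}")
        with open(args.results_dir / "spearman_rho.txt", "w") as f:
            f.write(f"{cor}\n")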
Example 29
    def train_convnet(self, epoch=100, epoch_to_restore=0):
        train, val = get_datasets(self.dataset, self.train_val_split,
                                  self.seed)
        train_loader = DataLoader(train, **self.loader_kwargs)
        val_loader = DataLoader(val, **self.loader_kwargs)

        net = self.convnet(**self.convnet_kwargs).to(self.device)
        optimizer = Adam(net.parameters())
        criterion = torch.nn.CrossEntropyLoss()
        writer = SummaryWriter(self.log_dir)

        for e in range(epoch_to_restore + 1, epoch + epoch_to_restore + 1):
            net.train()
            history_train = {"loss": [], "metric": []}
            for idx_batch, (imgs, label) in enumerate(train_loader):
                imgs, label = imgs.to(self.device), label.to(self.device)

                net.zero_grad()
                pred, _ = net(imgs)
                loss = criterion(pred, label)
                loss.backward()
                optimizer.step()
                history_train["loss"].append(loss.detach().item())

                pred = F.softmax(pred.detach(), dim=1).argmax(1)
                score = accuracy_score(label.cpu(), pred.cpu())
                history_train["metric"].append(score)

                step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                           len(train))
                sys.stdout.write("Training : "
                                 f"Epoch {e}/{epoch + epoch_to_restore}; "
                                 f"Step {step}/{len(train)}; "
                                 f"Loss {loss.detach().item()}; "
                                 f"Score {score}\r")
                sys.stdout.flush()
            print()
            print()

            net.eval()
            history_val = {"loss": [], "metric": []}
            for idx_batch, (imgs, label) in enumerate(val_loader):
                imgs, label = imgs.to(self.device), label.to(self.device)

                with torch.no_grad():
                    pred, _ = net(imgs)
                loss = criterion(pred, label).detach().item()
                history_val["loss"].append(loss)

                pred = F.softmax(pred.detach(), dim=1).argmax(1)
                score = accuracy_score(label.cpu(), pred.cpu())
                history_val["metric"].append(score)

                step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                           len(val))
                sys.stdout.write("Validation : "
                                 f"Epoch {e}/{epoch + epoch_to_restore}; "
                                 f"Step {step}/{len(val)}; "
                                 f"Loss {loss}; "
                                 f"Score {score}\r")
                sys.stdout.flush()

            writer.add_scalars(
                'ConvNet/Loss', {
                    "train": np.mean(history_train["loss"]),
                    "val": np.mean(history_val["loss"])
                }, e)
            writer.add_scalars(
                'ConvNet/Accuracy', {
                    "train": np.mean(history_train["metric"]),
                    "val": np.mean(history_val["metric"])
                }, e)

            print(f"\n\n[*] Finished epoch {e};\n\n"
                  "Train :\n"
                  f"\tLoss : {np.mean(history_train['loss'])}\n"
                  f"\tAccuracy : {np.mean(history_train['metric'])}\n"
                  "Test :\n"
                  f"\tLoss : {np.mean(history_val['loss'])}\n"
                  f"\tAccuracy : {np.mean(history_val['metric'])}\n\n\n")

            if e % self.model_checkpoint == 0:
                filename = str(self.model_filename).format(model="ConvNet",
                                                           epoch=e)
                torch.save(net.state_dict(), filename)

        return filename  # path of the last saved checkpoint (assumes at least one checkpoint epoch ran)
Example 30
from pybrain.utilities           import percentError
from pybrain.tools.shortcuts     import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules   import SoftmaxLayer
from pybrain.tools.customxml.networkreader import NetworkReader
from pybrain.tools.customxml.networkwriter import NetworkWriter
import os
import data

train_type = "symbol"
hidden_units = 10

xmldir = "xml/"
networkname = train_type + "-" + str(hidden_units) + ".xml"

training_set, test_set = data.get_datasets("pics/resized/", dstype = train_type)
training_set._convertToOneOfMany()
test_set._convertToOneOfMany()

print("Test type: '{}'".format(train_type))
print("Number of training patterns:", len(training_set))
print("Number of test patterns:", len(test_set))
print("Input and output dimensions:", training_set.indim, training_set.outdim)
print("Number of hidden units:", hidden_units)
print()
print("First sample (input, target, class):")
print(training_set['input'][0], training_set['target'][0], training_set['class'][0])
print()

network = buildNetwork(training_set.indim, hidden_units, training_set.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(network, dataset = training_set)
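The snippet stops right after constructing the trainer, although `percentError` and `NetworkWriter` are already imported. A sketch of how training typically continues in PyBrain follows; the epoch count is an assumption.

# Hypothetical continuation: train for a fixed number of epochs,
# report classification error on both splits, then save the network.
for _ in range(20):  # assumed number of epochs
    trainer.trainEpochs(1)
    train_error = percentError(trainer.testOnClassData(),
                               training_set['class'])
    test_error = percentError(trainer.testOnClassData(dataset=test_set),
                              test_set['class'])
    print("epoch: {:4d}  train error: {:5.2f}%  test error: {:5.2f}%".format(
        trainer.totalepochs, train_error, test_error))

NetworkWriter.writeToFile(network, os.path.join(xmldir, networkname))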
Example 31
def main():
    args = get_arguments()

    # experiment name
    if not args.exp_name:
        args.exp_name = '_'.join([args.dataset, args.model])
    print("# Experiment: ", args.exp_name)

    # output folder
    output_folder = os.path.join(args.output_root, args.dataset, args.exp_name)
    os.makedirs(output_folder, exist_ok=True)
    print("# Output path: ", output_folder)

    # visdom
    global plotter
    if args.use_visdom:
        logging_folder = os.path.join(args.logging_root, args.dataset, args.exp_name)
        os.makedirs(logging_folder, exist_ok=True)
        plotter = utils.VisdomLinePlotter(env_name=args.exp_name, logging_path=os.path.join(logging_folder, 'vis.log'))
        print("# Visdom path: ", logging_folder)

    # dataset
    print("# Load datasets")
    train_datasets, val_datasets, test_datasets = get_datasets(args.dataset, args.dataset_folder, args.batch_size)
    num_classes = train_datasets[0].num_classes
    vocab = set(train_datasets[0].vocab)
    vocab = vocab.union(set(val_datasets[0].vocab))
    vocab = vocab.union(set(test_datasets[0].vocab))

    # pre-trained word2vec
    print("# Load pre-trained word2vec")
    pretrained_word2vec_cache = os.path.join(os.path.dirname(args.w2v_file), args.dataset + '_w2v.pkl')
    if os.path.isfile(pretrained_word2vec_cache):
        with open(pretrained_word2vec_cache, 'rb') as f:
            pretrained_word2vec = pickle.load(f)
    else:
        pretrained_word2vec = PretrainedWord2Vec(vocab, args.w2v_file)
        with open(pretrained_word2vec_cache, 'wb') as f:
            pickle.dump(pretrained_word2vec, f)

    # train
    print("# Start training")
    for cv, (train_dataset, val_dataset, test_dataset) in enumerate(zip(train_datasets, val_datasets, test_datasets)):
        # fix random seed
        utils.fix_random_seed(seed=const.RANDOM_SEED)

        # model
        cnn = get_model(args.model, num_classes, pretrained_word2vec)
        if torch.cuda.is_available():
            cnn.cuda()

        # dataloader
        train_loader = DataLoader(train_dataset, args.batch_size, shuffle=True, collate_fn=sentence_collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=sentence_collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=sentence_collate_fn)

        # optimizer
        optim = Adadelta(cnn.parameters(), rho=0.95, eps=1e-6)

        # criterion
        criterion = CrossEntropyLoss()

        # training
        if plotter:
            plotter.set_cv(cv)
        output_path = os.path.join(output_folder, 'cv_%d_best.pkl' % cv)
        train(args.num_epochs, cnn, train_loader, optim, criterion, val_loader, output_path)

        # evaluation
        utils.load_model(output_path, cnn)
        find_most_similar_words(cnn)
        accuracy = eval(cnn, test_loader)
        print('cross_val:', cv, '\taccuracy:', accuracy)
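`sentence_collate_fn` is referenced by the loaders above but not shown. Below is a minimal sketch of a padding collate function for variable-length token-index sequences; the (tokens, label) sample layout and the padding index 0 are assumptions.

import torch
from torch.nn.utils.rnn import pad_sequence

def sentence_collate_fn(batch):
    # Hypothetical collate: pad 1-D LongTensors of token indices to the
    # longest sentence in the batch and stack the labels.
    sentences, labels = zip(*batch)  # assumes each sample is (tokens, label)
    padded = pad_sequence(sentences, batch_first=True, padding_value=0)
    return padded, torch.tensor(labels)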