Example #1
def main():

    args = parseCommandLineArguments()

    rmgpy_branch = args.rmgpy_branch
    rmgdb_branch = args.rmgdb_branch
    rmgpy_sha = args.rmgpy_sha
    rmgdb_sha = args.rmgdb_sha
    meta_dict = {
        "rmgpy_branch": rmgpy_branch,
        "rmgdb_branch": rmgdb_branch,
        "rmgpy_sha": rmgpy_sha,
        "rmgdb_sha": rmgdb_sha
    }

    dataset_file = args.datasets[0]
    test_tables = get_datasets(dataset_file)

    # connect to database
    auth_info = get_RTD_authentication_info()
    rtdi = RMGTestsDatabaseInterface(*auth_info)
    rtd = getattr(rtdi.client, 'rmg_tests')
    thermo_val_table = getattr(rtd, 'thermo_val_table')

    # evaluate model performance on the test datasets
    performance_dict = evaluate_performance(dataset_file, model_kernel='GA')
    # push to database
    save_results_in_database(thermo_val_table, meta_dict, performance_dict)

    # save to txt file
    validation_summary_path = os.path.join(os.path.dirname(dataset_file),
                                           'validation_summary.txt')
    save_results_in_file(performance_dict, validation_summary_path)
Example #2
def setup_data_loaders(args):
    train_transforms, val_transforms = get_transforms(
        crop_size=args.crop_size,
        shorter_side=args.shorter_side,
        low_scale=args.low_scale,
        high_scale=args.high_scale,
        img_mean=args.img_mean,
        img_std=args.img_std,
        img_scale=args.img_scale,
        ignore_label=args.ignore_label,
        num_stages=args.num_stages,
        augmentations_type=args.augmentations_type,
        dataset_type=args.dataset_type,
    )
    train_sets, val_set = get_datasets(
        train_dir=args.train_dir,
        val_dir=args.val_dir,
        train_list_path=args.train_list_path,
        val_list_path=args.val_list_path,
        train_transforms=train_transforms,
        val_transforms=val_transforms,
        masks_names=("segm", ),
        dataset_type=args.dataset_type,
        stage_names=args.stage_names,
        train_download=args.train_download,
        val_download=args.val_download,
    )
    train_loaders, val_loader = dt.data.get_loaders(
        train_batch_size=args.train_batch_size,
        val_batch_size=args.val_batch_size,
        train_set=train_sets,
        val_set=val_set,
        num_stages=args.num_stages,
    )
    return train_loaders, val_loader
Example #3
def evaluate_performance(dataset_file, model_kernel='GA'):

    # get a list of test table names
    # from files or input
    test_tables = get_datasets(dataset_file)

    # model instantiation
    model = ThermoEstimator(kernel_type=model_kernel)

    # start test evaluation
    performance_dict = {}
    for _, db_name, collection_name in test_tables:

        data = get_data(db_name, collection_name)

        spec_labels = []
        spec_dict = {}
        H298s_true = []
        H298s_pred = []

        comments = []

        for db_mol in data:
            smiles_in = str(db_mol["SMILES_input"])
            H298_true = float(db_mol["Hf298(kcal/mol)"])  # unit: kcal/mol

            thermo = model.predict_thermo(smiles_in)
            H298_pred = thermo.H298.value_si / 4184.0

            spec_labels.append(smiles_in)
            H298s_true.append(H298_true)
            H298s_pred.append(H298_pred)
            comments.append(thermo.comment)

        # create pandas dataframe
        test_df = pd.DataFrame(index=spec_labels)
        test_df['SMILES'] = test_df.index

        test_df['H298_pred(kcal/mol)'] = pd.Series(H298s_pred,
                                                   index=test_df.index)
        test_df['H298_true(kcal/mol)'] = pd.Series(H298s_true,
                                                   index=test_df.index)

        diff = abs(test_df['H298_pred(kcal/mol)'] -
                   test_df['H298_true(kcal/mol)'])
        test_df['H298_diff(kcal/mol)'] = pd.Series(diff, index=test_df.index)
        test_df['Comments'] = pd.Series(comments, index=test_df.index)

        # save test_df for future reference and possible comparison
        test_df_save_path = os.path.join(
            os.path.dirname(dataset_file),
            'test_df_{0}_{1}.csv'.format(db_name, collection_name))
        with open(test_df_save_path, 'w') as fout:
            test_df.to_csv(fout, index=False)

        performance_dict[(
            db_name, collection_name
        )] = test_df['H298_diff(kcal/mol)'].describe()['mean']

    return performance_dict
Example #4
def main():
    parser = get_parser()
    args = parser.parse_args()

    # load model.  model_options defined in models/__init__.py
    model = sk_model_options[args.model](args.choice, args.freq_floor)

    # load data
    data_path = data_paths[args.dataset]
    train_set, dev_set, test_set = get_datasets(model.batch_size,
                                                data_path,
                                                model.preprocess_inputs,
                                                sk=True)

    print('training...')
    train(model, train_set)
    print('done training.')

    truth_file = os.path.join(data_path, 'truth.jsonl')
    mkdir(os.path.join(CKPT, args.sess_name))
    results_dir = os.path.join(CKPT, args.sess_name, 'results')
    mkdir(results_dir)
    print('evaluating...')
    evaluate(model, train_set, results_dir, 'train', truth_file)
    evaluate(model, dev_set, results_dir, 'dev', truth_file)
    evaluate(model, test_set, results_dir, 'test', truth_file)
    print('done evaluating.')
Example #5
def simple_train(model, batch_size):
    num_epochs = 1

    train_loader, _ = get_datasets(batch_size)
    loss_fn = nn.CrossEntropyLoss().cuda()

    model.train()
    optimizer = optim.SGD(model.parameters(), lr=0.001)

    for _ in range(num_epochs):
        for batch_idx, (inputs, labels) in enumerate(train_loader):
            # run forward pass
            optimizer.zero_grad()
            outputs = model(inputs.to("cuda:1"))

            # run backward pass
            labels = labels.to(outputs.device)
            loss_fn(outputs, labels).backward()
            optimizer.step()

            if batch_idx == 0:
                # record memory usage right after the first batch only
                memory = utils.get_memory_usage()

    return memory
Example #6
def read_data(args):
	# template:		Nx3 (torch.Tensor)
	# source: 		Nx3 (torch.Tensor)
	
	print("You can modify the code to read the point clouds.")
	trainset, testset = data.get_datasets(args)
	template, source, _ = testset[0]
	return template, source
Example #7
def test_nll_estimation(data_path, device, embedding_size, hidden_size,
                        latent_size, num_layers, word_dropout, freebits,
                        model_save_path, batch_size_valid, saved_model_file,
                        num_samples, **kwargs):

    start_time = datetime.now()

    train_data, val_data, test_data = get_datasets(data_path)
    device = torch.device(device)
    vocab_size = train_data.tokenizer.vocab_size
    padding_index = train_data.tokenizer.pad_token_id

    model = SentenceVAE(
        vocab_size=vocab_size,
        embedding_size=embedding_size,
        hidden_size=hidden_size,
        latent_size=latent_size,
        num_layers=num_layers,
        word_dropout_probability=word_dropout,
        unk_token_idx=train_data.tokenizer.unk_token_id,
        # Freebits value is the lambda value as described in Kingma et al.
        freebits=freebits,
        model_save_path=model_save_path)

    model.load_from(saved_model_file)
    model.to(device)

    test_loader = DataLoader(test_data,
                             batch_size=batch_size_valid,
                             shuffle=False,
                             collate_fn=padded_collate)

    epoch_start_time = datetime.now()
    try:
        ppl = perplexity(model,
                         data_loader=test_loader,
                         device=device,
                         num_samples=num_samples)
        loss, kl, _ = approximate_nll(model=model,
                                      data_loader=test_loader,
                                      device=device,
                                      padding_index=padding_index,
                                      num_samples=num_samples)

    except KeyboardInterrupt:
        print("Manually stopped current epoch")
        __import__('pdb').set_trace()

    print("Approximate NLL:")
    print(loss)

    print("Approximate KL:")
    print(kl)

    print("Testing took {}".format(datetime.now() - start_time))
    return loss, kl, ppl
Example #8
def main():

    data_path = '../Data/Dataset'
    train_data, val_data, test_data = get_datasets(data_path)
    tokenizer = train_data.tokenizer

    print("Lengths: ", len(train_data), len(val_data), len(test_data))

    test_loader = DataLoader(val_data,
                             batch_size=32,
                             shuffle=False,
                             collate_fn=padded_collate)

    model = SentenceVAE(
        vocab_size=tokenizer.vocab_size,
        embedding_size=300,
        hidden_size=256,
        latent_size=16,
        num_layers=1,
        word_dropout_probability=1.0,
        unk_token_idx=tokenizer.unk_token_id,
        # Freebits value is the lambda value as described in Kingma et al.
        freebits=0,
    )

    # model_load_name = Path('1vanilla.pt')
    # model_load_name = Path('3word_dropout.pt')
    # model_load_name = Path('5freebits_dropout.pt')
    # model_load_name = Path('6freebits_worddropout_mdr.pt')
    model_load_name = Path('A.pt')

    models_path = Path('models')

    model_load_path = models_path / model_load_name

    model.load_from(model_load_path)
    # print(model.state_dict)

    sentence = sample_sentence(model, tokenizer, number=2)
    # print(sentences)

    # model.to(torch.device('cuda'))
    test_loss = evaluate(model,
                         test_loader,
                         torch.device('cpu'),
                         padding_index=0,
                         print_every=50)
    print("Test loss: ", test_loss)
Example #9
def main(args):

    set_seed(args)

    dataset_train, dataset_val, dataset_test = get_datasets(args)
    optimizer = get_optimizer(args)
    obj = get_objective(args, optimizer.hparams)
    xp = get_xp(args, optimizer)

    for i in range(args.epochs):
        xp.Epoch.update(1).log()

        train(obj, optimizer, dataset_train, xp, args, i)
        test(obj, optimizer, dataset_val, xp, args, i)

    test(obj, optimizer, dataset_test, xp, args, i)
    print_total_time(xp)
Example #10
def main():
    # config
    args = parse_args()
    cnfg = utils.parse_config(args.config)
    # data
    tr_loader, valid_loader, tst_loader = get_datasets(
        cnfg['data']['dir'], cnfg['data']['batch_size'])
    # initialization
    utils.set_seed(cnfg['seed'])
    device = torch.device('cuda:0') if cnfg['gpu'] is None else torch.device(
        cnfg['gpu'])

    logger = Logger(cnfg)
    model = utils.get_model(cnfg['model']).to(device)
    criterion = nn.CrossEntropyLoss()
    opt = torch.optim.SGD(model.parameters(),
                          lr=cnfg['train']['lr'],
                          momentum=cnfg['train']['momentum'],
                          weight_decay=cnfg['train']['weight_decay'])
    amp_args = dict(opt_level=cnfg['opt']['level'],
                    loss_scale=cnfg['opt']['loss_scale'],
                    verbosity=False)
    if cnfg['opt']['level'] == 'O2':  # apex opt levels use the letter O, not zero
        amp_args['master_weights'] = cnfg['opt']['store']
    model, opt = amp.initialize(model, opt, **amp_args)
    scheduler = utils.get_scheduler(opt, cnfg['train'],
                                    cnfg['train']['epochs'] * len(tr_loader))
    # train+test
    for epoch in range(cnfg['train']['epochs']):
        train(epoch, model, criterion, opt, scheduler, tr_loader, device,
              logger, cnfg['train']['lr_scheduler'])
        # testing
        test(epoch, model, tst_loader, criterion, device, logger)
        # save
        if (epoch + 1) % cnfg['save']['epochs'] == 0 and epoch > 0:
            pth = 'models/' + cnfg['logger']['project'] + '_' \
                + cnfg['logger']['run'] + '_' + str(epoch) + '.pth'
            utils.save_model(model, cnfg, epoch, pth)
            logger.log_model(pth)
Example #11
def main():
    # Parse command line arguments
    args = parse_args()

    # Session setup
    tf.compat.v1.enable_eager_execution(
        config=tf.compat.v1.ConfigProto(
            inter_op_parallelism_threads=args.inter_threads,
            intra_op_parallelism_threads=args.intra_threads))

    # Not running distributed
    dist = SimpleNamespace(rank=0, size=1, local_rank=0, local_size=1)

    # Load the dataset
    data = get_datasets(name='cosmo',
                        data_dir=args.data_dir,
                        sample_shape=[128, 128, 128, 4],
                        n_train=args.n_samples,
                        n_valid=0,
                        batch_size=args.batch_size,
                        n_epochs=args.n_epochs,
                        apply_log=True,
                        shard=False,
                        dist=dist)

    pprint.pprint(data)

    start_time = time.perf_counter()
    for x, y in data['train_dataset']:
        # Perform a simple operation
        tf.math.reduce_sum(x)
        tf.math.reduce_sum(y)
    duration = time.perf_counter() - start_time

    print('Total time: %.4f s' % duration)
    print('Throughput: %.4f samples/s' % (args.n_samples / duration))

    print('All done!')
Example #12
import sys
import pickle

import numpy as np
import pandas as pd
from sklearn.utils import check_random_state
from functools import partial
from pprint import pprint

from hmmlearn import hmm
from hmmlearn.utils import normalize

from data import get_datasets
from sshmm import _do_mstep, split_state_startprob, split_state_transmat, split_state_emission, entropy

pd.options.display.max_colwidth = 150
topk_cluster = 30

train_dataset, dev_dataset, vocab, cnt = get_datasets(
    "./data/kmedoids_agent_150", topk_cluster)
vocab = {v: k for k, v in vocab.items()}
print('vocab size = ', len(vocab))

model_path = sys.argv[1]
df_path = sys.argv[2]

with open(model_path, "rb") as f:
    model = pickle.load(f)

df = pd.read_csv(df_path)

xs = list(iter(dev_dataset))
x_lens = [len(x) for x in xs]

sample_len = 25
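
This example stops right after defining sample_len. A hedged sketch of how the loaded HMM could be used from here with the standard hmmlearn API; it assumes each x in xs is a 1-D array of symbol indices, and the scoring/sampling usage is illustrative, not the original code:

# Hedged sketch: assumes each x in xs is a 1-D array of symbol indices.
X = np.concatenate(xs).reshape(-1, 1)
print("dev log-likelihood:", model.score(X, lengths=x_lens))

# Draw one observation sequence of length sample_len from the trained model.
sampled_obs, sampled_states = model.sample(sample_len)
print("sampled states:", sampled_states.tolist())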
Example #13
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    dist = init_workers(args.distributed)
    config = load_config(args)
    os.makedirs(config['output_dir'], exist_ok=True)
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i size %i local_rank %i local_size %i',
                 dist.rank, dist.size, dist.local_rank, dist.local_size)
    if dist.rank == 0:
        logging.info('Configuration: %s', config)

    # Setup MLPerf logging
    if args.mlperf:
        mllogger = configure_mllogger(config['output_dir'])
    if dist.rank == 0 and args.mlperf:
        mllogger.event(key=mllog.constants.CACHE_CLEAR)
        mllogger.start(key=mllog.constants.INIT_START)

    # Initialize Weights & Biases logging
    if args.wandb and dist.rank == 0:
        import wandb
        wandb.init(project='cosmoflow',
                   name=args.run_tag,
                   id=args.run_tag,
                   config=config,
                   resume=args.run_tag)

    # Device and session configuration
    gpu = dist.local_rank if args.rank_gpu else None
    if gpu is not None:
        logging.info('Taking gpu %i', gpu)
    configure_session(gpu=gpu,
                      intra_threads=args.intra_threads,
                      inter_threads=args.inter_threads,
                      kmp_blocktime=args.kmp_blocktime,
                      kmp_affinity=args.kmp_affinity,
                      omp_num_threads=args.omp_num_threads)

    # Mixed precision
    if args.amp:
        logging.info('Enabling mixed float16 precision')

        # Suggested bug workaround from https://github.com/tensorflow/tensorflow/issues/38516
        if tf.__version__.startswith('2.2.'):
            from tensorflow.python.keras.mixed_precision.experimental import device_compatibility_check
            device_compatibility_check.log_device_compatibility_check = lambda policy_name, skip_local: None
        tf.keras.mixed_precision.experimental.set_policy('mixed_float16')
        # TF 2.3
        #tf.keras.mixed_precision.set_global_policy('mixed_float16')

    # Start MLPerf logging
    if dist.rank == 0 and args.mlperf:
        log_submission_info(**config.get('mlperf', {}))
        mllogger.end(key=mllog.constants.INIT_STOP)
        mllogger.start(key=mllog.constants.RUN_START)

    # Load the data
    data_config = config['data']
    if dist.rank == 0:
        logging.info('Loading data')
    datasets = get_datasets(dist=dist, **data_config)
    logging.debug('Datasets: %s', datasets)

    # Construct or reload the model
    if dist.rank == 0:
        logging.info('Building the model')
    train_config = config['train']
    initial_epoch = 0
    checkpoint_format = os.path.join(config['output_dir'],
                                     'checkpoint-{epoch:03d}.h5')
    if args.resume and os.path.exists(checkpoint_format.format(epoch=1)):
        # Reload model from last checkpoint
        initial_epoch, model = reload_last_checkpoint(
            checkpoint_format,
            data_config['n_epochs'],
            distributed=args.distributed)
    else:
        # Build a new model
        model = get_model(**config['model'])
        # Configure the optimizer
        opt = get_optimizer(distributed=args.distributed,
                            **config['optimizer'])
        # Compile the model
        model.compile(optimizer=opt,
                      loss=train_config['loss'],
                      metrics=train_config['metrics'])

    if dist.rank == 0:
        model.summary()

    # Save configuration to output directory
    if dist.rank == 0:
        config['n_ranks'] = dist.size
        save_config(config)

    # Prepare the callbacks
    if dist.rank == 0:
        logging.info('Preparing callbacks')
    callbacks = []
    if args.distributed:

        # Broadcast initial variable states from rank 0 to all processes.
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))

        # Average metrics across workers
        callbacks.append(hvd.callbacks.MetricAverageCallback())

    # Learning rate decay schedule
    if 'lr_schedule' in config:
        global_batch_size = data_config['batch_size'] * dist.size
        callbacks.append(
            tf.keras.callbacks.LearningRateScheduler(
                get_lr_schedule(global_batch_size=global_batch_size,
                                **config['lr_schedule'])))

    # Timing
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Checkpointing and logging from rank 0 only
    if dist.rank == 0:
        callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format))
        callbacks.append(
            tf.keras.callbacks.CSVLogger(os.path.join(config['output_dir'],
                                                      'history.csv'),
                                         append=args.resume))
        if args.tensorboard:
            callbacks.append(
                tf.keras.callbacks.TensorBoard(
                    os.path.join(config['output_dir'], 'tensorboard')))
        if args.mlperf:
            callbacks.append(MLPerfLoggingCallback())
        if args.wandb:
            callbacks.append(wandb.keras.WandbCallback())

    # Early stopping
    patience = train_config.get('early_stopping_patience', None)
    if patience is not None:
        callbacks.append(
            tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                             min_delta=1e-5,
                                             patience=patience,
                                             verbose=1))

    # Stopping at specified target
    target_mae = train_config.get('target_mae', None)
    callbacks.append(StopAtTargetCallback(target_max=target_mae))

    if dist.rank == 0:
        logging.debug('Callbacks: %s', callbacks)

    # Train the model
    if dist.rank == 0:
        logging.info('Beginning training')
    fit_verbose = 1 if (args.verbose and dist.rank == 0) else 2
    model.fit(datasets['train_dataset'],
              steps_per_epoch=datasets['n_train_steps'],
              epochs=data_config['n_epochs'],
              validation_data=datasets['valid_dataset'],
              validation_steps=datasets['n_valid_steps'],
              callbacks=callbacks,
              initial_epoch=initial_epoch,
              verbose=fit_verbose)

    # Stop MLPerf timer
    if dist.rank == 0 and args.mlperf:
        mllogger.end(key=mllog.constants.RUN_STOP,
                     metadata={'status': 'success'})

    # Print training summary
    if dist.rank == 0:
        print_training_summary(config['output_dir'], args.print_fom)

    # Print GPU memory - not supported in TF 2.2?
    #if gpu is not None:
    #    device = tf.config.list_physical_devices('GPU')[gpu]
    #    #print(tf.config.experimental.get_memory_usage(device))
    #    #print(tf.config.experimental.get_memory_info(device))

    # Finalize
    if dist.rank == 0:
        logging.info('All done!')
Example #14
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    rank, local_rank, n_ranks = init_workers(args.distributed)
    config = load_config(args.config,
                         output_dir=args.output_dir,
                         data_config=args.data_config)

    os.makedirs(config['output_dir'], exist_ok=True)
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i local_rank %i size %i', rank, local_rank,
                 n_ranks)
    if rank == 0:
        logging.info('Configuration: %s', config)

    # Device and session configuration
    gpu = local_rank if args.rank_gpu else None
    configure_session(gpu=gpu, **config.get('device', {}))

    # Load the data
    data_config = config['data']
    if rank == 0:
        logging.info('Loading data')
    datasets = get_datasets(rank=rank, n_ranks=n_ranks, **data_config)
    logging.debug('Datasets: %s', datasets)

    # Construct or reload the model
    if rank == 0:
        logging.info('Building the model')
    initial_epoch = 0
    checkpoint_format = os.path.join(config['output_dir'],
                                     'checkpoint-{epoch:03d}.h5')
    if args.resume:
        # Reload model from last checkpoint
        initial_epoch, model = reload_last_checkpoint(
            checkpoint_format,
            data_config['n_epochs'],
            distributed=args.distributed)
    else:
        # Build a new model
        model = get_model(**config['model'])
        # Configure the optimizer
        opt = get_optimizer(n_ranks=n_ranks,
                            distributed=args.distributed,
                            **config['optimizer'])
        # Compile the model
        train_config = config['train']
        model.compile(optimizer=opt,
                      loss=train_config['loss'],
                      metrics=train_config['metrics'])

    if rank == 0:
        model.summary()

    # Save configuration to output directory
    if rank == 0:
        data_config['n_train'] = datasets['n_train']
        data_config['n_valid'] = datasets['n_valid']
        save_config(config)

    # Prepare the callbacks
    if rank == 0:
        logging.info('Preparing callbacks')
    callbacks = []
    if args.distributed:

        # Broadcast initial variable states from rank 0 to all processes.
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))

        # Average metrics across workers
        callbacks.append(hvd.callbacks.MetricAverageCallback())

        # Learning rate warmup
        train_config = config['train']
        warmup_epochs = train_config.get('lr_warmup_epochs', 0)
        callbacks.append(
            hvd.callbacks.LearningRateWarmupCallback(
                warmup_epochs=warmup_epochs, verbose=1))

    # Learning rate decay schedule
    lr_schedule = train_config.get('lr_schedule', {})
    if rank == 0:
        logging.info('Adding LR decay schedule: %s', lr_schedule)
    callbacks.append(
        tf.keras.callbacks.LearningRateScheduler(
            schedule=lambda epoch, lr: lr * lr_schedule.get(epoch, 1)))

    # Timing
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Checkpointing and CSV logging from rank 0 only
    if rank == 0:
        callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format))
        callbacks.append(
            tf.keras.callbacks.CSVLogger(os.path.join(config['output_dir'],
                                                      'history.csv'),
                                         append=args.resume))

    if rank == 0:
        logging.debug('Callbacks: %s', callbacks)

    # Train the model
    if rank == 0:
        logging.info('Beginning training')
    fit_verbose = 1 if (args.verbose and rank == 0) else 2
    model.fit(datasets['train_dataset'],
              steps_per_epoch=datasets['n_train_steps'],
              epochs=data_config['n_epochs'],
              validation_data=datasets['valid_dataset'],
              validation_steps=datasets['n_valid_steps'],
              callbacks=callbacks,
              initial_epoch=initial_epoch,
              verbose=fit_verbose)

    # Print training summary
    if rank == 0:
        print_training_summary(config['output_dir'])

    # Finalize
    if rank == 0:
        logging.info('All done!')
Example #15
model = get_model(rho_length_in=rho_length_in, 
                  **config['data_and_model'],
                  **config['model'])

rank = 0
n_ranks = 1

# Configure optimizer
# opt = get_optimizer(n_ranks=n_ranks, distributed=False,
#                     **config['optimizer'])

# Compile the model
model.compile(loss=train_config['loss'],
              optimizer=config['optimizer']['name'],  # opt
              metrics=train_config['metrics'])
train_gen, valid_gen = get_datasets(batch_size=train_config['batch_size'],
                                    **config['data_and_model'],
                                    **config['data'])

steps_per_epoch = len(train_gen) // n_ranks

# Timing
callbacks = []
timing_callback = TimingCallback()
callbacks.append(timing_callback)
callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience=5))

callbacks.append(keras.callbacks.ModelCheckpoint(filepath=os.path.join(output_dir, output_file_name),
                                                 monitor='val_mean_absolute_error',
                                                 save_best_only=True,
                                                 verbose=1))

history = model.fit_generator(train_gen,
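
This example is cut off mid-call. A minimal sketch of how the fit_generator call could be completed, reusing the train_gen, valid_gen, steps_per_epoch, callbacks and train_config objects defined above; the 'n_epochs' key and the remaining argument choices are assumptions, not the original source:

# Hedged sketch: argument choices below are assumed, not from the original code.
history = model.fit_generator(train_gen,
                              epochs=train_config['n_epochs'],  # assumed config key
                              steps_per_epoch=steps_per_epoch,
                              validation_data=valid_gen,
                              validation_steps=max(len(valid_gen) // n_ranks, 1),
                              callbacks=callbacks,
                              verbose=2)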
Example #16
import torch
from models import HMM
from data import get_datasets, read_config
from training import Trainer

# Generate datasets from text file
path = "data"
N = 128
config = read_config(N,path)
train_dataset, valid_dataset = get_datasets(config)
checkpoint_path = "."

# Initialize model
model = HMM(config=config)

# Train the model
num_epochs = 10
trainer = Trainer(model, config, lr=0.003)
trainer.load_checkpoint(checkpoint_path)

for epoch in range(num_epochs):
	print("========= Epoch %d of %d =========" % (epoch+1, num_epochs))
	train_loss = trainer.train(train_dataset)
	valid_loss = trainer.test(valid_dataset)
	trainer.save_checkpoint(epoch, checkpoint_path)

	print("========= Results: epoch %d of %d =========" % (epoch+1, num_epochs))
	print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss) )


Example #17
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.customxml.networkreader import NetworkReader
from pybrain.tools.customxml.networkwriter import NetworkWriter
import os
import data

train_type = "symbol"
hidden_units = 10

xmldir = "xml/"
networkname = train_type + "-" + str(hidden_units) + ".xml"

training_set, test_set = data.get_datasets("pics/resized/", dstype=train_type)
training_set._convertToOneOfMany()
test_set._convertToOneOfMany()

print("Test type: '{}'".format(train_type))
print("Number of training patterns:", len(training_set))
print("Number of test patterns:", len(test_set))
print("Input and output dimensions:", training_set.indim, training_set.outdim)
print("Number of hidden units:", hidden_units)
print()
print("First sample (input, target, class):")
print(training_set['input'][0], training_set['target'][0],
      training_set['class'][0])
print()

network = buildNetwork(training_set.indim,
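
The example breaks off inside the buildNetwork call. A sketch of a plausible continuation following the standard PyBrain classification-tutorial pattern; the hidden-unit count comes from the variables above, while the reload logic, trainer hyperparameters, and epoch count are assumptions, not the original code:

# Hedged sketch of a plausible continuation (not the original source).
network = buildNetwork(training_set.indim,
                       hidden_units,
                       training_set.outdim,
                       outclass=SoftmaxLayer)

# Reuse a previously saved network if one exists, as suggested by the
# NetworkReader/NetworkWriter imports above.
if os.path.isfile(xmldir + networkname):
    network = NetworkReader.readFrom(xmldir + networkname)

trainer = BackpropTrainer(network, dataset=training_set,
                          momentum=0.1, verbose=True, weightdecay=0.01)
trainer.trainEpochs(10)

print("Error on test set: %5.2f%%" % percentError(
    trainer.testOnClassData(dataset=test_set), test_set['class']))

NetworkWriter.writeToFile(network, xmldir + networkname)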
Example #18
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    rank, local_rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    config = load_config(args.config)

    # Configure logging
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i local_rank %i size %i',
                 rank, local_rank, n_ranks)

    # Device configuration
    configure_session(gpu=local_rank, **config.get('device', {}))

    # Load the data
    train_data, valid_data = get_datasets(rank=rank, n_ranks=n_ranks,
                                          **config['data'])
    if rank == 0:
        logging.info(train_data)
        logging.info(valid_data)

    # Construct the model and optimizer
    model = get_model(**config['model'])
    optimizer = get_optimizer(n_ranks=n_ranks, **config['optimizer'])
    train_config = config['train']

    # Custom metrics for pixel accuracy and IoU
    metrics = [PixelAccuracy(), PixelIoU(name='iou', num_classes=3)]

    # Compile the model
    model.compile(loss=train_config['loss'], optimizer=optimizer,
                  metrics=metrics)

    # Print a model summary
    if rank == 0:
        model.summary()

    # Prepare the callbacks
    callbacks = []

    if args.distributed:

        # Broadcast initial variable states from rank 0 to all processes.
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))

        # Average metrics across workers
        callbacks.append(hvd.callbacks.MetricAverageCallback())

        # Learning rate warmup
        warmup_epochs = train_config.get('lr_warmup_epochs', 0)
        callbacks.append(hvd.callbacks.LearningRateWarmupCallback(
            warmup_epochs=warmup_epochs, verbose=1))

    # Timing
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Checkpointing and CSV logging from rank 0 only
    #if rank == 0:
    #    callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format))
    #    callbacks.append(tf.keras.callbacks.CSVLogger(
    #        os.path.join(config['output_dir'], 'history.csv'), append=args.resume))

    if rank == 0:
        logging.debug('Callbacks: %s', callbacks)

    # Train the model
    verbosity = 2 if rank==0 or args.verbose else 0
    history = model.fit(train_data,
                        validation_data=valid_data,
                        epochs=train_config['n_epochs'],
                        callbacks=callbacks,
                        verbose=verbosity)

    # All done
    if rank == 0:
        logging.info('All done!')
Example #19
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    dist = init_workers(args.distributed)
    config = load_config(args)
    os.makedirs(config['output_dir'], exist_ok=True)
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i size %i local_rank %i local_size %i',
                 dist.rank, dist.size, dist.local_rank, dist.local_size)
    if dist.rank == 0:
        logging.info('Configuration: %s', config)

    # Device and session configuration
    gpu = dist.local_rank if args.rank_gpu else None
    if gpu is not None:
        logging.info('Taking gpu %i', gpu)
    configure_session(gpu=gpu,
                      intra_threads=args.intra_threads,
                      inter_threads=args.inter_threads,
                      kmp_blocktime=args.kmp_blocktime,
                      kmp_affinity=args.kmp_affinity,
                      omp_num_threads=args.omp_num_threads)

    # Load the data
    data_config = config['data']
    if dist.rank == 0:
        logging.info('Loading data')
    datasets = get_datasets(dist=dist, **data_config)
    logging.debug('Datasets: %s', datasets)

    # Construct or reload the model
    if dist.rank == 0:
        logging.info('Building the model')
    train_config = config['train']
    initial_epoch = 0
    checkpoint_format = os.path.join(config['output_dir'],
                                     'checkpoint-{epoch:03d}.h5')
    if args.resume and os.path.exists(checkpoint_format.format(epoch=1)):
        # Reload model from last checkpoint
        initial_epoch, model = reload_last_checkpoint(
            checkpoint_format,
            data_config['n_epochs'],
            distributed=args.distributed)
    else:
        # Build a new model
        model = get_model(**config['model'])
        # Configure the optimizer
        opt = get_optimizer(distributed=args.distributed,
                            **config['optimizer'])
        # Compile the model
        model.compile(optimizer=opt,
                      loss=train_config['loss'],
                      metrics=train_config['metrics'])

    if dist.rank == 0:
        model.summary()

    # Save configuration to output directory
    if dist.rank == 0:
        config['n_ranks'] = dist.size
        save_config(config)

    # Prepare the callbacks
    if dist.rank == 0:
        logging.info('Preparing callbacks')
    callbacks = []
    if args.distributed:

        # Broadcast initial variable states from rank 0 to all processes.
        callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))

        # Average metrics across workers
        callbacks.append(hvd.callbacks.MetricAverageCallback())

    # Learning rate decay schedule
    if 'lr_schedule' in config:
        global_batch_size = data_config['batch_size'] * dist.size
        callbacks.append(
            tf.keras.callbacks.LearningRateScheduler(
                get_lr_schedule(global_batch_size=global_batch_size,
                                **config['lr_schedule'])))

    # Timing
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Checkpointing and logging from rank 0 only
    if dist.rank == 0:
        callbacks.append(tf.keras.callbacks.ModelCheckpoint(checkpoint_format))
        callbacks.append(
            tf.keras.callbacks.CSVLogger(os.path.join(config['output_dir'],
                                                      'history.csv'),
                                         append=args.resume))
        if args.tensorboard:
            callbacks.append(
                tf.keras.callbacks.TensorBoard(
                    os.path.join(config['output_dir'], 'tensorboard')))

    # Early stopping
    patience = config.get('early_stopping_patience', None)
    if patience is not None:
        callbacks.append(
            tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                             min_delta=1e-5,
                                             patience=patience,
                                             verbose=1))

    if dist.rank == 0:
        logging.debug('Callbacks: %s', callbacks)

    # Train the model
    if dist.rank == 0:
        logging.info('Beginning training')
    fit_verbose = 1 if (args.verbose and dist.rank == 0) else 2
    model.fit(datasets['train_dataset'],
              steps_per_epoch=datasets['n_train_steps'],
              epochs=data_config['n_epochs'],
              validation_data=datasets['valid_dataset'],
              validation_steps=datasets['n_valid_steps'],
              callbacks=callbacks,
              initial_epoch=initial_epoch,
              verbose=fit_verbose)

    # Print training summary
    if dist.rank == 0:
        print_training_summary(config['output_dir'], args.print_fom)

    # Finalize
    if dist.rank == 0:
        logging.info('All done!')
Example #20
def train(
    data_path,
    device,
    num_epochs,
    batch_size_train,
    batch_size_valid,
    learning_rate,
    num_layers,
    embedding_size,
    hidden_size,
    latent_size,
    word_dropout,
    print_every,
    save_every,
    tensorboard_logging,
    model_save_path,
    early_stopping_patience,
    freebits,
    MDR,
    # losses_save_path,
    args=None,
):

    start_time = datetime.now()

    train_data, val_data, test_data = get_datasets(data_path)
    device = torch.device(device)
    vocab_size = train_data.tokenizer.vocab_size
    padding_index = train_data.tokenizer.pad_token_id

    model = SentenceVAE(
        vocab_size=vocab_size,
        embedding_size=embedding_size,
        hidden_size=hidden_size,
        latent_size=latent_size,
        num_layers=num_layers,
        word_dropout_probability=word_dropout,
        unk_token_idx=train_data.tokenizer.unk_token_id,
        # Freebits value is the lambda value as described in Kingma et al.
        freebits=freebits,
        model_save_path=model_save_path)
    lagrangian = Lagrangian(MDR)

    model.to(device)
    lagrangian.to(device)

    if MDR is not None:
        ### Define lagrangian parameter and optimizers
        lagrangian_optimizer = RMSprop(
            lagrangian.parameters(),
            lr=learning_rate)  # TODO: Move this to other scope and use args.lr
    optimizer = Adam(model.parameters(), lr=learning_rate)

    train_loader = DataLoader(train_data,
                              batch_size=batch_size_train,
                              shuffle=True,
                              collate_fn=padded_collate)

    val_loader = DataLoader(val_data,
                            batch_size=batch_size_valid,
                            shuffle=False,
                            collate_fn=padded_collate)

    iterations = 0
    patience = 0
    best_val_loss = torch.tensor(np.inf, device=device)
    best_model = None
    for epoch in range(num_epochs):

        epoch_start_time = datetime.now()
        try:
            nll_list = []
            kl_list = []
            lists = (nll_list, kl_list)

            if MDR is None:
                iterations = train_one_epoch(model,
                                             optimizer,
                                             train_loader,
                                             device,
                                             iter_start=iterations,
                                             padding_index=padding_index,
                                             save_every=save_every,
                                             print_every=print_every,
                                             loss_lists=lists)
            else:
                iterations = train_one_epoch_MDR(model,
                                                 lagrangian,
                                                 lagrangian_optimizer,
                                                 optimizer,
                                                 train_loader,
                                                 device,
                                                 iter_start=iterations,
                                                 padding_index=padding_index,
                                                 save_every=save_every,
                                                 minimum_rate=MDR,
                                                 loss_lists=lists)

        except KeyboardInterrupt:
            print("Manually stopped current epoch")
            __import__('pdb').set_trace()

        print("Training this epoch took {}".format(datetime.now() -
                                                   epoch_start_time))

        print("Validation phase:")
        val_loss, ppl = evaluate(model,
                                 val_loader,
                                 device,
                                 padding_index=padding_index,
                                 print_every=print_every)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.saved_model_files[-1]
            patience = 0
        else:
            patience += 1
            if patience >= early_stopping_patience:
                print("EARLY STOPPING")
                break

        print(
            f"###############################################################")
        print(
            f"Epoch {epoch} finished, validation loss: {val_loss}, ppl: {ppl}")
        print(
            f"###############################################################")
        print("Current epoch training took {}".format(datetime.now() -
                                                      epoch_start_time))

        losses_file_name = f"MDR{MDR}-freebits{freebits}-word_dropout{word_dropout}-print_every{print_every}-iterations{iterations}"
        save_losses_path = Path(model_save_path) / losses_file_name
        with open(save_losses_path, 'wb') as file:
            print("Saving losses..")
            pickle.dump((lists, print_every, args), file)

    print("Training took {}".format(datetime.now() - start_time))
    print(f"Best validation loss: {best_val_loss}")
    print(f"Best model: {best_model}")
Example #21
import os

import h5py
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scipy
import tensorflow as tf
from data import get_datasets
import random
import time
from tensorflow.keras import datasets, layers, models, applications

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

X_train, X_dev, X_test, Y_train, Y_dev, Y_test = get_datasets()
print('datasets retrieved')
print('train:', len(X_train))
print('dev:', len(X_dev))
print('test:', len(X_test))

#define model

model = tf.keras.Sequential()
model.add(
    layers.Conv2D(8, (3, 3),
                  activation='relu',
                  input_shape=(48, 48, 3),
                  data_format='channels_last'))
model.add(
    layers.MaxPooling2D((2, 2), (2, 2),
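
The model definition above is truncated inside the second model.add call. A minimal sketch that restates and completes that call and finishes a small tf.keras classifier; the layer sizes, the class count (assuming one-hot Y_train), and the training hyperparameters are assumptions, not the original source:

# Hedged sketch continuing the truncated call above; sizes, class count and
# hyperparameters below are assumed, not the original code.
model.add(
    layers.MaxPooling2D((2, 2), (2, 2),
                        data_format='channels_last'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(Y_train.shape[-1], activation='softmax'))  # assumes one-hot labels

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, Y_train,
          epochs=10,
          batch_size=32,
          validation_data=(X_dev, Y_dev))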
Example #22
"""
import sys
sys.dont_write_bytecode = True

from config import base_config

from optimizer import get_optimizer
from loss import get_loss_function
from model import get_model
from metrics import get_metrics_lst
from callback import get_callbacks
from trainer import Trainer
from data import get_datasets

if __name__ == "__main__":

    config = base_config()
    config.METRICS_LST = get_metrics_lst()
    config.OPTIMIZER = get_optimizer()
    config.LOSS_FUNC = get_loss_function()
    config.CALLBACK_LST = get_callbacks(config)

    config.display()

    model = get_model(config)
    datasets = get_datasets(config)
    trainer = Trainer(datasets, model, config)

    trainer._compile()
    trainer.train()
Example #23
from utils import get_device, load_json, get_writer
import argparse
from statistics import mean

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--device", type=int, help="gpu id")
parser.add_argument("-n", "--log", type=str, help="name of log folder")
parser.add_argument("-p", "--hparams", type=str, help="hparams config file")
opts = parser.parse_args()

# Get CUDA/CPU device
device = get_device(opts.device)

print('Loading data..')
hparams = load_json('./configs', opts.hparams)
dataset_a, dataset_b = get_datasets(**hparams['dataset'])
loader_a = DataLoader(dataset_a, **hparams['loading'])
loader_b = DataLoader(dataset_b, **hparams['loading'])
model = TravelGAN(hparams['model'], device=device)
writer, monitor = get_writer(opts.log)

print('Start training..')
for epoch in range(hparams['n_epochs']):
    # Run one epoch
    dis_losses, gen_losses = [], []
    for x_a, x_b in zip(loader_a, loader_b):
        # Loading on device
        x_a = x_a.to(device, non_blocking=True)
        x_b = x_b.to(device, non_blocking=True)

        # Calculate losses and update weights
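
        # Hedged sketch of the update step that follows: dis_update/gen_update
        # are hypothetical method names, not necessarily the real TravelGAN
        # interface, and the epoch-level logging below is also assumed.
        dis_loss = model.dis_update(x_a, x_b)
        gen_loss = model.gen_update(x_a, x_b)
        dis_losses.append(dis_loss)
        gen_losses.append(gen_loss)

    # Log mean losses for the epoch (writer usage is assumed).
    writer.add_scalar('loss/discriminator', mean(dis_losses), epoch)
    writer.add_scalar('loss/generator', mean(gen_losses), epoch)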
Example #24
def main(args):

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    device = torch.device(
        'cuda' if args.use_cuda and torch.cuda.is_available() else 'cpu')

    # Create output folder
    if (args.output_folder is not None):
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)
            logging.debug('Creating folder `{0}`'.format(args.output_folder))

        output_folder = os.path.join(args.output_folder,
                                     time.strftime('%Y-%m-%d_%H%M%S'))
        os.makedirs(output_folder)
        logging.debug('Creating folder `{0}`'.format(output_folder))

        args.datafolder = os.path.abspath(args.datafolder)
        args.model_path = os.path.abspath(
            os.path.join(output_folder, 'model.th'))

        # Save the configuration in a config.json file
        with open(os.path.join(output_folder, 'config.json'), 'w') as f:
            json.dump(vars(args), f, indent=2)
        logging.info('Saving configuration file in `{0}`'.format(
            os.path.abspath(os.path.join(output_folder, 'config.json'))))

    # Get datasets and load into meta learning format
    meta_train_dataset, meta_val_dataset, _ = get_datasets(
        args.dataset,
        args.datafolder,
        args.num_ways,
        args.num_shots,
        args.num_shots_test,
        augment=augment,
        fold=args.fold,
        download=download_data)

    meta_train_dataloader = BatchMetaDataLoader(meta_train_dataset,
                                                batch_size=args.batch_size,
                                                shuffle=True,
                                                num_workers=args.num_workers,
                                                pin_memory=True)

    meta_val_dataloader = BatchMetaDataLoader(meta_val_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              pin_memory=True)

    # Define model
    model = Unet(device=device, feature_scale=args.feature_scale)
    model = model.to(device)
    print(f'Using device: {device}')

    # Define optimizer
    meta_optimizer = torch.optim.Adam(model.parameters(),
                                      lr=args.meta_lr)  #, weight_decay=1e-5)
    #meta_optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, momentum = 0.99)

    # Define meta learner
    metalearner = ModelAgnosticMetaLearning(
        model,
        meta_optimizer,
        first_order=args.first_order,
        num_adaptation_steps=args.num_adaption_steps,
        step_size=args.step_size,
        learn_step_size=False,
        loss_function=loss_function,
        device=device)

    best_value = None

    # Training loop
    epoch_desc = 'Epoch {{0: <{0}d}}'.format(1 +
                                             int(math.log10(args.num_epochs)))
    train_losses = []
    val_losses = []
    train_ious = []
    train_accuracies = []
    val_accuracies = []
    val_ious = []

    start_time = time.time()

    for epoch in range(args.num_epochs):
        print('start epoch ', epoch + 1)
        print('start train---------------------------------------------------')
        train_loss, train_accuracy, train_iou = metalearner.train(
            meta_train_dataloader,
            max_batches=args.num_batches,
            verbose=args.verbose,
            desc='Training',
            leave=False)
        print(f'\n train accuracy: {train_accuracy}, train loss: {train_loss}')
        print('end train---------------------------------------------------')
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        train_ious.append(train_iou)

        # Evaluate in given intervals
        if epoch % args.val_step_size == 0:
            print(
                'start evaluate-------------------------------------------------'
            )
            results = metalearner.evaluate(meta_val_dataloader,
                                           max_batches=args.num_batches,
                                           verbose=args.verbose,
                                           desc=epoch_desc.format(epoch + 1),
                                           is_test=False)
            val_acc = results['accuracy']
            val_loss = results['mean_outer_loss']
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            val_ious.append(results['iou'])
            print(
                f'\n validation accuracy: {val_acc}, validation loss: {val_loss}'
            )
            print(
                'end evaluate-------------------------------------------------'
            )

            # Save best model
            if 'accuracies_after' in results:
                if (best_value is None) or (best_value <
                                            results['accuracies_after']):
                    best_value = results['accuracies_after']
                    save_model = True
            elif (best_value is None) or (best_value >
                                          results['mean_outer_loss']):
                best_value = results['mean_outer_loss']
                save_model = True
            else:
                save_model = False

            if save_model and (args.output_folder is not None):
                with open(args.model_path, 'wb') as f:
                    torch.save(model.state_dict(), f)

        print('end epoch ', epoch + 1)

    elapsed_time = time.time() - start_time
    print('Finished after ',
          time.strftime('%H:%M:%S', time.gmtime(elapsed_time)))

    r = {}
    r['train_losses'] = train_losses
    r['train_accuracies'] = train_accuracies
    r['train_ious'] = train_ious
    r['val_losses'] = val_losses
    r['val_accuracies'] = val_accuracies
    r['val_ious'] = val_ious
    r['time'] = time.strftime('%H:%M:%S', time.gmtime(elapsed_time))
    with open(os.path.join(output_folder, 'train_results.json'), 'w') as g:
        json.dump(r, g)
        logging.info('Saving results dict in `{0}`'.format(
            os.path.abspath(os.path.join(output_folder,
                                         'train_results.json'))))

    # Plot results
    plot_errors(args.num_epochs,
                train_losses,
                val_losses,
                val_step_size=args.val_step_size,
                output_folder=output_folder,
                save=True,
                bce_dice_focal=bce_dice_focal)
    plot_accuracy(args.num_epochs,
                  train_accuracies,
                  val_accuracies,
                  val_step_size=args.val_step_size,
                  output_folder=output_folder,
                  save=True)
    plot_iou(args.num_epochs,
             train_ious,
             val_ious,
             val_step_size=args.val_step_size,
             output_folder=output_folder,
             save=True)

    if hasattr(meta_train_dataset, 'close'):
        meta_train_dataset.close()
        meta_val_dataset.close()
Example #25
def main(args):

    # If passed through command line, check if CUDA available and use GPU if possible
    if args.cuda:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print('Device: {}'.format(device))
    else:
        device = torch.device('cpu')
        print('Device: {}'.format(device))

    # Prerequisites for training
    train_data, val_data, test_data = get_datasets()

    # Build model
    vocab_size = train_data.tokenizer.vocab_size
    model = RNNLM(ntoken = vocab_size, ninp = args.emsize, nhid = args.nhid,
                    nlayers = args.nlayers, dropout = args.dropout).to(device)

    train_loader = DataLoader(
        train_data, batch_size = args.batch_size, shuffle = False,
        collate_fn = padded_collate, num_workers = 1
    )

    val_loader = DataLoader(
        val_data, batch_size = args.eval_batch_size, shuffle = False,
        collate_fn = padded_collate, num_workers = 1
    )

    test_loader = DataLoader(
        test_data, batch_size = args.eval_batch_size, shuffle = False,
        collate_fn = padded_collate, num_workers = 1
    )

    # Till here

    print('Split sizes | Train: {} | Val: {} | Test: {} |'.format(len(train_loader), len(val_loader),
                                            len(test_loader)))

    optimizer = Adam(model.parameters(), lr = args.lr)
    print(model)

    # store best validation loss
    best_val_loss = None

    # Use Ctrl + C to break out of training at any time
    try:
        for epoch in range(1, args.epochs + 1):

            epoch_start_time = time.time()

            train(model, train_data, train_loader, args, device, optimizer, epoch)

            val_loss = evaluate(val_loader, val_data, device, model)

            print('-' * 89)

            print('| End of epoch {:3d} | Time: {:5.2f} | Validation loss: {:5.2f} |'.format(epoch,
             (time.time() - epoch_start_time), val_loss))

            print('-' * 89)

            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)

                best_val_loss = val_loss
    except KeyboardInterrupt:
        print('-' * 89)
        print('Terminating training early.')



    # Load best model
    with open(args.save, 'rb') as f:
        model = torch.load(f)

        # Ensure rnn parameters are a continuous chunk of memory
        model.rnn.flatten_parameters()

    test_loss = evaluate(test_loader, test_data, device, model)

    print('=' * 89)
    print('|End of training and testing. | Test loss {:5.2f}'.format(test_loss))
    print('=' * 89)
Example #26
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    config = load_config(args.config)
    train_config = config['training']
    output_dir = os.path.expandvars(config['output_dir'])
    checkpoint_format = os.path.join(output_dir, 'checkpoints',
                                     'checkpoint-{epoch}.h5')
    if rank == 0:
        os.makedirs(output_dir, exist_ok=True)

    # Logging
    config_logging(verbose=args.verbose)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    if args.show_config:
        logging.info('Command line config: %s', args)
    if rank == 0:
        logging.info('Job configuration: %s', config)
        logging.info('Saving job outputs to %s', output_dir)

    # Configure session
    device_config = config.get('device', {})
    configure_session(**device_config)

    # Load the data
    train_gen, valid_gen = get_datasets(batch_size=train_config['batch_size'],
                                        **config['data'])

    # Build the model
    model = get_model(**config['model'])
    # Configure optimizer
    opt = get_optimizer(n_ranks=n_ranks, dist_wrapper=hvd.DistributedOptimizer, **config['optimizer'])
    # Compile the model
    model.compile(loss=train_config['loss'], optimizer=opt,
                  metrics=train_config['metrics'])
    if rank == 0:
        model.summary()

    # Prepare the training callbacks
    callbacks = get_basic_callbacks(args.distributed)

    # Learning rate warmup
    warmup_epochs = train_config.get('lr_warmup_epochs', 0)
    callbacks.append(hvd.callbacks.LearningRateWarmupCallback(
                     warmup_epochs=warmup_epochs, verbose=1))

    # Learning rate decay schedule
    for lr_schedule in train_config.get('lr_schedule', []):
        if rank == 0:
            logging.info('Adding LR schedule: %s', lr_schedule)
        callbacks.append(hvd.callbacks.LearningRateScheduleCallback(**lr_schedule))

    # Checkpoint only from rank 0
    if rank == 0:
        os.makedirs(os.path.dirname(checkpoint_format), exist_ok=True)
        callbacks.append(keras.callbacks.ModelCheckpoint(checkpoint_format))
        
    # Timing callback
    timing_callback = TimingCallback()
    callbacks.append(timing_callback)

    # Train the model
    train_steps_per_epoch = max([len(train_gen) // n_ranks, 1])
    valid_steps_per_epoch = max([len(valid_gen) // n_ranks, 1])
    history = model.fit_generator(train_gen,
                                  epochs=train_config['n_epochs'],
                                  steps_per_epoch=train_steps_per_epoch,
                                  validation_data=valid_gen,
                                  validation_steps=valid_steps_per_epoch,
                                  callbacks=callbacks,
                                  workers=4, verbose=2 if rank == 0 else 0)

    # Save training history
    if rank == 0:
        # Print some best-found metrics
        if 'val_acc' in history.history.keys():
            logging.info('Best validation accuracy: %.3f',
                         max(history.history['val_acc']))
        if 'val_top_k_categorical_accuracy' in history.history.keys():
            logging.info('Best top-5 validation accuracy: %.3f',
                         max(history.history['val_top_k_categorical_accuracy']))
        logging.info('Average time per epoch: %.3f s',
                     np.mean(timing_callback.times))
        np.savez(os.path.join(output_dir, 'history'),
                 n_ranks=n_ranks, **history.history)

    # Drop to IPython interactive shell
    if args.interactive and (rank == 0):
        logging.info('Starting IPython interactive session')
        import IPython
        IPython.embed()

    if rank == 0:
        logging.info('All done!')
Beispiel #27
0
    def train_confidnet(self, convnet_path, epoch=100, epoch_to_restore=0):
        train, val = get_datasets(self.dataset, self.train_val_split,
                                  self.seed)
        train_loader = DataLoader(train, **self.loader_kwargs)
        val_loader = DataLoader(val, **self.loader_kwargs)

        conv_net = self.convnet(**self.convnet_kwargs).to(self.device)
        conv_net.load_state_dict(torch.load(convnet_path))
        conv_net.eval()
        confid_net = ConfidNet(**self.confidnet_kwargs).to(self.device)
        optimizer = Adam(confid_net.parameters(), **self.optimizer_kwargs)
        # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)
        criterion = torch.nn.MSELoss()
        writer = SummaryWriter(self.log_dir)

        for e in range(epoch_to_restore + 1, epoch + epoch_to_restore + 1):
            confid_net.train()
            history_train = {"loss": [], "metric": []}
            train_histograms = ConfidenceHistograms()
            for idx_batch, (imgs, label) in enumerate(train_loader):
                imgs, label = imgs.to(self.device), label.to(self.device)
                with torch.no_grad():
                    pred, encoded = conv_net(imgs)

                confid_net.zero_grad()
                confidence = confid_net(encoded)
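                # Regress the True Class Probability (TCP): the softmax probability of the ground-truth class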
                loss = criterion(
                    confidence,
                    F.softmax(pred, dim=1).gather(1, label.unsqueeze(1)))
                loss.backward()
                optimizer.step()
                train_histograms.step(label, pred, confidence.detach())
                history_train["loss"].append(loss.detach().item())

                step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                           len(train))
                sys.stdout.write("Training : "
                                 f"Epoch {e}/{epoch + epoch_to_restore}; "
                                 f"Step {step}/{len(train)}; "
                                 f"Loss {loss.detach().item()};\r")
                sys.stdout.flush()
            print()
            print()

            confid_net.eval()
            history_val = {"loss": [], "metric": []}
            val_histograms = ConfidenceHistograms()
            for idx_batch, (imgs, label) in enumerate(val_loader):
                imgs, label = imgs.to(self.device), label.to(self.device)

                with torch.no_grad():
                    pred, encoded = conv_net(imgs)
                    confidence = confid_net(encoded)
                val_histograms.step(label, pred, confidence)
                loss = criterion(
                    confidence,
                    F.softmax(pred, dim=1).gather(1, label.unsqueeze(1)))
                history_val["loss"].append(loss.detach().item())

                step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                           len(val))
                sys.stdout.write("Validation : "
                                 f"Epoch {e}/{epoch + epoch_to_restore}; "
                                 f"Step {step}/{len(val)}; "
                                 f"Loss {loss.detach().item()};\r")
                sys.stdout.flush()

            # scheduler.step(np.mean(history_val["loss"]))
            train_mcp_hist, train_tcp_hist = train_histograms.get_histograms()
            writer.add_figure("ConfidNet/train/MCP", train_mcp_hist, e)
            writer.add_figure("ConfidNet/train/TCP", train_tcp_hist, e)
            val_mcp_hist, val_tcp_hist = val_histograms.get_histograms()
            writer.add_figure("ConfidNet/val/MCP", val_mcp_hist, e)
            writer.add_figure("ConfidNet/val/TCP", val_tcp_hist, e)
            writer.add_scalars(
                'ConfidNet/Loss', {
                    "train": np.mean(history_train["loss"]),
                    "val": np.mean(history_val["loss"])
                }, e)

            print(f"\n\n[*] Finished epoch {e};\n\n"
                  "Train :\n"
                  f"\tLoss : {np.mean(history_train['loss'])}\n"
                  "Test :\n"
                  f"\tLoss : {np.mean(history_val['loss'])}\n\n\n")

            if e % self.model_checkpoint == 0:
                filename = str(self.model_filename).format(model="ConfidNet",
                                                           epoch=e)
                torch.save(confid_net.state_dict(), filename)

        return confid_net
Beispiel #28
0
    parser.add_argument("--data", type = Path, default = Path("data/alignments/BLAT_ECOLX_hmmerbit_plmc_n5_m30_f50_t0.2_r24-286_id100_b105.a2m"), help = "Fasta input file of sequences.")
    parser.add_argument("--data_sheet", type = str, default = "BLAT_ECOLX_Ranganathan2015", help = "Protein family data sheet in mutation_data.pickle.")
    parser.add_argument("--metric_column", type = str, default = "2500", help = "Metric column of sheet used for Spearman's Rho calculation.")
    parser.add_argument("--ensemble_count", type = int, default = 2000, help = "How many samples of the model to use for evaluation as an ensemble.")
    parser.add_argument("--results_dir", type = Path, default = Path(f"results_{datetime.now().strftime('%Y-%m-%dT%H_%M_%S')}"), help = "Directory to save results to.")

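    # Evaluation only: run everything without gradient tracking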
    with torch.no_grad():
        args = parser.parse_args()

        print("Arguments given:")
        for arg, value in args.__dict__.items():
            print(f"  {arg}: {value}")
        print("")

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        protein_dataset, *_ = get_datasets(args.data, device, 0.8)
        print('Data loaded')

        wt, *_ = protein_dataset[0]
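        # Flattened input dimension: wild-type sequence length times the token alphabet size (NUM_TOKENS)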
        size = len(wt) * NUM_TOKENS

        # load model
        model = VAE([size, 1500, 1500, 30, 100, 2000, size], NUM_TOKENS, use_dictionary = False).to(device)

        try:
            model.load_state_dict(torch.load(args.results_dir / Path("model.torch"), map_location=device)["state_dict"])
        except FileNotFoundError:
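            # No saved checkpoint under results_dir; continue with the freshly initialized model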
            pass

        cor = mutation_effect_prediction(model, args.data, args.data_sheet, args.metric_column, device, args.ensemble_count, args.results_dir)
Beispiel #29
0
    def train_convnet(self, epoch=100, epoch_to_restore=0):
        train, val = get_datasets(self.dataset, self.train_val_split,
                                  self.seed)
        train_loader = DataLoader(train, **self.loader_kwargs)
        val_loader = DataLoader(val, **self.loader_kwargs)

        net = self.convnet(**self.convnet_kwargs).to(self.device)
        optimizer = Adam(net.parameters())
        criterion = torch.nn.CrossEntropyLoss()
        writer = SummaryWriter(self.log_dir)

        for e in range(epoch_to_restore + 1, epoch + epoch_to_restore + 1):
            net.train()
            history_train = {"loss": [], "metric": []}
            for idx_batch, (imgs, label) in enumerate(train_loader):
                imgs, label = imgs.to(self.device), label.to(self.device)

                net.zero_grad()
                pred, _ = net(imgs)
                loss = criterion(pred, label)
                loss.backward()
                optimizer.step()
                history_train["loss"].append(loss.detach().item())

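                # Predicted class = argmax over the softmax outputs, scored against the labels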
                pred = F.softmax(pred.detach(), dim=1).argmax(1)
                score = accuracy_score(label.cpu(), pred.cpu())
                history_train["metric"].append(score)

                step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                           len(train))
                sys.stdout.write("Training : "
                                 f"Epoch {e}/{epoch + epoch_to_restore}; "
                                 f"Step {step}/{len(train)}; "
                                 f"Loss {loss.detach().item()}; "
                                 f"Score {score}\r")
                sys.stdout.flush()
            print()
            print()

            net.eval()
            history_val = {"loss": [], "metric": []}
            for idx_batch, (imgs, label) in enumerate(val_loader):
                imgs, label = imgs.to(self.device), label.to(self.device)

                with torch.no_grad():
                    pred, _ = net(imgs)
                loss = criterion(pred, label).detach().item()
                history_val["loss"].append(loss)

                pred = F.softmax(pred.detach(), dim=1).argmax(1)
                score = accuracy_score(label.cpu(), pred.cpu())
                history_val["metric"].append(score)

                step = min((idx_batch + 1) * self.loader_kwargs["batch_size"],
                           len(val))
                sys.stdout.write("Validation : "
                                 f"Epoch {e}/{epoch + epoch_to_restore}; "
                                 f"Step {step}/{len(val)}; "
                                 f"Loss {loss}; "
                                 f"Score {score}\r")
                sys.stdout.flush()

            writer.add_scalars(
                'ConvNet/Loss', {
                    "train": np.mean(history_train["loss"]),
                    "val": np.mean(history_val["loss"])
                }, e)
            writer.add_scalars(
                'ConvNet/Accuracy', {
                    "train": np.mean(history_train["metric"]),
                    "val": np.mean(history_val["metric"])
                }, e)

            print(f"\n\n[*] Finished epoch {e};\n\n"
                  "Train :\n"
                  f"\tLoss : {np.mean(history_train['loss'])}\n"
                  f"\tAccuracy : {np.mean(history_train['metric'])}\n"
                  "Test :\n"
                  f"\tLoss : {np.mean(history_val['loss'])}\n"
                  f"\tAccuracy : {np.mean(history_val['metric'])}\n\n\n")

            if e % self.model_checkpoint == 0:
                filename = str(self.model_filename).format(model="ConvNet",
                                                           epoch=e)
                torch.save(net.state_dict(), filename)

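        # The last checkpoint path is returned so it can be passed to train_confidnet as convnet_path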
        return filename
Beispiel #30
0
from pybrain.utilities           import percentError
from pybrain.tools.shortcuts     import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules   import SoftmaxLayer
from pybrain.tools.customxml.networkreader import NetworkReader
from pybrain.tools.customxml.networkwriter import NetworkWriter
import os
import data

train_type = "symbol"
hidden_units = 10

xmldir = "xml/"
networkname = train_type + "-" + str(hidden_units) + ".xml"

training_set, test_set = data.get_datasets("pics/resized/", dstype = train_type)
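# Convert integer class labels to one-of-many (one-hot) target vectors for the softmax output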
training_set._convertToOneOfMany()
test_set._convertToOneOfMany()

print("Test type: '{}'".format(train_type))
print("Number of training patterns:", len(training_set))
print("Number of test patterns:", len(test_set))
print("Input and output dimensions:", training_set.indim, training_set.outdim)
print("Number of hidden units:", hidden_units)
print()
print("First sample (input, target, class):")
print(training_set['input'][0], training_set['target'][0], training_set['class'][0])
print()

network = buildNetwork(training_set.indim, hidden_units, training_set.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(network, dataset = training_set)
Beispiel #31
0
def main():
    args = get_arguments()

    # experiment name
    if not args.exp_name:
        args.exp_name = '_'.join([args.dataset, args.model])
    print("# Experiment: ", args.exp_name)

    # output folder
    output_folder = os.path.join(args.output_root, args.dataset, args.exp_name)
    os.makedirs(output_folder, exist_ok=True)
    print("# Output path: ", output_folder)

    # visdom
    global plotter
    if args.use_visdom:
        logging_folder = os.path.join(args.logging_root, args.dataset, args.exp_name)
        os.makedirs(logging_folder, exist_ok=True)
        plotter = utils.VisdomLinePlotter(env_name=args.exp_name, logging_path=os.path.join(logging_folder, 'vis.log'))
        print("# Visdom path: ", logging_folder)

    # dataset
    print("# Load datasets")
    train_datasets, val_datasets, test_datasets = get_datasets(args.dataset, args.dataset_folder, args.batch_size)
    num_classes = train_datasets[0].num_classes
    vocab = set(train_datasets[0].vocab)
    vocab = vocab.union(set(val_datasets[0].vocab))
    vocab = vocab.union(set(test_datasets[0].vocab))

    # pre-trained word2vec
    print("# Load pre-trained word2vec")
    pretrained_word2vec_cache = os.path.join(os.path.dirname(args.w2v_file), args.dataset + '_w2v.pkl')
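    # Cache the loaded embeddings so the full word2vec file is parsed only once per dataset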
    if os.path.isfile(pretrained_word2vec_cache):
        with open(pretrained_word2vec_cache, 'rb') as f:
            pretrained_word2vec = pickle.load(f)
    else:
        pretrained_word2vec = PretrainedWord2Vec(vocab, args.w2v_file)
        with open(pretrained_word2vec_cache, 'wb') as f:
            pickle.dump(pretrained_word2vec, f)

    # train
    print("# Start training")
    for cv, (train_dataset, val_dataset, test_dataset) in enumerate(zip(train_datasets, val_datasets, test_datasets)):
        # fix random seed
        utils.fix_random_seed(seed=const.RANDOM_SEED)

        # model
        cnn = get_model(args.model, num_classes, pretrained_word2vec)
        if torch.cuda.is_available():
            cnn.cuda()

        # dataloader
        train_loader = DataLoader(train_dataset, args.batch_size, shuffle=True, collate_fn=sentence_collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=sentence_collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=sentence_collate_fn)

        # optimizer
        optim = Adadelta(cnn.parameters(), rho=0.95, eps=1e-6)

        # criterion
        criterion = CrossEntropyLoss()

        # training
        if plotter:
            plotter.set_cv(cv)
        output_path = os.path.join(output_folder, 'cv_%d_best.pkl' % cv)
        train(args.num_epochs, cnn, train_loader, optim, criterion, val_loader, output_path)

        # evaluation
        utils.load_model(output_path, cnn)
        find_most_similar_words(cnn)
        accuracy = eval(cnn, test_loader)
        print('cross_val:', cv, '\taccuracy:', accuracy)