def write_vocab_to_file(tokens_list, vocab_file_path):
    """
    Count the tokens in tokens_list and write "token<TAB>count" lines to
    vocab_file_path, most frequent first. The output directory is created if
    it does not exist.
    """
    utils.maybe_mkdir(vocab_file_path)
    # Count occurrences of each token
    vocab_counter = collections.Counter(tokens_list)
    # Write the vocabulary to file, most frequent tokens first
    with open(vocab_file_path, "w", encoding="utf-8") as vocab_file:
        for vocab, count in vocab_counter.most_common():
            vocab_file.write(f"{vocab}\t{count}\n")

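Every example on this page relies on a small maybe_mkdir helper whose implementation is not shown. Below is a minimal sketch of what such a helper commonly looks like; this is an assumption, each repository ships its own version, and some apparently accept a file path and create its parent directory instead.

import os

def maybe_mkdir(path):
    """Create path (and any missing parents) if it does not already exist."""
    # Hypothetical helper; each repository on this page defines its own variant.
    os.makedirs(path, exist_ok=True)
    return path
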
def run():
    parser = ArgumentParser()
    parser.add_argument("-c",
                        "--config_path",
                        default='config/config.yaml',
                        help="The default config file.")
    parser.add_argument(
        "-mq",
        "--model_path",
        type=str,
        required=True,
        help='Path to the local pretrained model checkpoint for the Question Generator'
    )
    parser.add_argument("-e",
                        "--exp_name",
                        type=str,
                        default='qgen',
                        help='Name of the experiment')
    args = parser.parse_args()

    # Read config from yaml file.
    config_file = args.config_path
    with open(config_file) as reader:
        config = yaml.safe_load(reader)
        config = dotdict(config)

    for k, v in vars(args).items():
        config[k] = v

    config.checkpoint = os.path.join(config.model_path, "sampling",
                                     config.exp_name)
    maybe_mkdir(config.checkpoint)
    copyfile(config.config_path, os.path.join(config.checkpoint,
                                              "config.yaml"))

    config.device = "cuda" if torch.cuda.is_available() else "cpu"
    config.n_gpu = torch.cuda.device_count()
    config.n_gpu = 1  # override: force a single GPU regardless of how many are visible

    # logging is set to INFO
    logging.basicConfig(level=logging.INFO)
    logger.info("Arguments: %s", pformat(config))
    logger.info("device: {}, n_gpu {}".format(config.device, config.n_gpu))

    random.seed(config.seed)
    torch.random.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    torch.manual_seed(config.seed)
    main(config)
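
run() wraps the parsed YAML in a dotdict so entries can be read and written as attributes (config.model_path, config.checkpoint, and so on). The wrapper itself is not shown; a minimal sketch, assuming the usual dict-with-attribute-access idiom:

class dotdict(dict):
    """dict whose keys are also accessible as attributes."""
    __getattr__ = dict.get          # config.exp_name -> config['exp_name'] (None if missing)
    __setattr__ = dict.__setitem__  # config.device = 'cuda' -> config['device'] = 'cuda'
    __delattr__ = dict.__delitem__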
Example 3
def main():
    dataset_filepath = '../../data/datasets/ngsim_feature_trajectories.h5'
    binedges = [10,15,25,50]
    max_len = 100
    data = utils.load_ngsim_trajectory_data(
        dataset_filepath,
        binedges=binedges,
        max_len=max_len,
        max_samples=None,
        train_ratio=.9,
        target_keys=['lidar_10']
    )

    exp_dir = '../../data/experiments/imputation'
    utils.maybe_mkdir(exp_dir)

    model = rnn.RNN(
        name='supervised_imputation',
        input_dim=data['train_x'].shape[2],
        hidden_dim=256,
        max_len=max_len,
        output_dim=len(binedges),
        batch_size=500,
        learning_rate=.0005,
        dropout_keep_prob=.75
    )
    writer = tf.summary.FileWriter(os.path.join(exp_dir, 'train'))
    val_writer = tf.summary.FileWriter(os.path.join(exp_dir, 'val'))

    utils.write_baseline_summary(data['train_lengths'], data['train_y'], writer)
    utils.write_baseline_summary(data['val_lengths'], data['val_y'], val_writer)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.train(
            data, 
            n_epochs=1000,
            writer=writer,
            val_writer=val_writer
        )
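
This example uses TensorFlow 1.x graph-mode APIs (tf.Session, tf.summary.FileWriter, tf.global_variables_initializer). Under TensorFlow 2.x the same calls are available through the compatibility module; a minimal sketch of the equivalent setup, assuming TF 2.x is installed:

import os
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # restore TF1-style graph/session execution

exp_dir = '../../data/experiments/imputation'
writer = tf.compat.v1.summary.FileWriter(os.path.join(exp_dir, 'train'))
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    # model.train(...) as in main() above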
Example 4
def main(
        mode='with_adapt',
        source_filepath='../../../data/datasets/nov/subselect_proposal_prediction_data.h5',
        target_filepath='../../../data/datasets/nov/bn_train_data.h5',
        results_dir='../../../data/datasets/nov/hyperparam_search',
        target_idx=2,
        batch_size=500,
        debug_size=100000,
        n_pos_tgt_train_samples=[1, 10, 25, 50, 75, 100, 200],
        n_tgt_train_samples=None,
        n_epochs=[20, 22, 25, 30, 35, 40, 45]):
    
    utils.maybe_mkdir(results_dir)
    if n_pos_tgt_train_samples is not None:
        n_itr = len(n_pos_tgt_train_samples)
    elif n_tgt_train_samples is not None:
        n_itr = len(n_tgt_train_samples)
    else:
        raise ValueError(
            'one of n_pos_tgt_train_samples or n_tgt_train_samples must be provided')

    for i in range(n_itr):

        if n_pos_tgt_train_samples is not None:
            src, tgt = utils.load_data(
                source_filepath, 
                target_filepath, 
                debug_size=debug_size,
                remove_early_collision_idx=5,
                n_pos_tgt_train_samples=n_pos_tgt_train_samples[i],
                src_train_split=.9,
                tgt_train_split=2./3,
                target_idx=target_idx
            )
            n_samples = n_pos_tgt_train_samples[i]
        elif n_tgt_train_samples is not None:
            src, tgt = utils.load_data(
                source_filepath, 
                target_filepath, 
                debug_size=debug_size,
                remove_early_collision_idx=5,
                n_tgt_train_samples=n_tgt_train_samples[i],
                src_train_split=.9,
                tgt_train_split=2/3.,
                target_idx=target_idx
            )
            n_samples = n_tgt_train_samples[i]

        template = os.path.join(
                results_dir,
                '{}_'.format(n_samples) + '{:.4f}_itr_{}_' + '{}.npy'.format(mode))
        hyperparam_search(
            src, 
            tgt, 
            mode,
            encoder_sizes=[
                (512, 256, 128, 64),
                (256, 128, 64),
                (128, 64)
            ],
            classifier_sizes=[
                (),
                (64,),
                (64,64)
            ],
            dropout_keep_probs=np.linspace(.5,1,200),
            learning_rates=np.linspace(1e-4,1e-3,200),
            n_itr=30,
            stats_filepath_template=template,
            n_epochs=n_epochs[i]
        )
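
hyperparam_search receives 200-point linspace grids for dropout and learning rate along with n_itr=30, so presumably it samples random configurations from those grids. A hypothetical sketch of that kind of draw (not the actual hyperparam_search implementation):

import numpy as np

rng = np.random.RandomState(0)
dropout_keep_probs = np.linspace(.5, 1, 200)
learning_rates = np.linspace(1e-4, 1e-3, 200)

# one random (dropout, learning rate) configuration per search iteration
configs = [
    dict(dropout_keep_prob=rng.choice(dropout_keep_probs),
         learning_rate=rng.choice(learning_rates))
    for _ in range(30)
]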
Example 5
def write_predictions_to_file(predictions, test_file_path, out_file_path):
    utils.maybe_mkdir(out_file_path)
    # Get original input from test file
    lemmas, features = data.read_test_file(test_file_path)
    # Write all data with predictions to the out file
    data.write_morph_file(lemmas, predictions, features, out_file_path)
    parser.add_argument('--output_root', type=str,
                        default=default_output_root)
    parser.add_argument('--name', type=str, default='baseline')
    args, _ = parser.parse_known_args()
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(ColoredFormatter(to_file=False))
    stream_handler.addFilter(TFlogFilter())
    stream_handler.setLevel(logging.INFO)
    logger.addHandler(stream_handler)

    output_root = args.output_root
    name = args.name

    config = configuration.Configuration(name, args.work_dir, args.data_dir,
                                         args.output_root)
    config_parser = config.to_parser()
    update_args, _ = config_parser.parse_known_args()
    config.from_args(update_args)
    output_dir = maybe_mkdir(config.output_dir)
    log_path = os.path.join(output_dir, config.start_time + '.log')
    file_handler = logging.FileHandler(log_path)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(ColoredFormatter(to_file=True))
    file_handler.addFilter(TFlogFilter())
    logger.addHandler(file_handler)
    try:
        config.log_params()
        runner.train(config, restore=False)
    except Exception:
        logger.exception('Uncaught exception:')
        sys.exit(1)

def run_setup(config):
    expdir = config.defaults().get('expdir')
    maybe_mkdir(expdir)
    maybe_mkdir(os.path.join(expdir, 'log'))
    maybe_mkdir(os.path.join(expdir, 'data'))
    maybe_mkdir(os.path.join(expdir, 'data', 'snapshots'))
    maybe_mkdir(os.path.join(expdir, 'data', 'summaries'))
    maybe_mkdir(os.path.join(expdir, 'viz'))
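
run_setup reads expdir from config.defaults(), which matches the standard-library configparser API. A hypothetical usage sketch (the expdir value is made up):

import configparser

config = configparser.ConfigParser()
config.read_dict({'DEFAULT': {'expdir': '/tmp/experiments/run1'}})

run_setup(config)  # creates log/, data/snapshots/, data/summaries/, and viz/ under expdir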
Example 8
    # logistics
    parser.add_argument('--exp_name', type=str, default='CartPole-v0')
    parser.add_argument('--itr', type=int, default=95)
    parser.add_argument('--mode', type=str, default='collect', help='one of collect, evaluate, or visualize')
    parser.add_argument('--n_traj', type=int, default=500, help='number of trajectories to collect or evaluate with')
    parser.add_argument('--max_steps', type=int, default=1000)

    args = parser.parse_args()

    exp_name = 'CartPole-v0'

    # collect expert trajectories
    if args.mode == 'collect':
        input_filepath = '../data/experiments/{}/train/log/itr_{}.pkl'.format(args.exp_name, args.itr)
        output_dir = '../data/experiments/{}/collection/'.format(args.exp_name)
        utils.maybe_mkdir(output_dir)
        output_filepath = os.path.join(output_dir, 'expert_traj.h5')
        trajectories = collect(input_filepath, n_traj=args.n_traj, max_steps=args.max_steps)
        hgail.misc.simulation.write_trajectories(trajectories, output_filepath)

    # evaluate 
    elif args.mode == 'evaluate' or args.mode == 'visualize':
        if args.mode == 'visualize':
            args.n_traj = 10
            render = True 
        else:
            render = False

        phase = 'imitate'
        input_filepath = '../data/experiments/{}/{}/log/itr_{}.pkl'.format(
            args.exp_name, phase, args.itr)
Example 9
def main(
        visualize=False,
        batch_size=100,
        vis_dir='../../../data/visualizations/domain_adaptation',
        output_filepath_template='../../../data/datasets/da_results_*_{}.npy',
        source_filepath='../../../data/datasets/nov/subselect_proposal_prediction_data.h5',
        target_filepath='../../../data/datasets/nov/bn_train_data.h5',
        n_tgt_train_samples=[None],
        n_src_train_samples=[None],
        debug_size=100000,
        mode='with_adapt'):
    
    # set output filepath template based on mode
    output_filepath_template = output_filepath_template.replace('*', mode)

    # modes
    if mode == 'with_adapt':
        lambda_final = .5
    elif mode == 'without_adapt':
        lambda_final = 0.
    elif mode == 'target_only':
        n_tgt_train_samples = [int(v * .5) for v in n_tgt_train_samples if v is not None]
        n_tgt_train_samples.append(None)
        n_src_train_samples = n_tgt_train_samples
        source_filepath = target_filepath
        lambda_final = 0
    elif mode == 'frustratingly':
        lambda_final = 0.
    else:
        raise ValueError('invalid mode: {}'.format(mode))

    # debug
    # source_filepath = '../../../data/datasets/debug_source.h5'
    # target_filepath = '../../../data/datasets/debug_target.h5'
    
    n_sample_sizes = len(n_tgt_train_samples)

    infos = dict()
    for i in range(n_sample_sizes):

        # set the seed
        np.random.seed(seeds[i])

        # load the data for this size
        data = utils.load_data(
            source_filepath, 
            target_filepath,
            validation_filepath=validation_filepath,
            max_tgt_train_samples=n_tgt_train_samples[i],
            max_src_train_samples=n_src_train_samples[i],
            debug_size=debug_size,
            timestep=-1,
            train_split=.95
        )
        
        if visualize:
            utils.maybe_mkdir(vis_dir)
            visualization_utils.visualize(data, vis_dir)

        # build datasets
        dataset, val_dataset, extra_val_dataset = utils.build_datasets(data, batch_size)

        # update n_tgt_samples in case fewer than requested were loaded
        n_tgt_train_samples[i] = len(dataset.xt)

        # report training size
        print('training with {} target samples'.format(n_tgt_train_samples[i]))

        # train
        cur_size = n_tgt_train_samples[i]
        if mode == 'target_only' and i != n_sample_sizes - 1:
            cur_size *= 2
        infos[cur_size] = run_training(
            dataset, 
            val_dataset, 
            extra_val_dataset,
            batch_size=batch_size,
            lambda_final=lambda_final
        )
        np.save(output_filepath_template.format(n_tgt_train_samples[i]), infos)
Example 10
        config = yaml.safe_load(reader)
        config = dotdict(config)

    for k, v in vars(args).items():
        config[k] = v

    assert len(config.learning) != 0, \
        "At least one of 'sl' or 'rl' is required for learning."

    config.device = "cuda" if torch.cuda.is_available() else "cpu"
    config.checkpoint = os.path.join(
        config.checkpoint,
        "{}-{}".format(datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S'),
                       config.exp_name))
    config.n_gpu = torch.cuda.device_count()
    maybe_mkdir(config.checkpoint)

    with open(os.path.join(config.checkpoint, 'config'), 'wt') as f:
        pprint.pprint(config, stream=f)

    # logging is set to INFO
    logging.basicConfig(level=logging.INFO)
    logger.info("Arguments: %s", pprint.pformat(config))
    logger.info("device: {}, n_gpu {}".format(config.device, config.n_gpu))

    random.seed(config.seed)
    torch.random.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    torch.manual_seed(config.seed)
    main(config)
Example 11
                              learning_rate=hparams.learning_rate)

    transferCNN.x = content_image
    # ground truth content comes from content image
    transferCNN.gt_contents = session.run(transferCNN.contents())

    transferCNN.x = style_image
    # ground truth style comes from style image
    transferCNN.gt_styles = session.run(transferCNN.styles())

    transferCNN.build()

    summary_writer = None
    if args.should_log:
        current_logs_dir = os.path.join(args.logs_dir, hparams.to_json())
        utils.maybe_mkdir(current_logs_dir)
        summary_writer = tf.summary.FileWriter(current_logs_dir, graph)

    # initialize
    transferCNN.x = (1 - hparams.noise_ratio) * content_image +\
        hparams.noise_ratio * np.random.uniform(-20.0, 20.0, content_image.shape)

    for i in range(hparams.num_epochs):
        _, loss = session.run([transferCNN.optimize(), transferCNN.loss()])

        if i % 50 == 0 or i == hparams.num_epochs - 1:
            print("step: %i, loss: %.4e" % (i + 1, loss))
            if args.should_log:
                summary_writer.add_summary(
                    session.run(transferCNN.summaries()), i + 1)
    if args.should_log: