def write_vocab_to_file(tokens_list, vocab_file_path):
    """Count all tokens in the list and write them to a file, creating the
    directory if it does not exist."""
    utils.maybe_mkdir(vocab_file_path)
    # Counter object holding the frequency of each token
    vocab_counter = collections.Counter(tokens_list)
    # Write the vocabulary to file in order of descending frequency
    with open(vocab_file_path, "w", encoding="utf-8") as vocab_file:
        for vocab, count in vocab_counter.most_common():
            vocab_file.write(f"{vocab}\t{count}\n")
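# `utils.maybe_mkdir` is not defined in these excerpts. A minimal sketch of such
# a helper, assuming it wraps os.makedirs and tolerates existing directories
# (some call sites above pass a file path, so the real helper may also strip
# the filename with os.path.dirname first, and its exact signature may differ):
import os

def maybe_mkdir(path):
    """Create `path` (and any missing parent directories) if needed; return it."""
    os.makedirs(path, exist_ok=True)
    return path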
def run():
    parser = ArgumentParser()
    parser.add_argument("-c", "--config_path", default='config/config.yaml',
                        help="The default config file.")
    parser.add_argument("-mq", "--model_path", type=str, required=True,
                        help='Path to a local pretrained checkpoint for the Question Generator')
    parser.add_argument("-e", "--exp_name", type=str, default='qgen',
                        help='The name of the experiment')
    args = parser.parse_args()

    # Read config from the yaml file and merge the command-line arguments into it.
    config_file = args.config_path
    with open(config_file) as reader:
        config = yaml.safe_load(reader)
    config = dotdict(config)
    for k, v in vars(args).items():
        config[k] = v

    config.checkpoint = os.path.join(config.model_path, "sampling", config.exp_name)
    maybe_mkdir(config.checkpoint)
    copyfile(config.config_path, os.path.join(config.checkpoint, "config.yaml"))

    config.device = "cuda" if torch.cuda.is_available() else "cpu"
    config.n_gpu = torch.cuda.device_count()
    config.n_gpu = 1  # force single-GPU execution regardless of how many GPUs are visible

    # logging is set to INFO
    logging.basicConfig(level=logging.INFO)
    logger.info("Arguments: %s", pformat(config))
    logger.info("device: {}, n_gpu {}".format(config.device, config.n_gpu))

    # seed all random number generators for reproducibility
    random.seed(config.seed)
    torch.random.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    torch.manual_seed(config.seed)

    main(config)
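# `dotdict` is referenced in the config-loading snippets but not defined here.
# A minimal sketch, assuming it is the common dict subclass that exposes keys
# as attributes (the project's own implementation may differ):
class dotdict(dict):
    """Dictionary with attribute-style access, e.g. config.device."""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__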
def main():
    dataset_filepath = '../../data/datasets/ngsim_feature_trajectories.h5'
    binedges = [10, 15, 25, 50]
    max_len = 100
    data = utils.load_ngsim_trajectory_data(
        dataset_filepath,
        binedges=binedges,
        max_len=max_len,
        max_samples=None,
        train_ratio=.9,
        target_keys=['lidar_10']
    )

    exp_dir = '../../data/experiments/imputation'
    utils.maybe_mkdir(exp_dir)

    model = rnn.RNN(
        name='supervised_imputation',
        input_dim=data['train_x'].shape[2],
        hidden_dim=256,
        max_len=max_len,
        output_dim=len(binedges),
        batch_size=500,
        learning_rate=.0005,
        dropout_keep_prob=.75
    )

    writer = tf.summary.FileWriter(os.path.join(exp_dir, 'train'))
    val_writer = tf.summary.FileWriter(os.path.join(exp_dir, 'val'))
    utils.write_baseline_summary(data['train_lengths'], data['train_y'], writer)
    utils.write_baseline_summary(data['val_lengths'], data['val_y'], val_writer)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.train(
            data,
            n_epochs=1000,
            writer=writer,
            val_writer=val_writer
        )
def main(
        mode='with_adapt',
        source_filepath='../../../data/datasets/nov/subselect_proposal_prediction_data.h5',
        target_filepath='../../../data/datasets/nov/bn_train_data.h5',
        results_dir='../../../data/datasets/nov/hyperparam_search',
        target_idx=2,
        batch_size=500,
        debug_size=100000,
        n_pos_tgt_train_samples=[1, 10, 25, 50, 75, 100, 200],
        n_tgt_train_samples=None,
        n_epochs=[20, 22, 25, 30, 35, 40, 45]):
    utils.maybe_mkdir(results_dir)

    # iterate over whichever sample-size list was provided
    if n_pos_tgt_train_samples is not None:
        n_itr = len(n_pos_tgt_train_samples)
    elif n_tgt_train_samples is not None:
        n_itr = len(n_tgt_train_samples)
    else:
        raise ValueError('one of n_pos_tgt_train_samples or n_tgt_train_samples must be provided')

    for i in range(n_itr):
        if n_pos_tgt_train_samples is not None:
            src, tgt = utils.load_data(
                source_filepath,
                target_filepath,
                debug_size=debug_size,
                remove_early_collision_idx=5,
                n_pos_tgt_train_samples=n_pos_tgt_train_samples[i],
                src_train_split=.9,
                tgt_train_split=2. / 3,
                target_idx=target_idx
            )
            n_samples = n_pos_tgt_train_samples[i]
        elif n_tgt_train_samples is not None:
            src, tgt = utils.load_data(
                source_filepath,
                target_filepath,
                debug_size=debug_size,
                remove_early_collision_idx=5,
                n_tgt_train_samples=n_tgt_train_samples[i],
                src_train_split=.9,
                tgt_train_split=2. / 3,
                target_idx=target_idx
            )
            n_samples = n_tgt_train_samples[i]

        template = os.path.join(
            results_dir,
            '{}_'.format(n_samples) + '{:.4f}_itr_{}_' + '{}.npy'.format(mode))

        hyperparam_search(
            src,
            tgt,
            mode,
            encoder_sizes=[
                (512, 256, 128, 64),
                (256, 128, 64),
                (128, 64)
            ],
            classifier_sizes=[
                (),
                (64,),
                (64, 64)
            ],
            dropout_keep_probs=np.linspace(.5, 1, 200),
            learning_rates=np.linspace(1e-4, 1e-3, 200),
            n_itr=30,
            stats_filepath_template=template,
            n_epochs=n_epochs[i]
        )
def write_predictions_to_file(predictions, test_file_path, out_file_path):
    utils.maybe_mkdir(out_file_path)
    # Get original input from test file
    lemmas, features = data.read_test_file(test_file_path)
    # Write all data with predictions to the out file
    data.write_morph_file(lemmas, predictions, features, out_file_path)
    default=default_output_root)
parser.add_argument('--name', type=str, default='baseline')
args, _ = parser.parse_known_args()

# console logging: INFO and above, colored, with TensorFlow noise filtered out
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(ColoredFormatter(to_file=False))
stream_handler.addFilter(TFlogFilter())
stream_handler.setLevel(logging.INFO)
logger.addHandler(stream_handler)

output_root = args.output_root
name = args.name
config = configuration.Configuration(name, args.work_dir, args.data_dir, args.output_root)
config_parser = config.to_parser()
update_args, _ = config_parser.parse_known_args()
config.from_args(update_args)

output_dir = maybe_mkdir(config.output_dir)
log_path = os.path.join(output_dir, config.start_time + '.log')

# file logging: DEBUG and above, written alongside the run outputs
file_handler = logging.FileHandler(log_path)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(ColoredFormatter(to_file=True))
file_handler.addFilter(TFlogFilter())
logger.addHandler(file_handler)

try:
    config.log_params()
    runner.train(config, restore=False)
except Exception:
    logger.exception('Uncaught exception:')
    sys.exit(1)
def run_setup(config):
    expdir = config.defaults().get('expdir')
    maybe_mkdir(expdir)
    maybe_mkdir(os.path.join(expdir, 'log'))
    maybe_mkdir(os.path.join(expdir, 'data'))
    maybe_mkdir(os.path.join(expdir, 'data', 'snapshots'))
    maybe_mkdir(os.path.join(expdir, 'data', 'summaries'))
    maybe_mkdir(os.path.join(expdir, 'viz'))
# logistics
parser.add_argument('--exp_name', type=str, default='CartPole-v0')
parser.add_argument('--itr', type=int, default=95)
parser.add_argument('--mode', type=str, default='collect',
                    help='one of collect, evaluate, or visualize')
parser.add_argument('--n_traj', type=int, default=500,
                    help='number of trajectories to collect or evaluate with')
parser.add_argument('--max_steps', type=int, default=1000)
args = parser.parse_args()

exp_name = 'CartPole-v0'

# collect expert trajectories
if args.mode == 'collect':
    input_filepath = '../data/experiments/{}/train/log/itr_{}.pkl'.format(args.exp_name, args.itr)
    output_dir = '../data/experiments/{}/collection/'.format(args.exp_name)
    utils.maybe_mkdir(output_dir)
    output_filepath = os.path.join(output_dir, 'expert_traj.h5')
    trajectories = collect(input_filepath, n_traj=args.n_traj, max_steps=args.max_steps)
    hgail.misc.simulation.write_trajectories(trajectories, output_filepath)

# evaluate
elif args.mode == 'evaluate' or args.mode == 'visualize':
    if args.mode == 'visualize':
        args.n_traj = 10
        render = True
    else:
        render = False
    phase = 'imitate'
    input_filepath = '../data/experiments/{}/{}/log/itr_{}.pkl'.format(
        args.exp_name, phase, args.itr)
def main(
        visualize=False,
        batch_size=100,
        vis_dir='../../../data/visualizations/domain_adaptation',
        output_filepath_template='../../../data/datasets/da_results_*_{}.npy',
        source_filepath='../../../data/datasets/nov/subselect_proposal_prediction_data.h5',
        target_filepath='../../../data/datasets/nov/bn_train_data.h5',
        n_tgt_train_samples=[None],
        n_src_train_samples=[None],
        debug_size=100000,
        mode='with_adapt'):
    # set output filepath template based on mode
    output_filepath_template = output_filepath_template.replace('*', mode)

    # modes
    if mode == 'with_adapt':
        lambda_final = .5
    elif mode == 'without_adapt':
        lambda_final = 0.
    elif mode == 'target_only':
        n_tgt_train_samples = [int(v * .5) for v in n_tgt_train_samples if v is not None]
        n_tgt_train_samples.append(None)
        n_src_train_samples = n_tgt_train_samples
        source_filepath = target_filepath
        lambda_final = 0.
    elif mode == 'frustratingly':
        lambda_final = 0.
    else:
        raise ValueError('invalid mode: {}'.format(mode))

    # debug
    # source_filepath = '../../../data/datasets/debug_source.h5'
    # target_filepath = '../../../data/datasets/debug_target.h5'

    n_sample_sizes = len(n_tgt_train_samples)
    infos = dict()
    for i in range(n_sample_sizes):
        # set the seed
        np.random.seed(seeds[i])

        # load the data for this size
        data = utils.load_data(
            source_filepath,
            target_filepath,
            validation_filepath=validation_filepath,
            max_tgt_train_samples=n_tgt_train_samples[i],
            max_src_train_samples=n_src_train_samples[i],
            debug_size=debug_size,
            timestep=-1,
            train_split=.95
        )

        if visualize:
            utils.maybe_mkdir(vis_dir)
            visualization_utils.visualize(data, vis_dir)

        # build datasets
        dataset, val_dataset, extra_val_dataset = utils.build_datasets(data, batch_size)

        # update n_tgt_samples in case fewer than requested were loaded
        n_tgt_train_samples[i] = len(dataset.xt)

        # report training size
        print('training with {} target samples'.format(n_tgt_train_samples[i]))

        # train
        cur_size = n_tgt_train_samples[i]
        if mode == 'target_only' and i != n_sample_sizes - 1:
            cur_size *= 2
        infos[cur_size] = run_training(
            dataset,
            val_dataset,
            extra_val_dataset,
            batch_size=batch_size,
            lambda_final=lambda_final
        )
        np.save(output_filepath_template.format(n_tgt_train_samples[i]), infos)
    config = yaml.safe_load(reader)
config = dotdict(config)
for k, v in vars(args).items():
    config[k] = v

assert len(config.learning) != 0, "At least one of 'sl' or 'rl' is required for learning."

config.device = "cuda" if torch.cuda.is_available() else "cpu"
config.checkpoint = os.path.join(
    config.checkpoint,
    "{}-{}".format(datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S'), config.exp_name))
config.n_gpu = torch.cuda.device_count()

maybe_mkdir(config.checkpoint)
with open(os.path.join(config.checkpoint, 'config'), 'wt') as f:
    pprint.pprint(config, stream=f)

# logging is set to INFO
logging.basicConfig(level=logging.INFO)
logger.info("Arguments: %s", pprint.pformat(config))
logger.info("device: {}, n_gpu {}".format(config.device, config.n_gpu))

# seed all random number generators for reproducibility
random.seed(config.seed)
torch.random.manual_seed(config.seed)
torch.cuda.manual_seed(config.seed)
torch.manual_seed(config.seed)

main(config)
    learning_rate=hparams.learning_rate)

transferCNN.x = content_image  # ground truth content comes from content image
transferCNN.gt_contents = session.run(transferCNN.contents())
transferCNN.x = style_image  # ground truth style comes from style image
transferCNN.gt_styles = session.run(transferCNN.styles())
transferCNN.build()

summary_writer = None
if args.should_log:
    current_logs_dir = os.path.join(args.logs_dir, hparams.to_json())
    utils.maybe_mkdir(current_logs_dir)
    summary_writer = tf.summary.FileWriter(current_logs_dir, graph)

# initialize the input as a noisy blend of the content image
transferCNN.x = (1 - hparams.noise_ratio) * content_image + \
    hparams.noise_ratio * np.random.uniform(-20.0, 20.0, content_image.shape)

for i in range(hparams.num_epochs):
    _, loss = session.run([transferCNN.optimize(), transferCNN.loss()])
    if i % 50 == 0 or i == hparams.num_epochs - 1:
        print("step: %i, loss: %.4e" % (i + 1, loss))
        if args.should_log:
            summary_writer.add_summary(
                session.run(transferCNN.summaries()), i + 1)

if args.should_log: