def main(argv):
    """Evaluate a trained ranking model on the test split of the corpus."""
    logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)
    FLAGS = flags.FLAGS
    utils.print_flags(FLAGS)

    # Random seed initialization for reproducibility (python / numpy / torch).
    random.seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    torch.manual_seed(FLAGS.random_seed)

    # Configuration and paths. BaseLoader keeps every scalar as a string.
    # Fix: close the config file deterministically instead of leaking the handle.
    with open(FLAGS.config, 'r') as cfg_file:
        cfg = yaml.load(cfg_file, Loader=yaml.BaseLoader)
    PATH_DATA = cfg['path_data']
    PATH_CORPUS = '{}/{}'.format(PATH_DATA, cfg['corpus'])
    PATH_DATA_PREFIX = '{}/{}'.format(PATH_DATA, cfg['data_prefix'])

    # Set up the experimental environment (dumpflag=False: evaluation only,
    # no checkpoint dumping).
    exp = experiment.Experiment(FLAGS, cfg, dumpflag=False)

    # Override the attention window size of every encoder layer
    # (sliding-window attention; the index was unused, so iterate directly).
    for layer in exp.model.base.encoder.layer:
        layer.attention.self.attention_window = FLAGS.window_size

    # Load the corpus.
    corpus = utils.Corpus(PATH_CORPUS, FLAGS)

    # Load test data.
    test_data = utils.Data(PATH_DATA_PREFIX + 'test', corpus, FLAGS)

    # Evaluate test data.
    test_eval = exp.eval_dump(test_data, FLAGS.num_sample_eval,
                              'Evaluating test queries')
    print('Test Evaluation', test_eval, file=sys.stderr)
def __init__(self):
    """Restore training-time flags and set up evaluation bookkeeping.

    Flags are reloaded from the pickled training configuration so that
    evaluation runs with the same settings the model was trained with.
    """
    root = FLAGS.root_path
    self.root_path = root
    self.flag_config_file = os.path.join(root, "config.pkl")
    # NOTE(review): the trailing True presumably toggles verbose output in
    # load_flags/print_flags — confirm against their definitions.
    self.FLAGS = load_flags(self.flag_config_file, FLAGS, True)
    print_flags(self.FLAGS, True)

    # Dev/test corpora and their sizes (one sentence per line).
    self.valid_data = self.FLAGS.valid_data
    self.test_data = self.FLAGS.test_data
    self.valid_data_num = line_num_count(self.valid_data)
    self.test_data_num = line_num_count(self.test_data)
    tf.logging.info("{} / {} sentences in dev / test".format(
        self.valid_data_num, self.test_data_num))

    # Auxiliary artifact locations.
    self.map_file = os.path.join(root, "map.pkl")
    self.vocabulary_file = os.path.join(root, "vocabulary.csv")
    self.eval_summary_op = None

    # Logging / checkpoint directories and the summary writer.
    self.log_dir = os.path.join(root, self.FLAGS.log_dir)
    self.best_model_dir = os.path.join(self.log_dir, self.FLAGS.best_model_path)
    self.summary_dir = os.path.join(self.log_dir, "summary")
    self.summary_writer = tf.summary.FileWriter(self.summary_dir)

    # Track the N best checkpoints by validation accuracy; all slots start at 0.
    self.topN = FLAGS.N_best_model
    self.model_performance = {rank: 0.0 for rank in range(self.topN)}
    self.worst_valid_model_index = 0
    self.best_valid_accuracy = 0.0
    self.best_test_accuracy = 0.0
def main(argv):
    """Train a ranking model; dump a checkpoint and test metrics every epoch."""
    logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)
    FLAGS = flags.FLAGS
    utils.print_flags(FLAGS)

    # Random seed initialization for reproducibility (python / numpy / torch).
    random.seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    torch.manual_seed(FLAGS.random_seed)

    # Configuration and paths. BaseLoader keeps every scalar as a string.
    # Fix: close the config file deterministically instead of leaking the handle.
    with open(FLAGS.config, 'r') as cfg_file:
        cfg = yaml.load(cfg_file, Loader=yaml.BaseLoader)
    PATH_DATA = cfg['path_data']
    PATH_CORPUS = '{}/{}'.format(PATH_DATA, cfg['corpus'])
    PATH_DATA_PREFIX = '{}/{}'.format(PATH_DATA, cfg['data_prefix'])
    PATH_MODEL_PREFIX = '{}/{}'.format(cfg['path_model'], FLAGS.model_prefix)
    os.makedirs(PATH_MODEL_PREFIX, exist_ok=True)

    # Set up the experimental environment.
    exp = experiment.Experiment(FLAGS, cfg)

    # Change attention window size of every encoder layer
    # (the index was unused, so iterate directly).
    for layer in exp.model.base.encoder.layer:
        layer.attention.self.attention_window = FLAGS.window_size

    # Load the corpus.
    corpus = utils.Corpus(PATH_CORPUS, FLAGS)

    # Load train/dev/test data.
    train_data = utils.Data(PATH_DATA_PREFIX + 'train', corpus, FLAGS)
    dev_data = utils.Data(PATH_DATA_PREFIX + 'dev', corpus, FLAGS)
    test_data = utils.Data(PATH_DATA_PREFIX + 'test', corpus, FLAGS)

    # FLAGS.last_epoch allows resuming a previous run part-way through.
    for epoch in range(FLAGS.last_epoch, FLAGS.num_epochs):
        print('Epoch {}'.format(epoch + 1), file=sys.stderr)

        # Train the model for one epoch.
        train_loss = exp.train(train_data, eval_data=dev_data,
                               test_data=test_data,
                               num_sample_eval=FLAGS.num_sample_eval)
        print('Epoch {}, train_loss = {}'.format(epoch + 1, train_loss),
              file=sys.stderr)

        # Dump the model.
        print('Dump model for epoch {}.'.format(epoch + 1))
        exp.dump_model(PATH_MODEL_PREFIX, str(epoch + 1))

        # Evaluate test data.
        test_eval = exp.eval_dump(test_data, FLAGS.num_sample_eval,
                                  'Evaluating test queries')
        print('Test Evaluation', test_eval, file=sys.stderr)

        # Dump tensorboard results (cut@10 and overall ranking metrics).
        if exp.tb:
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/NDCG',
                                     test_eval['ndcg10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/MRR',
                                     test_eval['mrr10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/MAP',
                                     test_eval['map10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/NDCG',
                                     test_eval['ndcg'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/MRR',
                                     test_eval['mrr'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/MAP',
                                     test_eval['map'], epoch + 1)
def main(argv=None):
    """Evaluate a saved Keras model on the test split and report results."""
    K.set_floatx('float32')
    print_flags(FLAGS)

    # Read or/and prepare test config dictionary.
    if FLAGS.test_config_file:
        with open(FLAGS.test_config_file, 'r') as yml_file:
            test_config = yaml.load(yml_file, Loader=yaml.FullLoader)
    else:
        test_config = {}
    test_config = prepare_test_config(test_config, FLAGS)

    # Load model. (os.path.join() on a single argument was a no-op.)
    model = load_model(FLAGS.model)

    # Open the HDF5 data set; the context manager guarantees the file is
    # closed even if train_val_split() or test() raises.
    with h5py.File(FLAGS.data_file, 'r') as hdf5_file:
        images_tr, images_tt, labels_tr, labels_tt, _ = train_val_split(
            hdf5_file, FLAGS.group_tr, FLAGS.group_tt, FLAGS.chunk_size)

        # Test.
        results_dict = test(images_tt, labels_tt, images_tr, labels_tr, model,
                            test_config, FLAGS.batch_size, FLAGS.chunk_size)

    # Write results (TXT and YAML) if an output directory was requested.
    if FLAGS.output_dir:
        # '-1' means "next to the model file".
        if FLAGS.output_dir == '-1':
            FLAGS.output_dir = os.path.dirname(FLAGS.model)
        write_mode = 'a' if FLAGS.append else 'w'
        # exist_ok avoids the check-then-create race of the original code.
        os.makedirs(FLAGS.output_dir, exist_ok=True)
        output_file = os.path.join(FLAGS.output_dir,
                                   '{}.txt'.format(FLAGS.output_basename))
        write_test_results(results_dict, output_file, write_mode)
        output_file = os.path.join(FLAGS.output_dir,
                                   '{}.yml'.format(FLAGS.output_basename))
        with open(output_file, write_mode) as f:
            results_dict = numpy_to_python(results_dict)
            yaml.dump(results_dict, f, default_flow_style=False)

    # Always print results to stdout.
    print_test_results(results_dict)
def main():
    """Run transfer learning and report the elapsed wall-clock time."""
    # Show the full flag configuration up front.
    print_flags(FLAGS)

    # Time the whole training run.
    start_time = time()
    train()
    elapsed = time() - start_time

    # Report elapsed time as minutes:seconds (truncated, like the original).
    minutes, seconds = divmod(int(elapsed), 60)
    print(f'Done transfer learning in {minutes}:{seconds} minutes.')
def __init__(self):
    """Set up paths and counters for inference.

    Reads everything from the module-level ``FLAGS`` object directly;
    unlike the training setup, it does not restore flags from the saved
    config pickle (the path is recorded but not loaded here).
    """
    # Root directory of the experiment and path of the saved model.
    self.root_path = FLAGS.root_path
    self.model_path = FLAGS.model_path
    # Pickled flag configuration written at training time (path only).
    self.flag_config_file = os.path.join(self.root_path, "config.pkl")
    # Use the live command-line flags as-is (False: presumably non-verbose).
    self.FLAGS = FLAGS
    print_flags(self.FLAGS, False)
    # Input file for inference and its line count (= number of sentences).
    self.infer_data = self.FLAGS.infer_data
    self.infer_data_num = line_num_count(self.infer_data)
    tf.logging.info("{} sentences in infer".format(self.infer_data_num))
    # NOTE(review): hard-coded mapping pickle path — presumably the word/pos
    # mapping matching the trained model; confirm it stays in sync with the
    # checkpoint being loaded.
    self.map_file = os.path.join(
        self.root_path,
        "data/bidirectional_lstm_model_200_msl_scbm_max_3_epoch_3_mtf_300_dim_comment_all_data_w2v_embed_fine_tuned_comment_data_regression_word_pos_dict.pkl"
    )
    self.vocabulary_file = os.path.join(self.root_path, "vocabulary.csv")
    # Dataset initializer op; populated later when the input pipeline is built.
    self.train_init_op = None
def train(self):
    """Single-process training loop with periodic validation/test evaluation.

    Builds two separate graphs and sessions: the training session
    periodically checkpoints to ``temp_model.ckpt`` and the evaluation
    session restores from that checkpoint to score the validation and
    test sets. The two graphs never share variables directly — only the
    on-disk checkpoint.
    """
    # Build the char/label mapping tables (False: non-distributed mode —
    # TODO confirm against _init_dataset_maker).
    self._init_dataset_maker(False)

    # ---- Training graph ----
    train_graph = tf.Graph()
    with train_graph.as_default():
        train_char_mapping_tensor, train_label_mapping_tensor = \
            DatasetMaker.make_mapping_table_tensor()
        # Single shard (1 worker, index 0) of the training data.
        train_dataset = DatasetMaker.make_dataset(
            train_char_mapping_tensor, train_label_mapping_tensor,
            self.train_data, FLAGS.batch_size, "train", 1, 0)
        self.global_step = tf.train.get_or_create_global_step()
        train_iter = tf.data.Iterator.from_structure(
            train_dataset.output_types, train_dataset.output_shapes)
        train_init_op = train_iter.make_initializer(train_dataset)
        train_model = TrainModel(train_iter, FLAGS, self.global_step)
        self.train_summary_op = train_model.merge_train_summary_op

    # ---- Evaluation graph: one reinitializable iterator shared by the
    # validation and test datasets (switch via the two init ops below) ----
    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_char_mapping_tensor, eval_label_mapping_tensor = \
            DatasetMaker.make_mapping_table_tensor()
        valid_dataset = DatasetMaker.make_dataset(
            eval_char_mapping_tensor, eval_label_mapping_tensor,
            self.valid_data, FLAGS.batch_size, "eval", 1, 0)
        tf.logging.info("The part 1/1 Validation dataset is prepared!")
        test_dataset = DatasetMaker.make_dataset(
            eval_char_mapping_tensor, eval_label_mapping_tensor,
            self.test_data, FLAGS.batch_size, "eval", 1, 0)
        tf.logging.info("The part 1/1 Test dataset is prepared!")
        eval_iter = tf.data.Iterator.from_structure(
            valid_dataset.output_types, valid_dataset.output_shapes)
        valid_init_op = eval_iter.make_initializer(valid_dataset)
        test_init_op = eval_iter.make_initializer(test_dataset)
        eval_model = EvalModel(eval_iter, FLAGS)

    # ---- Session setup: variables, lookup tables, input pipeline ----
    train_session = self._create_session(train_graph)
    tf.logging.info("Created model with fresh parameters.")
    print_flags(FLAGS)
    save_flags(FLAGS, os.path.join(self.root_path, "config.pkl"))
    with train_session.graph.as_default():
        train_session.run(tf.global_variables_initializer())
        train_session.run(train_char_mapping_tensor.init)
        #train_session.run(train_gram2_mapping_tensor.init)
        #train_session.run(train_gram3_mapping_tensor.init)
        train_session.run(train_label_mapping_tensor.init)
        train_session.run(train_init_op)

    # Eval session only needs its lookup tables; the iterator is initialized
    # per evaluation via valid_init_op / test_init_op.
    eval_session = self._create_session(eval_graph)
    eval_session.run(eval_char_mapping_tensor.init)
    #eval_session.run(eval_gram2_mapping_tensor.init)
    #eval_session.run(eval_gram3_mapping_tensor.init)
    eval_session.run(eval_label_mapping_tensor.init)

    tf.logging.info("Start training")
    loss = []
    steps_per_epoch = self.train_data_num // FLAGS.batch_size  # how many batches in an epoch
    for i in range(FLAGS.max_epoch):
        for j in range(steps_per_epoch):
            step, loss_value = train_model.train(train_session)
            loss.append(loss_value)
            # Periodic loss logging; the running loss buffer is reset after
            # each report so the mean covers only the last check interval.
            if step % FLAGS.check_step == 0:
                iteration = step // steps_per_epoch + 1
                tf.logging.info(
                    "iteration:{} step:{}/{}, cross entropy loss:{:>9.6f}".
                    format(iteration, step % steps_per_epoch,
                           steps_per_epoch, np.mean(loss)))
                loss = []
            # Periodic evaluation: checkpoint the train graph, restore it
            # into the eval graph, then score validation and test.
            if step % FLAGS.eval_step == 0:
                tf.logging.info(
                    "Evaluate Validation Dataset and Test Dataset in step: {}"
                    .format(step))
                train_model.saver.save(
                    train_session,
                    os.path.join(self.log_dir, "temp_model.ckpt"))
                tf.logging.info("Saving model parameters in {}".format(
                    os.path.join(self.log_dir, "temp_model.ckpt")))
                eval_model.saver.restore(
                    eval_session,
                    os.path.join(self.log_dir, "temp_model.ckpt"))
                tf.logging.info("Loading model from {}".format(
                    os.path.join(self.log_dir, "temp_model.ckpt")))
                validation_accuracy = self._eval_performance(
                    eval_session, eval_model, "validation", valid_init_op)
                test_accuracy = self._eval_performance(
                    eval_session, eval_model, "test", test_init_op)
                eval_model.save_dev_test_summary(self.summary_writer,
                                                 eval_session,
                                                 validation_accuracy,
                                                 test_accuracy, step)
def main(argv=None):
    """Train a Keras model per the YAML train config, then optionally test.

    Reads the training configuration, splits the HDF5 data set into train
    and validation, sets up data augmentation and callbacks, trains, saves
    the final model, and (if a test config is given) evaluates and writes
    test results next to the training directory.
    """
    handle_train_dir(FLAGS.train_dir)

    # Print and write the flag arguments.
    print_flags(FLAGS)
    write_flags(FLAGS)
    K.set_floatx('float32')

    # Read or/and prepare train config dictionary.
    if FLAGS.train_config_file:
        with open(FLAGS.train_config_file, 'r') as f_yml:
            train_config = yaml.load(f_yml, Loader=yaml.FullLoader)
    else:
        train_config = {}
    train_config = prepare_train_config(train_config, FLAGS)
    train_config = dict2namespace(train_config)

    # Set tensorflow and numpy seeds (weights initialization).
    if train_config.seeds.tf:
        tf.set_random_seed(train_config.seeds.tf)
    np.random.seed(train_config.seeds.np)

    # Open HDF5 file containing the data set.
    hdf5_file = h5py.File(train_config.data.data_file, 'r')
    num_examples, num_classes, image_shape = dataset_characteristics(
        hdf5_file, train_config.data.group_tr, train_config.data.labels_id)
    train_config.data.n_classes = num_classes
    train_config.data.image_shape = image_shape

    # Determine the train and validation sets.
    images_tr, images_val, labels_tr, labels_val, aux_hdf5 = \
        train_val_split(hdf5_file, train_config.data.group_tr,
                        train_config.data.group_val,
                        train_config.data.chunk_size,
                        train_config.data.pct_tr,
                        train_config.data.pct_val,
                        seed=train_config.seeds.train_val,
                        shuffle=train_config.data.shuffle_train_val,
                        labels_id=train_config.data.labels_id)
    train_config.data.n_train = images_tr.shape[0]
    train_config.data.n_val = images_val.shape[0]

    # Data augmentation parameters. Fix: use boolean `or`/`and` instead of
    # bitwise `|`/`&` — bitwise AND of truthy non-0/1 ints (e.g. 2 & 1 == 0)
    # would silently evaluate false.
    with open(train_config.daug.daug_params_file, 'r') as f_yml:
        daug_params_tr = yaml.load(f_yml, Loader=yaml.FullLoader)
    if (daug_params_tr['do_random_crop']
            or daug_params_tr['do_central_crop']) \
            and daug_params_tr['crop_size'] is not None:
        train_config.data.image_shape = daug_params_tr['crop_size']
    daug_params_tr['seed_daug'] = train_config.seeds.daug
    if train_config.daug.aug_per_img_val > 1:
        # NOTE: intentional aliasing — validation uses the same (mutable)
        # augmentation dict as training.
        daug_params_val = daug_params_tr
        daug_params_val['seed_daug'] = train_config.seeds.daug
    else:
        daug_params_val = validation_image_params(train_config.daug.nodaug,
                                                  **daug_params_tr)
    train_config.daug.daug_params_tr = daug_params_tr
    train_config.daug.daug_params_val = daug_params_val

    # Adjust training parameters.
    train_config = define_train_params(train_config,
                                       output_dir=FLAGS.train_dir)

    # Read invariance parameters.
    if train_config.optimizer.invariance:
        with open(train_config.optimizer.daug_invariance_params_file,
                  'r') as f_yml:
            train_config.optimizer.daug_invariance_params = yaml.load(
                f_yml, Loader=yaml.FullLoader)
        with open(train_config.optimizer.class_invariance_params_file,
                  'r') as f_yml:
            train_config.optimizer.class_invariance_params = yaml.load(
                f_yml, Loader=yaml.FullLoader)

    # Get monitored metrics.
    metrics, metric_names = handle_metrics(train_config.metrics)
    FLAGS.metrics = metric_names

    # Initialize the model.
    model, model_cat, loss_weights = _model_setup(train_config, metrics,
                                                  FLAGS.resume_training)
    _model_print_save(model, FLAGS.train_dir)
    callbacks = _get_callbacks(train_config, FLAGS.train_dir,
                               save_model_every=FLAGS.save_model_every,
                               track_gradients=FLAGS.track_gradients,
                               fmri_rdms=FLAGS.fmri_rdms,
                               loss_weights=loss_weights)

    # Write training configuration to disk (timestamped filename).
    output_file = os.path.join(
        FLAGS.train_dir,
        'train_config_' + time.strftime('%a_%d_%b_%Y_%H%M%S') + '.yml')
    with open(output_file, 'w') as f:
        yaml.dump(numpy_to_python(namespace2dict(train_config)), f,
                  default_flow_style=False)

    # Initialize Training Progress Logger(s).
    loggers = []
    if FLAGS.log_file_train:
        log_file = os.path.join(FLAGS.train_dir, FLAGS.log_file_train)
        loggers.append(
            TrainingProgressLogger(log_file, model, train_config,
                                   images_tr, labels_tr))
    if FLAGS.log_file_test:
        log_file = os.path.join(FLAGS.train_dir, FLAGS.log_file_test)
        loggers.append(
            TrainingProgressLogger(log_file, model, train_config,
                                   images_val, labels_val))

    # Train.
    history, model = train(images_tr, labels_tr, images_val, labels_val,
                           model, model_cat, callbacks, train_config,
                           loggers)

    # Save model.
    model.save(os.path.join(FLAGS.train_dir, 'model_final'))

    # Test.
    if FLAGS.test_config_file:
        with open(FLAGS.test_config_file, 'r') as f_yml:
            test_config = yaml.load(f_yml, Loader=yaml.FullLoader)
        test_config = prepare_test_config(test_config, FLAGS)
        test_results_dict = test(images_val, labels_val, images_tr,
                                 labels_tr, model, test_config,
                                 train_config.train.batch_size.val,
                                 train_config.data.chunk_size)

        # Write test results to YAML. Fix: yaml.dump writes str, so the
        # file must be opened in text mode — 'wb' raised TypeError at
        # runtime ("a bytes-like object is required").
        output_file = os.path.join(
            FLAGS.train_dir,
            'test_' + os.path.basename(FLAGS.test_config_file))
        with open(output_file, 'w') as f:
            yaml.dump(numpy_to_python(test_results_dict), f,
                      default_flow_style=False)

        # Write test results to TXT.
        # NOTE(review): str.replace swaps the FIRST 'yml' anywhere in the
        # path — confirm no parent directory ever contains 'yml'.
        output_file = output_file.replace('yml', 'txt')
        write_test_results(test_results_dict, output_file)

        # Print test results.
        print_test_results(test_results_dict)

    # Close and remove aux HDF5 files.
    hdf5_file.close()
    for f in aux_hdf5:
        filename = f.filename
        f.close()
        os.remove(filename)
def train(self):
    """Distributed (between-graph replication) training entry point.

    Parameter servers block in ``server.join()``; workers run the
    training loop; the chief additionally saves flags and idles while
    monitoring the global step. Non-chief tasks sleep briefly so the
    chief initializes variables first.
    """
    # Parameter servers only serve variables; they never return.
    if self.job_name == "ps":
        with tf.device("/cpu:0"):
            self.server.join()
            return

    # Give the chief a head start to create/initialize the graph.
    if not self.is_chief:
        time.sleep(20)
    self._init_dataset_maker(True)

    # Spread variables across parameter servers by load.
    ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy(
        self.num_ps)
    with tf.device(
            tf.train.replica_device_setter(
                worker_device=self.worker_prefix,
                cluster=self.cluster,
                ps_strategy=ps_strategy)):
        self.global_step = tf.train.get_or_create_global_step()
        char_mapping_tensor, label_mapping_tensor = \
            DatasetMaker.make_mapping_table_tensor()
        # Each worker reads its own shard (task_index of num_worker).
        train_dataset = DatasetMaker.make_dataset(
            char_mapping_tensor, label_mapping_tensor, self.train_data,
            FLAGS.batch_size, "train", self.num_worker, self.task_index)
        tf.logging.info(
            "The part {}/{} Training dataset is prepared!".format(
                self.task_index + 1, self.num_worker))
        train_iter = tf.data.Iterator.from_structure(
            train_dataset.output_types, train_dataset.output_shapes)
        self.train_init_op = train_iter.make_initializer(train_dataset)

        train_model = TrainModel(train_iter, FLAGS, self.global_step)
        self.optimizer = train_model.optimizer
        self.train_summary_op = train_model.merge_train_summary_op

    with self._create_session_wrapper(retries=10) as sess:
        try:
            if self.job_name == "worker":
                step = 0
                while not sess.should_stop():
                    global_step_val, loss_value = train_model.train(sess)
                    # Periodic progress logging (local step vs global step).
                    if (step + 1) % self.check_step == 0:
                        epoch = ((step + 1) *
                                 FLAGS.batch_size) // self.train_data_num
                        tf.logging.info(
                            "Job-{}:Worker-{}-----Local_Step/Global_Step:{}/{}:Loss is {:.4f}"
                            .format(self.job_name, self.task_index, step,
                                    global_step_val, loss_value))
                        tf.logging.info(
                            "Epoch:{}-Processed {}/{} data".format(
                                epoch, (step + 1) * FLAGS.batch_size %
                                self.train_data_num, self.train_data_num))
                    step += 1
            elif self.job_name == "chief":
                print_flags(FLAGS, True)
                save_flags(FLAGS,
                           os.path.join(self.root_path, "config.pkl"),
                           True)
                tf.logging.info("Waiting for training...")
                # record top N model's performance
                while True:
                    time.sleep(5)
                    global_step_val = sess.run(self.global_step)
                    tf.logging.info(
                        "Global step is {}".format(global_step_val))
        except tf.errors.OutOfRangeError as e:
            # Fix: traceback.format_exc() takes no exc_info argument — the
            # old call passed the (type, value, tb) tuple as `limit`, which
            # raises TypeError in Python 3 and masked the real error.
            exc_info = traceback.format_exc()
            msg = 'Out of range error:{}\n{}'.format(e, exc_info)
            tf.logging.warn(msg)
            tf.logging.info('Done training -- step limit reached')
def __init__(self, optimizer=tf.train.AdamOptimizer, need_forlders=True):
    """Record the optimizer class and prepare flags (and, optionally, folders).

    ``need_forlders`` (sic — name kept for caller compatibility) controls
    whether output folders are configured before flags are printed.
    """
    # Derive a human-readable input name from the configured input path.
    FLAGS.input_name = inp.get_input_name(FLAGS.input_path)

    # Keep the constructor; the optimizer is instantiated later, not here.
    self.optimizer_constructor = optimizer

    if need_forlders:
        ut.configure_folders(FLAGS)
    ut.print_flags(FLAGS)
# NOTE(review): the next two statements are the tail of a `validate`
# definition whose opening lines fall before this chunk — the enclosing
# condition/context manager is not visible here, so its reconstructed
# indentation is a best guess.
                os.path.basename(tf_records[-1]))):
        network.validate(working_dir, tf_records,
                         checkpoint_path=checkpoint_name,
                         name=validate_name)


def evaluate(black_model, white_model):
    """Play evaluation games between two saved policy-value networks.

    Loads the black and white model weights and plays
    ``GLOBAL_PARAMETER_STORE.EVALUATION_GAMES`` games with
    ``EVALUATION_READOUTS`` readouts each, writing SGF game records
    into ``PATHS.SGF_DIR``.
    """
    os.makedirs(PATHS.SGF_DIR, exist_ok=True)
    with timer("Loading weights"):
        black_net = network.PolicyValueNetwork(black_model)
        white_net = network.PolicyValueNetwork(white_model)
    with timer("Playing {} games".format(
            GLOBAL_PARAMETER_STORE.EVALUATION_GAMES)):
        play_match(black_net, white_net,
                   GLOBAL_PARAMETER_STORE.EVALUATION_GAMES,
                   GLOBAL_PARAMETER_STORE.EVALUATION_READOUTS,
                   PATHS.SGF_DIR)


# Command-line interface: each listed function becomes an argh sub-command.
parser = argparse.ArgumentParser()
argh.add_commands(
    parser,
    [train, selfplay, aggregate, evaluate, initialize_random_model,
     validate])

if __name__ == '__main__':
    print_flags()
    argh.dispatch(parser)