def main():
    # Load the parameters from the json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), 'No json configuration file found at {}'.format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.model_dir, 'train.log'))

    if not os.path.exists(args.restore_from):
        os.makedirs(args.restore_from)

    # Create the input data pipeline
    logging.info('Creating the datasets...')
    data_dir = args.data_dir
    train_data_dir = os.path.join(data_dir, 'train')
    valid_data_dir = os.path.join(data_dir, 'valid')

    # Get the filenames and labels from the train and valid sets
    train_filenames, train_labels = get_filenames_and_labels(train_data_dir, params)
    valid_filenames, valid_labels = get_filenames_and_labels(valid_data_dir, params)

    params.train_size = len(train_filenames)
    params.valid_size = len(valid_filenames)
    params.num_labels = len(set(train_labels))

    # Create the two iterators over the two datasets
    train_inputs = input_fn(True, train_filenames, train_labels, params)
    valid_inputs = input_fn(False, valid_filenames, valid_labels, params)

    # Define the model
    logging.info('Creating the model...')
    train_model_spec = model_fn('train', train_inputs, params)
    valid_model_spec = model_fn('eval', valid_inputs, params, reuse=True)

    # Train the model
    logging.info('Starting training for {} epoch(s)'.format(params.num_epochs))
    train_and_evaluate(train_model_spec, valid_model_spec, args.model_dir,
                       params, args.restore_from)
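# NOTE: get_filenames_and_labels is not defined in this snippet. Below is a
# minimal, hypothetical sketch, assuming the layout used elsewhere in this
# section: .jpg images whose first filename character encodes the integer label.
def get_filenames_and_labels(data_dir, params):
    # Collect all .jpg files in the directory
    filenames = [os.path.join(data_dir, f)
                 for f in os.listdir(data_dir) if f.endswith('.jpg')]
    # Parse the label from the first character of each filename
    labels = [int(os.path.basename(f)[0]) for f in filenames]
    return filenames, labels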
def train():
    # Set the logger
    set_logger(os.path.join(params['model_dir'], 'train.log'))

    # Log the parameters
    logging.info(params)

    # Load the vocabulary
    vocab = tf.contrib.lookup.index_table_from_file(vocab_path, num_oov_buckets=1)

    # Create the input data pipeline
    logging.info('Creating the datasets...')
    train_input_words = load_dataset_from_text(data_dir, train_input_filename, vocab)
    train_context_words = load_dataset_from_text(data_dir, train_context_filename, vocab)

    # Create the iterators over the dataset
    train_inputs = input_fn('train', train_input_words, train_context_words, params)
    eval_inputs = input_fn('eval', train_input_words, train_context_words, params)
    logging.info('- done.')

    # Define the model
    logging.info('Creating the model...')
    train_model_spec = model_fn('train', train_inputs, params, reuse=tf.AUTO_REUSE)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
    logging.info('- done.')

    # Train the model
    logging.info('Starting training for {} epoch(s)'.format(params['num_epochs']))
    normalized_embedding_matrix = train_and_evaluate(train_model_spec, eval_model_spec, params)

    # Save the parameters and the normalized embedding matrix
    save_dict_to_json(params, os.path.join(params['model_dir'], 'params.json'))
    pd.DataFrame(normalized_embedding_matrix).to_csv(
        os.path.join(params['model_dir'], 'normalized_embedding_matrix.tsv'),
        index=False, header=None, sep='\t')
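# NOTE: load_dataset_from_text is not shown here. Below is a minimal sketch,
# assuming the TF 1.x tf.data API used above and one whitespace-tokenized
# sentence per line; the (data_dir, filename) argument split mirrors the call
# site, but the body is an assumption.
def load_dataset_from_text(data_dir, filename, vocab):
    # One example per line; split into tokens, then map tokens to vocab ids
    dataset = tf.data.TextLineDataset(os.path.join(data_dir, filename))
    dataset = dataset.map(lambda line: tf.string_split([line]).values)
    dataset = dataset.map(lambda tokens: vocab.lookup(tokens))
    return dataset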
path_train_tfrecords = os.path.join(args.data_dir, 'train_' + args.tfrecords_filename)
path_eval_tfrecords = os.path.join(args.data_dir, 'eval_' + args.tfrecords_filename)

# Create the input data pipeline
logging.info("Creating the datasets...")
train_dataset = load_dataset_from_tfrecords(path_train_tfrecords)
eval_dataset = load_dataset_from_tfrecords(path_eval_tfrecords)

# Specify other parameters for the dataset and the model
# Create the two iterators over the two datasets
train_inputs = input_fn('train', train_dataset, params)
eval_inputs = input_fn('vali', eval_dataset, params)
logging.info("- done.")

# Define the models (two different sets of nodes that share weights for train and validation)
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('vali', eval_inputs, params, reuse=True)
logging.info("- done.")

# Train the model, optionally logging the elapsed time
# start_time = time.time()
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params,
                   args.restore_dir)
# print("--- %s seconds ---" % (time.time() - start_time))
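# NOTE: load_dataset_from_tfrecords is not defined in this snippet. A minimal
# sketch, assuming TF 1.x; parsing of the serialized examples is presumed to
# happen later, inside input_fn.
def load_dataset_from_tfrecords(path_tfrecords):
    # Read the raw serialized tf.Example records from the given file
    return tf.data.TFRecordDataset(path_tfrecords)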
# Specify other parameters for the dataset and the model
# Create the two iterators over the two datasets
train_inputs = input_fn('train', train_dataset, params)
eval_inputs = input_fn('vali', eval_dataset, params)
logging.info("- done.")

# Define the models (two different sets of nodes that share weights for train and validation)
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('vali', eval_inputs, params, reuse=True)
logging.info("- done.")

logging.info("Starting training for at most {} epoch(s) for the initial learner"
             .format(params.num_epochs))
start_time = time.time()
global_epoch = train_and_evaluate(train_model_spec, eval_model_spec,
                                  args.model_dir, params,
                                  learner_id=0, restore_from=args.restore_dir)
logging.info("global_epoch: {} epoch(s) at learner 0".format(global_epoch))
logging.info("total time: %s seconds ---" % (time.time() - start_time))

# Start gradient boosting: train the remaining learners
last_global_epoch = global_epoch
if params.num_learners > 1:
    logging.info("Retraining...")
    params.dict['training_keep_prob'] = 1.0
    start_time = time.time()
    for learner_id in range(1, params.num_learners):
        # tf.reset_default_graph()
        # tf.set_random_seed(230)
        # path_train_tfrecords = os.path.join(args.data_dir, 'train_aug-*' + args.tfrecords_filename)
        # path_eval_tfrecords = os.path.join(args.data_dir, 'validation_aug' + args.tfrecords_filename)
train_filenames = [os.path.join(train_data_dir, f)
                   for f in os.listdir(train_data_dir) if f.endswith('.jpg')]
eval_filenames = [os.path.join(dev_data_dir, f)
                  for f in os.listdir(dev_data_dir) if f.endswith('.jpg')]

# Labels will be between 0 and 5 inclusive (6 classes in total)
train_labels = [int(f.split('/')[-1][0]) for f in train_filenames]
eval_labels = [int(f.split('/')[-1][0]) for f in eval_filenames]

# Specify the sizes of the datasets we train on and evaluate on
params.train_size = len(train_filenames)
params.eval_size = len(eval_filenames)

# Create the two iterators over the two datasets
train_inputs = input_fn(True, train_filenames, train_labels, params)
eval_inputs = input_fn(False, eval_filenames, eval_labels, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, model_dir, params, restore_from)
    test_flow_filenames = construct_optical_flow_filenames(test_filenames, params.volume_depth)
    test_inputs = input_temporal_fn(False, test_flow_filenames, test_labels, params)

    # Free up the memory
    del test_flow_filenames
    del test_filenames
    del test_labels
else:
    test_inputs = input_two_stream_fn(False, test_filenames, test_flow_filenames,
                                      test_labels, params)

    # Free up the memory
    del test_flow_filenames
    del test_filenames
    del test_labels

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params, args.stream)
test_model_spec = model_fn('test', test_inputs, params, args.stream, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, test_model_spec, args.model_dir, params,
                   args.restore_from)
npy_vars_weights1_1 = np.load('cluster_array_weights1_1.npy')
# npy_vars = np.reshape(npy_vars, shape)
npy_vars_weights1_2 = np.load('cluster_array_weights1_2.npy')
npy_vars_weights3_1 = np.load('cluster_array_weights3_1.npy')
npy_vars_weights3_2 = np.load('cluster_array_weights3_2.npy')
npy_vars = [npy_vars_weights1_1, npy_vars_weights1_2,
            npy_vars_weights3_1, npy_vars_weights3_2]

train_model_spec = model_fn('train', train_inputs, params, npy_vars=npy_vars)
eval_model_spec = model_fn('vali', eval_inputs, params, npy_vars=npy_vars, reuse=True)
logging.info("- done.")

logging.info("Starting training for at most {} epoch(s) for the initial learner"
             .format(params.num_epochs))
start_time = time.time()
# global_epoch = train_and_evaluate_sample(train_model_spec, eval_model_spec,
#                                          eval_train_model_spec, args.model_dir, params,
#                                          restore_from=args.restore_dir)
global_epoch = train_and_evaluate(train_model_spec, eval_model_spec,
                                  args.model_dir, params,
                                  restore_from=args.restore_dir)
logging.info("global_epoch: {} epoch(s) at learner 0".format(global_epoch))
logging.info("total time: %s seconds ---" % (time.time() - start_time))
assert not overwritting, "Weights found in model_dir, aborting to avoid overwrite"

# Set the logger
set_logger(os.path.join(args.model_dir, 'train.log'))

# Create the input data pipeline
logging.info("Creating the datasets...")
data_dir = args.data_dir
model_dir = args.model_dir
image_dir = os.path.join(data_dir, 'Images')
label_dir = os.path.join(data_dir, 'Labels')

# Create the two iterators over the two datasets
train_inputs = input_fn(True, image_dir, label_dir, params)
eval_inputs = input_fn(False, image_dir, label_dir, params)

# Define the model
logging.info("Creating the model...")
preset = get_preset_by_name('ssdmobilenet160')
train_model_specs = model_fn('train', train_inputs, preset, params)
eval_model_specs = model_fn('eval', eval_inputs, preset, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_specs, eval_model_specs, model_dir, params,
                   args.restore_from)
help="Directory containing the dataset") parser.add_argument( '--restore_from', default=None, help= "Optional, directory or file containing weights to reload before training") if __name__ == '__main__': # Get arguments args = parser.parse_args() # Set the random seed for the whole graph tf.set_random_seed(100) # Set the logger set_logger(os.path.join(args.model_dir, 'train.log')) # Load the parameters params = Params(os.path.join(args.model_dir, 'params.json')) # Initialize the dataset for training dataset = Dataset(args.data_dir, params) # Create the model model = Model(dataset, params) # Train the model logging.info("Starting training for {} epoch(s)".format(params.num_epochs)) train_and_evaluate(model.train_spec, model.eval_spec, args.model_dir, params, args.restore_from)