def export():
    tf.logging.set_verbosity(tf.logging.INFO)
    inp = tf.placeholder(tf.float32, [None], name=INPUT_TENSOR_NAME)
    model_fn(dict(data=inp), None, tf.estimator.ModeKeys.PREDICT)

    sess = get_session()
    tf.train.Saver().save(sess, os.path.join(EXPORT_FOLDER, 'checkpoint.ckpt'))
    tf.train.write_graph(sess.graph_def, EXPORT_FOLDER, 'graph.pbtxt', True)
    sess.close()

    print("Freezing graph")
    lp = get_latest_export()
    ckpt = tf.train.get_checkpoint_state(EXPORT_FOLDER)
    freeze_graph(
        os.path.join(EXPORT_FOLDER, 'graph.pbtxt'),
        None,
        False,
        ckpt.model_checkpoint_path,
        OUTPUT_TENSOR_NAME,
        'save/restore_all',
        'save/Const:0',
        os.path.join(EXPORT_FOLDER, 'frozen.pb'),
        True,
        ''
    )

    input_graph_def = tf.GraphDef()
    with tf.gfile.Open(os.path.join(EXPORT_FOLDER, 'frozen.pb'), "rb") as f:
        input_graph_def.ParseFromString(f.read())

    output_graph = optimize_for_inference(
        input_graph_def,
        [INPUT_TENSOR_NAME],
        [OUTPUT_TENSOR_NAME],
        tf.float32.as_datatype_enum
    )
    # Write in binary mode ('wb'); SerializeToString() returns bytes.
    with tf.gfile.FastGFile(EXPORTED_MODEL_NAME, 'wb') as f:
        f.write(output_graph.SerializeToString())
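# A minimal sketch of loading the optimized graph written by export() above for
# inference. It assumes the same INPUT_TENSOR_NAME / OUTPUT_TENSOR_NAME and
# EXPORTED_MODEL_NAME constants as above (node names without the ':0' suffix);
# the feed values are illustrative only.
def load_exported_graph(path):
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return graph


def predict_with_exported_graph(values):
    graph = load_exported_graph(EXPORTED_MODEL_NAME)
    inp = graph.get_tensor_by_name(INPUT_TENSOR_NAME + ':0')
    out = graph.get_tensor_by_name(OUTPUT_TENSOR_NAME + ':0')
    with tf.Session(graph=graph) as sess:
        return sess.run(out, feed_dict={inp: values})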
def build_and_run_exports(latest, job_dir, serving_input_fn, hidden_units):
    """Given the latest checkpoint file export the saved model.

    Args:
      latest (string): Latest checkpoint file
      job_dir (string): Location of checkpoints and model files
      serving_input_fn (function): Returns the serving features and the dict of
        input placeholders used for export
      hidden_units (list): Number of hidden units
    """
    prediction_graph = tf.Graph()
    with prediction_graph.as_default():
        features, inputs_dict = serving_input_fn()
        prediction_dict = model.model_fn(
            model.PREDICT,
            features.copy(),
            None,  # labels
            hidden_units=hidden_units,
            learning_rate=None  # learning_rate unused in prediction mode
        )
        saver = tf.train.Saver()

    with tf.Session(graph=prediction_graph) as session:
        session.run([tf.local_variables_initializer(), tf.tables_initializer()])
        saver.restore(session, latest)
        saved_model_util.simple_save(
            session,
            os.path.join(job_dir, 'export'),
            inputs_dict,
            prediction_dict)
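# The exporters in this file expect serving_input_fn() to return a pair
# (features, inputs_dict): `features` feeds model_fn in PREDICT mode and
# `inputs_dict` maps external input names to the placeholders exposed in the
# serving signature. A hypothetical minimal example; the `parse_csv` helper is
# an assumption, not part of this file.
def example_csv_serving_input_fn():
    csv_row = tf.placeholder(shape=[None], dtype=tf.string, name='csv_row')
    features = parse_csv(csv_row)  # assumed helper turning CSV rows into a feature dict
    return features, {'csv_row': csv_row}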
def test(args):
    # import parameter data
    hps = params_utils.parse_params(params.mnist_classification_args)

    # load dataset
    dataset = load_mnist(show_info=False)

    # inputs setting
    with tf.variable_scope('input'):
        if hps.data_params.is_flattened:
            shape = [None, np.prod(hps.data_params.image_size)]
        else:
            shape = [None] + list(hps.data_params.image_size)
        x = tf.placeholder(tf.float32, shape)
        y = tf.placeholder(tf.float32, [None, 10])
        is_training = tf.placeholder(tf.bool, shape=None)

    # format image
    if hps.data_params.is_flattened:
        if len(hps.data_params.image_size) == 3:
            image_shape = [-1] + list(hps.data_params.image_size)
        elif len(hps.data_params.image_size) == 2:
            image_shape = [-1] + list(hps.data_params.image_size) + [1]
        else:
            raise NotImplementedError('image shape should be NHW or NHWC')
        _x = tf.reshape(x, image_shape)
    else:
        _x = x  # input already has image shape

    # input -> model -> output
    y_hat = model.model_fn(_x, hps, is_training)

    with tf.name_scope('metrics'):
        with tf.name_scope('accuracy'):
            # compare per-example class predictions (argmax over the class axis)
            correctness = tf.equal(tf.argmax(y_hat, axis=1), tf.argmax(y, axis=1))
            correctness = tf.cast(correctness, tf.float32)
            accuracy = tf.reduce_mean(correctness)

    saver = tf.train.Saver()
    path_prefix = Path(args.prefix)
    save_path = hps.paths.model_path / path_prefix
    ckpt = tf.train.get_checkpoint_state(save_path)

    with tf.Session() as sess:
        if ckpt:
            print('restore variable')
            last_model = ckpt.model_checkpoint_path
            saver.restore(sess, last_model)
        else:
            raise Exception('no checkpoint found in {}'.format(save_path))

        accs = []
        for step in tqdm(range(hps.hyper_parameters.step)):
            batch = dataset.test.next_batch(hps.hyper_parameters.batch_size)
            acc = sess.run(accuracy,
                           feed_dict={
                               x: batch[0],
                               y: batch[1],
                               is_training: False
                           })
            accs.append(acc)
        print(np.mean(accs))
def evaluate(args):
    # Build model
    model = model_fn(
        first_layer=[args.inp_kernel, args.inp_window],
        gcn_layers=[args.n_GC_units] * args.n_GC_layers,
        conv_layer_filters=[int(k) for k in args.conv_kernels.split(',')],
        conv_layer_windows=[int(k) for k in args.conv_windows.split(',')],
        nBins=1250,
        lr=args.lr,
        nMarks=args.n_marks,
        verbose=1
    )
    model.load_weights('v11_temp_model_7.h5')

    # Use the model to predict chromosome 2
    ch = 'chr2'
    res = 1000  # Input resolution: 1000 bp

    # Load epigenetic data
    epi_names = ['ATAC_seq', 'CTCF', 'H3K4me1', 'H3K4me3',
                 'H3K9ac', 'H3K27ac', 'H3K27me3', 'H3K36me3']
    epigenetic_data = load_epigenetic_data([ch], epi_names, args.cell_line)

    path = '/nfs/turbo/umms-drjieliu/proj/4dn/data/microC/high_res_map_project_training_data'
    for pos in range(100000, 242100000 - 249999, 125000):
        print(pos)
        hic = np.load(f'{path}/HiC/{args.cell_line}/{ch}/{ch}_{res}bp_{pos}_{pos + 250000}.npy')
        epi = epigenetic_data[ch][pos // 200 - (args.inp_window // 2):
                                  pos // 200 + 1250 + (args.inp_window // 2), :]
        hics = np.array([hic])
        epis = np.array([epi])
        # Model input: Hi-C and epigenetic data
        m = model.predict([hics, epis])[0, :, :]
        np.save(f'outputs/pred_{ch}_{res}bp_{pos}.npy', m)
def dispatch(train_files, eval_files, job_dir, train_steps, eval_steps,
             train_batch_size, eval_batch_size, learning_rate, eval_frequency,
             first_layer_size, num_layers, scale_factor, eval_num_epochs,
             num_epochs, checkpoint_epochs):
    retinopathy_model = model.model_fn(CLASS_SIZE)

    try:
        os.makedirs(job_dir)
    except:
        pass

    # Unhappy hack to work around h5py not being able to write to GCS.
    # Force snapshots and saves to local filesystem, then copy them over to GCS.
    checkpoint_path = FILE_PATH
    if not job_dir.startswith("gs://"):
        checkpoint_path = os.path.join(job_dir, checkpoint_path)

    # Model checkpoint callback
    checkpoint = keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor='val_loss',
        verbose=2,
        period=checkpoint_epochs,
        mode='max')

    # Continuous eval callback
    evaluation = ContinuousEval(eval_frequency, job_dir)

    # Tensorboard logs callback
    tblog = keras.callbacks.TensorBoard(
        log_dir=os.path.join(job_dir, 'logs'),
        histogram_freq=0,
        write_graph=True,
        embeddings_freq=0)

    callbacks = [checkpoint, evaluation, tblog]

    [X_train, Y_train] = model.read_train_data()

    datagen = ImageDataGenerator(
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True)
def test_set(x_test_labelled, x_test_unlabelled, y_test_labelled,
             y_test_unlabelled, params):
    '''
    Helper function for evaluating on the test set
    '''
    # Prepare input and model
    x_l = tf.placeholder(x_test_labelled.dtype, x_test_labelled.shape)
    y_l = tf.placeholder(y_test_labelled.dtype, y_test_labelled.shape)
    x_u = tf.placeholder(x_test_unlabelled.dtype, x_test_unlabelled.shape)
    y_u = tf.placeholder(y_test_unlabelled.dtype, y_test_unlabelled.shape)

    inputs = test_input_fn(x_l, x_u, y_l, y_u, params['batch_size'],
                           params['buffer_size'])
    model = model_fn(inputs, tf.estimator.ModeKeys.EVAL, params)

    l_accuracy = model['l_accuracy']
    u_accuracy = model['u_accuracy']
    update_l_acc = model['update_l_acc']
    update_u_acc = model['update_u_acc']
    acc_init = model['acc_init']

    loss_sum = 0.0
    total_l_acc = 0.0
    total_u_acc = 0.0
    step = 0

    saver = tf.train.Saver()
    # chkp.print_tensors_in_checkpoint_file(os.path.join(params['model_dir'], 'model.ckpt'), tensor_name='', all_tensors=False, all_tensor_names=True)

    with tf.Session() as sess:
        tf.logging.info('Starting testing')
        sess.run(model['var_init'])
        sess.run(acc_init)

        # Restore trained weights from training in params['model_dir']
        saver.restore(sess, os.path.join(params['model_dir'], 'model.ckpt'))

        sess.run(
            model['iter_init'], {
                x_l: x_test_labelled,
                x_u: x_test_unlabelled,
                y_l: y_test_labelled,
                y_u: y_test_unlabelled
            })

        # writer = tf.summary.FileWriter(os.path.join(params['model_dir'], 'test_summaries'), sess.graph)

        while True:
            try:
                step += 1
                # Update accuracy values
                _, _, l_acc_val, u_acc_val = \
                    sess.run([update_l_acc, update_u_acc, l_accuracy, u_accuracy])
                total_l_acc = l_acc_val
                total_u_acc = u_acc_val

                if params['log_step']:
                    if step % params['log_step'] == 0:
                        # Log accuracies so far if appropriate
                        tf.logging.info(
                            'Loss: {}; Labelled Accuracy: {}; Unlabelled Accuracy: {}'
                            .format(loss_sum / step, total_l_acc, total_u_acc))
            except tf.errors.OutOfRangeError:
                # At the end of the dataset
                break

    return total_l_acc, total_u_acc
def train():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.Session(config=config)
    tf.keras.backend.set_session(sess)

    model = model_fn((height, width, 3), learning_rate)
    model.fit(training_input_fn,
              steps_per_epoch=steps_per_epoch,
              epochs=epochs,
              validation_data=validation_input_fn,
              validation_steps=1,
              callbacks=[tf.keras.callbacks.TensorBoard(log_dir=log_dir)])
def build_and_run_exports(latest, job_dir, name, serving_input_fn, hidden_units):
    """Given the latest checkpoint file export the saved model.

    Args:
      latest (string): Latest checkpoint file
      job_dir (string): Location of checkpoints and model files
      name (string): Name of the checkpoint to be exported. Used in building the
        export path.
      serving_input_fn (function): Returns the serving features and the dict of
        input placeholders used to build the export signature
      hidden_units (list): Number of hidden units
    """
    prediction_graph = tf.Graph()
    exporter = tf.saved_model.builder.SavedModelBuilder(
        os.path.join(job_dir, 'export', name))

    with prediction_graph.as_default():
        features, inputs_dict = serving_input_fn()
        prediction_dict = model.model_fn(
            model.PREDICT,
            features,
            None,  # labels
            hidden_units=hidden_units,
            learning_rate=None  # learning_rate unused in prediction mode
        )
        saver = tf.train.Saver()

        inputs_info = {
            name: tf.saved_model.utils.build_tensor_info(tensor)
            for name, tensor in inputs_dict.iteritems()
        }
        output_info = {
            name: tf.saved_model.utils.build_tensor_info(tensor)
            for name, tensor in prediction_dict.iteritems()
        }
        signature_def = tf.saved_model.signature_def_utils.build_signature_def(
            inputs=inputs_info,
            outputs=output_info,
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

    with tf.Session(graph=prediction_graph) as session:
        session.run(
            [tf.local_variables_initializer(), tf.tables_initializer()])
        saver.restore(session, latest)
        exporter.add_meta_graph_and_variables(
            session,
            tags=[tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    signature_def
            },
        )

    exporter.save()
def dispatch(train_files, eval_files, job_dir, train_steps, eval_steps,
             train_batch_size, eval_batch_size, learning_rate, eval_frequency,
             first_layer_size, num_layers, scale_factor, eval_num_epochs,
             num_epochs, checkpoint_epochs):
    census_model = model.model_fn(INPUT_SIZE, CLASS_SIZE)

    try:
        os.makedirs(job_dir)
    except:
        pass

    # Unhappy hack to work around h5py not being able to write to GCS.
    # Force snapshots and saves to local filesystem, then copy them over to GCS.
    checkpoint_path = FILE_PATH
    if not job_dir.startswith("gs://"):
        checkpoint_path = os.path.join(job_dir, checkpoint_path)

    # Model checkpoint callback
    checkpoint = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                 monitor='val_loss',
                                                 verbose=1,
                                                 period=checkpoint_epochs,
                                                 mode='max')

    # Continuous eval callback
    evaluation = ContinuousEval(eval_frequency,
                                eval_files,
                                learning_rate,
                                job_dir,
                                steps=train_steps)

    # Tensorboard logs callback
    tblog = keras.callbacks.TensorBoard(log_dir=os.path.join(job_dir, 'logs'),
                                        histogram_freq=0,
                                        write_graph=True,
                                        embeddings_freq=0)

    callbacks = [checkpoint, evaluation, tblog]

    census_model.fit_generator(model.generator_input(train_files,
                                                     chunk_size=CHUNK_SIZE),
                               steps_per_epoch=train_steps,
                               epochs=num_epochs,
                               callbacks=callbacks)

    # Unhappy hack to work around h5py not being able to write to GCS.
    # Force snapshots and saves to local filesystem, then copy them over to GCS.
    if job_dir.startswith("gs://"):
        census_model.save(CENSUS_MODEL)
        copy_file_to_gcs(job_dir, CENSUS_MODEL)
    else:
        census_model.save(os.path.join(job_dir, CENSUS_MODEL))

    # Convert the Keras model to TensorFlow SavedModel
    model.to_savedmodel(census_model, os.path.join(job_dir, 'export'))
def build_and_run_exports(latest, job_dir, serving_input_fn, hidden_units):
    """Given the latest checkpoint file export the saved model.

    Args:
      latest (string): Latest checkpoint file
      job_dir (string): Location of checkpoints and model files
      serving_input_fn (function): Returns the serving features and the dict of
        input placeholders used to build the export signature
      hidden_units (list): Number of hidden units
    """
    prediction_graph = tf.Graph()
    exporter = tf.saved_model.builder.SavedModelBuilder(
        os.path.join(job_dir, 'export'))

    with prediction_graph.as_default():
        features, inputs_dict = serving_input_fn()
        prediction_dict = model.model_fn(
            model.PREDICT,
            features.copy(),
            None,  # labels
            hidden_units=hidden_units,
            learning_rate=None  # learning_rate unused in prediction mode
        )
        saver = tf.train.Saver()

        inputs_info = {
            name: tf.saved_model.utils.build_tensor_info(tensor)
            for name, tensor in inputs_dict.iteritems()
        }
        output_info = {
            name: tf.saved_model.utils.build_tensor_info(tensor)
            for name, tensor in prediction_dict.iteritems()
        }
        signature_def = tf.saved_model.signature_def_utils.build_signature_def(
            inputs=inputs_info,
            outputs=output_info,
            method_name=sig_constants.PREDICT_METHOD_NAME
        )

    with tf.Session(graph=prediction_graph) as session:
        session.run([tf.local_variables_initializer(), tf.tables_initializer()])
        saver.restore(session, latest)
        exporter.add_meta_graph_and_variables(
            session,
            tags=[tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                sig_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
            },
            legacy_init_op=main_op()
        )

    exporter.save()
def train_and_evaluate(args):
    # Build model
    model = model_fn(
        first_layer=[args.inp_kernel, args.inp_window],
        gcn_layers=[args.n_GC_units] * args.n_GC_layers,
        conv_layer_filters=[int(k) for k in args.conv_kernels.split(',')],
        conv_layer_windows=[int(k) for k in args.conv_windows.split(',')],
        nBins=1250,
        lr=args.lr,
        nMarks=args.n_marks,
        verbose=1)

    # Load chromosomes
    st_ed_pos = chromosome_sizes[args.cell_line]
    chromosomes = ['chr' + elm for elm in args.chrs.split(',')
                   ] if args.chrs.lower() != 'all' else st_ed_pos.keys()

    # Load resolutions
    resolutions = [int(elm) for elm in args.inp_resolutions.split(',')]

    # Load epigenetic data
    epi_names = [
        'ATAC_seq', 'CTCF', 'H3K4me1', 'H3K4me3', 'H3K9ac', 'H3K27ac',
        'H3K27me3', 'H3K36me3'
    ]
    epigenetic_data = load_epigenetic_data(args.cell_line, chromosomes, epi_names)
    # epigenetic_data = load_processed_epigenetic_data_2(chromosomes, epi_names, args.cell_line)

    for i in range(args.epochs):
        print('Epoch', i, ':')
        t1 = time.time()
        for (epi, hics), micros in generate_batches(args.cell_line, chromosomes,
                                                    resolutions, epi_names,
                                                    epigenetic_data,
                                                    args.batch_size,
                                                    args.inp_window):
            t2 = time.time()
            print(' - Loading data:', t2 - t1, 's')
            model.train_on_batch([hics, epi], micros)
            t3 = time.time()
            print(' - Training:', t3 - t2, 's')
            mse = model.evaluate([hics, epi], micros,
                                 batch_size=args.batch_size, verbose=0)
            t1 = time.time()
            print(' - Evaluating:', t1 - t3, 's')
            print(' - MSE:', mse)
        if i % args.checkpoint_frequency == 0:
            model.save_weights('temp_model_{0}.h5'.format(i))
def load_model(args):
    model = model_fn(
        first_layer=[args.inp_kernel, args.inp_window],
        gcn_layers=[args.n_GC_units] * args.n_GC_layers,
        conv_layer_filters=[int(k) for k in args.conv_kernels.split(',')],
        conv_layer_windows=[int(k) for k in args.conv_windows.split(',')],
        nBins=1250,
        lr=args.lr,
        nMarks=args.n_marks,
        verbose=1)
    model.load_weights('v16_temp_model_11.h5')
    return model
def _run_export(self):
    export_dir = 'export_ckpt_' + re.findall('\d+', self._latest_checkpoint)[-1]
    tf.logging.info('Exporting model from checkpoint {0}'.format(
        self._latest_checkpoint))

    prediction_graph = tf.Graph()
    try:
        exporter = tf.saved_model.builder.SavedModelBuilder(
            os.path.join(self._checkpoint_dir, export_dir))
    except IOError:
        tf.logging.info(
            'Checkpoint {0} already exported, continuing...'.format(
                self._latest_checkpoint))
        return

    with prediction_graph.as_default():
        image, name, inputs_dict = model.serving_input_fn()
        prediction_dict = model.model_fn(model.PREDICT, name, image, None, 6, None)
        saver = tf.train.Saver()

        inputs_info = {
            name: tf.saved_model.utils.build_tensor_info(tensor)
            for name, tensor in inputs_dict.iteritems()
        }
        output_info = {
            name: tf.saved_model.utils.build_tensor_info(tensor)
            for name, tensor in prediction_dict.iteritems()
        }
        signature_def = tf.saved_model.signature_def_utils.build_signature_def(
            inputs=inputs_info,
            outputs=output_info,
            method_name=sig_constants.PREDICT_METHOD_NAME)

    with tf.Session(graph=prediction_graph) as session:
        saver.restore(session, self._latest_checkpoint)
        exporter.add_meta_graph_and_variables(
            session,
            tags=[tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                sig_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
            },
            legacy_init_op=my_main_op())

    exporter.save()
def dispatch(train_files, eval_files, job_dir, learning_rate, eval_frequency,
             num_epochs, checkpoint_epochs):
    # setting the seed for reproducibility
    np.random.seed(13)

    forecast_model = model.model_fn()
    scaler = model.build_scaler(train_files + eval_files)

    try:
        os.makedirs(job_dir)
    except Exception as e:
        print(e)

    # Unhappy hack to work around h5py not being able to write to GCS.
    # Force snapshots and saves to local filesystem, then copy them over to GCS.
    checkpoint_path = CHECKPOINT_PATH
    if not job_dir.startswith("gs://"):
        checkpoint_path = os.path.join(job_dir, checkpoint_path)

    # Model checkpoint callback
    checkpoint = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                 verbose=1,
                                                 period=checkpoint_epochs)

    # Continuous eval callback
    with ContinuousEval(eval_frequency, eval_files, learning_rate, job_dir,
                        scaler) as evaluation:
        # Tensorboard logs callback
        tblog = keras.callbacks.TensorBoard(log_dir=os.path.join(job_dir, 'logs'),
                                            histogram_freq=0,
                                            write_graph=True,
                                            embeddings_freq=0)

        callbacks = [checkpoint, evaluation, tblog]

        x, y = model.load_features(train_files, scaler)
        forecast_model.fit(x, y, epochs=num_epochs, callbacks=callbacks)

        # Unhappy hack to work around h5py not being able to write to GCS.
        # Force snapshots and saves to local filesystem, then copy them over to GCS.
        if job_dir.startswith("gs://"):
            forecast_model.save(MODEL_FILENAME)
            copy_file_to_gcs(job_dir, MODEL_FILENAME)
        else:
            forecast_model.save(os.path.join(job_dir, MODEL_FILENAME))
def dispatch(train_files, eval_files, job_dir, train_steps, eval_steps,
             train_batch_size, eval_batch_size, learning_rate, eval_frequency,
             first_layer_size, num_layers, scale_factor, eval_num_epochs,
             num_epochs, checkpoint_epochs):
    census_model = model.model_fn(INPUT_SIZE, CLASS_SIZE)

    try:
        os.makedirs(job_dir)
    except:
        pass

    # Model checkpoint callback
    checkpoint = keras.callbacks.ModelCheckpoint(os.path.join(job_dir, FILE_PATH),
                                                 monitor='val_loss',
                                                 verbose=1,
                                                 period=checkpoint_epochs,
                                                 mode='max')

    # Continuous eval callback
    evaluation = ContinuousEval(eval_frequency, eval_files, learning_rate, job_dir)

    # Tensorboard logs callback
    tblog = keras.callbacks.TensorBoard(log_dir=os.path.join(job_dir, 'logs'),
                                        histogram_freq=0,
                                        write_graph=True,
                                        embeddings_freq=0)

    # TODO: This needs to be fixed in h5py so that writes to GCS are possible
    # Don't attempt to create checkpoints on Cloud ML Engine for now because
    # h5py doesn't come with native GCS write capability
    if job_dir.startswith('gs://'):
        callbacks = [evaluation, tblog]
    else:
        callbacks = [checkpoint, evaluation, tblog]

    start_time = time.time()
    census_model.fit_generator(model.generator_input(train_files,
                                                     chunk_size=CHUNK_SIZE),
                               steps_per_epoch=train_steps,
                               epochs=num_epochs,
                               callbacks=callbacks)
    print "\nTime used.", time.time() - start_time

    census_model.save(os.path.join(job_dir, CENSUS_MODEL))
def run(args):
    ms.context.set_context(
        mode=ms.context.GRAPH_MODE,
        device_target=args.device,
        save_graphs=False,
    )

    net = model_fn()
    loss = ms.nn.loss.SoftmaxCrossEntropyWithLogits(
        sparse=True,
        reduction='mean',
    )
    opt = build_optimizer(args, net)

    if args.mode == 'init':
        save_checkpoint(
            net,
            ckpt_file_name=os.path.join('seeds', '%d.ckpt' % (time.time())),
        )

    if args.mode == 'train':
        ds_train = create_dataset(
            data_path=os.path.join(args.data_path, 'train'),
            batch_size=args.device_batch_size,
        )
        if args.init_ckpt:
            print('using init checkpoint %s' % (args.init_ckpt))
            load_ckpt(net, args.init_ckpt)
        train(args, net, loss, opt, ds_train)

    if args.mode == 'test':
        ds_test = create_dataset(
            data_path=os.path.join(args.data_path, 'test'),
            batch_size=args.device_batch_size,
        )
        if args.ckpt_files:
            checkpoints = args.ckpt_files.split(',')
        else:
            steps = [10, 20, 30, 40]
            checkpoints = [get_ckpt_file_name(args, i) for i in steps]
        print('will test %d checkpoints' % (len(checkpoints)))
        # for i, n in enumerate(checkpoints):
        #     print('[%d]=%s' % (i, n))
        test(args, net, loss, opt, ds_test, checkpoints)
def train_and_evaluate(hparams):
    img_shape, num_classes, test_input_fn, train_input_fn = get_dataset(hparams.dataset)

    model = model_fn(img_shape, num_classes, hparams.learning_rate)
    # model.summary()
    model.load_weights('my_model_weights.h5')

    strategy = {
        'mirror': MirroredStrategy(num_gpus=hparams.num_gpus,
                                   prefetch_on_device=True),
        'collective': CollectiveAllReduceStrategy(
            num_gpus_per_worker=hparams.num_gpus)
    }[hparams.dist]

    # config = tf.estimator.RunConfig(train_distribute=strategy, save_checkpoints_steps=500)
    dist_config = tf.contrib.distribute.DistributeConfig(
        train_distribute=strategy,
        eval_distribute=strategy,
        remote_cluster=None
    )
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=0,
        intra_op_parallelism_threads=0,
        allow_soft_placement=True
    )
    config = tf.estimator.RunConfig(
        save_checkpoints_steps=2000,
        session_config=session_config,
        experimental_distribute=dist_config
    )

    estimator = tf.keras.estimator.model_to_estimator(model,
                                                      model_dir=hparams.job_dir,
                                                      config=config)

    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: train_input_fn(hparams.batch_size),
        max_steps=8000)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: test_input_fn(hparams.batch_size),
        steps=50,
        start_delay_secs=10,
        throttle_secs=10)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def evaluate_files(pattern, x_splits=7, y_splits=5):
    """
    Evaluates the images matching the pattern and outputs the results as a generator
    """
    image_reader = tf.WholeFileReader()
    sip = tf.train.string_input_producer(
        tf.train.match_filenames_once(pattern), num_epochs=1)
    image_name, image_file = image_reader.read(sip)
    image = tf.image.decode_png(image_file)

    images = []
    shape = tf.shape(image)
    height, width = shape[0], shape[1]
    dx = (width - CROPPED_IMAGE_SIZE) / (x_splits - 1)
    dy = (height - CROPPED_IMAGE_SIZE) / (y_splits - 1)
    for x in range(x_splits):
        for y in range(y_splits):
            images.append(
                tf.image.crop_to_bounding_box(
                    image,
                    tf.minimum(tf.to_int32(dy * y), height - CROPPED_IMAGE_SIZE),
                    tf.minimum(tf.to_int32(dx * x), width - CROPPED_IMAGE_SIZE),
                    CROPPED_IMAGE_SIZE, CROPPED_IMAGE_SIZE))
    images = tf.reshape(
        tf.to_float(images) / 255.0,
        (-1, CROPPED_IMAGE_SIZE, CROPPED_IMAGE_SIZE, 3))

    nn = model_fn(dict(input=images), None, tf.estimator.ModeKeys.PREDICT)
    preds = tf.reshape(nn.predictions['predictions'], (x_splits, y_splits))

    sess = get_session()
    coord = tf.train.Coordinator()
    tf.train.start_queue_runners(sess, coord)
    try:
        while True:
            name, predictions = sess.run([image_name, preds])
            yield name, predictions
    except KeyboardInterrupt:
        pass
    except tf.errors.OutOfRangeError:
        pass
    finally:
        coord.request_stop()
        coord.join()
        sess.close()
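# Hypothetical usage of the evaluate_files() generator above; the glob pattern
# and the way the per-crop predictions are summarised are illustrative only.
for name, predictions in evaluate_files('validation/*.png'):
    print('%s: mean prediction %.4f over %d crops' %
          (name, predictions.mean(), predictions.size))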
def create_model(args):
    model = keras.Model(*model_fn(args))

    if args.nb_ipus_per_replica > 1:
        set_pipeline_options(model, args)
        model.print_pipeline_stage_assignment_summary()
    elif args.nb_ipus_per_replica == 1:
        model.set_gradient_accumulation_options(
            gradient_accumulation_steps_per_replica=args.gradient_accumulation_count,
            offload_weight_update_variables=False)

    model.compile(
        optimizer=get_optimizer(args),
        loss=dice_ce_loss,
        # Number of micro batches to process sequentially in a single execution
        steps_per_execution=args.steps_per_execution
        if args.nb_ipus_per_replica > 0 else None,
        metrics=[dice_coef_accuracy_fn, ce_loss])
    return model
def evaluate():
    tf.logging.set_verbosity(tf.logging.INFO)
    input_dict, label_dict = input_fn()
    nn = model_fn(input_dict, None, tf.estimator.ModeKeys.PREDICT)
    stats = PredictionStats()

    sess = get_session()
    coord = tf.train.Coordinator()
    tf.train.start_queue_runners(sess, coord)
    try:
        while True:
            pred, act = sess.run(
                [nn.predictions['predictions'], label_dict['labels']])
            stats.add_predictions(pred, act)
            print('Predictions: %00000d, Accuracy: %.4f' %
                  (stats.get_amount(), stats.get_accuracy()))
    except KeyboardInterrupt:
        pass
    finally:
        coord.request_stop()
        coord.join()
        sess.close()

    print()
    stats.print_result()
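# A minimal sketch of the PredictionStats helper assumed by evaluate() above,
# implementing only the methods the loop actually calls; the real class may
# track richer per-class statistics.
import numpy as np


class PredictionStats(object):
    def __init__(self):
        self.correct = 0
        self.total = 0

    def add_predictions(self, predictions, actuals):
        predictions = np.asarray(predictions).ravel()
        actuals = np.asarray(actuals).ravel()
        self.correct += int(np.sum(predictions == actuals))
        self.total += predictions.size

    def get_amount(self):
        return self.total

    def get_accuracy(self):
        return self.correct / float(self.total) if self.total else 0.0

    def print_result(self):
        print('Total predictions: %d, accuracy: %.4f' %
              (self.get_amount(), self.get_accuracy()))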
class ModelWrapper(object):
    config = load_config()
    model = model_fn(config)
    model_path = '../model/best/%s-%d' % (config.model_name, 5)
    model.load_weights(model_path)
    # Hack: This fixes a keras bug with TF backend in async environment,
    # see https://github.com/keras-team/keras/issues/2397 for details.
    graph = tf.get_default_graph()
    print 'successfully loaded model from: %s' % model_path

    @classmethod
    def normalize(cls, img):
        """ cropping and zero-centering """
        img = imutils.crop(img, std_size=(cls.config.width, cls.config.height))
        img = (img - 128.0) / 255.0
        return np.array([img])

    @classmethod
    def predict(cls, img):
        with cls.graph.as_default():
            X = cls.normalize(img)
            return id2building[np.argmax(cls.model.predict(X)[0])]
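# Hypothetical usage of ModelWrapper above; the image path is illustrative and
# cv2 is assumed only for reading the file (any uint8 image array would work).
import cv2

img = cv2.imread('samples/building.jpg')
label = ModelWrapper.predict(img)
print 'predicted building: %s' % label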
def get_args():
    parser = argparse.ArgumentParser()
    # argument names below are reconstructed from how `args` is used in __main__
    parser.add_argument('--epoch', type=int)
    parser.add_argument('--mode',
                        type=str,
                        choices=['one', 'all'],
                        required=True)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = get_args()
    X = np.load(X_path)
    y = np.load(y_path)
    ids = np.load(ids_path)

    config = load_config()
    model = model_fn(config)

    if args.mode == 'all':
        for epoch in xrange(20):
            model.load_weights('../model/%s-%d' % (config.model_name, epoch))
            score = model.evaluate(X, y)
            print 'epoch:', epoch
            print 'score:', score
    else:
        model.load_weights('../model/best/%s-%d' % (config.model_name, args.epoch))
        score = model.evaluate(X, y)
        print 'score:', score
def run():
    config = Config()  # Load configs
    save_path = 'keras_models/keras_model'  # Model save path
    x_train_path = 'data/xtrain.txt'
    x_test_path = 'data/xtest.txt'
    y_train_path = 'data/ytrain.txt'

    x_idx = prep.Indexer()
    X = prep.read_file(x_train_path, raw=True)
    y = prep.read_file(y_train_path, label=True)

    t = CountVectorizer(analyzer='char', ngram_range=(config.ngram, config.ngram))
    t.fit(X)
    X = prep.transform(X, t, x_idx)
    X = np.array(pad_sequences(X, config.maxlen))

    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=config.test_size, shuffle=config.shuffle)

    #############################################
    # Train model
    print("BEGINNING TRAINING")
    tsv_logger = CSVLogger('training-data.tsv', append=True, separator='\t')
    m = model_fn(config=config, input_length=x_idx.max_number() + 1)
    # m = load_model(save_path)
    m.fit(x_train, y_train,
          epochs=config.n_epochs,
          batch_size=config.batch_size,
          verbose=1,
          shuffle=True,
          callbacks=[tsv_logger],
          validation_data=(x_test, y_test))
    m.save(save_path)

    print("MODEL REPORT")
    score, acc = m.evaluate(x_test, y_test)
    print("\nSCORE: ", score)
    print("ACCURACY: ", acc)
    pred = [np.argmax(label) for label in m.predict(x_test)]
    report = classification_report(y_test, pred)
    print(report)

    ###############################################
    # Predict and write labels for xtest.txt
    print("PREDICTION")
    X = prep.read_file(x_test_path, raw=True)
    X = prep.transform(X, t, x_idx, add_if_new=False)
    X = np.array(pad_sequences(X, config.maxlen))
    pred = [np.argmax(label) for label in m.predict(X)]
    with open("keras_prediction/ytest.txt", "w+", encoding="utf-8") as rec:
        for label in pred:
            rec.write("%s\n" % label)
def run(target, cluster_spec, is_chief, train_steps, eval_steps, job_dir,
        train_files, eval_files, train_batch_size, eval_batch_size,
        learning_rate, eval_frequency, first_layer_size, num_layers,
        scale_factor, num_epochs, export_format):
    """Run the training and evaluation graph.

    Args:
      target (string): Tensorflow server target
      is_chief (bool): Boolean flag to specify a chief server
      train_steps (int): Maximum number of training steps
      eval_steps (int): Number of steps to run evaluation for at each checkpoint.
        If eval_steps is None, evaluation will run for 1 epoch.
      job_dir (string): Output dir for checkpoint and summary
      train_files (string): List of CSV files to read train data
      eval_files (string): List of CSV files to read eval data
      train_batch_size (int): Batch size for training
      eval_batch_size (int): Batch size for evaluation
      learning_rate (float): Learning rate for Gradient Descent
      eval_frequency (int): Run evaluation every n training steps. Do not
        evaluate too frequently, otherwise you will pay for performance, and do
        not evaluate too infrequently, otherwise you will not know how soon to
        stop training. Use default values to start with.
      first_layer_size (int): Size of the first DNN layer
      num_layers (int): Number of hidden layers in the DNN
      scale_factor (float): Decay rate for the size of hidden layers
      num_epochs (int): Maximum number of training data epochs on which to train
      export_format (str): One of 'JSON', 'CSV' or 'EXAMPLE'. The input format
        for the exported saved_model binary.
    """
    # Calculate the number of hidden units
    hidden_units = [
        max(2, int(first_layer_size * scale_factor**i))
        for i in range(num_layers)
    ]

    # The chief is the `master` server. In between-graph replication, the chief
    # is the one node in the cluster with extra responsibility, and by default
    # it is worker task zero. We have assigned master as the chief.
    #
    # See https://youtu.be/la_M6bCV91M?t=1203 for details on
    # distributed TensorFlow and motivation about chief.
    if is_chief:
        tf.logging.info("Created DNN hidden units {}".format(hidden_units))
        evaluation_graph = tf.Graph()
        with evaluation_graph.as_default():
            # Features and label tensors
            features, labels = model.input_fn(
                eval_files,
                num_epochs=None if eval_steps else 1,
                batch_size=eval_batch_size,
                shuffle=False)
            # Accuracy and AUROC metrics
            # model.model_fn returns the dict when EVAL mode
            metric_dict = model.model_fn(model.EVAL,
                                         features.copy(),
                                         labels,
                                         hidden_units=hidden_units,
                                         learning_rate=learning_rate)

        hooks = [
            EvaluationRunHook(
                job_dir,
                metric_dict,
                evaluation_graph,
                eval_frequency,
                eval_steps=eval_steps,
            )
        ]
    else:
        hooks = []

    # Create a new graph and specify that as default
    with tf.Graph().as_default():
        # Placement of ops on devices using replica device setter
        # which automatically places the parameters on the `ps` server
        # and the `ops` on the workers
        #
        # See:
        # https://www.tensorflow.org/api_docs/python/tf/train/replica_device_setter
        with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)):
            # Features and label tensors as read using filename queue
            features, labels = model.input_fn(train_files,
                                              num_epochs=num_epochs,
                                              batch_size=train_batch_size)

            # Returns the training graph and global step tensor
            train_op, global_step_tensor = model.model_fn(
                model.TRAIN,
                features.copy(),
                labels,
                hidden_units=hidden_units,
                learning_rate=learning_rate)

        # Creates a MonitoredSession for training
        # MonitoredSession is a Session-like object that handles
        # initialization, recovery and hooks
        # https://www.tensorflow.org/api_docs/python/tf/train/MonitoredTrainingSession
        with tf.train.MonitoredTrainingSession(
                master=target,
                is_chief=is_chief,
                checkpoint_dir=job_dir,
                hooks=hooks,
                save_checkpoint_secs=20,
                save_summaries_steps=50) as session:
            # Global step to keep track of global number of steps particularly in
            # distributed setting
            step = global_step_tensor.eval(session=session)

            # Run the training graph which returns the step number as tracked by
            # the global step tensor.
            # When train epochs is reached, session.should_stop() will be true.
            while (train_steps is None or
                   step < train_steps) and not session.should_stop():
                step, _ = session.run([global_step_tensor, train_op])

        # Find the filename of the latest saved checkpoint file
        latest_checkpoint = tf.train.latest_checkpoint(job_dir)

        # Only perform this if chief
        if is_chief:
            build_and_run_exports(latest_checkpoint, job_dir,
                                  model.SERVING_INPUT_FUNCTIONS[export_format],
                                  hidden_units)
# Load Vocabularies
words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=1)

# Create the input data pipeline
logging.info("Creating the datasets...")
train_sentences = load_dataset_from_text(path_train_sentences)
eval_sentences = load_dataset_from_text(path_eval_sentences)

# Specify other parameters for the dataset and the model
params.eval_size = params.dev_size
params.buffer_size = params.train_size  # buffer size for shuffling
params.id_pad_word = words.lookup(tf.constant(params.pad_word))

# Create the two iterators over the two datasets
train_inputs = input_fn('train', train_sentences, words, params)
eval_inputs = input_fn('eval', eval_sentences, words, params)
logging.info("- done.")

# Define the models (2 different sets of nodes that share weights for train and eval)
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
logging.info("- done.")

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params,
                   args.restore_dir)
def build_model(self):
    x_shape = self.train_iter.get_shape("x")
    self.model = model_fn(x_shape=x_shape, rnn=self.config.rnn)
def main():
    parser = argparse.ArgumentParser(
        description='train the model for all model')
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--model_dir', type=str, default='training_model')
    parser.add_argument('--n_threads', type=int, default=4)
    args = parser.parse_args()

    epochs = args.epochs
    batch_size = args.batch_size
    model_dir = args.model_dir

    input_shape = [28, 28, 1]
    num_classes = 10

    mnist_generator = MnistDataGenerator(args.batch_size)
    custom_runner = CustomRunner(input_shape, num_classes, args.batch_size,
                                 mnist_generator.train_iterator_heavy)
    images, labels = custom_runner.get_inputs()

    train_inputs = {'x': images, 'y': labels}
    valid_inputs = {
        'x': tf.placeholder(tf.float32, [None] + input_shape),
        'y': tf.placeholder(tf.float32, [None, num_classes])
    }
    train_model_spec = model_fn(train_inputs, is_train=True)
    valid_model_spec = model_fn(valid_inputs, reuse=True, is_train=False)

    os.makedirs(model_dir, exist_ok=True)
    set_logger(os.path.join(model_dir, 'train.log'))
    save_dir = os.path.join(model_dir, 'weights')
    save_path = os.path.join(save_dir, 'epoch')
    begin_at_epoch = 0
    steps_per_epoch = mnist_generator.num_train_sample // batch_size

    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=5)  # will keep last 5 epochs
        sess.run(tf.global_variables_initializer())
        tf.train.start_queue_runners(sess=sess)
        custom_runner.start_threads(sess, n_threads=args.n_threads)

        if os.path.isdir(save_dir):
            restore_from = tf.train.latest_checkpoint(save_dir)
            begin_at_epoch = int(restore_from.split('-')[-1])
            saver.restore(sess, restore_from)
            epochs += begin_at_epoch

        for epoch in range(begin_at_epoch, epochs):
            logging.info('Epoch {}/{}'.format(epoch + 1, epochs))
            train_loss, train_acc = train(sess, train_model_spec, steps_per_epoch)
            valid_loss, valid_acc = eval(sess, valid_model_spec,
                                         mnist_generator.test_iterator)
            logging.info('train/acc: {:.4f}, train/loss: {:.4f}'.format(
                train_acc, train_loss))
            logging.info('valid/acc: {:.4f}, valid/loss: {:.4f}'.format(
                valid_acc, valid_loss))
            saver.save(sess, save_path, global_step=epoch + 1)
metrics = {}

# Count batches
print('Counting batches')
num_batches = 0
for _ in batch_generator(hparams, 'test'):
    num_batches += 1
print(num_batches, 'batches')

for mode in ['eval', 'eval_sample']:
    # Model
    tf.reset_default_graph()
    tf.set_random_seed(0)
    inputs = tf.placeholder(tf.int32, [None, None])
    labels = tf.placeholder(tf.int32, [None, None])
    probs, losses = model_fn(inputs, hparams, mode, labels)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # This workaround helps to run evaluation a second time with a different graph
    gc.collect()

    # Evaluate model on test set
    with tf.Session() as sess:
        # Load model
        saver.restore(sess, os.path.join(hparams['model_path'], 'model.ckpt'))

        # Get batches from generator
        with PG(batch_generator(hparams, 'test'), 10) as g:
            g = GeneratorLen(g, num_batches)  # use progressbar
            for batch_x, batch_y in tqdm(g):
def train(args: Dict):
    # import parameter data
    hps = params_utils.parse_params(params.mnist_classification_args)

    # load dataset
    dataset = load_mnist(show_info=False)

    # inputs setting
    with tf.variable_scope('input'):
        if hps.data_params.is_flattened:
            shape = [None, np.prod(hps.data_params.image_size)]
        else:
            shape = [None] + list(hps.data_params.image_size)
        x = tf.placeholder(tf.float32, shape)
        y = tf.placeholder(tf.float32, [None, 10])
        is_training = tf.placeholder(tf.bool, shape=None)

    # format image
    if hps.data_params.is_flattened:
        if len(hps.data_params.image_size) == 3:
            image_shape = [-1] + list(hps.data_params.image_size)
        elif len(hps.data_params.image_size) == 2:
            image_shape = [-1] + list(hps.data_params.image_size) + [1]
        else:
            raise NotImplementedError('image shape should be NHW or NHWC')
        _x = tf.reshape(x, image_shape)
    else:
        _x = x  # input already has image shape

    # input -> model -> output
    y_hat = model.model_fn(_x, hps, is_training)

    # setup metrics
    with tf.name_scope('metrics'):
        with tf.name_scope('accuracy'):
            # compare per-example class predictions (argmax over the class axis)
            correctness = tf.equal(tf.argmax(y_hat, axis=1), tf.argmax(y, axis=1))
            correctness = tf.cast(correctness, tf.float32)
            accuracy = tf.reduce_mean(correctness)
        with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=y, logits=y_hat)
            loss = tf.reduce_mean(cross_entropy)

    # note:
    # xxx is THE value, xxx_op is the OPERATION to update xxx
    with tf.name_scope('train'):
        train_loss, train_loss_op = tf.metrics.mean(loss, name='train_loss')
        train_acc, train_acc_op = tf.metrics.mean(accuracy, name='train_acc')
        tf.summary.scalar('loss', train_loss, collections=['train'])
        tf.summary.scalar('acc', train_acc, collections=['train'])
    with tf.name_scope('val'):
        val_loss, val_loss_op = tf.metrics.mean(loss, name='val_loss')
        val_acc, val_acc_op = tf.metrics.mean(accuracy, name='val_acc')
        tf.summary.scalar('loss', val_loss, collections=['val'])
        tf.summary.scalar('acc', val_acc, collections=['val'])

    # metrics initializer
    train_metrics_initialize_op = tf.variables_initializer(
        [var for var in tf.local_variables() if 'train/' in var.name])
    val_metrics_initialize_op = tf.variables_initializer(
        [var for var in tf.local_variables() if 'val/' in var.name])

    # gathered summary operation
    train_summary_op = tf.summary.merge_all('train')
    val_summary_op = tf.summary.merge_all('val')

    # optimizer settings
    with tf.name_scope('optimizer'):
        global_step = tf.Variable(0, trainable=False, name='global_step')
        learning_rate = hps.hyper_parameters.learning_rate
        if hps.hyper_parameters.optimizer == model.Optimizer.ADAM:
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        elif hps.hyper_parameters.optimizer == model.Optimizer.SGD:
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        else:
            raise NotImplementedError('optimizer is in {}'.format(
                list(model.Optimizer)))
        train_step = optimizer.minimize(loss, global_step=global_step)

    init_op = tf.global_variables_initializer()
    local_init_op = tf.local_variables_initializer()

    saver = tf.train.Saver()
    path_prefix = Path(args.prefix)
    save_path = hps.paths.model_path / path_prefix
    save_path.mkdir(parents=True, exist_ok=True)
    ckpt = tf.train.get_checkpoint_state(save_path)

    with tf.Session() as sess:
        if ckpt:
            print('restore variable')
            last_model = ckpt.model_checkpoint_path
            saver.restore(sess, last_model)
            sess.run(train_metrics_initialize_op)
            sess.run(val_metrics_initialize_op)
            writer = tf.summary.FileWriter(hps.paths.log_path / path_prefix,
                                           sess.graph)
        else:
            # initialize all variables and operations
            sess.run(init_op)
            sess.run(local_init_op)
            sess.run(train_metrics_initialize_op)
            sess.run(val_metrics_initialize_op)
            sess.run(init_op)
            writer = tf.summary.FileWriter(hps.paths.log_path / path_prefix,
                                           sess.graph)

        for step in tqdm(range(hps.hyper_parameters.step)):
            step += 1
            batch = dataset.train.next_batch(hps.hyper_parameters.batch_size)
            sess.run([train_step, train_loss_op, train_acc_op],
                     feed_dict={
                         x: batch[0],
                         y: batch[1],
                         is_training: True
                     })

            # train log
            if step % 100 == 0:
                summary, gstep = sess.run([train_summary_op, global_step])
                writer.add_summary(summary, global_step=gstep)
                sess.run(train_metrics_initialize_op)
                saver.save(sess, save_path / Path('model.ckpt'),
                           global_step=global_step)

            # validation log
            if step % 1000 == 0:
                sess.run(val_metrics_initialize_op)
                for _ in range(50):
                    val_batch = dataset.train.next_batch(100)
                    sess.run([val_loss_op, val_acc_op],
                             feed_dict={
                                 x: val_batch[0],
                                 y: val_batch[1],
                                 is_training: False
                             })
                summary, gstep = sess.run([val_summary_op, global_step])
                writer.add_summary(summary, global_step=gstep)
import tensorflow as tf
import numpy as np
from multiprocessing_generator import ParallelGenerator as PG
import os
from tqdm import tqdm
from featurizer import Encoder
from model import model_fn
from my_utils import *
from load_hparams import hparams, PrintHparamsInfo

PrintHparamsInfo(hparams)

# Model
inputs = tf.placeholder(tf.float32, [None, hparams['latent_size']])
decoded = model_fn(inputs, hparams, 'decode')

text_encoder = Encoder(hparams)

# Add ops to save and restore all the variables.
saver = tf.train.Saver()

# Create output file
opath = os.path.join(hparams['output_path'], 'decoded.txt')
ofile = open(opath, 'w')

# Decode sequences
with tf.Session() as sess:
    # Load model
    saver.restore(sess, os.path.join(hparams['model_path'], 'model.ckpt'))

    # Get vectors from file
    for line in tqdm(
from model import model_fn, pipeline_model_fn
from utils import load_data, parse_params
import time, os

from tensorflow import keras
from tensorflow.python import ipu

# Store class and shape information.
num_classes = 10
input_shape = (28, 28, 1)

x_train, y_train, x_test, y_test = load_data()
args = parse_params()

start = time.time()  # record the start time

if not args.use_ipu:
    # Model.__init__ takes two required arguments, inputs and outputs.
    model = keras.Model(*model_fn(input_shape, num_classes))

    # Compile our model with Stochastic Gradient Descent as an optimizer
    # and Categorical Cross Entropy as a loss.
    model.compile(optimizer='sgd',
                  loss='categorical_crossentropy',
                  metrics=["accuracy"])
    model.summary()

    print('Training')
    model.fit(x_train, y_train, epochs=3, batch_size=64)

    print('Evaluation')
    model.evaluate(x_test, y_test)
else:
    # Standard IPU TensorFlow setup.
    ipu_config = ipu.utils.create_ipu_config()
    ipu_config = ipu.utils.auto_select_ipus(ipu_config, 2)