def make_predictions(conf, shot_list, loader):
    os.environ['THEANO_FLAGS'] = 'device=cpu'
    import theano
    from keras.utils.generic_utils import Progbar
    from model_builder import ModelBuilder

    builder = ModelBuilder(conf)

    y_prime = []
    y_gold = []
    disruptive = []

    _, model = builder.build_train_test_models()
    builder.load_model_weights(model)
    model_save_path = builder.get_latest_save_path()

    start_time = time.time()
    pool = mp.Pool()
    fn = partial(make_single_prediction, builder=builder, loader=loader,
                 model_save_path=model_save_path)

    print('running in parallel on {} processes'.format(pool._processes))
    for (i, (y_p, y, is_disruptive)) in enumerate(pool.imap(fn, shot_list)):
        # for (i, (y_p, y, is_disruptive)) in enumerate(imap(fn, shot_list)):  # serial fallback
        print('Shot {}/{}'.format(i, len(shot_list)))
        sys.stdout.flush()
        y_prime.append(y_p)
        y_gold.append(y)
        disruptive.append(is_disruptive)
    pool.close()
    pool.join()
    print('Finished Predictions in {} seconds'.format(time.time() - start_time))
    return y_prime, y_gold, disruptive
def load_model():
    if not os.path.exists(Hyperparameters.MODEL_PATH):
        model = ModelBuilder.build_model(Hyperparameters.INPUT_SHAPE,
                                         Hyperparameters.OUTPUT_SHAPE)
    else:
        # Use Keras's load_model, aliased so it is not shadowed by this
        # function (the original called load_model recursively here).
        from keras.models import load_model as keras_load_model
        model = keras_load_model(
            Hyperparameters.MODEL_PATH,
            custom_objects=ModelBuilder.get_model_custom_objects())
    return model
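A minimal round-trip sketch of the build-or-load pattern above; the save call is an assumption added for illustration, not part of the snippet:

# First run: no saved file yet, so the model is built from scratch,
# then persisted so later runs take the load branch.
model = load_model()
model.save(Hyperparameters.MODEL_PATH)

# Later runs: the file exists, so Keras reloads it, with custom_objects
# resolving any non-standard layers or losses by name.
model = load_model()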
def main():
    '''Main function.'''
    # Parse arguments from command line
    # args = get_script_arguments()

    # Verify the data file exists
    assert os.path.exists(data_filename), 'Data does not exist.'

    # Load input data
    print('Loading the inputs in memory. It might take a while...')
    data = pickle.load(open(data_filename, 'rb'))

    # Get train and test data
    kx_train, ky_train, kx_test, ky_test, categorical_speakers = data_to_keras(data)

    # Reshape data: convolutional models require data reshaping
    # if args.cnn_1 or args.cnn_2
    if True or False:  # hard-coded stand-in for the commented-out CLI flags above
        kx_train, ky_train, kx_test, ky_test = data_reshape(kx_train, ky_train,
                                                            kx_test, ky_test)

    ############################
    #          MODEL           #
    ############################
    # Instantiate ModelBuilder(input_shape, num_categories)
    builder = ModelBuilder(kx_train.shape, ky_train.shape[1])

    # Create the model object
    model = builder(activation=activation,
                    optimizer=optimizer,
                    dropout_rate=dropout_rate,
                    architecture=get_architecture(True, False),
                    batch_size=batch_size)

    # Train the model
    model, history = fit_model(model, kx_train, ky_train, kx_test, ky_test,
                               max_epochs=60)

    # Plot training history
    plot_history(history)

    # Save the trained model and its weights
    builder.save(model)
def simulate(self, name=None, resource_limit=None):
    model = ModelBuilder()
    self.models, self.rm, self.dm = model.build_all(resource_limit=resource_limit)
    self._initialize_queue()

    simulation = time.time()
    while not self.execution_queue.is_empty():
        current = self.execution_queue.pop()
        if current.start > self.end:
            break
        self._simulate(current)
    print('Simulation time: ' + str(time.time() - simulation))

    LogWriter.write(self.log_queue, name=name)
def main(argv):
    if FLAGS.model_id is not None:
        logging.info("Building model '%s'..." % FLAGS.model_id)
        model_builder = get_model_builder_from_id(FLAGS.model_id)
        FLAGS.checkpoint_dir = model_builder.checkpoint_dir
        FLAGS.data_dir = model_builder.data_dir
        model = model_builder.build()

    logging.info("Loading data from '%s'..." % FLAGS.data_dir)
    data_provider = TFRecordProvider(FLAGS.data_dir,
                                     split="train" if FLAGS.valid else "all")
    data_provider_valid = TFRecordProvider(FLAGS.data_dir, split="valid") if FLAGS.valid else None

    if FLAGS.model_id is None:
        if FLAGS.checkpoint_dir is None:
            raise ValueError("checkpoint_dir must be set.")
        if FLAGS.data_dir is None:
            raise ValueError("data_dir must be set.")
        n_cylinders = 4.
        # losses = [MelSpectralLoss(sample_rate=data_provider.audio_rate, n_bands=2)]
        # losses = [AdaptiveMelSpectralLoss(sample_rate=data_provider.audio_rate, n_bands=8)]
        losses = [TimeFreqResMelSpectralLoss(sample_rate=data_provider.audio_rate,
                                             time_res=1 / data_provider.input_rate)]
        model = ModelBuilder(model_type="f0_rnn_fc_hpn_decoder",
                             audio_rate=data_provider.audio_rate,
                             input_rate=data_provider.input_rate,
                             window_secs=data_provider.example_secs,
                             f0_denom=n_cylinders,
                             checkpoint_dir=FLAGS.checkpoint_dir,
                             losses=losses,
                             feature_domain="time").build()

    logging.info("Building trainer...")
    summary_dir = os.path.join(FLAGS.checkpoint_dir, "summaries", "train")
    if not os.path.exists(summary_dir):
        os.makedirs(summary_dir)
    if FLAGS.valid:
        summary_dir_valid = os.path.join(FLAGS.checkpoint_dir, "summaries", "valid")
        if not os.path.exists(summary_dir_valid):
            os.makedirs(summary_dir_valid)
    strategy = tf.distribute.MirroredStrategy(devices=FLAGS.devices)
    trainer = Trainer(model, strategy)

    logging.info("Initializing training...")
    while True:
        # try:
        train(data_provider,
              trainer,
              batch_size=FLAGS.batch_size,
              steps_per_summary=FLAGS.steps_per_summary,
              steps_per_summary_valid=FLAGS.steps_per_summary_valid,
              steps_per_save=FLAGS.steps_per_save,
              model_dir=FLAGS.checkpoint_dir,
              valid=FLAGS.valid,
              data_provider_valid=data_provider_valid)
        '''except KeyboardInterrupt:
def main(_):
    tf.gfile.MakeDirs(FLAGS.output_dir)

    if FLAGS.is_fixed_emb:
        emb_matrix = utils.get_emb_matrix(FLAGS.data_dir, FLAGS.max_features)

    clr = CyclicLR(base_lr=FLAGS.min_lr,
                   max_lr=FLAGS.max_lr,
                   step_size=2740,
                   mode='exp_range',
                   gamma=0.99994)

    matcher = TextMatcher(FLAGS.model_name, FLAGS.vocab_file,
                          FLAGS.do_lower_case, FLAGS.max_seq_len)

    model_builder = ModelBuilder(model_name=FLAGS.model_name,
                                 max_len=FLAGS.max_seq_len,
                                 input_dim=FLAGS.input_dim,
                                 max_features=FLAGS.max_features,
                                 units=FLAGS.units,
                                 num_filter=FLAGS.num_filter)
    if FLAGS.is_fixed_emb:
        model_builder.set_embedding_matrix(emb_matrix)
    model = model_builder.build_model()
    print(model.summary())

    if FLAGS.do_train:
        train_example = matcher.get_train_examples(FLAGS.data_dir)
        matcher.do_train(model, FLAGS.output_dir, train_example,
                         FLAGS.epochs, FLAGS.batch_size, callback=[clr])

    if FLAGS.do_eval:
        dev_example = matcher.get_dev_examples(FLAGS.data_dir)
        matcher.do_eval(model, FLAGS.output_dir, dev_example, FLAGS.batch_size)

    if FLAGS.do_predict:
        test_example = matcher.get_test_examples(FLAGS.data_dir)
        matcher.do_predict(model, FLAGS.output_dir, test_example, FLAGS.batch_size)
class Model:
    def __init__(self):
        self.model = ModelBuilder().build().condense().model
        self.stemmer = SnowballStemmer('english')

    def simset(self, word):
        stemmed_word = self.stemmer.stem(word)
        return self.model.get(stemmed_word, [])
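A quick usage sketch for the wrapper above; the word 'running' is illustrative, and what simset returns depends entirely on the model ModelBuilder produced:

m = Model()
# SnowballStemmer('english') stems 'running' to 'run', so this returns
# the model entry for 'run', or [] if that stem is absent.
print(m.simset('running'))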
def _create_model(self):
    # TODO: hidden_layers to be fixed
    # hidden_layers = self.params[HIDDEN_LAYERS][0]
    hidden_layers = self.params[HIDDEN_LAYERS]

    mb = ModelBuilder()
    # b = a.grab("tensorflow.python.estimator.canned.linear.LinearRegressor", self.feature_columns)

    self.params['n_classes'] = len(self.label_unique_values) if self.label_unique_values is not None else 0
    self.params['label_vocabulary'] = self.label_unique_values
    self.params['config'] = self.runConfig
    self.params['hidden_units'] = hidden_layers
    self.params['dnn_hidden_units'] = hidden_layers
    self.params['dnn_dropout'] = self.params['dropout']
    self.params['dnn_optimizer'] = self.params['optimizer']
    self.params['linear_optimizer'] = self.params['optimizer']
    self.params['activation_fn'] = getattr(tf.nn, self.params['activation_fn'])

    self.model = mb.create_from_model_name(self.params['model_name'],
                                           self.feature_columns, self.params)
def main():
    # Build the client
    servAddr = "http://192.168.0.91:80/elasticsearch"
    client = ElasticClient(servAddr)

    # Build the netflow model
    builder = ModelBuilder(client)
    ipVersion = "ipv4"
    blacklist = None
    whitelist = [
        "192.168.2.10", "192.168.2.101", "192.168.2.102", "192.168.2.103",
        "192.168.2.104", "192.168.2.105", "192.168.2.106", "192.168.2.107",
        "192.168.2.108", "192.168.0.11", "255.255.255.255", "127.0.0.1",
        "128.0.0.0", "0.0.0.0", "192.255.255.0"
    ]
    hostnameConversionTable = {
        "HP-B53-01": "192.168.0.11",  # scada + hmi
        "COM600-PC": "192.168.2.10"   # abb substation mgt unit; aka 'rtu'
    }

    # Successive assignments narrow the index pattern; only the last one is used.
    indexPattern = "netflow*"
    indexPattern = "netflow-v9-2017*"
    # Uses '-' to exclude specific indices or index-patterns. The april indices
    # have failed repeatedly, due to what appears to be differently-indexed
    # data; may require re-indexing.
    indexPattern = "netflow-v9-2017*,-netflow-v9-2017.04*"

    netflowModel = builder.BuildNetFlowModel(indexPattern,
                                             ipVersion=ipVersion,
                                             ipBlacklist=blacklist,
                                             ipWhitelist=whitelist)
    winlogModel = builder.BuildWinlogEventIdModel("winlogbeat*")

    # Just resolves the keys of the winlog model (hostnames) to their ip addresses.
    convertedModel = dict([(hostnameConversionTable[host], model)
                           for host, model in winlogModel.items()])
    # Store the event model in the nodes; this is redundant, but fine for now.
    netflowModel.MergeVertexModel(convertedModel, "event_id")

    # Build the analyzer
    analyzer = ModelAnalyzer(netflowModel, winlogModel)
    # analyzer.Analyze()
    analyzer.AssignMitreTacticProbabilities()
    netflowModel.Save("netflowModel.pickle")
    netflowModel.PrintAttackModels()
    analyzer.AnalyzeStationaryAttackDistribution()
def test_model(n_gram_mins):
    fe = FeatureExtractor("../dataset/slack_dialogue.txt",
                          n_grams=[1, 2, 3, 4],
                          n_gram_mins=n_gram_mins,
                          debug=False)
    fe.load()

    me = ModelEvaluator(fe.headers, fe.features)
    model_array, highest_rate = me.search_initial_best_fit_algorithm()
    chosen_model = model_array[random.randint(0, len(model_array) - 1)]

    mb = ModelBuilder(chosen_model)
    X_train, X_validation, Y_train, Y_validation = me.split_dataset()
    mb.fit_model(X_train, Y_train)
    accuracy_score = mb.accuracy_score(X_validation, Y_validation)

    print("Got score: " + str(accuracy_score) + " with model: " + str(model_array))
    print("Using : " + str(n_gram_mins))
    return accuracy_score, model_array
def restore_model(checkpoint_filename, video_retriever_generator, selector,
                  extractor):
    """Restore a model from a TensorFlow checkpoint.

    The JSON filename is assumed to be the same as the checkpoint filename,
    but with a .json file extension.
    - Recovers the model's parameters via the json file
    - Builds the model using these parameters
    - Prepares the tensorflow graph and gets the neural network operations
    """
    json_filename = checkpoint_filename + '.json'
    with open(json_filename, 'r', encoding='utf-8') as json_file:
        params = json.load(json_file)

    builder = ModelBuilder(params["training_videos_names"],
                           params["testing_videos_names"],
                           params["n_captions_per_video"],
                           params["feature_n_frames"],
                           video_retriever_generator, selector, extractor)
    model = params["model"]
    builder.create_model(model["enc_units"], model["dec_units"],
                         model["rnn_layers"], model["embedding_dims"],
                         model["learning_rate"], model["dropout_rate"],
                         model["bi_encoder"])
    builder.prepare_training(params["batch_size"])

    model_saver = ModelSaver(os.path.dirname(checkpoint_filename),
                             os.path.basename(checkpoint_filename))
    return builder, model_saver, params
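A hedged usage sketch for restore_model; the checkpoint path and the retriever/selector/extractor objects are placeholders, not names from the source:

# Hypothetical call: recovers hyperparameters from
# 'checkpoints/model-100.json' and rebuilds the graph around them.
builder, model_saver, params = restore_model(
    'checkpoints/model-100', my_video_retriever_generator, my_selector, my_extractor)
print(params['batch_size'])  # training can resume with the saved settings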
def make_predictions_gpu(conf, shot_list, loader):
    os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'
    import theano
    from keras.utils.generic_utils import Progbar
    from model_builder import ModelBuilder

    builder = ModelBuilder(conf)

    y_prime = []
    y_gold = []
    disruptive = []

    _, model = builder.build_train_test_models()
    builder.load_model_weights(model)
    model.reset_states()

    pbar = Progbar(len(shot_list))
    shot_sublists = shot_list.sublists(conf['model']['pred_batch_size'],
                                       shuffle=False, equal_size=True)
    for (i, shot_sublist) in enumerate(shot_sublists):
        # Load data and predict on it.
        X, y, shot_lengths, disr = loader.load_as_X_y_pred(shot_sublist)
        y_p = model.predict(X, batch_size=conf['model']['pred_batch_size'])
        model.reset_states()
        y_p = loader.batch_output_to_array(y_p)
        y = loader.batch_output_to_array(y)

        # Cut arrays back to their true per-shot lengths.
        y_p = [arr[:shot_lengths[j]] for (j, arr) in enumerate(y_p)]
        y = [arr[:shot_lengths[j]] for (j, arr) in enumerate(y)]

        pbar.add(1.0 * len(shot_sublist))
        loader.verbose = False  # True only during the first iteration
        y_prime += y_p
        y_gold += y
        disruptive += disr

    # equal_size padding can overshoot; trim back to the real shot count.
    y_prime = y_prime[:len(shot_list)]
    y_gold = y_gold[:len(shot_list)]
    disruptive = disruptive[:len(shot_list)]
    return y_prime, y_gold, disruptive
def load(self, select_new_best_model=False):
    '''
    Reloads data from the file and selects the best model.
    Useful when there are automated updates to datasets.
    '''
    self.fe.load()
    self.me = ModelEvaluator(self.fe.headers, self.fe.features)
    if select_new_best_model:
        self.me = ModelEvaluator(self.fe.headers, self.fe.features)
        self.model_array, self.highest_rate = self.me.search_initial_best_fit_algorithm()
        self.chosen_model = self.model_array[random.randint(0, len(self.model_array) - 1)]
    self.mb = ModelBuilder(self.chosen_model)
    self.X_train, self.X_validation, self.Y_train, self.Y_validation = self.me.split_dataset()
    self.mb.fit_model(self.X_train, self.Y_train)
    self.accuracy_score = self.mb.accuracy_score(self.X_validation, self.Y_validation)
def make_evaluations_gpu(conf, shot_list, loader):
    os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'
    import theano
    from keras.utils.generic_utils import Progbar
    from model_builder import ModelBuilder

    builder = ModelBuilder(conf)

    y_prime = []
    y_gold = []
    disruptive = []
    batch_size = min(len(shot_list), conf['model']['pred_batch_size'])

    pbar = Progbar(len(shot_list))
    print('evaluating {} shots using batchsize {}'.format(len(shot_list), batch_size))

    shot_sublists = shot_list.sublists(batch_size, equal_size=False)
    all_metrics = []
    all_weights = []
    for (i, shot_sublist) in enumerate(shot_sublists):
        batch_size = len(shot_sublist)
        model = builder.build_model(True, custom_batch_size=batch_size)
        builder.load_model_weights(model)
        model.reset_states()

        # Load data and evaluate on it.
        X, y, shot_lengths, disr = loader.load_as_X_y_pred(shot_sublist,
                                                           custom_batch_size=batch_size)
        all_metrics.append(model.evaluate(X, y, batch_size=batch_size, verbose=False))
        all_weights.append(batch_size)
        model.reset_states()

        pbar.add(1.0 * len(shot_sublist))
        loader.verbose = False  # True only during the first iteration

    if len(all_metrics) > 1:
        print('evaluations all: {}'.format(all_metrics))
    loss = np.average(all_metrics, weights=all_weights)
    print('Evaluation Loss: {}'.format(loss))
    return loss
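For reference, np.average with weights computes exactly the batch-size-weighted mean used above; a toy check:

import numpy as np

# Two batches with losses 0.2 and 0.5 and sizes 128 and 32:
# (0.2 * 128 + 0.5 * 32) / 160 = 0.26
print(np.average([0.2, 0.5], weights=[128, 32]))  # 0.26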
def predict(config_path, model_path=None, labels_json=None, data_dir=None, group=None):
    configs = tools.read_configs(config_path)

    model_path = tools.str2path(model_path or configs.get('model_path'))
    data_dir = tools.str2path(data_dir or configs.get('data_dir'))
    labels_json = tools.str2path(labels_json or configs.get('labels_json'))
    group = group or configs.get('group')

    model_builder = ModelBuilder(configs, mode='predict', model_path=model_path.as_posix())
    model = model_builder.build()

    pred_gen = DataGenerator(configs, image_dir=data_dir, labels_json=labels_json,
                             group=group, mode='predict')

    correct = 0
    false = 0
    for x, y, xnames in pred_gen.flow_from_labels():
        predictions = model.predict(x, verbose=1)
        pred_cls_ids = np.argmax(predictions, axis=1)
        tp = np.sum(pred_cls_ids == y)
        error = len(y) - tp
        correct += tp
        false += error

    # Accuracy is correct predictions over all predictions; the original
    # formula `(1 - false / correct) * 100` was wrong.
    accuracy = correct / (correct + false) * 100
    print(f'Correct: {correct}')
    print(f'False: {false}')
    print(f'Accuracy: {accuracy}')
def test_resnet():
    model_builder = ModelBuilder(10, pretrained=False)
    print(model_builder.available_models)
    model = model_builder['resnet50']
    print(model)
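The test implies ModelBuilder exposes a model catalog through indexing; a sketch of what such a __getitem__ might look like (an assumption about the class, not its actual implementation; build_resnet50 is a hypothetical factory):

class ModelBuilder:
    def __init__(self, num_classes, pretrained=False):
        # Illustrative registry mapping names to constructors; the real
        # class presumably wraps torchvision or similar factories.
        self._factories = {
            'resnet50': lambda: build_resnet50(num_classes, pretrained),
        }

    @property
    def available_models(self):
        return sorted(self._factories)

    def __getitem__(self, name):
        return self._factories[name]()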
def train(config_path, train_dir=None, val_dir=None, output_dir=None,
          train_labels_json=None, val_labels_json=None, group=None,
          model_name=None, model_suffix=None):
    np.random.seed(42)  # for reproducibility
    logger = logging.getLogger('root')

    configs = tools.read_configs(config_path)

    train_dir = tools.str2path(train_dir or configs['train_dir'])
    val_dir = tools.str2path(val_dir or configs['val_dir'])
    train_labels_json = tools.str2path(train_labels_json or configs['train_labels_json'])
    val_labels_json = tools.str2path(val_labels_json or configs['val_labels_json'])
    output_dir = tools.str2path(output_dir or configs['output_dir'])
    group = group or configs['group']
    model_name = model_name or configs['model_name']
    model_suffix = model_suffix or configs['model_suffix']
    train_counts = configs.get('train_class_counts')
    val_counts = configs.get('val_class_counts')

    output_dir.mkdir(exist_ok=True)
    model_out_name = f'{model_name}_{group}_{model_suffix}.h5'
    model_path = output_dir / model_out_name

    train_gen = DataGenerator(configs, train_dir, train_labels_json, 'train', group, train_counts)
    val_gen = DataGenerator(configs, val_dir, val_labels_json, 'val', group, val_counts)

    epochs = configs['epochs']
    classes = configs['network_parameters']['classes']
    loss = configs['loss']
    optimizer = configs['optimizer']

    model_builder = ModelBuilder(configs, 'train', model_name, model_path,
                                 classes, loss, optimizer)
    model = model_builder.build()

    checkpoint = keras.callbacks.ModelCheckpoint(model_path.as_posix(),
                                                 monitor='loss',
                                                 verbose=1,
                                                 save_best_only=True,
                                                 save_weights_only=False,
                                                 mode='min')

    logger.info(f'Training model {model_out_name} for {epochs} epochs')
    logger.info(f'Class weights: {train_gen.class_weights}')

    model.fit_generator(generator=train_gen.flow_generator,
                        steps_per_epoch=train_gen.steps_per_epoch,
                        epochs=epochs,
                        verbose=1,
                        class_weight=train_gen.class_weights,
                        callbacks=[checkpoint],
                        validation_data=val_gen.flow_generator,
                        validation_steps=val_gen.steps_per_epoch)
def train(dataset_train, dataset_valid, train_config, model_config):
    dataset_train_iterator = dataset_train.make_one_shot_iterator()
    dataset_valid_iterator = dataset_valid.make_one_shot_iterator()

    dataset_handle = tf.placeholder(tf.string, shape=[])
    dataset_iterator = tf.data.Iterator.from_string_handle(dataset_handle,
                                                           dataset_train.output_types,
                                                           dataset_train.output_shapes)
    inputs, true_heatmap = dataset_iterator.get_next()

    # model building =========================
    # < complete codes here >
    modelbuilder = ModelBuilder(model_config=model_config)
    pred_heatmap = modelbuilder.get_model(model_in=inputs, scope='model')

    # training ops =============================================
    # < complete codes here >
    loss_heatmap_op = train_config.loss_fn((true_heatmap - pred_heatmap) / train_config.batch_size)
    loss_regularizer_op = tf.losses.get_regularization_loss()
    loss_op = loss_heatmap_op + loss_regularizer_op

    global_step = tf.Variable(0, trainable=False)
    batchnum_per_epoch = np.floor(train_config.train_data_size / train_config.batch_size)

    lr_op = tf.train.exponential_decay(learning_rate=train_config.learning_rate,
                                       global_step=global_step,
                                       decay_steps=train_config.learning_rate_decay_step,
                                       decay_rate=train_config.learning_rate_decay_rate,
                                       staircase=True)

    opt_op = train_config.opt_fn(learning_rate=lr_op, name='opt_op')
    train_op = opt_op.minimize(loss_op, global_step)

    # For Tensorboard ===========================================
    # To inspect intermediate results, functions like tf.summary.image can be
    # used; note that training slows down as more summaries are added.
    file_writer_train = tf.summary.FileWriter(logdir=train_config.tflogdir + '/train')
    file_writer_valid = tf.summary.FileWriter(logdir=train_config.tflogdir + '/valid')
    file_writer_train.add_graph(tf.get_default_graph())

    # tb_summary_loss = tf.summary.scalar('loss', loss_heatmap_op)
    # tb_summary_lr = tf.summary.scalar('learning_rate', lr_op)
    # write_op = tf.summary.merge_all()
    write_op = summary_fn(loss=loss_heatmap_op,
                          total_out_losssum=loss_op,
                          learning_rate=lr_op,
                          input_images=inputs,
                          label_heatmap=true_heatmap,
                          pred_out_heatmap=pred_heatmap,
                          train_config=train_config,
                          model_config=model_config)

    # training ==============================
    init_var = tf.global_variables_initializer()
    saver = tf.train.Saver()

    sess_config = tf.ConfigProto(log_device_placement=True,
                                 gpu_options=tf.GPUOptions(allow_growth=True))
    with tf.Session(config=sess_config) as sess:
        # Run the variable initializer
        sess.run(init_var)

        # Save the graph in a pb file
        tf.train.write_graph(sess.graph_def, train_config.ckpt_dir, 'model.pb')

        train_handle = sess.run(dataset_train_iterator.string_handle())
        valid_handle = sess.run(dataset_valid_iterator.string_handle())

        # There is only one for loop here; the other loop is handled by tf.data.
        tf.logging.info('====================================')
        tf.logging.info('<<<< Training start! >>>>')
        tf.logging.info('[train] training_epochs = %s' % train_config.training_epochs)
        tf.logging.info('------------------------------------')

        for epoch in range(train_config.training_epochs):
            train_start_time = time.time()

            # train model
            _, loss_train = sess.run([train_op, loss_op],
                                     feed_dict={
                                         dataset_handle: train_handle,
                                         modelbuilder.dropout_keeprate:
                                             model_config.output.dropout_keeprate
                                     })
            train_elapsed_time = time.time() - train_start_time

            global_step_eval = global_step.eval()
            if train_config.display_step == 0:
                continue
            elif global_step_eval % train_config.display_step == 0:
                print('[train] curr epochs = %s' % epoch)

                # valid model
                loss_train = loss_heatmap_op.eval(feed_dict={
                    dataset_handle: train_handle,
                    modelbuilder.dropout_keeprate: 1.0
                })
                loss_valid = loss_heatmap_op.eval(feed_dict={
                    dataset_handle: valid_handle,
                    modelbuilder.dropout_keeprate: 1.0
                })

                # tf summary
                summary_train = write_op.eval(feed_dict={
                    dataset_handle: train_handle,
                    modelbuilder.dropout_keeprate: 1.0
                })
                file_writer_train.add_summary(summary_train, global_step_eval)
                file_writer_train.flush()

                summary_valid = write_op.eval(feed_dict={
                    dataset_handle: valid_handle,
                    modelbuilder.dropout_keeprate: 1.0
                })
                file_writer_valid.add_summary(summary_valid, global_step_eval)
                file_writer_valid.flush()

                print('At step = %d, train elapsed_time = %.1f ms' % (global_step_eval, train_elapsed_time))
                print("Training set loss (avg over batch)= %.2f " % (loss_train))
                print("valid set Err loss (total batch)= %.2f " % (loss_valid))
                print("--------------------------------------------")

            if global_step_eval % train_config.ckpt_step == 0:
                ckpt_save_path = saver.save(sess,
                                            train_config.ckpt_dir + 'model.ckpt',
                                            global_step=global_step_eval)
                tf.logging.info("Global step - %s: Model saved in file: %s"
                                % (global_step_eval, ckpt_save_path))

    print("Training finished!")
    file_writer_train.close()
    file_writer_valid.close()
        dataset_root / '*/*/*.avi')).shuffle(buffer_size).batch(batch_size)

    words_in_dir = set()
    for word_dir in dataset_root.glob('*/*'):
        words_in_dir.add(word_dir.parts[-1])

    index_by_word = {}
    with open(str(dictionary_path)) as f:
        for i, word in enumerate(f.readlines()):
            stripped = word.strip()
            if stripped in words_in_dir:
                index_by_word[stripped] = i
    num_words = len(index_by_word)

    model_builder = ModelBuilder(num_frames, frame_shape, num_words,
                                 noise_dim, learning_rate)
    generator = model_builder.build_generator()
    discriminator = model_builder.build_discriminator()

    cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    def discriminator_loss(real_output, fake_output):
        real_loss = cross_entropy(tf.ones_like(real_output), real_output)
        fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
        total_loss = real_loss + fake_loss
        return total_loss

    def generator_loss(fake_output):
        # The body was cut off here; the standard GAN generator loss rewards
        # fooling the discriminator into labeling fakes as real.
        return cross_entropy(tf.ones_like(fake_output), fake_output)
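A minimal sketch of how these loss functions typically plug into a GAN training step; the optimizers and the @tf.function wrapper are standard TF2 boilerplate added here for illustration, not taken from the snippet above:

import tensorflow as tf

generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

@tf.function
def train_step(real_videos, noise):
    # One adversarial update using the losses defined above.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        fake_videos = generator(noise, training=True)
        real_output = discriminator(real_videos, training=True)
        fake_output = discriminator(fake_videos, training=True)
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)
    gen_grads = gen_tape.gradient(gen_loss, generator.trainable_variables)
    disc_grads = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
    generator_optimizer.apply_gradients(zip(gen_grads, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(disc_grads, discriminator.trainable_variables))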
class RepresentationGenerator:
    """A representation (embeddings) generator for visualization of characters.

    When embeddings and labels are written to files, embeddings are written to
    'out_file'_vec.tsv and labels are written to 'out_file'_meta.tsv. You can
    see a 3D visualization of the embeddings in browser using the TensorFlow
    embedding projector. Go to https://projector.tensorflow.org/ and click the
    'Load' button on the left-hand side. Load 'out_file'_vec.tsv as vectors
    and 'out_file'_meta.tsv as metadata to visualize embeddings.

    Initialization:
        >>> rg = RepresentationGenerator(config_path='configs/config.ini', \
                                         out_dir='embeddings/')

    Configurations are all set in the .ini file. Change the path to a new
    config file to change configurations:
        >>> rg.config_path = 'configs/new_config.ini'

    Get representations for all images in a directory (IMPORTANT! All images
    are expected to be generated by VisualizationGenerator in vis_gen.
    Otherwise, make sure file names are in format 'U+XXXX_*' to use the
    'char_as_label' feature):
        >>> codepoints, embeddings = rg.get_embeddings(img_dir='test_imgs')

    Write codepoints and embeddings to file:
        >>> rg.write_embeddings_from_list(codepoints=codepoints, \
                                          embeddings=embeddings)

    Write labels and embeddings to file directly from an image directory:
        >>> rg.write_embeddings_from_image(img_dir='test_imgs')
    """

    def __init__(self, config_path='configs/sample_config.ini', out_dir="embeddings"):
        """Need a checkpoint directory to initialize RepresentationGenerator.

        Args:
            config_path: Str, path to config (.ini) file (default
                "configs/sample_config.ini").
            out_dir: Str, relative path of the output directory (default
                "embeddings").

        Raises:
            ValueError: if model_name not found.
            ValueError: if ckpt_dir does not contain a TensorFlow-formatted
                checkpoint.
        """
        self._dataset_builder = None
        self._model_builder = None
        self._model = None
        self.config_path = config_path
        self.out_dir = out_dir

    @property
    def config_path(self):
        """
        Returns:
            self._config_path: Str, path to config file (.ini file).
        """
        return self._config_path

    @property
    def out_dir(self):
        """
        Returns:
            self._out_dir: Str, path to output directory.
        """
        return self._out_dir

    @config_path.setter
    def config_path(self, config_path):
        """Load config file (.ini file) and get dataset builder and neural
        network model. Sets private attributes self._dataset_builder,
        self._model and self._config_path. Note that this function does not
        parse the config file; it simply uses it to set up the dataset
        builder and model builder.

        Args:
            config_path: Str, path to config (.ini) file.
        """
        if not os.path.isfile(config_path):
            raise ValueError("Specified config file does not exist.")

        # Get dataset builder based on config file
        self._dataset_builder = DatasetBuilder(config_path=config_path, one_hot=False)

        # Get model builder and get encoder (triplet model with weights)
        self._model_builder = ModelBuilder(config_path=config_path)
        self._model = self._model_builder.get_encoder()

        # Set self._config_path
        self._config_path = config_path

    @out_dir.setter
    def out_dir(self, out_dir):
        """
        Args:
            out_dir: Str, relative path of the output directory.
        """
        self._out_dir = out_dir

    def get_embeddings(self, img_dir):
        """For the image files in 'img_dir', return their embeddings.

        Args:
            img_dir: Str, relative path to directory where all character
                images are stored.

        Returns:
            codepoints: List of codepoints with other configs in format
                'CODEPOINT_FONTNAME[_FONTSTYLE]_ANTIALIAS'. Same as the
                filename generated by vis_gen.
            embeddings: List of embeddings. Each element is a representation
                of a character.
        """
        # Get dataset with filename as label
        dataset = self._dataset_builder.get_filename_dataset(img_dir)

        # Get unicode code points and their corresponding embeddings
        codepoints = []
        embeddings = []
        i = 0
        print('Generating embeddings...')
        for img, filename in dataset:
            i += 1
            if i % 100 == 0:
                print("Getting embedding #" + str(i) + ".")
            # Decode Tensor into string
            filename_str = filename.numpy()[0].decode('utf-8')
            codepoints.append(filename_str.split('.')[0])
            # Get embeddings
            embedding = self._model.predict(img)[0]
            embeddings.append(embedding)
        return codepoints, embeddings

    def write_embeddings_from_image(self, img_dir, out_file, char_as_label=True):
        """Get embeddings and write embeddings and labels to .tsv files.

        This function writes to two .tsv files: 'out_file'_vec.tsv and
        'out_file'_meta.tsv. Entries in 'out_file'_vec.tsv are separated by
        newline; elements in each embedding are separated by tab. Entries in
        'out_file'_meta.tsv are separated by newline.

        Args:
            img_dir: Str, relative path to directory where all character
                images are stored.
            out_file: Str, name of the output file intended to write to.
            char_as_label: Bool, whether to use character as label.
        """
        # Get model predictions and unicode code points
        codepoints, embeddings = self.get_embeddings(img_dir=img_dir)

        # Write code points to '_meta.tsv' and embeddings to '_vec.tsv'
        self.write_embeddings_from_list(codepoints, embeddings, out_file, char_as_label)

    def write_embeddings_from_list(self, codepoints, embeddings, out_file,
                                   char_as_label=True):
        """Write labels and embeddings to file.

        Args:
            codepoints: List of Str, each element must be in format 'U+XXXX'.
            embeddings: List of embeddings, one per codepoint.
            out_file: Str, name of the output file intended to write to.
            char_as_label: Bool, whether to use character as label. Otherwise,
                use code points.

        Raises:
            ValueError: if codepoints and embeddings do not have the same
                number of entries.
        """
        # Throw exception if codepoint array and embedding array do not have
        # the same number of elements
        if len(codepoints) != len(embeddings):
            raise ValueError('Expect array codepoints and embeddings to have '
                             'the same number of elements.')

        # Get absolute directory path, create new folder if needed.
        out_dir_abs = os.path.abspath(self.out_dir)
        os.makedirs(out_dir_abs, exist_ok=True)

        # Get absolute path to output files
        out_file_abs = os.path.join(out_dir_abs, out_file)
        out_file_vec_abs = out_file_abs + '_vec.tsv'
        out_file_meta_abs = out_file_abs + '_meta.tsv'

        # Write embeddings to file
        print("Writing embeddings to file {}...".format(out_file_vec_abs))
        np.savetxt(out_file_vec_abs, embeddings, delimiter='\t')
        print('Successfully written to file {}.'.format(out_file_vec_abs))

        # Change Unicode code point to character if specified
        if char_as_label:
            try:
                # 'CODEPOINT_FONTNAME[_FONTSTYLE]_ANTIALIAS' -> 'CODEPOINT'
                codepoints = [codepoint.split('_')[0] for codepoint in codepoints]
                # 'U+XXXX' -> char
                codepoints = [chr(int('0x' + codepoint[2:], 16))
                              for codepoint in codepoints]
            except:
                print('All entries of codepoints array must be in format: '
                      'CODEPOINT_FONTNAME[_FONTSTYLE]_ANTIALIAS. Example: '
                      'U+4eba_Noto Sans CJK SC_Default.')
                raise

        # Write labels
        print("Writing labels to file {}...".format(out_file_meta_abs))
        with open(out_file_meta_abs, "w+") as f_out:
            for label in codepoints:
                f_out.write(label)
                f_out.write('\n')
        print('Successfully written to file {}.'.format(out_file_meta_abs))
    print(f'mpm size: {len(model_builder.registered_bbs)}')
    print(ret)
    return should_end


if __name__ == '__main__':
    STUDENT_ID = os.environ.get('STUDENT_ID')
    print(f'Running student_id: {STUDENT_ID}')

    if not os.path.isdir(RESULT_DIR):
        print(f'Making folder for results: {RESULT_DIR}')
        os.mkdir(RESULT_DIR)
    else:
        print(f'Result folder already exists, abort. {RESULT_DIR}')
        exit()

    should_end = False
    i_generation = 0
    api_connector = APIConnector(
        api_url=os.environ.get('API_URL'),
        student_id=STUDENT_ID,
    )
    model_builder = ModelBuilder()
    while not should_end:
        if i_generation > 50:
            print('Too many generations, abort.')
            exit()
        print(f'Running generation {i_generation}...')
        should_end = run_generation(api_connector, model_builder, i_generation)
        i_generation += 1
def main():
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img, [np.array(gt_bbox, np.int).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img, [np.array(pred_bbox, np.int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                # gt_0_f = []  # yy
                # ti_f = []
                _ti_f = []
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    print("imgsize:", img.shape)
                    # gt_0 =
                    # Obtain its features from the model.
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    # tracker.init(img, gt_bbox_)
                    tracker.init_pln(img, gt_bbox_)  # yy
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                    print("idx:", idx)
                else:
                    # outputs = tracker.track(img, plastnet)
                    pbox = pred_bboxes[idx - 1]
                    p_crop = tracker.get_crop(img, pbox)
                    outputs = tracker.track_pln(img, p_crop)
                    pred_bbox = outputs['bbox']
                    # print("outputs:", pred_bbox)
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                                  (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx + 1, video.name, toc, idx / toc))
    curr_model = config['model-parameters']['model']
    data_shape = json.loads(config['model-parameters']['input_shape'])
    n_channels = json.loads(config['model-parameters']['n_channels'])
    input_shape = tuple(data_shape + [n_channels])

    # Optimizer-hyperparameters config data
    optimizer = config['optimizer-hyperparameters']['optimizer']
    loss = config['optimizer-hyperparameters']['loss']
    lr = json.loads(config['optimizer-hyperparameters']['learning_rate'])
    batch_size = json.loads(config['optimizer-hyperparameters']['batch_size'])
    epochs = json.loads(config['optimizer-hyperparameters']['epochs'])
    steps = json.loads(config['optimizer-hyperparameters']['steps_per_epoch'])
    metrics = json.loads(config['optimizer-hyperparameters']['metrics'])

    model = ModelBuilder(curr_model, classes, input_shape=input_shape)

    # Load pre-trained weights if wanted
    if json.loads(config['DEFAULT']['use_load_weights']):
        weight_folder = config['load-weights']['weights_folder']
        weights = config['load-weights']['weights']
        model.load_weights(os.path.join(weight_folder, weights))

    train_data = os.listdir(train_data_dir)
    data_len = len(train_data)
    random.shuffle(train_data)

    val_range = json.loads(config['model-parameters']['val_range'])
    # Carve out the validation split before truncating the training list;
    # the original sliced train_data first, which made the validation set a
    # subset of the training set.
    val_data = train_data[:int(data_len * val_range)]
    train_data = train_data[int(data_len * val_range) + 1:]
def experiment_ford_helper(ckpt_dir, data_dir, plot_type="spectrogram",
                           sound_mode="save", f0_denom=1.,
                           n_harmonic_distribution=60, n_noise_magnitudes=65,
                           losses=None, feature_domain="freq", model=None):
    '''Code shared by all Ford experiments.'''
    logging.info("Loading data...")
    data_provider = TFRecordProvider(data_dir)
    input_tensor = data_provider.get_single_batch(batch_number=1)
    # input_tensor["f0"] = tf.convert_to_tensor(np.flip(np.arange(32., 33., 100./np.size(input_tensor["f0"]))), dtype=tf.float32)[tf.newaxis, :, tf.newaxis]
    # input_tensor["f0"] = tf.convert_to_tensor(np.arange(1., 200., 100./np.size(input_tensor["f0"])), dtype=tf.float32)[tf.newaxis, :, tf.newaxis]
    # N = np.size(input_tensor["f0"])
    # x = 2*np.pi/N*np.arange(1, N)
    # y = 100 + 50*np.sin(x)
    # input_tensor["f0"] = tf.convert_to_tensor(y, dtype=tf.float32)[tf.newaxis, :, tf.newaxis]
    # input_tensor["f0"] += 20.
    # input_tensor.pop("osc", None)

    logging.info("Building model...")
    if model is None:
        model = ModelBuilder(model_type="f0_rnn_fc_hpn_decoder",
                             audio_rate=data_provider.audio_rate,
                             input_rate=data_provider.input_rate,
                             window_secs=data_provider.example_secs,
                             f0_denom=f0_denom,
                             checkpoint_dir=ckpt_dir,
                             n_harmonic_distribution=n_harmonic_distribution,
                             n_noise_magnitudes=n_noise_magnitudes,
                             losses=losses,
                             feature_domain=feature_domain).build()

    logging.info("Normalizing inputs...")
    features = model.encode(input_tensor)

    logging.info("Synthesizing from f0 signal...")
    start = time.time()
    output_tensor = model.decode(features, training=False)
    time_elapsed = time.time() - start
    logging.info("Synthesis took %.3f seconds." % time_elapsed)

    logging.info("Plotting signals...")
    audio_in = features["audio"].numpy()[0, :]
    audio_out = output_tensor.numpy()[0, :]
    f0 = input_tensor["f0"].numpy()[0, :]
    f0_scaled = features["f0_scaled"].numpy()[0, :]
    if plot_type == "signal":
        _, ax = plt.subplots(4, 1, figsize=(10, 8))
        ax[0].plot(audio_in)
        ax[1].plot(audio_out)
        ax[2].plot(f0)
        ax[3].plot(f0_scaled)
    elif plot_type == "spectrogram":
        '''mag_in = spectral_ops.compute_mag(audio_in, size=8192).numpy().T
        plt.imshow(mag_in, origin="lower")
        plt.show()
        pdb.set_trace()'''
        n_fft = 4096
        n_mels = int(n_fft / 8)
        audio_dict = {"recording": audio_in, "synthesized": audio_out}
        for key in audio_dict.keys():
            plt.figure()
            plot_audio_f0(audio_dict[key], data_provider.audio_rate, f0,
                          data_provider.input_rate, title=key,
                          n_fft=n_fft, n_mels=n_mels)
    plt.show()

    if sound_mode == "play":
        logging.info("Playing original audio...")
        sd.play(audio_in, data_provider.audio_rate)
        sd.wait()
        logging.info("Playing synthesized audio...")
        sd.play(audio_out, data_provider.audio_rate)
        sd.wait()
    elif sound_mode == "save":
        audio_in_path = "./audio_in.wav"
        audio_out_path = "./audio_out.wav"
        logging.info("Saving recorded audio to '%s'..." % audio_in_path)
        sf.write(audio_in_path, audio_in, data_provider.audio_rate)
        logging.info("Saving synthesized audio to '%s'..." % audio_out_path)
        sf.write(audio_out_path, audio_out, data_provider.audio_rate)
def main(argv=None):
    random.seed(2)
    print("Num GPUs Available: ",
          len(tf.config.experimental.list_physical_devices('GPU')))

    # Load and preprocess data
    print("Loading data...")
    data_reader = DataReader(FLAGS.DATA_PATH, FLAGS.DATA_FILENAME, FLAGS.NUM_MODEL)
    well_dic = data_reader.create_well_dictionary()

    print("Preprocessing data...")
    target_well = well_dic[str(FLAGS.WELL_TO_LEARN)]
    test_model_data = target_well[str(FLAGS.TRUE_MODEL)]

    preprocessor = Preprocessor(FLAGS.NUM_MODEL, FLAGS.TRUE_MODEL)
    well_data_zero_removed = preprocessor.remove_zero_wopr(target_well)
    serialized_data, end_indice = preprocessor.serialize_well_dataframe(well_data_zero_removed)
    scaled_data, scaler = preprocessor.scale_serialzed_data(serialized_data)

    # Split dataset and prepare batches
    batch_reader = BatchReader(scaled_data=scaled_data,
                               end_indice=end_indice,
                               train_split=FLAGS.TRAIN_SPLIT,
                               true_model=FLAGS.TRUE_MODEL,
                               buffer_size=FLAGS.BUFFER_SIZE,
                               batch_size=FLAGS.BATCH_SIZE)
    train_data = batch_reader.get_train_batch()
    val_data = batch_reader.get_val_batch()
    train_total_seq_length = batch_reader.get_seq_length()

    # Define model
    print("Defining model...")
    model_builder = ModelBuilder(FLAGS.BATCH_SIZE)
    model = model_builder.contruct_model()
    model.summary()

    # Set training callbacks
    history_logger = HistoryLogger()

    # Train the model
    print("Begin training the model...")
    for epoch_idx in range(FLAGS.EPOCHS):
        print('epochs : ' + str(epoch_idx + 1))
        model.fit(train_data,
                  epochs=1,
                  steps_per_epoch=train_total_seq_length / FLAGS.BATCH_SIZE,
                  verbose=2,
                  validation_data=val_data,
                  validation_steps=100,
                  use_multiprocessing=True,
                  callbacks=[history_logger])
        model.reset_states()

    # Save a figure of the loss history
    print("Saving loss history")
    plotter = Plotter(FLAGS.EPOCHS, FLAGS.WELL_TO_LEARN, FLAGS.TRUE_MODEL)
    plotter.plot_loss_history(history_logger.losses, history_logger.val_losses)

    # Inference (cascade)
    print("Starting inference...")
    test_data = scaler.transform(test_model_data.values)
    total_timestep = test_data.shape[0]
    test_x, test_y = batch_reader.get_test_input_and_label(test_data)

    seq_in = test_x[FLAGS.OBSERVATION_DAY - FLAGS.BATCH_SIZE:FLAGS.OBSERVATION_DAY, :, :]
    seq_out = test_x[:FLAGS.INPUT_SEQUENCE, :1, :].flatten().tolist() \
        + test_y[:FLAGS.OBSERVATION_DAY + 1].tolist()
    pred_count = test_x.shape[0] - FLAGS.OBSERVATION_DAY

    # Do inference from the observation day onward
    for i in range(1, pred_count):
        sample_in = seq_in
        pred_out = model.predict(sample_in)
        seq_out.append(pred_out[-1, :].item())
        seq_in = test_x[FLAGS.OBSERVATION_DAY - FLAGS.BATCH_SIZE + i:FLAGS.OBSERVATION_DAY + i, :, :]
    model.reset_states()

    # Evaluate
    print("Start evaluating the model...")
    seq_out_array = np.asarray(seq_out)
    prediction_val = (seq_out_array - scaler.min_[0]) / scaler.scale_[0]
    true_val = test_model_data['WOPR'].to_numpy()

    # Plot the prediction result
    print("Saving prediction result...")
    plotter.plot_prediction(total_timestep, true_val, prediction_val)

    # Calculate the error and save it to a file
    print("Calculate MAPE and save it to result file...")
    result_handler = ResultHandler(true_val=true_val,
                                   pred_val=prediction_val,
                                   well_to_learn=FLAGS.WELL_TO_LEARN,
                                   true_model=FLAGS.TRUE_MODEL)
    result_handler.save_mape_to_csv(FLAGS.RESULT_FILENAME)

    # Clear session
    tf.keras.backend.clear_session()
    print("Done")
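ResultHandler.save_mape_to_csv suggests the reported error metric is MAPE; for reference, a minimal implementation of that formula (an assumption about what the handler computes internally, not its actual code):

import numpy as np

def mape(true_val, pred_val):
    # Mean absolute percentage error; zero targets are masked out to
    # avoid division by zero.
    true_val = np.asarray(true_val, dtype=float)
    pred_val = np.asarray(pred_val, dtype=float)
    mask = true_val != 0
    return np.mean(np.abs((true_val[mask] - pred_val[mask]) / true_val[mask])) * 100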
def train(dataset_train, dataset_test):
    model_config = ModelConfig()
    train_config = TrainConfig()

    dataset_handle = tf.placeholder(tf.string, shape=[])
    dataset_train_iterator = dataset_train.make_one_shot_iterator()
    # dataset_test_iterator = dataset_test.make_one_shot_iterator()

    inputs = tf.placeholder(dtype=model_config.dtype,
                            shape=[train_config.batch_size,
                                   model_config._input_size,
                                   model_config._input_size,
                                   model_config.input_chnum])
    true_heatmap = tf.placeholder(dtype=model_config.dtype,
                                  shape=[train_config.batch_size,
                                         model_config._output_size,
                                         model_config._output_size,
                                         model_config.output_chnum])

    # model building =========================
    # < complete codes here >
    modelbuilder = ModelBuilder(model_config=model_config)
    pred_heatmap = modelbuilder.get_model(model_in=inputs, scope='model')

    # training ops =============================================
    # < complete codes here >
    loss_heatmap = train_config.loss_fn(true_heatmap - pred_heatmap) / train_config.batch_size
    loss_regularizer = tf.losses.get_regularization_loss()
    loss_op = loss_heatmap + loss_regularizer

    global_step = tf.Variable(0, trainable=False)
    batchnum_per_epoch = np.floor(train_config.train_data_size / train_config.batch_size)

    lr_op = tf.train.exponential_decay(learning_rate=train_config.learning_rate,
                                       global_step=global_step,
                                       decay_steps=train_config.learning_rate_decay_step,
                                       decay_rate=train_config.learning_rate_decay_rate,
                                       staircase=True)

    opt_op = train_config.opt_fn(learning_rate=lr_op, name='opt_op')
    train_op = opt_op.minimize(loss_op, global_step)

    # For Tensorboard ===========================================
    file_writer = tf.summary.FileWriter(logdir=train_config.tflogdir)
    file_writer.add_graph(tf.get_default_graph())

    tb_summary_loss_train = tf.summary.scalar('loss_train', loss_op)
    tb_summary_loss_test = tf.summary.scalar('loss_test', loss_op)
    tb_summary_lr = tf.summary.scalar('learning_rate', lr_op)

    # training ==============================
    init_var = tf.global_variables_initializer()

    print('[train] training_epochs = %s' % train_config.training_epochs)
    print('------------------------------------')

    # build dataset ========================
    # inputs_test_op, true_heatmap_test_op = dataset_test_iterator.get_next()
    inputs_train_op, true_heatmap_train_op = dataset_train_iterator.get_next()

    with tf.Session() as sess:
        # Run the variable initializer
        sess.run(init_var)

        # train_handle = sess.run(dataset_train_iterator.string_handle())
        # test_handle = sess.run(dataset_test_iterator.string_handle())

        for epoch in range(train_config.training_epochs):
            inputs_train, true_heatmap_train = sess.run([inputs_train_op,
                                                         true_heatmap_train_op])
            # inputs_valid, true_heatmap_valid = sess.run([inputs_test_op, true_heatmap_test_op])

            train_start_time = time.time()

            # train model
            # _, loss_train = sess.run([train_op, loss_op],
            #                          feed_dict={dataset_handle: train_handle,
            #                                     modelbuilder.dropout_keeprate: model_config.output.dropout_keeprate})
            _, loss_train = sess.run([train_op, loss_op],
                                     feed_dict={
                                         inputs: inputs_train,
                                         true_heatmap: true_heatmap_train,
                                         modelbuilder.dropout_keeprate:
                                             model_config.output.dropout_keeprate
                                     })
            train_elapsed_time = time.time() - train_start_time

            global_step_eval = global_step.eval()
            if train_config.display_step == 0:
                continue
            elif global_step_eval % train_config.display_step == 0:
                print('[train] curr epochs = %s' % epoch)

                # # test model
                # loss_test = loss_op.eval(feed_dict={dataset_handle: test_handle,
                #                                     modelbuilder.dropout_keeprate: 1.0})
                # loss_test = loss_op.eval(feed_dict={inputs: inputs_valid,
                #                                     true_heatmap: true_heatmap_valid,
                #                                     modelbuilder.dropout_keeprate: 1.0})

                # tf summary
                summary_loss_train = tb_summary_loss_train.eval(feed_dict={
                    inputs: inputs_train,
                    true_heatmap: true_heatmap_train,
                    modelbuilder.dropout_keeprate: 1.0
                })
                # summary_loss_test = tb_summary_loss_test.eval(feed_dict={inputs: inputs_valid,
                #                                                          true_heatmap: true_heatmap_valid,
                #                                                          modelbuilder.dropout_keeprate: 1.0})
                # summary_loss_train = tb_summary_loss_train.eval(feed_dict={dataset_handle: train_handle,
                #                                                            modelbuilder.dropout_keeprate: 1.0})
                # summary_loss_test = tb_summary_loss_test.eval(feed_dict={dataset_handle: test_handle,
                #                                                          modelbuilder.dropout_keeprate: 1.0})
                summary_lr = tb_summary_lr.eval()

                file_writer.add_summary(summary_loss_train, global_step_eval)
                # file_writer.add_summary(summary_loss_test, global_step_eval)
                file_writer.add_summary(summary_lr, global_step_eval)

                print('At step = %d, train elapsed_time = %.1f ms' % (global_step_eval, train_elapsed_time))
                print("Training set loss (avg over batch)= %.2f " % (loss_train))
                # print("Test set Err loss (total batch)= %.2f %%" % (loss_test))
                print("--------------------------------------------")

    print("Training finished!")
    file_writer.close()
    model stores the path for model configurations.
    train stores the path for training configurations.
    test stores the path for testing configurations.
    gpu stores the gpu id used.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', dest='model', type=str, required=True)
    parser.add_argument('--train', dest='train', type=str)
    parser.add_argument('--test', dest='test', type=str)
    args = parser.parse_args()

    # Build Model Graph from Config
    model_config = configparser.ConfigParser()
    model_config.read(args.model)
    model = ModelBuilder(model_config)
    model.build_graph()
    model.compile()
    model.summary_txt()
    model.print_png()
    model.save_graph()

    # Train Model
    if args.train:
        train_config = configparser.ConfigParser()
        train_config.read(args.train)
        trainer = ModelTrainer(train_config)
        trainer.get_hyperparameters()
        trainer.get_train_set()
        trainer.get_dev_set()
        trainer.get_callbacks()
class ModelTrainer:
    def __init__(self, config_path='configs/sample_config.ini'):
        """Read and set configuration from a config (.ini) file and create a
        keras.Model object or input function according to the configuration.

        To add a new model, simply add a new base model to self._MODEL_MAP.

        Args:
            config_path: Str, path to config (.ini) file.

        Raises:
            ValueError: if values in the config file do not have the correct type.
            ValueError: if the optimizer does not exist in the predefined map.
        """
        # Pre-defined learning rate schedules
        self._LR_SCHEDULE_MAP = {
            'ExponentialDecay':
                tf.keras.optimizers.schedules.ExponentialDecay,
            'PiecewiseConstantDecay':
                tf.keras.optimizers.schedules.PiecewiseConstantDecay,
            'PolynomialDecay':
                tf.keras.optimizers.schedules.PolynomialDecay,
        }
        # Pre-defined optimizers
        self._OPTIMIZER_MAP = {
            'Adam': tf.keras.optimizers.Adam,
            'RMSprop': tf.keras.optimizers.RMSprop,
        }
        # Pre-defined losses
        # IMPORTANT: DON'T USE TRIPLET HARD LOSS! EXTREMELY HARD TO TRAIN!
        self._LOSS_MAP = {
            'CrossEntropy': tf.keras.losses.CategoricalCrossentropy,
            'TripletHard': tfa.losses.TripletHardLoss,
            'TripletSemiHard': tfa.losses.TripletSemiHardLoss,
        }
        # Pre-defined metrics
        self._METRIC_MAP = {
            'Accuracy': tf.keras.metrics.CategoricalAccuracy,
        }

        # Get custom dataset and model builders
        self.dataset_builder = DatasetBuilder(config_path=config_path)
        self.model_builder = ModelBuilder(config_path=config_path)

        # Parse config file
        config = configparser.ConfigParser()
        config.read(config_path)

        # Get classifier training config
        self._CLS_CKPT_DIR = config.get('CLASSIFIER_TRAINING', 'CKPT_DIR')
        self._CLS_MAX_STEP = config.getint('CLASSIFIER_TRAINING', 'MAX_STEP')
        self._CLS_OPTIMIZER = config.get('CLASSIFIER_TRAINING', 'OPTIMIZER')
        self._CLS_LR_BOUNDARIES = [
            int(boundary.strip()) for boundary in
            config.get('CLASSIFIER_TRAINING', 'LR_BOUNDARIES').split(',')
        ]
        self._CLS_LR_VALUES = [
            float(value.strip()) for value in
            config.get('CLASSIFIER_TRAINING', 'LR_VALUES').split(',')
        ]

        # Get triplet training config
        self._TPL_INIT_DIR = config.get('TRIPLET_TRAINING', 'INIT_DIR')
        self._TPL_CKPT_DIR = config.get('TRIPLET_TRAINING', 'CKPT_DIR')
        self._TPL_CYCLES = config.getint('TRIPLET_TRAINING', 'CYCLES')
        self._TPL_EPOCHS = config.getint('TRIPLET_TRAINING', 'EPOCHS')
        self._TPL_FILTER_SIZE = config.getint('TRIPLET_TRAINING', 'FILTER_SIZE')
        self._TPL_MARGIN = config.getfloat('TRIPLET_TRAINING', 'MARGIN')
        self._TPL_OPTIMIZER = config.get('TRIPLET_TRAINING', 'OPTIMIZER')
        self._TPL_LR_VALUE = config.getfloat('TRIPLET_TRAINING', 'LEARNING_RATE')
        self._TPL_FREEZE_VARS = [
            var.strip() for var in
            config.get('TRIPLET_TRAINING', 'FREEZE_VARS').split(',')
        ]

        # Raise if the configured optimizer is not defined
        if self._CLS_OPTIMIZER not in self._OPTIMIZER_MAP:
            raise ValueError("CLASSIFIER_TRAINING OPTIMIZER not defined.")
        if self._TPL_OPTIMIZER not in self._OPTIMIZER_MAP:
            raise ValueError("TRIPLET_TRAINING OPTIMIZER not defined.")

    def train_classifier(self):
        """Train classifier according to specs in the config file."""
        # When training the classifier, use one-hot encoding as the label
        self.dataset_builder.ONE_HOT = True

        # Create full model using model_builder
        model, input_name = self.model_builder.create_full_model()

        # Sanity check
        model.summary()

        # Set learning rate schedule
        boundaries = self._CLS_LR_BOUNDARIES
        values = self._CLS_LR_VALUES
        lr_schedule = self._LR_SCHEDULE_MAP['PiecewiseConstantDecay'](
            boundaries=boundaries, values=values)

        # Use the learning rate schedule to create the optimizer
        optimizer = self._OPTIMIZER_MAP[self._CLS_OPTIMIZER](
            learning_rate=lr_schedule)

        # Create loss function
        loss = self._LOSS_MAP['CrossEntropy'](from_logits=True)

        # Add accuracy metric
        accuracy = self._METRIC_MAP['Accuracy']()
        model.compile(optimizer=optimizer, loss=loss, metrics=[accuracy])

        # Build tf.estimator
        estimator = tf.keras.estimator \
            .model_to_estimator(keras_model=model, model_dir=self._CLS_CKPT_DIR)
        train_spec = tf.estimator.TrainSpec(
            input_fn=self.dataset_builder.get_train_input_fn(input_name),
            max_steps=self._CLS_MAX_STEP)
        eval_spec = tf.estimator.EvalSpec(
            input_fn=self.dataset_builder.get_eval_input_fn(input_name))

        # Start training
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    def _freeze_vars(self, model):
        """Freeze variables in the model based on regular expressions in
        self._TPL_FREEZE_VARS.

        Args:
            model: tf.keras.Model, the model within which variables are frozen.
        """
        # Get regular expressions from the config file
        freeze_var_res = self._TPL_FREEZE_VARS
        # Get layers whose names match any regular expression
        freeze_layers = [
            layer for layer in model.layers
            if any(re.match(str(pattern), layer.name)
                   for pattern in freeze_var_res)
        ]
        # Freeze layers
        print('Freezing {} layers.'.format(len(freeze_layers)))
        for layer in freeze_layers:
            print('Freezing layer {}.'.format(layer.name))
            layer.trainable = False

    def train_triplet_transfer(self):
        """Train encoder with triplet loss according to specs in the config file."""
        # When training with triplet loss, avoid one-hot encoding
        self.dataset_builder.ONE_HOT = False

        # Create full model using model_builder
        model, input_name = self.model_builder.create_full_model()

        # Sanity check
        model.summary()

        # Build optimizer
        optimizer = self._OPTIMIZER_MAP[self._TPL_OPTIMIZER](self._TPL_LR_VALUE)

        # Load initial weights from self._TPL_INIT_DIR
        latest = tf.train.latest_checkpoint(self._TPL_INIT_DIR)
        model.load_weights(latest)

        # Get the ResNet50 backbone and freeze the specified variables
        resnet_model = model.layers[0]
        self._freeze_vars(resnet_model)

        # Create loss function
        loss = self._LOSS_MAP['TripletSemiHard'](self._TPL_MARGIN)
        model.compile(optimizer=optimizer, loss=loss)

        # Train triplet model.
        # In each of self._TPL_CYCLES cycles, a new training dataset with
        # self._TPL_FILTER_SIZE labels is generated and training runs for
        # self._TPL_EPOCHS epochs.
        for i in range(self._TPL_CYCLES):
            print('Cycle #{}'.format(i + 1))
            train_dataset = self.dataset_builder.get_train_dataset(
                filter_size=self._TPL_FILTER_SIZE)
            history = model.fit(train_dataset, epochs=self._TPL_EPOCHS)
            # Store weights every 50 cycles
            if (i + 1) % 50 == 0:
                model.save_weights(self._TPL_CKPT_DIR + '_#{}'.format(i + 1))
        model.save_weights(self._TPL_CKPT_DIR)
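A minimal usage sketch for ModelTrainer, assuming the default config path; running the classifier stage before the triplet stage is an assumption (the triplet stage loads its initial weights from INIT_DIR, which the classifier checkpoints would plausibly populate).

# Hypothetical driver; the config path and stage ordering are assumptions.
trainer = ModelTrainer(config_path='configs/sample_config.ini')
trainer.train_classifier()          # cross-entropy pre-training via tf.estimator
trainer.train_triplet_transfer()    # triplet-loss fine-tuning of the encoder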
def main(args):
    with open(args.config, 'r') as f:
        config = json.load(f)
    # Save config into the experiment directory
    with open(os.path.join(args.experiment_dir, 'config.json'), 'w') as f:
        json.dump(config, f)
    logging.info("Config json: %s", config)

    ball = Ball(config)
    file_config = config['files']
    base_data_dir = file_config['base-data-dir']
    model_config = config['model']
    train_config = config['training']

    model_builder = ModelBuilder(model_config,
                                 ball.get_word_vecs(),
                                 ball.get_character_vecs(),
                                 ball.get_feature_indxr(),
                                 ball.get_ent_cbow_vecs())
    logging.info("Model Summary:")
    model_builder.build_f().summary()
    logging.info("Building model...")
    model = model_builder.build_trainable_model()
    logging.info("Model Built!")

    logging.info("Building data ball...")
    data_converter = DataConverter(ball)
    logging.info("Data ball built!")

    trainer = Trainer(model, ball, data_converter,
                      neg_sample_k=train_config['neg_samples'],
                      batch_size=train_config['batch_size'],
                      neg_sample_from_cands=train_config['neg_sample_from_cands'])

    # Optimization loop
    logging.info("--- Starting optimization loop ---")
    for epoch in range(train_config['epochs']):
        logging.info("Starting epoch %d", epoch + 1)
        data_iterator = WikilinksIterator(base_data_dir + 'train')
        for item in data_iterator.jsons():
            trainer.train_on(item)
        trainer.epoch_done()
        logging.info("Finished training epoch %d", epoch + 1)

        # Temporary weight save for evaluation
        tmp_weights_path = os.path.join(args.experiment_dir, 'tmp-model.weights')
        model.save_weights(tmp_weights_path)

        logging.info("Evaluating epoch %d", epoch + 1)
        test_model = model_builder.build_f(weights=tmp_weights_path)
        evaluator = Evaluator(test_model, ball, data_converter)
        data_iterator = WikilinksIterator(base_data_dir + 'dev')
        for item in data_iterator.jsons():
            evaluator.evaluate_on(item)
        accuracy = evaluator.evaluate_model()
        logging.info("Model accuracy for epoch %d is %.2f", epoch + 1, accuracy)

    logging.info("Saving final model")
    final_weights_path = os.path.join(args.experiment_dir, 'final-model.weights')
    model.save_weights(final_weights_path)
    test_model = model_builder.build_f(weights=final_weights_path)

    data_iterator = WikilinksIterator(base_data_dir + 'dev')
    final_evaluator = Evaluator(test_model, ball, data_converter)
    logging.info("Starting final model dev evaluation")
    for item in data_iterator.jsons():
        final_evaluator.evaluate_on(item)
    accuracy = final_evaluator.evaluate_model()
    logging.info("Final model dev accuracy is %.2f", accuracy)

    data_iterator = WikilinksIterator(base_data_dir + 'test')
    final_evaluator = Evaluator(test_model, ball, data_converter)
    logging.info("Starting final model test evaluation")
    for item in data_iterator.jsons():
        final_evaluator.evaluate_on(item)
    accuracy = final_evaluator.evaluate_model()
    logging.info("Final model test accuracy is %.2f", accuracy)
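main(args) reads args.config and args.experiment_dir, but its entry point is not shown in this excerpt. A minimal sketch, with flag names inferred from those attributes:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True,
                        help='Path to the experiment config JSON.')
    parser.add_argument('--experiment_dir', required=True,
                        help='Directory for saved configs and weights.')
    main(parser.parse_args())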
def main():
    # Load config
    cfg.merge_from_file(args.config)

    # Create model and load pretrained weights
    model = ModelBuilder()
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # Build tracker
    tracker = build_tracker(model)

    # cur_dir = os.path.dirname(os.path.realpath(__file__))
    # dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    video_path = '/home/yuuzhao/Documents/project/pysot/testing_dataset/VOT2016'
    category = os.listdir(video_path)
    category.sort()

    # dataset = DatasetFactory.create_dataset(name=args.dataset,
    #                                         dataset_root=dataset_root,
    #                                         load_img=False)

    # temp_path, frames_of_each_video and reset are assumed to be defined
    # earlier in the script (module level), outside this excerpt.
    template_acc = []
    template_cur = []
    init0 = []  # init0 is the reset init
    init = []
    pre = []
    gt = []

    print("Category & Video:")
    for tmp_cat in category:
        tmp_cat_path = temp_path + '/' + tmp_cat
        if not os.path.isdir(tmp_cat_path):
            os.makedirs(tmp_cat_path)
        print("Category:", tmp_cat)

        video = os.listdir(join(video_path, tmp_cat))
        video.sort()
        # video_cut = video[0:frames_of_each_video]
        # for picture in video_cut:  # this loop should perhaps be removed
        #     print("Frame:", picture)
        frame = 0

        gt_path = join(video_path, tmp_cat, 'groundtruth.txt')
        ground_truth = np.loadtxt(gt_path, delimiter=',')
        # num_frames = min(len(ground_truth), frame_max)
        num_frames = frames_of_each_video
        img_path = join(video_path, tmp_cat)
        imgFiles = [join(img_path, '%08d.jpg') % i
                    for i in range(1, num_frames + 1)]

        while frame < num_frames:
            print("frame:", frame)
            polygon = ground_truth[frame]
            cx, cy, w, h = get_axis_aligned_bbox(polygon)
            gt_rect = [cx, cy, w, h]
            image_file = imgFiles[frame]
            img = cv2.imread(image_file)  # HxWxC

            if frame == 0:
                tracker.init(img, gt_rect)

            if w * h != 0:
                # Record templates for the ground-truth and predicted boxes
                zf_acc = tracker.get_zf(img, gt_rect)
                output = tracker.track(img)
                pre_rect = output['bbox']
                zf_pre = tracker.get_zf(img, pre_rect)
                template_acc.append(zf_acc)
                template_cur.append(zf_pre)

                init0.append(0)
                init.append(frame)
                frame_reset = 0
                pre.append(0)
                gt.append(1)

                while frame < (num_frames - 1):
                    frame += 1
                    frame_reset += 1
                    image_file = imgFiles[frame]
                    if not image_file:
                        break
                    polygon = ground_truth[frame]
                    cx, cy, w, h = get_axis_aligned_bbox(polygon)
                    gt_rect = [cx, cy, w, h]
                    img = cv2.imread(image_file)  # HxWxC

                    zf_acc = tracker.get_zf(img, gt_rect)
                    output = tracker.track(img)
                    pre_rect = output['bbox']
                    zf_pre = tracker.get_zf(img, pre_rect)
                    template_acc.append(zf_acc)
                    template_cur.append(zf_pre)

                    init0.append(frame_reset)
                    init.append(frame)
                    pre.append(1)
                    # Last frame of the video gets gt = 0
                    gt.append(0 if frame == (num_frames - 1) else 1)

                    pre_rect_arr = np.array(pre_rect)
                    cx, cy, w, h = get_axis_aligned_bbox(pre_rect_arr)
                    target_pos, target_siz = np.array([cx, cy]), np.array([w, h])
                    res = cxy_wh_2_rect(target_pos, target_siz)

                    if reset:
                        cx, cy, w, h = get_axis_aligned_bbox(ground_truth[frame])
                        gt_rect = np.array([cx, cy, w, h])
                        iou = overlap_ratio(gt_rect, res)
                        if iou <= 0:
                            break
            else:
                # Empty ground-truth box: record zero templates as placeholders
                template_acc.append(
                    torch.zeros([1, 3, 127, 127], dtype=torch.float32))
                template_cur.append(
                    torch.zeros([1, 3, 127, 127], dtype=torch.float32))
                init0.append(0)
                init.append(frame)
                pre.append(1)
                gt.append(0 if frame == (num_frames - 1) else 1)

            frame += 1  # skip

        # Write out once per video
        print("write for each video")
        np.save(tmp_cat_path + '/template', template_acc)
        np.save(tmp_cat_path + '/templatei', template_cur)
        np.save(tmp_cat_path + '/init0', init0)
        np.save(tmp_cat_path + '/init', init)
        np.save(tmp_cat_path + '/pre', pre)
        np.save(tmp_cat_path + '/gt', gt)
    print("template")