Beispiel #1
0
def make_predictions(conf,shot_list,loader):
    """Predict every shot in ``shot_list`` in parallel on CPU worker processes.

    Each shot is handed to ``make_single_prediction`` through a
    ``multiprocessing`` pool and the results are collected in input order.

    Args:
        conf: Configuration object passed to ``ModelBuilder``.
        shot_list: Sized iterable of shots to predict.
        loader: Data loader forwarded to ``make_single_prediction``.

    Returns:
        Tuple ``(y_prime, y_gold, disruptive)`` of three lists holding, per
        shot, the three values returned by ``make_single_prediction``.
    """
    # THEANO_FLAGS must be in the environment before theano is first imported.
    os.environ['THEANO_FLAGS'] = 'device=cpu'
    import theano  # noqa: F401  (imported after the flags are set)
    from keras.utils.generic_utils import Progbar  # noqa: F401
    from model_builder import ModelBuilder
    builder = ModelBuilder(conf)

    y_prime = []
    y_gold = []
    disruptive = []

    _, model = builder.build_train_test_models()
    builder.load_model_weights(model)
    model_save_path = builder.get_latest_save_path()

    start_time = time.time()
    fn = partial(make_single_prediction, builder=builder, loader=loader,
                 model_save_path=model_save_path)
    pool = mp.Pool()
    try:
        print('running in parallel on {} processes'.format(pool._processes))
        # start=1 so the progress line reads "Shot 1/N" ... "Shot N/N".
        for (i, (y_p, y, is_disruptive)) in enumerate(pool.imap(fn, shot_list),
                                                      start=1):
            print('Shot {}/{}'.format(i, len(shot_list)))
            sys.stdout.flush()
            y_prime.append(y_p)
            y_gold.append(y)
            disruptive.append(is_disruptive)
    except BaseException:
        # Do not leave worker processes running when a prediction fails or
        # the run is interrupted.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    print('Finished Predictions in {} seconds'.format(time.time()-start_time))
    return y_prime,y_gold,disruptive
Beispiel #2
0
 def load_model():
     """Return the saved model if one exists on disk, else a freshly built one.

     The location checked is ``Hyperparameters.MODEL_PATH``.
     """
     if os.path.exists(Hyperparameters.MODEL_PATH):
         # NOTE(review): the callee shares this function's name. If this def
         # lives at module scope the call recurses into itself; it presumably
         # targets an imported loader -- confirm against the enclosing scope.
         return load_model(
             Hyperparameters.MODEL_PATH,
             custom_objects=ModelBuilder.get_model_custom_objects())
     return ModelBuilder.build_model(Hyperparameters.INPUT_SHAPE,
                                     Hyperparameters.OUTPUT_SHAPE)
def main():
    '''Load the pickled dataset, then build, train, plot and save a model.

    Reads its settings (``data_filename``, ``activation``, ``optimizer``,
    ``dropout_rate``, ``batch_size``) from names defined outside this
    function.
    '''

    # Parse arguments from command line
    #args = get_script_arguments()

    # Verify data file exist
    assert os.path.exists(data_filename), 'Data does not exist.'

    # Load input data
    print('Loading the inputs in memory. It might take a while...')
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point data_filename at files from a trusted source.
    with open(data_filename, 'rb') as data_file:
        data = pickle.load(data_file)

    # Get train and test data
    kx_train, ky_train, kx_test, ky_test, _categorical_speakers = data_to_keras(
        data)

    # Reshape data
    # Convolutional models require data reshaping. The original guard was the
    # constant `True or False` (standing in for `args.cnn_1 or args.cnn_2`),
    # so the reshape always ran; it is now written unconditionally.
    kx_train, ky_train, kx_test, ky_test = data_reshape(
        kx_train, ky_train, kx_test, ky_test)

    ############################
    #           MODEL          #
    ############################

    # Instantiate ModelBuilder(input_shape, num_categories)
    builder = ModelBuilder(kx_train.shape, ky_train.shape[1])

    # Create model object.
    model = builder(activation=activation,
                    optimizer=optimizer,
                    dropout_rate=dropout_rate,
                    architecture=get_architecture(True, False),
                    batch_size=batch_size)

    # Train model
    model, history = fit_model(model,
                               kx_train,
                               ky_train,
                               kx_test,
                               ky_test,
                               max_epochs=60)

    # plot history
    plot_history(history)

    # Save the trained model and its weights
    builder.save(model)
    def simulate(self, name=None, resource_limit=None):
        """Build the models, drain the execution queue and write the log.

        Args:
            name: Forwarded to ``LogWriter.write`` as ``name``.
            resource_limit: Forwarded to ``ModelBuilder.build_all``.
        """
        builder = ModelBuilder()
        built = builder.build_all(resource_limit=resource_limit)
        self.models, self.rm, self.dm = built

        self._initialize_queue()

        started_at = time.time()
        while True:
            if self.execution_queue.is_empty():
                break
            event = self.execution_queue.pop()
            # Stop as soon as an event starts after the simulation end.
            if event.start > self.end:
                break
            self._simulate(event)
        elapsed = time.time() - started_at
        print('Simulation time: ' + str(elapsed))

        LogWriter.write(self.log_queue, name=name)
Beispiel #5
0
def main(argv):
    """Build a model, set up data providers and a trainer, then train.

    With ``FLAGS.model_id`` set, the model and the checkpoint/data
    directories come from the builder looked up by that id. Otherwise the
    directories must be given via flags and a default ``ModelBuilder``
    configuration is used.

    Args:
        argv: Command-line arguments; not used in the visible code.

    Raises:
        ValueError: If ``FLAGS.model_id`` is None and ``checkpoint_dir`` or
            ``data_dir`` is not set.
    """
    if FLAGS.model_id is not None:
        logging.info("Building model '%s'..." % FLAGS.model_id)
        model_builder = get_model_builder_from_id(FLAGS.model_id)
        # The builder's directories override whatever was passed as flags.
        FLAGS.checkpoint_dir = model_builder.checkpoint_dir
        FLAGS.data_dir = model_builder.data_dir
        model = model_builder.build()

    logging.info("Loading data from '%s'..." % FLAGS.data_dir)
    # Train on the "train" split when validating, otherwise on "all".
    data_provider = TFRecordProvider(FLAGS.data_dir,
                                     split="train" if FLAGS.valid else "all")
    data_provider_valid = TFRecordProvider(
        FLAGS.data_dir, split="valid") if FLAGS.valid else None

    if FLAGS.model_id is None:
        # NOTE(review): data_dir is validated here, after it has already been
        # passed to TFRecordProvider above -- consider checking it earlier.
        if FLAGS.checkpoint_dir is None:
            raise ValueError("checkpoint_dir must be set.")
        if FLAGS.data_dir is None:
            raise ValueError("data_dir must be set.")
        n_cylinders = 4.
        #losses = [MelSpectralLoss(sample_rate=data_provider.audio_rate, n_bands=2)]
        #losses = [AdaptiveMelSpectralLoss(sample_rate=data_provider.audio_rate, n_bands=8)]
        losses = [
            TimeFreqResMelSpectralLoss(sample_rate=data_provider.audio_rate,
                                       time_res=1 / data_provider.input_rate)
        ]
        # Rates and window length are taken from the training data provider.
        model = ModelBuilder(model_type="f0_rnn_fc_hpn_decoder",
                             audio_rate=data_provider.audio_rate,
                             input_rate=data_provider.input_rate,
                             window_secs=data_provider.example_secs,
                             f0_denom=n_cylinders,
                             checkpoint_dir=FLAGS.checkpoint_dir,
                             losses=losses,
                             feature_domain="time").build()

    logging.info("Building trainer...")
    # Make sure the summary directories exist under the checkpoint directory.
    summary_dir = os.path.join(FLAGS.checkpoint_dir, "summaries", "train")
    if not os.path.exists(summary_dir):
        os.makedirs(summary_dir)
    if FLAGS.valid:
        summary_dir_valid = os.path.join(FLAGS.checkpoint_dir, "summaries",
                                         "valid")
        if not os.path.exists(summary_dir_valid):
            os.makedirs(summary_dir_valid)
    strategy = tf.distribute.MirroredStrategy(devices=FLAGS.devices)
    trainer = Trainer(model, strategy)

    logging.info("Initializing training...")
    # train() is called repeatedly; no exit condition is visible in this part
    # of the function.
    while True:
        #try:
        train(data_provider,
              trainer,
              batch_size=FLAGS.batch_size,
              steps_per_summary=FLAGS.steps_per_summary,
              steps_per_summary_valid=FLAGS.steps_per_summary_valid,
              steps_per_save=FLAGS.steps_per_save,
              model_dir=FLAGS.checkpoint_dir,
              valid=FLAGS.valid,
              data_provider_valid=data_provider_valid)
        '''except KeyboardInterrupt:
Beispiel #6
0
def main(_):
    """Build a text-matching model and run the stages enabled by ``FLAGS``.

    Training, evaluation and prediction each run only when the matching
    ``do_train`` / ``do_eval`` / ``do_predict`` flag is set.
    """
    tf.gfile.MakeDirs(FLAGS.output_dir)

    # The embedding matrix is only loaded when fixed embeddings are requested.
    if FLAGS.is_fixed_emb:
        embedding_matrix = utils.get_emb_matrix(FLAGS.data_dir,
                                                FLAGS.max_features)

    cyclic_lr = CyclicLR(base_lr=FLAGS.min_lr,
                         max_lr=FLAGS.max_lr,
                         step_size=2740,
                         mode='exp_range',
                         gamma=0.99994)
    text_matcher = TextMatcher(FLAGS.model_name, FLAGS.vocab_file,
                               FLAGS.do_lower_case, FLAGS.max_seq_len)

    builder = ModelBuilder(model_name=FLAGS.model_name,
                           max_len=FLAGS.max_seq_len,
                           input_dim=FLAGS.input_dim,
                           max_features=FLAGS.max_features,
                           units=FLAGS.units,
                           num_filter=FLAGS.num_filter)
    if FLAGS.is_fixed_emb:
        builder.set_embedding_matrix(embedding_matrix)

    model = builder.build_model()
    print(model.summary())

    if FLAGS.do_train:
        train_examples = text_matcher.get_train_examples(FLAGS.data_dir)
        text_matcher.do_train(model, FLAGS.output_dir, train_examples,
                              FLAGS.epochs, FLAGS.batch_size,
                              callback=[cyclic_lr])

    if FLAGS.do_eval:
        dev_examples = text_matcher.get_dev_examples(FLAGS.data_dir)
        text_matcher.do_eval(model, FLAGS.output_dir, dev_examples,
                             FLAGS.batch_size)

    if FLAGS.do_predict:
        test_examples = text_matcher.get_test_examples(FLAGS.data_dir)
        text_matcher.do_predict(model, FLAGS.output_dir, test_examples,
                                FLAGS.batch_size)
Beispiel #7
0
class Model:
    """Lookup of similar-word sets keyed by stemmed word."""

    def __init__(self):
        """Build the condensed model and create an English stemmer."""
        condensed = ModelBuilder().build().condense()
        self.model = condensed.model
        self.stemmer = SnowballStemmer('english')

    def simset(self, word):
        """Return the entry stored for the stem of ``word``, or ``[]``."""
        return self.model.get(self.stemmer.stem(word), [])
Beispiel #8
0
class Model:
    """Wraps a condensed word model and answers similarity-set queries."""

    def __init__(self):
        """Create the model via ``ModelBuilder`` and an English stemmer."""
        built = ModelBuilder().build()
        self.model = built.condense().model
        self.stemmer = SnowballStemmer("english")

    def simset(self, word):
        """Stem ``word`` and return its model entry (``[]`` when absent)."""
        key = self.stemmer.stem(word)
        empty = []
        return self.model.get(key, empty)
Beispiel #9
0
    def _create_model(self):
        """Fill in derived entries of ``self.params`` and build ``self.model``.

        The activation name stored under ``'activation_fn'`` is replaced in
        place by the attribute of that name on ``tf.nn``.
        """
        # TODO hidden_layers to be fixed
        params = self.params
        hidden_layers = params[HIDDEN_LAYERS]

        builder = ModelBuilder()

        labels = self.label_unique_values
        if labels is None:
            params['n_classes'] = 0
        else:
            params['n_classes'] = len(labels)
        params['label_vocabulary'] = labels
        params['config'] = self.runConfig
        # The same layer sizes are stored under both hidden-unit keys.
        params['hidden_units'] = hidden_layers
        params['dnn_hidden_units'] = hidden_layers
        params['dnn_dropout'] = params['dropout']
        # One optimizer setting is reused for the dnn and linear keys.
        params['dnn_optimizer'] = params['optimizer']
        params['linear_optimizer'] = params['optimizer']
        params['activation_fn'] = getattr(tf.nn, params['activation_fn'])

        self.model = builder.create_from_model_name(params['model_name'],
                                                    self.feature_columns,
                                                    params)
Beispiel #10
0
def main():
    """Build netflow and winlog models, merge them, then analyze and save.

    The winlog model is keyed by hostname; its keys are translated to IP
    addresses through ``hostnameConversionTable`` before being merged into the
    netflow model. A hostname missing from that table raises ``KeyError``.
    """
    #Build the client
    servAddr = "http://192.168.0.91:80/elasticsearch"
    client = ElasticClient(servAddr)
    #Build the netflow model
    builder = ModelBuilder(client)
    ipVersion = "ipv4"
    blacklist = None
    whitelist = [
        "192.168.2.10", "192.168.2.101", "192.168.2.102", "192.168.2.103",
        "192.168.2.104", "192.168.2.105", "192.168.2.106", "192.168.2.107",
        "192.168.2.108", "192.168.0.11", "255.255.255.255", "127.0.0.1",
        "128.0.0.0", "0.0.0.0", "192.255.255.0"
    ]

    hostnameConversionTable = {
        "HP-B53-01": "192.168.0.11",  #scada + hmi
        "COM600-PC": "192.168.2.10"  #abb substation mgt unit; aka 'rtu'
    }

    # Broader patterns used previously: "netflow*" and "netflow-v9-2017*".
    #uses '-' to exclude specific indices or index-patterns
    indexPattern = "netflow-v9-2017*,-netflow-v9-2017.04*"  #april indices have failed repeatedly, due to what appears to be differently-index data; may require re-indexing
    netflowModel = builder.BuildNetFlowModel(indexPattern,
                                             ipVersion=ipVersion,
                                             ipBlacklist=blacklist,
                                             ipWhitelist=whitelist)
    winlogModel = builder.BuildWinlogEventIdModel("winlogbeat*")
    #just resolves the keys of the winlogmodel (hostnames) to their ip addresses
    convertedModel = {
        hostnameConversionTable[host]: model
        for host, model in winlogModel.items()
    }
    netflowModel.MergeVertexModel(
        convertedModel, "event_id"
    )  #store the event model in the nodes; this is redundant, but fine for now
    #Build the analyzer
    analyzer = ModelAnalyzer(netflowModel, winlogModel)
    #analyzer.Analyze()
    analyzer.AssignMitreTacticProbabilities()
    netflowModel.Save("netflowModel.pickle")
    netflowModel.PrintAttackModels()
    analyzer.AnalyzeStationaryAttackDistribution()
Beispiel #11
0
def test_model(n_gram_mins):
    """Fit one randomly chosen best-fit model and report its accuracy.

    Args:
        n_gram_mins: Passed to ``FeatureExtractor`` as ``n_gram_mins``.

    Returns:
        Tuple ``(accuracy_score, model_array)``.
    """
    extractor = FeatureExtractor("../dataset/slack_dialogue.txt",
                                 n_grams=[1, 2, 3, 4],
                                 n_gram_mins=n_gram_mins,
                                 debug=False)
    extractor.load()
    evaluator = ModelEvaluator(extractor.headers, extractor.features)

    model_array, _highest_rate = evaluator.search_initial_best_fit_algorithm()
    # Pick one of the candidate models uniformly at random.
    pick = random.randint(0, len(model_array) - 1)
    model_builder = ModelBuilder(model_array[pick])

    X_train, X_validation, Y_train, Y_validation = evaluator.split_dataset()
    model_builder.fit_model(X_train, Y_train)
    accuracy_score = model_builder.accuracy_score(X_validation, Y_validation)

    score_line = ("Got score: " + str(accuracy_score) + " with model: " +
                  str(model_array))
    print(score_line)
    print("Using : " + str(n_gram_mins))
    return accuracy_score, model_array
Beispiel #12
0
def restore_model(checkpoint_filename, video_retriever_generator, selector,
                  extractor):
    """Rebuild a model from the JSON parameters stored next to a checkpoint.

    The JSON file is expected at ``checkpoint_filename + '.json'``.

    Steps:
    - read the model's parameters from the JSON file
    - build the model with those parameters
    - prepare training with the stored batch size

    Returns:
        Tuple ``(builder, model_saver, params)``.
    """
    with open(checkpoint_filename + '.json', 'r',
              encoding='utf-8') as json_file:
        params = json.load(json_file)

    builder = ModelBuilder(params["training_videos_names"],
                           params["testing_videos_names"],
                           params["n_captions_per_video"],
                           params["feature_n_frames"],
                           video_retriever_generator, selector, extractor)

    hyper = params["model"]
    builder.create_model(hyper["enc_units"], hyper["dec_units"],
                         hyper["rnn_layers"], hyper["embedding_dims"],
                         hyper["learning_rate"], hyper["dropout_rate"],
                         hyper["bi_encoder"])
    builder.prepare_training(params["batch_size"])

    # The saver is addressed by the checkpoint's directory and base name.
    checkpoint_dir = os.path.dirname(checkpoint_filename)
    checkpoint_name = os.path.basename(checkpoint_filename)
    model_saver = ModelSaver(checkpoint_dir, checkpoint_name)

    return builder, model_saver, params
Beispiel #13
0
def make_predictions_gpu(conf,shot_list,loader):
    """Predict all shots in batches on the GPU.

    Shots are processed in equal-sized sublists of
    ``conf['model']['pred_batch_size']``; each output is cut back to its
    shot's length and the combined result is truncated to ``len(shot_list)``.

    Returns:
        Tuple ``(y_prime, y_gold, disruptive)`` of lists.
    """
    # THEANO_FLAGS must be in the environment before theano is first imported.
    os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32' #=cpu
    import theano
    from keras.utils.generic_utils import Progbar
    from model_builder import ModelBuilder

    builder = ModelBuilder(conf)
    _, model = builder.build_train_test_models()
    builder.load_model_weights(model)
    model.reset_states()

    y_prime, y_gold, disruptive = [], [], []

    pbar = Progbar(len(shot_list))
    pred_batch_size = conf['model']['pred_batch_size']
    shot_sublists = shot_list.sublists(pred_batch_size, shuffle=False,
                                       equal_size=True)
    for shot_sublist in shot_sublists:
        X, y, shot_lengths, disr = loader.load_as_X_y_pred(shot_sublist)
        raw_pred = model.predict(X, batch_size=pred_batch_size)
        model.reset_states()
        pred_arrays = loader.batch_output_to_array(raw_pred)
        gold_arrays = loader.batch_output_to_array(y)
        # Cut every array back to the length of its shot.
        pred_cut = [arr[:shot_lengths[j]] for j, arr in enumerate(pred_arrays)]
        gold_cut = [arr[:shot_lengths[j]] for j, arr in enumerate(gold_arrays)]

        pbar.add(1.0*len(shot_sublist))
        loader.verbose = False  # True during the first iteration
        y_prime.extend(pred_cut)
        y_gold.extend(gold_cut)
        disruptive.extend(disr)

    # Keep only as many results as there are shots.
    num_shots = len(shot_list)
    return y_prime[:num_shots], y_gold[:num_shots], disruptive[:num_shots]
Beispiel #14
0
    def config_path(self, config_path):
        """Point this object at a new config (.ini) file.

        Creates the dataset builder and the model builder from the file,
        fetches the encoder from the model builder, and records the path.
        The config file is not parsed here; it is only handed to the
        builders.

        Args:
            config_path: Str, path to config (.ini) file.

        Raises:
            ValueError: If ``config_path`` is not an existing file.
        """
        if not os.path.isfile(config_path):
            raise ValueError("Specified config file does not exist.")

        # Dataset builder configured from the file.
        self._dataset_builder = DatasetBuilder(one_hot=False,
                                               config_path=config_path)

        # Model builder, plus the encoder it provides.
        model_builder = ModelBuilder(config_path=config_path)
        self._model_builder = model_builder
        self._model = model_builder.get_encoder()

        self._config_path = config_path
    def load(self, select_new_best_model=False):
        '''
        Reload data from the file, optionally re-select the model, and refit.

        Useful when there are automated updates to datasets.

        Args:
            select_new_best_model: When true, search for the best-fit
                algorithms again, pick one of them at random and create a new
                ``ModelBuilder`` for it. When false, the existing ``self.mb``
                is refitted.
        '''
        self.fe.load()
        # One evaluator over the freshly loaded data serves both the optional
        # model search and the dataset split below.
        self.me = ModelEvaluator(self.fe.headers, self.fe.features)

        if select_new_best_model:
            self.model_array, self.highest_rate = self.me.search_initial_best_fit_algorithm()
            self.chosen_model = self.model_array[
                random.randint(0, len(self.model_array) - 1)]
            self.mb = ModelBuilder(self.chosen_model)

        self.X_train, self.X_validation, self.Y_train, self.Y_validation = self.me.split_dataset()
        self.mb.fit_model(self.X_train, self.Y_train)
        self.accuracy_score = self.mb.accuracy_score(self.X_validation,
                                                     self.Y_validation)
Beispiel #16
0
def make_evaluations_gpu(conf,shot_list,loader):
    """Evaluate the model on all shots and return the weighted average loss.

    Shots are evaluated in sublists of at most
    ``conf['model']['pred_batch_size']``; a model is built and loaded for each
    sublist with a batch size equal to the sublist length. The per-sublist
    results are averaged with the sublist sizes as weights.
    """
    # THEANO_FLAGS must be in the environment before theano is first imported.
    os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32' #=cpu
    import theano
    from keras.utils.generic_utils import Progbar
    from model_builder import ModelBuilder
    builder = ModelBuilder(conf)

    batch_size = min(len(shot_list), conf['model']['pred_batch_size'])

    pbar = Progbar(len(shot_list))
    print('evaluating {} shots using batchsize {}'.format(len(shot_list),batch_size))

    shot_sublists = shot_list.sublists(batch_size, equal_size=False)
    all_metrics, all_weights = [], []
    for shot_sublist in shot_sublists:
        sublist_size = len(shot_sublist)
        model = builder.build_model(True, custom_batch_size=sublist_size)
        builder.load_model_weights(model)
        model.reset_states()
        X, y, _shot_lengths, _disr = loader.load_as_X_y_pred(
            shot_sublist, custom_batch_size=sublist_size)
        metrics = model.evaluate(X, y, batch_size=sublist_size, verbose=False)
        all_metrics.append(metrics)
        all_weights.append(sublist_size)
        model.reset_states()

        pbar.add(1.0*len(shot_sublist))
        loader.verbose = False  # True during the first iteration

    if len(all_metrics) > 1:
        print('evaluations all: {}'.format(all_metrics))
    loss = np.average(all_metrics, weights=all_weights)
    print('Evaluation Loss: {}'.format(loss))
    return loss
Beispiel #17
0
def predict(config_path,
            model_path=None,
            labels_json=None,
            data_dir=None,
            group=None):
    """Run the model over a labelled dataset and print its accuracy.

    Each optional argument falls back to the entry of the same name in the
    config file when it is not given.

    Args:
        config_path: Path of the config file read by ``tools.read_configs``.
        model_path: Path of the model to load.
        labels_json: Path of the labels JSON file.
        data_dir: Directory containing the images.
        group: Group passed to ``DataGenerator``.
    """
    configs = tools.read_configs(config_path)

    model_path = tools.str2path(model_path or configs.get('model_path'))
    data_dir = tools.str2path(data_dir or configs.get('data_dir'))
    labels_json = tools.str2path(labels_json or configs.get('labels_json'))
    group = group or configs.get('group')

    model_builder = ModelBuilder(configs,
                                 mode='predict',
                                 model_path=model_path.as_posix())
    model = model_builder.build()

    pred_gen = DataGenerator(configs,
                             image_dir=data_dir,
                             labels_json=labels_json,
                             group=group,
                             mode='predict')

    correct = 0
    false = 0
    for x, y, _xnames in pred_gen.flow_from_labels():
        predictions = model.predict(x, verbose=1)
        pred_cls_ids = np.argmax(predictions, axis=1)
        tp = np.sum(pred_cls_ids == y)
        correct += tp
        false += len(y) - tp

    # Accuracy is the share of correct predictions among all predictions.
    # The previous formula, (1 - false / correct), used the wrong denominator
    # and divided by zero when nothing was predicted correctly.
    total = correct + false
    accuracy = correct / total * 100 if total else 0.0

    print(f"Correct: {correct}")
    print(f'False: {false}')
    print(f'Accuracy: {accuracy}')
Beispiel #18
0
 def __init__(self):
     """Build the condensed model and create an English stemmer."""
     condensed = ModelBuilder().build().condense()
     self.model = condensed.model
     self.stemmer = SnowballStemmer("english")
Beispiel #19
0
def test_resnet():
    """Print the available model names and the ``'resnet50'`` model."""
    builder = ModelBuilder(10, pretrained=False)
    print(builder.available_models)
    resnet = builder['resnet50']
    print(resnet)
Beispiel #20
0
def train(config_path,
          train_dir=None,
          val_dir=None,
          output_dir=None,
          train_labels_json=None,
          val_labels_json=None,
          group=None,
          model_name=None,
          model_suffix=None):
    """Train a model and checkpoint the best version to the output directory.

    Every optional argument falls back to the entry of the same name in the
    config file when it is not given. The model is written to
    ``<output_dir>/<model_name>_<group>_<model_suffix>.h5``.

    Args:
        config_path: Path of the config file read by ``tools.read_configs``.
        train_dir: Directory with the training data.
        val_dir: Directory with the validation data.
        output_dir: Directory the model file is written to; created
            (including missing parents) if it does not exist.
        train_labels_json: Labels JSON for the training data.
        val_labels_json: Labels JSON for the validation data.
        group: Group passed to ``DataGenerator``.
        model_name: Model name passed to ``ModelBuilder``.
        model_suffix: Suffix used in the output file name.
    """
    np.random.seed(42)  # for reproducibility
    logger = logging.getLogger('root')
    configs = tools.read_configs(config_path)

    train_dir = tools.str2path(train_dir or configs['train_dir'])
    val_dir = tools.str2path(val_dir or configs['val_dir'])
    train_labels_json = tools.str2path(train_labels_json
                                       or configs['train_labels_json'])
    val_labels_json = tools.str2path(val_labels_json
                                     or configs['val_labels_json'])
    output_dir = tools.str2path(output_dir or configs['output_dir'])
    group = group or configs['group']
    model_name = model_name or configs['model_name']
    model_suffix = model_suffix or configs['model_suffix']
    train_counts = configs.get('train_class_counts')
    val_counts = configs.get('val_class_counts')

    # parents=True so a nested output path does not fail when its parent
    # directories are missing.
    output_dir.mkdir(parents=True, exist_ok=True)

    model_out_name = f'{model_name}_{group}_{model_suffix}.h5'
    model_path = output_dir / model_out_name

    train_gen = DataGenerator(configs, train_dir, train_labels_json, 'train',
                              group, train_counts)
    val_gen = DataGenerator(configs, val_dir, val_labels_json, 'val', group,
                            val_counts)

    epochs = configs['epochs']
    classes = configs['network_parameters']['classes']
    loss = configs['loss']
    optimizer = configs['optimizer']

    model_builder = ModelBuilder(configs, 'train', model_name, model_path,
                                 classes, loss, optimizer)
    model = model_builder.build()

    # Keep only the model with the lowest training loss seen so far.
    # NOTE(review): this monitors 'loss' although validation data is passed
    # to fit_generator -- confirm whether 'val_loss' was intended.
    checkpoint = keras.callbacks.ModelCheckpoint(model_path.as_posix(),
                                                 monitor='loss',
                                                 verbose=1,
                                                 save_best_only=True,
                                                 save_weights_only=False,
                                                 mode='min')
    logger.info(f"Training model {model_out_name} for {epochs} epochs")
    logger.info(f'Class weights: {train_gen.class_weights}')

    model.fit_generator(generator=train_gen.flow_generator,
                        steps_per_epoch=train_gen.steps_per_epoch,
                        epochs=epochs,
                        verbose=1,
                        class_weight=train_gen.class_weights,
                        callbacks=[checkpoint],
                        validation_data=val_gen.flow_generator,
                        validation_steps=val_gen.steps_per_epoch)
Beispiel #21
0
def train(dataset_train, dataset_valid, train_config, model_config):
    """Build the training graph and run the training loop in a TF session.

    A string-handle iterator lets the same graph read from either the
    training or the validation dataset, selected through ``feed_dict``.
    Summaries are written under ``train_config.tflogdir`` and checkpoints
    under ``train_config.ckpt_dir``.

    Args:
        dataset_train: Training dataset; also supplies the output types and
            shapes used for the handle-based iterator.
        dataset_valid: Validation dataset.
        train_config: Training settings (loss and optimizer factories,
            learning-rate schedule, step counts, directories).
        model_config: Model settings passed to ``ModelBuilder``.
    """

    # NOTE(review): this placeholder is created again a few lines below and
    # the name is rebound there, so this first one ends up unused.
    dataset_handle = tf.placeholder(tf.string, shape=[])
    dataset_train_iterator = dataset_train.make_one_shot_iterator()
    dataset_valid_iterator = dataset_valid.make_one_shot_iterator()

    dataset_handle = tf.placeholder(tf.string, shape=[])
    dataset_iterator = tf.data.Iterator.from_string_handle(
        dataset_handle, dataset_train.output_types,
        dataset_train.output_shapes)
    inputs, true_heatmap = dataset_iterator.get_next()

    # model building =========================
    # < complete codes here >
    modelbuilder = ModelBuilder(model_config=model_config)
    pred_heatmap = modelbuilder.get_model(model_in=inputs, scope='model')

    # traning ops =============================================
    # < complete codes here >
    # Total loss = heatmap loss on the batch-size-scaled error + regularization.
    loss_heatmap_op = train_config.loss_fn(
        (true_heatmap - pred_heatmap) / train_config.batch_size)
    loss_regularizer_op = tf.losses.get_regularization_loss()
    loss_op = loss_heatmap_op + loss_regularizer_op

    global_step = tf.Variable(0, trainable=False)
    # NOTE(review): batchnum_per_epoch is not used anywhere below.
    batchnum_per_epoch = np.floor(train_config.train_data_size /
                                  train_config.batch_size)

    lr_op = tf.train.exponential_decay(
        learning_rate=train_config.learning_rate,
        global_step=global_step,
        decay_steps=train_config.learning_rate_decay_step,
        decay_rate=train_config.learning_rate_decay_rate,
        staircase=True)

    opt_op = train_config.opt_fn(learning_rate=lr_op, name='opt_op')
    train_op = opt_op.minimize(loss_op, global_step)

    # For Tensorboard ===========================================
    # To inspect intermediate results, functions such as tf.summary.image can
    # be used; note that adding more summaries slows training down.
    file_writer_train = tf.summary.FileWriter(logdir=train_config.tflogdir +
                                              '/train')
    file_writer_valid = tf.summary.FileWriter(logdir=train_config.tflogdir +
                                              '/valid')

    file_writer_train.add_graph(tf.get_default_graph())

    # tb_summary_loss= tf.summary.scalar('loss', loss_heatmap_op)
    # tb_summary_lr   = tf.summary.scalar('learning_rate',lr_op)
    # write_op        = tf.summary.merge_all()

    write_op = summary_fn(loss=loss_heatmap_op,
                          total_out_losssum=loss_op,
                          learning_rate=lr_op,
                          input_images=inputs,
                          label_heatmap=true_heatmap,
                          pred_out_heatmap=pred_heatmap,
                          train_config=train_config,
                          model_config=model_config)

    # training ==============================

    init_var = tf.global_variables_initializer()
    saver = tf.train.Saver()

    sess_config = tf.ConfigProto(log_device_placement=True,
                                 gpu_options=tf.GPUOptions(allow_growth=True))

    with tf.Session(config=sess_config) as sess:
        # Run the variable initializer
        sess.run(init_var)

        # save graph in pb file
        tf.train.write_graph(sess.graph_def, train_config.ckpt_dir, 'model.pb')
        # String handles that select which dataset feeds the shared iterator.
        train_handle = sess.run(dataset_train_iterator.string_handle())
        valid_handle = sess.run(dataset_valid_iterator.string_handle())

        # There is only one for loop here; the other one is handled by tf.data.
        tf.logging.info('====================================')
        tf.logging.info('<<<< Training start! >>>>')
        tf.logging.info('[train] training_epochs = %s' %
                        train_config.training_epochs)
        tf.logging.info('------------------------------------')
        # Each iteration runs train_op once.
        for epoch in range(train_config.training_epochs):

            train_start_time = time.time()

            # train model
            _, loss_train = sess.run(
                [train_op, loss_op],
                feed_dict={
                    dataset_handle:
                    train_handle,
                    modelbuilder.dropout_keeprate:
                    model_config.output.dropout_keeprate
                })

            train_elapsed_time = time.time() - train_start_time
            global_step_eval = global_step.eval()

            # NOTE(review): with display_step == 0 the `continue` also skips
            # the checkpoint save at the end of the loop body -- confirm that
            # is intended.
            if train_config.display_step == 0:
                continue
            elif global_step_eval % train_config.display_step == 0:
                print('[train] curr epochs = %s' % epoch)

                # # valid model
                # Re-evaluate the heatmap loss with dropout disabled
                # (keep rate 1.0) on the train and validation handles.
                loss_train = loss_heatmap_op.eval(
                    feed_dict={
                        dataset_handle: train_handle,
                        modelbuilder.dropout_keeprate: 1.0
                    })
                loss_valid = loss_heatmap_op.eval(
                    feed_dict={
                        dataset_handle: valid_handle,
                        modelbuilder.dropout_keeprate: 1.0
                    })

                # tf summary
                summary_train = write_op.eval(
                    feed_dict={
                        dataset_handle: train_handle,
                        modelbuilder.dropout_keeprate: 1.0
                    })
                file_writer_train.add_summary(summary_train, global_step_eval)
                file_writer_train.flush()

                summary_valid = write_op.eval(
                    feed_dict={
                        dataset_handle: valid_handle,
                        modelbuilder.dropout_keeprate: 1.0
                    })
                file_writer_valid.add_summary(summary_valid, global_step_eval)
                file_writer_valid.flush()

                # NOTE(review): train_elapsed_time is a difference of
                # time.time() values, i.e. seconds, but the message says "ms".
                print('At step = %d, train elapsed_time = %.1f ms' %
                      (global_step_eval, train_elapsed_time))
                print("Training set loss (avg over batch)= %.2f   " %
                      (loss_train))
                print("valid set Err loss (total batch)= %.2f " % (loss_valid))
                print("--------------------------------------------")

            if global_step_eval % train_config.ckpt_step == 0:
                # NOTE(review): ckpt_dir and 'model.ckpt' are concatenated
                # without a path separator; presumably ckpt_dir ends with
                # '/' -- confirm.
                ckpt_save_path = saver.save(sess,
                                            train_config.ckpt_dir +
                                            'model.ckpt',
                                            global_step=global_step_eval)
                tf.logging.info("Global step - %s: Model saved in file: %s" %
                                (global_step_eval, ckpt_save_path))

        print("Training finished!")

    file_writer_train.close()
    file_writer_valid.close()
Beispiel #22
0
 def __init__(self):
     """Create the condensed model and an English Snowball stemmer."""
     built = ModelBuilder().build()
     self.model = built.condense().model
     self.stemmer = SnowballStemmer('english')
    dataset_root / '*/*/*.avi')).shuffle(buffer_size).batch(batch_size)

# Words that have a directory two levels below the dataset root; the word is
# the last component of each matching path.
words_in_dir = {entry.parts[-1] for entry in dataset_root.glob('*/*')}

# Map each such word to its line index in the dictionary file. Dictionary
# lines without a matching directory are skipped, so indices may have gaps.
index_by_word = {}
with open(str(dictionary_path)) as dictionary_file:
    for line_index, raw_line in enumerate(dictionary_file):
        candidate = raw_line.strip()
        if candidate not in words_in_dir:
            continue
        index_by_word[candidate] = line_index

num_words = len(index_by_word)

model_builder = ModelBuilder(num_frames, frame_shape, num_words, noise_dim,
                             learning_rate)

generator = model_builder.build_generator()
discriminator = model_builder.build_discriminator()

# Shared loss object; from_logits=True means it is given raw logits.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)


def discriminator_loss(real_output, fake_output):
    """Return the discriminator loss for one batch.

    Real outputs are scored against all-ones targets and fake outputs against
    all-zeros targets; the two cross-entropy terms are summed.
    """
    loss_on_real = cross_entropy(tf.ones_like(real_output), real_output)
    loss_on_fake = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake


def generator_loss(fake_output):
Beispiel #24
0
class RepresentationGenerator:
    """A representation (embeddings) generator for visualization of characters.

    When embeddings and labels are written to files, embeddings are written to
    '<out_file>_vec.tsv' and labels are written to '<out_file>_meta.tsv'.
    You can see a 3D visualization of the embeddings in a browser using the
    TensorFlow embedding projector. Go to https://projector.tensorflow.org/ and
    click the 'Load' button on the left-hand side. Load '<out_file>_vec.tsv' as
    vectors and '<out_file>_meta.tsv' as metadata to visualize embeddings.

    Initialization:
        >>> rg = RepresentationGenerator(config_path='configs/config.ini',
        ...                              out_dir='embeddings/')

    Configurations are all set in the .ini file. Change the path to a new
    config file to change configurations (this rebuilds the dataset builder and
    the model):
        >>> rg.config_path = 'configs/new_config.ini'

    Get representations for all images in a directory (IMPORTANT! Expect all
    images to be generated by VisualizationGenerator in vis_gen. Otherwise,
    make sure file names are in format 'U+XXXX_*' to use the 'char_as_label'
    feature when writing):
        >>> codepoints, embeddings = rg.get_embeddings(img_dir='test_imgs')

    Write codepoints and embeddings to file:
        >>> rg.write_embeddings_from_list(codepoints=codepoints,
        ...                               embeddings=embeddings,
        ...                               out_file='test')

    Write labels and embeddings to file directly from an image directory:
        >>> rg.write_embeddings_from_image(img_dir='test_imgs',
        ...                                out_file='test')
    """

    def __init__(self,
                 config_path='configs/sample_config.ini',
                 out_dir="embeddings"):
        """Set up the generator from a config file.

        Assigning ``config_path`` runs the property setter, which builds the
        dataset builder, the model builder and the encoder model.

        Args:
            config_path: Str, path to config (.ini) file. (default
                "configs/sample_config.ini")
            out_dir: Str, relative path of the output directory (default
                "embeddings").

        Raises:
            ValueError: if 'config_path' is not an existing file. Errors raised
                by DatasetBuilder / ModelBuilder while reading the config are
                propagated unchanged.
        """
        self._dataset_builder = None
        self._model_builder = None
        self._model = None
        self.config_path = config_path
        self.out_dir = out_dir

    @property
    def config_path(self):
        """
        Returns:
            self._config_path: Str, path to config file (.ini file).
        """
        return self._config_path

    @config_path.setter
    def config_path(self, config_path):
        """Load config file (.ini file) and get dataset builder and neural
        network model. Sets private attributes self._dataset_builder,
        self._model_builder, self._model and self._config_path. Note that this
        function does not parse the config file itself. It simply hands the
        path to the dataset builder and the model builder.

        Args:
            config_path: Str, path to config (.ini) file.

        Raises:
            ValueError: if 'config_path' is not an existing file.
        """
        if not os.path.isfile(config_path):
            raise ValueError("Specified config file does not exist.")

        # Get dataset builder based on config file
        self._dataset_builder = DatasetBuilder(config_path=config_path,
                                               one_hot=False)
        # Get model builder and get encoder (triplet model with weights)
        self._model_builder = ModelBuilder(config_path=config_path)
        self._model = self._model_builder.get_encoder()

        # Only record the path once everything above succeeded
        self._config_path = config_path

    @property
    def out_dir(self):
        """
        Returns:
            self._out_dir: Str, path to output directory.
        """
        return self._out_dir

    @out_dir.setter
    def out_dir(self, out_dir):
        """
        Args:
             out_dir: Str, relative path of the output directory.
        """
        self._out_dir = out_dir

    def get_embeddings(self, img_dir):
        """For the image files in 'img_dir', return their embeddings.

        Args:
            img_dir: Str, relative path to directory where all character images
                are stored.

        Returns:
            codepoints: List of Str, one per image: the file name up to (not
                including) its first '.'. For files generated by vis_gen this
                is 'CODEPOINT_FONTNAME[_FONTSTYLE]_ANTIALIAS'.
            embeddings: List of embeddings. Each element is the first row of
                the model prediction for the corresponding image.
        """
        # Get dataset with filename as label
        dataset = self._dataset_builder.get_filename_dataset(img_dir)

        codepoints = []
        embeddings = []
        print('Generating embeddings...')
        for count, (img, filename) in enumerate(dataset, start=1):
            # Progress report every 100 images
            if count % 100 == 0:
                print("Getting embedding #" + str(count) + ".")
            # Decode Tensor into string and drop everything after the first '.'
            filename_str = filename.numpy()[0].decode('utf-8')
            codepoints.append(filename_str.split('.')[0])

            embeddings.append(self._model.predict(img)[0])

        return codepoints, embeddings

    def write_embeddings_from_image(self,
                                    img_dir,
                                    out_file,
                                    char_as_label=True):
        """Get embeddings and write embeddings and labels to .tsv files. This
        function will write to two .tsv files: '<out_file>_vec.tsv' and
        '<out_file>_meta.tsv'. Entries in '<out_file>_vec.tsv' are separated by
        newline. Elements in each embedding are separated by tab. Entries in
        '<out_file>_meta.tsv' are separated by newline.

        Args:
            img_dir: Str, relative path to directory where all character images
                are stored.
            out_file: Str, name of the output file intended to write to.
            char_as_label: Bool, whether to use the character itself as label.
                Otherwise, the file-name-derived code point string is used.
        """
        # Get model predictions and unicode code points
        codepoints, embeddings = self.get_embeddings(img_dir=img_dir)

        # Write code points to '_meta.tsv' and embeddings to '_vec.tsv'
        self.write_embeddings_from_list(codepoints, embeddings, out_file,
                                        char_as_label)

    def write_embeddings_from_list(self,
                                   codepoints,
                                   embeddings,
                                   out_file,
                                   char_as_label=True):
        """Write labels and embeddings to file.

        Labels are validated / converted before anything is written, so a
        malformed code point does not leave a vector file without a matching
        label file.

        Args:
            codepoints: List of Str. When 'char_as_label' is True each element
                must start with 'U+XXXX' (optionally followed by '_...').
            embeddings: Sequence of embeddings, one per code point; written
                with numpy.savetxt using tab as delimiter.
            out_file: Str, name of the output file intended to write to.
            char_as_label: Bool, whether to use character as label. Otherwise,
                use code points.

        Raises:
            ValueError: if codepoints and embeddings do not have the same
                number of entries, or if a code point cannot be converted to a
                character.
        """
        # Throw exception if codepoint array and embedding array do not have
        # the same number of elements
        if len(codepoints) != len(embeddings):
            raise ValueError('Expect array codepoints and embeddings to have '
                             'the same number of elements.')

        # Change Unicode code point to character if specified
        labels = codepoints
        if char_as_label:
            try:
                # 'CODEPOINT_FONTNAME[_FONTSTYLE]_ANTIALIAS' -> 'CODEPOINT'
                # and then 'U+XXXX' -> char
                labels = [
                    chr(int('0x' + codepoint.split('_')[0][2:], 16))
                    for codepoint in codepoints
                ]
            except Exception:
                print('All entries of codepoints array must be in format: '
                      'CODEPOINT_FONTNAME[_FONTSTYLE]_ANTIALIAS. Example: '
                      'U+4eba_Noto Sans CJK SC_Default.')
                raise

        # Get absolute directory path, create new folder if needed.
        out_dir_abs = os.path.abspath(self.out_dir)
        os.makedirs(out_dir_abs, exist_ok=True)

        # Get absolute path to output files
        out_file_abs = os.path.join(out_dir_abs, out_file)
        out_file_vec_abs = out_file_abs + '_vec.tsv'
        out_file_meta_abs = out_file_abs + '_meta.tsv'

        # Write embeddings to file
        print("Writing embeddings to file {}...".format(out_file_vec_abs))
        np.savetxt(out_file_vec_abs, embeddings, delimiter='\t')
        print('Successfully written to file {}.'.format(out_file_vec_abs))

        # Write labels. Labels may be arbitrary Unicode characters, so the
        # encoding is fixed instead of depending on the platform default.
        print("Writing labels to file {}...".format(out_file_meta_abs))
        with open(out_file_meta_abs, "w", encoding="utf-8") as f_out:
            for label in labels:
                f_out.write(label)
                f_out.write('\n')
        print('Successfully written to file {}.'.format(out_file_meta_abs))
Beispiel #25
0
    print(f'mpm size: {len(model_builder.registered_bbs)}')
    print(ret)
    return should_end


if __name__ == '__main__':
    # sys.exit() is used instead of the exit() helper: exit() is injected by
    # the `site` module for interactive use and is not guaranteed to exist
    # (e.g. when running with `python -S`). The exit status is unchanged.
    import sys

    STUDENT_ID = os.environ.get('STUDENT_ID')
    print(f'Running student_id: {STUDENT_ID}')

    # Refuse to run when the result folder already exists.
    if not os.path.isdir(RESULT_DIR):
        print(f'Making folder for results: {RESULT_DIR}')
        os.mkdir(RESULT_DIR)
    else:
        print(f'Result folder already exists, abort. {RESULT_DIR}')
        sys.exit()

    should_end = False
    i_generation = 0
    api_connector = APIConnector(
        api_url=os.environ.get('API_URL'),
        student_id=STUDENT_ID,
    )
    model_builder = ModelBuilder()

    # Run generations until run_generation reports completion; generation
    # indices 0..50 are allowed before giving up.
    while not should_end:
        if i_generation > 50:
            print('Too many generations, abort.')
            sys.exit()
        print(f'Running generation {i_generation}...')
        should_end = run_generation(api_connector, model_builder, i_generation)
        i_generation += 1
Beispiel #26
0
def main():
    """Run the tracker over a dataset and write per-video result files.

    Reads its settings from the module-level ``args`` and ``cfg``: merges the
    config file, builds the model, loads the snapshot weights, builds the
    tracker and creates the dataset.

    * For 'VOT2016', 'VOT2018' and 'VOT2019' the tracker runs in restart mode.
      It is (re-)initialised from the ground truth at frame ``frame_counter``;
      when the overlap with the ground truth drops to zero the object counts
      as lost and re-initialisation is scheduled 5 frames later. Results go to
      ``results/<dataset>/<model_name>/baseline/<video>/<video>_001.txt``.
    * For every other dataset a single pass is made: the tracker is
      initialised on frame 0 with ``tracker.init_pln`` and each later frame is
      tracked with ``tracker.track_pln`` using a crop around the previous
      prediction. The result file layout depends on the dataset
      ('VOT2018-LT', 'GOT-10k', or the default flat layout).

    When ``args.video`` is non-empty only the video with that name is run.
    When ``args.vis`` is set, boxes are drawn and shown with OpenCV.
    """
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    # snapshot file name without directory and extension
    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # expand [x, y, w, h] into the 8-value corner polygon
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    # (re-)initialise from the ground truth; marker 1 in output
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object; marker 2 in output
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    # frame skipped while waiting for re-init; marker 0
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    # The builtin int replaces the np.int alias, which was
                    # removed from NumPy (1.24); the resulting dtype is the same.
                    cv2.polylines(
                        img, [np.array(gt_bbox, int).reshape(
                            (-1, 1, 2))], True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, int).reshape(
                                (-1, 1, 2))], True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        # integer entries are the state markers 0 / 1 / 2
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            print(
                '({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()

                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    print("imgsize:", img.shape)
                    # gt_0 =    # obtain its features from the model
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]

                    # init_pln is used here in place of tracker.init
                    tracker.init_pln(img, gt_bbox_)

                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)

                    print("idx:", idx)

                else:
                    # crop the current frame around the previous prediction
                    # and feed it to track_pln (in place of tracker.track)
                    pbox = pred_bboxes[idx - 1]
                    p_crop = tracker.get_crop(img, pbox)

                    outputs = tracker.track_pln(img, p_crop)
                    pred_bbox = outputs['bbox']

                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append(
                    (cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(
                        img, (gt_bbox[0], gt_bbox[1]),
                        (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                        (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]), (0, 255, 255),
                                  3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        # the init frame has no score: emit an empty line
                        f.write('\n') if x is None else f.write(
                            "{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.
                  format(v_idx + 1, video.name, toc, idx / toc))
# Model parameters read from the parsed config
curr_model = config['model-parameters']['model']
data_shape = json.loads(config['model-parameters']['input_shape'])
n_channels = json.loads(config['model-parameters']['n_channels'])
input_shape = tuple(data_shape + [n_channels])

# Optimizer hyperparameters read from the parsed config
optimizer = config['optimizer-hyperparameters']['optimizer']
loss = config['optimizer-hyperparameters']['loss']
lr = json.loads(config['optimizer-hyperparameters']['learning_rate'])

batch_size = json.loads(config['optimizer-hyperparameters']['batch_size'])
epochs = json.loads(config['optimizer-hyperparameters']['epochs'])
steps = json.loads(config['optimizer-hyperparameters']['steps_per_epoch'])
metrics = json.loads(config['optimizer-hyperparameters']['metrics'])

model = ModelBuilder(curr_model, classes, input_shape=input_shape)

# Load pre-trained weights if wanted
if json.loads(config['DEFAULT']['use_load_weights']):
    weight_folder = config['load-weights']['weights_folder']
    weights = config['load-weights']['weights']
    model.load_weights(os.path.join(weight_folder, weights))

train_data = os.listdir(train_data_dir)
data_len = len(train_data)

random.shuffle(train_data)

# Split the shuffled file list once: the first `val_count` entries become the
# validation set and the rest the training set. Both slices are taken from the
# same full list so the two sets are disjoint and no file is dropped
# (previously the validation slice was taken from the already-truncated
# training list, making it a subset of the training data).
val_range = json.loads(config['model-parameters']['val_range'])
val_count = int(data_len * val_range)
val_data = train_data[:val_count]
train_data = train_data[val_count:]
Beispiel #28
0
    def __init__(self, config_path='configs/sample_config.ini'):
        """Set up lookup tables, builders and training settings from a config.

        Creates the name -> class maps for learning-rate schedules, optimizers,
        losses and metrics, instantiates the dataset and model builders from
        'config_path', and reads the CLASSIFIER_TRAINING and TRIPLET_TRAINING
        sections of the config file into private attributes.

        Args:
            config_path: Str, path to config (.ini) file.

        Raises:
            ValueError: if a numeric config value cannot be converted.
            ValueError: if a configured optimizer is not in the optimizer map.
        """
        # Pre-defined learning rate schedules
        schedules = tf.keras.optimizers.schedules
        self._LR_SCHEDULE_MAP = dict(
            ExponentialDecay=schedules.ExponentialDecay,
            PiecewiseConstantDecay=schedules.PiecewiseConstantDecay,
            PolynomialDecay=schedules.PolynomialDecay,
        )

        # Pre-defined optimizers
        self._OPTIMIZER_MAP = dict(
            Adam=tf.keras.optimizers.Adam,
            RMSprop=tf.keras.optimizers.RMSprop,
        )

        # Pre-defined losses
        # IMPORTANT: DON'T USE TRIPLET HARD LOSS! EXTREMELY HARD TO TRAIN!
        self._LOSS_MAP = dict(
            CrossEntropy=tf.keras.losses.CategoricalCrossentropy,
            TripletHard=tfa.losses.TripletHardLoss,
            TripletSemiHard=tfa.losses.TripletSemiHardLoss,
        )

        # Pre-defined metrics
        self._METRIC_MAP = dict(Accuracy=tf.keras.metrics.CategoricalAccuracy)

        # Builders are created straight from the config path
        self.datset_builder = DatasetBuilder(config_path=config_path)
        self.model_builder = ModelBuilder(config_path=config_path)

        # Parse config file
        config = configparser.ConfigParser()
        config.read(config_path)

        def read_list(section, option, convert):
            # Comma-separated option -> list of stripped, converted items
            raw_value = config.get(section, option)
            return [convert(piece.strip()) for piece in raw_value.split(',')]

        # Classifier training settings
        cls_section = 'CLASSIFIER_TRAINING'
        self._CLS_CKPT_DIR = config.get(cls_section, 'CKPT_DIR')
        self._CLS_MAX_STEP = config.getint(cls_section, 'MAX_STEP')
        self._CLS_OPTIMIZER = config.get(cls_section, 'OPTIMIZER')
        self._CLS_LR_BOUNDARIES = read_list(cls_section, 'LR_BOUNDARIES', int)
        self._CLS_LR_VALUES = read_list(cls_section, 'LR_VALUES', float)

        # Triplet training settings
        tpl_section = 'TRIPLET_TRAINING'
        self._TPL_INIT_DIR = config.get(tpl_section, 'INIT_DIR')
        self._TPL_CKPT_DIR = config.get(tpl_section, 'CKPT_DIR')
        self._TPL_CYCLES = config.getint(tpl_section, 'CYCLES')
        self._TPL_EPOCHS = config.getint(tpl_section, 'EPOCHS')
        self._TPL_FILTER_SIZE = config.getint(tpl_section, 'FILTER_SIZE')
        self._TPL_MARGIN = config.getfloat(tpl_section, 'MARGIN')
        self._TPL_OPTIMIZER = config.get(tpl_section, 'OPTIMIZER')
        self._TPL_LR_VALUE = config.getfloat(tpl_section, 'LEARNING_RATE')
        self._TPL_FREEZE_VARS = read_list(tpl_section, 'FREEZE_VARS', str)

        # Reject optimizers that have no entry in the optimizer map
        optimizer_checks = (
            (self._CLS_OPTIMIZER, "CLASSIFIER_TRAINING OPTIMIZER not defined."),
            (self._TPL_OPTIMIZER, "TRIPLET_TRAINING OPTIMIZER not defined."),
        )
        for chosen, message in optimizer_checks:
            if chosen not in self._OPTIMIZER_MAP:
                raise ValueError(message)
Beispiel #29
0
def experiment_ford_helper(ckpt_dir, data_dir, plot_type="spectrogram",
                           sound_mode="save",
                           f0_denom=1.,
                           n_harmonic_distribution=60,
                           n_noise_magnitudes=65,
                           losses=None,
                           feature_domain="freq",
                           model=None):
    """Shared driver for all Ford experiments.

    Loads one batch from the TFRecord data in 'data_dir', encodes it with the
    model, decodes (synthesizes) audio from the features, plots input and
    output, and finally plays or saves both audio signals.

    Args:
        ckpt_dir: Checkpoint directory handed to ModelBuilder when no model is
            supplied.
        data_dir: Directory handed to TFRecordProvider.
        plot_type: "signal" plots audio in/out, f0 and scaled f0 in four
            subplots; "spectrogram" calls plot_audio_f0 once per audio signal.
            Any other value plots nothing.
        sound_mode: "play" plays both signals; "save" writes them to
            './audio_in.wav' and './audio_out.wav'. Any other value does
            neither.
        f0_denom, n_harmonic_distribution, n_noise_magnitudes, losses,
        feature_domain: Passed through to ModelBuilder when 'model' is None.
        model: Optional pre-built model; when None a
            "f0_rnn_fc_hpn_decoder" model is built.
    """
    logging.info("Loading data...")
    data_provider = TFRecordProvider(data_dir)
    input_tensor = data_provider.get_single_batch(batch_number=1)
    # NOTE(review): earlier revisions overrode input_tensor["f0"] at this point
    # with hand-made f0 contours for debugging; those experiments are disabled.

    logging.info("Building model...")
    if model is None:
        builder = ModelBuilder(model_type="f0_rnn_fc_hpn_decoder",
                               audio_rate=data_provider.audio_rate,
                               input_rate=data_provider.input_rate,
                               window_secs=data_provider.example_secs,
                               f0_denom=f0_denom,
                               checkpoint_dir=ckpt_dir,
                               n_harmonic_distribution=n_harmonic_distribution,
                               n_noise_magnitudes=n_noise_magnitudes,
                               losses=losses,
                               feature_domain=feature_domain)
        model = builder.build()

    logging.info("Normalizing inputs...")
    features = model.encode(input_tensor)

    logging.info("Synthesizing from f0 signal...")
    t_begin = time.time()
    output_tensor = model.decode(features, training=False)
    time_elapsed = time.time() - t_begin
    logging.info("Synthesis took %.3f seconds." % time_elapsed)

    logging.info("Plotting signals...")
    # First example of the batch for every signal
    audio_in = features["audio"].numpy()[0,:]
    audio_out = output_tensor.numpy()[0,:]
    f0 = input_tensor["f0"].numpy()[0,:]
    f0_scaled = features["f0_scaled"].numpy()[0,:]
    if plot_type == "signal":
        _, axes = plt.subplots(4, 1, figsize=(10, 8))
        for axis, series in zip(axes, (audio_in, audio_out, f0, f0_scaled)):
            axis.plot(series)
    elif plot_type == "spectrogram":
        n_fft = 4096
        n_mels = int(n_fft/8)
        titled_audio = (("recording", audio_in), ("synthesized", audio_out))
        for title, audio in titled_audio:
            plt.figure()
            plot_audio_f0(audio, data_provider.audio_rate, f0,
                          data_provider.input_rate, title=title,
                          n_fft=n_fft, n_mels=n_mels)
    plt.show()

    if sound_mode == "play":
        logging.info("Playing original audio...")
        sd.play(audio_in, data_provider.audio_rate)
        sd.wait()
        logging.info("Playing synthesized audio...")
        sd.play(audio_out, data_provider.audio_rate)
        sd.wait()
    elif sound_mode == "save":
        audio_in_path = "./audio_in.wav"
        audio_out_path = "./audio_out.wav"
        logging.info("Saving recorded audio to '%s'..." % audio_in_path)
        sf.write(audio_in_path, audio_in, data_provider.audio_rate)
        logging.info("Saving synthesized audio to '%s'..." % audio_out_path)
        sf.write(audio_out_path, audio_out, data_provider.audio_rate)
Beispiel #30
0
def main(argv=None):
    """Train the model on one well and evaluate a cascaded forecast.

    Steps, all driven by FLAGS: load and preprocess the well data, build the
    train/validation batches, train the model one epoch at a time (resetting
    its states after each epoch), plot the loss history, run a sliding-window
    inference starting at the observation day, rescale the predictions, plot
    them against the true 'WOPR' values and save the MAPE to the result file.

    Args:
        argv: Unused; accepted so the function can serve as an app entry point.
    """
    random.seed(2)
    gpu_devices = tf.config.experimental.list_physical_devices('GPU')
    print("Num GPUs Available: ", len(gpu_devices))

    # Load data and preprocess data
    print("Loading data...")
    data_reader = DataReader(FLAGS.DATA_PATH, FLAGS.DATA_FILENAME,
                             FLAGS.NUM_MODEL)
    well_dic = data_reader.create_well_dictionary()

    print("Preprocessing data...")
    target_well = well_dic[str(FLAGS.WELL_TO_LEARN)]
    test_model_data = target_well[str(FLAGS.TRUE_MODEL)]

    preprocessor = Preprocessor(FLAGS.NUM_MODEL, FLAGS.TRUE_MODEL)
    nonzero_well_data = preprocessor.remove_zero_wopr(target_well)
    serialized_data, end_indice = preprocessor.serialize_well_dataframe(
        nonzero_well_data)
    scaled_data, scaler = preprocessor.scale_serialzed_data(serialized_data)

    # Split dataset and prepare batch
    batch_reader = BatchReader(
        scaled_data=scaled_data,
        end_indice=end_indice,
        train_split=FLAGS.TRAIN_SPLIT,
        true_model=FLAGS.TRUE_MODEL,
        buffer_size=FLAGS.BUFFER_SIZE,
        batch_size=FLAGS.BATCH_SIZE,
    )

    train_data = batch_reader.get_train_batch()
    val_data = batch_reader.get_val_batch()
    train_total_seq_length = batch_reader.get_seq_length()

    # Define Model
    print("Defining model...")
    model_builder = ModelBuilder(FLAGS.BATCH_SIZE)
    model = model_builder.contruct_model()
    model.summary()

    # Set Training callbacks
    history_logger = HistoryLogger()

    # Train the model: one fit() call per epoch so that the model states can
    # be reset between epochs
    print("Begin training the model...")
    for epoch_idx in range(FLAGS.EPOCHS):
        print('epochs : {}'.format(epoch_idx + 1))
        model.fit(
            train_data,
            epochs=1,
            steps_per_epoch=train_total_seq_length / FLAGS.BATCH_SIZE,
            verbose=2,
            validation_data=val_data,
            validation_steps=100,
            use_multiprocessing=True,
            callbacks=[history_logger],
        )
        model.reset_states()

    # Save fig of loss history
    print("Saving loss history")
    plotter = Plotter(FLAGS.EPOCHS, FLAGS.WELL_TO_LEARN, FLAGS.TRUE_MODEL)
    plotter.plot_loss_history(history_logger.losses, history_logger.val_losses)

    # Inference (Cascade)
    print("Starting inference...")
    test_data = scaler.transform(test_model_data.values)
    total_timestep = test_data.shape[0]
    test_x, test_y = batch_reader.get_test_input_and_label(test_data)

    # Initial window: the BATCH_SIZE samples ending at the observation day
    window_start = FLAGS.OBSERVATION_DAY - FLAGS.BATCH_SIZE
    seq_in = test_x[window_start:FLAGS.OBSERVATION_DAY, :, :]
    # Output starts with the known history up to the observation day
    known_inputs = test_x[:FLAGS.INPUT_SEQUENCE, :1, :].flatten().tolist()
    known_labels = test_y[:FLAGS.OBSERVATION_DAY + 1].tolist()
    seq_out = known_inputs + known_labels

    pred_count = test_x.shape[0] - FLAGS.OBSERVATION_DAY

    # Do Inference from Observationday, sliding the window one step each time
    for shift in range(1, pred_count):
        pred_out = model.predict(seq_in)
        seq_out.append(pred_out[-1, :].item())
        seq_in = test_x[FLAGS.OBSERVATION_DAY - FLAGS.BATCH_SIZE +
                        shift:FLAGS.OBSERVATION_DAY + shift, :, :]

    model.reset_states()

    # Evaluate
    print("Start evaluating the model...")
    seq_out_array = np.asarray(seq_out)
    # Undo the scaling using the scaler parameters of the first feature
    prediction_val = (seq_out_array - scaler.min_[0]) / scaler.scale_[0]
    true_val = test_model_data['WOPR'].to_numpy()

    # Plot prediction result
    print("Saving prediction result...")
    plotter.plot_prediction(total_timestep, true_val, prediction_val)

    # Calculate error and save into file
    print("Calculate MAPE and save it to result file...")
    result_handler = ResultHandler(
        true_val=true_val,
        pred_val=prediction_val,
        well_to_learn=FLAGS.WELL_TO_LEARN,
        true_model=FLAGS.TRUE_MODEL,
    )
    result_handler.save_mape_to_csv(FLAGS.RESULT_FILENAME)

    # Clear Session
    tf.keras.backend.clear_session()
    print("Done")
Beispiel #31
0
def train(dataset_train, dataset_test):
    """Build the heatmap model graph and run the training loop.

    The graph is fed through placeholders: every loop iteration pulls one
    element from a one-shot iterator over ``dataset_train``, feeds it to the
    ``inputs`` / ``true_heatmap`` placeholders and runs a single optimizer
    step.

    Args:
        dataset_train: Dataset object providing ``make_one_shot_iterator()``;
            each element must unpack into ``(inputs, true_heatmap)`` values
            compatible with the placeholders created below.
        dataset_test: Accepted for interface compatibility but not used; no
            test-set evaluation is performed by this function.

    Side effects:
        Writes the graph, the training-loss summary and the learning-rate
        summary to ``train_config.tflogdir`` and prints progress whenever
        the global step is a multiple of ``train_config.display_step``
        (a ``display_step`` of 0 disables reporting).
    """
    model_config = ModelConfig()
    train_config = TrainConfig()

    # Never fed or read below; the op is still created so the graph is
    # built exactly as before.
    tf.placeholder(tf.string, shape=[])
    dataset_train_iterator = dataset_train.make_one_shot_iterator()

    inputs = tf.placeholder(dtype=model_config.dtype,
                            shape=[
                                train_config.batch_size,
                                model_config._input_size,
                                model_config._input_size,
                                model_config.input_chnum
                            ])

    true_heatmap = tf.placeholder(dtype=model_config.dtype,
                                  shape=[
                                      train_config.batch_size,
                                      model_config._output_size,
                                      model_config._output_size,
                                      model_config.output_chnum
                                  ])

    # model building =========================
    modelbuilder = ModelBuilder(model_config=model_config)
    pred_heatmap = modelbuilder.get_model(model_in=inputs, scope='model')

    # training ops =============================================
    loss_heatmap = train_config.loss_fn(true_heatmap -
                                        pred_heatmap) / train_config.batch_size
    loss_regularizer = tf.losses.get_regularization_loss()
    loss_op = loss_heatmap + loss_regularizer

    global_step = tf.Variable(0, trainable=False)

    lr_op = tf.train.exponential_decay(
        learning_rate=train_config.learning_rate,
        global_step=global_step,
        decay_steps=train_config.learning_rate_decay_step,
        decay_rate=train_config.learning_rate_decay_rate,
        staircase=True)

    opt_op = train_config.opt_fn(learning_rate=lr_op, name='opt_op')
    train_op = opt_op.minimize(loss_op, global_step)

    # For Tensorboard ===========================================
    file_writer = tf.summary.FileWriter(logdir=train_config.tflogdir)
    try:
        # The graph is recorded at this point, i.e. before the summary and
        # initializer ops below are added.
        file_writer.add_graph(tf.get_default_graph())

        tb_summary_loss_train = tf.summary.scalar('loss_train', loss_op)
        # Never evaluated (there is no test pass); created only so the graph
        # is built exactly as before.
        tf.summary.scalar('loss_test', loss_op)
        tb_summary_lr = tf.summary.scalar('learning_rate', lr_op)

        # training ==============================
        init_var = tf.global_variables_initializer()
        print('[train] training_epochs = %s' % train_config.training_epochs)
        print('------------------------------------')

        # build dataset ========================
        inputs_train_op, true_heatmap_train_op = \
            dataset_train_iterator.get_next()

        with tf.Session() as sess:
            # Run the variable initializer
            sess.run(init_var)

            for epoch in range(train_config.training_epochs):
                inputs_train, true_heatmap_train = sess.run(
                    [inputs_train_op, true_heatmap_train_op])

                train_start_time = time.time()

                _, loss_train = sess.run(
                    [train_op, loss_op],
                    feed_dict={
                        inputs:
                        inputs_train,
                        true_heatmap:
                        true_heatmap_train,
                        modelbuilder.dropout_keeprate:
                        model_config.output.dropout_keeprate
                    })

                # time.time() is in seconds; convert so the value matches
                # the "ms" unit printed in the progress message.
                train_elapsed_time = (time.time() - train_start_time) * 1000.0

                global_step_eval = global_step.eval()

                display_step = train_config.display_step
                if display_step == 0 or global_step_eval % display_step != 0:
                    continue

                print('[train] curr epochs = %s' % epoch)

                # Re-evaluate the loss on the same batch with dropout
                # disabled (keep rate 1.0) for the TensorBoard summary.
                summary_loss_train = tb_summary_loss_train.eval(
                    feed_dict={
                        inputs: inputs_train,
                        true_heatmap: true_heatmap_train,
                        modelbuilder.dropout_keeprate: 1.0
                    })
                summary_lr = tb_summary_lr.eval()

                file_writer.add_summary(summary_loss_train, global_step_eval)
                file_writer.add_summary(summary_lr, global_step_eval)

                print('At step = %d, train elapsed_time = %.1f ms' %
                      (global_step_eval, train_elapsed_time))
                print("Training set loss (avg over batch)= %.2f   " %
                      (loss_train))
                print("--------------------------------------------")

            print("Training finished!")
    finally:
        # Close the event file even when graph building or training fails.
        file_writer.close()
Beispiel #32
0
  model stores the path for model configurations.
  train stores the path for training configurations.
  test stores the path for testing configurations.
  gpu stores the gpu id used.
  """
  
  parser = argparse.ArgumentParser()
  parser.add_argument('--model', dest='model', type=str, required=True)
  parser.add_argument('--train', dest='train', type=str)
  parser.add_argument('--test', dest='test', type=str)
  args = parser.parse_args()

  # Build Model Graph from Config  
  model_config = configparser.ConfigParser()
  model_config.read(args.model)
  model = ModelBuilder(model_config)
  model.build_graph()
  model.compile()
  model.summary_txt()
  model.print_png()
  model.save_graph()

  # Train Model
  if args.train:
    train_config = configparser.ConfigParser()
    train_config.read(args.train)
    trainer = ModelTrainer(train_config)
    trainer.get_hyperparameters()
    trainer.get_train_set()
    trainer.get_dev_set()
    trainer.get_callbacks()
Beispiel #33
0
class ModelTrainer:
    """Train a classifier, then fine-tune it with triplet loss.

    All settings come from the ``CLASSIFIER_TRAINING`` and
    ``TRIPLET_TRAINING`` sections of an .ini config file; datasets and models
    are produced by ``DatasetBuilder`` and ``ModelBuilder`` created from the
    same file.
    """

    def __init__(self, config_path='configs/sample_config.ini'):
        """Read the training configuration and create dataset/model builders.

        Args:
            config_path: Str, path to config (.ini) file.

        Raises:
            ValueError: if values in config file does not have the correct
                type.
            ValueError: if optimizer does not exists in predefined map.
            configparser.Error: if a required section or option is missing
                (an unreadable config file shows up this way as well, since
                ``ConfigParser.read`` skips files it cannot open).
        """
        # Pre-defined learning rate schedules
        self._LR_SCHEDULE_MAP = {
            'ExponentialDecay':
                tf.keras.optimizers.schedules.ExponentialDecay,
            'PiecewiseConstantDecay':
                tf.keras.optimizers.schedules.PiecewiseConstantDecay,
            'PolynomialDecay':
                tf.keras.optimizers.schedules.PolynomialDecay,
        }

        # Pre-defined optimizers
        self._OPTIMIZER_MAP = {
            'Adam':
                tf.keras.optimizers.Adam,
            'RMSprop':
                tf.keras.optimizers.RMSprop,
        }

        # Pre-defined losses
        # IMPORTANT: DON'T USE TRIPLET HARD LOSS! EXTREMELY HARD TO TRAIN!
        self._LOSS_MAP = {
            'CrossEntropy':
                tf.keras.losses.CategoricalCrossentropy,
            'TripletHard':
                tfa.losses.TripletHardLoss,
            'TripletSemiHard':
                tfa.losses.TripletSemiHardLoss,
        }

        # Pre-defined metrics
        self._METRIC_MAP = {
            'Accuracy':
                tf.keras.metrics.CategoricalAccuracy,
        }

        # Get custom dataset
        # NOTE: the attribute name keeps its original spelling ("datset") so
        # existing users of this attribute keep working.
        self.datset_builder = DatasetBuilder(config_path=config_path)
        self.model_builder = ModelBuilder(config_path=config_path)

        # Parse config file
        config = configparser.ConfigParser()
        config.read(config_path)

        # Get classifier training config
        self._CLS_CKPT_DIR = config.get('CLASSIFIER_TRAINING', 'CKPT_DIR')
        self._CLS_MAX_STEP = config.getint('CLASSIFIER_TRAINING', 'MAX_STEP')
        self._CLS_OPTIMIZER = config.get('CLASSIFIER_TRAINING', 'OPTIMIZER')
        self._CLS_LR_BOUNDARIES = [
            int(boundary.strip()) for boundary in
            config.get('CLASSIFIER_TRAINING', 'LR_BOUNDARIES').split(',')
        ]
        self._CLS_LR_VALUES = [
            float(value.strip()) for value in
            config.get('CLASSIFIER_TRAINING', 'LR_VALUES').split(',')
        ]

        # Get triplet training config
        self._TPL_INIT_DIR = config.get('TRIPLET_TRAINING', 'INIT_DIR')
        self._TPL_CKPT_DIR = config.get('TRIPLET_TRAINING', 'CKPT_DIR')
        self._TPL_CYCLES = config.getint('TRIPLET_TRAINING', 'CYCLES')
        self._TPL_EPOCHS = config.getint('TRIPLET_TRAINING', 'EPOCHS')
        self._TPL_FILTER_SIZE = config.getint('TRIPLET_TRAINING', 'FILTER_SIZE')
        self._TPL_MARGIN = config.getfloat('TRIPLET_TRAINING', 'MARGIN')
        self._TPL_OPTIMIZER = config.get('TRIPLET_TRAINING', 'OPTIMIZER')
        self._TPL_LR_VALUE = config.getfloat('TRIPLET_TRAINING',
                                             'LEARNING_RATE')
        # Drop empty entries: an empty FREEZE_VARS option (or a trailing
        # comma) would otherwise produce the pattern '', which matches every
        # layer name.
        self._TPL_FREEZE_VARS = [
            var.strip() for var in
            config.get('TRIPLET_TRAINING', 'FREEZE_VARS').split(',')
            if var.strip()
        ]

        # Throw exception if optimizer is not defined
        if self._CLS_OPTIMIZER not in self._OPTIMIZER_MAP:
            raise ValueError("CLASSIFIER_TRAINING OPTIMIZER not defined.")
        if self._TPL_OPTIMIZER not in self._OPTIMIZER_MAP:
            raise ValueError("TRIPLET_TRAINING OPTIMIZER not defined.")

    def train_classifier(self):
        """Train classifier according to specs in config file."""
        # When training classifier, we use one-hot encoding as label
        self.datset_builder.ONE_HOT = True

        # Create full model using model_builder
        model, input_name = self.model_builder.create_full_model()
        # Sanity check
        model.summary()

        # Piecewise-constant learning rate schedule from the config values
        lr_schedule = self._LR_SCHEDULE_MAP['PiecewiseConstantDecay'](
            boundaries=self._CLS_LR_BOUNDARIES, values=self._CLS_LR_VALUES)
        # Use learning rate schedule to create optimizer
        optimizer = self._OPTIMIZER_MAP[self._CLS_OPTIMIZER](
            learning_rate=lr_schedule)
        # Create loss function
        loss = self._LOSS_MAP['CrossEntropy'](from_logits=True)
        # Add accuracy metrics
        accuracy = self._METRIC_MAP['Accuracy']()
        model.compile(optimizer=optimizer, loss=loss, metrics=[accuracy])

        # Build tf.estimator
        estimator = tf.keras.estimator.model_to_estimator(
            keras_model=model, model_dir=self._CLS_CKPT_DIR)
        train_spec = tf.estimator.TrainSpec(
            input_fn=self.datset_builder.get_train_input_fn(input_name),
            max_steps=self._CLS_MAX_STEP)
        eval_spec = tf.estimator.EvalSpec(
            input_fn=self.datset_builder.get_eval_input_fn(input_name))

        # Start training
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    def _freeze_vars(self, model):
        """Freeze layers of the model whose names match the regular
        expressions in self._TPL_FREEZE_VARS.

        Patterns are applied with ``re.match``, i.e. anchored at the start of
        the layer name. Empty patterns are ignored because they would match
        (and therefore freeze) every layer.

        Args:
            model: tf.keras.Model, the model within which layers are frozen.
        """
        # Compile the non-empty patterns once instead of per layer.
        patterns = [
            re.compile(text) for text in map(str, self._TPL_FREEZE_VARS)
            if text
        ]
        # Get layers that match at least one regular expression.
        freeze_layers = [
            layer for layer in model.layers
            if any(pattern.match(layer.name) for pattern in patterns)
        ]
        # Freeze layers.
        print('Freezing {} layers.'.format(str(len(freeze_layers))))
        for layer in freeze_layers:
            print('Freezing layer {}.'.format(layer.name))
            layer.trainable = False

    def train_triplet_transfer(self):
        """Train encoder with triplet loss according to specs in config file.

        Raises:
            FileNotFoundError: if no checkpoint is found in the configured
                ``INIT_DIR``.
        """
        # When training using triplet loss, we avoid using one-hot encoding
        self.datset_builder.ONE_HOT = False

        # Create full model using model_builder
        model, input_name = self.model_builder.create_full_model()
        # Sanity check
        model.summary()

        # Build optimizer
        optimizer = self._OPTIMIZER_MAP[self._TPL_OPTIMIZER](self._TPL_LR_VALUE)

        # Load initial weights from self._TPL_INIT_DIR
        init_dir = self._TPL_INIT_DIR
        latest = tf.train.latest_checkpoint(init_dir)
        if latest is None:
            # Fail with a clear message instead of an obscure error from
            # load_weights(None).
            raise FileNotFoundError(
                "No checkpoint found in TRIPLET_TRAINING INIT_DIR "
                "'{}'.".format(init_dir))
        model.load_weights(latest)

        # The first layer of the full model is treated as the backbone
        # (ResNet50 per the original author) whose layers may be frozen.
        resnet_model = model.layers[0]
        # Freeze specified variables
        self._freeze_vars(resnet_model)

        # Create loss function
        loss = self._LOSS_MAP['TripletSemiHard'](self._TPL_MARGIN)
        model.compile(optimizer=optimizer, loss=loss)

        # Train triplet model
        # In each cycle, a new training dataset with N labels are generated and
        # training is carried out for M epochs.
        # Total number of cycles = self._TPL_CYCLES
        # N = self._TPL_FILTER_SIZE
        # M = self._TPL_EPOCHS
        for cycle in range(1, self._TPL_CYCLES + 1):
            print('Cycle #{}'.format(cycle))
            train_dataset = self.datset_builder.get_train_dataset(
                filter_size=self._TPL_FILTER_SIZE)
            model.fit(train_dataset, epochs=self._TPL_EPOCHS)
            # Store weights every 50 cycles
            if cycle % 50 == 0:
                model.save_weights(self._TPL_CKPT_DIR + '_#{}'.format(cycle))
        model.save_weights(self._TPL_CKPT_DIR)
Beispiel #34
0
def _evaluate_split(test_model, ball, data_converter, data_path):
    """Feed every item found at ``data_path`` to a fresh Evaluator and return
    the value of its ``evaluate_model()`` (logged by the caller as accuracy).
    """
    evaluator = Evaluator(test_model, ball, data_converter)
    for item in WikilinksIterator(data_path).jsons():
        evaluator.evaluate_on(item)
    return evaluator.evaluate_model()


def main(args):
    """Train the model described by a JSON config and evaluate it.

    The config is copied into the experiment directory. After every epoch
    the weights are written to ``tmp-model.weights`` and evaluated on the
    dev split; after the last epoch the weights are written to
    ``final-model.weights`` and evaluated on both the dev and test splits.

    Args:
        args: Object with ``config`` (path to the JSON config file) and
            ``experiment_dir`` (directory receiving the config copy and the
            weight files) attributes.
    """
    # Context managers make sure both files are closed (and the copy is
    # flushed to disk) before training starts.
    with open(args.config, 'r') as config_file:
        config = json.load(config_file)
    # save config into experiment directory
    config_copy_path = os.path.join(args.experiment_dir, 'config.json')
    with open(config_copy_path, 'w') as config_copy:
        json.dump(config, config_copy)
    logging.info("Config json: %s", config)

    ball = Ball(config)

    file_config = config['files']
    # Split names are appended by plain concatenation, so this value is
    # expected to already end with a path separator.
    base_data_dir = file_config['base-data-dir']
    model_config = config['model']
    train_config = config['training']

    model_builder = ModelBuilder(model_config,
                                 ball.get_word_vecs(),
                                 ball.get_character_vecs(),
                                 ball.get_feature_indxr(),
                                 ball.get_ent_cbow_vecs())

    logging.info("Model Summary:")
    model_builder.build_f().summary()

    logging.info("Building model...")
    model = model_builder.build_trainable_model()
    logging.info("Model Built!")

    logging.info("Building data ball...")
    data_converter = DataConverter(ball)
    logging.info("Data ball built!")

    trainer = Trainer(model,
                      ball,
                      data_converter,
                      neg_sample_k=train_config['neg_samples'],
                      batch_size=train_config['batch_size'],
                      neg_sample_from_cands=train_config['neg_sample_from_cands'])

    # Optimization Loop
    logging.info("--- Starting optimization loop ---")
    tmp_weights_path = os.path.join(args.experiment_dir, 'tmp-model.weights')
    # range() rather than xrange() so the loop runs on Python 2 and 3 alike.
    for epoch in range(0, train_config['epochs']):
        logging.info("Starting epoch %d", epoch + 1)

        data_iterator = WikilinksIterator(base_data_dir + 'train')

        for item in data_iterator.jsons():
            trainer.train_on(item)
        trainer.epoch_done()

        logging.info("Finished training epoch %d", epoch + 1)
        # temp weight save for evaluation
        model.save_weights(tmp_weights_path)

        logging.info("Evaluating epoch %d", epoch + 1)
        test_model = model_builder.build_f(weights=tmp_weights_path)
        accuracy = _evaluate_split(test_model, ball, data_converter,
                                   base_data_dir + 'dev')
        logging.info("Model accuracy for epoch %d is %.2f", epoch + 1, accuracy)

    logging.info("Saving final model")
    final_weights_path = os.path.join(args.experiment_dir, 'final-model.weights')
    model.save_weights(final_weights_path)

    # The same reloaded model is used for both final evaluations.
    test_model = model_builder.build_f(weights=final_weights_path)

    logging.info("Starting final model dev evaluation")
    accuracy = _evaluate_split(test_model, ball, data_converter,
                               base_data_dir + 'dev')
    logging.info("Final model dev accuracy is %.2f", accuracy)

    logging.info("Starting final model test evaluation")
    accuracy = _evaluate_split(test_model, ball, data_converter,
                               base_data_dir + 'test')
    logging.info("Final model test accuracy is %.2f", accuracy)
Beispiel #35
0
def main():
    # load config
    cfg.merge_from_file(args.config)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    #model_name = args.snapshot.split('/')[-1].split('.')[0]
    #total_lost = 0

    #cur_dir = os.path.dirname(os.path.realpath(__file__))

    #dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    video_path = '/home/yuuzhao/Documents/project/pysot/testing_dataset/VOT2016'
    #lists = open('/home/lichao/tracking/LaSOT_Evaluation_Toolkit/sequence_evaluation_config/' + setfile + '.txt', 'r')
    #list_file = [line.strip() for line in lists]

    category = os.listdir(video_path)
    category.sort()

    # create dataset
    #dataset = DatasetFactory.create_dataset(name=args.dataset,dataset_root=dataset_root,load_img=False)

    template_acc = []
    template_cur = []
    init0 = []
    init = []
    pre = []
    gt = []  # init0 is reset init

    print("Category & Video:")
    for tmp_cat in category:
        tmp_cat_path = temp_path + '/' + tmp_cat
        if not os.path.isdir(tmp_cat_path):
            os.makedirs(tmp_cat_path)

        print("Category:", tmp_cat)
        video = os.listdir(join(video_path, tmp_cat))
        video.sort()
        #video_cut = video[0:frames_of_each_video]
        frame = 0

        #for picture in video_cut:  # 这个循环或许该去掉
        #    print("Frame:", picture)
        gt_path = join(video_path, tmp_cat, 'groundtruth.txt')

        ground_truth = np.loadtxt(gt_path, delimiter=',')
        # num_frames = len(ground_truth);  # num_frames = min(num_frames, frame_max)
        num_frames = frames_of_each_video
        # print("num_frames: ",num_frames)
        img_path = join(video_path, tmp_cat)
        # print("imgpath",img_path)
        imgFiles = [
            join(img_path, '%08d.jpg') % i for i in range(1, num_frames + 1)
        ]

        while frame < num_frames:
            print("frame:", frame)
            Polygon = ground_truth[frame]
            cx, cy, w, h = get_axis_aligned_bbox(Polygon)
            gt_rect = [cx, cy, w, h]

            image_file = imgFiles[frame]
            # target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
            img = cv2.imread(image_file)  # HxWxC

            if frame == 0:
                tracker.init(img, gt_rect)
            if w * h != 0:
                # image_file = imgFiles[frame]
                # target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
                # img = cv2.imread(image_file)  # HxWxC
                zf_acc = tracker.get_zf(img, gt_rect)

                output = tracker.track(img)
                pre_rect = output['bbox']
                zf_pre = tracker.get_zf(img, pre_rect)

                template_acc.append(zf_acc)
                template_cur.append((zf_pre))

                print("ACC&PRE")
                init0.append(0)
                init.append(frame)
                frame_reset = 0
                pre.append(0)
                gt.append(1)
                while frame < (num_frames - 1):
                    print("while ", frame, "<", num_frames)
                    frame = frame + 1
                    frame_reset = frame_reset + 1
                    image_file = imgFiles[frame]
                    if not image_file:
                        break

                    Polygon = ground_truth[frame]
                    cx, cy, w, h = get_axis_aligned_bbox(Polygon)
                    gt_rect = [cx, cy, w, h]

                    img = cv2.imread(image_file)  # HxWxC
                    zf_acc = tracker.get_zf(img, gt_rect)

                    output = tracker.track(img)
                    pre_rect = output['bbox']
                    zf_pre = tracker.get_zf(img, pre_rect)

                    # print("zf_pre:",zf_pre.shape)
                    # print("zf_acc:",zf_acc.shape)
                    # pdb.set_trace()
                    template_acc.append(zf_acc)
                    template_cur.append(zf_pre)
                    init0.append(frame_reset)
                    init.append(frame)
                    pre.append(1)
                    if frame == (num_frames - 1):  # last frame
                        print("if frame == num_frames-1")
                        gt.append(0)
                    else:
                        gt.append(1)

                    pre_rect_arr = np.array(pre_rect)
                    cx, cy, w, h = get_axis_aligned_bbox(pre_rect_arr)
                    target_pos, target_siz = np.array([cx,
                                                       cy]), np.array([w, h])

                    res = cxy_wh_2_rect(target_pos, target_siz)

                    if reset:
                        cx, cy, w, h = get_axis_aligned_bbox(
                            ground_truth[frame])
                        gt_rect = [cx, cy, w, h]
                        gt_rect = np.array(gt_rect)
                        iou = overlap_ratio(gt_rect, res)
                        if iou <= 0:
                            break
            else:
                print("else")
                template_acc.append(
                    torch.zeros([1, 3, 127, 127], dtype=torch.float32))
                template_cur.append(
                    torch.zeros([1, 3, 127, 127], dtype=torch.float32))
                init0.append(0)
                init.append(frame)
                pre.append(1)
                if frame == (num_frames - 1):  # last frame
                    gt.append(0)
                else:
                    gt.append(1)
            frame = frame + 1  # skip

        #写出一次
        #print("template_acc:",template_acc)
        #print("template_cur:",template_cur)
        #print("init:", init)
        #print("init0:",init0)
        #print("pre:",pre)

        #template_acc_con = np.concatenate(template_acc);
        #template_cur_con = np.concatenate(template_cur)

        print("write for each video")
        np.save(tmp_cat_path + '/template', template_acc)
        np.save(tmp_cat_path + '/templatei', template_cur)
        np.save(tmp_cat_path + '/init0', init0)
        np.save(tmp_cat_path + '/init', init)
        np.save(tmp_cat_path + '/pre', pre)
        np.save(tmp_cat_path + '/gt', gt)
    print("template")