def main(_):
    """Distributed train-and-evaluate entry point for the DIN model.

    Builds a ParameterServerStrategy-backed Estimator from the parsed
    ``flags`` and runs ``train_and_evaluate``.
    """
    cpu = os.cpu_count()
    tf_config = _tf_config(flags)  # cluster spec + task info built from CLI flags
    # Distributed training reads the cluster layout from the TF_CONFIG
    # environment variable.
    os.environ['TF_CONFIG'] = json.dumps(tf_config)
    session_config = ConfigProto(device_count={'CPU': cpu},
                                 inter_op_parallelism_threads=cpu // 2,
                                 intra_op_parallelism_threads=cpu // 2,
                                 device_filters=flags.device_filters,
                                 allow_soft_placement=True)
    strategy = experimental.ParameterServerStrategy()
    run_config = estimator.RunConfig(
        **{
            'save_summary_steps': 100,
            'save_checkpoints_steps': 1000,
            'keep_checkpoint_max': 10,
            'log_step_count_steps': 100,
            'train_distribute': strategy,
            'eval_distribute': strategy,
        }).replace(session_config=session_config)

    model = estimator.Estimator(
        model_fn=model_fn,
        model_dir='/home/axing/din/checkpoints/din',  # in production this would be a distributed file system
        config=run_config,
        params={
            'tf_config': tf_config,
            'decay_rate': 0.9,
            'decay_steps': 10000,
            'learning_rate': 0.1
        })

    train_spec = estimator.TrainSpec(
        input_fn=lambda: input_fn(mode='train',
                                  num_workers=flags.num_workers,
                                  worker_index=flags.worker_index,
                                  pattern='/home/axing/din/dataset/*'),  # presumably shards by worker — confirm in input_fn
        max_steps=1000  # stop training after 1000 global steps
    )

    # The eval set shares the training-set path here only for the example;
    # a real deployment would point at a separate dataset.
    eval_spec = estimator.EvalSpec(
        input_fn=lambda: input_fn(mode='eval',
                                  pattern='/home/axing/din/dataset/*'),
        steps=100,  # evaluate 100 batches per evaluation round
        throttle_secs=60  # evaluate at most once every 60 seconds
    )
    estimator.train_and_evaluate(model, train_spec, eval_spec)
def main(_):
    """Train and evaluate the DIN model with a local (non-distributed) Estimator."""
    num_cpus = os.cpu_count()

    # Pin the graph to CPU only and split the available cores between the
    # inter-op and intra-op thread pools.
    session_config = tf.ConfigProto(
        device_count={'GPU': 0, 'CPU': num_cpus},
        inter_op_parallelism_threads=num_cpus // 2,
        intra_op_parallelism_threads=num_cpus // 2,
        device_filters=[],
        allow_soft_placement=True)

    run_config = tf.estimator.RunConfig(
        save_summary_steps=100,
        save_checkpoints_steps=1000,
        keep_checkpoint_max=10,
        log_step_count_steps=100).replace(session_config=session_config)

    model = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir='/home/axing/din/checkpoints/din',
        config=run_config,
        params={
            'decay_rate': 0.9,
            'decay_steps': 10000,
            'learning_rate': 0.1
        })

    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn(mode='train',
                                  pattern='/home/axing/din/dataset/*'))

    # The eval set shares the training-set path here only for the example;
    # a real deployment would use a separate dataset.
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn(mode='eval',
                                  pattern='/home/axing/din/dataset/*'),
        steps=100,         # evaluate 100 batches per round
        throttle_secs=60)  # at most one evaluation per 60 seconds

    tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
Exemple #3
0
def peek_classification(estimator, test_df):
    """Predict rows 150:200 of *test_df* and print predictions vs. ground truth.

    Args:
        estimator: a trained classifier exposing ``predict(input_fn)`` that
            yields dicts with a length-1 ``'class_ids'`` array per example.
        test_df: DataFrame with an ``'imInTram'`` boolean column; rows
            150:200 (50 rows) are evaluated.

    Prints a frame with the true label, the predicted class name, and a
    correctness flag. Returns None.
    """
    predict_df = test_df[150:200].copy()

    pred_iter = estimator.predict(lambda: input_fn(
        predict_df, num_epochs=1, shuffle=False, batch_size=10))

    classes = np.array(['outside', 'tram'])
    pred_class_id = []

    for pred_dict in pred_iter:
        # 'class_ids' is a length-1 array per example; take the scalar so the
        # collected predictions form a 1-D index array. (Appending the whole
        # array yields a 2-D (N, 1) array, which pandas rejects when
        # assigning a column.)
        pred_class_id.append(int(pred_dict['class_ids'][0]))

    predict_df['predicted_class'] = classes[np.array(pred_class_id)]
    predict_df['correct'] = (
        predict_df['predicted_class'] == 'tram') == predict_df['imInTram']

    print(predict_df[['imInTram', 'predicted_class', 'correct']])
Exemple #4
0
def build_model(filename):
    """Build a TF1 linear (wide-only) logistic-regression training graph.

    Reads one-shot batches from ``input_fn(filename)``, builds linear logits
    over the wide feature columns, a sigmoid + log-loss objective, an FTRL
    train op, and summaries.

    Args:
        filename: dataset path forwarded to ``input_fn``.

    Returns:
        dict with:
            'train': {'train_op', 'loss'} — ops to run per training step
            'init': {'global', 'local'} — initializer ops
            'global_variables': list of global variables
            'uninitialized': op reporting uninitialized variables
            'cols_to_vars': feature-column -> variable mapping
            'summary': merged summary op
            'global_step': global-step variable
    """
    # deep_columns is unused here: only the linear ("wide") part is built.
    wide_columns, deep_columns = build_model_columns()
    features, labels = input_fn(filename).make_one_shot_iterator().get_next()
    cols_to_vars = {}
    # linear_model fills cols_to_vars with the column -> variable mapping,
    # which is returned for inspection/debugging.
    linear_logits = tf.feature_column.linear_model(
        features=features,
        feature_columns=wide_columns,
        cols_to_vars=cols_to_vars)
    predictions = tf.reshape(tf.nn.sigmoid(linear_logits), (-1, ))
    loss = tf.reduce_mean(
        tf.losses.log_loss(labels=labels, predictions=predictions))
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.FtrlOptimizer(learning_rate=0.1,
                                       l1_regularization_strength=0.1,
                                       l2_regularization_strength=0.1)
    train_op = optimizer.minimize(loss, global_step=global_step)

    tf.summary.scalar('prediction/mean', tf.reduce_mean(predictions))
    tf.summary.histogram('prediction', predictions)
    tf.summary.scalar('metrics/loss', loss)
    summary = tf.summary.merge_all()

    global_vars = tf.global_variables()
    uninitialized = tf.report_uninitialized_variables(tf.global_variables())

    global_init = tf.global_variables_initializer()
    # tables_initializer is required for categorical-column lookup tables.
    local_init = [tf.local_variables_initializer(), tf.tables_initializer()]

    return {
        'train': {
            'train_op': train_op,
            'loss': loss,
        },
        'init': {
            'global': global_init,
            'local': local_init
        },
        'global_variables': global_vars,
        'uninitialized': uninitialized,
        'cols_to_vars': cols_to_vars,
        'summary': summary,
        'global_step': global_step,
    }
Exemple #5
0
def main():
    """Train a wide model in a raw TF1 session, then export a SavedModel."""
    # build graph
    features, labels = input_fn(data_file='census_data/adult.data'
                                ).make_one_shot_iterator().get_next()
    model = build_model(features=features, labels=labels, mode='train')

    # inspect graph variables
    for col, var in model['cols_to_vars'].items():
        print('Column:  ', col)
        print('Variable:', var)
        print('-' * 50)

    # create session
    with tf.Session() as sess:
        sess.run(model['init'])

        # NOTE: range(1, 1000) runs 999 steps, not 1000.
        for step in range(1, 1000):
            result = sess.run(model['train'])
            print('step =', step, 'loss =', result['loss'])

        signature_def_map = {}

        # Build a second, serving-mode graph fed by the serving input
        # receiver, and wrap its predictions as the default signature.
        receiver = input_receiver()
        receiver_tensors = receiver.receiver_tensors
        pred_model = build_model(features=receiver.features, mode='predict')
        output = tf.estimator.export.PredictOutput(
            outputs=pred_model['predictions'])
        signature_def_map['serving_default'] = output.as_signature_def(
            receiver_tensors)
        # print('sig def map:', signature_def_map)
        # Lookup tables must be (re)initialized when the SavedModel is
        # loaded, hence the legacy_init_op below.
        local_init_op = tf.group(
            tf.local_variables_initializer(),
            tf.tables_initializer(),
        )
        builder = tf.saved_model.builder.SavedModelBuilder('export/wdl_single')
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map=signature_def_map,
            assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
            legacy_init_op=local_init_op,
            strip_default_attrs=False)
        builder.save(as_text=False)
Exemple #6
0
                                        DEFINES.model_hidden_size,
                                        'ffn_hidden_size':
                                        DEFINES.ffn_hidden_size,
                                        'attention_head_size':
                                        DEFINES.attention_head_size,
                                        'learning_rate':
                                        DEFINES.learning_rate,
                                        'vocabulary_length':
                                        vocabulary_length,
                                        'layer_size':
                                        DEFINES.layer_size,
                                        'max_sequence_length':
                                        DEFINES.max_sequence_length
                                    })

# Training
tf.logging.set_verbosity(tf.logging.INFO)
classifier.train(input_fn=lambda: data.
                 input_fn(train_input_enc, train_input_dec, train_target_dec,
                          DEFINES.batch_size, DEFINES.train_repeats),
                 steps=1000)

# Evaluation (steps=1: a single batch is evaluated)
print("#### 평가 ####")
eval_result = classifier.evaluate(input_fn=lambda: data.input_fn(
    eval_input_enc, eval_input_dec, eval_target_dec, DEFINES.batch_size,
    DEFINES.train_repeats),
                                  steps=1)

print('\nEVAL set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
Exemple #7
0
# Log training progress at INFO level.
tf.logging.set_verbosity(tf.logging.INFO)

estimator = tf.estimator.Estimator(
    model_fn=model.model_fn,
    model_dir="./model",
    params={"learning_rate": 1e-4})

datasets = [
    "dataset/SponzaDay8.tfrecord"
]

# Train for NUM_EPOCHS passes; each estimator.train call resumes from the
# latest checkpoint in model_dir. (The input_fn's third argument is
# presumably the per-call epoch count — confirm against data.input_fn.)
train = True
if train:
    for i in range(NUM_EPOCHS):
        print("Starting epoch", i)
        estimator.train(input_fn=lambda: data.input_fn(datasets, True, 1))

# Disabled prediction/visualization path, kept for reference:
# obj = estimator.predict(
#     input_fn=lambda: data.input_fn1("KitchenEveningEval.tfrecord", 0))

# for p in obj:
#     rgb = np.zeros((1024, 1024, 3), 'uint8')
#     rgb[..., 0] = np.clip(
#         np.power(1.5 * p["orig"][:, :, 0], 1.0 / 2.2) * 255, 0, 255)
#     rgb[..., 1] = np.clip(
#         np.power(1.5 * p["orig"][:, :, 1], 1.0 / 2.2) * 255, 0, 255)
#     rgb[..., 2] = np.clip(
#         np.power(1.5 * p["orig"][:, :, 2], 1.0 / 2.2) * 255, 0, 255)

#     im = Image.fromarray(rgb, "RGB")
#     im.show()
# NOTE(review): this optimizer is constructed but never passed to the
# estimator below (adam_optimizer is used instead) — confirm intent.
adagrad_optimizer = tf.train.ProximalAdagradOptimizer(
    learning_rate=proxAdagrad_LEARNING_RATE, l1_regularization_strength=0.001)

tested_nums = []
accuracies = []
precisions = []
recalls = []
# NOTE(review): learning_rate is swept (a single value here) but never
# applied to the optimizer, so every iteration trains an identical model —
# verify whether the optimizer should be rebuilt per learning rate.
for learning_rate in [x * 0.01 for x in [1]]:
    print(f"testing: {learning_rate}")
    test_hidden_units = [15, 100]
    estimator = tf.estimator.DNNClassifier(
        feature_columns=[ax, ay, az, gx, gy, gz],
        hidden_units=test_hidden_units,
        optimizer=adam_optimizer)

    # Train for EPOCHS_NUM epochs (steps=None: run until the input is
    # exhausted), then evaluate one pass over the test set.
    estimator.train(input_fn=lambda: input_fn(
        train_df, num_epochs=EPOCHS_NUM, shuffle=False, batch_size=64),
                    steps=None)

    result = estimator.evaluate(
        lambda: input_fn(test_df, num_epochs=1, shuffle=False, batch_size=64))
    tested_nums.append(learning_rate)
    accuracies.append(result["accuracy"])
    precisions.append(result["precision"])
    recalls.append(result["recall"])
    print(f"A: {result['accuracy']},  B: {result['precision']}")
    estimator.export_savedmodel(
        export_dir_base='adam-dnn',
        serving_input_receiver_fn=serving_input_receiver_fn)
pl.show_multiple_series([accuracies, precisions, recalls])
# pprint_result(result)
# peek_classification(estimator, test_df)
Exemple #9
0
#     model_dir="./model",
#     params={"learning_rate": 1e-4,
#             "class_weights": [1.0 for i in range(100)]})
#
# for i in range(15):
#     estimator.train(input_fn=lambda: data.input_fn("wiki_crop", True, 1))
#     ev = estimator.evaluate(input_fn=lambda: data.input_fn("eval", False, 1))
#     print("Accuracy on test set: ", ev["accuracy"])
#
# res = estimator.predict(input_fn=lambda: data.input_fn("test", False, 1))
# for r in res:
#     print(r["age"])

# Choose the input pipeline: full training set (unbounded repeats) vs. a
# single pass over the test set.
train = True
if train:
    features, labels = data.input_fn("wiki_crop", True, None)
else:
    features, labels = data.input_fn("test", False, 1)

# NOTE(review): `input` shadows the builtin input(); renaming would be
# cleaner, but the module-level name may be referenced later in the script,
# so it is left unchanged here.
input = keras.layers.Input(tensor=features)
# Frozen ImageNet-pretrained InceptionV3 backbone; only the new head trains.
base_model = keras.applications.InceptionV3(weights='imagenet', include_top=False, input_tensor=input)
for layer in base_model.layers:
    layer.trainable = False
pooling = keras.layers.GlobalAveragePooling2D()(base_model.output)
fully_connected = keras.layers.Dense(1024, activation='relu')(pooling)
# 100-way softmax head (presumably one class per age bucket — confirm).
output = keras.layers.Dense(100, activation='softmax')(fully_connected)
model = keras.models.Model(input, output)
model.compile(
    optimizer=keras.optimizers.SGD(1e-4),
    loss='categorical_crossentropy',
    target_tensors=[labels],
Exemple #10
0

# Chat loop: read a question, run the model, sample and build an answer.
for i in range(10):
    question = input("Q: ")
    if question == 'quit':
        break

    predic_input_enc = data.data_processing([question], word2idx,
                                            DEFINES.enc_input)
    predic_output_dec = data.data_processing([""], word2idx, DEFINES.dec_input)
    predic_target_dec = data.data_processing([""], word2idx,
                                             DEFINES.dec_target)

    predictions = classifier.predict(input_fn=lambda: data.input_fn(
        predic_input_enc, predic_output_dec, predic_target_dec, 1, 1
    ))  # Each iteration rebuilds the network and reloads the checkpoint —
    # slow; a known Estimator characteristic that needs a workaround.

    # Collect the sampled index distribution for the answer sentence.
    prob = np.array([v['indexs'] for v in predictions])
    prob = np.squeeze(prob)

    # Sample an index for each answer position from its distribution.
    words_index = [pred_indices(p, beta=DEFINES.softmax_beta) for p in prob]

    # Convert the sampled indices back into an actual sentence.
    # NOTE(review): unlike the sibling example below, no space is appended
    # between words here — confirm whether that is intended.
    answer = ""
    for word in words_index:
        if word != 0 and word != 2:  # PAD = 0, END = 2
            answer += idx2word[word]
Exemple #11
0
# Chat loop: read a question, run the model, sample and build an answer.
for i in range(100):
    question = input("Q: ")
    if question == 'quit':
        break

    predic_input_enc = data.data_processing([question], word2idx,
                                            DEFINES.enc_input)
    predic_output_dec = data.data_processing([""], word2idx, DEFINES.dec_input)
    predic_target_dec = data.data_processing([""], word2idx,
                                             DEFINES.dec_target)

    # Slow: the model is rebuilt and the checkpoint reloaded on every call —
    # an Estimator characteristic that needs a different approach to fix.
    predictions = classifier.predict(input_fn=lambda: data.input_fn(
        predic_input_enc, predic_output_dec, predic_target_dec, 1, 1))

    # Collect the sampled index distribution for the answer sentence.
    prob = np.array([v['indexs'] for v in predictions])
    prob = np.squeeze(prob)

    # Sample an index for each answer position from its distribution.
    words_index = [pred_indices(p, beta=DEFINES.softmax_beta) for p in prob]

    # Convert the sampled indices back into an actual sentence, separating
    # words with spaces and skipping padding/end tokens.
    answer = ""
    for word in words_index:
        if word != 0 and word != 2:  # PAD = 0, END = 2
            answer += idx2word[word]
            answer += " "
Exemple #12
0
def train_per_eval(session_config):
    """Run one training segment of ``Hp.train_step_per_eval`` steps.

    Builds the training graph, optionally restores the latest (or an
    explicitly requested) checkpoint, then steps the train op until the
    global step reaches the next multiple of ``Hp.train_step_per_eval``,
    periodically saving checkpoints, audio samples, and summaries.

    Args:
        session_config: tf.ConfigProto used to create the session.
    """
    with tf.Session(config=session_config) as sess:
        batch_inputs = data.input_fn(mode="train")

        model = Model(mode="train", inputs=batch_inputs)

        print("Training Graph Loaded")
        global_step = 0

        saver = tf.train.Saver(max_to_keep=Hp.max_to_keep)

        init = tf.global_variables_initializer()
        sess.run(init)

        summary_writer = tf.summary.FileWriter(Hp.logdir, graph=sess.graph)

        if Hp.train_from_restore:
            latest_model = tf.train.latest_checkpoint(os.path.join(Hp.logdir, "models"))

            # Prefer an explicitly requested checkpoint over the latest one.
            if Hp.restore_model is not None and Hp.restore_model != latest_model:
                print("Restore Model from Specific Model")
                restore_model = Hp.restore_model
            else:
                print("Restore Model from Last Checkpoint")
                restore_model = latest_model

            saver.restore(sess, restore_model)

        # Step until global_step lands on a non-zero multiple of
        # train_step_per_eval (a restored step may start mid-segment).
        while global_step % Hp.train_step_per_eval != 0 or global_step == 0:
            out = sess.run([
                model.train_op,
                model.global_step,
                model.alignments,
                model.mag_hat,
                model.merged_labels,
                model.merged])  # BUG FIX: was `mode.merged` (NameError typo)

            global_step = out[1]
            summary = out[-1]
            summary_writer.add_summary(summary, global_step)

            if global_step % Hp.save_model_step == 0 and global_step != 0:
                print("Saving Model Weights during Training")
                save_model_dir = os.path.join(Hp.logdir, "models")
                if not os.path.exists(save_model_dir):
                    os.mkdir(save_model_dir)

                saver.save(sess, os.path.join(save_model_dir, "model"), global_step=global_step)
                # Subsequent segments restore from the checkpoint just saved.
                #Hp.restore_model = os.path.join(save_model_dir, "model.ckpt-"+str(global_step))
                Hp.train_from_restore = True

                print("saving Sample during Training")
                # Store a sample both on TensorBoard and as local files.
                save_sample_dir = os.path.join(Hp.logdir, "train")
                if not os.path.exists(save_sample_dir):
                    os.mkdir(save_sample_dir)

                wav_hat = signal_process.spectrogrom2wav(out[3][0])
                ground_truth = signal_process.spectrogrom2wav(out[4][0])
                signal_process.plot_alignment(out[2][0], gs=global_step, mode="save_fig", path=save_sample_dir)

                write(os.path.join(save_sample_dir, 'gt_{}.wav'.format(global_step)), Hp.sample_rate, ground_truth)
                write(os.path.join(save_sample_dir, 'hat_{}.wav'.format(global_step)), Hp.sample_rate, wav_hat)

                merged = sess.run(tf.summary.merge(
                    [tf.summary.audio("train/sample_gt" + str(global_step), tf.expand_dims(ground_truth, 0), Hp.sample_rate),
                     tf.summary.audio("train/sample_hat_gs" + str(global_step), tf.expand_dims(wav_hat, 0), Hp.sample_rate),
                     tf.summary.image("train/attention_gs" + str(global_step), signal_process.plot_alignment(out[2][0], gs=global_step, mode="with_return"))]))

                summary_writer.add_summary(merged, global_step)
Exemple #13
0
                    type=float,
                    default=0.001,
                    help="The learning rate for training.")
params = parser.parse_args()

# Checkpoint every 1000 steps; each checkpoint triggers the evaluation
# listeners attached to training below.
run_config = tf.estimator.RunConfig(
    model_dir=params.logdir,
    save_checkpoints_steps=1000,
)

estimator = tf.estimator.Estimator(model_fn=model.model_fn,
                                   config=run_config,
                                   params=params)

tf.logging.set_verbosity('INFO')

# Evaluate on both the validation split and the training split after every
# checkpoint so the two curves can be compared.
validation_listener = EvalCheckpointSaverListener(
    estimator,
    lambda: data.input_fn(eval=True, use_validation_set=True, params=params),
    "validation")
train_listener = EvalCheckpointSaverListener(
    estimator,
    lambda: data.input_fn(eval=True, use_validation_set=False, params=params),
    "train")

estimator.train(
    input_fn=lambda: data.input_fn(
        eval=False, use_validation_set=False, params=params),
    max_steps=100000,
    saving_listeners=[validation_listener, train_listener],
)
Exemple #14
0
)



tested_nums = []
accuracies = []
precisions = []
recalls = []

# Single-element sweep: only learning rate 0.001 is tried here. The value is
# recorded per iteration alongside the resulting metrics.
for learning_rate in [x * 0.001 for x in [1]]:
    print(f"testing: {learning_rate}")
    test_hidden_units = ok_hidden_units
    estimator = tf.estimator.DNNClassifier(
        feature_columns=[ax, ay, az, gx, gy, gz],
        hidden_units=test_hidden_units,
        optimizer=adadelta_optimizer)

    estimator.train(
        input_fn=lambda: input_fn(
            train_df, num_epochs=EPOCHS_NUM, shuffle=False, batch_size=64),
        steps=None)

    result = estimator.evaluate(
        lambda: input_fn(test_df, num_epochs=1, shuffle=False, batch_size=64))
    tested_nums.append(learning_rate)
    accuracies.append(result["accuracy"])
    precisions.append(result["precision"])
    recalls.append(result["recall"])
    print(f"A: {result['accuracy']},  B: {result['precision']}")
    estimator.export_savedmodel(
        export_dir_base='adam-dnn',
        serving_input_receiver_fn=serving_input_receiver_fn)

pl.show_multiple_series([accuracies, precisions, recalls])
pprint_result(result)
peek_classification(estimator, test_df)
Exemple #15
0
def main(args):
    """Train a model on an S3-hosted training set and publish the results.

    Steps: load S3 credentials, mirror the training set locally, assemble the
    training config, train under a tf.distribute strategy, export a
    SavedModel (optionally ONNX), write prediction/summary artifacts, and
    upload everything back to S3.

    Args:
        args: parsed argparse namespace (credentials path, training-set
            name/dir, model/output locations, training hyperparameters).
    """
    #tf.config.experimental_run_functions_eagerly(False)
    print('Start training')

    creds = {}
    with open(args.credentails) as json_file:
        creds = json.load(json_file)
    if not creds:
        # NOTE(review): the message says "Exiting" but execution continues;
        # the s3store() call below will then fail — consider returning here.
        print('Failed to load credentials file {}. Exiting'.format(args.credentails))

    s3def = creds['s3'][0]
    s3 = s3store(s3def['address'],
                 s3def['access key'],
                 s3def['secret key'],
                 tls=s3def['tls'],
                 cert_verify=s3def['cert_verify'],
                 cert_path=s3def['cert_path']
                 )

    # Mirror the training set from S3 into the local training directory.
    trainingset = '{}/{}/'.format(s3def['sets']['trainingset']['prefix'], args.trainingset)
    print('Load training set {}/{} to {}'.format(s3def['sets']['trainingset']['bucket'], trainingset, args.trainingset_dir))
    s3.Mirror(s3def['sets']['trainingset']['bucket'], trainingset, args.trainingset_dir)

    trainingsetDescriptionFile = '{}/description.json'.format(args.trainingset_dir)
    trainingsetDescription = json.load(open(trainingsetDescriptionFile))

    training_percent = 0.8
    # NOTE(review): the misspelled keys 'descripiton' and 'traningset' are
    # kept as-is — this dict is serialized to description.json and consumers
    # may already depend on them.
    config = {
        'descripiton': args.description,
        'traningset': trainingset,
        'trainingset description': trainingsetDescription,
        'batch_size': args.batch_size,
        'classScale': 0.001,  # scale value for each product class
        'augment_rotation': 15.,  # rotation in degrees
        'augment_flip_x': False,
        'augment_flip_y': True,
        'augment_brightness': 0.,
        'augment_contrast': 0.,
        'augment_shift_x': 0.1,  # in fraction of image
        'augment_shift_y': 0.1,  # in fraction of image
        'scale_min': 0.5,  # in fraction of image
        'scale_max': 2.0,  # in fraction of image
        'input_shape': [args.size_y, args.size_x, args.depth],
        'shape': (args.size_y, args.size_x, args.depth),
        'split': tfds.Split.TRAIN,
        'classes': trainingsetDescription['classes']['classes'],
        'learning_rate': args.learning_rate,
        'init_weights': None,
        'clean': args.clean,
        'epochs': args.epochs,
        'training_percent': training_percent,
        'training': 'train[:{}%]'.format(int(100 * training_percent)),
        'validation': 'train[{}%:]'.format(int(100 * training_percent)),
        'channel_order': args.channel_order,
        's3_address': s3def['address'],
        's3_sets': s3def['sets'],
        'initialmodel': args.initialmodel,
        'training_dir': args.training_dir,
    }

    if args.initialmodel is None or len(args.initialmodel) == 0:
        config['initialmodel'] = None
    if args.training_dir is None or len(args.training_dir) == 0:
        # BUG FIX: the original stored a tempfile.TemporaryDirectory object
        # here, which breaks the os.path.exists/os.makedirs/shutil.rmtree
        # calls below (they need a path string). mkdtemp creates the
        # directory and returns its path.
        config['training_dir'] = tempfile.mkdtemp(prefix='train', dir='.')

    if args.clean:
        shutil.rmtree(config['training_dir'], ignore_errors=True)

    # Choose the distribution strategy: all listed devices (mirrored) or a
    # single device (the first listed, defaulting to /gpu:0).
    if args.strategy == 'mirrored':
        strategy = tf.distribute.MirroredStrategy(devices=args.devices)
    else:
        device = "/gpu:0"
        # BUG FIX: was `len(args.devices > 0)`, which raises TypeError.
        if args.devices is not None and len(args.devices) > 0:
            device = args.devices[0]

        strategy = tf.distribute.OneDeviceStrategy(device=device)

    print('{} distribute with {} GPUs'.format(args.strategy, strategy.num_replicas_in_sync))

    savedmodelpath = '{}/{}'.format(args.savedmodel, args.savedmodelname)
    if not os.path.exists(savedmodelpath):
        os.makedirs(savedmodelpath)
    if not os.path.exists(config['training_dir']):
        os.makedirs(config['training_dir'])

    with strategy.scope():
        save_callback = tf.keras.callbacks.ModelCheckpoint(filepath=args.model_dir, monitor='loss', verbose=0, save_weights_only=False, save_freq='epoch')
        # NOTE(review): histogram_freq normally expects an int — confirm
        # 'epoch' is accepted by the installed TF version. (The callback is
        # constructed but not enabled below.)
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=args.model_dir, histogram_freq='epoch')
        callbacks = [
            #save_callback,
            #tensorboard_callback
        ]

        train_dataset = input_fn('train', args.trainingset_dir, config)
        val_dataset = input_fn('val', args.trainingset_dir, config)

        # Default the set sizes to one batch, then override with the real
        # sizes from the training-set description when available.
        train_images = config['batch_size']  # guess if not provided
        val_images = config['batch_size']

        for dataset in trainingsetDescription['sets']:
            if dataset['name'] == "train":
                train_images = dataset["length"]
            if dataset['name'] == "val":
                val_images = dataset["length"]
        steps_per_epoch = int(train_images / config['batch_size'])
        validation_steps = int(val_images / config['batch_size'])

        # Fast smoke-test mode: cap the step counts and run a single epoch.
        if args.min:
            steps_per_epoch = min(args.min_steps, steps_per_epoch)
            validation_steps = min(args.min_steps, validation_steps)
            config['epochs'] = 1

        model = LoadModel(config, s3, args.model_dir)

        # Display model
        model.summary()

        print("Fit model to data")
        model_history = model.fit(train_dataset,
                                  validation_data=val_dataset,
                                  epochs=config['epochs'],
                                  steps_per_epoch=steps_per_epoch,
                                  validation_steps=validation_steps,
                                  callbacks=callbacks)

        history = model_history.history
        loss = history['loss'] if 'loss' in history else []
        val_loss = history['val_loss'] if 'val_loss' in history else []

        graph_history(loss, val_loss, savedmodelpath)

    print("Create saved model")
    model.save(savedmodelpath, save_format='tf')

    if args.saveonnx:
        onnx_req = "python -m tf2onnx.convert --saved-model {0} --opset 10 --output {0}/model.onnx".format(savedmodelpath)
        os.system(onnx_req)

    print("Compute results")
    PrepareInference(dataset=train_dataset, model=model)
    CreatePredictions(dataset=train_dataset, model=model, config=config, outpath=savedmodelpath, imgname='train')
    CreatePredictions(dataset=val_dataset, model=model, config=config, outpath=savedmodelpath, imgname='val')

    print("Save results")
    model_description = {'config': config,
                         'results': history
                         }
    WriteDictJson(model_description, '{}/description.json'.format(savedmodelpath))

    # Save confusion matrix: https://www.kaggle.com/grfiv4/plot-a-confusion-matrix
    saved_name = '{}/{}'.format(s3def['sets']['model']['prefix'], args.savedmodelname)
    print('Save model to {}/{}'.format(s3def['sets']['model']['bucket'], saved_name))
    if s3.PutDir(s3def['sets']['model']['bucket'], savedmodelpath, saved_name):
        shutil.rmtree(savedmodelpath, ignore_errors=True)

    # Remove the local training dir if it was temporary or cleaning is on.
    if args.clean or args.training_dir is None or len(args.training_dir) == 0:
        shutil.rmtree(config['training_dir'], ignore_errors=True)
Exemple #16
0
def eval(session_config):
    """Evaluate the latest checkpoint and write audio/attention summaries.

    Restores the most recent checkpoint, runs ``Hp.eval_sample_num //
    Hp.eval_batch_size`` evaluation batches, appends the mean loss to a log
    file, and saves a ground-truth/predicted audio sample plus an attention
    plot both locally and to TensorBoard.

    NOTE(review): the name shadows the builtin ``eval``; renaming would
    change the public interface, so it is kept.

    Args:
        session_config: tf.ConfigProto used to create the session.
    """
    with tf.Session(config=session_config) as sess:
        batch_inputs = data.input_fn(mode="eval")

        model = Model(mode="eval", inputs=batch_inputs)

        print("Evaluation Graph Loaded")

        saver = tf.train.Saver(max_to_keep=Hp.max_to_keep)

        init = tf.global_variables_initializer()
        sess.run(init)

        summary_writer = tf.summary.FileWriter(Hp.logdir, graph=sess.graph)

        latest_model = tf.train.latest_checkpoint(
            os.path.join(Hp.logdir, "models"))
        # Checkpoint files are named "model-<global_step>".
        global_step = int(latest_model.split('-')[1])

        saver.restore(sess, latest_model)
        print("Restore Model from Last Checkpoint")

        rounds = Hp.eval_sample_num // Hp.eval_batch_size
        loss = []
        mag_hat = []
        mag_gt = []
        mel_hat = []
        mel_gt = []
        align = []

        for i in range(rounds):
            out = sess.run([
                model.loss, model.mag_hat, model.merged_labels, model.mel_hat,
                model.inputs_reference, model.alignments
            ])

            loss.append(out[0])
            mag_hat.extend(out[1])
            mag_gt.extend(out[2])
            mel_hat.extend(out[3])
            mel_gt.extend(np.concatenate(out[4], axis=0))
            align.extend(out[5])

        print("saving Sample during Evaluation")
        # Store a sample both on TensorBoard and as local files.
        save_sample_dir = os.path.join(Hp.logdir, "eval")
        if not os.path.exists(save_sample_dir):
            os.mkdir(save_sample_dir)

        with open(os.path.join(save_sample_dir, "loss"), 'a+') as fout:
            fout.write("Step:{}\tLoss:{}\n".format(global_step,
                                                   np.mean(np.array(loss))))

        wav_hat = signal_process.spectrogrom2wav(mag_hat[0])
        ground_truth = signal_process.spectrogrom2wav(mag_gt[0])
        signal_process.plot_alignment(align[0],
                                      gs=global_step,
                                      mode="save_fig",
                                      path=save_sample_dir)

        write(os.path.join(save_sample_dir, 'gt_{}.wav'.format(global_step)),
              Hp.sample_rate, ground_truth)
        write(os.path.join(save_sample_dir, 'hat_{}.wav'.format(global_step)),
              Hp.sample_rate, wav_hat)

        merged = sess.run(
            tf.summary.merge([
                tf.summary.audio("eval/sample_gt" + str(global_step),
                                 tf.expand_dims(ground_truth, 0),
                                 Hp.sample_rate),
                tf.summary.audio("eval/sample_hat_gs" + str(global_step),
                                 tf.expand_dims(wav_hat, 0), Hp.sample_rate),
                tf.summary.image(
                    "eval/attention_gs" + str(global_step),
                    # BUG FIX: plot the first attention alignment (align[0]),
                    # matching the figure saved above. The original passed
                    # out[2][0] — a slice of merged_labels from the *last*
                    # batch, relying on the loop variable leaking.
                    signal_process.plot_alignment(align[0],
                                                  gs=global_step,
                                                  mode="with_return"))
            ]))

        summary_writer.add_summary(merged, global_step)