def train(run_name, speaker, start_epoch, stop_epoch, img_c, img_w, img_h, frames_n, absolute_max_string_len, minibatch_size): DATASET_DIR = os.path.join(CURRENT_PATH, speaker, 'datasets') OUTPUT_DIR = os.path.join(CURRENT_PATH, speaker, 'results') LOG_DIR = os.path.join(CURRENT_PATH, speaker, 'logs') curriculum = Curriculum(curriculum_rules) lip_gen = BasicGenerator(dataset_path=DATASET_DIR, minibatch_size=minibatch_size, img_c=img_c, img_w=img_w, img_h=img_h, frames_n=frames_n, absolute_max_string_len=absolute_max_string_len, curriculum=curriculum, start_epoch=start_epoch).build() lipnet = LipNet(img_c=img_c, img_w=img_w, img_h=img_h, frames_n=frames_n, absolute_max_string_len=absolute_max_string_len, output_size=lip_gen.get_output_size()) lipnet.summary() adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08) # the loss calc occurs elsewhere, so use a dummy lambda func for the loss lipnet.model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=adam) # load weight if necessary if start_epoch > 0: weight_file = os.path.join(OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1))) lipnet.model.load_weights(weight_file) if start_epoch < 1: weight_file = os.path.join(OUTPUT_DIR, os.path.join(CURRENT_PATH,speaker,'results', 'weightsa.h5')) lipnet.model.load_weights(weight_file) spell = Spell(path=PREDICT_DICTIONARY) decoder = Decoder(greedy=PREDICT_GREEDY, beam_width=PREDICT_BEAM_WIDTH, postprocessors=[labels_to_text, spell.sentence]) # define callbacks statistics = Statistics(lipnet, lip_gen.next_val(), decoder, 256, output_dir=os.path.join(OUTPUT_DIR, run_name)) visualize = Visualize(os.path.join(OUTPUT_DIR, run_name), lipnet, lip_gen.next_val(), decoder, num_display_sentences=minibatch_size) tensorboard = TensorBoard(log_dir=os.path.join(LOG_DIR, run_name)) csv_logger = CSVLogger(os.path.join(LOG_DIR, "{}-{}.csv".format('training',run_name)), separator=',', append=True) checkpoint = ModelCheckpoint(os.path.join(OUTPUT_DIR, run_name, "weights{epoch:02d}.h5"), monitor='val_loss', save_weights_only=True, mode='auto', period=1) lipnet.model.fit_generator(generator=lip_gen.next_train(), steps_per_epoch=lip_gen.default_training_steps, epochs=stop_epoch, validation_data=lip_gen.next_val(), validation_steps=lip_gen.default_validation_steps, callbacks=[checkpoint, statistics, visualize, lip_gen, tensorboard, csv_logger], initial_epoch=start_epoch, verbose=1, max_q_size=5, workers=2, pickle_safe=True)
show_video_subtitle(frames=_video.face, subtitle=_align.sentence) print "Video: " print _video.length print np.array_equiv(_video.mouth, video.mouth), print np.array_equiv(_video.data, video.data), print np.array_equiv(_video.face, video.face) print "Align: " print labels_to_text(_align.padded_label.astype(np.int)) print _align.padded_label print _align.label_length print np.array_equiv(_align.sentence, align.sentence), print np.array_equiv(_align.label, align.label), print np.array_equiv(_align.padded_label, align.padded_label) curriculum = Curriculum(rules) video = Video(vtype='face', face_predictor_path= 'evaluation/models/shape_predictor_68_face_landmarks.dat') video.from_video('evaluation/samples/id2_vcd_swwp2s.mpg') align = Align( absolute_max_string_len=32, label_func=text_to_labels).from_file('evaluation/samples/swwp2s.align') print "=== TRAINING ===" for i in range(6): curriculum.update(i, train=True) print curriculum _video, _align, _ = curriculum.apply(video, align)