def train_model(model, X_train, X_test, y_train, y_test):
    # Save a checkpoint whenever the validation loss improves
    cp = ModelCheckpoint('model-{epoch:03d}.h5', monitor='val_loss', verbose=0,
                         save_best_only=True, mode='auto')
    model.compile(loss='mean_squared_error', optimizer=Nadam(lr=1.0e-4))
    model.fit_generator(batch_generator('data', X_train, y_train, 40, True),
                        samples_per_epoch=20000,
                        nb_epoch=30,
                        max_q_size=1,
                        validation_data=batch_generator('data', X_test, y_test, 40, False),
                        nb_val_samples=len(X_test),
                        callbacks=[cp],
                        verbose=1)
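# `train_model` above relies on a project-specific `batch_generator(data_dir, X, y,
# batch_size, is_training)` helper that is not shown here. The sketch below only
# illustrates the general pattern (an endless generator yielding (inputs, targets)
# batches); it drops the data directory argument and any image loading/augmentation
# the real helper presumably performs.
import numpy as np

def batch_generator_sketch(X, y, batch_size, shuffle):
    """Yield (inputs, targets) batches forever, reshuffling each pass when asked."""
    X, y = np.asarray(X), np.asarray(y)
    num_samples = len(X)
    while True:
        order = np.random.permutation(num_samples) if shuffle else np.arange(num_samples)
        for start in range(0, num_samples, batch_size):
            idx = order[start:start + batch_size]
            yield X[idx], y[idx]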
# Lists of artificially created anomalous test files
anomalous_test_data_list_artificial_hot_towers = os.listdir(
    data_folder.replace("good_2016", "bad_2016/hot_towers"))
anomalous_test_data_list_artificial_missing_modules = os.listdir(
    data_folder.replace("good_2016", "bad_2016/missing_modules"))
# print(anomalous_test_data_list_natural)
# print(anomalous_test_data_list_artificial_hot_towers)
# print(anomalous_test_data_list_artificial_missing_modules)

print("Current training set is made from " + str(len(train_data_list)) + " files and has "
      + str(helper_functions.get_num_samples(train_data_list)) + " examples")
my_training_data_generator = helper_functions.batch_generator(
    4, train_data_list, group=image_type, prep_level=args.prep_level)
training_losses = []
for batch in my_training_data_generator:
    # Autoencoder reconstruction loss: the batch serves as both input and target
    loss = trained_model.evaluate(batch, batch, batch_size=4, verbose=0)
    training_losses.append(loss)

print("Current test set is made from " + str(len(test_data_list)) + " files and has "
      + str(helper_functions.get_num_samples(test_data_list)) + " examples")
my_test_data_generator = helper_functions.batch_generator(
    4, test_data_list, group=image_type, prep_level=args.prep_level)
test_losses = []
for batch in my_test_data_generator:
    loss = trained_model.evaluate(batch, batch, batch_size=4, verbose=0)
    test_losses.append(loss)
+ str(args.regularize)

# Create a secondary validation model to run our similarity checks during training
similarity = dot([word_embedding, context_embedding], axes=1, normalize=True)
validation_model = Model(inputs=[word_index, context_index], outputs=[similarity])
sim_cb = helpers.SimilarityCallback(validation_model=validation_model)

loss_plot = TensorBoard(log_dir=train_name + '_logs', write_graph=False)
earlystopping = EarlyStopping(monitor='loss', min_delta=0.0001, patience=1,
                              verbose=1, mode='auto')

# How many batches per epoch we will ask the batch generator to yield
steps = no_train_pairs // batch_size

# Let's start training!
start = time.time()
history = keras_model.fit_generator(
    helpers.batch_generator(wordpairs, vocab_dict, vocab_size, negative,
                            batch_size, args.use_neighbors, neighbors_count),
    callbacks=[sim_cb, loss_plot, earlystopping],
    steps_per_epoch=steps,
    epochs=args.epochs,
    workers=cores,
    verbose=2)
end = time.time()
print('Training took:', int(end - start), 'seconds', file=sys.stderr)

# Saving the resulting vectors:
filename = train_name + '_' + run_name + '.vec.gz'
helpers.save_word2vec_format(filename, vocab_dict,
                             word_embedding_layer.get_weights()[0])
backend.clear_session()
# sim_cb = helpers.SimilarityCallback(validation_model=validation_model)
loss_plot = TensorBoard(log_dir=train_name + '_logs', write_graph=False)
earlystopping = EarlyStopping(monitor='loss', min_delta=0.0001, patience=1,
                              verbose=1, mode='auto')

# How many batches per epoch we will ask the batch generator to yield
steps = no_train_pairs // batch_size

# Let's start training!
start = time.time()
history = keras_model.fit_generator(
    helpers.batch_generator(wordpairs, vocab_dict, vocab_size, negative,
                            batch_size, args.use_neighbors, neighbors_count),
    callbacks=[loss_plot, earlystopping],
    steps_per_epoch=steps,
    epochs=args.epochs,
    workers=1,
    verbose=2)
end = time.time()
print('Training took:', int(end - start), 'seconds', file=sys.stderr)

# Saving the resulting vectors:
filename = train_name + '_' + run_name + '.vec.gz'
helpers.save_word2vec_format(filename, vocab_dict,
                             word_embedding_layer.get_weights()[0])
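# `helpers.SimilarityCallback` is project-specific and its code is not shown above. The
# sketch below assumes it uses the `validation_model` (cosine similarity between word and
# context embeddings) to print the nearest neighbours of a few probe words after each
# epoch; the probe indices and the index2word lookup are assumptions for illustration.
import numpy as np
from keras.callbacks import Callback

class SimilarityCallbackSketch(Callback):
    def __init__(self, validation_model, probe_indices, index2word, vocab_size, top_k=5):
        super(SimilarityCallbackSketch, self).__init__()
        self.validation_model = validation_model
        self.probe_indices = probe_indices  # word indices whose neighbours we inspect
        self.index2word = index2word        # mapping from index back to the word string
        self.vocab_size = vocab_size
        self.top_k = top_k

    def on_epoch_end(self, epoch, logs=None):
        for probe in self.probe_indices:
            # Score the probe word against every word in the vocabulary
            words = np.full(self.vocab_size, probe, dtype='int32')
            contexts = np.arange(self.vocab_size, dtype='int32')
            sims = self.validation_model.predict([words, contexts], verbose=0).flatten()
            nearest = (-sims).argsort()[1:self.top_k + 1]  # skip the probe word itself
            print(self.index2word[probe], '->', [self.index2word[i] for i in nearest])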
X_val = {'ims': np.array(validation_ims),
         'bxs': all_validation_bxs,
         'detectors': val_detectors_mask,
         'true_bxs': val_matching_true_boxes}
data = {'train': X_train, 'val': X_val}

### TRAIN
num_epochs = 10
batch_size = 32
steps_per_epoch_fit = int(np.ceil(len(data['train']['ims']) / batch_size))
steps_per_epoch_val = int(np.ceil(len(data['val']['ims']) / batch_size))
fit_gen = batch_generator('train', batch_size, shuffle=True)
val_gen = batch_generator('val', batch_size)

model_body, model, global_step, metrics = create_model(anchors, class_names,
                                                       reset_weights=False)
# The model already outputs the YOLO loss, so the Keras loss just passes it through
model.compile(optimizer='adam',
              loss={'yolo_loss': lambda y_true, y_pred: y_pred},
              metrics=metrics)
annealing_step = (steps_per_epoch_fit * num_epochs) / 4
hist = model.fit_generator(generator=fit_gen,
                           steps_per_epoch=steps_per_epoch_fit,
                           validation_data=val_gen,
                           validation_steps=steps_per_epoch_val,
                           epochs=num_epochs)
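# `batch_generator(split, batch_size, ...)` above is assumed to be a closure over the
# `data` dict. A minimal sketch of that idea follows: because the model's `yolo_loss`
# output is already the loss value (see the pass-through Keras loss above), the generator
# feeds all four arrays as inputs together with a dummy zero target. This illustration
# assumes the per-split entries are NumPy arrays; it is not the original helper.
def batch_generator_sketch(split, batch_size, shuffle=False):
    split_data = data[split]
    num_samples = len(split_data['ims'])
    while True:
        order = np.random.permutation(num_samples) if shuffle else np.arange(num_samples)
        for start in range(0, num_samples, batch_size):
            idx = order[start:start + batch_size]
            inputs = [split_data['ims'][idx],
                      split_data['bxs'][idx],
                      split_data['detectors'][idx],
                      split_data['true_bxs'][idx]]
            # Dummy targets: the pass-through loss only looks at y_pred (the yolo_loss output)
            yield inputs, np.zeros(len(idx))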
try:
    data_folder = os.environ["DATA"]
except KeyError:
    raise SystemExit("Please cd into the module's base folder and run set_env from there.")

file_list = os.listdir(data_folder)
np.random.seed(1)
np.random.shuffle(file_list)
# Hard-coded split of the shuffled file list into training and test sets; this should be automated
train_data_list = file_list[0:160]
test_data_list = file_list[160:]

print("Current training set is made from " + str(len(train_data_list)) + " files and has "
      + str(helper_functions.get_num_samples(train_data_list)) + " examples: ", train_data_list)
print("Current test set is made from " + str(len(test_data_list)) + " files and has "
      + str(helper_functions.get_num_samples(test_data_list)) + " examples: ", test_data_list)

# Batch size chosen large enough to return the whole test set in a single batch
my_test_data_generator = helper_functions.batch_generator(22900, test_data_list, image_type)
y_true_test = []
y_predicted_test = []
for batch in my_test_data_generator:
    test_x, test_y = batch
    predicted = trained_model.predict(test_x)
    y_true_test = test_y[:, 0]
    y_predicted_test = predicted[:, 0]

my_training_data_generator = helper_functions.batch_generator(44600, train_data_list, image_type)
y_true_train = []
emoji_index, _, emoji_sorted = build_emoji_index(vocab_f, emoji_64)

# build vocab
word2index, index2word = build_vocab(vocab_f)
start_i, end_i = word2index['<s>'], word2index['</s>']
vocab_size = len(word2index)

classifier = EmojiClassifier(batch_size, vocab_size, emoji_num, embed_size,
                             num_unit, num_gpu)

# build data
train_data = build_data(train_ori_f, train_rep_f, word2index)
test_data = build_data(test_ori_f, test_rep_f, word2index)
test_batches = batch_generator(test_data, start_i, end_i, batch_size, permutate=False)
print_out("*** CLASSIFIER DATA READY ***")

# Let TensorFlow grow GPU memory usage as needed instead of grabbing it all up front
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
saver = tf.train.Saver()

with tf.Session(config=config) as sess:
    classifier.set_sess(sess)
    classifier.set_emoji_index(emoji_index)
    global_step = best_step = 1
    start_epoch = best_epoch = 1
    best_loss = 1000.
    sess.run(tf.global_variables_initializer())
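    # What the original script does next inside the session is not shown here. A common
    # TF1 pattern at this point (an assumption, not the original code) is to restore the
    # most recent checkpoint before training; the 'checkpoints' directory is hypothetical.
    latest_ckpt = tf.train.latest_checkpoint('checkpoints')
    if latest_ckpt is not None:
        saver.restore(sess, latest_ckpt)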