def review_model(test_dataflow, test_imgreview, model, history, model_id,
                 imgproc, image_data_pipeline):
    """ Model diagnostics written to disk; performs prediction """
    model_name = '{}_{}'.format(model_id, imgproc)
    datetime_now = datetime.now().strftime("%Y%m%d-%H%M%S")
    logger.info("STARTED model diagnostics for '{}'".format(model_name))

    # Manage the review directory
    review_dir = os.path.join(os.getcwd(), 'review')
    if not os.path.isdir(review_dir):
        os.makedirs(review_dir)

    # Report train/val loss vs. epochs
    if history is not None:
        logger.info("Reporting train/val loss vs. epochs plot")
        model_history_name = 'history_{}_{}.png'.format(model_name, datetime_now)
        mh_filepath = os.path.join(review_dir, model_history_name)
        plot_loss(mh_filepath, history)

    # Evaluate using test set
    logger.info("Evaluating test set and generating report")
    evaluate = model.evaluate_generator(test_dataflow)
    test_eval = str(model.metrics_names) + str(evaluate)
    test_eval_name = 'test_eval_{}_{}.txt'.format(model_name, datetime_now)
    te_filepath = os.path.join(review_dir, test_eval_name)
    with open(te_filepath, "w") as outfile:
        outfile.write(test_eval)

    # Prediction and reconstruction of N images
    logger.info("Predicting test images and generating review images")
    for i, file_path in enumerate(test_imgreview):
        y_test_set = [file_path]
        test_imgreview_dataflow = RaiseDataGenerator(y_test_set, image_data_pipeline)
        Y_pred, Y_true = model_predict(test_imgreview_dataflow, model,
                                       image_data_pipeline, Y_true_fpath=file_path)
        # Include the image index so successive files do not overwrite each other
        img_pred_name = 'img_pred_{}_{}_{}.png'.format(model_name, datetime_now, i)
        img_filepath = os.path.join(review_dir, img_pred_name)
        plot_imgpair(Y_pred, Y_true, img_filepath)
        logger.info("Wrote out review image: {}".format(img_pred_name))

    logger.info("FINISHED model diagnostics for '{}'".format(model_name))
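# Note: `plot_loss` is called here (and below) with a (filepath, history)
# signature but is not defined in this section. A minimal sketch, assuming
# `history` is a Keras History object with 'loss' (and optionally 'val_loss')
# in history.history:
import matplotlib
matplotlib.use('Agg')  # render to disk without a display
import matplotlib.pyplot as plt

def plot_loss(filepath, history):
    """Plot training (and, if present, validation) loss per epoch to `filepath`."""
    fig, ax = plt.subplots()
    ax.plot(history.history['loss'], label='train loss')
    if 'val_loss' in history.history:
        ax.plot(history.history['val_loss'], label='val loss')
    ax.set_xlabel('epoch')
    ax.set_ylabel('loss')
    ax.legend()
    fig.savefig(filepath)
    plt.close(fig)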
def fit_model(X_train, Y_train, model, checkpoint_dir, imgtup):
    imgname, imgfunc = imgtup
    chk = os.listdir(checkpoint_dir)
    if len(chk) > 1:
        # Checkpoints already exist; restoring weights is currently disabled
        # latest = tf.train.latest_checkpoint(checkpoint_dir)
        # model.load_weights(latest)
        pass
    else:
        datagen = ImageDataGenerator(preprocessing_function=imgfunc)
        # Compute any data-dependent statistics the generator needs
        # (a no-op for a pure preprocessing_function)
        datagen.fit(X_train)

        # Compile model
        learning_rate = 1e-3
        opt = AdamOptimizer(learning_rate=learning_rate)
        model.compile(optimizer=opt, loss=mean_absolute_error,
                      metrics=['accuracy'])
        model.summary()

        # Fit model
        history = model.fit_generator(
            datagen.flow(X_train, Y_train, batch_size=32),
            steps_per_epoch=X_train.shape[0] // 32,
            epochs=100)
        plot_loss('review/train_val_loss_021_{}.png'.format(imgname), history)
    return model
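# If the disabled restore branch above were re-enabled, a minimal sketch looks
# like this (assuming the checkpoints were written in TensorFlow's checkpoint
# format, e.g. by a tf.keras.callbacks.ModelCheckpoint with
# save_weights_only=True):
import tensorflow as tf

def maybe_restore(model, checkpoint_dir):
    """Load the newest checkpoint in `checkpoint_dir` into `model`, if any."""
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    if latest is not None:
        model.load_weights(latest)
    return model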
model = simple_sony()
# model = full_sony()

# Compile model
learning_rate = 1e-4  # undefined in the original; matches the commented-out Adam(lr=1e-4)
# opt = Adam(lr=1e-4)
opt = AdamOptimizer(learning_rate=learning_rate)
model.compile(optimizer=opt, loss=mean_absolute_error, metrics=['accuracy'])

# Fitting the model
history = model.fit(X_train, Y_train, validation_split=0.25,
                    epochs=100, batch_size=32, callbacks=[cp_callback])
plot_loss('review/train_val_loss.png', history)

# Predicting with the model
model.summary()
output = model.predict(X_test)
logger.debug("prediction output shape: {}".format(output.shape))

# Review image output
every = 10000
for i in range(output.shape[0]):
    base = "review/"
    if i % every == 0:
        # (truncated in source) write a review image for every `every`-th prediction
        pass
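# `cp_callback` is referenced above but not defined in this section. A minimal
# sketch, assuming weights-only checkpoints under a hypothetical 'checkpoints/'
# directory:
import tensorflow as tf

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/weights-{epoch:04d}.ckpt',  # path is an assumption
    save_weights_only=True,
    verbose=1)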
embedding_dim = vecs.shape[1]
MAX_SENT_LENGTH = 100
MAX_SENTS = 15

model = HAN(MAX_SENT_LENGTH, MAX_SENTS, max_features, embedding_dim, vecs)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=[fbeta_score, 'acc'])

print("model fitting - Bidirectional LSTM")
model.summary()
plot_model(model, SAVEPATH + '/model.png', show_shapes=True)

x_train, y_train, x_val, y_val = train_val_split(train_data, train_label, 2017, 0.2)

loss_his = LossHistory()
result_his = ResultHistory(test_data, SAVEPATH, False)
result_dev_his = ResultHistory(x_val, SAVEPATH, False)

model.fit(x_train, y_train,
          validation_data=(x_val, y_val),
          epochs=20, batch_size=128,
          callbacks=[loss_his, result_his, result_dev_his])
plot_loss(SAVEPATH, loss_his)

for i, result in enumerate(result_his.result, start=1):
    np.savetxt(SAVEPATH + '/result_' + str(i) + '.txt',
               result, fmt="%.4f", delimiter=" ")
for i, result in enumerate(result_dev_his.result, start=1):
    np.savetxt(SAVEPATH + '/dev_result_' + str(i) + '.txt',
               result, fmt="%.4f", delimiter=" ")
np.savetxt(SAVEPATH + '/dev_label.txt', y_val, fmt="%i", delimiter=' ')

if __name__ == '__main__':
    pass
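# `train_val_split` is not defined in this section. A minimal sketch, assuming
# its arguments are (data, labels, random seed, validation fraction) based on
# the call above:
import numpy as np

def train_val_split(data, labels, seed, val_frac):
    """Shuffle with a fixed seed and split off `val_frac` of the rows as validation."""
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(data))
    n_val = int(len(data) * val_frac)
    train_idx, val_idx = idx[n_val:], idx[:n_val]
    return data[train_idx], labels[train_idx], data[val_idx], labels[val_idx]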
def run_training():
    image_files = glob.glob(os.path.join(config.DATA_DIR, "*.png"))
    image_files = image_files[:10]  # limit for quick debugging runs
    print(f"Number of Images Found: {len(image_files)}")

    # "../xywz.png" -> "xywz"
    targets_orig = [x.split("/")[-1].split(".")[0] for x in image_files]
    # separate the targets on character level
    targets = [[char for char in x] for x in targets_orig]
    targets_flat = [c for clist in targets for c in clist]

    lbl_encoder = preprocessing.LabelEncoder()
    lbl_encoder.fit(targets_flat)
    targets_enc = [lbl_encoder.transform(x) for x in targets]
    # LabelEncoder encodes from 0, so add 1 to start from 1: 0 is reserved for unknown
    targets_enc = np.array(targets_enc) + 1
    print(f"Number of Unique Classes: {len(lbl_encoder.classes_)}")

    (train_imgs, test_imgs,
     train_targets, test_targets,
     train_orig_targets, test_orig_targets) = model_selection.train_test_split(
        image_files, targets_enc, targets_orig, test_size=0.1, random_state=42)

    train_dataset = dataset.ClassificationDataset(
        image_paths=train_imgs, targets=train_targets,
        resize=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH))
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS, shuffle=True)

    test_dataset = dataset.ClassificationDataset(
        image_paths=test_imgs, targets=test_targets,
        resize=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH))
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS, shuffle=False)

    model = CaptchaModel(num_chars=len(lbl_encoder.classes_))
    model.to(config.DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.8, patience=5, verbose=True)

    train_loss_data = []
    test_loss_data = []
    for epoch in range(config.EPOCHS):
        train_loss = engine.train_fn(model, train_loader, optimizer, save_model=True)
        eval_preds, test_loss = engine.eval_fn(model, test_loader)

        eval_captcha_preds = []
        for vp in eval_preds:
            current_preds = decode_predictions(vp, lbl_encoder)
            eval_captcha_preds.extend(current_preds)

        combined = list(zip(test_orig_targets, eval_captcha_preds))
        pprint(combined[:10])

        # Collapse repeated characters in the targets so they are comparable
        # with the duplicate-collapsed (CTC-decoded) predictions
        test_dup_rem = [remove_duplicates(c) for c in test_orig_targets]
        accuracy = metrics.accuracy_score(test_dup_rem, eval_captcha_preds)
        print(f"Epoch={epoch}, Train Loss={train_loss}, "
              f"Test Loss={test_loss}, Accuracy={accuracy}")
        scheduler.step(test_loss)

        train_loss_data.append(train_loss)
        test_loss_data.append(test_loss)

    # print(train_dataset[0])
    plot_loss(train_loss_data, test_loss_data, plot_path=config.PLOT_PATH)
    print("done")
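# `remove_duplicates` and `decode_predictions` are not defined in this section.
# A minimal sketch of the greedy CTC-style decode the calls above imply,
# assuming class 0 is the blank/"unknown" token (labels were shifted by +1)
# and `preds` is a (timesteps, batch, classes) tensor; the actual
# implementations may differ:
import torch

def remove_duplicates(s):
    """Collapse consecutive repeated characters, e.g. 'aabbc' -> 'abc'."""
    out = []
    for ch in s:
        if not out or out[-1] != ch:
            out.append(ch)
    return "".join(out)

def decode_predictions(preds, encoder):
    """Greedy decode: argmax per timestep, collapse repeats, then drop blanks."""
    preds = preds.permute(1, 0, 2)  # -> (batch, timesteps, classes)
    preds = torch.softmax(preds, dim=2).argmax(dim=2).cpu().numpy()
    decoded = []
    for sample in preds:
        # '-' stands in for the blank token so repeats across blanks survive
        chars = [encoder.inverse_transform([p - 1])[0] if p > 0 else '-'
                 for p in sample]
        decoded.append(remove_duplicates("".join(chars)).replace('-', ''))
    return decoded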