def custom_fit(X_train, Y_train, X_test, Y_test, model, batch_size, epochs=10):
    """Train ``model`` batch by batch on context-windowed training batches.

    Prints corpus statistics, optionally prints the per-class label
    frequencies (when ``SCALE_LOSS_FUN`` is truthy), prints the mean of
    ``helper.compute_avg_seg_len`` over the training labels, and then, for
    every epoch, feeds each (left, mid, right, labels) batch produced by
    ``batch_gen_consecutive_context_segments_from_big_seq`` to
    ``model.train_on_batch``.

    Args:
        X_train: Collection of documents. ``X_train.shape[0]`` is used as the
            document count and ``doc.shape[0]`` as each document's sentence
            count.
        Y_train: Per-document label collections, iterated in parallel with
            the documents.
        X_test: Not used by this function; kept for signature compatibility.
        Y_test: Not used by this function; kept for signature compatibility.
        model: Object whose ``train_on_batch(inputs, targets)`` returns three
            values, unpacked here as loss, accuracy and recall.
        batch_size: Forwarded to the batch generator and the progress bar.
        epochs: Exclusive upper bound of the epoch range.

    Returns:
        None. All results are reported on stdout / the progress bar.
    """
    # Print Train stats
    total_documents = X_train.shape[0]
    total_sentences = sum(doc.shape[0] for doc in X_train)
    print("X-wiki TRAIN stats: Total %d sentences in %d documents" % (
        total_sentences, total_documents))

    class_weight = None
    if SCALE_LOSS_FUN:
        # Documents differ in length, so labels are counted one document at a
        # time. Totals are keyed by label value because a document may not
        # contain every class; stacking the per-document count arrays would
        # then misalign (or broadcast) the counts.
        label_totals = {}
        for doc_labels in Y_train:
            labels, counts = np.unique(doc_labels, return_counts=True)
            for label, count in zip(labels.tolist(), counts.tolist()):
                label_totals[label] = label_totals.get(label, 0) + count
        grand_total = float(sum(label_totals.values()))
        class_weight = dict(
            (label, count / grand_total)
            for label, count in label_totals.items())
        # NOTE: the value is only printed here; it is not passed to the model.
        print(class_weight)

    train_avg_seg_len = np.mean(
        [helper.compute_avg_seg_len(Yi) for Yi in Y_train], axis=0)
    print(">> Train AVG_SEGMENT_LENGTH: %s" % (train_avg_seg_len,))

    print('Train...')
    start_epoch = 0
    if LOAD_SAVED_MODEL_AND_CONTINUE_TRAIN:
        # If we have a saved model, continue from the last epoch where we
        # stopped. The epoch count is zero indexed in TRAIN, while the count
        # in the saved file is 1 indexed.
        start_epoch = saved_model_epoch_done

    for epoch in range(start_epoch, epochs):
        # Per-batch metrics for this epoch (collected, not reported here).
        mean_tr_acc, mean_tr_loss, mean_tr_rec = [], [], []

        batches = batch_gen_consecutive_context_segments_from_big_seq(
            "train", X_train, Y_train, batch_size, ONE_SIDE_CONTEXT_SIZE)
        for batch_count, (batch_X_left, batch_X_mid, batch_X_right,
                          batch_Y_mid) in enumerate(batches):
            try:
                batch_Y_mid = to_categorical(batch_Y_mid, nb_classes=2)

                # Time only the training step; the value is shown as "speed".
                start = time.time()
                tr_loss, tr_acc, tr_rec = model.train_on_batch(
                    [batch_X_left, batch_X_mid, batch_X_right], batch_Y_mid)
                speed = time.time() - start

                mean_tr_acc.append(tr_acc)
                mean_tr_loss.append(tr_loss)
                mean_tr_rec.append(tr_rec)

                progbar.prog_bar(
                    True, total_sentences, epochs, batch_size, epoch,
                    batch_count, speed=speed,
                    data={'Loss': tr_loss, 'Acc': tr_acc, 'Rec': tr_rec})
            except (KeyboardInterrupt, SystemExit):
                # The tuple is required: "except KeyboardInterrupt, SystemExit"
                # would catch only KeyboardInterrupt and bind the exception
                # to the name SystemExit.
                print("")
                print("########################################################")
                print("###### Pausing execution. Press ENTER to continue #####")
                print("########################################################")
                out = raw_input(
                    'Enter "pdb" to get prompt or ENTER to exit.> ')
                if out == "pdb":
                    pdb.set_trace()
                # Training resumes with the next batch after the prompt.
            except Exception as e:
                # Best effort: report the failure and move on to the next
                # batch instead of aborting the whole run.
                print(e)
                print(">>>>> Is it intentional ?")
def custom_fit(X_train, Y_train, X_test, Y_test, model, batch_size, epochs=10):
    """Train ``model`` document by document, evaluate per epoch, then test.

    Prints corpus statistics, optionally prints the per-class label
    frequencies (when ``SCALE_LOSS_FUN`` is truthy), and prints the mean of
    ``helper.compute_avg_seg_len`` over the training labels. For every epoch
    it trains on the batches that ``batch_gen_sentences_without_context``
    yields for each training document, optionally saves the model, prints the
    mean training metrics and runs ``testing_on_data`` on several data sets.
    After the last epoch it runs ``model.test_on_batch`` over the test
    documents and prints the mean test metrics.

    Args:
        X_train: Collection of training documents. ``X_train.shape[0]`` is
            used as the document count and ``doc.shape[0]`` as each
            document's sentence count.
        Y_train: Per-document label collections, paired with ``X_train``.
        X_test: Collection of test documents; must expose ``.shape``.
        Y_test: Per-document test labels; must expose ``.shape``.
        model: Object providing ``train_on_batch`` / ``test_on_batch`` (each
            returning three values, unpacked here as loss, accuracy and
            recall) and ``save``.
        batch_size: Forwarded to the batch generator, the progress bar and
            ``testing_on_data``.
        epochs: Exclusive upper bound of the epoch range.

    Returns:
        None. All results are reported on stdout / the progress bar.
    """
    # Print Train stats
    total_documents = X_train.shape[0]
    total_sentences = sum(doc.shape[0] for doc in X_train)
    print("X-wiki TRAIN stats: Total %d sentences in %d documents" % (
        total_sentences, total_documents))

    class_weight = None
    if SCALE_LOSS_FUN:
        # Documents differ in length, so labels are counted one document at a
        # time. Totals are keyed by label value because a document may not
        # contain every class; stacking the per-document count arrays would
        # then misalign (or broadcast) the counts.
        label_totals = {}
        for doc_labels in Y_train:
            labels, counts = np.unique(doc_labels, return_counts=True)
            for label, count in zip(labels.tolist(), counts.tolist()):
                label_totals[label] = label_totals.get(label, 0) + count
        grand_total = float(sum(label_totals.values()))
        class_weight = dict(
            (label, count / grand_total)
            for label, count in label_totals.items())
        # NOTE: the value is only printed here; it is not passed to the model.
        print(class_weight)

    train_avg_seg_len = np.mean(
        [helper.compute_avg_seg_len(Yi) for Yi in Y_train], axis=0)
    print(">> Train AVG_SEGMENT_LENGTH: %s" % (train_avg_seg_len,))

    print('Train...')
    start_epoch = 0
    if LOAD_SAVED_MODEL_AND_CONTINUE_TRAIN:
        # If we have a saved model, continue from the last epoch where we
        # stopped. The epoch count is zero indexed in TRAIN, while the count
        # in the saved file is 1 indexed.
        start_epoch = saved_model_epoch_done

    for epoch in range(start_epoch, epochs):
        mean_tr_acc, mean_tr_loss, mean_tr_rec = [], [], []
        batch_count = 0  # Running batch index across all documents.

        for X, Y in zip(X_train, Y_train):
            for batch_X, batch_Y in batch_gen_sentences_without_context(
                    X, Y, batch_size, fixed_size=False):
                # Convert to output as 2 classes
                batch_Y = to_categorical(batch_Y, nb_classes=2)

                # Time only the training step; the value is shown as "speed".
                start = time.time()
                tr_loss, tr_acc, tr_rec = model.train_on_batch(
                    [batch_X], batch_Y)
                speed = time.time() - start

                mean_tr_acc.append(tr_acc)
                mean_tr_loss.append(tr_loss)
                mean_tr_rec.append(tr_rec)

                progbar.prog_bar(
                    True, total_sentences, epochs, batch_size, epoch,
                    batch_count, speed=speed,
                    data={'Loss': tr_loss, 'Acc': tr_acc, 'Rec': tr_rec})
                batch_count += 1
        progbar.end()

        if SAVE_MODEL_AFTER_EACH_EPOCH:
            model.save("model_trainable_%s_epoc_%d.h5" % (
                str(TRAINABLE_EMBEDDINGS), epoch + 1))

        print(">> Epoch: %d/%d" % (epoch + 1, epochs))
        print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
        print('recall training = {}'.format(np.mean(mean_tr_rec)))
        print('loss training = {}'.format(np.mean(mean_tr_loss)))

        testing_on_data("Wikipedia(DEVELOPMENT)", X_test, Y_test, model,
                        batch_size, summary_only=True)
        testing_on_data("Clinical", X_cli, Y_cli, model, batch_size,
                        summary_only=True)
        testing_on_data("Fiction", X_fic, Y_fic, model, batch_size,
                        summary_only=True)
        testing_on_data("Wikipedia(BENCHMARK)", X_wikitest, Y_wikitest, model,
                        batch_size, summary_only=True)
        print('___________________________________')

    # Testing
    print("####################################################################")
    print(">> (TEST) >> Testing, X: %s Y: %s" % (X_test.shape, Y_test.shape))
    mean_te_acc, mean_te_loss, mean_te_rec = [], [], []
    for X, Y in zip(X_test, Y_test):
        for batch_X, batch_Y in batch_gen_sentences_without_context(
                X, Y, batch_size, fixed_size=False):
            # One-hot encode the labels exactly as the training loop does, so
            # test_on_batch receives targets in the same format as
            # train_on_batch.
            batch_Y = to_categorical(batch_Y, nb_classes=2)
            te_loss, te_acc, te_rec = model.test_on_batch([batch_X], batch_Y)
            mean_te_acc.append(te_acc)
            mean_te_loss.append(te_loss)
            mean_te_rec.append(te_rec)

    print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
    print('recall testing = {}'.format(np.mean(mean_te_rec)))
    print('loss testing = {}'.format(np.mean(mean_te_loss)))