def predict():
    """Predict test-set targets with the trained weight vector and save them.

    Loads the weight saved by training (./result/weight.npy), normalizes the
    test data with the *training* set's mean/std, runs a linear prediction,
    de-normalizes the result with column 9's statistics (presumably the target
    column -- TODO confirm against the dataset layout), and writes
    ./result/predict.csv with header ["id", "value"].
    """
    weight = np.load("./result/weight.npy")
    tr_data_loader = train_data_loader()
    # The test loader must reuse the training statistics so features are
    # scaled exactly as they were during training.
    te_data_loader = test_data_loader(tr_data_loader.mean, tr_data_loader.std)
    question = te_data_loader.get_data()

    # Linear model: prediction = X . w, then undo the target normalization.
    pre = np.dot(question, weight)
    pre = (pre * te_data_loader.std[9]) + te_data_loader.mean[9]
    for i, value in enumerate(pre):
        print("id:", i, value)

    # Save file. newline="" prevents csv.writer from emitting blank rows on
    # Windows (per the csv module documentation).
    with open("./result/predict.csv", "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["id", "value"])
        for i, value in enumerate(pre):
            answer = float(value)
            if answer < 0:
                # Clip negative predictions; written as int 0, matching the
                # original output format.
                answer = 0
            writer.writerow(["id_" + str(i), answer])
"num_epochs" : config.num_epochs, "batch_size" : config.batch_size, "learning_rate" : config.learning_rate, "hidden_size" : config.hidden_size, "pretrained" : config.pretrained } # define a path to save experiment logs experiment_path = "./{}".format(config.exp) if not os.path.exists(experiment_path): os.mkdir(experiment_path) #create data loaders train_dataloader = data_loader.train_data_loader() test_dataloader = data_loader.test_data_loader() Model = model.newModel() Model.to(config.device) #define loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adadelta(Model.parameters(), lr = config.learning_rate) def train(dataloader, model, loss_fn, optimizer): size = len(dataloader.dataset) for batch, (X, y) in enumerate(dataloader): X, y = X.to(config.device), y.to(config.device) #compute prediction error pred = model(X)
print('='*100) # Inference validation / test set avg_cost, avg_correct, precision_count, recall_count, e_avg_correct, e_precision_count, e_recall_count = iteration_model(models, test_loader, parameter, False) # Individual and ensemble model's accuracy score on validation or test dataset for i in range(parameter["num_ensemble"]): print(str(i) + '_Val : [Epoch: {:>4}] cost = {:>.6} Accuracy = {:>.6}'.format(epoch + 1, avg_cost[i], avg_correct[i])) print('Ensemble [Epoch: {:>4}] Accuracy = {:>.6}'.format(epoch + 1, e_avg_correct)) f1Measure, precision, recall = calculation_measure(parameter["num_ensemble"], precision_count, recall_count) # Individual and ensemble model's f1, precisionb and recall score on validation or test dataset e_f1Measure, e_precision, e_recall = calculation_measure_ensemble(e_precision_count, e_recall_count) for i in range(parameter["num_ensemble"]): print(str(i) + '_Val : [Val] F1Measure : {:.6f} Precision : {:.6f} Recall : {:.6f}'.format(f1Measure[i], precision[i], recall[i])) print('Ensemble [Val] F1Measure : {:.6f} Precision : {:.6f} Recall : {:.6f}'.format(e_f1Measure, e_precision, e_recall)) print('=' * 100) save(parameter["output_dir"]) if parameter["mode"] == "test": load(parameter["output_dir"]) # For test dataset with label test_data = data_loader(parameter["input_dir"]) print(infer(test_data)) # For test dataset without label test_data = test_data_loader(parameter["input_dir"]) print(infer(test_data))
shuffle=True) print(res.history) train_loss, train_acc = res.history['loss'][0], res.history[ 'get_categorical_accuracy_keras'][0] nsml.report(summary=True, epoch=epoch, epoch_total=nb_epoch, loss=train_loss, acc=train_acc) nsml.save(epoch) # eunji config.mode = 'test' if config.mode == 'test': queries, db = test_data_loader('../../Data_example_test') # Query 개수: 195 # Reference(DB) 개수: 1,127 # Total (query + reference): 1,322 queries, query_img, references, reference_img = preprocess(queries, db) print( 'test data load queries {} query_img {} references {} reference_img {}' .format(len(queries), len(query_img), len(references), len(reference_img))) queries = np.asarray(queries) query_img = np.asarray(query_img) references = np.asarray(references)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30], gamma=0.1) else: scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 15, 20], gamma=0.1) else: raise ValueError('Invalid scheduler') # Loss function loss_fn = BlendedLoss(loss_type, cross_entropy_flag) # Train (fine-tune) model fit(online_train_loader, model, loss_fn, optimizer, scheduler, nb_epoch, start_epoch = start_epoch, device=device, log_interval=log_interval, save_model_to=config.model_save_dir) elif config.mode == 'test': test_dataset_path = dataset_path #+ '/test/test_data' queries, db = test_data_loader(test_dataset_path) #model = load(file_path=config.model_to_test) result_dict = infer(model, queries, db) print(result_dict) from sklearn.metrics import recall_score, precision_score import numpy as np positives = [] k = 1 for item in result_dict: print("---") index, query_item = item query = query_item[0]
model.add(Flatten()) model.add(Dense(512)) model.add(Activation('relu')) model.add(Dropout(0.5)) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.summary() bind_model(model) if config.pause: nsml.paused(scope=locals()) bTrainmode = False q_p, r_p = test_data_loader(DATASET_PATH) print(q_p) if config.mode == 'train': bTrainmode = True """ Initiate RMSprop optimizer """ opt = keras.optimizers.rmsprop(lr=0.00045, decay=1e-6) # model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=[get_categorical_accuracy_keras]) model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) """ Load data """ print('dataset path', DATASET_PATH) output_path = ['./img_list.pkl', './label_list.pkl'] train_dataset_path = DATASET_PATH + '/train/train_data'
def NER_test():
    """Restore a trained NER model from ./saved/ and evaluate it on the test set.

    Builds the parameter dict from command-line arguments, loads the test data
    via the project data loader, restores the TF graph saved as
    ./saved/testmodel.meta plus its latest checkpoint, fetches the
    placeholders/ops the evaluation loop needs by name, and prints
    cost/accuracy and F1/precision/recall on the test data.
    """
    tf.reset_default_graph()
    config = Config()
    parser = argparse.ArgumentParser(description=sys.argv[0] + " description")
    parser = config.parse_arg(parser)
    print("[NER_test...]")
    try:
        parameter = vars(parser.parse_args())
    except SystemExit:
        # argparse raises SystemExit on a bad command line; show usage, stop.
        parser.print_help()
        sys.exit(0)
    parameter["mode"] = "test"

    # Load the whole test dataset with the project data loader.
    DATASET_PATH = './data'  # test data path
    extern_data = []
    if parameter["mode"] == "test":
        extern_data = test_data_loader(DATASET_PATH)

    # Build vocabularies / input structures from the loaded sentences.
    dataset = Dataset(parameter, extern_data)

    with tf.Session() as sess:
        # Restore the graph structure, then the latest checkpoint weights.
        saver = tf.train.import_meta_graph('./saved/testmodel.meta')
        saver.restore(sess, tf.train.latest_checkpoint('./saved/'))
        graph = tf.get_default_graph()

        # Fetch every placeholder/op the evaluation loop feeds or runs.
        # get_tensor_by_name() requires the "<op_name>:<output_index>" form,
        # hence the ":0" suffix -- the bare op name raises ValueError.
        # NOTE(review): assumes each op was created with exactly this name;
        # confirm against the model-building code.
        tensor_names = [
            "morph", "ne_dict", "character", "dropout_rate",
            "weight_dropout_keep_prob", "lstm_dropout", "label", "sequence",
            "character_len", "global_step", "emb_dropout_keep_prob",
            "dense_dropout_keep_prob", "learning_rate", "cost",
            "viterbi_sequence", "train_op",
        ]
        param = {name: graph.get_tensor_by_name(name + ":0")
                 for name in tensor_names}

        dataset.make_input_data(extern_data)

        # Single evaluation pass over the test set (train=False).
        de_avg_cost, de_avg_correct, de_precision_count, de_recall_count = \
            iteration_model_Test(dataset, parameter, param, train=False)
        # The original formatted "epoch + 1" here, but the epoch loop was
        # commented out, so `epoch` was undefined (NameError); report a plain
        # test result instead.
        print('[Test] cost = {:>.6} Accuracy = {:>.6}'.format(
            de_avg_cost, de_avg_correct))

        de_f1Measure, de_precision, de_recall = calculation_measure(
            de_precision_count, de_recall_count)
        print('[Test] F1Measure : {:.6f} Precision : {:.6f} Recall : {:.6f}'.
              format(de_f1Measure, de_precision, de_recall))
from matplotlib import pyplot as plt
from model import Model
from hyperparams import Hyperparams
from data_loader import train_data_loader, test_data_loader, prediction_dataframe, hist_data, untransformed_price

# logger configuration
FORMAT = "[%(filename)s: %(lineno)3s] %(levelname)s: %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)

# Hyperparameters drive the batch sizes, split sizes and epoch count below.
H = Hyperparams()

# Batch generators over the train/test splits.
# NOTE(review): test_data_loader also receives H.num_train -- presumably the
# offset where the test split starts; confirm against data_loader.
train_batch_generator = train_data_loader(H.train_batch_size, H.num_train)
test_batch_generator = test_data_loader(H.test_batch_size, H.num_train)

# Scaler used later to invert the price normalization on predictions.
prediction_dataframe_gen = prediction_dataframe()
scaler = prediction_dataframe_gen.get_scaler()
logger.info("Generators instantiated")

# Build and compile the Keras model (regression -> mean-squared-error loss).
model = Model().get_model()
logger.info("Model loaded")
model.compile(optimizer='RMSProp', loss='mean_squared_error')
logger.info("Model compiled")

logger.info("Beginning training")
# One row of shuffled batch indices per epoch; sampling without replacement
# over range(train_num_batch) yields a permutation of the batch indices.
train_num_batch = H.num_train//H.train_batch_size
train_shuffled_batch = np.array([np.random.choice(train_num_batch, size=(train_num_batch), replace=False) for _ in range(H.num_epochs)])
test_num_batch = H.num_test//H.test_batch_size