Beispiel #1
0
def predict():
    """Predict values for the test set and write them to a CSV file.

    Loads the trained weights from ``./result/weight.npy``, builds the test
    loader with the training loader's ``mean`` and ``std``, computes the
    dot product of the test data and the weights, and rescales the result
    with ``std[9]`` / ``mean[9]``. Every prediction is printed, then written
    to ``./result/predict.csv`` under an ``id,value`` header with negative
    values clamped to 0.
    """
    weight = np.load("./result/weight.npy")
    tr_data_loader = train_data_loader()
    te_data_loader = test_data_loader(tr_data_loader.mean, tr_data_loader.std)
    question = te_data_loader.get_data()

    # predict
    pre = np.dot(question, weight)
    # Undo the normalisation of the output.
    # NOTE(review): index 9 is presumably the target feature's position in
    # the mean/std vectors -- confirm against the data loader.
    pre = (pre * te_data_loader.std[9]) + te_data_loader.mean[9]
    for i, value in enumerate(pre):
        print("id:", i, value)

    # save file
    # newline="" is required by the csv module; without it, platforms that
    # translate "\n" (e.g. Windows) emit a blank row after every record.
    with open("./result/predict.csv", "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["id", "value"])
        for i, value in enumerate(pre):
            answer = float(value)
            # Negative predictions are clamped to zero before being written.
            if answer < 0:
                answer = 0
            writer.writerow(["id_" + str(i), answer])
	"num_epochs" : config.num_epochs,
	"batch_size" : config.batch_size,
	"learning_rate" : config.learning_rate,
	"hidden_size" : config.hidden_size,
	"pretrained" : config.pretrained
}


# Directory in which experiment logs are saved, named after config.exp.
experiment_path = "./{}".format(config.exp)
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
# check-then-create race and also works when config.exp contains nested
# path components.
os.makedirs(experiment_path, exist_ok=True)

# Create data loaders.
train_dataloader = data_loader.train_data_loader()
test_dataloader = data_loader.test_data_loader()

# Build the model and move it to the configured device.
Model = model.newModel()
Model.to(config.device)

# Define loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adadelta(Model.parameters(), lr=config.learning_rate)

def train(dataloader, model, loss_fn, optimizer):
	size = len(dataloader.dataset)
	for batch, (X, y) in enumerate(dataloader):
		X, y = X.to(config.device), y.to(config.device)

		#compute prediction error
		pred = model(X)
            print('='*100)

            # Inference validation / test set
            avg_cost, avg_correct, precision_count, recall_count, e_avg_correct, e_precision_count, e_recall_count = iteration_model(models, test_loader, parameter, False)
            # Individual and ensemble model's accuracy score on validation or test dataset
            for i in range(parameter["num_ensemble"]):
                print(str(i) + '_Val : [Epoch: {:>4}] cost = {:>.6} Accuracy = {:>.6}'.format(epoch + 1, avg_cost[i], avg_correct[i]))
            print('Ensemble [Epoch: {:>4}]  Accuracy = {:>.6}'.format(epoch + 1, e_avg_correct))
            f1Measure, precision, recall = calculation_measure(parameter["num_ensemble"], precision_count, recall_count)

            # Individual and ensemble models' F1, precision and recall scores on the validation or test dataset
            e_f1Measure, e_precision, e_recall = calculation_measure_ensemble(e_precision_count, e_recall_count)
            for i in range(parameter["num_ensemble"]):
                print(str(i) + '_Val : [Val] F1Measure : {:.6f} Precision : {:.6f} Recall : {:.6f}'.format(f1Measure[i], precision[i], recall[i]))
            print('Ensemble [Val] F1Measure : {:.6f} Precision : {:.6f} Recall : {:.6f}'.format(e_f1Measure, e_precision,  e_recall))
            print('=' * 100)

            save(parameter["output_dir"])

    if parameter["mode"] == "test":
        load(parameter["output_dir"])

        # For test dataset with label
        test_data = data_loader(parameter["input_dir"])
        print(infer(test_data))

        # For test dataset without label
        test_data = test_data_loader(parameter["input_dir"])
        print(infer(test_data))

                            shuffle=True)
            print(res.history)
            train_loss, train_acc = res.history['loss'][0], res.history[
                'get_categorical_accuracy_keras'][0]
            nsml.report(summary=True,
                        epoch=epoch,
                        epoch_total=nb_epoch,
                        loss=train_loss,
                        acc=train_acc)
            nsml.save(epoch)

    # eunji

    config.mode = 'test'
    if config.mode == 'test':
        queries, db = test_data_loader('../../Data_example_test')

        # Number of queries: 195
        # Number of references (DB): 1,127
        # Total (query + reference): 1,322

        queries, query_img, references, reference_img = preprocess(queries, db)

        print(
            'test data load queries {} query_img {} references {} reference_img {}'
            .format(len(queries), len(query_img), len(references),
                    len(reference_img)))

        queries = np.asarray(queries)
        query_img = np.asarray(query_img)
        references = np.asarray(references)
Beispiel #5
0
                scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30], gamma=0.1)
            else:
                scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 15, 20], gamma=0.1)
        else:
            raise ValueError('Invalid scheduler')

        # Loss function
        loss_fn = BlendedLoss(loss_type, cross_entropy_flag)

        # Train (fine-tune) model
        fit(online_train_loader, model, loss_fn, optimizer, scheduler, nb_epoch, start_epoch = start_epoch,
            device=device, log_interval=log_interval, save_model_to=config.model_save_dir)

    elif config.mode == 'test':
        test_dataset_path = dataset_path #+ '/test/test_data'
        queries, db = test_data_loader(test_dataset_path)
        #model = load(file_path=config.model_to_test)
        result_dict = infer(model, queries, db)

        print(result_dict)

        from sklearn.metrics import recall_score, precision_score
        import numpy as np


        positives = []
        k = 1
        for item in result_dict:
            print("---")
            index, query_item = item
            query = query_item[0]
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    model.summary()

    bind_model(model)

    if config.pause:
        nsml.paused(scope=locals())

    bTrainmode = False

    q_p, r_p = test_data_loader(DATASET_PATH)
    print(q_p)

    if config.mode == 'train':
        bTrainmode = True
        """ Initiate RMSprop optimizer """
        opt = keras.optimizers.rmsprop(lr=0.00045, decay=1e-6)
        # model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=[get_categorical_accuracy_keras])
        model.compile(loss='categorical_crossentropy',
                      optimizer=opt,
                      metrics=['accuracy'])
        """ Load data """
        print('dataset path', DATASET_PATH)
        output_path = ['./img_list.pkl', './label_list.pkl']
        train_dataset_path = DATASET_PATH + '/train/train_data'
Beispiel #7
0
def NER_test():
    """Restore the saved NER model and print its metrics on the test data.

    Parses the command-line arguments registered by ``Config.parse_arg``,
    forces ``parameter["mode"]`` to ``"test"``, loads the data under
    ``./data`` with ``test_data_loader``, restores the graph from
    ``./saved/testmodel.meta`` plus the latest checkpoint in ``./saved/``,
    runs a single evaluation pass through ``iteration_model_Test`` and
    prints cost, accuracy, F1, precision and recall.

    If argument parsing fails, the usage text is printed and the process
    exits with status 0.
    """
    tf.reset_default_graph()
    config = Config()
    parser = argparse.ArgumentParser(description=sys.argv[0] + " description")
    parser = config.parse_arg(parser)
    print("[NER_test...]")

    try:
        parameter = vars(parser.parse_args())
    except SystemExit:
        # argparse reports bad arguments (and -h) by raising SystemExit.
        # Catch only that, so KeyboardInterrupt and real errors are no
        # longer swallowed, and keep the original "print help, exit 0".
        parser.print_help()
        sys.exit(0)

    parameter["mode"] = "test"

    # Fetch the whole dataset through the data loader.
    dataset_path = './data'  # test data path
    extern_data = test_data_loader(dataset_path)

    # Build the various lookup information and the input set from the
    # per-sentence dataset that was just loaded.
    dataset = Dataset(parameter, extern_data)

    # Graph nodes handed to iteration_model_Test: the input placeholders
    # first, then the model outputs.
    tensor_names = (
        "morph",
        "ne_dict",
        "character",
        "dropout_rate",
        "weight_dropout_keep_prob",
        "lstm_dropout",
        "label",
        "sequence",
        "character_len",
        "global_step",
        "emb_dropout_keep_prob",
        "dense_dropout_keep_prob",
        "learning_rate",
        "cost",
        "viterbi_sequence",
    )

    with tf.Session() as sess:
        saver = tf.train.import_meta_graph('./saved/testmodel.meta')
        saver.restore(sess, tf.train.latest_checkpoint('./saved/'))
        graph = tf.get_default_graph()

        # get_tensor_by_name() requires "<op_name>:<output_index>"; a bare
        # op name raises ValueError.
        # NOTE(review): assumes each entry is output 0 of an op saved under
        # exactly this name -- confirm against the training graph.
        param = {
            name: graph.get_tensor_by_name(name + ":0")
            for name in tensor_names
        }
        # NOTE(review): train_op is presumably an Operation rather than a
        # Tensor, so it is looked up by operation name -- confirm.
        param["train_op"] = graph.get_operation_by_name('train_op')

        dataset.make_input_data(extern_data)

        # Single evaluation pass over the test set.
        de_avg_cost, de_avg_correct, de_precision_count, de_recall_count = iteration_model_Test(
            dataset, parameter, param, train=False)
        # There is no epoch loop in test mode; the original message formatted
        # an undefined `epoch` and raised NameError.
        print('[Test] cost = {:>.6} Accuracy = {:>.6}'.format(
            de_avg_cost, de_avg_correct))
        de_f1Measure, de_precision, de_recall = calculation_measure(
            de_precision_count, de_recall_count)
        print('[Test] F1Measure : {:.6f} Precision : {:.6f} Recall : {:.6f}'.
              format(de_f1Measure, de_precision, de_recall))
Beispiel #8
0
from matplotlib import pyplot as plt

from model import Model
from hyperparams import Hyperparams
from data_loader import train_data_loader, test_data_loader, prediction_dataframe, hist_data, untransformed_price


# Logger configuration: each record is prefixed with its source file name,
# line number and level.
FORMAT = "[%(filename)s: %(lineno)3s] %(levelname)s: %(message)s"
logging.basicConfig(format=FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

H = Hyperparams()

# Batch generators for the train and test data, plus the scaler exposed by
# the prediction dataframe helper.
train_batch_generator = train_data_loader(H.train_batch_size, H.num_train)
test_batch_generator = test_data_loader(H.test_batch_size, H.num_train)
prediction_dataframe_gen = prediction_dataframe()
scaler = prediction_dataframe_gen.get_scaler()
logger.info("Generators instantiated")

model = Model().get_model()
logger.info("Model loaded")

model.compile(loss='mean_squared_error', optimizer='RMSProp')
logger.info("Model compiled")

logger.info("Beginning training")
train_num_batch = H.num_train // H.train_batch_size
# One row per epoch: the batch indices 0..train_num_batch-1 drawn without
# replacement, i.e. in a fresh random order.
train_shuffled_batch = np.array([
    np.random.choice(train_num_batch, size=train_num_batch, replace=False)
    for _ in range(H.num_epochs)
])

test_num_batch = H.num_test // H.test_batch_size