Example #1
def model_test():
    """
    Evaluate the model on the test dataset.
    """
    test_dataloader = DataLoader(DataMode.Train)
    test_dataset = test_dataloader.load_all_from_tfreocrds()
    base_model = tf.keras.models.load_model(os.path.join(SVAED_MODEL_DIR, '{}_model.h5'.format(config.dataset)))
    error_text = []
    real_text = []
    error_count = 0
    for batch, data in enumerate(test_dataset):
        images, label = data
        # print(images.shape, label.shape)
        input_length = np.array(np.ones(1) * int(9))
        y_pred = base_model.predict(x=images[tf.newaxis, :, :, :])
        # print(y_pred.shape)  # (64, 9, 37)
        decoded_dense, _ = tf.keras.backend.ctc_decode(y_pred, input_length,
                                                       greedy=config.ctc_greedy,
                                                       beam_width=config.beam_width,
                                                       top_paths=config.top_paths)

        str_real = ''.join([config.characters[x] for x in label if x != -1])
        str_pred = ''.join([config.characters[x] for x in decoded_dense[0][0] if x != -1])
        if str_pred != str_real:
            error_count += 1
            error_text.append(str_pred)
            real_text.append(str_real)

    test_accuracy = (test_dataloader.size - error_count) / test_dataloader.size
    print('test acc %f' % test_accuracy)
    for real, pred in zip(real_text, error_text):
        if len(pred) == 4:
            print('error pair: ', real, ' ', pred, )
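
For reference, a minimal sketch (an illustration only, assuming TensorFlow 2.x eager execution) of what tf.keras.backend.ctc_decode returns for a 9-step, 37-class softmax output like the one predicted above; the decoded tensor is padded with -1, which is why the snippet filters x != -1 before mapping indices back to characters:

import numpy as np
import tensorflow as tf

# Dummy softmax output: batch of 1, 9 time steps, 37 classes (charset + CTC blank).
y_pred = tf.nn.softmax(tf.random.uniform((1, 9, 37)), axis=-1)
input_length = np.full((1,), 9)

decoded, log_probs = tf.keras.backend.ctc_decode(y_pred, input_length, greedy=True)
# decoded[0] has shape (1, decoded_length); unused trailing positions are padded with -1.
print(decoded[0].numpy())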
Example #2
def main(config, args):

    config["result_dir"] = args.path
    print(config["result_dir"])
    torch.manual_seed(config["seed"])
    random.seed(config["seed"])
    np.random.seed(config["seed"])
    path_data = config['path'] + "/safe_set_data.pth"
    if config['load_data']:
        data_loader = torch.load(path_data)
        data_loader.setBatchSize(config['n_batch'], config['train_val_ratio'])
    else:
        data_loader = DataLoader.DataLoader(config)
        torch.save(data_loader, path_data)

    print(data_loader.n_all_batches)
    model = SafeSet.SafeSet(config)

    path_model = config['path'] + "/safe_set_model.pth"
    path_model_results = config["result_dir"] + "/safe_set_model.pth"
    if not config['load_model']:
        trainer = Trainer.Trainer(config)
        trainer.train(model, data_loader)

        torch.save(model, path_model)
        torch.save(model, path_model_results)
    else:
        model = torch.load(path_model)

    validation = Validation.Validation(config)
    validation.validate(model, data_loader)
    validation.validateTest(model, data_loader)
    validation.validateTestUnseen(model, data_loader)
    validation.save_val()
    validation.save_model(model)
Example #3
def main():
	
	# optional command line args
	parser = argparse.ArgumentParser()
	parser.add_argument('--beamsearch', help='use beam search instead of best path decoding', action='store_true')

	args = parser.parse_args()

	decoderType = DecoderType.BestPath
	if args.beamsearch:
		decoderType = DecoderType.BeamSearch

	# validation on IAM dataset	
	
	# load training data, create TF model
	loader = DataLoader(FilePaths.fnTrain, Model.batchSize, Model.imgSize, Model.maxTextLen)

	# save characters of model for inference mode
	open(FilePaths.fnCharList, 'w').write(str().join(loader.charList))
	
	# save words contained in dataset into file
	open(FilePaths.fnCorpus, 'w').write(str(' ').join(loader.trainWords + loader.validationWords))

	# execute validation
	model = Model(loader.charList, decoderType, mustRestore=True)
	validate(model, loader)
Example #4
def main():

    ticker = _TICKER
    window_size=_WINDOWSIZE
    window_shift=_WINDOW_SHIFT

    d = DataLoader(ticker)
    d.loadData()
    d.prepData()

    x_history=d.features[-window_size:,:]
    x_history = x_history.reshape(1, x_history.shape[0], x_history.shape[1])

    start_price=d.prices[-1]

    model = SequenceModel()
    model.modelLoad("Data/" + ticker + '.h5', "Data/" + ticker + '_history.json')

    y_pred = model.predict_model(x_history)
    y_pred_delta=d.denormalize(y_pred,d.targets_mean,d.targets_std)
    y_pred_delta = y_pred_delta.flatten().reshape(-1, 1)
    y_pred_price=PricefromDelta(y_pred_delta,start_price)

    plot_dates=dates_axis(window_size,d)
    plotPredictions(y_pred_price,d,plot_dates)
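
The helpers denormalize and PricefromDelta are not shown in this snippet. A minimal sketch of plausible implementations, written as standalone functions and based on assumptions (targets are z-score normalized; the model predicts price deltas that get accumulated onto the last observed price), not on the project's actual code:

import numpy as np

def denormalize(values, mean, std):
    # Hypothetical: invert the z-score normalization applied during prepData().
    return values * std + mean

def PricefromDelta(deltas, start_price):
    # Hypothetical: accumulate predicted price changes onto the last observed price.
    return start_price + np.cumsum(np.asarray(deltas).flatten())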
Example #5
def uploadglueup():

	def createRow():
		return{'straw_barcode': str(row[0]),
			'glueup_type' : str(row[1]),
			'worker_barcode' : str(row[2]),
			'workstation_barcode' : str(row[3]),
			'comments' : str(row[4]),
			'glue_batch_number' : str(row[5]),}
	for row in upload_file:
		table = "straw_glueups"
		dataLoader = DataLoader(password,url,group,table)
		dataLoader.addRow(createRow())
		retVal,code,text =  dataLoader.send()

		if retVal:

			print "glueup upload success!\n"
			print text

		else:

			print "glueup upload failed!\n"
			print code
			print text

		dataLoader.clearRows()
Example #6
def main():
    '''Main Function'''

    parser = argparse.ArgumentParser(description='translate.py')

    parser.add_argument('-model', required=True, help='Path to model .pt file')
    parser.add_argument(
        '-src',
        required=True,
        help='Source sequence to decode (one line per sequence)')
    parser.add_argument(
        '-vocab',
        required=True,
        help='Source sequence to decode (one line per sequence)')
    parser.add_argument('-output',
                        default='pred.txt',
                        help="""Path to output the predictions (each line will
                        be the decoded sequence""")
    parser.add_argument('-beam_size', type=int, default=5, help='Beam size')
    parser.add_argument('-batch_size', type=int, default=30, help='Batch size')
    parser.add_argument('-n_best',
                        type=int,
                        default=1,
                        help="""If verbose is set, will output the n_best
                        decoded sentences""")
    parser.add_argument('-no_cuda', action='store_true')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda

    # Prepare DataLoader
    preprocess_data = torch.load(opt.vocab)
    preprocess_settings = preprocess_data['settings']
    test_src_word_insts = read_instances_from_file(
        opt.src, preprocess_settings.max_word_seq_len,
        preprocess_settings.keep_case)
    test_src_insts = convert_instance_to_idx_seq(
        test_src_word_insts, preprocess_data['dict']['src'])
    test_data = DataLoader(preprocess_data['dict']['src'],
                           preprocess_data['dict']['tgt'],
                           src_insts=test_src_insts,
                           cuda=opt.cuda,
                           shuffle=False,
                           batch_size=opt.batch_size)

    translator = Translator(opt)
    translator.model.eval()

    with open(opt.output, 'w') as f:
        for batch in tqdm(test_data,
                          mininterval=2,
                          desc='  - (Test)',
                          leave=False):
            all_hyp, all_scores = translator.translate_batch(batch)
            for idx_seqs in all_hyp:
                for idx_seq in idx_seqs:
                    pred_line = ' '.join(
                        [test_data.tgt_idx2word[idx] for idx in idx_seq])
                    f.write(pred_line + '\n')
    print('[Info] Finished.')
Example #7
def main():
    for j in range(0, 3):
        t = Times()
        times = []
        for i in range(1, 18):
            start_time = time.time()
            d = DataLoader(i)
            d_names = d.get_nodes_names()
            CostMatrix = d.get_final_matrix()

            fname_tsp = "results" + str(i)
            [fileID1, fileID2] = writeTSPLIBfile_FE(fname_tsp, CostMatrix,
                                                    user_comment)
            run_LKHsolver_cmd(fname_tsp)
            copy_toTSPLIBdir_cmd(fname_tsp)
            rm_solution_file_cmd(fname_tsp)
            curr_time = time.time() - start_time
            times.append(curr_time)
            processData(i, d.get_nodes_names(), d, curr_time, t)

        file = open("./times" + str(j) + ".txt", "w")
        file.write(
            "Instance \t Excecution \t Cut time \t Air time \t Total cut \t Total \n"
        )
        for i in range(0, len(times)):
            file.write(
                str(i + 1) + '\t' + str(times[i]) + '\t' +
                str(t.get_cut()[i]) + '\t' + str(t.get_air()[i]) + '\t' +
                str(t.get_cut()[i] + t.get_air()[i]) + '\t' +
                str(t.get_cut()[i] + t.get_air()[i] + t.get_excecution()[i]) +
                "\n")
        file.close()
Example #8
def example_one_to_one_nn_optimization():
    simple_nn = NeuralNetwork(1, [3, 3, 3],
                              1,
                              output_activation_function=linear)
    #data = linear_data(10000)
    data = sinus_data(100)
    data_gen = DataLoader(data)
    neural_predictor = lambda nn: predictor_fitness(
        nn.predict, data_gen.generator(), loss_function=mean_squared_error)

    x0w, x0b = simple_nn.get_weights_and_biases()
    print("Number of weights:", len(x0w))
    print("Number of biases:", len(x0b))
    x0 = x0w
    x0.extend(x0b)
    fitness = lambda ind: network_opt(ind, simple_nn, neural_predictor)

    # OPTIMIZE HERE
    best_ind = [1, 1 / 2.0, 1 / 2.0, 1, 10, 10, -15]
    print("Best fitness is:", fitness(best_ind))
    print(simple_nn.get_weights_and_biases())
    #

    plt.plot([x[0] for x in data], [x[1] for x in data], 'b*')
    simple_nn.set_all(best_ind)
    plt.plot([x[0] for x in data], [simple_nn.predict(x[0]) for x in data],
             'r*')
    plt.show()
Example #9
def main():
    ticker = _TICKER
    d = DataLoader(ticker)
    d.loadData()
    d.prepData()
    window_size = _WINDOWSIZE
    window_shift = _WINDOW_SHIFT
    start_index = d.getIndex(_TESTSTART)
    end_index = d.getIndex(_TESTEND)
    x_test, y_test, dates_test = createInputs(d.features, d.targets, d.dates,
                                              window_size, window_shift,
                                              start_index, end_index)

    print("Mean: ", d.targets_mean)
    print("STD: ", d.targets_std)
    print("x_test shape: ", x_test.shape)
    print("y_test shape: ", y_test.shape)
    # print("y_test 1st value: ", d.denormalize(y_test[0,0,0],d.targets_mean,d.targets_std))
    print("dates_test shape: ", dates_test.shape)

    model = SequenceModel()
    model.modelLoad("Data/" + ticker + '.h5',
                    "Data/" + ticker + '_history.json')
    y_pred = model.predict_model(x_test)
    y_pred_price, y_actuals, y_dates = createPlotData(start_index, end_index,
                                                      y_pred, d)

    # checkModel(d,x_test, y_actuals, y_dates, start_index)

    plotTestPerformance(y_pred_price,
                        y_actuals,
                        y_dates,
                        model.history_dict,
                        d.targets_std,
                        window_size=window_size)
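
createPlotData is not shown here; Example #20 below performs the same steps inline, so a plausible sketch (an assumption, not the original helper) would be:

def createPlotData(start_index, end_index, y_pred, d):
    # Hypothetical: denormalize predictions and slice out the matching dates and
    # actual targets, mirroring the inline logic of the later test-plotting example.
    y_pred_price = d.denormalize(y_pred, d.targets_mean, d.targets_std)
    y_dates = d.dates[start_index + 1:end_index]
    y_actuals = d.denormalize(d.targets[start_index + 1:end_index],
                              d.targets_mean, d.targets_std)
    return y_pred_price, y_actuals, y_dates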
Example #10
def main():
    model_save_folder = args.model_save_folder
    if not os.path.exists(model_save_folder):
        os.makedirs(model_save_folder)

    model_save_path = args.model_save_folder + '/cnn.model'
    dict_save_path = args.model_save_folder + '/cnn.dict'

    dl = DataLoader(args.train_file, args.dev_file, args.test_file,
                    args.freq_threshold, args.max_batch_size, args.max_length,
                    args.min_length)
    en_dict, train_data, dev_data, test_data = dl.prepareData()

    with open(dict_save_path, 'wb') as handle:
        pickle.dump(en_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

    clf = ConvNet(en_dict.index2word,
                  args.emb_size,
                  dl.class_inv_freq,
                  args.num_kernels,
                  args.kernel_sizes,
                  args.learning_rate,
                  model_save_path,
                  pretrained_path=args.pretrained_path)
    clf.fit(train_data, dev_data, args.num_epochs)
    y_true, y_pred = clf.predict(test_data)
    corrects = (y_true == y_pred).sum()
    print('testing accuracy: {:}'.format(corrects * 1.0 /
                                         test_data.num_instances))
Example #11
def main():
    path_order_info = "../../../all_database_in_csv/jos_vm_orders.csv"
    path_sku_info = "../../../all_database_in_csv/jos_vm_order_item.csv"
    path_to_user_type = "../../../all_database_in_csv/jos_vm_shopper_vendor_xref.csv"
    # Initialize the preprocessing pipeline with a loader that opens the source files on the local machine.
    pipeline = DataPipeline(
        DataLoader("local storage",
                   first=path_order_info,
                   second=path_sku_info,
                   third=path_to_user_type))
    data = pipeline.prepare_data(datetime(2019, 1, 1),
                                 prefixes_to_remove=["RP"])

    # Remove unpopular SKUs and users (do we actually need to remove users?)
    data = pipeline.remove_unpopular_item_and_users(data)

    # Initialize, train, save, and reload the PMI matrix model
    model = PmiModel()
    model.fit_model(data)
    model.save_binary("data")
    model.load_binary("data")
    # print(model.pmi_matrix.shape)
    # print(model.evaluate("AMX-02"))
    # Find all SKUs of a particular user
    grouped_data = data.groupby('user_id').agg(
        {"order_item_sku": [("list", lambda x: list(x)), ("count", "count")]})
    users_sku = grouped_data.loc[6709][("order_item_sku", "list")]
    print(users_sku)
    # Initialize the recommender with the model and get a list of predictions with a score for each.
    recomendator = PmiRecommendator(model)
    answer = recomendator.create_list(users_sku)
    print(answer.shape, answer)
Example #12
def main():
    project = test_CNN.get_project_and_check_arguments(sys.argv,
                                                       "run_data_loader.py")
    print "start creating data for project: ", project.project_name
    data_loader = DataLoader(project, motifs_base_path)
    data_loader.create_npy_files()
    print "End!"
Example #13
    def train(self, model_citations, model_authors):
        d_train = DataLoader()
        training_data = d_train.training_data_with_abstracts_citations().data

        # Load trained embeddings
        print("Loading the citations training embeddings...")
        pretrained_embeddings_citations, pretrained_embeddings_id_map_citations = \
            self._load_train_embeddings(model_citations)
        print("Loaded.")

        print("Loading the authors training embeddings...")
        pretrained_embeddings_authors, pretrained_embeddings_id_map_authors = \
            self._load_train_embeddings(model_authors)
        print("Loaded.")

        training_ids = list(training_data.chapter)
        training_embeddings_citations = pretrained_embeddings_citations[[
            pretrained_embeddings_id_map_citations[id] for id in training_ids
        ]]
        training_embeddings_authors = pretrained_embeddings_authors[[
            pretrained_embeddings_id_map_authors[id] for id in training_ids
        ]]

        # Concatenate embeddings
        training_embeddings = np.concatenate(
            (training_embeddings_citations, training_embeddings_authors),
            axis=1)

        self.label_encoder = LabelEncoder()
        self.labels = self.label_encoder.fit_transform(
            training_data.conferenceseries)
        self.classifier.fit(training_embeddings, self.labels)
        self._save_model_classifier()

        print("Training finished.")
Example #14
def train_and_test_network():
    """
    Train a neural network and test it. Can also train on other feature types,
    or run the experimenter to try different configurations.
    """
    min_speakers = 1
    max_speakers = 10

    # Load data from filesystem
    data_loader = DataLoader(train_dir, test_src_dr, test_dest_dir)
    data_loader.force_recreate = False
    data_loader.min_speakers = min_speakers
    data_loader.max_speakers = max_speakers

    # Train network
    train, (test_x, test_y) = data_loader.load_data()
    libri_x, libri_y = data_loader.load_libricount(libri_dir)

    # Train and test network
    file = 'testing_rnn'
    net = RNN()
    net.save_to_file(file)
    net.train(train, min_speakers, max_speakers, FEATURE_TYPE)

    net.load_from_file(file)

    timit_results = net.test(test_x, test_y, FEATURE_TYPE)
    libri_results = net.test(libri_x, libri_y, FEATURE_TYPE)
Example #15
def main():
    """ Main function """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train", help="train the neural network", action="store_true")
    parser.add_argument(
        "--validate", help="validate the neural network", action="store_true")
    parser.add_argument(
        "--wordbeamsearch", help="use word beam search instead of best path decoding", action="store_true")
    args = parser.parse_args()

    decoderType = DecoderType.BestPath
    if args.wordbeamsearch:
        decoderType = DecoderType.WordBeamSearch

    if args.train or args.validate:
        loader = DataLoader(FilePaths.fnTrain, Model.batchSize,
                            Model.imgSize, Model.maxTextLen, load_aug=True)

        if args.train:
            model = Model(loader.charList, decoderType)
            train(model, loader)
        elif args.validate:
            model = Model(loader.charList, decoderType, mustRestore=False)
            validate(model, loader)

    else:
        print(open(FilePaths.fnAccuracy).read())
        model = Model(open(FilePaths.fnCharList).read(),
                      decoderType, mustRestore=False)
        infer(model, FilePaths.fnInfer)
Example #16
 def test_generate_list_of_numbers(self):
     start = 1
     end = 4
     expect = ["01", "02", "03", "04"]
     dl = DataLoader()
     result = dl.generate_list_of_numbers(start, end)
     self.assertEqual(result, expect)
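
The test pins down the expected behaviour: inclusive endpoints and zero-padded two-digit strings. A minimal method that would make it pass (a sketch; the real DataLoader presumably does much more):

class DataLoader:
    def generate_list_of_numbers(self, start, end):
        # Zero-padded two-digit strings from start to end, inclusive.
        return ["{:02d}".format(i) for i in range(start, end + 1)]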
Example #17
def main():
    config = tf.compat.v1.ConfigProto(
        allow_soft_placement=True
    )  ## tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.compat.v1.Session(
            config=config) as sess:  ## tf.Session(config=config) as sess:
        copy_file(save_file_dir)
        dataloader = DataLoader(FLAGS.dir, FLAGS.limits)
        model = SeqUnit(batch_size=FLAGS.batch_size,
                        hidden_size=FLAGS.hidden_size,
                        emb_size=FLAGS.emb_size,
                        field_size=FLAGS.field_size,
                        pos_size=FLAGS.pos_size,
                        field_vocab=FLAGS.field_vocab,
                        source_vocab=FLAGS.source_vocab,
                        position_vocab=FLAGS.position_vocab,
                        target_vocab=FLAGS.target_vocab,
                        scope_name="seq2seq",
                        name="seq2seq",
                        field_concat=FLAGS.field,
                        position_concat=FLAGS.position,
                        fgate_enc=FLAGS.fgate_encoder,
                        dual_att=FLAGS.dual_attention,
                        decoder_add_pos=FLAGS.decoder_pos,
                        encoder_add_pos=FLAGS.encoder_pos,
                        learning_rate=FLAGS.learning_rate)
        sess.run(tf.compat.v1.global_variables_initializer())
        # copy_file(save_file_dir)
        if FLAGS.load != '0':
            model.load(save_dir)
        if FLAGS.mode == 'train':
            train(sess, dataloader, model)
        else:
            test(sess, dataloader, model)
Example #18
def main():

    '''
    Main block to (1) Load data, (2) Create x_train, y_train and dates_train, (3) Run the model, (4) Save the model and (5) Plot training and validation performance
    Loads data from TestModel class
    '''

    ticker=_TICKER
    d=DataLoader(ticker)
    d.loadData()
    d.prepData()
    start_index=d.getIndex(_START)
    end_index=d.getIndex(_END)

    #Generates x_train, y_train and dates_train. We need dates_train in order to plot the x-axis later
    x_train,y_train,dates_train=createInputs(d.features,
                                             d.targets,
                                             d.dates,
                                             _WINDOWSIZE,
                                             _WINDOW_SHIFT,
                                             start_index,
                                             end_index)
    #Creates model instance and runs the model
    aapl_model=SequenceModel()
    epochs=_EPOCHS
    aapl_model.build_model(x_train,y_train,batch_size=_BATCHSIZE,epochs=epochs)
    aapl_model.modelSave("Data/"+ticker+'.h5',"Data/"+ticker+'_history.json')
    # aapl_model.modelLoad('AAPL.h5', 'AAPL_history.json')

    #Plot model
    plotPerformance(aapl_model,aapl_model.history_dict, d.targets_std)
Example #19
    def test(self, step_num):
        self.sess.run(tf.local_variables_initializer())
        self.reload(step_num)

        from DataLoader import BrainLoader as DataLoader
        self.data_reader = DataLoader(self.conf)
        self.data_reader.get_data(mode='test')
        self.num_test_batch = self.data_reader.count_num_batch(
            self.conf.batch_size, mode='test')
        self.is_train = False
        self.sess.run(tf.local_variables_initializer())
        for step in range(self.num_test_batch):
            start = step * self.conf.batch_size
            end = (step + 1) * self.conf.batch_size
            x_test, y_test = self.data_reader.next_batch(start,
                                                         end,
                                                         mode='test')
            feed_dict = {
                self.x: x_test,
                self.y: y_test,
                self.mask_with_labels: False
            }
            self.sess.run([self.mean_loss_op, self.mean_accuracy_op],
                          feed_dict=feed_dict)
        test_loss, test_acc = self.sess.run(
            [self.mean_loss, self.mean_accuracy])
        print('-' * 18 + 'Test Completed' + '-' * 18)
        print('test_loss= {0:.4f}, test_acc={1:.01%}'.format(
            test_loss, test_acc))
        print('-' * 50)
Example #20
def main():
    ticker = _TICKER
    d = DataLoader(ticker)
    d.loadData()
    d.prepData()
    window_size = _WINDOWSIZE
    window_shift = _WINDOW_SHIFT
    start_index = d.getIndex(_TESTSTART)
    end_index = d.getIndex(_TESTEND)
    x_test, y_test, dates_test = createInputs(d.features, d.targets, d.dates,
                                              window_size, window_shift,
                                              start_index, end_index)

    aapl_model = SequenceModel()
    aapl_model.modelLoad("Data/" + ticker + '.h5',
                         "Data/" + ticker + '_history.json')
    y_pred = aapl_model.predict_model(x_test)
    y_pred = d.denormalize(y_pred, d.targets_mean, d.targets_std)
    # print(y_pred.shape)
    y_dates = d.dates[start_index + 1:end_index]
    y_actuals = d.targets[start_index + 1:end_index]
    y_actuals = d.denormalize(y_actuals, d.targets_mean, d.targets_std)

    plotTestPerformance(y_pred,
                        y_actuals,
                        y_dates,
                        aapl_model.history,
                        d.targets_std,
                        window_size=window_size)
Example #21
def main(input_path):
    rounds = 1
    auc_array = np.zeros((rounds, 1))
    time_array = np.zeros((rounds, 1))
    dim_array = np.zeros((rounds, 1))
    data_describe=''
    for m in range(rounds):
        data = DataLoader(input_path)
        data.data_prepare()
        dim = data.all_features_num
        n = len(data.data_matrix)
        labels = data.list_of_class
        data_describe="{}, {}, {}, ".format(data.data_name, n, dim)
        scores_all, totalTime, dim_vec, it = ODNUTFramework(data)
        final_scores_all =  scores_all
        time_array[m] = totalTime
        dim_array[m] = dim_vec[-1]
        roc_auc = roc_auc_score(labels, final_scores_all)
        auc_array[m] = roc_auc

    roc_auc = np.mean(auc_array)
    runtime = np.mean(time_array)
    avgDim = np.mean(dim_array)

    print_text = data_describe+"{:.4}, {:.4}, {:.4}s".format( avgDim, roc_auc, runtime)
    print(print_text)
    doc = open('out.txt', 'a')
    print(print_text, file=doc)
    doc.close()
Example #22
 def train(self):
     self.sess.run(tf.local_variables_initializer())
     self.best_validation_accuracy = 0
     self.data_reader = DataLoader(self.conf)
     self.data_reader.get_data(mode='train')
     self.data_reader.get_data(mode='valid')
     self.train_loop()
Example #23
def strawcutlengths():
	
	def createRow():
		return {'straw_barcode': str(row[0]),
			#'create_time' : str(row[1]), #Website gets real time somehow.
			'worker_barcode' : str(row[2]),
			'workstation_barcode' : str(row[3]),
			'nominal_length' : str(row[4]),
			'measured_length': str(row[5]),
			'temperature' : str(row[6]),
			'humidity' : str(row[7]),
			#'comments' : str(row[8]),
			}
	for row in upload_file:
		table = "straw_cut_lengths"
		dataLoader = DataLoader(password,url,group,table)
		dataLoader.addRow(createRow())
		retVal,code,text =  dataLoader.send()
		if retVal:
			print "upload straw length success!\n"
			print text
		else:
			print "upload straw length failed!\n"
			print code
			print text
		dataLoader.clearRows()
Example #24
    def pre_train(self, dataset):
        """ Pre-trains the model based on one or more datasets.

        """
        if not self.load:
            writer = SummaryWriter('runs/' + self.name)
            data_loader = DataLoader(dataset, self.batch_size, single=True)
            data_loader = data_loader.data_loader
            train_size = int(0.9 * len(data_loader))
            val_size = len(data_loader) - train_size
            train_data, val_data = torch.utils.data.random_split(data_loader, [train_size, val_size])

            print('Starting training on {} batches of train data with {} batches of validation data.'.format(train_size,
                                                                                                             val_size))
            best_loss = np.inf
            best_model = None
            for epoch in range(self.num_epochs):
                mean_train_loss = 0
                mean_train_acc = 0
                self.FCNN.train()
                for data, label in train_data:
                    self.optimizer.zero_grad()
                    predictions = self.FCNN(data)
                    loss = self.criterion(predictions, torch.argmax(label, dim=1).long())
                    eq = np.equal(torch.argmax(predictions, dim=1).cpu(), torch.argmax(label, dim=1).cpu())
                    acc = torch.mean(eq.float())
                    mean_train_acc += acc
                    mean_train_loss += loss
                    loss.backward()
                    self.optimizer.step()
                mean_train_loss = mean_train_loss / train_size
                mean_train_acc = mean_train_acc / train_size
                mean_val_loss = 0
                mean_val_acc = 0
                self.FCNN.eval()
                for data, label in val_data:
                    with torch.no_grad():
                        predictions = self.FCNN(data)
                        loss = self.criterion(predictions, torch.argmax(label, dim=1).long())
                        eq = np.equal(torch.argmax(predictions, dim=1).cpu(), torch.argmax(label, dim=1).cpu())
                        acc = torch.mean(eq.float())
                        mean_val_loss += loss
                        mean_val_acc += acc
                mean_val_loss = mean_val_loss / val_size
                mean_val_acc = mean_val_acc / val_size

                if mean_val_loss < best_loss:
                    best_model = deepcopy(self.FCNN)
                    torch.save(self.FCNN.state_dict(), 'runs/' + self.name + '/best_model.pth')
                    best_loss = mean_val_loss
                writer.add_scalar('Loss/val', mean_val_loss, epoch)
                writer.add_scalar('Loss/train', mean_train_loss, epoch)
                writer.add_scalar('Acc/train', mean_train_acc, epoch)
                writer.add_scalar('Acc/val', mean_val_acc, epoch)
                print('Epoch {}/{} train loss: {}, train acc: {}, val loss: {}, val acc: {}'.format(epoch,
                      self.num_epochs-1,mean_train_loss, mean_train_acc, mean_val_loss, mean_val_acc))
            self.FCNN = best_model
        else:
            print('Loading model settings from {}'.format(self.load))
            self.load_model()
Example #25
    def __init__(self):
        print("Initializing IDS..")
        self.affinity_threshold = 0.3
        self.alert_threshold = 1
        self.secondaryIDS_count = 1
        self.detector_set_size = 50

        self.secondaryIDSs = []

        print("Loading self..")
        self.self = DataLoader().load_genes()

        self.detector_generator = DetectorGenerator()
        self.detector_generator.test_set = self.self

        print("Generating detector agents..")
        self.create_secondary_ids()

        print("Creating adversary..")
        self.adversary = Adversary()

        # Stats
        self.tp = 0
        self.tn = 0
        self.fp = 0
        self.fn = 0
Example #26
def main():
    """ main function """
    # define some command line arguments
    parser = argparse.ArgumentParser(description = 'Simple Todo handwritten text recognition')
    parser.add_argument('--train', action='store_true', help='train the NN')
    parser.add_argument('--validate', action='store_true', help='validate the NN')

    args = parser.parse_args()

    decoder_type = DecoderType.best_path

    if args.train or args.validate:

        loader = DataLoader(Params.dataset, Model.batch_size, Model.image_size, Model.max_text_len)

        # save characters of model for inference mode
        open(Params.char_list, 'w').write(str().join(loader.char_list))

        if args.train:

            print("Training NN!\n")
            model = Model(loader.char_list)
            train(model, loader)
        
        elif args.validate:
            print("Validating NN!\n")
            model = Model(loader.char_list)
            validate(model, loader)

    else:
        print("Recognizing image from "+ Params.input_image + " file!\n")
        model = Model(open(Params.char_list).read(), decoder_type, must_restore=True)
        infer(model, Params.input_image)
Example #27
def main(input_path):
    t0 = time.time()
    data = DataLoader(input_path)
    data.data_prepare()
    t1 = time.time()

    auc_list = np.zeros(RUN_TIMES)
    time_list = np.zeros(RUN_TIMES)
    for i in range(RUN_TIMES):
        time_0 = time.time()
        score, n_iter = MIX.fit(data, batch_size=batch_size, episode_max=episode_max,
                                                    epsilon=epsilon, k=k, verbose=False)

        # perform MIX'
        # score, n_iter = OutlierEstimator.fit_prime(data, batch_size=64, episode_max=10000,
        #                                                  k=k, verbose=False)

        time_1 = time.time()
        auc_list[i] = roc_auc_score(data.list_of_class, score)
        time_list[i] = (t1 - t0) + (time_1 - time_0)

    print_text = "{}, {:.4},{:.4}, {:.4}s".format(data.data_name,
          np.average(auc_list), np.std(auc_list), np.average(time_list))

    print(print_text)

    doc = open('out.txt', 'a')
    print(print_text, file=doc)
    doc.close()
    return
Example #28
def main():
    # Constants
    num_seconds = 10
    sample_rate = 44100
    data_width = num_seconds * sample_rate
    num_classes = 200

    # Hyperparameters
    learning_rate = 0.001

    # Load the data
    data_loader = DataLoader(truncate_secs=num_seconds, dtype=np.float32)

    # Create the network
    xs = tf.placeholder(tf.float32, [None, 1, data_width, 1])
    ys = tf.placeholder(tf.float32, [None, num_classes])
    network_output = create_network(xs, num_classes)
    predictions = tf.nn.softmax(network_output)
    loss = tf.reduce_sum((ys - predictions)**2.0)
    optim = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    test_x, test_y = data_loader.get_batch(test=True)

    for i in range(10000):
        batch_x, batch_y = data_loader.get_batch()
        sess.run(optim, feed_dict={xs: batch_x.reshape([-1, 1, data_width, 1]), ys: batch_y})
        if i % 20 == 0:
            preds, loss_val = sess.run((predictions, loss), feed_dict={xs: test_x.reshape([-1, 1, data_width, 1]), ys: test_y})
            print(i, "test error:", loss_val, np.sum(np.argmax(preds, axis=1) == np.argmax(test_y, axis=1)))
Example #29
def split_train_test(csv_datapath=None):
    dataframe = DataLoader(csv_datapath).load_dataframe_from_datapath()

    # Drop rows with values missing
    dataframe = dataframe.dropna().reset_index(drop=True)

    time_sequences = split_dataframe_into_time_sequences(dataframe,
                                                         time_index=False)
    print(time_sequences[0])

    # There are 179 complete sequences, so choose 20% (36) of these at random and store as test data
    test_indices = np.random.choice(len(time_sequences),
                                    round(0.2 * len(time_sequences)),
                                    replace=False)
    print(test_indices)
    train_indices = [
        i for i in range(len(time_sequences)) if i not in test_indices
    ]

    test_sequences = [time_sequences[i] for i in test_indices]
    train_sequences = [time_sequences[i] for i in train_indices]

    test_dataframe = pd.concat(test_sequences)
    train_dataframe = pd.concat(train_sequences)

    print(test_dataframe)

    test_dataframe.to_csv("../data/test/test_data.csv", index=False)
    train_dataframe.to_csv("../data/train/train_data.csv", index=False)
Example #30
def validate(filePath):
    "validate NN"
    # load training data
    loader = DataLoader(filePath, Model.batchSize, Model.imgSize,
                        Model.maxTextLen)

    # create TF model
    model = Model(loader.charList, useBeamSearch)

    # save characters of model for inference mode
    open(fnCharList, 'w').write(str().join(loader.charList))

    print('Validate NN')
    loader.validationSet()
    numOK = 0
    numTotal = 0
    while loader.hasNext():
        iterInfo = loader.getIteratorInfo()
        print('Batch:', iterInfo[0], '/', iterInfo[1])
        batch = loader.getNext()
        recognized = model.inferBatch(batch)

        print('Ground truth -> Recognized')
        for i in range(len(recognized)):
            isOK = batch.gtTexts[i] == recognized[i]
            print('[OK]' if isOK else '[ERR]', '"' + batch.gtTexts[i] + '"',
                  '->', '"' + recognized[i] + '"')
            numOK += 1 if isOK else 0
            numTotal += 1

    # print validation result
    accuracy = numOK / numTotal
    print('Correctly recognized words:', accuracy * 100.0, '%')