def model_test():
    """
    Test the model on the test dataset.
    :return: None
    """
    # NOTE: DataMode.Train is passed here even though this routine evaluates
    # the test set; verify that this is the intended split.
    test_dataloader = DataLoader(DataMode.Train)
    test_dataset = test_dataloader.load_all_from_tfreocrds()
    base_model = tf.keras.models.load_model(
        os.path.join(SVAED_MODEL_DIR, '{}_model.h5'.format(config.dataset)))
    error_text = []
    real_text = []
    error_count = 0
    for batch, data in enumerate(test_dataset):
        images, label = data
        # print(images.shape, label.shape)
        input_length = np.array(np.ones(1) * int(9))
        y_pred = base_model.predict(x=images[tf.newaxis, :, :, :])
        # print(y_pred.shape)  # (64, 9, 37)
        decoded_dense, _ = tf.keras.backend.ctc_decode(y_pred, input_length,
                                                       greedy=config.ctc_greedy,
                                                       beam_width=config.beam_width,
                                                       top_paths=config.top_paths)
        str_real = ''.join([config.characters[x] for x in label if x != -1])
        str_pred = ''.join([config.characters[x] for x in decoded_dense[0][0] if x != -1])
        if str_pred != str_real:
            error_count += 1
            error_text.append(str_pred)
            real_text.append(str_real)
    test_accuracy = (test_dataloader.size - error_count) / test_dataloader.size
    print('test acc %f' % test_accuracy)
    for real, pred in zip(real_text, error_text):
        if len(pred) == 4:
            print('error pair: ', real, ' ', pred)
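# A minimal hedged sketch: the hard-coded CTC input length of 9 above matches
# the commented prediction shape (batch, 9, 37). Deriving it from the
# prediction tensor itself avoids the magic number; this helper is an
# illustration, not part of the original codebase:
import numpy as np

def ctc_input_lengths(y_pred):
    # one entry per sample, each equal to the number of decoder time steps
    return np.full(y_pred.shape[0], y_pred.shape[1])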
def main(config, args):
    config["result_dir"] = args.path
    print(config["result_dir"])

    torch.manual_seed(config["seed"])
    random.seed(config["seed"])
    np.random.seed(config["seed"])

    path_data = config['path'] + "/safe_set_data.pth"
    if config['load_data']:
        data_loader = torch.load(path_data)
        data_loader.setBatchSize(config['n_batch'], config['train_val_ratio'])
    else:
        data_loader = DataLoader.DataLoader(config)
        torch.save(data_loader, path_data)
    print(data_loader.n_all_batches)

    model = SafeSet.SafeSet(config)
    path_model = config['path'] + "/safe_set_model.pth"
    path_model_results = config["result_dir"] + "/safe_set_model.pth"
    if not config['load_model']:
        trainer = Trainer.Trainer(config)
        trainer.train(model, data_loader)
        torch.save(model, path_model)
        torch.save(model, path_model_results)
    else:
        model = torch.load(path_model)

    validation = Validation.Validation(config)
    validation.validate(model, data_loader)
    validation.validateTest(model, data_loader)
    validation.validateTestUnseen(model, data_loader)
    validation.save_val()
    validation.save_model(model)
def main():
    # optional command line args
    parser = argparse.ArgumentParser()
    parser.add_argument('--beamsearch',
                        help='use beam search instead of best path decoding',
                        action='store_true')
    args = parser.parse_args()

    decoderType = DecoderType.BestPath
    if args.beamsearch:
        decoderType = DecoderType.BeamSearch

    # validation on IAM dataset:
    # load training data, create TF model
    loader = DataLoader(FilePaths.fnTrain, Model.batchSize, Model.imgSize, Model.maxTextLen)

    # save characters of model for inference mode
    open(FilePaths.fnCharList, 'w').write(str().join(loader.charList))

    # save words contained in dataset into file
    open(FilePaths.fnCorpus, 'w').write(str(' ').join(loader.trainWords + loader.validationWords))

    # execute validation
    model = Model(loader.charList, decoderType, mustRestore=True)
    validate(model, loader)
def main():
    ticker = _TICKER
    window_size = _WINDOWSIZE
    window_shift = _WINDOW_SHIFT

    d = DataLoader(ticker)
    d.loadData()
    d.prepData()

    x_history = d.features[-window_size:, :]
    x_history = x_history.reshape(1, x_history.shape[0], x_history.shape[1])
    start_price = d.prices[-1]

    model = SequenceModel()
    model.modelLoad("Data/" + ticker + '.h5', "Data/" + ticker + '_history.json')
    y_pred = model.predict_model(x_history)
    y_pred_delta = d.denormalize(y_pred, d.targets_mean, d.targets_std)
    y_pred_delta = y_pred_delta.flatten().reshape(-1, 1)
    y_pred_price = PricefromDelta(y_pred_delta, start_price)

    plot_dates = dates_axis(window_size, d)
    plotPredictions(y_pred_price, d, plot_dates)
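# d.denormalize() above comes from the DataLoader class, whose source is not
# shown. A minimal sketch of the inverse z-score transform it presumably
# applies (an assumption inferred from the (values, mean, std) call signature):
def denormalize(values, mean, std):
    # invert (x - mean) / std
    return values * std + mean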
def uploadglueup():
    # createRow() closes over `row` from the loop below
    def createRow():
        return {'straw_barcode': str(row[0]),
                'glueup_type': str(row[1]),
                'worker_barcode': str(row[2]),
                'workstation_barcode': str(row[3]),
                'comments': str(row[4]),
                'glue_batch_number': str(row[5])}

    for row in upload_file:
        table = "straw_glueups"
        dataLoader = DataLoader(password, url, group, table)
        dataLoader.addRow(createRow())
        retVal, code, text = dataLoader.send()
        if retVal:
            print("glueup upload success!\n")
            print(text)
        else:
            print("glueup upload failed!\n")
            print(code)
            print(text)
        dataLoader.clearRows()
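# A hedged variant of the loop above: the original constructs a fresh
# DataLoader and performs one send() per row. If the uploader accepts several
# queued rows per send() (suggested by the addRow()/clearRows() API, but not
# confirmed by the source), the rows could be batched; createRowFrom() below
# is a hypothetical helper standing in for createRow():
def uploadglueup_batched():
    dataLoader = DataLoader(password, url, group, "straw_glueups")
    for row in upload_file:
        dataLoader.addRow(createRowFrom(row))  # hypothetical per-row dict builder
    retVal, code, text = dataLoader.send()
    print("glueup upload success!" if retVal else "glueup upload failed!")
    print(text)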
def main():
    '''Main Function'''
    parser = argparse.ArgumentParser(description='translate.py')
    parser.add_argument('-model', required=True,
                        help='Path to model .pt file')
    parser.add_argument('-src', required=True,
                        help='Source sequence to decode (one line per sequence)')
    parser.add_argument('-vocab', required=True,
                        help='Path to the preprocessed vocabulary/settings file')
    parser.add_argument('-output', default='pred.txt',
                        help="""Path to output the predictions (each line will
                        be the decoded sequence)""")
    parser.add_argument('-beam_size', type=int, default=5, help='Beam size')
    parser.add_argument('-batch_size', type=int, default=30, help='Batch size')
    parser.add_argument('-n_best', type=int, default=1,
                        help="""If verbose is set, will output the n_best
                        decoded sentences""")
    parser.add_argument('-no_cuda', action='store_true')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda

    # Prepare DataLoader
    preprocess_data = torch.load(opt.vocab)
    preprocess_settings = preprocess_data['settings']
    test_src_word_insts = read_instances_from_file(
        opt.src, preprocess_settings.max_word_seq_len,
        preprocess_settings.keep_case)
    test_src_insts = convert_instance_to_idx_seq(
        test_src_word_insts, preprocess_data['dict']['src'])
    test_data = DataLoader(preprocess_data['dict']['src'],
                           preprocess_data['dict']['tgt'],
                           src_insts=test_src_insts,
                           cuda=opt.cuda,
                           shuffle=False,
                           batch_size=opt.batch_size)

    translator = Translator(opt)
    translator.model.eval()

    with open(opt.output, 'w') as f:
        for batch in tqdm(test_data, mininterval=2, desc=' - (Test)', leave=False):
            all_hyp, all_scores = translator.translate_batch(batch)
            for idx_seqs in all_hyp:
                for idx_seq in idx_seqs:
                    pred_line = ' '.join([test_data.tgt_idx2word[idx] for idx in idx_seq])
                    f.write(pred_line + '\n')
    print('[Info] Finished.')
def main():
    for j in range(0, 3):
        t = Times()
        times = []
        for i in range(1, 18):
            start_time = time.time()
            d = DataLoader(i)
            d_names = d.get_nodes_names()
            CostMatrix = d.get_final_matrix()
            fname_tsp = "results" + str(i)
            [fileID1, fileID2] = writeTSPLIBfile_FE(fname_tsp, CostMatrix, user_comment)
            run_LKHsolver_cmd(fname_tsp)
            copy_toTSPLIBdir_cmd(fname_tsp)
            rm_solution_file_cmd(fname_tsp)
            curr_time = time.time() - start_time
            times.append(curr_time)
            processData(i, d.get_nodes_names(), d, curr_time, t)

        file = open("./times" + str(j) + ".txt", "w")
        file.write("Instance \t Execution \t Cut time \t Air time \t Total cut \t Total \n")
        for i in range(0, len(times)):
            file.write(str(i + 1) + '\t' + str(times[i]) + '\t' +
                       str(t.get_cut()[i]) + '\t' + str(t.get_air()[i]) + '\t' +
                       str(t.get_cut()[i] + t.get_air()[i]) + '\t' +
                       str(t.get_cut()[i] + t.get_air()[i] + t.get_excecution()[i]) + "\n")
        file.close()
def example_one_to_one_nn_optimization():
    simple_nn = NeuralNetwork(1, [3, 3, 3], 1, output_activation_function=linear)
    # data = linear_data(10000)
    data = sinus_data(100)
    data_gen = DataLoader(data)
    neural_predictor = lambda nn: predictor_fitness(nn.predict, data_gen.generator(),
                                                    loss_function=mean_squared_error)

    x0w, x0b = simple_nn.get_weights_and_biases()
    print("Number of weights:", len(x0w))
    print("Number of biases:", len(x0b))
    x0 = x0w
    x0.extend(x0b)

    fitness = lambda ind: network_opt(ind, simple_nn, neural_predictor)
    # OPTIMIZE HERE
    best_ind = [1, 1 / 2.0, 1 / 2.0, 1, 10, 10, -15]
    print("Best fitness is:", fitness(best_ind))
    print(simple_nn.get_weights_and_biases())

    # plt.plot([x[0] for x in data], [x[1] for x in data], 'b*')
    simple_nn.set_all(best_ind)
    plt.plot([x[0] for x in data], [simple_nn.predict(x[0]) for x in data], 'r*')
    plt.show()
def main():
    ticker = _TICKER
    d = DataLoader(ticker)
    d.loadData()
    d.prepData()
    window_size = _WINDOWSIZE
    window_shift = _WINDOW_SHIFT
    start_index = d.getIndex(_TESTSTART)
    end_index = d.getIndex(_TESTEND)
    x_test, y_test, dates_test = createInputs(d.features, d.targets, d.dates,
                                              window_size, window_shift,
                                              start_index, end_index)
    print("Mean: ", d.targets_mean)
    print("STD: ", d.targets_std)
    print("x_test shape: ", x_test.shape)
    print("y_test shape: ", y_test.shape)
    # print("y_test 1st value: ", d.denormalize(y_test[0, 0, 0], d.targets_mean, d.targets_std))
    print("dates_test shape: ", dates_test.shape)

    model = SequenceModel()
    model.modelLoad("Data/" + ticker + '.h5', "Data/" + ticker + '_history.json')
    y_pred = model.predict_model(x_test)
    y_pred_price, y_actuals, y_dates = createPlotData(start_index, end_index, y_pred, d)
    # checkModel(d, x_test, y_actuals, y_dates, start_index)
    plotTestPerformance(y_pred_price, y_actuals, y_dates, model.history_dict,
                        d.targets_std, window_size=window_size)
def main():
    model_save_folder = args.model_save_folder
    if not os.path.exists(model_save_folder):
        os.makedirs(model_save_folder)
    model_save_path = args.model_save_folder + '/cnn.model'
    dict_save_path = args.model_save_folder + '/cnn.dict'

    dl = DataLoader(args.train_file, args.dev_file, args.test_file,
                    args.freq_threshold, args.max_batch_size,
                    args.max_length, args.min_length)
    en_dict, train_data, dev_data, test_data = dl.prepareData()
    with open(dict_save_path, 'wb') as handle:
        pickle.dump(en_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

    clf = ConvNet(en_dict.index2word, args.emb_size, dl.class_inv_freq,
                  args.num_kernels, args.kernel_sizes, args.learning_rate,
                  model_save_path, pretrained_path=args.pretrained_path)
    clf.fit(train_data, dev_data, args.num_epochs)
    y_true, y_pred = clf.predict(test_data)
    corrects = (y_true == y_pred).sum()
    print('testing accuracy: {:}'.format(corrects * 1.0 / test_data.num_instances))
def main():
    path_order_info = "../../../all_database_in_csv/jos_vm_orders.csv"
    path_sku_info = "../../../all_database_in_csv/jos_vm_order_item.csv"
    path_to_user_type = "../../../all_database_in_csv/jos_vm_shopper_vendor_xref.csv"

    # Initialize the preprocessing pipeline with a loader that opens the three CSV files on the local machine.
    pipeline = DataPipeline(
        DataLoader("local storage",
                   first=path_order_info,
                   second=path_sku_info,
                   third=path_to_user_type))
    data = pipeline.prepare_data(datetime(2019, 1, 1), prefixes_to_remove=["RP"])

    # Remove unpopular SKUs and users (do we actually need to remove users?)
    data = pipeline.remove_unpopular_item_and_users(data)

    # Initialize, train, save, and reload the PMI-matrix model
    model = PmiModel()
    model.fit_model(data)
    model.save_binary("data")
    model.load_binary("data")
    # print(model.pmi_matrix.shape)
    # print(model.evaluate("AMX-02"))

    # Find all SKUs of a given user
    grouped_data = data.groupby('user_id').agg(
        {"order_item_sku": [("list", lambda x: list(x)), ("count", "count")]})
    users_sku = grouped_data.loc[6709][("order_item_sku", "list")]
    print(users_sku)

    # Initialize the recommender system with the model. Get a list of predictions with a score for each.
    recomendator = PmiRecommendator(model)
    answer = recomendator.create_list(users_sku)
    print(answer.shape, answer)
def main():
    project = test_CNN.get_project_and_check_arguments(sys.argv, "run_data_loader.py")
    print("start creating data for project: ", project.project_name)
    data_loader = DataLoader(project, motifs_base_path)
    data_loader.create_npy_files()
    print("End!")
def train(self, model_citations, model_authors):
    d_train = DataLoader()
    training_data = d_train.training_data_with_abstracts_citations().data

    # Load trained embeddings
    print("Loading the citations training embeddings...")
    pretrained_embeddings_citations, pretrained_embeddings_id_map_citations = \
        self._load_train_embeddings(model_citations)
    print("Loaded.")
    print("Loading the authors training embeddings...")
    pretrained_embeddings_authors, pretrained_embeddings_id_map_authors = \
        self._load_train_embeddings(model_authors)
    print("Loaded.")

    training_ids = list(training_data.chapter)
    training_embeddings_citations = pretrained_embeddings_citations[[
        pretrained_embeddings_id_map_citations[id] for id in training_ids]]
    training_embeddings_authors = pretrained_embeddings_authors[[
        pretrained_embeddings_id_map_authors[id] for id in training_ids]]

    # Concatenate embeddings
    training_embeddings = np.concatenate(
        (training_embeddings_citations, training_embeddings_authors), axis=1)

    self.label_encoder = LabelEncoder()
    self.labels = self.label_encoder.fit_transform(training_data.conferenceseries)
    self.classifier.fit(training_embeddings, self.labels)
    self._save_model_classifier()
    print("Training finished.")
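# Concatenation along axis=1 above joins the two embedding views row-wise:
# each paper keeps one row, and its citation and author vectors are glued
# into a single feature vector. A tiny self-contained illustration
# (the dimensions here are arbitrary, not from the source):
import numpy as np
a = np.ones((3, 2))   # e.g. 3 papers, 2-dim citation embeddings
b = np.zeros((3, 4))  # e.g. 3 papers, 4-dim author embeddings
assert np.concatenate((a, b), axis=1).shape == (3, 6)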
def train_and_test_network():
    """
    Train a neural network and test it. Can also train on other feature
    types, or run the experimenter to try different configurations.
    """
    min_speakers = 1
    max_speakers = 10

    # Load data from the filesystem
    data_loader = DataLoader(train_dir, test_src_dr, test_dest_dir)
    data_loader.force_recreate = False
    data_loader.min_speakers = min_speakers
    data_loader.max_speakers = max_speakers
    train, (test_x, test_y) = data_loader.load_data()
    libri_x, libri_y = data_loader.load_libricount(libri_dir)

    # Train and test the network
    file = 'testing_rnn'
    net = RNN()
    net.save_to_file(file)
    net.train(train, min_speakers, max_speakers, FEATURE_TYPE)
    net.load_from_file(file)
    timit_results = net.test(test_x, test_y, FEATURE_TYPE)
    libri_results = net.test(libri_x, libri_y, FEATURE_TYPE)
def main():
    """ Main function """
    parser = argparse.ArgumentParser()
    parser.add_argument("--train", help="train the neural network", action="store_true")
    parser.add_argument("--validate", help="validate the neural network", action="store_true")
    parser.add_argument("--wordbeamsearch",
                        help="use word beam search instead of best path decoding",
                        action="store_true")
    args = parser.parse_args()

    decoderType = DecoderType.BestPath
    if args.wordbeamsearch:
        decoderType = DecoderType.WordBeamSearch

    if args.train or args.validate:
        loader = DataLoader(FilePaths.fnTrain, Model.batchSize, Model.imgSize,
                            Model.maxTextLen, load_aug=True)
        if args.train:
            model = Model(loader.charList, decoderType)
            train(model, loader)
        elif args.validate:
            model = Model(loader.charList, decoderType, mustRestore=False)
            validate(model, loader)
    else:
        print(open(FilePaths.fnAccuracy).read())
        model = Model(open(FilePaths.fnCharList).read(), decoderType, mustRestore=False)
        infer(model, FilePaths.fnInfer)
def test_generate_list_of_numbers(self):
    start = 1
    end = 4
    expect = ["01", "02", "03", "04"]
    dl = DataLoader()
    result = dl.generate_list_of_numbers(start, end)
    self.assertEqual(result, expect)
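# A minimal sketch of what DataLoader.generate_list_of_numbers presumably
# does, inferred only from the expected output in the test above (two-digit
# zero padding, inclusive range); the real implementation is not shown:
def generate_list_of_numbers(start, end):
    return [str(n).zfill(2) for n in range(start, end + 1)]

assert generate_list_of_numbers(1, 4) == ["01", "02", "03", "04"]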
def main():
    config = tf.compat.v1.ConfigProto(allow_soft_placement=True)  # was: tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.compat.v1.Session(config=config) as sess:  # was: tf.Session(config=config)
        copy_file(save_file_dir)
        dataloader = DataLoader(FLAGS.dir, FLAGS.limits)
        model = SeqUnit(batch_size=FLAGS.batch_size, hidden_size=FLAGS.hidden_size,
                        emb_size=FLAGS.emb_size, field_size=FLAGS.field_size,
                        pos_size=FLAGS.pos_size, field_vocab=FLAGS.field_vocab,
                        source_vocab=FLAGS.source_vocab, position_vocab=FLAGS.position_vocab,
                        target_vocab=FLAGS.target_vocab, scope_name="seq2seq",
                        name="seq2seq", field_concat=FLAGS.field,
                        position_concat=FLAGS.position, fgate_enc=FLAGS.fgate_encoder,
                        dual_att=FLAGS.dual_attention, decoder_add_pos=FLAGS.decoder_pos,
                        encoder_add_pos=FLAGS.encoder_pos, learning_rate=FLAGS.learning_rate)
        sess.run(tf.compat.v1.global_variables_initializer())
        # copy_file(save_file_dir)
        if FLAGS.load != '0':
            model.load(save_dir)
        if FLAGS.mode == 'train':
            train(sess, dataloader, model)
        else:
            test(sess, dataloader, model)
def main():
    '''
    Main block to (1) load data, (2) create x_train, y_train and dates_train,
    (3) run the model, (4) save the model and (5) plot training and validation
    performance. Loads data through the DataLoader class.
    '''
    ticker = _TICKER
    d = DataLoader(ticker)
    d.loadData()
    d.prepData()
    start_index = d.getIndex(_START)
    end_index = d.getIndex(_END)

    # Generate x_train, y_train and dates_train. We need dates_train in order
    # to plot the x-axis later.
    x_train, y_train, dates_train = createInputs(d.features, d.targets, d.dates,
                                                 _WINDOWSIZE, _WINDOW_SHIFT,
                                                 start_index, end_index)

    # Create a model instance and run the model
    aapl_model = SequenceModel()
    epochs = _EPOCHS
    aapl_model.build_model(x_train, y_train, batch_size=_BATCHSIZE, epochs=epochs)
    aapl_model.modelSave("Data/" + ticker + '.h5', "Data/" + ticker + '_history.json')
    # aapl_model.modelLoad('AAPL.h5', 'AAPL_history.json')

    # Plot model performance
    plotPerformance(aapl_model, aapl_model.history_dict, d.targets_std)
def test(self, step_num):
    self.sess.run(tf.local_variables_initializer())
    self.reload(step_num)
    from DataLoader import BrainLoader as DataLoader
    self.data_reader = DataLoader(self.conf)
    self.data_reader.get_data(mode='test')
    self.num_test_batch = self.data_reader.count_num_batch(self.conf.batch_size, mode='test')
    self.is_train = False
    self.sess.run(tf.local_variables_initializer())

    for step in range(self.num_test_batch):
        start = step * self.conf.batch_size
        end = (step + 1) * self.conf.batch_size
        x_test, y_test = self.data_reader.next_batch(start, end, mode='test')
        feed_dict = {self.x: x_test,
                     self.y: y_test,
                     self.mask_with_labels: False}
        self.sess.run([self.mean_loss_op, self.mean_accuracy_op], feed_dict=feed_dict)

    test_loss, test_acc = self.sess.run([self.mean_loss, self.mean_accuracy])
    print('-' * 18 + 'Test Completed' + '-' * 18)
    print('test_loss= {0:.4f}, test_acc={1:.01%}'.format(test_loss, test_acc))
    print('-' * 50)
def main():
    ticker = _TICKER
    d = DataLoader(ticker)
    d.loadData()
    d.prepData()
    window_size = _WINDOWSIZE
    window_shift = _WINDOW_SHIFT
    start_index = d.getIndex(_TESTSTART)
    end_index = d.getIndex(_TESTEND)
    x_test, y_test, dates_test = createInputs(d.features, d.targets, d.dates,
                                              window_size, window_shift,
                                              start_index, end_index)

    aapl_model = SequenceModel()
    aapl_model.modelLoad("Data/" + ticker + '.h5', "Data/" + ticker + '_history.json')
    y_pred = aapl_model.predict_model(x_test)
    y_pred = d.denormalize(y_pred, d.targets_mean, d.targets_std)
    # print(y_pred.shape)

    y_dates = d.dates[start_index + 1:end_index]
    y_actuals = d.targets[start_index + 1:end_index]
    y_actuals = d.denormalize(y_actuals, d.targets_mean, d.targets_std)
    plotTestPerformance(y_pred, y_actuals, y_dates, aapl_model.history,
                        d.targets_std, window_size=window_size)
def main(input_path):
    rounds = 1
    auc_array = np.zeros((rounds, 1))
    time_array = np.zeros((rounds, 1))
    dim_array = np.zeros((rounds, 1))
    data_describe = ''

    for m in range(rounds):
        data = DataLoader(input_path)
        data.data_prepare()
        dim = data.all_features_num
        n = len(data.data_matrix)
        labels = data.list_of_class
        data_describe = "{}, {}, {}, ".format(data.data_name, n, dim)

        scores_all, totalTime, dim_vec, it = ODNUTFramework(data)
        final_scores_all = scores_all
        time_array[m] = totalTime
        dim_array[m] = dim_vec[-1]
        roc_auc = roc_auc_score(labels, final_scores_all)
        auc_array[m] = roc_auc

    roc_auc = np.mean(auc_array)
    runtime = np.mean(time_array)
    avgDim = np.mean(dim_array)
    print_text = data_describe + "{:.4}, {:.4}, {:.4}s".format(avgDim, roc_auc, runtime)
    print(print_text)
    with open('out.txt', 'a') as doc:
        print(print_text, file=doc)
def train(self):
    self.sess.run(tf.local_variables_initializer())
    self.best_validation_accuracy = 0
    self.data_reader = DataLoader(self.conf)
    self.data_reader.get_data(mode='train')
    self.data_reader.get_data(mode='valid')
    self.train_loop()
def strawcutlengths():
    # createRow() closes over `row` from the loop below
    def createRow():
        return {'straw_barcode': str(row[0]),
                # 'create_time': str(row[1]),  # Website gets real time somehow.
                'worker_barcode': str(row[2]),
                'workstation_barcode': str(row[3]),
                'nominal_length': str(row[4]),
                'measured_length': str(row[5]),
                'temperature': str(row[6]),
                'humidity': str(row[7]),
                # 'comments': str(row[8]),
                }

    for row in upload_file:
        table = "straw_cut_lengths"
        dataLoader = DataLoader(password, url, group, table)
        dataLoader.addRow(createRow())
        retVal, code, text = dataLoader.send()
        if retVal:
            print("upload straw length success!\n")
            print(text)
        else:
            print("upload straw length failed!\n")
            print(code)
            print(text)
        dataLoader.clearRows()
def pre_train(self, dataset):
    """
    Pre-trains the model based on one or more datasets.
    """
    if not self.load:
        writer = SummaryWriter('runs/' + self.name)
        data_loader = DataLoader(dataset, self.batch_size, single=True)
        data_loader = data_loader.data_loader
        train_size = int(0.9 * len(data_loader))
        val_size = len(data_loader) - train_size
        train_data, val_data = torch.utils.data.random_split(data_loader, [train_size, val_size])
        print('Starting training on {} batches of train data with {} batches of validation data.'.format(
            train_size, val_size))

        best_loss = np.inf
        best_model = None
        for epoch in range(self.num_epochs):
            mean_train_loss = 0
            mean_train_acc = 0
            self.FCNN.train()
            for data, label in train_data:
                self.optimizer.zero_grad()
                predictions = self.FCNN(data)
                loss = self.criterion(predictions, torch.argmax(label, dim=1).long())
                eq = np.equal(torch.argmax(predictions, dim=1).cpu(),
                              torch.argmax(label, dim=1).cpu())
                acc = torch.mean(eq.float())
                mean_train_acc += acc
                # accumulate the scalar value so the autograd graph is not retained
                mean_train_loss += loss.item()
                loss.backward()
                self.optimizer.step()
            mean_train_loss = mean_train_loss / train_size
            mean_train_acc = mean_train_acc / train_size

            mean_val_loss = 0
            mean_val_acc = 0
            self.FCNN.eval()
            for data, label in val_data:
                with torch.no_grad():
                    predictions = self.FCNN(data)
                    loss = self.criterion(predictions, torch.argmax(label, dim=1).long())
                    eq = np.equal(torch.argmax(predictions, dim=1).cpu(),
                                  torch.argmax(label, dim=1).cpu())
                    acc = torch.mean(eq.float())
                    mean_val_loss += loss.item()
                    mean_val_acc += acc
            mean_val_loss = mean_val_loss / val_size
            mean_val_acc = mean_val_acc / val_size

            # checkpoint the best model seen so far on validation loss
            if mean_val_loss < best_loss:
                best_model = deepcopy(self.FCNN)
                torch.save(self.FCNN.state_dict(), 'runs/' + self.name + '/best_model.pth')
                best_loss = mean_val_loss

            writer.add_scalar('Loss/val', mean_val_loss, epoch)
            writer.add_scalar('Loss/train', mean_train_loss, epoch)
            writer.add_scalar('Acc/train', mean_train_acc, epoch)
            writer.add_scalar('Acc/val', mean_val_acc, epoch)
            print('Epoch {}/{} train loss: {}, train acc: {}, val loss: {}, val acc: {}'.format(
                epoch, self.num_epochs - 1, mean_train_loss, mean_train_acc,
                mean_val_loss, mean_val_acc))

        self.FCNN = best_model
    else:
        print('Loading model settings from {}'.format(self.load))
        self.load_model()
def __init__(self):
    print("Initializing IDS..")
    self.affinity_threshold = 0.3
    self.alert_threshold = 1
    self.secondaryIDS_count = 1
    self.detector_set_size = 50
    self.secondaryIDSs = []

    print("Loading self..")
    self.self = DataLoader().load_genes()
    self.detector_generator = DetectorGenerator()
    self.detector_generator.test_set = self.self

    print("Generating detector agents..")
    self.create_secondary_ids()

    print("Creating adversary..")
    self.adversary = Adversary()

    # Stats
    self.tp = 0
    self.tn = 0
    self.fp = 0
    self.fn = 0
def main():
    """ main function """
    # define some command line arguments
    parser = argparse.ArgumentParser(description='Simple Todo handwritten text recognition')
    parser.add_argument('--train', action='store_true', help='train the NN')
    parser.add_argument('--validate', action='store_true', help='validate the NN')
    args = parser.parse_args()

    decoder_type = DecoderType.best_path

    if args.train or args.validate:
        loader = DataLoader(Params.dataset, Model.batch_size, Model.image_size,
                            Model.max_text_len)
        # save characters of model for inference mode
        open(Params.char_list, 'w').write(str().join(loader.char_list))
        if args.train:
            print("Training NN!\n")
            model = Model(loader.char_list)
            train(model, loader)
        elif args.validate:
            print("Validating NN!\n")
            model = Model(loader.char_list)
            validate(model, loader)
    else:
        print("Recognizing image from " + Params.input_image + " file!\n")
        model = Model(open(Params.char_list).read(), decoder_type, must_restore=True)
        infer(model, Params.input_image)
def main(input_path):
    t0 = time.time()
    data = DataLoader(input_path)
    data.data_prepare()
    t1 = time.time()

    auc_list = np.zeros(RUN_TIMES)
    time_list = np.zeros(RUN_TIMES)
    for i in range(RUN_TIMES):
        time_0 = time.time()
        # perform MIX'
        score, n_iter = MIX.fit(data, batch_size=batch_size, episode_max=episode_max,
                                epsilon=epsilon, k=k, verbose=False)
        # score, n_iter = OutlierEstimator.fit_prime(data, batch_size=64,
        #                                            episode_max=10000,
        #                                            k=k, verbose=False)
        time_1 = time.time()
        auc_list[i] = roc_auc_score(data.list_of_class, score)
        time_list[i] = (t1 - t0) + (time_1 - time_0)

    print_text = "{}, {:.4}, {:.4}, {:.4}s".format(data.data_name, np.average(auc_list),
                                                   np.std(auc_list), np.average(time_list))
    print(print_text)
    with open('out.txt', 'a') as doc:
        print(print_text, file=doc)
def main():
    # Constants
    num_seconds = 10
    sample_rate = 44100
    data_width = num_seconds * sample_rate
    num_classes = 200

    # Hyperparameters
    learning_rate = 0.001

    # Load the data
    data_loader = DataLoader(truncate_secs=num_seconds, dtype=np.float32)

    # Create the network
    xs = tf.placeholder(tf.float32, [None, 1, data_width, 1])
    ys = tf.placeholder(tf.float32, [None, num_classes])
    network_output = create_network(xs, num_classes)
    predictions = tf.nn.softmax(network_output)
    loss = tf.reduce_sum((ys - predictions) ** 2.0)
    optim = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    test_x, test_y = data_loader.get_batch(test=True)
    for i in range(10000):
        batch_x, batch_y = data_loader.get_batch()
        sess.run(optim, feed_dict={xs: batch_x.reshape([-1, 1, data_width, 1]),
                                   ys: batch_y})
        if i % 20 == 0:
            preds, loss_val = sess.run((predictions, loss),
                                       feed_dict={xs: test_x.reshape([-1, 1, data_width, 1]),
                                                  ys: test_y})
            print(i, "test error:", loss_val,
                  np.sum(np.argmax(preds, axis=1) == np.argmax(test_y, axis=1)))
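# The loop above minimizes a summed squared error over softmax outputs. For a
# 200-way classification task, softmax cross-entropy on the raw logits is the
# more conventional choice; a hedged drop-in for the two loss lines above
# (same `ys`, `network_output`, and `learning_rate` as in the snippet, TF1 API):
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=ys, logits=network_output))
optim = tf.train.AdamOptimizer(learning_rate).minimize(loss)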
def split_train_test(csv_datapath=None):
    dataframe = DataLoader(csv_datapath).load_dataframe_from_datapath()
    # Drop rows with missing values
    dataframe = dataframe.dropna().reset_index(drop=True)
    time_sequences = split_dataframe_into_time_sequences(dataframe, time_index=False)
    print(time_sequences[0])

    # There are 179 complete sequences, so choose 20% (36) of them at random
    # and store them as test data
    test_indices = np.random.choice(len(time_sequences),
                                    round(0.2 * len(time_sequences)),
                                    replace=False)
    print(test_indices)
    train_indices = [i for i in range(len(time_sequences)) if i not in test_indices]
    test_sequences = [time_sequences[i] for i in test_indices]
    train_sequences = [time_sequences[i] for i in train_indices]
    test_dataframe = pd.concat(test_sequences)
    train_dataframe = pd.concat(train_sequences)
    print(test_dataframe)
    test_dataframe.to_csv("../data/test/test_data.csv", index=False)
    train_dataframe.to_csv("../data/train/train_data.csv", index=False)
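# The random split above is unseeded, so the train/test partition changes on
# every run. A minimal seeded alternative (the seed value is arbitrary, not
# from the source) that could replace the np.random.choice call:
import numpy as np

def choose_test_indices(n_sequences, test_fraction=0.2, seed=42):
    rng = np.random.default_rng(seed)
    return rng.choice(n_sequences, round(test_fraction * n_sequences), replace=False)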
def validate(filePath):
    "validate NN"
    # load training data
    loader = DataLoader(filePath, Model.batchSize, Model.imgSize, Model.maxTextLen)

    # create TF model
    model = Model(loader.charList, useBeamSearch)

    # save characters of model for inference mode
    open(fnCharList, 'w').write(str().join(loader.charList))

    print('Validate NN')
    loader.validationSet()
    numOK = 0
    numTotal = 0
    while loader.hasNext():
        iterInfo = loader.getIteratorInfo()
        print('Batch:', iterInfo[0], '/', iterInfo[1])
        batch = loader.getNext()
        recognized = model.inferBatch(batch)

        print('Ground truth -> Recognized')
        for i in range(len(recognized)):
            isOK = batch.gtTexts[i] == recognized[i]
            print('[OK]' if isOK else '[ERR]',
                  '"' + batch.gtTexts[i] + '"', '->', '"' + recognized[i] + '"')
            numOK += 1 if isOK else 0
            numTotal += 1

    # print validation result
    accuracy = numOK / numTotal
    print('Correctly recognized words:', accuracy * 100.0, '%')