def train(config, network_spec=None):
    """Train a DQN agent on the stock environment, checkpointing as it goes.

    Returns the trained agent together with its environment.
    """
    provider = DataProvider(config.db)
    env = StockEnvironment(provider, config, 0)
    # Either start from scratch or resume from a previously saved agent.
    if config.overwrite_agent:
        agent = overwrite_agent(env, network_spec, config)
    else:
        agent = load_agent(config, env, network_spec)
    mlflow.log_param("agent", "tensorforce.agents.DQNAgent")
    for key in config.agent_specs:
        mlflow.log_param(key, config.agent_specs[key])
    runner = Runner(agent=agent, environment=env)
    offset = 20000
    num_episodes = 20
    step = 0
    # Slide the training window forward while data remains for a full episode.
    while provider.has_data_key(offset + config.max_step_per_episode):
        runner.run(num_episodes=num_episodes)
        offset += config.max_step_per_episode
        env.offset = offset
        agent.save(config.agent_dir, config.agent_name)
        # Evaluate on the window just trained, once every 10 windows.
        if step % 10 == 0:
            evaluate(config, provider, offset - config.max_step_per_episode, agent)
        step += 1
    return agent, env
def validate(s2s, n_samples):
    """Run inference on ``n_samples`` sequences and print accuracy metrics.

    Reports both exact-match accuracy (whole sequence) and elementwise
    (per-character) accuracy.

    NOTE(review): relies on module-level ``mlen``; assumes batches are
    one-hot encoded lists -- confirm against DataProvider.
    """
    bsize = 1  # batch size
    vgen = DataProvider(n_samples, mlen, batch_size=bsize)
    correct = 0
    correct_elements = 0
    total = 0
    total_elements = 0
    for _ in range(n_samples):
        batch, slen, _d_inputs, _d_seqlen, _targets_e, _targets_d = vgen.next()
        # Decode the one-hot encoded input back into characters.
        inp = []
        for i, b1 in enumerate(batch):
            for j in range(slen[i]):
                inp.append(chr(b1[j].index(1)))
        e_results, results = s2s.do_inference(batch, slen, vgen)
        pred = [chr(np.argmax(e_results))]
        for result in results:
            for res in result:  # for each seq in a mini batch
                pred.append(chr(np.argmax(res)))
        pred = pred[:-1]  # ignore the end char
        # print() calls for consistency with the rest of the file
        # (the original used Python 2 print statements).
        print("Inp: ", inp)
        print("Prd: ", pred)
        for c1, c2 in zip(inp, pred):
            if c1 == c2:
                correct_elements += 1
            total_elements += 1
        if inp == pred:
            correct += 1
        total += 1
    print("EXACT match validation accuracy: ",
          (float(correct) / total) * 100, "%")
    print("Elementwise match validation accuracy: ",
          (float(correct_elements) / total_elements) * 100, "%")
def main():
    """Build one data provider per base model and train each base model."""
    # parse config
    config = Config(sys.argv[1])
    # setup logger
    setup_logging(config.working_dir)
    # resolve the configured encoding functions
    encoding_func = ENCODING_METHOD_MAP[config.encoding_method]
    encoding_func2 = ENCODING_METHOD_MAP[config.encoding_method2]
    log_to_file('Encoding method2', config.encoding_method2)
    providers = [
        DataProvider(encoding_func,
                     encoding_func2,
                     config.data_file,
                     config.test_file,
                     config.batch_size,
                     max_len_hla=config.max_len_hla,
                     max_len_pep=config.max_len_pep,
                     model_count=config.model_count)
        for _ in range(config.base_model_count)
    ]
    first = providers[0]
    log_to_file('Traning samples', len(first.train_samples[0]))
    log_to_file('Val samples', len(first.validation_samples[0]))
    log_to_file('Traning steps', first.train_steps())
    log_to_file('Val steps', first.val_steps())
    log_to_file('Batch size', first.batch_size)
    log_to_file('max_len_hla', first.max_len_hla)
    log_to_file('max_len_pep', first.max_len_pep)
    for index, provider in enumerate(providers):
        train(config, provider, index)
def execute_command(self):
    """Parse CLI arguments and dispatch to the matching CalculationLogic task.

    Each task requires a specific set of arguments; when they are missing,
    nothing is printed (same behavior as before).
    """
    args = self.parser.parse_args()
    data = DataProvider().extract_data()
    logic = CalculationLogic()
    task = args.task
    # Guards combine the task check with its required arguments; since each
    # branch is keyed on a distinct task name, short-circuiting preserves
    # the original nested-if behavior exactly.
    if task == 't1' and args.district and args.year:
        if args.gender:
            print(logic.task1(data, args.year, args.district, args.gender))
        else:
            print(logic.task1(data, args.year, args.district))
    elif task == 't2' and args.district:
        if args.gender:
            print(logic.task2(data, args.district, args.gender))
        else:
            print(logic.task2(data, args.district))
    elif task == 't3' and args.year:
        if args.gender:
            print(logic.task3(data, args.year, args.gender))
        else:
            print(logic.task3(data, args.year))
    elif task == 't4':
        if args.gender:
            print(logic.task4(data, args.gender))
        else:
            print(logic.task4(data))
    elif task == 't5' and args.district and args.district2:
        if args.gender:
            print(logic.task5(data, args.district, args.district2, args.gender))
        else:
            print(logic.task5(data, args.district, args.district2))
def main():
    """Build the configured data providers and run evaluation on the first."""
    # parse config
    config = Config(sys.argv[1])
    # setup logger
    setup_logging(config.working_dir)
    # resolve the configured encoding functions
    encoding_func = ENCODING_METHOD_MAP[config.encoding_method]
    encoding_func2 = ENCODING_METHOD_MAP[config.encoding_method2]
    log_to_file('Encoding method2', config.encoding_method2)
    providers = [
        DataProvider(encoding_func,
                     encoding_func2,
                     config.data_file,
                     config.test_file,
                     config.batch_size,
                     max_len_hla=config.max_len_hla,
                     max_len_pep=config.max_len_pep,
                     model_count=config.model_count)
        for _ in range(config.base_model_count)
    ]
    log_to_file('max_len_hla', providers[0].max_len_hla)
    log_to_file('max_len_pep', providers[0].max_len_pep)
    test(config, providers[0])
def main():
    """Restore a trained model and write input/ground-truth/predicted images.

    For each batch, the predicted and ground-truth illuminant corrections
    are applied and the gamma-corrected results saved to data/inference/.
    """
    x = tf.placeholder(tf.float32, [batch_size, 512, 512, 3])
    y = tf.placeholder(tf.float32, [None, 3])
    out = M.test_architecture2(x)
    provider = DataProvider(True, ['g0'])
    provider.set_batch_size(batch_size)
    angular_loss = angular_error_fn(out, y)
    nr_step = 100
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, "tf_log/model.ckpt")
        for epoch in range(nr_epochs):
            for step in range(nr_step):
                batch = provider.get_batch()
                feed_x, feed_y = batch[0], batch[2]
                ans, angular_error = sess.run(
                    [out, angular_loss], feed_dict={x: feed_x, y: feed_y})
                print(str(step) + " Angular_error: " + str(angular_error))
                print(ans[0])
                print(feed_y[0])
                # Normalize the first image of the batch for display/saving.
                img = feed_x[0] / feed_x[0].max()
                cv2.imwrite("data/inference/" + str(step) + "_img_input.png",
                            255 * np.power(img, 1 / 2.2))
                img_gt = sp.apply_gt(img, feed_y[0])
                cv2.imwrite("data/inference/" + str(step) + "_img_gt.png",
                            255 * np.power(img_gt, 1 / 2.2))
                img_pred = sp.apply_gt(img, ans[0])
                cv2.imwrite("data/inference/" + str(step) + "_img_pred.png",
                            255 * np.power(img_pred, 1 / 2.2))
    provider.stop()
def train(args):
    """Train OwnModel with SGD, checkpointing to ./<args.output>/model.pkl.

    If ``args.cont`` names a checkpoint, training resumes from it; a failed
    load falls back to a fresh model instead of aborting.
    """
    # Portable directory creation instead of shelling out to `mkdir`.
    if not os.path.isdir('./%s' % args.output):
        os.makedirs('./%s' % args.output, exist_ok=True)
    if args.cont:
        try:
            model = torch.load(args.cont)
            print('load success')
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; any load failure starts a fresh model.
            model = OwnModel()
    else:
        model = OwnModel()
    dp = DataProvider(args.dataset)
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=0.9, weight_decay=1e-5)
    crit = torch.nn.CrossEntropyLoss()
    mbsize = 1024
    for epoch in range(args.max_iter):
        odata, olabel = dp.train_iter(mbsize)
        data = Variable(torch.from_numpy(odata))
        label = Variable(torch.from_numpy(olabel))
        lr = get_lr(model.iter)
        pred = model(data)
        pred = pred.contiguous().view(-1, 2)
        loss = crit(pred, label)
        optimizer.zero_grad()
        # Apply the scheduled learning rate before stepping.
        for group in optimizer.param_groups:
            group['lr'] = lr
        loss.backward()
        optimizer.step()
        print('iter:%s loss:%s' % (epoch, loss.data.numpy()), end='\r')
        if epoch % 10 == 0:
            torch.save(model, './%s/model.pkl' % args.output)
def setup(trello_key, trello_secret, board_id, out, delimiter, card_extractors, filters):
    """Validate CLI inputs, pull the Trello board, run extraction and export."""
    # validate inputs
    if not trello_key or not trello_secret:
        raise click.BadParameter('trello_secret and trello_key are required')
    if not board_id:
        raise click.BadParameter('board_id is required')
    client = TrelloClient(
        api_key=trello_key,
        api_secret=trello_secret,
    )
    data_provider = DataProvider(Board(client, board_id=board_id))
    print(data_provider.board.name)  # TODO: add logging
    database = DataBase(delimiter=delimiter)
    extractor_params = [Parameter(x.strip()) for x in card_extractors.split(',')]
    filter_params = [Parameter(x.strip()) for x in filters.split(',')] if filters else []
    runner = Runner(data_provider, database,
                    card_extractors_parameter=extractor_params,
                    filters=filter_params)
    runner.run()
    database.export(out)
def main():
    """Read the training data and build the model."""
    provider = DataProvider()
    provider.read_data("train.csv")
    # Make sure the checkpoint directory exists before training starts.
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)
    create_model(provider)
def preparation(): """Prepare databases and start background tasks.""" # Kill old processes if running kill_bg_servers() time.sleep(10) data_dir = config.DATA_DIR log.info("Removing Databases.") with contextlib.suppress(FileNotFoundError): # Remove Bloom Filter os.remove(data_dir + config.BLOOM_FILE) # Remove Databases os.remove(data_dir + config.KEYSERVER_DB) os.remove(data_dir + config.STORAGE_DB) # Add User log.info("Prepare User DB.") db.main(UserType.CLIENT, ['testuser', 'password', '-a'], no_print=True) db.main(UserType.OWNER, ['testprovider', 'password', '-a'], no_print=True) log.info("Starting Background Servers.") subprocess.run([f"{config.WORKING_DIR}src/allStart.sh", "eval"]) time.sleep(10) # Create data provider client d = DataProvider('testprovider') d.set_password('password') # Check that servers are really online tries = 0 done = False while not done: try: if tries >= 1: # Try to start servers again. kill_bg_servers() time.sleep(10) subprocess.run( [f"{config.WORKING_DIR}src/allStart.sh", "eval"]) time.sleep(10) tries = 0 # Check Key Server d.get_token(ServerType.KeyServer) # Check celery r = d.get(d.KEYSERVER.replace('provider', 'celery')) if r.content != b"True": raise RuntimeError("Celery of keyserver not started.") # Check Storage Server d.get_token(ServerType.StorageServer) # Check celery r = d.get(d.STORAGESERVER.replace('provider', 'celery')) if r.content != b"True": raise RuntimeError("Celery of storage-server not started.") # Success done = True except Exception as e: log.error(f"Server not up, yet. Try: {tries}. Error: {str(e)}") tries += 1 time.sleep(5)
def rnn():
    """Train the character RNN, logging loss to TensorBoard and sampling text.

    NOTE(review): the `while True` loop has no termination condition -- the
    process must be stopped externally.
    """
    data_provider = DataProvider(data_dir, BATCH_SIZE, SEQUENCE_LENGTH)
    model = RNNModel(data_provider.vocabulary_size,
                     batch_size=BATCH_SIZE,
                     sequence_length=SEQUENCE_LENGTH,
                     hidden_layer_size=HIDDEN_LAYER_SIZE,
                     cells_size=CELLS_SIZE)
    with tf.Session() as sess:
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(tensorboard_dir)
        writer.add_graph(sess.graph)
        sess.run(tf.global_variables_initializer())
        # Forward pass and one backward pass of all the training examples
        epoch = 0
        temp_losses = []
        smooth_losses = []
        while True:
            # Exponentially decay the learning rate once per epoch.
            sess.run(
                tf.assign(model.learning_rate,
                          LEARNING_RATE * (DECAY_RATE**epoch)))
            data_provider.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for batch in range(data_provider.batches_size):
                inputs, targets = data_provider.next_batch()
                feed = {model.input_data: inputs, model.targets: targets}
                # Thread the recurrent state from the previous batch into
                # this one (stateful truncated BPTT).
                for index, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[index].c
                    feed[h] = state[index].h
                # Iteration is the number of times batch data has passed
                # through the neural network - both forward and backwards
                # propagation
                iteration = epoch * data_provider.batches_size + batch
                summary, loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summary, iteration)
                temp_losses.append(loss)
                if iteration % SAMPLING_FREQUENCY == 0:
                    sample_model(sess, data_provider, iteration)
                if iteration % LOGGING_FREQUENCY == 0:
                    # Average the losses collected since the last log point.
                    smooth_loss = np.mean(temp_losses)
                    smooth_losses.append(smooth_loss)
                    temp_losses = []
                    plot(smooth_losses, "iterations (thousands)", "loss")
                    print('{{"metric": "iteration", "value": {}}}'.format(
                        iteration))
                    print('{{"metric": "epoch", "value": {}}}'.format(epoch))
                    print('{{"metric": "loss", "value": {}}}'.format(
                        smooth_loss))
            epoch += 1
def main():
    """Load training data and tune an XGBoost classifier with TPE."""
    provider = DataProvider()
    booster = XGBClassifier()
    provider.read_data("train.csv")
    # Ensure the model output directory exists.
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)
    optimizer = Optimizer()
    tune_with_TPE(provider, booster, optimizer)
def main():
    """Estimate relative rigid motions between video frames and dump SE2 edges.

    Writes one 'EDGE_SE2 src dst <6 pose values>' line per edge to
    'output_file' and saves a figure comparing a few transformed pairs.
    """
    # ---------- Upload video frames: -----------------------------------------
    video_name = "movies/test_video.mp4"
    data = DataProvider(video_name)
    # ---------- Create set of edges E: ---------------------------------------
    print("Create set of edges E ...")
    E = create_E(data)
    print("Finished.")
    # ---------- Compute relative transformation for each edge in E: ----------
    print("Compute relative transformations for each edge in E ...")
    if os.path.exists('output_file'):
        os.remove('output_file')
    # Use a context manager so the file is closed (and flushed) even on error;
    # the original left the handle open for the life of the process.
    with open('output_file', 'w+') as f:
        for e in E:
            # Compute features correspondence by running SIFT:
            p, q, w = get_FC_by_SIFT(data.images[e.src], data.images[e.dst])
            # center the points around zero:
            img_sz = data.img_sz
            p[:, 0] = p[:, 0] - img_sz[1] / 2
            q[:, 0] = q[:, 0] - img_sz[1] / 2
            p[:, 1] = p[:, 1] - img_sz[0] / 2
            q[:, 1] = q[:, 1] - img_sz[0] / 2
            # Compute relative transformation (theta):
            E[e] = compute_LS_rigid_motion(p, q, w)
            # Add this measurement to the output file
            # (loop variable renamed from `p`, which shadowed the points array):
            pose = ' '.join(str(v) for v in np.reshape(E[e], (6, )))
            f.write('EDGE_SE2 ' + str(e.src) + ' ' + str(e.dst) + ' ' + pose + '\n')
    print("Finished.")
    # ---------- view the relative transformation of a few edges:
    imgs = []
    titles = []
    for i, e in enumerate(E):
        if i in (200, 201, 203):
            imgs.append(data.images[e.src])
            imgs.append(data.images[e.dst])
            transformed_I = warp_image(data.images[e.dst], E[e], cv2.INTER_CUBIC)
            imgs.append(transformed_I)
            titles.append('I1')
            titles.append('I2')
            titles.append('theta(I2)')
    fig1 = open_figure(1, '', (5, 3))
    PlotImages(1, 3, 3, 1, imgs, titles, 'gray', axis=False, colorbar=False)
    plt.show()
    fig1.savefig('relative_trans_results.png', dpi=1000)
def show_patches():
    """Display gamma-corrected input patches, one batch at a time, forever."""
    from data_provider import DataProvider
    provider = DataProvider(True, ['g0'])
    provider.set_batch_size(10)
    while True:
        images = provider.get_batch()[0]
        for image in images:
            # Normalize to [0, 1] before applying display gamma.
            normalized = image / image.max()
            cv2.imshow("Input", np.power(normalized, 1 / 2.2))
            cv2.waitKey(0)
def main(parameters):
    """Wire data loaders, a TensorBoard writer and a trainer, then run it."""
    train_loader, test_loader = DataProvider().get_data_loaders(**parameters)
    writer = SummaryWriter()
    # Trainer type is derived from the experiment name, e.g. "mnist" -> "mnistnet".
    trainer = TrainerFactory.create_trainer(parameters["experiment"] + "net",
                                            train_loader, test_loader,
                                            writer, **parameters)
    trainer.run(parameters["epochs"])
    writer.close()
def test_epoch_complete(self):
    """Every element of the dataset must be served exactly once per epoch."""
    provider = DataProvider('final_data', 8)
    # tobytes() replaces ndarray.tostring(), which was deprecated and then
    # removed in NumPy 2.0; the byte content is identical.
    dataset_img = [img.tobytes() for img in provider.images]
    while provider.next_batch_available():
        batch_img, _ = provider.get_batch()
        for img in batch_img:
            key = img.tobytes()
            if key in dataset_img:
                dataset_img.remove(key)
    # All dataset images should have been seen (and removed) exactly once.
    self.assertEqual(len(dataset_img), 0)
def test_get_casted_dataframe(self, mock_method):
    """get_casted_dataframe must cast each string column to its declared dtype."""
    date_column = pd.date_range(start=datetime.datetime.today(), periods=4)
    # One mocked return value per column, consumed in casting order.
    mock_method.side_effect = [
        pd.DataFrame([
            ' min ', 'asdasdasd0', ' ciao', 'ciao '
        ], dtype='object'),
        pd.DataFrame(['UD', ' O', 'P ', ' TS '], dtype='object'),
        pd.DataFrame([0, 1, 1, 1]),
        pd.DataFrame((([np.nan] * 3) + [0.24]), dtype='float64'),
        pd.DataFrame(date_column)
    ]
    # Build a test dataframe equal to self.dp.df but containing only strings.
    data = {
        'col1': [' min ', 'asdasdasd0', ' ciao', 'ciao '],
        'col2': ['UD', ' O', 'P ', ' TS '],
        'col3': ['0', '1', '1', '1'],
        'col4': ([np.nan] * 3) + ['0.24'],
        'col5': date_column.strftime("%Y-%m-%d")  # Cast date to string
    }
    df = pd.DataFrame(data)
    # Build a test DataProvider around the strings-only dataframe.
    test_dp = DataProvider(df=df,
                           column_types={
                               0: 'object',
                               1: 'object',
                               2: 'int',
                               3: 'float',
                               4: 'date'
                           },
                           column_constraints={
                               0: False,
                               1: False,
                               2: True,
                               3: False,
                               4: False
                           })
    # Perform the cast and verify the resulting dtypes column by column.
    casted_df = test_dp.get_casted_dataframe()
    self.assertEqual(casted_df.dtypes.tolist(), [
        np.dtype('O'),
        np.dtype('O'),
        np.dtype('int64'),
        np.dtype('float64'),
        np.dtype('<M8[ns]')
    ])
def evaluate(self, data, prm):
    """Return a copy of `data` whose Xtr/Xde/Xte are replaced by predictions.

    Runs the network over the train, dev and test inputs with the configured
    batch size and stores each result back on the copied provider.
    """
    predicted = DataProvider()
    predicted.copy_from(data)
    # Same predict call for all three splits; loop instead of repeating it.
    for attr in ('Xtr', 'Xde', 'Xte'):
        result = self.net.predict(getattr(predicted, attr),
                                  batch_size=prm.batch_size,
                                  verbose=1)
        setattr(predicted, attr, result)
    return predicted
def __init__(self, part='2', img_width=28, filter_width=28, num_filters=2,
             num_classes=2, alpha=.01, activation_function='sigmoid',
             relu_alpha=0, sig_lambdas=(1, 1, 1), subset_size=1,
             tanh_lambda=1):
    """Configure the network for the selected assignment part and load data.

    NOTE(review): for parts '2', '3a' and '3b' the filter_width, num_filters
    and num_classes arguments are overridden by hard-coded values below, and
    test_dir points at the *train* directory in both branches -- confirm
    both are intentional.
    """
    self.part = part
    if self.part == '2':
        self.filter_width = 28
        self.num_filters = 2
        num_classes = 2
        train_dir = '../data/part2/train/*'
        test_dir = '../data/part2/train/*'
    if self.part == '3a' or part == '3b':
        self.filter_width = 7
        self.num_filters = 16
        num_classes = 10
        train_dir = '../data/part3/train/*'
        test_dir = '../data/part3/train/*'
    self.img_width = img_width
    self.output_dim = num_classes
    self.alpha = alpha
    self.activation_function = activation_function
    self.relu_alpha = relu_alpha
    self.sig_lambdas = sig_lambdas
    self.tanh_lambda = tanh_lambda
    # computed properties
    self.conv_mat_H = np.power((img_width - self.filter_width + 1), 2)  # number of kernel positions
    self.conv_mat_S = img_width - self.filter_width  # space between kernel and outside of image
    self.conv_output_dim = self.conv_mat_H * self.num_filters
    # create data provider to feed in data
    self.dp = DataProvider(train_dir, test_dir, num_classes, subset_size)
    if part == '2' or part == '3a':
        self.init_weights_3A()
    else:
        self.init_weights_3B()
def main():
    """Average peptide attention weights of all trained models over the test
    set and write them to <folder>/attn_weight.csv."""
    config_file = sys.argv[1]
    config = Config(config_file)
    folder = config_file.split('/')[0]
    encoding_func = ENCODING_METHOD_MAP[config.encoding_method]
    encoding_func2 = ENCODING_METHOD_MAP[config.encoding_method2]
    data_provider = DataProvider(
        encoding_func,
        encoding_func2,
        config.data_file,
        config.test_file,
        config.batch_size,
        max_len_hla=config.max_len_hla,
        max_len_pep=config.max_len_pep,
        model_count=config.model_count
    )
    device = config.device
    models = config.model_count * config.base_model_count
    print(models)
    total_df = pd.DataFrame()
    # BUG FIX: the original reused `i` both for the model index and for the
    # per-sample loop below, clobbering the model index mid-iteration; the
    # two loops now use distinct names.
    for model_idx in range(models):
        # load and prepare model
        path = folder + "/best_model_{}.pytorch".format(model_idx)
        state_dict = torch.load(path)
        model = Model(config)
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()
        data_provider.new_epoch()
        for _ in range(data_provider.test_steps()):
            data = data_provider.batch_test()
            (hla_a, hla_mask, hla_a2, hla_mask2, pep, pep_mask,
             pep2, pep_mask2, uid_list) = data
            temp_attn = {}
            with torch.no_grad():
                temp = model.encoder_peptide2.conv_0(pep2.to(device))
                temp, att = model.encoder_peptide2.att_0(temp.to(device))
            # Keyed by the 4th '-'-separated field of the sample uid.
            for sample_idx in range(config.batch_size):
                temp_attn[uid_list[sample_idx].split('-')[3]] = att[sample_idx].tolist()
            temp_df = pd.DataFrame.from_dict(temp_attn, orient="index")
            total_df = pd.concat([total_df, temp_df])
    avg_ = total_df.mean(axis=0)
    avg_ = pd.DataFrame({'position': avg_.index + 1, 'avg weight': avg_.values})
    avg_.to_csv(folder + "/" + "attn_weight.csv", index=None)
def test_get_column_constraints_is_respected_NotImplemented(self):
    """With constraints=NotImplemented no duplicate check is performed."""
    frame = pd.DataFrame({
        # the last value of the first column duplicates an earlier one
        'col1': ['222365896', '522559845', '333652214', '522559845'],
        'col2': ['UD', ' O', 'P ', ' TS ']
    })
    provider = DataProvider(frame, {0: 'object', 1: 'object'},
                            column_constraints=NotImplemented)
    # Expect an empty result: no constraints means nothing to flag.
    result = provider.get_column_constraints_is_respected()
    pd.testing.assert_series_equal(result, pd.Series([], dtype='object'))
def main():
    """Score the test set with a saved XGBoost model and write predictions.csv."""
    dp = DataProvider()
    test_data = dp.get_test_data()
    model_name = "rando:0%reg_a:0%max_d:0%subsa:1%boost:gbtree%nthre:8%colsa:1%learn:0.025%scale:5.2872645858027125%max_d:3%missi:None%gamma:0%base_:0.5%colsa:1%min_c:2%seed:100%n_job:1%silen:0%n_est:800%reg_l:1%objec:binary:logistic%"
    path = "/home/msaffarm/KaggleChallenges/SafeDriverPred/xgbModel/trainedModels/" + model_name
    model = get_model(path)
    # DataFrame.as_matrix() was removed in pandas 1.0 -- use to_numpy().
    test_ids = test_data[["id"]].to_numpy()
    test_data.drop(["id"], axis=1, inplace=True)
    preds = model.get_booster().predict(xgb.DMatrix(test_data))
    # Pair each id with its predicted target probability.
    final_pred = np.concatenate([test_ids.reshape(-1, 1), preds.reshape(-1, 1)], axis=1)
    final_pred_df = pd.DataFrame(final_pred, columns=["id", "target"])
    final_pred_df["id"] = final_pred_df["id"].astype(int)
    print(final_pred_df)
    final_pred_df.to_csv("predictions.csv", index=False)
def show_patches():
    """Show each patch of every batch, printing its illuminant to stdout."""
    from data_provider import DataProvider
    provider = DataProvider(True, ['s0'])
    provider.set_batch_size(10)
    while True:
        batch = provider.get_batch()
        patches = batch[0]
        illuminants = batch[2]
        for index in range(len(patches)):
            # Normalize to [0, 1] before applying display gamma.
            normalized = patches[index] / patches[index].max()
            print('illum: ', illuminants[index])
            cv2.imshow("Input", np.power(normalized, 1 / 2.2))
            cv2.waitKey(0)
def setUpClass(cls):
    """Build the shared DataProvider fixture used by every test in the class."""
    frame = pd.DataFrame({
        'col1': [' min ', 'asdasdasd0', ' ciao', 'ciao '],
        'col2': ['UD', ' O', 'P ', ' TS '],
        'col3': [0, 1, 1, 1],
        'col4': ([np.nan] * 3) + [0.24],
        'col5': pd.date_range(start=datetime.datetime.today(), periods=4)
    })
    types = {0: 'object', 1: 'object', 2: 'int', 3: 'float', 4: 'date'}
    constraints = {0: False, 1: False, 2: True, 3: False, 4: False}
    cls.dp = DataProvider(frame, types, constraints)
def __init__(self, task_num, data_form):
    """
    :param task_num: train model on which task (1 or 2)
    :param data_form:
        1 denotes x_batch shape: [batch_size, num_utterance, sequence_max_len, vocab_size]
        2 denotes x_batch shape: [batch_size, num_all_word_in_dialog, vocab_size]
        Hierarchical LSTM, MLP use data form 1
        LSTM, AttnNet use data form 2
    """
    self.output_dim = 20
    self.max_num_utterance = 25
    self.max_num_words_in_dialog = 180
    self.vocab_size = 205
    self.embed_matrix = None
    self.load_embed('../my_dataset/sub_glove_embedding_with_oov.txt')
    self.data_provider = DataProvider(data_form)
    # Select the task-specific dataset bundle; everything is deep-copied so
    # later mutation cannot leak back into the provider.
    if task_num == 1:
        self.task_num = 'task1'
        task_data = self.data_provider.task1
    elif task_num == 2:
        self.task_num = 'task2'
        task_data = self.data_provider.task2
    else:
        raise Exception('task num must be one of [1, 2]!')
    self.train_set = copy.deepcopy(task_data.train)
    self.val_set = copy.deepcopy(task_data.val)
    # Index 0 is a placeholder so test_sets[k] corresponds to test set k.
    self.test_sets = [None] + [
        copy.deepcopy(t)
        for t in (task_data.test1, task_data.test2,
                  task_data.test3, task_data.test4)
    ]
def load_test_images(self):
    """Load test frames from the DataProvider and pre-allocate result arrays."""
    data = DataProvider()
    self.idx = data.idx
    self.test_imgs = data.test_imgs
    # prepare arrays after knowing the exact number of test frames:
    self.M = self.test_imgs.shape[0]  # number of test images
    emb_h, emb_w, emb_c = self.img_emb_sz[0], self.img_emb_sz[1], self.img_emb_sz[2]
    h, w, c = self.img_sz[0], self.img_sz[1], self.img_sz[2]
    # predicted / ground-truth transformations for the test images
    self.test_trans_all = np.zeros((self.M, 2, 3))
    # only relevant transformations from self.test_trans_all
    self.test_trans = np.zeros((self.M, 2, 3))
    # refined transformations: affine + homography
    self.test_trans_refine = np.zeros((self.M, 3, 3))
    self.test_imgs_warped = np.zeros((self.M, emb_h, emb_w, emb_c))
    self.fg_imgs = np.zeros((self.M, h, w, c))
    self.bg_imgs = np.zeros((self.M, h, w, c))
    self.img_orig = np.zeros((self.M, h, w, c))
    self.mean_err = np.zeros((self.M, h, w))
def get_model(tr):
    """Build a GRU seq2seq model; train it first when ``tr == "YES"``."""
    # this will be used to save the model
    session_file = os.path.join(".", "tf_str_seq2seq_sm.sess")
    num_sequences = 15 * 1024
    batch_size = 16
    generator = DataProvider(num_sequences, mlen, batch_size=batch_size)
    input_dims = 128
    hidden_size = 256
    num_decoder_symbols = 128
    num_epochs = 10
    s2s = GRU_Seq2Seq(session_file, input_dims, hidden_size,
                      num_decoder_symbols)
    if tr == "YES":
        s2s.train(generator, num_epochs)
    return s2s
def test_get_column_constraints_is_respected_multicolumn(self):
    """Rows duplicated across all constrained columns must be flagged."""
    frame = pd.DataFrame({
        'col1': ['222365896', '522559845', '522559845', '522559845'],
        'col2': ['UD', 'GO', 'PN', 'GO'],
        'col3': [1, 2, 3, 4]
    })
    provider = DataProvider(frame,
                            {0: 'object', 1: 'object', 2: 'int'},
                            {0: True, 1: True, 2: False})
    # Only the last row repeats both constrained columns (col1 + col2).
    result = provider.get_column_constraints_is_respected()
    pd.testing.assert_series_equal(result,
                                   pd.Series([False, False, False, True]))
    self.assertEqual(result.sum(), 1)
def test_get_column_constraints_is_respected_strings(self):
    """A duplicated value in a constrained string column must be flagged."""
    frame = pd.DataFrame({
        # the last value of the first column duplicates an earlier one
        'col1': ['222365896', '522559845', '333652214', '522559845'],
        'col2': ['UD', ' O', 'P ', ' TS ']
    })
    provider = DataProvider(frame,
                            {0: 'object', 1: 'object'},
                            {0: True, 1: False})
    # Only the last row repeats a value in the constrained column.
    result = provider.get_column_constraints_is_respected()
    pd.testing.assert_series_equal(result,
                                   pd.Series([False, False, False, True]))
    self.assertEqual(result.sum(), 1)
def show_patches():
    """Show each patch next to its ground-truth corrected version."""
    from data_provider import DataProvider
    provider = DataProvider(True, ['g0'])
    provider.set_batch_size(1)
    while True:
        batch = provider.get_batch()
        images, labels = batch[0], batch[2]
        for index in range(len(images)):
            # Normalize to [0, 1] before applying display gamma.
            patch = images[index] / images[index].max()
            cv2.imshow("Input", np.power(patch, 1 / 2.2))
            cv2.waitKey(0)
            # Apply the ground-truth illuminant correction and show it too.
            corrected = apply_gt(patch, labels[index])
            cv2.imshow("Corrected", np.power(corrected, 1 / 2.2))
            cv2.waitKey(0)