def __getitem__(self, item):
    img_name = self.imglst[item]
    prefix = ".".join(img_name.split('.')[:-1])
    label_name = prefix + '.txt'
    text_polys, text_tags = parse_lines(
        os.path.join(self.data_dir, label_name))
    im = cv2.imread(os.path.join(self.data_dir, img_name))
    # im = Image.open(os.path.join(self.data_dir, img_name)).convert('RGB')
    im = np.array(im)[:, :, :3]
    # generate the score/kernel maps and the training mask for this sample
    image, score_map, kernel_map, training_mask = process_data(
        im, text_polys, text_tags, self.num_kernel)
    if self.debug:
        im_show = np.concatenate([
            score_map * 255, kernel_map[0, :, :] * 255,
            kernel_map[1, :, :] * 255, kernel_map[2, :, :] * 255,
            training_mask * 255
        ], axis=1)
        cv2.imshow('img', image)
        cv2.imshow('score_map', im_show)
        cv2.waitKey()
    image = mx.nd.array(image)
    score_map = mx.nd.array(score_map, dtype=np.float32)
    kernel_map = mx.nd.array(kernel_map, dtype=np.float32)
    training_mask = mx.nd.array(training_mask, dtype=np.float32)
    trans_image = self.trans(image)
    return trans_image, score_map, kernel_map, training_mask, transforms.ToTensor()(image)

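# Hedged sketch of the parse_lines helper used above (its implementation is not
# shown in this excerpt). It assumes ICDAR-style label files with one region per
# line, "x1,y1,x2,y2,x3,y3,x4,y4,transcription", where "###" marks regions to be
# ignored during training.
import numpy as np


def parse_lines(label_path):
    text_polys, text_tags = [], []
    with open(label_path, encoding='utf-8-sig') as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) < 9:
                continue
            # the first eight values are the four corner points of the text polygon
            text_polys.append(np.array(list(map(float, parts[:8]))).reshape(4, 2))
            # tag is True for regions that should not contribute to the loss
            text_tags.append(parts[8].strip() == '###')
    return np.array(text_polys, dtype=np.float32), np.array(text_tags, dtype=bool)
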
def train():
    images, labels = process_data('./data/train-images-idx3-ubyte',
                                  './data/train-labels-idx1-ubyte')
    train_set = Mnist(images, labels)
    # train_loader = DataLoader(train_set, batch_size=64,
    #                           shuffle=True, num_workers=8, pin_memory=True)
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

    model = Convnet()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    aver = Averager()

    for epoch in range(1, 11):
        lr_scheduler.step()
        model.train()
        for i, batch in enumerate(train_loader, 1):
            # image, label = [_.cuda() for _ in batch]
            image, label = batch
            score = model(image)
            loss = F.cross_entropy(score, label.long())
            acc = count_acc(score, label, aver)
            print('epoch %d batch %d acc: %f' % (epoch, i, acc))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('epoch %d acc: %f' % (epoch, aver.item()))
    save_model(model, 'model-1')

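# Hedged sketch of the helpers assumed by the MNIST train() and test() snippets
# (Averager, count_acc and save_model are not shown in these excerpts); this is
# one plausible implementation, not the original.
import os

import torch


class Averager:
    """Keeps a running mean of the values passed to add()."""

    def __init__(self):
        self.n = 0
        self.v = 0.0

    def add(self, x):
        self.v = (self.v * self.n + x) / (self.n + 1)
        self.n += 1

    def item(self):
        return self.v


def count_acc(score, label, aver):
    """Batch accuracy from logits; also folds it into the running average."""
    pred = torch.argmax(score, dim=1)
    acc = (pred == label.long()).float().mean().item()
    aver.add(acc)
    return acc


def save_model(model, name):
    """Save weights as ./model/<name>.pth, matching the path loaded in test()."""
    os.makedirs('./model', exist_ok=True)
    torch.save(model.state_dict(), os.path.join('./model', name + '.pth'))
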
def on_epoch_end(self, epoch, logs={}):
    # for each essay set, calculate the QWK score
    qwk_scores = []
    number_essays = []
    if self.print_to_screen:
        print("\nQWK Scores")
    for essay_set in range(1, 9):
        essays_in_set = self.essays[self.essays['essay_set'] == essay_set]
        X, y = process_data(essays_in_set)
        y_true = essays_in_set['domain1_score'].values
        normalised_prediction = self.model.predict(X)
        normalised_prediction = np.array(normalised_prediction)
        # rescale the normalised prediction back to the set's original score range
        y_pred = np.around(
            (normalised_prediction *
             (maximum_scores[essay_set] - minimum_scores[essay_set])) +
            minimum_scores[essay_set])
        qwk_score = cohen_kappa_score(y_true, y_pred, weights='quadratic')
        qwk_scores.append(qwk_score)
        number_essays.append(len(essays_in_set))
        if self.print_to_screen:
            print("Set {}: {:.2f}".format(essay_set, qwk_score), end=' ')

    # weight each set's QWK by the number of essays in that set
    qwk_scores = np.array(qwk_scores)
    number_essays = np.array(number_essays)
    weighted_qwk_score = np.sum(
        qwk_scores * number_essays) / np.sum(number_essays)
    if self.print_to_screen:
        print('\nWeighted QWK score: {:.2f}'.format(weighted_qwk_score))

    if self.save_to_file:
        summary = "Epoch " + str(epoch + 1)
        log_values = "\n"
        for key, value in logs.items():
            log_values += "{}: {:.4f} ".format(key, value)
        individual_qwk_scores = "\n"
        for essay_set in range(8):
            individual_qwk_scores += "Set {}: {:.2f} ".format(
                essay_set + 1, qwk_scores[essay_set])
        summary = summary + log_values + individual_qwk_scores
        summary += '\nWeighted QWK score: {:.2f}'.format(weighted_qwk_score)
        summary += '\n\n'
        with open(os.path.join(constants.SAVE_DIR, "scores.txt"), "a") as f:
            f.write(summary)

async def import_data(request):
    data = await request.post()
    # group the posted form fields by their trailing column index,
    # e.g. "name_0" -> column_data[0]["name"]
    column_data = defaultdict(dict)
    for k, v in data.items():
        key = "_".join(k.split("_")[:-1])
        index = int(k.split("_")[-1])
        column_data[index][key] = v
    for i in column_data.keys():
        column_data[i]["name"] = column_data[i]["name"].lower()
    env = util.process_data(column_data, content)
    for k, vs in env.items():
        iris.env[k] = vs
        iris.env_order[k] = len(iris.env_order)
    return web.Response(status=302,
                        headers={"Location": "http://localhost:3000/"})

def test():
    images, labels = process_data('./data/t10k-images-idx3-ubyte',
                                  './data/t10k-labels-idx1-ubyte')
    test_set = Mnist(images, labels)
    # train_loader = DataLoader(train_set, batch_size=64,
    #                           shuffle=True, num_workers=8, pin_memory=True)
    test_loader = DataLoader(test_set, batch_size=64, shuffle=True)

    model = Convnet()
    model.load_state_dict(torch.load('./model/model-1.pth'))
    model.eval()
    aver = Averager()
    for i, batch in enumerate(test_loader, 1):
        # image, label = [_.cuda() for _ in batch]
        image, label = batch
        score = model(image)
        count_acc(score, label, aver)
    print('test acc: %f' % aver.item())

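# Hedged usage sketch: the original entry point is not shown; this is one
# plausible way to run the MNIST snippets above end to end.
if __name__ == '__main__':
    train()
    test()
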
this method does not support a huge number of features
"""
# change to your data file path
train_data_file_path = 'data/train.csv'
test_data_file_path = 'data/test.csv'
train_label, raw_train_data = util.group_by_visit_number(train_data_file_path)
test_label, raw_test_data = util.group_by_visit_number(test_data_file_path, False)

# features to be tested with
feature_set = [['d'], ['s', 'd'], ['f'], ['s', 'f']]
feature_result = []
for feature in feature_set:
    # filter the data down to the required feature columns
    pro_train_data = util.process_data(raw_train_data, feature)
    pro_test_data = util.process_data(raw_test_data, feature)
    bag_of_features = util.get_feature_bag(pro_train_data, pro_test_data, {})
    train_data = util.one_hot_encoding(pro_train_data, bag_of_features)
    train_label = np.array(train_label)

    # 5-fold cross validation
    num_train_data = len(train_data)
    num_fold = 5
    step_size = num_train_data // num_fold  # integer fold size for slicing
    result = []
    for i in range(0, num_fold):
        start_index = i * step_size
        end_index = (i + 1) * step_size

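# Hedged sketch, not the original continuation (the fold loop above is cut off
# in this excerpt): the same 5-fold evaluation could be written with
# scikit-learn, assuming train_data supports NumPy-style indexing and a
# classifier such as LogisticRegression is an acceptable stand-in.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold


def cross_validate(train_data, train_label, num_fold=5):
    scores = []
    for fit_idx, val_idx in KFold(n_splits=num_fold).split(train_data):
        clf = LogisticRegression(max_iter=1000)
        clf.fit(train_data[fit_idx], train_label[fit_idx])
        scores.append(clf.score(train_data[val_idx], train_label[val_idx]))
    return np.mean(scores)
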
img_path = "C:\\dataset\\viden_test\\test_plates2\\"

# model to be used at test time
act_model = create_crnn_model(train=False)

# load the saved best model weights
act_model.load_weights('best_model.hdf5')

for pathAndFilename in glob.iglob(img_path + "*.jpg"):
    print("predicting for:" + pathAndFilename)
    # predict outputs on validation images
    # img = Image.open(pathAndFilename)
    # img = img.resize((128, 32), Image.BICUBIC)
    # img = np.array(img) / 255;
    # img = np.sum(img, axis=2, keepdims=True)
    img, _, _, _ = process_data(pathAndFilename, "1_1")
    img = img / 255.
    img = np.expand_dims(img, axis=0)
    prediction = act_model.predict(img)

    # use CTC decoder
    out = K.get_value(
        K.ctc_decode(prediction,
                     input_length=np.ones(prediction.shape[0]) * prediction.shape[1],
                     greedy=False)[0][0])

    head, tail = ntpath.split(pathAndFilename)
    txt = tail.split('_')[1]

    # see the results
    i = 0
    le = min(10, out.shape[1])
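    # Hedged sketch of how the decoded indices might be mapped back to text (the
    # original continuation is not shown here); char_list is an assumed
    # index-to-character lookup matching the alphabet the CRNN was trained on.
    decoded = ''
    for idx in out[0][:le]:
        if int(idx) != -1:  # K.ctc_decode pads its dense output with -1
            decoded += char_list[int(idx)]
    print('predicted: ' + decoded + ' | ground truth: ' + txt)
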
from auction import Input
from auction import Auction
import util

auction_parameters = Input()
test = Auction(auction_parameters)
market_prices, buyer_profits, seller_profits = test.run()
market_prices, round_avgs, seller_avgs, avg_buyer_profit_per_round, \
    avg_seller_profit_per_round, avg_market_price = util.process_data(
        market_prices, auction_parameters, buyer_profits, seller_profits)

print(f"Market Prices:")
for i, round_outcome in enumerate(market_prices):
    print(f"round {i}: {round_outcome}")
print()
print(
    f"Buyer profits for all {auction_parameters.num_round} rounds: \n{buyer_profits}\n"
    f"AVG Buyer profits per round: \n{avg_buyer_profit_per_round}\n"
    f"Seller profits for all {auction_parameters.num_round} rounds: \n{seller_profits}\n"
    f"AVG Seller profits per round: {avg_seller_profit_per_round}\n"
    f"AVG market price: {avg_market_price}\n")

"""
# change to your data file path
train_data_file_path = 'data/train.csv'
test_data_file_path = 'data/test.csv'
train_label, raw_train_data = util.group_by_visit_number(train_data_file_path)
test_label, raw_test_data = util.group_by_visit_number(test_data_file_path, False)

# features to be tested with
feature_set = [['d'], ['s', 'd'], ['f'], ['s', 'f']]
feature_result = []
for feature in feature_set:
    # filter the data down to the required feature columns
    pro_train_data = util.process_data(raw_train_data, feature)
    pro_test_data = util.process_data(raw_test_data, feature)
    bag_of_features = util.get_feature_bag(pro_train_data, pro_test_data, {})
    train_data = util.one_hot_encoding(pro_train_data, bag_of_features)
    train_label = np.array(train_label)

    # 5-fold cross validation
    num_train_data = len(train_data)
    num_fold = 5
    step_size = num_train_data // num_fold  # integer fold size for slicing
    result = []
    for i in range(0, num_fold):
        start_index = i * step_size
        end_index = (i + 1) * step_size

from models.mot_lstm import get_model

print('Loading data..')
essay_length = 500
essays_train, essays_cv, essays_test = load_data(DATASET_DIR,
                                                 train_size=0.8,
                                                 validation_size=0.2)
print("Training Examples: {}".format(len(essays_train)))
print("Cross Validation Data: {}".format(len(essays_cv)))
print("Testing Data: {}".format(len(essays_test)))
print('Data loaded.')
print()

print('Processing data..')
X_train, y_train = process_data(essays_train)
print("X_train.shape: {}, y_train.shape: {}".format(X_train.shape, y_train.shape))
X_cv, y_cv = process_data(essays_cv)
print("X_cv.shape: {}, y_cv.shape: {}".format(X_cv.shape, y_cv.shape))
print('Processing done.')
print()

print('Loading model..')
model = get_model(embedding_dimension=50, essay_length=essay_length)
print(model.summary())
print('Model loaded.')

qwkscore = QWKScore(essays_cv)
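# Hedged training sketch: wire the QWKScore callback into model.fit; the batch
# size and epoch count here are illustrative, not taken from the original script.
history = model.fit(X_train, y_train,
                    validation_data=(X_cv, y_cv),
                    batch_size=64,
                    epochs=10,
                    callbacks=[qwkscore])
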
def main():
    arg_parser = argparse.ArgumentParser(
        description="parser for End-to-End Memory Networks")
    arg_parser.add_argument("--train", type=int, default=1)
    arg_parser.add_argument("--epochs", type=int, default=100,
                            help="number of training epochs, default: 100")
    arg_parser.add_argument("--batch-size", type=int, default=32,
                            help="batch size for training, default: 32")
    arg_parser.add_argument("--lr", type=float, default=0.01,
                            help="learning rate, default: 0.01")
    arg_parser.add_argument("--embed-size", type=int, default=25,
                            help="embedding dimensions, default: 25")
    arg_parser.add_argument("--task-number", type=int, default=1,
                            help="task to process, default: 1")
    arg_parser.add_argument(
        "--hops", type=int, default=1,
        help="Number of hops to make: 1, 2 or 3; default: 1")
    arg_parser.add_argument(
        "--anneal-factor", type=int, default=2,
        help="factor to anneal by every 'anneal-epoch(s)', default: 2")
    arg_parser.add_argument(
        "--anneal-epoch", type=int, default=25,
        help="anneal every [anneal-epoch] epoch, default: 25")
    arg_parser.add_argument("--eval", type=int, default=1,
                            help="evaluate after training, default: 1")
    arg_parser.add_argument("--cuda", type=int, default=0,
                            help="train on GPU, default: 0")
    arg_parser.add_argument("--memory-size", type=int, default=50,
                            help="upper limit on memory size, default: 50")
    arg_parser.add_argument(
        "--log-epochs", type=int, default=4,
        help="Number of epochs after which to log progress, default: 4")
    arg_parser.add_argument("--joint-training", type=int, default=0,
                            help="joint training flag, default: 0")
    arg_parser.add_argument(
        "--saved-model-dir", type=str, default="./saved/",
        help="path to folder where trained model will be saved.")
    arg_parser.add_argument("--data-dir", type=str,
                            default="./data/tasks_1-20_v1-2/en",
                            help="path to folder from where data is loaded")
    arg_parser.add_argument("--debug", type=bool, default=False,
                            help="Flag for debugging purposes")

    args = arg_parser.parse_args()
    check_paths(args)
    save_model_path = model_path(args)

    train_batches, val_batches, test_batches, train_set, val_set, test_set, \
        sentence_size, vocab_size, story_size, word_idx = process_data(args)

    if args.train == 1:
        train_network(train_batches, val_batches, test_batches,
                      train_set, val_set, test_set,
                      story_size=story_size,
                      vocab_size=vocab_size,
                      save_model_path=save_model_path,
                      args=args)

    if args.eval == 1:
        model = save_model_path
        eval_network(story_size=story_size,
                     vocab_size=vocab_size,
                     EMBED_SIZE=args.embed_size,
                     batch_size=args.batch_size,
                     depth=args.hops,
                     model=model,
                     test_batches=test_batches,
                     test=test_set,
                     cuda=args.cuda)

import json

from db_wrapper import DB
from util import get_film_complete_info, process_data, obj, CACHE_FILE, cache_obj

# fetching connection
db = DB()
con = db.con
cur = db.cur

# fetching data for movie
cur.execute('select * from films where name like "%{}%"'.format('A New Hope'))
res = cur.fetchall()
data = get_film_complete_info(1) if not len(res) else get_film_complete_info(res[0][0])

# processing data
data['characters'] = [process_data('characters', i) for i in data['characters']]
data['planets'] = [process_data('planets', i) for i in data['planets']]
data['starships'] = [process_data('starships', i) for i in data['starships']]
data['vehicles'] = [process_data('vehicles', i) for i in data['vehicles']]
data['species'] = [process_data('species', i) for i in data['species']]

# composing json file
with open('task_two.json', 'w') as js:
    json.dump(data, js, indent=4)

# pickling the cache object
cache_obj()

def run_experiment(auction_parameters, iterations_per_configuration):
    tot_buyer_profit_per_round = 0
    tot_seller_profit_per_round = 0
    tot_avg_market_price = 0
    tot_market_prices = 0

    # run the auction several times with the same parameters and accumulate results
    for _ in range(0, iterations_per_configuration):
        test = Auction(auction_parameters)
        market_prices, buyer_profits, seller_profits = test.run()
        market_prices, round_avgs, seller_avgs, avg_buyer_profit_per_round, \
            avg_seller_profit_per_round, avg_market_price = util.process_data(
                market_prices, auction_parameters, buyer_profits, seller_profits)
        tot_buyer_profit_per_round += avg_buyer_profit_per_round
        tot_seller_profit_per_round += avg_seller_profit_per_round
        tot_avg_market_price += avg_market_price
        tot_market_prices += market_prices

    # average the accumulated totals over the number of iterations
    avg_buyer_profit_per_round = tot_buyer_profit_per_round / float(
        iterations_per_configuration)
    avg_seller_profit_per_round = tot_seller_profit_per_round / float(
        iterations_per_configuration)
    avg_market_price = tot_avg_market_price / float(
        iterations_per_configuration)
    market_prices = tot_market_prices / float(iterations_per_configuration)

    results = {
        "params": auction_parameters,
        "avg_buyer_profit_per_round": avg_buyer_profit_per_round,
        "avg_seller_profit_per_round": avg_seller_profit_per_round,
        "avg_market_price": avg_market_price,
        "market_prices": market_prices
    }
    return results

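# Hedged usage sketch for run_experiment (the calling code is not shown);
# assumes the Input class from the auction module, as in the script above.
if __name__ == '__main__':
    parameters = Input()
    results = run_experiment(parameters, iterations_per_configuration=10)
    print("AVG market price: {}".format(results["avg_market_price"]))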