def __init__(self, trainpath, testpath, detector='sift'):
    self.trainstream = data_loader(filepath=trainpath, channels=CHANNELS,
                                   preprocess=PREPROCESS_QUEUE, randomize=RANDOM)
    self.teststream = data_loader(filepath=testpath, channels=CHANNELS,
                                  preprocess=PREPROCESS_QUEUE, randomize=RANDOM)
    # self.SIFT = cv2.xfeatures.SIFT_create()

    # Model
    if detector.lower() == 'orb':
        self.KPD = cv2.ORB_create(nfeatures=FEATURES, scoreType=cv2.ORB_FAST_SCORE)
        self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    elif detector.lower() == 'surf':
        self.KPD = cv2.xfeatures2d.SURF_create(hessianThreshold=1000)
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)  # or pass empty dictionary
        self.bf = cv2.FlannBasedMatcher(index_params, search_params)
    elif detector.lower() == 'sift':
        self.KPD = cv2.xfeatures2d.SIFT_create(nfeatures=FEATURES)
        self.bf = cv2.BFMatcher(crossCheck=True)
    else:
        print('Unknown Keypoint Detector')
        exit(1)

    # Cluster method
    self.kMeans = MiniBatchKMeans(VOCAB_SIZE, batch_size=64)
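# A minimal sketch (not part of the original class) of how the MiniBatchKMeans
# vocabulary created above could be used to encode an image as a bag-of-visual-words
# histogram, assuming the vocabulary has already been fit on training descriptors
# (e.g. via kMeans.fit or partial_fit). The helper name `encode_bovw` and its
# signature are assumptions for illustration only.
import numpy as np


def encode_bovw(kpd, kmeans, img, vocab_size):
    # Detect keypoints and compute descriptors for a single image
    keypoints, descriptors = kpd.detectAndCompute(img, None)
    hist = np.zeros(vocab_size, dtype=np.float32)
    if descriptors is None:
        return hist
    # Assign each descriptor to its nearest visual word and count occurrences
    words = kmeans.predict(descriptors.astype(np.float32))
    for w in words:
        hist[w] += 1
    # L1-normalize so images with different keypoint counts stay comparable
    return hist / max(hist.sum(), 1.0)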
def __init__(self, trainpath, testpath, n=3, detector='orb', use_n_matches=30):
    self.trainstream = data_loader(filepath=trainpath, channels=CHANNELS,
                                   preprocess=PREPROCESS_QUEUE, randomize=RANDOM)
    self.teststream = data_loader(filepath=testpath, channels=CHANNELS,
                                  preprocess=PREPROCESS_QUEUE, randomize=RANDOM)
    self.num_matches = use_n_matches

    if detector.lower() == 'orb':
        self.KPD = cv2.ORB_create(nfeatures=FEATURES, scoreType=cv2.ORB_FAST_SCORE)
        self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    elif detector.lower() == 'surf':
        self.KPD = cv2.xfeatures2d.SURF_create(hessianThreshold=1000)
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)  # or pass empty dictionary
        self.bf = cv2.FlannBasedMatcher(index_params, search_params)
    elif detector.lower() == 'sift':
        self.KPD = cv2.xfeatures2d.SIFT_create(nfeatures=FEATURES)
        self.bf = cv2.BFMatcher(crossCheck=True)
    else:
        print('Unknown Keypoint Detector')
        exit(1)

    # Nearest neighbors
    self.n = n
    # Descriptor dictionary
    self.desc = dict()
def main():
    cuda = torch.cuda.is_available() and True
    embedding_size = 200
    convolution_size = 3
    LR = 0.01
    CNN_size = 667
    batch_size = 5
    num_epoch = 1  # 10

    print("-- cnn model details --")
    print("embedding size:", embedding_size, "CNN size:", CNN_size,
          "convolution size:", convolution_size, "learning rate:", LR,
          "batch size:", batch_size, "num epochs:", num_epoch)

    padding = "<padding>"
    train_file = "../data/ask_ubuntu/train_random.txt"
    dev_file = "../data/ask_ubuntu/dev.txt"
    test_file = "../data/ask_ubuntu/test.txt"
    corpus_file = "../data/ask_ubuntu/text_tokenized.txt"
    embedding_path = "../data/ask_ubuntu/vectors_pruned.200.txt"

    data_loader = util.data_loader(corpus_file, cut_off=2, padding=padding)
    encoder = util.Encoder(data_loader.vocab_map[padding], data_loader, embedding_path, cuda)
    print("loaded encoder")

    CNN = util.CNN(embedding_size, CNN_size, convolution_size)
    if cuda:
        encoder = encoder.cuda()
        CNN = CNN.cuda()

    print("loading annotations...")
    dev = data_loader.read_annotations(dev_file, 20, 10)
    dev_data = data_loader.create_eval_batches(dev)
    test = data_loader.read_annotations(test_file, 20, 10)
    test_data = data_loader.create_eval_batches(test)
    train_data = data_loader.read_annotations(train_file)
    print("loaded annotations")

    train_losses, dev_metrics, test_metrics = \
        util.train(encoder, CNN, num_epoch, data_loader, train_data, dev_data,
                   test_data, batch_size, util.CNN_forward, True, cuda, LR=LR)

    CNN = CNN.cpu()
    torch.save(CNN, "cnn.model")
    return train_losses, dev_metrics, test_metrics
def main():
    cuda = torch.cuda.is_available() and True
    num_epoch = 10
    batch_size = 2
    input_size = 200
    output_size = 120
    LR = 0.001

    dev_file = "../data/ask_ubuntu/dev.txt"
    test_file = "../data/ask_ubuntu/test.txt"
    train_file = "../data/ask_ubuntu/train_random.txt"
    corpus_file = "../data/ask_ubuntu/text_tokenized.txt"
    padding = "<padding>"
    embedding_path = "../data/ask_ubuntu/vectors_pruned.200.txt"

    print("-- lstm model --")
    print("embedding size:", output_size, "learning rate:", LR,
          "batch size:", batch_size, "num epoch:", num_epoch)

    # Represent each question as a word sequence (not as a bag of words)
    data_loader = util.data_loader(corpus_file, cut_off=1, padding=padding)
    dev = data_loader.read_annotations(dev_file, 20, 10)
    dev_data = data_loader.create_eval_batches(dev)
    test = data_loader.read_annotations(test_file, 20, 10)
    test_data = data_loader.create_eval_batches(test)
    train_data = data_loader.read_annotations(train_file, 10, 2)

    # Use an existing vector representation of the words
    encoder = util.Encoder(data_loader.vocab_map[padding], data_loader, embedding_path, cuda)
    print("embeddings done")

    model = util.LSTM(input_size, output_size)
    if cuda:
        model = model.cuda()
        encoder = encoder.cuda()

    train_losses, dev_metrics, test_metrics = \
        util.train(encoder, model, num_epoch, data_loader, train_data, dev_data,
                   test_data, batch_size, util.LSTM_forward, True, cuda, LR)

    model = model.cpu()
    torch.save(model, "lstm.model")
    return train_losses, dev_metrics, test_metrics
def __init__(self, trainpath, testpath, detectors=('surf', 'sift', 'orb'),
             weights=None, params=None):
    if weights:
        assert len(detectors) == len(weights)
        self.weights = weights
    else:
        self.weights = [1] * len(detectors)

    self.trainpath = trainpath
    self.testpath = testpath
    self.detectors = detectors

    self.members = []
    for detector in detectors:
        self.members.append(BF(trainpath, testpath, n=1, detector=detector,
                               use_n_matches=NUM_MATCHES))

    self.labels = [l[1] for l in data_loader(filepath=testpath)]
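# A minimal sketch (assumed, not the original implementation) of how the ensemble
# above might combine its members' per-image predictions using the configured
# weights: each member casts a weighted vote for its predicted label and the label
# with the highest accumulated weight wins. `member.predict(...)` is a hypothetical
# method name used only for illustration.
from collections import defaultdict


def weighted_vote(members, weights, image):
    scores = defaultdict(float)
    for member, weight in zip(members, weights):
        prediction = member.predict(image)  # hypothetical per-member prediction
        scores[prediction] += weight
    # Return the label with the largest total weight
    return max(scores, key=scores.get)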
def main():
    ######################### Hyperparameters #########################
    learning_rate = 3e-3
    decay_rate = 0.9  # decay per epoch
    model_name = 'deep_CNN_modified'
    num_epochs = 12
    ####################################################################

    train_dir = 'saved_model/{}/'.format(model_name)
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    result_dir = 'result_plot/{}/'.format(model_name)
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)

    # full dataset
    dataset = data_loader('data/kitti_full.npy', 'data/kitti_full_metadata.json')
    # # mini dataset
    # dataset = data_loader('data/kitti_mini.npy', 'data/kitti_mini_metadata.json')

    model = deep_CNN_model(learning_rate, model_name, num_epochs, dataset,
                           train_dir, result_dir)

    sess = tf.Session()
    model = initialize_model(sess, model, train_dir)
    model.train(sess)
    return img


def chop_lower(img):
    # Keep only the upper two thirds of the image
    return img[:2 * int(img.shape[0] / 3), :]


def zoomy(img, shave=SHAVE):
    # Crop a fraction of the image from the top and bottom
    y, x, _ = img.shape
    chunk = int(shave * y)
    return img[chunk:y - chunk, :, :]


def zoomx(img, shave=SHAVE):
    # Crop a fraction of the image from the left and right
    y, x, _ = img.shape
    chunk = int(shave * x)
    return img[:, chunk:x - chunk, :]


if __name__ == '__main__':
    loader = data_loader(
        filepath='/home/benedict/classes/cv/project/lists/splits/0_train.txt',
        channels=cv2.IMREAD_GRAYSCALE,
        randomize=False)
    for img, target in loader:
        img = resize(img)
        img = binarize(img)
        img = inverse(img)
        show(img)
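# A hedged example (an assumption, not taken from the original file) of how a
# preprocessing queue such as the PREPROCESS_QUEUE passed to data_loader elsewhere
# might be composed from the crop functions above and applied in order to each
# image. It assumes 3-channel (color) images, since zoomy/zoomx unpack three
# dimensions from img.shape.
PREPROCESS_QUEUE_EXAMPLE = [chop_lower, zoomy, zoomx]


def apply_preprocess(img, queue=PREPROCESS_QUEUE_EXAMPLE):
    # Apply each preprocessing step in sequence and return the transformed image
    for step in queue:
        img = step(img)
    return img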
args = get_args()

if args.version == 'v1':
    model = modelV1(input_shape=3 * 32 * 32, num_classes=10).to(dev)
elif args.version == 'v2':
    model = modelV2(num_classes=10).to(dev)
elif args.version == 'v3':
    model = modelV3(num_classes=10).to(dev)
elif args.version == 'v4':
    model = modelV4(num_classes=10).to(dev)
else:
    model = standardArchs(name=args.version, num_output=10).to(dev)
print(model)

train_dataloader = data_loader(args.root_dir + '/train', batch_size=32, train=True)
test_dataloader = data_loader(args.root_dir + '/test', batch_size=8, train=False)

if args.optim == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=args.base_lr, momentum=0.9, weight_decay=1e-4)
elif args.optim == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=args.base_lr)
elif args.optim == 'adagrad':
    optimizer = optim.Adagrad(model.parameters(), lr=args.base_lr)
# More optimizers can be added here if needed

for epoch in range(args.num_epochs):
    train_epoch(epoch, model, train_dataloader, optimizer, args.base_lr, args.version)
    test_epoch(epoch, model, test_dataloader, args.version)
def main():
    use_lstm = True
    cuda = torch.cuda.is_available() and True
    embedding_size = 300
    convolution_size = 3
    output_size = 200
    cnn_output_size = 600
    batch_size = 2
    num_epoch = 10
    classifier_hidden_size = 20
    lr = 0.01
    padding = "<padding>"

    embedding_path = "../data/glove.txt"
    corpus_file = "../data/ask_ubuntu/text_tokenized.txt"
    corpus_2 = "../data/stackexchange_android/corpus.txt"
    train_file = "../data/ask_ubuntu/train_random.txt"
    pos_dev = "../data/stackexchange_android/dev.pos.txt"
    neg_dev = "../data/stackexchange_android/dev.neg.txt"
    pos_test = "../data/stackexchange_android/test.pos.txt"
    neg_test = "../data/stackexchange_android/test.neg.txt"

    if use_lstm:
        print("-- lstm model --")
        print("embedding size:", embedding_size, "learning rate:", lr,
              "batch size:", batch_size, "num epoch:", num_epoch)
    else:
        print("-- cnn model --")
        print("embedding size:", embedding_size, "learning rate:", lr,
              "batch size:", batch_size, "num epoch:", num_epoch,
              "convolution size:", convolution_size, "cnn output size:", cnn_output_size)

    data_loader = util.data_loader(corpus_file, cut_off=1, padding=padding)
    data_loader.read_new_corpus(corpus_2)

    dev_annotations = util.read_annotations_2(pos_dev, neg_dev, -1, -1)
    test_annotations = util.read_annotations_2(pos_test, neg_test, -1, -1)
    dev_data = data_loader.create_eval_batches(dev_annotations, first_corpus=False)
    test_data = data_loader.create_eval_batches(test_annotations, first_corpus=False)
    train_data = data_loader.read_annotations(train_file)

    if use_lstm:
        model = util.LSTM(embedding_size, output_size)
        forward = util.LSTM_forward
        classifier = util.Classifier(output_size, classifier_hidden_size)
    else:
        model = util.CNN(embedding_size, cnn_output_size, convolution_size)
        forward = util.CNN_forward
        classifier = util.Classifier(cnn_output_size, classifier_hidden_size)

    encoder = util.Encoder(data_loader.vocab_map[padding], data_loader,
                           embedding_path, cuda, embedding_size)
    print("encoder loaded")

    if cuda:
        model = model.cuda()
        encoder = encoder.cuda()
        classifier = classifier.cuda()

    return util.advisarial_trainer(encoder, model, classifier, num_epoch, data_loader,
                                   train_data, dev_data, test_data, batch_size,
                                   forward, cuda, LR=0.0001, L=False)
def main():
    cuda = torch.cuda.is_available() and True
    num_epoch = 1  # 10
    batch_size = 2
    embedding_size = 300
    output_size = 120
    convolution_size = 3

    corpus_file = "../data/ask_ubuntu/text_tokenized.txt"
    embedding_path = "../data/glove.txt"
    train_file = "../data/ask_ubuntu/train_random.txt"
    corpus_2 = "../data/stackexchange_android/corpus.txt"
    pos_dev = "../data/stackexchange_android/dev.pos.txt"
    neg_dev = "../data/stackexchange_android/dev.neg.txt"
    pos_test = "../data/stackexchange_android/test.pos.txt"
    neg_test = "../data/stackexchange_android/test.neg.txt"

    use_lstm = False
    padding = "<padding>"

    if use_lstm:
        lr = 0.0001
        embedding_size = 300
        print("-- lstm model --")
        print("embedding size:", embedding_size, "learning rate:", lr,
              "batch size:", batch_size, "num epoch:", num_epoch)
    else:
        lr = 0.001
        cnn_output_size = 500
        print("-- cnn model --")
        print("embedding size:", embedding_size, "learning rate:", lr,
              "batch size:", batch_size, "num epoch:", num_epoch,
              "convolution size:", convolution_size, "cnn output size:", cnn_output_size)

    data_loader = util.data_loader(corpus_file, cut_off=0, padding=padding)
    data_loader.read_new_corpus(corpus_2)

    print("encoder loading...")
    encoder = util.Encoder(data_loader.vocab_map[padding], data_loader,
                           embedding_path, cuda, embedding_size)
    print("encoder loaded")

    dev_annotations = util.read_annotations_2(pos_dev, neg_dev, -1, -1)
    test_annotations = util.read_annotations_2(pos_test, neg_test, -1, -1)
    dev_data = data_loader.create_eval_batches(dev_annotations, first_corpus=False)
    test_data = data_loader.create_eval_batches(test_annotations, first_corpus=False)
    train_data = data_loader.read_annotations(train_file, 10, 3)

    print("run model")
    if use_lstm:
        model = util.LSTM(embedding_size, output_size)
        forward = util.LSTM_forward
    else:
        model = util.CNN(embedding_size, cnn_output_size, convolution_size)
        forward = util.CNN_forward

    if cuda:
        model = model.cuda()
        encoder = encoder.cuda()

    return util.train_cross(encoder, model, num_epoch, data_loader, train_data,
                            dev_data, test_data, batch_size, forward, lr,
                            pre_trained_encoder=True, cuda=cuda)
def train(args):
    torch.manual_seed(args.seed)

    # Get data loader
    train_data, dev_data, word2id, id2word, char2id, new_args = data_loader(args)
    model = QAxl(new_args)
    if args.use_cuda:
        model = model.cuda()
    dev_batches = get_batches(dev_data, args.batch_size, evaluation=True)

    # Get optimizer and scheduler
    # Materialize the parameter list so it can be reused for gradient clipping below
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adamax(parameters, lr=args.lrate)
    lrate = args.lrate

    if args.eval:
        model.load_state_dict(torch.load(args.model_dir))
        model.eval()
        model.SelfEvaluate(dev_batches, args.data_dir + 'dev_eval.json',
                           answer_file=args.answer_file,
                           drop_file=args.data_dir + 'drop.json',
                           dev=args.data_dir + 'dev.json')
        exit()

    if args.load_model:
        model.load_state_dict(torch.load(args.model_dir))

    best_score = 0.0

    ## Training
    for epoch in range(1, args.epochs + 1):
        train_batches = get_batches(train_data, args.batch_size)
        dev_batches = get_batches(dev_data, args.batch_size, evaluation=True)
        model.train()
        for i, train_batch in enumerate(train_batches):
            loss = model(train_batch)
            model.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(parameters, new_args['grad_clipping'])
            optimizer.step()
            model.reset_parameters()
            if i % 100 == 0:
                print('epoch = %d, loss = %.5f, step = %d, lrate = %.5f, best_score = %.3f'
                      % (epoch, model.train_loss.value, i, lrate, best_score))
                sys.stdout.flush()

        model.eval()
        exact_match_score, F1 = model.SelfEvaluate(
            dev_batches, args.data_dir + 'dev_eval.json',
            answer_file=args.answer_file,
            drop_file=args.data_dir + 'drop.json',
            dev=args.data_dir + 'dev-v2.0.json')
        if best_score < F1:
            best_score = F1
            print('saving %s ...' % args.model_dir)
            torch.save(model.state_dict(), args.model_dir)

        if epoch > 0 and epoch % args.decay_period == 0:
            lrate *= args.decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lrate
def main():
    parser = argparse.ArgumentParser(description='reads an input file and trains '
                                                 'a model for toxicity classification')
    # Required arguments
    parser.add_argument("-i", "--input", required=True, help="Path to training set")
    parser.add_argument("-t", "--test", required=True, help="Path to test set")
    parser.add_argument("-o", "--output", required=True, help="Path to submission file")
    parser.add_argument("-e", "--embedding", required=True, help="Path to embedding file")
    # Optional arguments
    parser.add_argument("-c", "--text_column", type=str, default="comment_text",
                        help="name of the column that contains the text")
    parser.add_argument("-l", "--label_column", type=str, default="target",
                        help="name of the column that contains the labels")
    parser.add_argument("-p", "--preprocess", type=bool, default=False,
                        help="if set, the text will be preprocessed")
    parser.add_argument("-m", "--model", type=str, choices=['LSTM', 'CNN'], default="LSTM",
                        help="the deep neural network to train")
    args = vars(parser.parse_args())

    # Loading the data
    X_train, y_train, X_test, tokenizer = data_loader(args['input'], args['test'],
                                                      args['text_column'],
                                                      args['label_column'],
                                                      args['preprocess'])

    # Loading the embeddings
    word_index = tokenizer.word_index
    embedding_matrix = build_embeddings(args['embedding'], word_index)

    # Building the model
    model_selection = args['model']
    model = Model()
    if model_selection == 'LSTM':
        model.build_lstm()
    elif model_selection == 'CNN':
        model.build_cnn()
    else:
        raise ValueError("the model must be CNN or LSTM")

    # Training the model
    model.train(X_train, y_train)

    # Predicting the labels for the test set
    test_df = pd.read_csv(args['test'])
    model.predict(X_test, test_df, args['output'])
Logging.add_log(f'Read Parameter: model: {model}, word: {word}', 'debug')
config = load.Config()
text_model = load.Model(config)

# Load the data
if model == 'FastText':
    train_data, train_label = FastText_dataloader('train', word, config)
    test_data, test_label = FastText_dataloader('test', word, config)
    eva_data, eva_label = FastText_dataloader('dev', word, config)
else:
    _, train_data, train_label = load_data('train', word, config)
    _, test_data, test_label = load_data('test', word, config)
    _, eva_data, eva_label = load_data('dev', word, config)
Logging.add_log('Completed loading data', 'debug')

dataloader = data_loader(data=train_data, label=train_label, config=config)
test_dataloader = data_loader(data=test_data, label=test_label, config=config)
eva_dataloader = data_loader(data=eva_data, label=eva_label, config=config)

Logging.add_log('Start Training', 'debug')
train(config=config, model=text_model, data=dataloader, name=model,
      test_dataloader=test_dataloader, eva_dataloader=eva_dataloader,
      logger=Logging)
                    default='results', help='results save path')
parser.add_argument('--inverse_order', type=bool, default=True,
                    help='True: [input, target], False: [target, input]')
opt = parser.parse_args()
print(opt)

# results save path
if not os.path.isdir(opt.dataset + '_results/test_results'):
    os.mkdir(opt.dataset + '_results/test_results')

# data_loader
test_loader = util.data_loader('data/' + opt.dataset + '/' + opt.test_subfolder, 1, shuffle=False)
img_size = test_loader.shape[1]

# variables
x = tf.placeholder(tf.float32, shape=(None, opt.input_size, opt.input_size, test_loader.shape[3]))

# network
G = network.generator(x, opt.ngf)

# open session and initialize all variables
saver = tf.train.Saver()
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
                    default=True, help='0: [input, target], 1: [target, input]')
opt = parser.parse_args()
print(opt)

# results save path
root = opt.dataset + '_' + opt.save_root + '/'
model = opt.dataset + '_'
if not os.path.isdir(root):
    os.mkdir(root)
if not os.path.isdir(root + 'Fixed_results'):
    os.mkdir(root + 'Fixed_results')

# data_loader
train_loader = util.data_loader('data/' + opt.dataset + '/' + opt.train_subfolder,
                                opt.batch_size, shuffle=True)
test_loader = util.data_loader('data/' + opt.dataset + '/' + opt.test_subfolder,
                               opt.test_batch_size, shuffle=True)
img_size = train_loader.shape[1]

test_img = test_loader.next_batch()
if opt.inverse_order:
    fixed_x_ = test_img[:, :, img_size:, :]
    fixed_y_ = test_img[:, :, 0:img_size, :]
else:
    fixed_x_ = test_img[:, :, 0:img_size, :]
    fixed_y_ = test_img[:, :, img_size:, :]

if img_size != opt.input_size: