Example No. 1
    def __init__(self, trainpath, testpath, detector='sift'):
        self.trainstream = data_loader(filepath=trainpath, channels=CHANNELS,
                                       preprocess=PREPROCESS_QUEUE, randomize=RANDOM)
        self.teststream = data_loader(filepath=testpath, channels=CHANNELS,
                                      preprocess=PREPROCESS_QUEUE, randomize=RANDOM)
        # self.SIFT = cv2.xfeatures2d.SIFT_create()

        # Model
        if detector.lower() == 'orb':
            self.KPD = cv2.ORB_create(nfeatures=FEATURES, scoreType=cv2.ORB_FAST_SCORE)
            self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        elif detector.lower() == 'surf':
            self.KPD = cv2.xfeatures2d.SURF_create(hessianThreshold=1000)
            index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
            search_params = dict(checks=50)  # or pass an empty dictionary
            self.bf = cv2.FlannBasedMatcher(index_params, search_params)
        elif detector.lower() == 'sift':
            self.KPD = cv2.xfeatures2d.SIFT_create(nfeatures=FEATURES)
            self.bf = cv2.BFMatcher(crossCheck=True)
        else:
            raise ValueError('Unknown keypoint detector: ' + detector)

        # Cluster method
        self.kMeans = MiniBatchKMeans(VOCAB_SIZE, batch_size=64)
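Examples 1 and 2 reference several module-level constants defined elsewhere in the repository. A minimal sketch of plausible stand-ins so the snippets can be read in isolation; only FLANN_INDEX_KDTREE = 1 is a documented OpenCV convention, the other values are guesses:

import cv2

# Hypothetical stand-ins for the repository's constants.
CHANNELS = cv2.IMREAD_GRAYSCALE  # decode mode handed to data_loader
PREPROCESS_QUEUE = []            # preprocessing callables applied in order
RANDOM = True                    # shuffle the image stream
FEATURES = 500                   # keypoint budget for ORB/SIFT
VOCAB_SIZE = 128                 # MiniBatchKMeans vocabulary size
NUM_MATCHES = 30                 # matches kept per comparison (Example 5)
FLANN_INDEX_KDTREE = 1           # value from the OpenCV FLANN documentation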
Example No. 2
    def __init__(self,
                 trainpath,
                 testpath,
                 n=3,
                 detector='orb',
                 use_n_matches=30):
        self.trainstream = data_loader(filepath=trainpath,
                                       channels=CHANNELS,
                                       preprocess=PREPROCESS_QUEUE,
                                       randomize=RANDOM)
        self.teststream = data_loader(filepath=testpath,
                                      channels=CHANNELS,
                                      preprocess=PREPROCESS_QUEUE,
                                      randomize=RANDOM)

        self.num_matches = use_n_matches

        if detector.lower() == 'orb':
            self.KPD = cv2.ORB_create(nfeatures=FEATURES,
                                      scoreType=cv2.ORB_FAST_SCORE)
            self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        elif detector.lower() == 'surf':
            self.KPD = cv2.xfeatures2d.SURF_create(hessianThreshold=1000)
            index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
            search_params = dict(checks=50)  # or pass an empty dictionary
            self.bf = cv2.FlannBasedMatcher(index_params, search_params)
        elif detector.lower() == 'sift':
            self.KPD = cv2.xfeatures2d.SIFT_create(nfeatures=FEATURES)
            self.bf = cv2.BFMatcher(crossCheck=True)
        else:
            raise ValueError('Unknown keypoint detector: ' + detector)

        # Nearest neighbors
        self.n = n

        # Descriptor dictionary
        self.desc = dict()
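Neither constructor shows the detector and matcher in use. A sketch of the matching step such a class typically performs; the method name match_pair is hypothetical, but detectAndCompute and BFMatcher.match are the standard cv2 API:

    def match_pair(self, img_a, img_b):
        # Detect keypoints and compute descriptors for both images.
        kp_a, des_a = self.KPD.detectAndCompute(img_a, None)
        kp_b, des_b = self.KPD.detectAndCompute(img_b, None)
        if des_a is None or des_b is None:
            return []  # one image produced no descriptors
        # Brute-force match, closest descriptor pairs first.
        matches = sorted(self.bf.match(des_a, des_b), key=lambda m: m.distance)
        return matches[:self.num_matches]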
Example No. 3
def main():
    cuda = torch.cuda.is_available() and True  # the "and True" is a manual switch; set to False to force CPU
    embedding_size = 200
    convolution_size = 3
    LR = 0.01
    CNN_size = 667
    batch_size = 5
    num_epoch = 1  # 10
    
    print("-- cnn model details --")
    print("embedding Size:", CNN_size, "convolution size:", convolution_size, "learning rate:", LR, "batch size:", batch_size, "num epochs:", num_epoch)

    padding = "<padding>"
    train_file = "../data/ask_ubuntu/train_random.txt"
    dev_file = "../data/ask_ubuntu/dev.txt"
    test_file = "../data/ask_ubuntu/test.txt"
    corpus_file = "../data/ask_ubuntu/text_tokenized.txt"
    embedding_path = "../data/ask_ubuntu/vectors_pruned.200.txt"

    data_loader = util.data_loader(corpus_file, cut_off=2, padding=padding)

    encoder = util.Encoder(data_loader.vocab_map[padding], data_loader, embedding_path, cuda)

    print("loaded encoder")
    CNN = util.CNN(embedding_size, CNN_size, convolution_size)
    
    if cuda:
        encoder = encoder.cuda()
        CNN = CNN.cuda()

    print("loading annotations...")
    dev  = data_loader.read_annotations(dev_file, 20, 10)
    dev_data  = data_loader.create_eval_batches(dev)
    test = data_loader.read_annotations(test_file, 20, 10)
    test_data = data_loader.create_eval_batches(test)
    train_data = data_loader.read_annotations(train_file)
    print("loaded annotations")

    train_losses, dev_metrics, test_metrics = \
        util.train(encoder, CNN, num_epoch, data_loader, train_data, dev_data, test_data, batch_size, util.CNN_forward, True, cuda, LR=LR)
    
    CNN = CNN.cpu()
    torch.save(CNN, "cnn.model")
    
    return train_losses, dev_metrics, test_metrics
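torch.save(CNN, "cnn.model") pickles the whole module object, which ties the checkpoint to the exact class and module layout. A sketch of the state_dict pattern the PyTorch documentation recommends instead (the file name is an assumption):

    # Save only the parameters ...
    torch.save(CNN.state_dict(), "cnn.state")
    # ... and rebuild the module before loading them back.
    CNN = util.CNN(embedding_size, CNN_size, convolution_size)
    CNN.load_state_dict(torch.load("cnn.state"))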
Example No. 4
def main():
    cuda = torch.cuda.is_available() and True
    num_epoch = 10
    batch_size = 2
    input_size = 200
    output_size = 120
    LR = 0.001
    dev_file = "../data/ask_ubuntu/dev.txt"
    test_file = "../data/ask_ubuntu/test.txt"
    train_file = "../data/ask_ubuntu/train_random.txt"
    corpus_file = "../data/ask_ubuntu/text_tokenized.txt"
    padding = "<padding>"
    embedding_path = "../data/ask_ubuntu/vectors_pruned.200.txt"

    print("-- lstm model --")
    print("embedding size:", output_size, "learning rate:", LR, "batch size:",
          batch_size, "num epoch:", num_epoch)

    # Represent each question as a word sequence (and not as a bag of words)
    data_loader = util.data_loader(corpus_file, cut_off=1, padding=padding)

    dev = data_loader.read_annotations(dev_file, 20, 10)
    dev_data = data_loader.create_eval_batches(dev)
    test = data_loader.read_annotations(test_file, 20, 10)
    test_data = data_loader.create_eval_batches(test)
    train_data = data_loader.read_annotations(train_file, 10, 2)

    # Utilize an existing vector representation of the words
    encoder = util.Encoder(data_loader.vocab_map[padding], data_loader,
                           embedding_path, cuda)

    print("embeddings done")

    model = util.LSTM(input_size, output_size)
    if cuda:
        model = model.cuda()
        encoder = encoder.cuda()

    train_losses, dev_metrics, test_metrics \
        = util.train(encoder, model, num_epoch, data_loader, train_data, dev_data, test_data, batch_size, util.LSTM_forward, True, cuda, LR)

    model = model.cpu()
    torch.save(model, "lstm.model")
    return train_losses, dev_metrics, test_metrics
Example No. 5
    def __init__(self, trainpath, testpath, detectors=('surf', 'sift', 'orb'), weights=None, params=None):
        if weights:
            assert len(detectors) == len(weights)
            self.weights = weights
        else:
            self.weights = [1] * len(detectors)

        self.trainpath = trainpath
        self.testpath = testpath

        self.detectors = detectors
        self.members = []
        for detector in detectors:
            self.members.append(BF(trainpath,
                                   testpath,
                                   n=1,
                                   detector=detector,
                                   use_n_matches=NUM_MATCHES))

        self.labels = [l[1] for l in data_loader(filepath=testpath)]
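Example 5 builds the ensemble but does not show the voting step. A minimal sketch of weighted voting across the members, assuming each BF member exposes a predict() method that returns one label per test image (that method name is a guess):

    def predict(self):
        from collections import Counter
        member_votes = [member.predict() for member in self.members]
        predictions = []
        for votes in zip(*member_votes):
            # Each member contributes its weight to the label it voted for.
            tally = Counter()
            for label, weight in zip(votes, self.weights):
                tally[label] += weight
            predictions.append(tally.most_common(1)[0][0])
        return predictions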
Example No. 6
def main():
    ######################### Hyperparameters #########################
    learning_rate = 3e-3
    decay_rate = 0.9  # decay per epoch
    model_name = 'deep_CNN_modified'
    num_epochs = 12
    #################################################################
    train_dir = 'saved_model/{}/'.format(model_name)
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    result_dir = 'result_plot/{}/'.format(model_name)
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    # full dataset
    dataset = data_loader('data/kitti_full.npy',
                          'data/kitti_full_metadata.json')
    # # mini dataset
    # dataset = data_loader('data/kitti_mini.npy', 'data/kitti_mini_metadata.json')
    model = deep_CNN_model(learning_rate, model_name, num_epochs, dataset,
                           train_dir, result_dir)
    sess = tf.Session()
    model = initialize_model(sess, model, train_dir)
    model.train(sess)
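decay_rate is commented as a per-epoch multiplier, so under the usual exponential schedule the effective learning rate after e epochs would be learning_rate * decay_rate ** e. A quick check, assuming deep_CNN_model applies it that way:

    def lr_at_epoch(epoch):
        # e.g. the final (12th) epoch: 3e-3 * 0.9 ** 11 ≈ 9.4e-4
        return learning_rate * decay_rate ** epoch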
Example No. 7
    return img


def chop_lower(img):
    return img[:2 * int(img.shape[0] / 3), :]


def zoomy(img, shave=SHAVE):
    y, x, _ = img.shape
    chunk = int(shave * y)
    return img[chunk:y - chunk, :, :]


def zoomx(img, shave=SHAVE):
    y, x, _ = img.shape
    chunk = int(shave * x)  # the original reused y here, cropping the wrong axis
    return img[:, chunk:x - chunk, :]


if __name__ == '__main__':
    loader = data_loader(
        filepath='/home/benedict/classes/cv/project/lists/splits/0_train.txt',
        channels=cv2.IMREAD_GRAYSCALE,
        randomize=False)

    for img, target in loader:
        img = resize(img)
        img = binarize(img)
        img = inverse(img)
        show(img)
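resize, binarize, inverse and show are not defined in this fragment. Minimal OpenCV sketches consistent with how the __main__ loop uses them; the target width, Otsu thresholding, and the SHAVE value are assumptions, not the originals:

import cv2

SHAVE = 0.1  # hypothetical crop fraction used by zoomy/zoomx above


def resize(img, width=256):
    # Scale to a fixed width, preserving aspect ratio.
    h, w = img.shape[:2]
    return cv2.resize(img, (width, int(h * width / w)))


def binarize(img):
    # Otsu threshold; the loader reads grayscale, so img is single-channel.
    _, out = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return out


def inverse(img):
    return cv2.bitwise_not(img)


def show(img):
    cv2.imshow('img', img)
    cv2.waitKey(0)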
Example No. 8
	args = get_args()

	if args.version == 'v1':
		model = modelV1(input_shape=3*32*32, num_classes=10).to(dev)
	elif args.version == 'v2':
		model = modelV2(num_classes=10).to(dev)
	elif args.version == 'v3':
		model = modelV3(num_classes=10).to(dev)
	elif args.version == 'v4':
		model = modelV4(num_classes=10).to(dev)
	else:
		model = standardArchs(name=args.version, num_output=10).to(dev)
	print(model)

	train_dataloader = data_loader(args.root_dir + '/train', batch_size=32, train=True)
	test_dataloader = data_loader(args.root_dir + '/test', batch_size=8, train=False)

	if args.optim == 'sgd':
		optimizer = optim.SGD(model.parameters(), lr=args.base_lr, momentum=0.9, weight_decay=1e-4)
	elif args.optim == 'adam':
		optimizer = optim.Adam(model.parameters(), lr=args.base_lr)
	elif args.optim == 'adagrad':
		optimizer = optim.Adagrad(model.parameters(), lr=args.base_lr)
	else:
		raise ValueError('unknown optimizer: ' + args.optim)
	# More optimizers can be added here if needed.


	for epoch in range(args.num_epochs):
		train_epoch(epoch, model, train_dataloader, optimizer, args.base_lr, args.version)
		test_epoch(epoch, model, test_dataloader, args.version)
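The if/elif chain above works, but the same selection can be written as a lookup table. A behavior-preserving sketch with the same three optimizers and hyperparameters:

from torch import optim


def make_optimizer(name, model, base_lr):
    factories = {
        'sgd': lambda p: optim.SGD(p, lr=base_lr, momentum=0.9, weight_decay=1e-4),
        'adam': lambda p: optim.Adam(p, lr=base_lr),
        'adagrad': lambda p: optim.Adagrad(p, lr=base_lr),
    }
    if name not in factories:
        raise ValueError('unknown optimizer: ' + name)
    # Each factory receives the model's parameters and returns the optimizer.
    return factories[name](model.parameters())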
Example No. 9
def main():
    use_lstm = True
    cuda = torch.cuda.is_available() and True
    embedding_size = 300
    convolution_size = 3
    output_size = 200
    cnn_output_size = 600
    batch_size = 2
    num_epoch = 10
    classifier_hidden_size = 20
    lr = 0.01

    padding = "<padding>"
    embedding_path = "../data/glove.txt"
    corpus_file = "../data/ask_ubuntu/text_tokenized.txt"
    corpus_2 = "../data/stackexchange_android/corpus.txt"
    train_file = "../data/ask_ubuntu/train_random.txt"
    pos_dev = "../data/stackexchange_android/dev.pos.txt"
    neg_dev = "../data/stackexchange_android/dev.neg.txt"
    pos_test = "../data/stackexchange_android/test.pos.txt"
    neg_test = "../data/stackexchange_android/test.neg.txt"

    if use_lstm:
        print("-- lstm model --")
        print("embedding size:", embedding_size, "learning rate:", lr,
              "batch size:", batch_size, "num epoch:", num_epoch)
    else:
        print("-- cnn model --")
        print("embedding size:", embedding_size, "learning rate:", lr, "batch size:", batch_size, "num epoch:", num_epoch,\
               "convolution size:", convolution_size, "cnn output size:", cnn_output_size)

    data_loader = util.data_loader(corpus_file, cut_off=1, padding=padding)
    data_loader.read_new_corpus(corpus_2)

    dev_annotations = util.read_annotations_2(pos_dev, neg_dev, -1, -1)
    test_annotations = util.read_annotations_2(pos_test, neg_test, -1, -1)
    dev_data = data_loader.create_eval_batches(dev_annotations,
                                               first_corpus=False)
    test_data = data_loader.create_eval_batches(test_annotations,
                                                first_corpus=False)

    train_data = data_loader.read_annotations(train_file)

    if use_lstm:
        model = util.LSTM(embedding_size, output_size)
        forward = util.LSTM_forward
        classifier = util.Classifier(output_size, classifier_hidden_size)
    else:
        model = util.CNN(embedding_size, cnn_output_size, convolution_size)
        forward = util.CNN_forward
        classifier = util.Classifier(cnn_output_size, classifier_hidden_size)

    encoder = util.Encoder(data_loader.vocab_map[padding], data_loader,
                           embedding_path, cuda, embedding_size)

    print("encoder loaded")

    if cuda:
        model = model.cuda()
        encoder = encoder.cuda()
        classifier = classifier.cuda()

    return util.advisarial_trainer(encoder,
                                   model,
                                   classifier,
                                   num_epoch,
                                   data_loader,
                                   train_data,
                                   dev_data,
                                   test_data,
                                   batch_size,
                                   forward,
                                   cuda,
                                   LR=0.0001,
                                   L=False)
Example No. 10
def main():
    cuda = torch.cuda.is_available() and True
    num_epoch = 1  # 10
    batch_size = 2
    embedding_size = 300
    output_size = 120
    convolution_size = 3

    corpus_file = "../data/ask_ubuntu/text_tokenized.txt"
    embedding_path = "../data/glove.txt"
    train_file = "../data/ask_ubuntu/train_random.txt"
    corpus_2 = "../data/stackexchange_android/corpus.txt"
    pos_dev = "../data/stackexchange_android/dev.pos.txt"
    neg_dev = "../data/stackexchange_android/dev.neg.txt"
    pos_test = "../data/stackexchange_android/test.pos.txt"
    neg_test = "../data/stackexchange_android/test.neg.txt"

    use_lstm = False
    padding = "<padding>"

    if use_lstm:
        lr = 0.0001
        embedding_size = 300
        print("-- lstm model --")
        print("embedding size:", embedding_size, "learning rate:", lr,
              "batch size:", batch_size, "num epoch:", num_epoch)

    else:
        lr = 0.001
        cnn_output_size = 500
        print("-- cnn model --")
        print("embedding size:", embedding_size, "learning rate:", lr,
              "batch size:", batch_size, "num epoch:", num_epoch,
              "convolution size:", convolution_size, "cnn output size:",
              cnn_output_size)

    data_loader = util.data_loader(corpus_file, cut_off=0, padding=padding)
    data_loader.read_new_corpus(corpus_2)

    print("encoder loading...")
    encoder = util.Encoder(data_loader.vocab_map[padding], data_loader,
                           embedding_path, cuda, embedding_size)
    print("encoder loaded")

    dev_annotations = util.read_annotations_2(pos_dev, neg_dev, -1, -1)
    test_annotations = util.read_annotations_2(pos_test, neg_test, -1, -1)
    dev_data = data_loader.create_eval_batches(dev_annotations,
                                               first_corpus=False)
    test_data = data_loader.create_eval_batches(test_annotations,
                                                first_corpus=False)

    train_data = data_loader.read_annotations(train_file, 10, 3)

    print("run model")

    if use_lstm:
        model = util.LSTM(embedding_size, output_size)
        forward = util.LSTM_forward
    else:
        model = util.CNN(embedding_size, cnn_output_size, convolution_size)
        forward = util.CNN_forward

    if cuda:
        model = model.cuda()
        encoder = encoder.cuda()

    return util.train_cross(encoder,
                            model,
                            num_epoch,
                            data_loader,
                            train_data,
                            dev_data,
                            test_data,
                            batch_size,
                            forward,
                            lr,
                            pre_trained_encoder=True,
                            cuda=cuda)
Example No. 11
def train(args):
    torch.manual_seed(args.seed)

    # Get data loader
    train_data, dev_data, word2id, id2word, char2id, new_args = data_loader(
        args)
    model = QAxl(new_args)

    if args.use_cuda:
        model = model.cuda()

    dev_batches = get_batches(dev_data, args.batch_size, evaluation=True)

    # Get optimizer and scheduler
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adamax(parameters, lr=args.lrate)
    lrate = args.lrate

    if args.eval:
        model.load_state_dict(torch.load(args.model_dir))
        model.eval()
        model.SelfEvaluate(dev_batches,
                           args.data_dir + 'dev_eval.json',
                           answer_file=args.answer_file,
                           drop_file=args.data_dir + 'drop.json',
                           dev=args.data_dir + 'dev.json')
        exit()

    if args.load_model:
        model.load_state_dict(torch.load(args.model_dir))

    best_score = 0.0

    ## Training
    for epoch in range(1, args.epochs + 1):
        train_batches = get_batches(train_data, args.batch_size)
        dev_batches = get_batches(dev_data, args.batch_size, evaluation=True)

        model.train()
        for i, train_batch in enumerate(train_batches):
            loss = model(train_batch)
            model.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(parameters,
                                           new_args['grad_clipping'])
            optimizer.step()
            model.reset_parameters()

            if i % 100 == 0:
                print(
                    'epoch = %d, loss = %.5f, step = %d, lrate = %.5f, best_score = %.3f'
                    % (epoch, model.train_loss.value, i, lrate, best_score))
                sys.stdout.flush()

        model.eval()
        exact_match_score, F1 = model.SelfEvaluate(
            dev_batches,
            args.data_dir + 'dev_eval.json',
            answer_file=args.answer_file,
            drop_file=args.data_dir + 'drop.json',
            dev=args.data_dir + 'dev-v2.0.json')

        if best_score < F1:
            best_score = F1
            print('saving %s ...' % args.model_dir)
            torch.save(model.state_dict(), args.model_dir)
        if epoch > 0 and epoch % args.decay_period == 0:
            lrate *= args.decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lrate
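The manual decay at the end of the epoch loop is exactly what torch.optim.lr_scheduler.StepLR implements. An equivalent sketch, with the same factor and period:

from torch.optim import lr_scheduler

# Multiplies the learning rate by args.decay every args.decay_period epochs.
scheduler = lr_scheduler.StepLR(optimizer, step_size=args.decay_period,
                                gamma=args.decay)
# Then call scheduler.step() once per epoch instead of editing
# optimizer.param_groups by hand.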
Example No. 12
def main():
    parser = argparse.ArgumentParser(description='reads input file and trains '
                                     'a model for toxicity classification')

    # Required arguments
    parser.add_argument("-i",
                        "--input",
                        required=True,
                        help="Path to training set")
    parser.add_argument("-t", "--test", required=True, help="Path to test set")
    parser.add_argument("-o",
                        "--output",
                        required=True,
                        help="Path to submission file")
    parser.add_argument("-e",
                        "--embedding",
                        required=True,
                        help="Path to embedding file")

    # Optional arguments
    parser.add_argument("-c",
                        "--text_column",
                        type=str,
                        default="commnet_text",
                        help="name of the column that contains the text")
    parser.add_argument("-l",
                        "--label_column",
                        type=str,
                        default="target",
                        help="name of the column that contains the labels")
    parser.add_argument("-p",
                        "--preprocess",
                        type=bool,
                        default=False,
                        help="if set the text will be processed")
    parser.add_argument("-m",
                        "--model",
                        type=str,
                        choices=['LSTM', 'CNN'],
                        default="LSTM",
                        help="the deep neural network")

    args = vars(parser.parse_args())

    # Loading the data
    X_train, y_train, X_test, tokenizer = data_loader(args['input'],
                                                      args['test'],
                                                      args['text_column'],
                                                      args['label_column'],
                                                      args['preprocess'])
    # Loading the embeddings
    word_index = tokenizer.word_index
    embedding_matrix = build_embeddings(args['embedding'], word_index)

    # Building the model
    model_selection = args['model']
    model = Model()  # lowercase name so the Model class itself is not shadowed
    if model_selection == 'LSTM':
        model.build_lstm()
    elif model_selection == 'CNN':
        model.build_cnn()
    else:
        raise ValueError("the model must be CNN or LSTM")

    # Training the model
    model.train(X_train, y_train)

    # Predicting the labels for test set
    test_df = pd.read_csv(args['test'])
    model.predict(X_test, test_df, args['output'])
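build_embeddings is used above but not shown. A common implementation pattern for mapping a GloVe-style vectors file onto the tokenizer's word_index; the dimension default and the zero-vector handling of unknown words are assumptions:

import numpy as np


def build_embeddings(embedding_path, word_index, dim=300):
    # Parse "word v1 v2 ... vn" lines into a lookup table.
    vectors = {}
    with open(embedding_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            vectors[parts[0]] = np.asarray(parts[1:], dtype='float32')
    # Row i holds the vector for index i; words without a vector stay zero.
    matrix = np.zeros((len(word_index) + 1, dim))
    for word, i in word_index.items():
        if word in vectors:
            matrix[i] = vectors[word]
    return matrix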
Example No. 13
    Logging.add_log(f'Read Parameter: model: {model}, word: {word}', 'debug')

    config = load.Config()
    text_model = load.Model(config)

    # Load data
    if model == 'FastText':
        train_data, train_label = FastText_dataloader('train', word, config)
        test_data, test_label = FastText_dataloader('test', word, config)
        eva_data, eva_label = FastText_dataloader('dev', word, config)
    else:
        _, train_data, train_label = load_data('train', word, config)
        _, test_data, test_label = load_data('test', word, config)
        _, eva_data, eva_label = load_data('dev', word, config)
    Logging.add_log('Completed loading data', 'debug')

    dataloader = data_loader(data=train_data, label=train_label, config=config)
    test_dataloader = data_loader(data=test_data,
                                  label=test_label,
                                  config=config)
    eva_dataloader = data_loader(data=eva_data, label=eva_label, config=config)

    Logging.add_log('Start Training', 'debug')
    train(config=config,
          model=text_model,
          data=dataloader,
          name=model,
          test_dataloader=test_dataloader,
          eva_dataloader=eva_dataloader,
          logger=Logging)
Example No. 14
                    default='results',
                    help='results save path')
parser.add_argument('--inverse_order',
                    # type=bool would treat any non-empty string as True
                    type=lambda s: s.lower() in ('true', '1'),
                    default=True,
                    help='True: [input, target], False: [target, input]')
opt = parser.parse_args()
print(opt)

# results save path
if not os.path.isdir(opt.dataset + '_results/test_results'):
    os.makedirs(opt.dataset + '_results/test_results')  # also creates the parent directory

# data_loader
test_loader = util.data_loader('data/' + opt.dataset + '/' +
                               opt.test_subfolder,
                               1,
                               shuffle=False)
img_size = test_loader.shape[1]

# variables
x = tf.placeholder(tf.float32,
                   shape=(None, opt.input_size, opt.input_size,
                          test_loader.shape[3]))

# network
G = network.generator(x, opt.ngf)

# open session and initialize all variables
saver = tf.train.Saver()
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
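The snippet constructs a Saver and initializes variables, but the restore call falls outside the excerpt. In TF1 the usual next step looks like this; the checkpoint directory is an assumption:

# Restore the trained generator weights before running inference.
ckpt = tf.train.latest_checkpoint(opt.dataset + '_results/')
if ckpt is not None:
    saver.restore(sess, ckpt)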
Example No. 15
                    default=True,
                    help='True: [input, target], False: [target, input]')
opt = parser.parse_args()
print(opt)

# results save path
root = opt.dataset + '_' + opt.save_root + '/'
model = opt.dataset + '_'
if not os.path.isdir(root):
    os.mkdir(root)
if not os.path.isdir(root + 'Fixed_results'):
    os.mkdir(root + 'Fixed_results')

# data_loader
train_loader = util.data_loader('data/' + opt.dataset + '/' +
                                opt.train_subfolder,
                                opt.batch_size,
                                shuffle=True)
test_loader = util.data_loader('data/' + opt.dataset + '/' +
                               opt.test_subfolder,
                               opt.test_batch_size,
                               shuffle=True)
img_size = train_loader.shape[1]
test_img = test_loader.next_batch()
if opt.inverse_order:
    fixed_x_ = test_img[:, :, img_size:, :]
    fixed_y_ = test_img[:, :, 0:img_size, :]
else:
    fixed_x_ = test_img[:, :, 0:img_size, :]
    fixed_y_ = test_img[:, :, img_size:, :]

if img_size != opt.input_size: