Example #1
import numpy as np

import batcher


def loadDat(rootDir='data', m=None):
    # Training set (m optionally caps the number of examples per split)
    X = np.load(rootDir + '/src.npy')
    Y = np.load(rootDir + '/dst.npy')

    # Randomly shuffle data
    permInds = np.random.permutation(X.shape[0])
    X = X[permInds]
    Y = Y[permInds]

    #XTest = np.load(rootDir + '/srcTest.npy')[:m]
    #YTest = np.load(rootDir + '/dstTest.npy')[:m]

    # 80/10/10 train/test/validation split; slice bounds must be ints
    M = X.shape[0]
    XTrain, YTrain = X[:int(.8 * M)][:m], Y[:int(.8 * M)][:m]
    XTest, YTest = X[int(.8 * M):int(.9 * M)][:m], Y[int(.8 * M):int(.9 * M)][:m]
    XVal, YVal = X[int(.9 * M):][:m], Y[int(.9 * M):][:m]

    batchTrain = batcher.Batcher(XTrain, YTrain)
    batchVal = batcher.Batcher(XVal, YVal)
    batchTest = batcher.Batcher(XTest, YTest)

    # Vocabulary (stored as pickled dicts, hence allow_pickle)
    vocabSrc = np.load(rootDir + '/vocabSrc.npy', allow_pickle=True,
                       encoding='latin1').tolist()
    vocabDst = np.load(rootDir + '/vocabDst.npy', allow_pickle=True,
                       encoding='latin1').tolist()

    vocabSrcSz = len(vocabSrc)
    vocabDstSz = len(vocabDst)

    return batchTrain, batchVal, batchTest, vocabSrc, vocabDst, vocabSrcSz, vocabDstSz
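
All of the examples on this page revolve around a project-local batcher.Batcher class whose source is not shown, and whose constructor varies between projects. For orientation, here is a minimal sketch of the in-memory (X, Y) variant that Examples #1, #5, #8 and #9 appear to assume; the constructor, the next_batch method and the epoch counter are inferred from the call sites, not taken from any actual batcher module.

import numpy as np


class Batcher:
    """Minimal sketch (assumption): serves (X, Y) in shuffled mini-batches."""

    def __init__(self, X, Y):
        self.X, self.Y = X, Y
        self.epoch = 0
        self._order = np.random.permutation(len(X))
        self._pos = 0

    def next_batch(self, batch_size):
        # Reshuffle and advance the epoch counter once the data is exhausted
        if self._pos + batch_size > len(self.X):
            self._order = np.random.permutation(len(self.X))
            self._pos = 0
            self.epoch += 1
        inds = self._order[self._pos:self._pos + batch_size]
        self._pos += batch_size
        return self.X[inds], self.Y[inds]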
Example #2
import argparse

import torch as t

# project-local modules
import batcher
import model as mdl
import parameters


def main():
    parser = argparse.ArgumentParser(description='IFT6135')
    parser.add_argument('--batch-size', type=int, default=32)
    # type=bool would parse any non-empty string (even "False") as True,
    # so expose --use-cuda as a flag instead
    parser.add_argument('--use-cuda', action='store_true')
    parser.add_argument('--model', default=None)

    args = parser.parse_args()
    params = parameters.params

    test_batch_loader = batcher.Batcher("./Data/test/", shuffle=False, test=True)

    model = mdl.ConvNet(params)
    model.load_state_dict(t.load(args.model))
    if args.use_cuda:
        model = model.cuda()

    test_step = model.tester()
    img, pred = test_batch_loader.test(test_step, use_cuda=args.use_cuda)

    # the with-block closes the file; no explicit close() is needed
    with open('./Results/current_result.csv', mode='w') as csv_file:
        csv_file.write("id,label\n")
        for i in range(len(img)):
            csv_file.write(img[i].split('/')[-1][:-4] + ',' +
                           test_batch_loader.classes[pred[i]] + '\n')
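
Examples #2 and #7 use a different, directory-based variant of Batcher that walks an image folder and exposes evaluation helpers. Its interface, as implied by the call sites, might look roughly like the skeleton below; the method names come from the examples, everything else is guesswork.

class DirectoryBatcher:
    """Hypothetical interface of the directory-based Batcher (names inferred
    from Examples #2 and #7; bodies intentionally omitted)."""

    def __init__(self, data_dir, shuffle=True, test=False):
        self.epoch = 0     # incremented as next_batch exhausts the data
        self.classes = []  # class names, indexed by prediction id

    def next_batch(self, batch_size):
        """Return one (images, labels) training batch."""

    def eval(self, valid_step, use_cuda=False):
        """Run valid_step over the whole set; return (loss, accuracy)."""

    def test(self, test_step, use_cuda=False):
        """Run test_step over the whole set; return (paths, predictions)."""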
Example #3
import batcher as bt
# ClassificationCNN is the project's model class; the module name below is a
# guess for illustration
from classification_cnn import ClassificationCNN


def main():
    print("main")
    bs = 8
    model = ClassificationCNN(shapein=(bs, 450, 600, 3),
                              convpoolSizes=[(20, 11, 11), (20, 11, 11),
                                             (20, 5, 5)],
                              denseSizes=[300, 100],
                              k=7)
    batch_gen = bt.Batcher()
    model.fit(batch_gen.generate(bs), batch_gen.testGenerate(bs))
Example #4
import tensorflow as tf  # written against the TF1 graph/session API

import batcher

# DATASET is a module-level constant defined elsewhere in the project


def get_images(tfrecord_paths, label_name='wealthpooled', return_meta=False):
    '''
    Args
    - tfrecord_paths: list of str, length N <= 32, paths of TFRecord files
    - label_name: str, name of the label field in the TFRecords
    - return_meta: bool, if True return the full batch dict instead of
        just the images

    Returns: np.array, shape [N, 224, 224, 8], type float32
    '''
    init_iter, batch_op = batcher.Batcher(tfrecord_files=tfrecord_paths,
                                          dataset=DATASET,
                                          batch_size=32,
                                          ls_bands='ms',
                                          nl_band='merge',
                                          label_name=label_name,
                                          shuffle=False,
                                          augment=False,
                                          negatives='zero',
                                          normalize=True).get_batch()
    with tf.Session() as sess:
        sess.run(init_iter)
        if return_meta:
            ret = sess.run(batch_op)
        else:
            ret = sess.run(batch_op['images'])
    return ret
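
A call might look like the following; the shard paths are placeholders, and the keys of the full batch dict depend on the Batcher implementation.

# Hypothetical usage; the paths are illustrative only
paths = ['data/tfrecords/shard_000.tfrecord',
         'data/tfrecords/shard_001.tfrecord']
images = get_images(paths)                        # np.array, [2, 224, 224, 8]
full_batch = get_images(paths, return_meta=True)  # dict of all batch tensors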
Example #5
import tensorflow as tf  # written against the TF1 graph/session API

import batcher
import model

# n_classes, n_train, n_test, GCN_train, y_train, GCN_test and y_test are
# assumed to be defined at module level (e.g. in the surrounding notebook).


def main():
    # Parameters
    learning_rate = 0.005
    batch_size = 32
    test_batch_size = 128
    training_epochs = 10

    # Use logits as a proxy to test whether the graph is already built; this
    # re-run guard only has an effect in a notebook-style global scope, since
    # inside a fresh function call logits is always undefined
    try:
        defined_test = logits
    except NameError:
        print('logits did not exist, creating')
        # tf Graph input
        x = tf.placeholder("float", [None, 32, 32, 1], name='X_placeholder')
        y = tf.placeholder("float", [None, n_classes], name='Y_placeholder')
        # Define model
        logits = model.SignModel(x, n_classes)
        # Define loss and optimizer
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate).minimize(cost)
    else:
        print('Model, variables, placeholders, and operations already defined')

    last_cp = tf.train.latest_checkpoint('.', latest_filename=None)
    if last_cp is None:
        print('Checkpoint not found')
        # Initializing the variables
        init = tf.global_variables_initializer()
    else:
        print('Checkpoint found: {}'.format(last_cp))

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # Launch the graph
    with tf.Session() as sess:
        if last_cp is not None:
            # Restore variables from disk.
            print('Restoring: {}'.format(last_cp))
            saver.restore(sess, last_cp)
        else:
            print('No checkpoint to restore, initializing')
            sess.run(init)

        # Training cycle
        data = batcher.Batcher(GCN_train, y_train)

        for epoch in range(training_epochs):
            total_batch = int(n_train / batch_size)
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y = data.next_batch(batch_size)
                # Run optimization op (backprop) and cost op (to get loss value)
                sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            # Display logs per epoch step
            c = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c))

        print("Optimization Finished!")

        # Save the variables to disk.
        save_path = saver.save(sess, "./SignsModel.ckpt")
        print("Model saved in file: %s" % save_path)

        # Test model
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))

        # Calculate accuracy in batches to avoid running out of memory
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        testdata = batcher.Batcher(GCN_test, y_test)
        test_batches = int(n_test / test_batch_size)
        # Loop over all batches
        total_acc = 0
        for i in range(test_batches):
            # draw from the test batcher, not the training one
            test_batch_x, test_batch_y = testdata.next_batch(test_batch_size)
            total_acc += accuracy.eval({x: test_batch_x, y: test_batch_y})

        print("Accuracy:", total_acc / test_batches)
Example #6
        '--results-logs-path',
        default='./results/',
        metavar='RP',
        help='folder where to store the results (default: ./results/)')
    parser.add_argument('--input-file', metavar='IF', help='input file')
    parser.add_argument('--output-file', metavar='OF', help='output file')
    parser.add_argument('--model', metavar="MODEL", help='model file')
    parser.add_argument('--decoder',
                        metavar="DEC",
                        default=0,
                        type=int,
                        help='decoder to use for translation')

    args = parser.parse_args()

    batch_loader = batcher.Batcher("./data/")

    params = parameters.params
    params["vocab_size"] = batch_loader.vocab_size
    params["dropout"] = args.dropout

    model = model.RNNAutoEncoder(params)
    model.load_state_dict(t.load(args.model))
    if args.use_cuda:
        model.cuda()
    # switch to eval mode for inference (train() would leave dropout active)
    model.eval()

    translate = model.translator(batch_loader)
    input_file = open(args.input_file, 'r')
    output_file = open(args.output_file, 'w')
    translate(input_file,
Example #7
import matplotlib.pyplot as plt
import torch as t
from torch.optim import SGD

# project-local modules
import batcher
import model as mdl


def main(args, params, valid_acc_thresh=0):
    train_batch_loader = batcher.Batcher("./Data/train/")
    valid_batch_loader = batcher.Batcher("./Data/valid/")

    model = mdl.ConvNet(params)
    if args.use_cuda:
        model = model.cuda()

    learning_rate = params["learning_rate"]
    optimizer = SGD(model.parameters(), learning_rate)

    train_step = model.trainer(optimizer)
    valid_step = model.validator()

    tracking_valid_loss = []
    tracking_valid_acc = []
    tracking_train_loss = []
    tracking_train_loss_epoch = []
    tracking_train_acc = []
    tracking_train_acc_epoch = []
    current_epoch = 0

    while current_epoch < args.epochs:
        iteration = 0
        while current_epoch == train_batch_loader.epoch:
            batch = train_batch_loader.next_batch(batch_size=args.batch_size)

            # the step runs every iteration; we only log every 50 iterations
            loss, acc = train_step(batch, use_cuda=args.use_cuda)
            tracking_train_loss.append(loss)
            tracking_train_acc.append(acc)
            if iteration % 50 == 0:
                print("Epoch: " + str(current_epoch + 1) + ", It: " +
                      str(iteration + 1) + ", Loss: " + str(loss))
            iteration += 1
        current_epoch += 1
        loss_valid, acc_valid = valid_batch_loader.eval(valid_step,
                                                        use_cuda=args.use_cuda)
        tracking_valid_loss.append(loss_valid)
        tracking_valid_acc.append(acc_valid)
        tracking_train_loss_epoch.append(
            sum(tracking_train_loss) / float(len(tracking_train_loss)))
        tracking_train_loss = []
        tracking_train_acc_epoch.append(
            sum(tracking_train_acc) / float(len(tracking_train_acc)))
        tracking_train_acc = []
        print('\n')
        print("***VALIDATION***")
        print("Epoch: " + str(current_epoch) + ", Loss: " + str(loss_valid) +
              ", Acc: " + str(acc_valid))
        print("****************")
        print('\n')
        if tracking_valid_acc[-1] < valid_acc_thresh:
            break
        if current_epoch >= 3:
            """if current_epoch >= 8:
                learning_rate = learning_rate/2
                optimizer = SGD(model.parameters(), learning_rate)
                train_step = model.trainer(optimizer)
            else:"""
            if tracking_valid_loss[-2] <= tracking_valid_loss[-1]:
                learning_rate = learning_rate / 2
                optimizer = SGD(model.parameters(), learning_rate)
                train_step = model.trainer(optimizer)
                print("learning rate adapted to " + str(learning_rate))
    t.save(
        model.state_dict(), "./models/" + args.model_name + "_acc" +
        str(tracking_valid_acc[-1]) + "_e" + str(current_epoch) + ".model")
    plt.plot(range(len(tracking_train_loss_epoch)),
             tracking_train_loss_epoch,
             label="train")
    plt.plot(range(len(tracking_train_loss_epoch)),
             tracking_valid_loss,
             label="valid")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()  # without this the train/valid labels never appear
    plt.show()
    plt.plot(range(len(tracking_train_loss_epoch)),
             tracking_train_acc_epoch,
             label="train")
    plt.plot(range(len(tracking_train_loss_epoch)),
             tracking_valid_acc,
             label="valid")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.legend()
    plt.show()
    with open("./rescc" + ("_m" if args.modified_loss else "") + ".txt",
              'w') as f:
        f.write(str(tracking_train_loss_epoch))
        f.write('\n')
        f.write(str(tracking_train_acc_epoch))
        f.write('\n')
        f.write(str(tracking_valid_loss))
        f.write('\n')
        f.write(str(tracking_valid_acc))
        f.close()

    return tracking_valid_loss[-1], tracking_valid_acc[-1]
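
Since main takes an already-parsed args object, it can also be driven programmatically. A hypothetical invocation with placeholder hyperparameters (mdl.ConvNet presumably expects more keys in params than shown here):

from argparse import Namespace

# Illustrative values only; not the original experiment's settings
args = Namespace(epochs=10, batch_size=64, use_cuda=False,
                 model_name="convnet", modified_loss=False)
params = {"learning_rate": 0.01}
val_loss, val_acc = main(args, params, valid_acc_thresh=0.2)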
Example #8
import batcher


def getData(m, T):
    # genDat is defined elsewhere in the project; m examples, max length T
    xTrain, yTrain = genDat(m, T)
    batchGen = batcher.Batcher(xTrain, yTrain)
    return batchGen
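
Assuming the in-memory Batcher sketched after Example #1, usage would be along these lines (values are illustrative):

batchGen = getData(m=1000, T=30)
xBatch, yBatch = batchGen.next_batch(16)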
Example #9
        outRef = open('translatedRef.txt', 'w')

    m = 130000
    T = 30  #Max vector length, padded
    H = 256
    batchSize = 16
    maxIters = 100010
    numLayers = 4
    embedDim = 128
    vocabIn = 47861
    vocabOut = 22731
    savename = 'NMTeng2viet'
    #Load in data
    X = np.load('datFixed/testSrc.npy')[:m]
    Y = np.load('datFixed/testDst.npy')[:m]
    batchTrain = batcher.Batcher(X, Y)

    #Validation set
    X = np.load('datFixed/valSrc.npy')[:m]
    Y = np.load('datFixed/valDst.npy')[:m]
    batchVal = batcher.Batcher(X, Y)
    # Vocabulary lookup dicts for decoding (pickled, hence allow_pickle)
    vocabDict = np.load('lookupDict.npy', allow_pickle=True)[0]
    vocabDictSrc = np.load('lookupDictSrc.npy', allow_pickle=True)[0]

    print('Initializing Variables')
    #Initialize variables
    UNK = 0
    START = 1
    STOP = 2
Example #10
    test,
    word2id=word2id,
    split='test',
    save_path='data/test/test.hdf5',
    resize=resize)
dev_numeric = data_ops.load_or_make_numeric_examples(
    dev,
    word2id=word2id,
    split='dev',
    save_path='data/dev/dev.hdf5',
    resize=resize)

# Batcher
batcher_train = batcher.Batcher(data=train_numeric,
                                size=20,
                                shuffle=True,
                                pad=True,
                                repeat=True)
batcher_test = batcher.Batcher(data=test_numeric,
                               size=20)  # size=len(test_numeric['image']))
batcher_dev = batcher.Batcher(data=dev_numeric,
                              size=len(dev_numeric['image']))  # one full-set batch

# Graph inputs
images_t = tf.placeholder(
    tf.float32, [None, *resize, 3],
    name='images_t')  # 3 is the number of RGB channels
questions_t = tf.placeholder(tf.int32, [None, None], name='questions_t')
labels_t = tf.placeholder(tf.int32, [None], name='labels_t')
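
A training loop would then feed batches from batcher_train into these placeholders. A hypothetical fragment, assuming the batcher yields a dict keyed like the numeric datasets ('image' appears above; 'question' and 'label' are guesses):

# Hypothetical feed; key names other than 'image' are assumptions
batch = batcher_train.next_batch()
feed = {images_t: batch['image'],
        questions_t: batch['question'],
        labels_t: batch['label']}
# loss_value = sess.run(train_op, feed_dict=feed)  # train_op defined elsewhere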