Example #1
import sys

import tensorflow as tf

# open_file, save_file, model_training and model_testing are helpers from the
# project's own modules (see the sketch after this example for open_file/save_file).


def main():
    print()
    n_classes = int(sys.argv[1])
    model = int(sys.argv[2])
    dataset_info = open_file(
        'data/sign-to-gloss/cleaned/split-files/dataset-info-' +
        str(n_classes))
    print('Dataset Info set size: ', len(dataset_info.keys()))
    print()
    train_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/train-phrase-' +
        str(n_classes))
    val_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/val-phrase-' + str(n_classes))
    test_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/test-phrase-' + str(n_classes))
    print('Training Phrase set size: ', len(train_phrase))
    print('Validation Phrase set size: ', len(val_phrase))
    print('Testing Phrase set size: ', len(test_phrase))
    print()
    batch_size = 50
    vocab_size = n_classes + 2
    parameters = {
        'tar_vocab_size': vocab_size,
        'emb_size': 512,
        'rnn_size': 512,
        'batch_size': batch_size,
        'epochs': 20,
        'train_steps_per_epoch': len(train_phrase) // batch_size,
        'rate': 0.3,
        'val_steps_per_epoch': len(val_phrase) // batch_size,
        'test_steps': len(test_phrase) // batch_size,
        'model': model
    }
    save_file(
        parameters, 'results/sign-to-gloss/wlasl-' + str(n_classes) +
        '/luong/model_' + str(model) + '/utils/parameters')
    print()
    print('No. of Training steps per epoch: ',
          parameters['train_steps_per_epoch'])
    print('No. of Validation steps per epoch: ',
          parameters['val_steps_per_epoch'])
    print('No. of Testing steps: ', parameters['test_steps'])
    print()
    # Build shuffled, batched tf.data pipelines for each split.
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_phrase)).shuffle(len(train_phrase))
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (val_phrase)).shuffle(len(val_phrase))
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_phrase)).shuffle(len(test_phrase))
    test_dataset = test_dataset.batch(batch_size, drop_remainder=True)
    print('Model Training started')
    print()
    # Training is commented out in this example; only testing is run.
    # model_training(train_dataset, val_dataset, dataset_info, parameters)
    print('Model Testing started')
    print()
    model_testing(test_dataset, dataset_info, parameters)
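The open_file and save_file helpers used above come from the project's own utility modules and are not shown in this example. A minimal sketch, assuming they are simple pickle-based wrappers (the names match, but the implementation here is an assumption, not the repository's actual code):

import pickle


def open_file(path):
    # Assumed helper: load a pickled object from disk.
    with open(path, 'rb') as f:
        return pickle.load(f)


def save_file(obj, path):
    # Assumed helper: pickle an object to disk.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)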
Example #2
import sys

import tensorflow as tf

# text_retrieve, create_new_dataset, tokenize, save_file, model_training and
# model_testing are helpers defined elsewhere in the project (a sketch of
# create_new_dataset follows this example).


def main():
    print()
    model = int(sys.argv[1])
    train_inp = text_retrieve('spt-tokenized/train.gloss')
    val_inp = text_retrieve('spt-tokenized/val.gloss')
    test_inp = text_retrieve('spt-tokenized/test.gloss')
    train_tar = text_retrieve('spt-tokenized/train.en')
    val_tar = text_retrieve('spt-tokenized/val.en')
    test_tar = text_retrieve('spt-tokenized/test.en')
    print('No. of original sentences in Training set: ', len(train_inp))
    print('No. of original sentences in Validation set: ', len(val_inp))
    print('No. of original sentences in Test set: ', len(test_inp))
    print()
    max_length = 40
    train_inp, train_tar = create_new_dataset(train_inp, train_tar, max_length)
    val_inp, val_tar = create_new_dataset(val_inp, val_tar, max_length)
    test_inp, test_tar = create_new_dataset(test_inp, test_tar, max_length)
    print('No. of new sentences in Training set: ', len(train_inp))
    print('No. of new sentences in Validation set: ', len(val_inp))
    print('No. of new sentences in Test set: ', len(test_inp))
    print()
    inp_lang, train_inp, val_inp, test_inp = tokenize(train_inp, val_inp, test_inp, max_length)
    tar_lang, train_tar, val_tar, test_tar = tokenize(train_tar, val_tar, test_tar, max_length)
    print('Input Vocabulary size: ', len(inp_lang.word_index) + 1)
    print('Target Vocabulary size: ', len(tar_lang.word_index) + 1)
    print()
    batch_size = 128
    save_file(inp_lang.word_index, 'model_' + str(model) + '/utils/inp-word-index')
    save_file(inp_lang.index_word, 'model_' + str(model) + '/utils/inp-index-word')
    save_file(tar_lang.word_index, 'model_' + str(model) + '/utils/tar-word-index')
    save_file(tar_lang.index_word, 'model_' + str(model) + '/utils/tar-index-word')
    parameters = {
        'inp_vocab_size': len(inp_lang.word_index) + 1,
        'tar_vocab_size': len(tar_lang.word_index) + 1,
        'emb_size': 512,
        'rnn_size': 512,
        'batch_size': batch_size,
        'epochs': 30,
        'train_steps_per_epoch': len(train_inp) // batch_size,
        'rate': 0.3,
        'val_steps_per_epoch': len(val_inp) // batch_size,
        'test_steps': len(test_inp) // batch_size,
        'max_length': max_length,
        'model': model
    }
    save_file(parameters, 'model_' + str(model) + '/utils/parameters')
    print()
    print('No. of Training steps per epoch: ', parameters['train_steps_per_epoch'])
    print('No. of Validation steps per epoch: ', parameters['val_steps_per_epoch'])
    print('No. of Testing steps: ', parameters['test_steps'])
    print()
    train_dataset = tf.data.Dataset.from_tensor_slices((train_inp, train_tar)).shuffle(len(train_inp))
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    val_dataset = tf.data.Dataset.from_tensor_slices((val_inp, val_tar)).shuffle(len(val_inp))
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)
    test_dataset = tf.data.Dataset.from_tensor_slices((test_inp, test_tar)).shuffle(len(test_inp))
    test_dataset = test_dataset.batch(batch_size, drop_remainder=True)
    print('Model training started')
    print()
    model_training(train_dataset, val_dataset, parameters)
    model_testing(test_dataset, parameters)
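create_new_dataset is another project helper whose body is not shown. A plausible minimal sketch, assuming it simply drops sentence pairs whose whitespace-tokenized length exceeds max_length (the exact filtering rule is an assumption):

def create_new_dataset(inp_sentences, tar_sentences, max_length):
    # Assumed behaviour: keep only pairs where both sides fit within max_length tokens.
    new_inp, new_tar = [], []
    for inp, tar in zip(inp_sentences, tar_sentences):
        if len(inp.split()) <= max_length and len(tar.split()) <= max_length:
            new_inp.append(inp)
            new_tar.append(tar)
    return new_inp, new_tar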
Example #3
import json
import os

import torch
from flask import jsonify, request, Response
from numpy.random import randint
from werkzeug.utils import secure_filename

# `utils` and `mail` are application-level objects defined elsewhere in the project.


def upload_model():
    """Stores a checkpoint or saved model and returns a unique ID."""

    if request.method == 'POST' and 'file' in request.files:
        f = request.files['file']
        print(type(f))
        #valid , response = utils.validate_model(f)

        if "model" in request.form.keys():
            network = json.loads(request.form['model'])
            new_checkpoint = utils.insert_params(network, f)

            # check if model works

            valid = utils.model_testing(new_checkpoint)
            #print(valid)
            if not valid:
                return jsonify({
                    "message":
                    "Your model is inconsistent with the description. Verify and retry"
                })

        else:
            return jsonify({
                "message":
                "model param not present in request. Fill and resubmit"
            })

        sec_file = secure_filename(f.filename)

        # generate random id: TODO: write a function that checks for conflict
        model_id = ''.join(str(e) for e in list(randint(0, 9, 20)))

        # f.save(os.path.join('checkpoints', sec_file))
        filename = os.path.join('checkpoints', sec_file)
        # Append the model id before the extension; os.path.splitext is safer
        # than splitting on '.' when the filename contains extra dots.
        filename = os.path.splitext(filename)[0] + "_" + model_id + ".pth"
        torch.save(new_checkpoint, filename)
        # insert into model store
        utils.insert_id(model_id, filename.split('/')[-1])

        # send email if present

        if "email" in request.form.keys():
            recipient = request.form["email"]

            subject = "MODEL ID CONFIRMATION"
            sender = '*****@*****.**'
            body = 'Your model has been successfully uploaded and saved. Your MODEL ID is: ' + model_id
            body += ' Please use this ID if you want to make predictions based on this model'

            res = utils.send_email(mail, body, subject, sender, recipient)
            print(res)

        return jsonify({"status": "saved", "id": model_id})
    else:
        return Response('Bad request', status=400)
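The route decorator for upload_model is not shown in this snippet. Assuming the view is mounted at something like /upload and accepts POST, a client could exercise it roughly as follows; the URL, port, checkpoint filename, and the model-description schema are assumptions, not the application's actual interface:

import json

import requests

# Hypothetical model description; the exact schema expected by utils.insert_params
# is an assumption.
model_description = {'arch': 'resnet18', 'num_classes': 10}

with open('checkpoint.pth', 'rb') as f:
    response = requests.post(
        'http://localhost:5000/upload',          # assumed host and route
        files={'file': f},                        # the uploaded checkpoint
        data={
            'model': json.dumps(model_description),
            'email': 'user@example.com',          # optional; triggers the confirmation email
        },
    )
print(response.json())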
Example #4
import sys

import tensorflow as tf
import tensorflow_datasets as tfds

# open_file, save_file, tokenize, model_training and model_testing are helpers
# from the project's own modules.


def main():
    print()
    model = int(sys.argv[1])
    train_inp = open_file('data/gloss-to-grapheme/swt-tokenized/train.gloss')
    val_inp = open_file('data/gloss-to-grapheme/swt-tokenized/val.gloss')
    test_inp = open_file('data/gloss-to-grapheme/swt-tokenized/test.gloss')
    train_tar = open_file('data/gloss-to-grapheme/swt-tokenized/train.en')
    val_tar = open_file('data/gloss-to-grapheme/swt-tokenized/val.en')
    test_tar = open_file('data/gloss-to-grapheme/swt-tokenized/test.en')
    print('No. of original sentences in Training set: ', len(train_inp))
    print('No. of original sentences in Validation set: ', len(val_inp))
    print('No. of original sentences in Test set: ', len(test_inp))
    print()
    train_inp, val_inp, test_inp = tokenize(train_inp, val_inp, test_inp)
    train_tar, val_tar, test_tar = tokenize(train_tar, val_tar, test_tar)
    batch_size = 128
    # Hard-coded location of the pre-trained subword tokenizers.
    loc_from = '/home/preetham/Documents/Preetham/masters-thesis/results/gloss-to-grapheme/tokenizer/'
    inp_lang = tfds.deprecated.text.SubwordTextEncoder.load_from_file(
        loc_from + 'gloss-swt')
    tar_lang = tfds.deprecated.text.SubwordTextEncoder.load_from_file(
        loc_from + 'en-swt')
    print('Input Vocabulary size: ', inp_lang.vocab_size + 2)
    print('Target Vocabulary size: ', tar_lang.vocab_size + 2)
    print()
    if model <= 4:
        n_layers = model
        d_model = 512
        dropout = 0.1
        n_heads = 8
    else:
        n_layers = model - 4
        d_model = 1024
        dropout = 0.3
        n_heads = 16
    parameters = {
        'inp_vocab_size': inp_lang.vocab_size + 2,
        'tar_vocab_size': tar_lang.vocab_size + 2,
        'n_layers': n_layers,
        'd_model': d_model,
        'dff': 4 * d_model,
        'batch_size': batch_size,
        'epochs': 30,
        'n_heads': n_heads,
        'train_steps_per_epoch': len(train_inp) // batch_size,
        'dropout': dropout,
        'val_steps_per_epoch': len(val_inp) // batch_size,
        'test_steps': len(test_inp) // batch_size,
        'model': model
    }
    save_file(
        parameters, 'results/gloss-to-grapheme/transformer/model_' +
        str(model) + '/utils/parameters')
    print()
    print('No. of Training steps per epoch: ',
          parameters['train_steps_per_epoch'])
    print('No. of Validation steps per epoch: ',
          parameters['val_steps_per_epoch'])
    print('No. of Testing steps: ', parameters['test_steps'])
    print()
    train_dataset = tf.data.Dataset.from_tensor_slices((train_inp, train_tar))
    train_dataset = train_dataset.shuffle(
        len(train_inp)).padded_batch(batch_size)
    val_dataset = tf.data.Dataset.from_tensor_slices((val_inp, val_tar))
    val_dataset = val_dataset.shuffle(len(val_inp)).padded_batch(batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices((test_inp, test_tar))
    test_dataset = test_dataset.shuffle(len(test_inp)).padded_batch(batch_size)
    model_training(train_dataset, val_dataset, parameters)
    model_testing(test_dataset, parameters)
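The tokenize helper in this example is project code and is not shown. Given the "+ 2" vocabulary sizes above, a reasonable guess is that each sentence is encoded with the subword encoder and framed by start/end token IDs, in the style of the standard TensorFlow transformer tutorial; this sketch is an assumption, not the repository's actual function:

def encode_sentences(sentences, encoder):
    # Assumed encoding: subword IDs framed by start (vocab_size) and
    # end (vocab_size + 1) tokens, matching the "+ 2" vocabulary sizes above.
    start, end = encoder.vocab_size, encoder.vocab_size + 1
    return [[start] + encoder.encode(sentence) + [end] for sentence in sentences]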