import sys

import tensorflow as tf

# open_file, save_file, model_training and model_testing are project-level helpers
# assumed to be imported from the project's utility modules.


def main():
    """Builds the sign-to-gloss phrase datasets and runs model training/testing."""
    print()
    n_classes = int(sys.argv[1])
    model = int(sys.argv[2])
    dataset_info = open_file('data/sign-to-gloss/cleaned/split-files/dataset-info-' + str(n_classes))
    print('Dataset Info set size: ', len(dataset_info.keys()))
    print()
    train_phrase = open_file('data/sign-to-gloss/cleaned/split-files/train-phrase-' + str(n_classes))
    val_phrase = open_file('data/sign-to-gloss/cleaned/split-files/val-phrase-' + str(n_classes))
    test_phrase = open_file('data/sign-to-gloss/cleaned/split-files/test-phrase-' + str(n_classes))
    print('Training Phrase set size: ', len(train_phrase))
    print('Validation Phrase set size: ', len(val_phrase))
    print('Testing Phrase set size: ', len(test_phrase))
    print()
    batch_size = 50
    vocab_size = n_classes + 2
    parameters = {'tar_vocab_size': vocab_size, 'emb_size': 512, 'rnn_size': 512, 'batch_size': batch_size,
                  'epochs': 20, 'train_steps_per_epoch': len(train_phrase) // batch_size, 'rate': 0.3,
                  'val_steps_per_epoch': len(val_phrase) // batch_size,
                  'test_steps': len(test_phrase) // batch_size, 'model': model}
    save_file(parameters, 'results/sign-to-gloss/wlasl-' + str(n_classes) + '/luong/model_' + str(model)
              + '/utils/parameters')
    print()
    print('No. of Training steps per epoch: ', parameters['train_steps_per_epoch'])
    print('No. of Validation steps per epoch: ', parameters['val_steps_per_epoch'])
    print('No. of Testing steps: ', parameters['test_steps'])
    print()
    # Shuffle and batch the phrase splits; drop_remainder keeps every batch at exactly batch_size.
    train_dataset = tf.data.Dataset.from_tensor_slices(train_phrase).shuffle(len(train_phrase))
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    val_dataset = tf.data.Dataset.from_tensor_slices(val_phrase).shuffle(len(val_phrase))
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)
    test_dataset = tf.data.Dataset.from_tensor_slices(test_phrase).shuffle(len(test_phrase))
    test_dataset = test_dataset.batch(batch_size, drop_remainder=True)
    print('Model Training started')
    print()
    # Training is currently disabled; uncomment the next line to retrain the model.
    # model_training(train_dataset, val_dataset, dataset_info, parameters)
    print('Model Testing started')
    print()
    model_testing(test_dataset, dataset_info, parameters)
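# A minimal usage sketch for the driver above (this guard is an assumption, not
# part of the original source): main() reads n_classes and the model number
# from sys.argv, so the script would typically be launched as, e.g.,
#   python run_sign_to_gloss.py 100 1
# where 'run_sign_to_gloss.py' is an illustrative file name.
if __name__ == '__main__':
    main()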
import sys

import tensorflow as tf

# text_retrieve, create_new_dataset, tokenize, save_file, model_training and
# model_testing are project-level helpers assumed to be imported from the
# project's utility modules.


def main():
    """Prepares the gloss-to-English sentence pairs, tokenizes them, and runs model training/testing."""
    print()
    model = int(sys.argv[1])
    train_inp = text_retrieve('spt-tokenized/train.gloss')
    val_inp = text_retrieve('spt-tokenized/val.gloss')
    test_inp = text_retrieve('spt-tokenized/test.gloss')
    train_tar = text_retrieve('spt-tokenized/train.en')
    val_tar = text_retrieve('spt-tokenized/val.en')
    test_tar = text_retrieve('spt-tokenized/test.en')
    print('No. of original sentences in Training set: ', len(train_inp))
    print('No. of original sentences in Validation set: ', len(val_inp))
    print('No. of original sentences in Test set: ', len(test_inp))
    print()
    # create_new_dataset presumably rebuilds the sentence pairs subject to max_length tokens.
    max_length = 40
    train_inp, train_tar = create_new_dataset(train_inp, train_tar, max_length)
    val_inp, val_tar = create_new_dataset(val_inp, val_tar, max_length)
    test_inp, test_tar = create_new_dataset(test_inp, test_tar, max_length)
    print('No. of new sentences in Training set: ', len(train_inp))
    print('No. of new sentences in Validation set: ', len(val_inp))
    print('No. of new sentences in Test set: ', len(test_inp))
    print()
    inp_lang, train_inp, val_inp, test_inp = tokenize(train_inp, val_inp, test_inp, max_length)
    tar_lang, train_tar, val_tar, test_tar = tokenize(train_tar, val_tar, test_tar, max_length)
    print('Input Vocabulary size: ', len(inp_lang.word_index) + 1)
    print('Target Vocabulary size: ', len(tar_lang.word_index) + 1)
    print()
    batch_size = 128
    # Persist the word/index lookups so they can be reused at inference time.
    save_file(inp_lang.word_index, 'model_' + str(model) + '/utils/inp-word-index')
    save_file(inp_lang.index_word, 'model_' + str(model) + '/utils/inp-index-word')
    save_file(tar_lang.word_index, 'model_' + str(model) + '/utils/tar-word-index')
    save_file(tar_lang.index_word, 'model_' + str(model) + '/utils/tar-index-word')
    parameters = {'inp_vocab_size': len(inp_lang.word_index) + 1, 'tar_vocab_size': len(tar_lang.word_index) + 1,
                  'emb_size': 512, 'rnn_size': 512, 'batch_size': batch_size, 'epochs': 30,
                  'train_steps_per_epoch': len(train_inp) // batch_size, 'rate': 0.3,
                  'val_steps_per_epoch': len(val_inp) // batch_size, 'test_steps': len(test_inp) // batch_size,
                  'max_length': max_length, 'model': model}
    save_file(parameters, 'model_' + str(model) + '/utils/parameters')
    print()
    print('No. of Training steps per epoch: ', parameters['train_steps_per_epoch'])
    print('No. of Validation steps per epoch: ', parameters['val_steps_per_epoch'])
    print('No. of Testing steps: ', parameters['test_steps'])
    print()
    train_dataset = tf.data.Dataset.from_tensor_slices((train_inp, train_tar)).shuffle(len(train_inp))
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    val_dataset = tf.data.Dataset.from_tensor_slices((val_inp, val_tar)).shuffle(len(val_inp))
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)
    test_dataset = tf.data.Dataset.from_tensor_slices((test_inp, test_tar)).shuffle(len(test_inp))
    test_dataset = test_dataset.batch(batch_size, drop_remainder=True)
    print('Model training started')
    print()
    model_training(train_dataset, val_dataset, parameters)
    model_testing(test_dataset, parameters)
import json
import os

import torch
from flask import jsonify, request, Response
from numpy.random import randint
from werkzeug.utils import secure_filename

# `utils` (project helper module) and `mail` (the configured mail instance) are
# assumed to be available in this module's scope.


def upload_model():
    """Stores a checkpoint or saved model and returns a unique ID."""
    if request.method == 'POST' and 'file' in request.files:
        f = request.files['file']
        if 'model' in request.form.keys():
            network = json.loads(request.form['model'])
            new_checkpoint = utils.insert_params(network, f)
            # Check that the uploaded model actually works with the supplied description.
            valid = utils.model_testing(new_checkpoint)
            if not valid:
                return jsonify({'message': 'Your model is inconsistent with the description. Verify and retry'})
        else:
            return jsonify({'message': 'model param not present in request. Fill and resubmit'})
        sec_file = secure_filename(f.filename)
        # Generate a random 20-digit id. TODO: write a function that checks for conflicts.
        model_id = ''.join(str(e) for e in list(randint(0, 9, 20)))
        filename = os.path.join('checkpoints', sec_file)
        filename = filename.split('.')[0] + '_' + model_id + '.pth'
        torch.save(new_checkpoint, filename)
        # Register the new checkpoint in the model store.
        utils.insert_id(model_id, filename.split('/')[-1])
        # Send a confirmation email if an address was provided.
        if 'email' in request.form.keys():
            recipient = request.form['email']
            subject = 'MODEL ID CONFIRMATION'
            sender = '*****@*****.**'
            body = 'Your model has been successfully uploaded and saved. Your MODEL ID is: ' + model_id
            body += ' Please use this ID if you want to make predictions based on this model'
            res = utils.send_email(mail, body, subject, sender, recipient)
            print(res)
        return jsonify({'status': 'saved', 'id': model_id})
    else:
        return Response('Bad request', status=400)
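# A minimal client-side sketch for the endpoint above. The route '/upload', the
# host 'http://localhost:5000', and the keys inside the 'model' JSON are
# assumptions (the route decorator and model schema are not shown in this
# snippet); the multipart field names 'file', 'model' and 'email' match what
# upload_model() reads from request.files and request.form.
import json

import requests

with open('checkpoint.pth', 'rb') as checkpoint_file:
    response = requests.post(
        'http://localhost:5000/upload',
        files={'file': checkpoint_file},
        data={
            'model': json.dumps({'input_size': 784, 'output_size': 10}),  # hypothetical description
            'email': 'user@example.com',
        },
    )
print(response.json())  # e.g. {'status': 'saved', 'id': '...'}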
import sys

import tensorflow as tf
import tensorflow_datasets as tfds

# open_file, save_file, tokenize, model_training and model_testing are
# project-level helpers assumed to be imported from the project's utility modules.


def main():
    """Prepares the subword-tokenized gloss-to-grapheme datasets and runs Transformer training/testing."""
    print()
    model = int(sys.argv[1])
    train_inp = open_file('data/gloss-to-grapheme/swt-tokenized/train.gloss')
    val_inp = open_file('data/gloss-to-grapheme/swt-tokenized/val.gloss')
    test_inp = open_file('data/gloss-to-grapheme/swt-tokenized/test.gloss')
    train_tar = open_file('data/gloss-to-grapheme/swt-tokenized/train.en')
    val_tar = open_file('data/gloss-to-grapheme/swt-tokenized/val.en')
    test_tar = open_file('data/gloss-to-grapheme/swt-tokenized/test.en')
    print('No. of original sentences in Training set: ', len(train_inp))
    print('No. of original sentences in Validation set: ', len(val_inp))
    print('No. of original sentences in Test set: ', len(test_inp))
    print()
    train_inp, val_inp, test_inp = tokenize(train_inp, val_inp, test_inp)
    train_tar, val_tar, test_tar = tokenize(train_tar, val_tar, test_tar)
    batch_size = 128
    # Load the previously trained subword tokenizers; vocab_size + 2 leaves room for start/end token ids.
    loc_from = '/home/preetham/Documents/Preetham/masters-thesis/results/gloss-to-grapheme/tokenizer/'
    inp_lang = tfds.deprecated.text.SubwordTextEncoder.load_from_file(loc_from + 'gloss-swt')
    tar_lang = tfds.deprecated.text.SubwordTextEncoder.load_from_file(loc_from + 'en-swt')
    print('Input Vocabulary size: ', inp_lang.vocab_size + 2)
    print('Target Vocabulary size: ', tar_lang.vocab_size + 2)
    print()
    # Models 1-4 use a smaller Transformer (d_model=512); models above 4 use a larger one (d_model=1024).
    if model <= 4:
        n_layers = model
        d_model = 512
        dropout = 0.1
        n_heads = 8
    else:
        n_layers = model - 4
        d_model = 1024
        dropout = 0.3
        n_heads = 16
    parameters = {'inp_vocab_size': inp_lang.vocab_size + 2, 'tar_vocab_size': tar_lang.vocab_size + 2,
                  'n_layers': n_layers, 'd_model': d_model, 'dff': 4 * d_model, 'batch_size': batch_size,
                  'epochs': 30, 'n_heads': n_heads, 'train_steps_per_epoch': len(train_inp) // batch_size,
                  'dropout': dropout, 'val_steps_per_epoch': len(val_inp) // batch_size,
                  'test_steps': len(test_inp) // batch_size, 'model': model}
    save_file(parameters, 'results/gloss-to-grapheme/transformer/model_' + str(model) + '/utils/parameters')
    print()
    print('No. of Training steps per epoch: ', parameters['train_steps_per_epoch'])
    print('No. of Validation steps per epoch: ', parameters['val_steps_per_epoch'])
    print('No. of Testing steps: ', parameters['test_steps'])
    print()
    # padded_batch pads each batch of subword sequences to the longest sequence in that batch.
    train_dataset = tf.data.Dataset.from_tensor_slices((train_inp, train_tar))
    train_dataset = train_dataset.shuffle(len(train_inp)).padded_batch(batch_size)
    val_dataset = tf.data.Dataset.from_tensor_slices((val_inp, val_tar))
    val_dataset = val_dataset.shuffle(len(val_inp)).padded_batch(batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices((test_inp, test_tar))
    test_dataset = test_dataset.shuffle(len(test_inp)).padded_batch(batch_size)
    model_training(train_dataset, val_dataset, parameters)
    model_testing(test_dataset, parameters)