def main():
    # langs = ['russian', 'turkish', 'spanish', 'arabic', 'georgian', 'german', 'navajo', 'finnish']
    langs = ['arabic']
    sig_root = '/Users/roeeaharoni/GitHub/sigmorphon2016/'
    for lang in langs:
        train_path = '{0}/data/{1}-task1-train'.format(sig_root, lang)
        test_path = '{0}/data/{1}-task1-dev'.format(sig_root, lang)

        # load train and test data
        (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
        (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
        alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

        # align the words to the inflections; the alignment will later be used by the model
        print 'started aligning'
        train_word_pairs = zip(train_lemmas, train_words)
        test_word_pairs = zip(test_lemmas, test_words)
        align_symbol = '~'

        # train_aligned_pairs = dumb_align(train_word_pairs, align_symbol)
        train_aligned_pairs = common.mcmc_align(train_word_pairs, align_symbol)
        # TODO: align together?
        test_aligned_pairs = common.mcmc_align(test_word_pairs, align_symbol)

        # random.shuffle(train_aligned_pairs)
        # for p in train_aligned_pairs[:100]:
        #     generate_template(p)
        print 'finished aligning'

        for i, p in enumerate(test_aligned_pairs):
            print i
            print p[0]
            print p[1] + '\n'
    return
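# The runs above reference a dumb_align baseline that is commented out in
# favor of common.mcmc_align. A minimal sketch of what such a baseline could
# look like (hypothetical; the real dumb_align lives elsewhere in this repo):
def dumb_align_sketch(word_pairs, align_symbol):
    aligned_pairs = []
    for source, target in word_pairs:
        # pad the shorter string on the right so both members have equal
        # length - no character-level alignment search is performed
        length = max(len(source), len(target))
        aligned_pairs.append((source.ljust(length, align_symbol),
                              target.ljust(length, align_symbol)))
    return aligned_pairs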
def main(train_path, dev_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim,
         feat_input_dim, epochs, layers, optimization, regularization, learning_rate, plot, eval_only, ensemble):
    if plot:
        parallelize_training = False
        print 'plotting, parallelization is disabled!!!'
    else:
        parallelize_training = PARALLELIZE

    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    print 'train path = ' + str(train_path)
    print 'dev path = ' + str(dev_path)
    print 'test path = ' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load train, dev and test data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (dev_words, dev_lemmas, dev_feat_dicts) = prepare_sigmorphon_data.load_data(dev_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(3 * MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # tells the FST to step forward in the input
    alphabet.append(STEP)

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # feat 2 int
    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # align the words to the inflections; the alignment will later be used by the model
    print 'started aligning'
    train_word_pairs = zip(train_lemmas, train_words)
    dev_word_pairs = zip(dev_lemmas, dev_words)

    # train_aligned_pairs = dumb_align(train_word_pairs, ALIGN_SYMBOL)
    train_aligned_pairs = common.mcmc_align(train_word_pairs, ALIGN_SYMBOL)
    # TODO: align together?
    dev_aligned_pairs = common.mcmc_align(dev_word_pairs, ALIGN_SYMBOL)

    # random.shuffle(train_aligned_pairs)
    # for p in train_aligned_pairs[:100]:
    #     generate_template(p)
    print 'finished aligning'

    # joint model: cluster the data by POS type (features)
    train_pos_to_data_indices = common.cluster_data_by_pos(train_feat_dicts)
    dev_pos_to_data_indices = common.cluster_data_by_pos(dev_feat_dicts)
    train_cluster_to_data_indices = train_pos_to_data_indices
    dev_cluster_to_data_indices = dev_pos_to_data_indices

    # factored model: cluster the data by inflection type (features)
    # train_morph_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_morph_to_data_indices = common.cluster_data_by_morph_type(dev_feat_dicts, feature_types)
    # train_cluster_to_data_indices = train_morph_to_data_indices
    # dev_cluster_to_data_indices = test_morph_to_data_indices

    # create input for each model and then parallelize or run in loop
    params = []
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):
        params.append([input_dim, hidden_dim, layers, cluster_index, cluster_type, train_lemmas, train_feat_dicts,
                       train_words, dev_lemmas, dev_feat_dicts, train_cluster_to_data_indices, dev_words,
                       dev_cluster_to_data_indices, alphabet, alphabet_index, inverse_alphabet_index, epochs,
                       optimization, results_file_path, train_aligned_pairs, dev_aligned_pairs, feat_index,
                       feature_types, feat_input_dim, feature_alphabet, plot])

    if not eval_only:
        if parallelize_training:
            # set maxtasksperchild=1 to free finished processes
            p = Pool(4, maxtasksperchild=1)
            print 'now training {0} models in parallel'.format(len(train_cluster_to_data_indices))
            p.map(train_cluster_model_wrapper, params)
        else:
            print 'now training {0} models in loop'.format(len(train_cluster_to_data_indices))
            last_epochs = []
            for p in params:
                cluster_index = p[3]
                cluster_name = p[4]
                trained_model, last_epoch = train_cluster_model(*p)

                # print when each model stopped
                epoch_output = 'cluster {0} - {1} stopped on epoch {2}'.format(cluster_index, cluster_name,
                                                                               last_epoch)
                last_epochs.append(epoch_output)
                print epoch_output
            with open(results_file_path + '.epochs', 'w') as f:
                f.writelines(last_epochs)
        print 'finished training all models'
    else:
        print 'skipped training by request. evaluating best models:'

    # eval on dev
    print '=========DEV EVALUATION:========='
    evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types,
                  hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path,
                  sigmorphon_root_dir, dev_cluster_to_data_indices, dev_feat_dicts, dev_lemmas, dev_path,
                  dev_words, train_cluster_to_data_indices, train_path, train_words)

    # eval on test
    print '=========TEST EVALUATION:========='
    test_cluster_to_data_indices = common.cluster_data_by_pos(test_feat_dicts)
    evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types,
                  hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path,
                  sigmorphon_root_dir, test_cluster_to_data_indices, test_feat_dicts, test_lemmas, test_path,
                  test_words, train_cluster_to_data_indices, train_path, train_words)
    return
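# The index markers and the STEP symbol added to the alphabet above let the
# decoder express copying: instead of re-generating lemma characters it can
# emit their positions. A toy illustration (hypothetical, not from the repo):
# inflecting 'walk' -> 'walking' can be encoded as four copy markers followed
# by three literal characters.
toy_prediction = ['0', '1', '2', '3', 'i', 'n', 'g']
toy_lemma = 'walk'
toy_word = ''.join(toy_lemma[int(s)] if s.isdigit() else s for s in toy_prediction)
assert toy_word == 'walking'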
def main(train_path, dev_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim,
         feat_input_dim, epochs, layers, optimization, regularization, learning_rate, plot, eval_only, ensemble):
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    print 'train path = ' + str(train_path)
    print 'dev path = ' + str(dev_path)
    print 'test path = ' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load train, dev and test data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (dev_words, dev_lemmas, dev_feat_dicts) = prepare_sigmorphon_data.load_data(dev_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(3 * MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # tells the FST to step forward in the input
    alphabet.append(STEP)

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # feat 2 int
    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    if not eval_only:
        # align the words to the inflections; the alignment will later be used by the model
        print 'started aligning'
        train_word_pairs = zip(train_lemmas, train_words)
        dev_word_pairs = zip(dev_lemmas, dev_words)

        # train_aligned_pairs = dumb_align(train_word_pairs, ALIGN_SYMBOL)
        train_aligned_pairs = common.mcmc_align(train_word_pairs, ALIGN_SYMBOL)
        # TODO: align together?
        dev_aligned_pairs = common.mcmc_align(dev_word_pairs, ALIGN_SYMBOL)
        print 'finished aligning'

        last_epochs = []
        trained_model, last_epoch = train_model_wrapper(input_dim, hidden_dim, layers, train_lemmas,
                                                        train_feat_dicts, train_words, dev_lemmas, dev_feat_dicts,
                                                        dev_words, alphabet, alphabet_index, inverse_alphabet_index,
                                                        epochs, optimization, results_file_path, train_aligned_pairs,
                                                        dev_aligned_pairs, feat_index, feature_types, feat_input_dim,
                                                        feature_alphabet, plot)

        # print when the model stopped
        epoch_output = 'stopped on epoch {}'.format(last_epoch)
        last_epochs.append(epoch_output)
        print epoch_output
        with open(results_file_path + '.epochs', 'w') as f:
            f.writelines(last_epochs)
        print 'finished training all models'
    else:
        print 'skipped training by request. evaluating best models:'

    # eval on dev
    #~ print '=========DEV EVALUATION:========='
    #~ evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types,
    #~               hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path,
    #~               sigmorphon_root_dir, dev_feat_dicts, dev_lemmas, dev_path, dev_words, train_path)

    # eval on test
    print '=========TEST EVALUATION:========='
    evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types,
                  hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path,
                  sigmorphon_root_dir, test_feat_dicts, test_lemmas, test_path, test_words, train_path)
    return
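# Toy round trip through the char<->int lookups built above, using a
# hypothetical alphabet. Unseen symbols fall back to UNK at test time, which
# is why UNK is appended to the alphabet.
toy_alphabet = ['a', 'b', '~', UNK]
toy_index = dict(zip(toy_alphabet, range(len(toy_alphabet))))
toy_inverse = {index: char for char, index in toy_index.items()}
assert toy_inverse[toy_index['~']] == '~'

def toy_lookup(char):
    # map an out-of-alphabet character to the UNK index
    return toy_index[char] if char in toy_index else toy_index[UNK]

assert toy_lookup('z') == toy_index[UNK]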
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim,
         epochs, layers, optimization, regularization, learning_rate, plot):
    if plot:
        parallelize_training = False
        print 'plotting, parallelization is disabled!!!'
    else:
        parallelize_training = PARALLELIZE

    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    print 'train path = ' + str(train_path)
    print 'test path = ' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load train and test data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # feat 2 int
    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # align the words to the inflections; the alignment will later be used by the model
    print 'started aligning'
    train_word_pairs = zip(train_lemmas, train_words)
    test_word_pairs = zip(test_lemmas, test_words)
    align_symbol = '~'

    # train_aligned_pairs = dumb_align(train_word_pairs, align_symbol)
    train_aligned_pairs = common.mcmc_align(train_word_pairs, align_symbol)
    # TODO: align together?
    test_aligned_pairs = common.mcmc_align(test_word_pairs, align_symbol)

    # random.shuffle(train_aligned_pairs)
    # for p in train_aligned_pairs[:100]:
    #     generate_template(p)
    print 'finished aligning'

    # joint model: cluster the data by POS type (features)
    train_pos_to_data_indices = common.cluster_data_by_pos(train_feat_dicts)
    test_pos_to_data_indices = common.cluster_data_by_pos(test_feat_dicts)
    train_cluster_to_data_indices = train_pos_to_data_indices
    test_cluster_to_data_indices = test_pos_to_data_indices

    # factored model: cluster the data by inflection type (features)
    # train_morph_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_morph_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)
    # train_cluster_to_data_indices = train_morph_to_data_indices
    # test_cluster_to_data_indices = test_morph_to_data_indices

    # create input for each model and then parallelize or run in loop
    params = []
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):
        params.append([input_dim, hidden_dim, layers, cluster_index, cluster_type, train_lemmas, train_feat_dicts,
                       train_words, test_lemmas, test_feat_dicts, train_cluster_to_data_indices, test_words,
                       test_cluster_to_data_indices, alphabet, alphabet_index, inverse_alphabet_index, epochs,
                       optimization, results_file_path, train_aligned_pairs, test_aligned_pairs, feat_index,
                       feature_types, feat_input_dim, feature_alphabet, plot])

    if parallelize_training:
        # set maxtasksperchild=1 to free finished processes
        p = Pool(4, maxtasksperchild=1)
        print 'now training {0} models in parallel'.format(len(train_cluster_to_data_indices))
        models = p.map(train_cluster_model_wrapper, params)
    else:
        print 'now training {0} models in loop'.format(len(train_cluster_to_data_indices))
        for p in params:
            trained_model, last_epoch = train_cluster_model(*p)
    print 'finished training all models'

    # evaluate best models
    os.system('python task1_evaluate_best_joint_structured_models_blstm_feed_fix.py --cnn-mem 6096 --input={0} '
              '--hidden={1} --feat-input={2} --epochs={3} --layers={4} --optimization={5} {6} {7} {8} {9}'.format(
                  input_dim, hidden_dim, feat_input_dim, epochs, layers, optimization, train_path, test_path,
                  results_file_path, sigmorphon_root_dir))
    return
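# Pool.map feeds each worker a single argument, which is why main() packs
# every model input into one list per cluster. A minimal sketch of the
# wrapper's assumed role (the real train_cluster_model_wrapper in this repo
# may do more, e.g. per-process setup):
def train_cluster_model_wrapper_sketch(params):
    # unpack the parameter list built in main() and train one cluster model
    return train_cluster_model(*params)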
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim,
         epochs, layers, optimization, regularization, learning_rate, plot):
    if plot:
        parallelize_training = False
        print 'plotting, parallelization is disabled!!!'
    else:
        parallelize_training = PARALLELIZE

    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    print 'train path = ' + str(train_path)
    print 'test path = ' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load train and test data; task 2 examples carry features on both the source and the target word
    (train_target_words, train_source_words, train_target_feat_dicts,
     train_source_feat_dicts) = prepare_sigmorphon_data.load_data(train_path, 2)
    (test_target_words, test_source_words, test_target_feat_dicts,
     test_source_feat_dicts) = prepare_sigmorphon_data.load_data(test_path, 2)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_target_words, train_source_words,
                                                                   train_target_feat_dicts, train_source_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # feat 2 int
    feature_alphabet = common.get_feature_alphabet(train_source_feat_dicts + train_target_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # align the words to the inflections; the alignment will later be used by the model
    print 'started aligning'
    train_word_pairs = zip(train_source_words, train_target_words)
    test_word_pairs = zip(test_source_words, test_target_words)
    align_symbol = '~'

    # train_aligned_pairs = dumb_align(train_word_pairs, align_symbol)
    train_aligned_pairs = common.mcmc_align(train_word_pairs, align_symbol)
    # TODO: align together?
    test_aligned_pairs = common.mcmc_align(test_word_pairs, align_symbol)

    # random.shuffle(train_aligned_pairs)
    # for p in train_aligned_pairs[:100]:
    #     generate_template(p)
    print 'finished aligning'

    # joint model: cluster the data by POS type (features)
    # TODO: do we need to cluster on both source and target feats?
    # probably enough to cluster on source here because the POS will be the same
    # (no derivational morphology in this task)
    train_pos_to_data_indices = common.cluster_data_by_pos(train_source_feat_dicts)
    test_pos_to_data_indices = common.cluster_data_by_pos(test_source_feat_dicts)
    train_cluster_to_data_indices = train_pos_to_data_indices
    test_cluster_to_data_indices = test_pos_to_data_indices

    # factored model: cluster the data by inflection type (features)
    # train_morph_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_morph_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)
    # train_cluster_to_data_indices = train_morph_to_data_indices
    # test_cluster_to_data_indices = test_morph_to_data_indices

    # create input for each model and then parallelize or run in loop
    params = []
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):
        params.append([input_dim, hidden_dim, layers, cluster_index, cluster_type, train_source_words,
                       train_source_feat_dicts, train_target_words, train_target_feat_dicts, test_source_words,
                       test_source_feat_dicts, train_cluster_to_data_indices, test_target_words,
                       test_target_feat_dicts, test_cluster_to_data_indices, alphabet, alphabet_index,
                       inverse_alphabet_index, epochs, optimization, results_file_path, train_aligned_pairs,
                       test_aligned_pairs, feat_index, feature_types, feat_input_dim, feature_alphabet, plot])

    if parallelize_training:
        # set maxtasksperchild=1 to free finished processes
        p = Pool(4, maxtasksperchild=1)
        print 'now training {0} models in parallel'.format(len(train_cluster_to_data_indices))
        models = p.map(train_cluster_model_wrapper, params)
    else:
        print 'now training {0} models in loop'.format(len(train_cluster_to_data_indices))
        for p in params:
            trained_model, last_epoch = train_cluster_model(*p)
    print 'finished training all models'

    # evaluate best models
    os.system('python task2_evaluate_best_joint_structured_models_blstm_feed_fix.py --cnn-mem 6096 --input={0} '
              '--hidden={1} --feat-input={2} --epochs={3} --layers={4} --optimization={5} {6} {7} {8} {9}'.format(
                  input_dim, hidden_dim, feat_input_dim, epochs, layers, optimization, train_path, test_path,
                  results_file_path, sigmorphon_root_dir))
    return
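# For task 2 the feature alphabet is built over both the source and the
# target feature dictionaries (concatenated above). A sketch of the assumed
# common.get_feature_alphabet interface (the real separator and ordering may
# differ):
def get_feature_alphabet_sketch(feat_dicts):
    feature_alphabet = set()
    for feat_dict in feat_dicts:
        for feat, value in feat_dict.items():
            # one symbol per key:value combination seen in the data
            feature_alphabet.add(feat + ':' + value)
    return sorted(feature_alphabet)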
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim,
         epochs, layers, optimization):
    parallelize_training = PARALLELIZE

    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': REGULARIZATION,
                    'LEARNING_RATE': LEARNING_RATE}

    print 'train path = ' + str(train_path)
    print 'test path = ' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load train and test data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # feat 2 int
    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # align the words to the inflections; the alignment will later be used by the model
    print 'started aligning'
    train_word_pairs = zip(train_lemmas, train_words)
    test_word_pairs = zip(test_lemmas, test_words)
    align_symbol = '~'

    # train_aligned_pairs = dumb_align(train_word_pairs, align_symbol)
    train_aligned_pairs = common.mcmc_align(train_word_pairs, align_symbol)
    # TODO: align together?
    test_aligned_pairs = common.mcmc_align(test_word_pairs, align_symbol)

    # random.shuffle(train_aligned_pairs)
    # for p in train_aligned_pairs[:100]:
    #     generate_template(p)
    print 'finished aligning'

    # joint model: cluster the data by POS type (features)
    train_pos_to_data_indices = common.cluster_data_by_pos(train_feat_dicts)
    test_pos_to_data_indices = common.cluster_data_by_pos(test_feat_dicts)
    train_cluster_to_data_indices = train_pos_to_data_indices
    test_cluster_to_data_indices = test_pos_to_data_indices

    # factored model: cluster the data by inflection type (features)
    # train_morph_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_morph_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)
    # train_cluster_to_data_indices = train_morph_to_data_indices
    # test_cluster_to_data_indices = test_morph_to_data_indices

    # TODO: change build_model (done), train_model (in progress), predict (done), one word loss (done) etc.
    # to take the features into account

    # create input for each model and then parallelize or run in loop
    params = []
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):
        params.append([input_dim, hidden_dim, layers, cluster_index, cluster_type, train_lemmas, train_feat_dicts,
                       train_words, test_lemmas, test_feat_dicts, train_cluster_to_data_indices, test_words,
                       test_cluster_to_data_indices, alphabet, alphabet_index, inverse_alphabet_index, epochs,
                       optimization, results_file_path, train_aligned_pairs, test_aligned_pairs, feat_index,
                       feature_types, feat_input_dim, feature_alphabet])

    if parallelize_training:
        # set maxtasksperchild=1 to free finished processes
        p = Pool(4, maxtasksperchild=1)
        print 'now training {0} models in parallel'.format(len(train_cluster_to_data_indices))
        p.map(train_cluster_model_wrapper, params)
    else:
        print 'now training {0} models in loop'.format(len(train_cluster_to_data_indices))
        for p in params:
            train_cluster_model(*p)
    print 'finished training all models'

    # evaluate best models
    os.system('python task1_evaluate_best_joint_structured_models.py --cnn-mem 6096 --input={0} --hidden={1} '
              '--feat-input={2} --epochs={3} --layers={4} --optimization={5} {6} {7} {8} {9}'.format(
                  input_dim, hidden_dim, feat_input_dim, epochs, layers, optimization, train_path, test_path,
                  results_file_path, sigmorphon_root_dir))
    return
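# The joint model trains one network per POS cluster. A sketch of the
# assumed common.cluster_data_by_pos interface: map each POS value to the
# indices of its examples (the key name 'pos' is an assumption; the real
# helper may extract POS differently):
def cluster_data_by_pos_sketch(feat_dicts):
    from collections import defaultdict
    pos_to_data_indices = defaultdict(list)
    for i, feat_dict in enumerate(feat_dicts):
        pos_to_data_indices[feat_dict.get('pos', 'NONE')].append(i)
    return dict(pos_to_data_indices)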
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim,
         epochs, layers, optimization, regularization, learning_rate, plot):
    if plot:
        parallelize_training = False
        print 'plotting, parallelization is disabled!!!'
    else:
        parallelize_training = PARALLELIZE

    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    print 'train path = ' + str(train_path)
    print 'test path = ' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load train and test data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(3 * MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # tells the FST to step forward in the input
    alphabet.append(STEP)

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # feat 2 int
    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # align the words to the inflections; the alignment will later be used by the model
    print 'started aligning'
    train_word_pairs = zip(train_lemmas, train_words)
    test_word_pairs = zip(test_lemmas, test_words)

    # train_aligned_pairs = dumb_align(train_word_pairs, ALIGN_SYMBOL)
    train_aligned_pairs = common.mcmc_align(train_word_pairs, ALIGN_SYMBOL)
    # TODO: align together?
    test_aligned_pairs = common.mcmc_align(test_word_pairs, ALIGN_SYMBOL)

    # random.shuffle(train_aligned_pairs)
    # for p in train_aligned_pairs[:100]:
    #     generate_template(p)
    print 'finished aligning'

    # joint model: cluster the data by POS type (features)
    train_pos_to_data_indices = common.cluster_data_by_pos(train_feat_dicts)
    test_pos_to_data_indices = common.cluster_data_by_pos(test_feat_dicts)
    train_cluster_to_data_indices = train_pos_to_data_indices
    test_cluster_to_data_indices = test_pos_to_data_indices

    # factored model: cluster the data by inflection type (features)
    # train_morph_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_morph_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)
    # train_cluster_to_data_indices = train_morph_to_data_indices
    # test_cluster_to_data_indices = test_morph_to_data_indices

    # create input for each model and then parallelize or run in loop
    params = []
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):
        params.append([input_dim, hidden_dim, layers, cluster_index, cluster_type, train_lemmas, train_feat_dicts,
                       train_words, test_lemmas, test_feat_dicts, train_cluster_to_data_indices, test_words,
                       test_cluster_to_data_indices, alphabet, alphabet_index, inverse_alphabet_index, epochs,
                       optimization, results_file_path, train_aligned_pairs, test_aligned_pairs, feat_index,
                       feature_types, feat_input_dim, feature_alphabet, plot])

    last_epochs = []
    if parallelize_training:
        # set maxtasksperchild=1 to free finished processes
        p = Pool(4, maxtasksperchild=1)
        print 'now training {0} models in parallel'.format(len(train_cluster_to_data_indices))
        models = p.map(train_cluster_model_wrapper, params)
    else:
        print 'now training {0} models in loop'.format(len(train_cluster_to_data_indices))
        for p in params:
            cluster_index = p[3]
            cluster_name = p[4]
            trained_model, last_epoch = train_cluster_model(*p)

            # print when each model stopped
            epoch_output = 'cluster {0} - {1} stopped on epoch {2}'.format(cluster_index, cluster_name, last_epoch)
            last_epochs.append(epoch_output)
            print epoch_output
        with open(results_file_path + '.epochs', 'w') as f:
            f.writelines(last_epochs)
    print 'finished training all models'

    # evaluate best models
    os.system('python task1_evaluate_best_nfst_models.py --cnn-mem 6096 --input={0} --hidden={1} '
              '--feat-input={2} --epochs={3} --layers={4} --optimization={5} {6} {7} {8} {9}'.format(
                  input_dim, hidden_dim, feat_input_dim, epochs, layers, optimization, train_path, test_path,
                  results_file_path, sigmorphon_root_dir))

    for e in last_epochs:
        print 'last epoch is {}'.format(e)
    return
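# What an mcmc_align output pair looks like: both strings have equal length,
# with the '~' align symbol padding insertions and deletions. A toy,
# hand-aligned example (German 'schielen' -> 'schielten'; illustrative only,
# the sampler may choose a different alignment):
toy_aligned_lemma, toy_aligned_word = 'schiel~en', 'schielten'
assert len(toy_aligned_lemma) == len(toy_aligned_word)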
def train_model(model, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas, train_feat_dicts, train_words,
                dev_lemmas, dev_feat_dicts, dev_words, alphabet_index, inverse_alphabet_index, epochs,
                optimization, results_file_path, feat_index, feature_types, plot):
    print 'training...'
    np.random.seed(17)
    random.seed(17)

    if optimization == 'ADAM':
        trainer = pc.AdamTrainer(model, lam=REGULARIZATION, alpha=LEARNING_RATE, beta_1=0.9, beta_2=0.999,
                                 eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = pc.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = pc.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = pc.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = pc.AdadeltaTrainer(model)
    else:
        trainer = pc.SimpleSGDTrainer(model)

    train_sanity_set_size = 100
    total_loss = 0
    best_avg_dev_loss = 999
    best_dev_accuracy = -1
    best_train_accuracy = -1
    best_dev_epoch = 0
    best_train_epoch = 0
    patience = 0
    train_len = len(train_words)
    epochs_x = []
    train_loss_y = []
    dev_loss_y = []
    train_accuracy_y = []
    dev_accuracy_y = []

    # progress bar init
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets, maxval=epochs).start()
    avg_loss = -1
    e = 0

    print 'started aligning'
    train_word_pairs = zip(train_lemmas, train_words)
    dev_word_pairs = zip(dev_lemmas, dev_words)
    align_symbol = '~'
    # train_aligned_pairs = dumb_align(train_word_pairs, align_symbol)
    train_aligned_pairs = common.mcmc_align(train_word_pairs, align_symbol)
    dev_aligned_pairs = common.mcmc_align(dev_word_pairs, align_symbol)
    print 'finished aligning'

    for e in xrange(epochs):
        # randomize the training set
        indices = range(train_len)
        random.shuffle(indices)
        train_set = zip(train_lemmas, train_feat_dicts, train_words, train_aligned_pairs)
        train_set = [train_set[i] for i in indices]

        # compute loss for each example and update
        for i, example in enumerate(train_set):
            lemma, feats, word, alignment = example
            loss = compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word,
                                alphabet_index, feat_index, feature_types, alignment)
            loss_value = loss.value()
            total_loss += loss_value
            loss.backward()
            trainer.update()
            if i > 0:
                avg_loss = total_loss / float(i + e * train_len)
            else:
                avg_loss = total_loss
            if i % 100 == 0 and i > 0:
                print 'went through {} examples out of {}'.format(i, train_len)

        if EARLY_STOPPING:
            print 'starting epoch evaluation'

            # get train accuracy
            print 'train sanity prediction:'
            train_predictions = predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                                                  inverse_alphabet_index, train_lemmas[:train_sanity_set_size],
                                                  train_feat_dicts[:train_sanity_set_size], feat_index,
                                                  feature_types)
            print 'train sanity evaluation:'
            train_accuracy = evaluate_model(train_predictions, train_lemmas[:train_sanity_set_size],
                                            train_feat_dicts[:train_sanity_set_size],
                                            train_words[:train_sanity_set_size], feature_types, True)[1]

            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy
                best_train_epoch = e

            dev_accuracy = 0
            avg_dev_loss = 0

            if len(dev_lemmas) > 0:
                print 'dev prediction:'
                # get dev predictions
                dev_predictions = predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                                                    inverse_alphabet_index, dev_lemmas, dev_feat_dicts, feat_index,
                                                    feature_types)
                print 'dev evaluation:'
                # get dev accuracy
                dev_accuracy = evaluate_model(dev_predictions, dev_lemmas, dev_feat_dicts, dev_words,
                                              feature_types, print_results=True)[1]

                if dev_accuracy >= best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy
                    best_dev_epoch = e

                    # save best model to disk
                    task1_attention_implementation.save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                # found "perfect" model
                if dev_accuracy == 1:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

                # get dev loss
                total_dev_loss = 0
                for i in xrange(len(dev_lemmas)):
                    total_dev_loss += compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, dev_lemmas[i],
                                                   dev_feat_dicts[i], dev_words[i], alphabet_index, feat_index,
                                                   feature_types, dev_aligned_pairs[i]).value()

                avg_dev_loss = total_dev_loss / float(len(dev_lemmas))
                if avg_dev_loss < best_avg_dev_loss:
                    best_avg_dev_loss = avg_dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev accuracy: {3:.4f} ' \
                      'train accuracy = {4:.4f} best dev accuracy {5:.4f} (epoch {8}) ' \
                      'best train accuracy: {6:.4f} (epoch {9}) patience = {7}'.format(
                          e, avg_loss, avg_dev_loss, dev_accuracy, train_accuracy, best_dev_accuracy,
                          best_train_accuracy, patience, best_dev_epoch, best_train_epoch)

                if patience == MAX_PATIENCE:
                    print 'out of patience after {0} epochs'.format(str(e))
                    # TODO: would like to return best model but pycnn has a bug with save and load.
                    # Maybe copy via code?
                    # return best_model[0]
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e
            else:
                # if no dev set is present, optimize on the train set
                print 'no dev set for early stopping, running all epochs until perfectly fitting or patience is ' \
                      'reached on the train set'

                if train_accuracy > best_train_accuracy:
                    best_train_accuracy = train_accuracy

                    # save best model to disk
                    task1_attention_implementation.save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                print 'epoch: {0} train loss: {1:.4f} train accuracy = {2:.4f} ' \
                      'best train accuracy: {3:.4f} patience = {4}'.format(e, avg_loss, train_accuracy,
                                                                           best_train_accuracy, patience)

                # found "perfect" model on the train set or patience has been reached
                if train_accuracy == 1 or patience == MAX_PATIENCE:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

            # update lists for plotting
            train_accuracy_y.append(train_accuracy)
            epochs_x.append(e)
            train_loss_y.append(avg_loss)
            dev_loss_y.append(avg_dev_loss)
            dev_accuracy_y.append(dev_accuracy)

        # finished epoch
        train_progress_bar.update(e)
        if plot:
            with plt.style.context('fivethirtyeight'):
                p1, = plt.plot(epochs_x, dev_loss_y, label='dev loss')
                p2, = plt.plot(epochs_x, train_loss_y, label='train loss')
                p3, = plt.plot(epochs_x, dev_accuracy_y, label='dev acc.')
                p4, = plt.plot(epochs_x, train_accuracy_y, label='train acc.')
                plt.legend(loc='upper left', handles=[p1, p2, p3, p4])
            plt.savefig(results_file_path + 'plot.png')

    train_progress_bar.finish()
    if plot:
        plt.cla()
    print 'finished training. average loss: {} best epoch on dev: {} best epoch on train: {}'.format(
        str(avg_loss), best_dev_epoch, best_train_epoch)
    return model, e, best_train_epoch
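# The optimizer selection above can also be written as a lookup table; this
# sketch uses the same pycnn trainers and hyper-parameters, with
# SimpleSGDTrainer as the fallback, purely as a compactness illustration:
def get_trainer(optimization, model):
    trainers = {
        'ADAM': lambda m: pc.AdamTrainer(m, lam=REGULARIZATION, alpha=LEARNING_RATE,
                                         beta_1=0.9, beta_2=0.999, eps=1e-8),
        'MOMENTUM': pc.MomentumSGDTrainer,
        'SGD': pc.SimpleSGDTrainer,
        'ADAGRAD': pc.AdagradTrainer,
        'ADADELTA': pc.AdadeltaTrainer,
    }
    # fall back to plain SGD for unrecognized optimization names
    return trainers.get(optimization, pc.SimpleSGDTrainer)(model)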
def main():
    # train_path = '../data/heb/hebrew-task1-train'
    # dev_path = '../data/heb/hebrew-task1-dev'
    # test_path = '../data/heb/hebrew-task1-test'
    # train_path = '/Users/roeeaharoni/GitHub/sigmorphon2016/data/german-task1-train'
    # dev_path = '/Users/roeeaharoni/GitHub/sigmorphon2016/data/german-task1-dev'
    # test_path = '../biu/gold/german-task1-test'
    train_path = '/Users/roeeaharoni/GitHub/sigmorphon2016/data/finnish-task1-train'
    dev_path = '/Users/roeeaharoni/GitHub/sigmorphon2016/data/finnish-task1-dev'
    test_path = '../biu/gold/finnish-task1-test'

    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (dev_words, dev_lemmas, dev_feat_dicts) = prepare_sigmorphon_data.load_data(dev_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    print 'started aligning'
    train_word_pairs = zip(train_lemmas, train_words)
    test_word_pairs = zip(test_lemmas, test_words)
    dev_word_pairs = zip(dev_lemmas, dev_words)
    align_symbol = '~'
    train_aligned_pairs = common.mcmc_align(train_word_pairs, align_symbol)

    index2template = {}
    for i, aligned_pair in enumerate(train_aligned_pairs):
        template = task1_single_ms2s.generate_template_from_alignment(aligned_pair)
        index2template[i] = template

    dev_handled = 0
    print 'now trying all templates on dev'
    for pair in dev_word_pairs:
        lemma, inflection = pair
        for template in index2template.values():
            prediction = task1_single_ms2s.instantiate_template(template, lemma)
            if prediction == inflection:
                dev_handled += 1
                break

    print "train templates handled {} examples in dev out of {}, {}%".format(
        dev_handled, len(dev_lemmas), float(dev_handled) / len(dev_lemmas) * 100)

    test_handled = 0
    print 'now trying all templates on test'
    for pair in test_word_pairs:
        lemma, inflection = pair
        for template in index2template.values():
            prediction = task1_single_ms2s.instantiate_template(template, lemma)
            if prediction == inflection:
                test_handled += 1
                break

    print "train templates handled {} examples in test out of {}, {}%".format(
        test_handled, len(test_lemmas), float(test_handled) / len(test_lemmas) * 100)
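# A sketch of the assumed template semantics used above: positions copied
# from the aligned lemma become integer markers, inserted characters stay
# literal, and instantiation replays the template over a new lemma. These
# are hypothetical stand-ins for the task1_single_ms2s helpers:
def generate_template_from_alignment_sketch(aligned_pair):
    aligned_lemma, aligned_word = aligned_pair
    template, lemma_pos = [], 0
    for lemma_char, word_char in zip(aligned_lemma, aligned_word):
        if lemma_char == word_char:
            template.append(lemma_pos)  # copy the lemma character at this position
        elif word_char != '~':
            template.append(word_char)  # literal insertion
        if lemma_char != '~':
            lemma_pos += 1
    return template

def instantiate_template_sketch(template, lemma):
    # integer entries copy lemma characters; may fail on shorter lemmas
    return ''.join(lemma[t] if isinstance(t, int) else t for t in template)

# e.g. the template from ('schiel~en', 'schielten') is [0,1,2,3,4,5,'t',6,7],
# and replaying it on 'schielen' yields 'schielten' again.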
def train_model(model, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas, train_feat_dicts, train_words,
                dev_lemmas, dev_feat_dicts, dev_words, alphabet_index, inverse_alphabet_index, epochs,
                optimization, results_file_path, feat_index, feature_types, plot):
    print 'training...'
    np.random.seed(17)
    random.seed(17)

    if optimization == 'ADAM':
        trainer = pc.AdamTrainer(model, lam=REGULARIZATION, alpha=LEARNING_RATE, beta_1=0.9, beta_2=0.999,
                                 eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = pc.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = pc.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = pc.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = pc.AdadeltaTrainer(model)
    else:
        trainer = pc.SimpleSGDTrainer(model)

    train_sanity_set_size = 100
    total_loss = 0
    best_avg_dev_loss = 999
    best_dev_accuracy = -1
    best_train_accuracy = -1
    best_dev_epoch = 0
    best_train_epoch = 0
    patience = 0
    train_len = len(train_words)
    epochs_x = []
    train_loss_y = []
    dev_loss_y = []
    train_accuracy_y = []
    dev_accuracy_y = []

    # progress bar init
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets, maxval=epochs).start()
    avg_loss = -1
    e = 0

    print 'started aligning'
    train_word_pairs = zip(train_lemmas, train_words)
    dev_word_pairs = zip(dev_lemmas, dev_words)
    align_symbol = '~'
    # train_aligned_pairs = dumb_align(train_word_pairs, align_symbol)
    train_aligned_pairs = common.mcmc_align(train_word_pairs, align_symbol)
    dev_aligned_pairs = common.mcmc_align(dev_word_pairs, align_symbol)
    print 'finished aligning'

    for e in xrange(epochs):
        # randomize the training set
        indices = range(train_len)
        random.shuffle(indices)
        train_set = zip(train_lemmas, train_feat_dicts, train_words, train_aligned_pairs)
        train_set = [train_set[i] for i in indices]

        # compute loss for each example and update
        for i, example in enumerate(train_set):
            lemma, feats, word, alignment = example
            loss = compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word,
                                alphabet_index, feat_index, feature_types, alignment)
            loss_value = loss.value()
            total_loss += loss_value
            loss.backward()
            trainer.update()
            if i > 0:
                avg_loss = total_loss / float(i + e * train_len)
            else:
                avg_loss = total_loss
            if i % 100 == 0 and i > 0:
                print 'went through {} examples out of {}'.format(i, train_len)

        if EARLY_STOPPING:
            print 'starting epoch evaluation'

            # get train accuracy
            print 'train sanity prediction:'
            train_predictions = predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                                                  inverse_alphabet_index, train_lemmas[:train_sanity_set_size],
                                                  train_feat_dicts[:train_sanity_set_size], feat_index,
                                                  feature_types)
            print 'train sanity evaluation:'
            train_accuracy = evaluate_model(train_predictions, train_lemmas[:train_sanity_set_size],
                                            train_feat_dicts[:train_sanity_set_size],
                                            train_words[:train_sanity_set_size], feature_types, True)[1]

            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy
                best_train_epoch = e

            dev_accuracy = 0
            avg_dev_loss = 0

            if len(dev_lemmas) > 0:
                print 'dev prediction:'
                # get dev predictions
                dev_predictions = predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                                                    inverse_alphabet_index, dev_lemmas, dev_feat_dicts, feat_index,
                                                    feature_types)
                print 'dev evaluation:'
                # get dev accuracy
                dev_accuracy = evaluate_model(dev_predictions, dev_lemmas, dev_feat_dicts, dev_words,
                                              feature_types, print_results=True)[1]

                if dev_accuracy >= best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy
                    best_dev_epoch = e

                    # save best model to disk
                    save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                # found "perfect" model
                if dev_accuracy == 1:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

                # get dev loss
                total_dev_loss = 0
                for i in xrange(len(dev_lemmas)):
                    total_dev_loss += compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, dev_lemmas[i],
                                                   dev_feat_dicts[i], dev_words[i], alphabet_index, feat_index,
                                                   feature_types, dev_aligned_pairs[i]).value()

                avg_dev_loss = total_dev_loss / float(len(dev_lemmas))
                if avg_dev_loss < best_avg_dev_loss:
                    best_avg_dev_loss = avg_dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev accuracy: {3:.4f} ' \
                      'train accuracy = {4:.4f} best dev accuracy {5:.4f} (epoch {8}) ' \
                      'best train accuracy: {6:.4f} (epoch {9}) patience = {7}'.format(
                          e, avg_loss, avg_dev_loss, dev_accuracy, train_accuracy, best_dev_accuracy,
                          best_train_accuracy, patience, best_dev_epoch, best_train_epoch)

                if patience == MAX_PATIENCE:
                    print 'out of patience after {0} epochs'.format(str(e))
                    # TODO: would like to return best model but pycnn has a bug with save and load.
                    # Maybe copy via code?
                    # return best_model[0]
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e
            else:
                # if no dev set is present, optimize on the train set
                print 'no dev set for early stopping, running all epochs until perfectly fitting or patience is ' \
                      'reached on the train set'

                if train_accuracy > best_train_accuracy:
                    best_train_accuracy = train_accuracy

                    # save best model to disk
                    save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                print 'epoch: {0} train loss: {1:.4f} train accuracy = {2:.4f} ' \
                      'best train accuracy: {3:.4f} patience = {4}'.format(e, avg_loss, train_accuracy,
                                                                           best_train_accuracy, patience)

                # found "perfect" model on the train set or patience has been reached
                if train_accuracy == 1 or patience == MAX_PATIENCE:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

            # update lists for plotting
            train_accuracy_y.append(train_accuracy)
            epochs_x.append(e)
            train_loss_y.append(avg_loss)
            dev_loss_y.append(avg_dev_loss)
            dev_accuracy_y.append(dev_accuracy)

        # finished epoch
        train_progress_bar.update(e)
        if plot:
            with plt.style.context('fivethirtyeight'):
                p1, = plt.plot(epochs_x, dev_loss_y, label='dev loss')
                p2, = plt.plot(epochs_x, train_loss_y, label='train loss')
                p3, = plt.plot(epochs_x, dev_accuracy_y, label='dev acc.')
                p4, = plt.plot(epochs_x, train_accuracy_y, label='train acc.')
                plt.legend(loc='upper left', handles=[p1, p2, p3, p4])
            plt.savefig(results_file_path + 'plot.png')

    train_progress_bar.finish()
    if plot:
        plt.cla()
    print 'finished training. average loss: {} best epoch on dev: {} best epoch on train: {}'.format(
        str(avg_loss), best_dev_epoch, best_train_epoch)
    return model, e, best_train_epoch
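# Sketch of the model-saving helper called above, assuming the pycnn
# Model.save API; the real save_pycnn_model may also persist the parameter
# specification next to the weights:
def save_pycnn_model_sketch(model, results_file_path):
    tmp_model_path = results_file_path + '_bestmodel.txt'
    print 'saving to ' + tmp_model_path
    model.save(tmp_model_path)
    print 'saved to ' + tmp_model_path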
def main(train_path, dev_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim,
         feat_input_dim, epochs, layers, optimization, regularization, learning_rate, plot, eval_only, ensemble):
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    print 'train path = ' + str(train_path)
    print 'dev path = ' + str(dev_path)
    print 'test path = ' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load train, dev and test data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (dev_words, dev_lemmas, dev_feat_dicts) = prepare_sigmorphon_data.load_data(dev_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(3 * MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # tells the FST to step forward in the input
    alphabet.append(STEP)

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # feat 2 int
    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # align the words to the inflections; the alignment will later be used by the model
    print 'started aligning'
    train_word_pairs = zip(train_lemmas, train_words)
    dev_word_pairs = zip(dev_lemmas, dev_words)

    # train_aligned_pairs = dumb_align(train_word_pairs, ALIGN_SYMBOL)
    train_aligned_pairs = common.mcmc_align(train_word_pairs, ALIGN_SYMBOL)
    # TODO: align together?
    dev_aligned_pairs = common.mcmc_align(dev_word_pairs, ALIGN_SYMBOL)

    # random.shuffle(train_aligned_pairs)
    # for p in train_aligned_pairs[:100]:
    #     generate_template(p)
    print 'finished aligning'

    if not eval_only:
        last_epochs = []
        trained_model, last_epoch = train_model_wrapper(input_dim, hidden_dim, layers, train_lemmas,
                                                        train_feat_dicts, train_words, dev_lemmas, dev_feat_dicts,
                                                        dev_words, alphabet, alphabet_index, inverse_alphabet_index,
                                                        epochs, optimization, results_file_path, train_aligned_pairs,
                                                        dev_aligned_pairs, feat_index, feature_types, feat_input_dim,
                                                        feature_alphabet, plot)

        # print when the model stopped
        epoch_output = 'stopped on epoch {}'.format(last_epoch)
        last_epochs.append(epoch_output)
        print epoch_output
        with open(results_file_path + '.epochs', 'w') as f:
            f.writelines(last_epochs)
        print 'finished training all models'
    else:
        print 'skipped training by request. evaluating best models:'

    # eval on dev
    print '=========DEV EVALUATION:========='
    evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types,
                  hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path,
                  sigmorphon_root_dir, dev_feat_dicts, dev_lemmas, dev_path, dev_words, train_path)

    # eval on test
    print '=========TEST EVALUATION:========='
    evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types,
                  hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path,
                  sigmorphon_root_dir, test_feat_dicts, test_lemmas, test_path, test_words, train_path)
    return
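# Hypothetical command-line entry point for the main() above; the actual
# scripts in this repo may parse their flags differently (names and defaults
# here are illustrative only):
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='train/evaluate an inflection generation model')
    parser.add_argument('train_path')
    parser.add_argument('dev_path')
    parser.add_argument('test_path')
    parser.add_argument('results_file_path')
    parser.add_argument('sigmorphon_root_dir')
    parser.add_argument('--input', type=int, default=100, help='character embedding dimension')
    parser.add_argument('--hidden', type=int, default=100, help='LSTM hidden dimension')
    parser.add_argument('--feat-input', type=int, default=20, help='feature embedding dimension')
    parser.add_argument('--epochs', type=int, default=1)
    parser.add_argument('--layers', type=int, default=1)
    parser.add_argument('--optimization', default='ADAM')
    parser.add_argument('--reg', type=float, default=0.0)
    parser.add_argument('--learning', type=float, default=0.001)
    parser.add_argument('--plot', action='store_true')
    parser.add_argument('--eval', action='store_true', dest='eval_only')
    parser.add_argument('--ensemble', default='')
    args = parser.parse_args()
    main(args.train_path, args.dev_path, args.test_path, args.results_file_path, args.sigmorphon_root_dir,
         args.input, args.hidden, args.feat_input, args.epochs, args.layers, args.optimization, args.reg,
         args.learning, args.plot, args.eval_only, args.ensemble)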