def search_hparams(verbose=False):
    """Grid-search HMM hyper-parameters and log the scores to ``hmm_res.csv``.

    Every pairing of an n-gram order (1..7) with an estimator ('mle',
    'laplace', 'good-turing', or 'add-delta' with delta 0.1..0.9) is run
    7 times through ``prep_data().shuffle(None).split(0.1).train().eval()``.
    The confusion matrices returned by the runs are summed before scoring.

    For each configuration one line ``config;micro-F1;macro-F1;avg-acc;`` is
    written to the CSV and the same figures are printed to stdout.

    Args:
        verbose: When true, also print the normalized confusion matrix.
    """
    ngrams = [{'ngram': order} for order in range(1, 8)]
    smooth = [{'est': 'mle'}, {'est': 'laplace'}, {'est': 'good-turing'}]
    smooth.extend({'est': 'add-delta', 'delta': tenth / 10}
                  for tenth in range(1, 10))

    with open('hmm_res.csv', 'w') as f:
        for ngram_cfg, smooth_cfg in itertools.product(ngrams, smooth):
            config = {**ngram_cfg, **smooth_cfg}

            # Explicit sentinel instead of probing locals(): the first run
            # seeds the accumulator, later runs are added in place.
            conf_mat = None
            for _ in range(7):
                run_mat = (HMM(config).prep_data().shuffle(None)
                           .split(0.1).train().eval())
                if conf_mat is None:
                    conf_mat = run_mat
                else:
                    conf_mat += run_mat

            print("Configuration = {}: ".format(config))

            precision, recall = metrics.MicroAvg(conf_mat)
            micro_f1 = metrics.Fscore(precision, recall, 1)
            print('MicroAvg:', precision, recall, micro_f1)

            precision, recall = metrics.MacroAvg(conf_mat)
            # Same (precision, recall) argument order as the micro call.
            macro_f1 = metrics.Fscore(precision, recall, 1)
            print('MacroAvg:', precision, recall, macro_f1)

            acc = metrics.AvgAcc(conf_mat)
            print('AvgAcc:', acc)

            f.write('{};{};{};{};\n'.format(config, micro_f1, macro_f1, acc))
            # Push each result to disk so a crash mid-search keeps prior rows.
            f.flush()

            if verbose:
                # Normalization is only needed for the verbose dump.
                normalized = metrics.NormalizeConfusion(conf_mat)
                print('ConfMat:\n',
                      np.array_str(normalized, max_line_width=300,
                                   precision=4))
            print('----------------------------------------------')
def search_hparams(verbose=False):
    """Search word-level CRF feature subsets; log scores to ``crf_word_res.csv``.

    Every non-empty combination of ``CRF.WORD_FTRS`` is run 7 times through
    ``prep_data().shuffle(None).split(0.1).train().eval()``. The confusion
    matrices returned by the runs are summed before scoring.

    For each configuration one line ``config;micro-F1;macro-F1;avg-acc;`` is
    written to the CSV and the same figures are printed to stdout.

    Args:
        verbose: When true, also print the normalized confusion matrix.
    """
    with open('crf_word_res.csv', 'w') as f:
        # Subset sizes 1..len(WORD_FTRS), smallest subsets first.
        for num_ftrs in range(1, len(CRF.WORD_FTRS) + 1):
            for ftrs in combinations(CRF.WORD_FTRS, num_ftrs):
                config = {'ftrs': ftrs}

                # Explicit sentinel instead of probing locals(): the first
                # run seeds the accumulator, later runs are added in place.
                conf_mat = None
                for _ in range(7):
                    run_mat = (CRF(config).prep_data().shuffle(None)
                               .split(0.1).train().eval())
                    if conf_mat is None:
                        conf_mat = run_mat
                    else:
                        conf_mat += run_mat

                print("Configuration = {}: ".format(config))

                precision, recall = metrics.MicroAvg(conf_mat)
                micro_f1 = metrics.Fscore(precision, recall, 1)
                print('MicroAvg:', precision, recall, micro_f1)

                precision, recall = metrics.MacroAvg(conf_mat)
                # Same (precision, recall) argument order as the micro call.
                macro_f1 = metrics.Fscore(precision, recall, 1)
                print('MacroAvg:', precision, recall, macro_f1)

                acc = metrics.AvgAcc(conf_mat)
                print('AvgAcc:', acc)

                f.write('{};{};{};{};\n'.format(
                    config, micro_f1, macro_f1, acc))
                # Push each result to disk so a crash keeps prior rows.
                f.flush()

                if verbose:
                    # Normalization is only needed for the verbose dump.
                    normalized = metrics.NormalizeConfusion(conf_mat)
                    print('ConfMat:\n',
                          np.array_str(normalized, max_line_width=300,
                                       precision=4))
                print('----------------------------------------------')
def PrintConfMat(conf_mat):
    """Print micro/macro precision, recall, F1 and average accuracy.

    Args:
        conf_mat: Confusion matrix accepted by ``metrics.MicroAvg``,
            ``metrics.MacroAvg`` and ``metrics.AvgAcc``.
    """
    report = ' Precision = {}\n Recall = {}\n F1 = {}'

    precision, recall = metrics.MicroAvg(conf_mat)
    f1 = metrics.Fscore(precision, recall, 1)
    print('MicroAvg:')
    print(report.format(precision, recall, f1))

    precision, recall = metrics.MacroAvg(conf_mat)
    # Pass (precision, recall) in the same order as the micro-average call;
    # the original swapped them here.
    f1 = metrics.Fscore(precision, recall, 1)
    print('MacroAvg:')
    print(report.format(precision, recall, f1))

    print('Avg Accuracy:', metrics.AvgAcc(conf_mat))
def check_seeds():
    """Print the average accuracy of a fixed HMM config for seeds 0..10.

    For each seed the model is built and evaluated 7 times via
    ``prep_data().shuffle(seed).split(0.1).train().eval()``; the returned
    confusion matrices are summed and ``metrics.AvgAcc`` of the sum is
    printed next to the seed.
    """
    config = {'ngram': 3, 'est': 'add-delta', 'delta': 0.3}
    print("seed, accuracy")
    for seed in range(11):
        # NOTE(review): all 7 repeats use the same seed; if shuffle(seed) is
        # deterministic they see the same split -- confirm this is intended.
        # Explicit sentinel instead of probing locals() for the accumulator.
        conf_mat = None
        for _ in range(7):
            run_mat = (HMM(config).prep_data().shuffle(seed)
                       .split(0.1).train().eval())
            if conf_mat is None:
                conf_mat = run_mat
            else:
                conf_mat += run_mat
        acc = metrics.AvgAcc(conf_mat)
        print(seed, acc)
def check_seeds():
    """Print the average accuracy of a fixed CRF config for seeds 0..10.

    For each seed the model is built and evaluated 7 times via
    ``prep_data().shuffle(seed).split(0.1).train().eval()``; the returned
    confusion matrices are summed and ``metrics.AvgAcc`` of the sum is
    printed next to the seed.
    """
    config = {
        'ftrs': ('IS_FIRST', 'IS_LAST', 'IDX', 'VAL', 'PRV_VAL', 'NXT_VAL',
                 'FRST_VAL', 'LST_VAL', 'SCND_VAL', 'SCND_LST_VAL'),
    }
    print("seed, accuracy")
    for seed in range(11):
        # NOTE(review): all 7 repeats use the same seed; if shuffle(seed) is
        # deterministic they see the same split -- confirm this is intended.
        # Explicit sentinel instead of probing locals() for the accumulator.
        conf_mat = None
        for _ in range(7):
            run_mat = (CRF(config).prep_data().shuffle(seed)
                       .split(0.1).train().eval())
            if conf_mat is None:
                conf_mat = run_mat
            else:
                conf_mat += run_mat
        acc = metrics.AvgAcc(conf_mat)
        print(seed, acc)
def _sentence_crf_configs():
    """Yield every sentence-level CRF configuration in search order.

    Each yielded dict is a fresh object with keys ``extr_word_idx``,
    ``word_ftrs`` and ``stc_ftrs`` (in that order).
    """
    poss_words_ids = [[0], [-1, 0, 1], [-2, 0, 2], [-2, -1, 0, 1, 2]]
    for poss_words_id in poss_words_ids:
        # Word-feature subsets of size 7..len(WORD_FTRS).
        for num_word_ftrs in range(7, len(CRF.WORD_FTRS) + 1):
            for word_ftrs in combinations(CRF.WORD_FTRS, num_word_ftrs):
                # Sentence-feature subsets of every size, including empty.
                for num_stc_ftrs in range(len(CRF.STC_FTRS) + 1):
                    for stc_ftrs in combinations(CRF.STC_FTRS, num_stc_ftrs):
                        yield {
                            'extr_word_idx': poss_words_id,
                            'word_ftrs': word_ftrs,
                            'stc_ftrs': stc_ftrs,
                        }


def search_hparams(verbose=False):
    """Search sentence-level CRF configs; log scores to ``crf_sentence_res.csv``.

    Every configuration produced by ``_sentence_crf_configs`` is run 7 times
    through ``prep_data().shuffle(0).split(0.1).train().eval()``. The
    confusion matrices returned by the runs are summed before scoring.

    For each configuration one line ``config;micro-F1;macro-F1;avg-acc;`` is
    written to the CSV and the same figures are printed to stdout.

    Args:
        verbose: When true, also print the normalized confusion matrix.
    """
    with open('crf_sentence_res.csv', 'w') as f:
        for config in _sentence_crf_configs():
            # NOTE(review): all 7 repeats shuffle with seed 0; if shuffle(0)
            # is deterministic they see the same split -- confirm intent.
            # Explicit sentinel instead of probing locals().
            conf_mat = None
            for _ in range(7):
                run_mat = (CRF(config).prep_data().shuffle(0)
                           .split(0.1).train().eval())
                if conf_mat is None:
                    conf_mat = run_mat
                else:
                    conf_mat += run_mat

            print("Configuration = {}: ".format(config))

            precision, recall = metrics.MicroAvg(conf_mat)
            micro_f1 = metrics.Fscore(precision, recall, 1)
            print('MicroAvg:', precision, recall, micro_f1)

            precision, recall = metrics.MacroAvg(conf_mat)
            # Same (precision, recall) argument order as the micro call.
            macro_f1 = metrics.Fscore(precision, recall, 1)
            print('MacroAvg:', precision, recall, macro_f1)

            acc = metrics.AvgAcc(conf_mat)
            print('AvgAcc:', acc)

            f.write('{};{};{};{};\n'.format(config, micro_f1, macro_f1, acc))
            # Push each result to disk so a crash mid-search keeps prior rows.
            f.flush()

            if verbose:
                # Normalization is only needed for the verbose dump.
                normalized = metrics.NormalizeConfusion(conf_mat)
                print('ConfMat:\n',
                      np.array_str(normalized, max_line_width=300,
                                   precision=4))
            print('----------------------------------------------')
def search_hparams(verbose=False):
    """Grid-search RNN encoder hyper-parameters; append scores to ``rnn_res.csv``.

    Every combination of layer count (1..6), hidden size (32, 64, 128),
    embedding ('one-hot', 'mds', 'nn') and window length (4) is run 5 times
    through ``prep_model().shuffle(None).split(0.05)
    .train(epochs=10000, lr=1e-2, alg='adamw').eval()``. The confusion
    matrices returned by the runs are summed before scoring.

    For each configuration one line ``config;micro-F1;macro-F1;avg-acc;`` is
    appended to the CSV (the file is opened in append mode and flushed after
    every configuration) and the same figures are printed to stdout.

    Args:
        verbose: When true, also print the normalized confusion matrix.
    """
    num_layers = [1, 2, 3, 4, 5, 6]
    n_hidden = [32, 64, 128]
    emb = ['one-hot', 'mds', 'nn']
    win_len = [4]

    with open('rnn_res.csv', 'a') as f:
        for n_layers, hidden_dim, embedding, window in product(
                num_layers, n_hidden, emb, win_len):
            config = {
                'n_layers': n_layers,
                'hidden_dim': hidden_dim,
                'embedding': embedding,
                'win_len': window,
            }

            # Explicit sentinel instead of probing locals(): the first run
            # seeds the accumulator, later runs are added in place.
            conf_mat = None
            for _ in range(5):
                run_mat = (Encoder(config).prep_model().shuffle(None)
                           .split(0.05)
                           .train(epochs=10000, lr=1e-2, alg='adamw')
                           .eval())
                if conf_mat is None:
                    conf_mat = run_mat
                else:
                    conf_mat += run_mat

            print("Configuration = {}: ".format(config))

            precision, recall = metrics.MicroAvg(conf_mat)
            micro_f1 = metrics.Fscore(precision, recall, 1)
            print('MicroAvg:', precision, recall, micro_f1)

            precision, recall = metrics.MacroAvg(conf_mat)
            # Same (precision, recall) argument order as the micro call.
            macro_f1 = metrics.Fscore(precision, recall, 1)
            print('MacroAvg:', precision, recall, macro_f1)

            acc = metrics.AvgAcc(conf_mat)
            print('AvgAcc:', acc)

            f.write('{};{};{};{};\n'.format(config, micro_f1, macro_f1, acc))

            if verbose:
                # Normalization is only needed for the verbose dump.
                normalized = metrics.NormalizeConfusion(conf_mat)
                print('ConfMat:\n',
                      np.array_str(normalized, max_line_width=300,
                                   precision=4))
            print('----------------------------------------------')
            f.flush()
def check_seeds():
    """Print the average accuracy of a fixed RNN config for seeds 0..10.

    For each seed the encoder is built and evaluated 5 times via
    ``prep_model().shuffle(seed).split(0.05)
    .train(epochs=10000, lr=1e-2, alg='adamw').eval()``; the returned
    confusion matrices are summed and ``metrics.AvgAcc`` of the sum is
    printed next to the seed.
    """
    config = {
        'n_layers': 3,
        'hidden_dim': 32,
        'embedding': 'mds',
        'win_len': 4,
    }
    print("seed, accuracy")
    for seed in range(11):
        # NOTE(review): all 5 repeats use the same seed; if shuffle(seed) is
        # deterministic they see the same split -- confirm this is intended.
        # Explicit sentinel instead of probing locals() for the accumulator.
        conf_mat = None
        for _ in range(5):
            run_mat = (Encoder(config).prep_model().shuffle(seed)
                       .split(0.05)
                       .train(epochs=10000, lr=1e-2, alg='adamw')
                       .eval())
            if conf_mat is None:
                conf_mat = run_mat
            else:
                conf_mat += run_mat
        acc = metrics.AvgAcc(conf_mat)
        print(seed, acc)