Exemple #1
0
def search_hparams():
    """Grid-search HMM n-gram order and smoothing estimator, logging scores.

    Every combination of n-gram order (1 through 7) and estimator ('mle',
    'laplace', 'good-turing', and 'add-delta' with delta 0.1 through 0.9) is
    trained and evaluated 7 times. The values returned by ``eval()`` are
    summed and treated as one confusion matrix, from which the micro-averaged
    F-score, the macro-averaged F-score and the average accuracy are computed.

    One line per configuration is written to ``hmm_res.csv`` in the form
    ``<config>;<micro f1>;<macro f1>;<accuracy>;``; the same figures are
    printed to stdout.
    """
    verbose = False
    ngrams = [{'ngram': x} for x in range(1, 8)]
    smooth = [{'est': 'mle'}, {'est': 'laplace'}, {'est': 'good-turing'}]
    smooth.extend({'est': 'add-delta', 'delta': x / 10} for x in range(1, 10))

    with open('hmm_res.csv', 'w') as f:
        for ngram_cfg, smooth_cfg in itertools.product(ngrams, smooth):
            config = {**ngram_cfg, **smooth_cfg}

            # Explicit sentinel instead of probing locals() / del: the
            # accumulator is reset for every configuration.
            conf_mat = None
            for _ in range(7):
                hmm = HMM(config)
                run_mat = hmm.prep_data().shuffle(None).split(0.1).train().eval()
                if conf_mat is None:
                    conf_mat = run_mat
                else:
                    conf_mat += run_mat

            res_str = '{};'.format(config)
            print("Configuration = {}: ".format(config))

            precision, recall = metrics.MicroAvg(conf_mat)
            f1 = metrics.Fscore(precision, recall, 1)
            res_str += '{};'.format(f1)
            print('MicroAvg:', precision, recall, f1)

            precision, recall = metrics.MacroAvg(conf_mat)
            # NOTE(review): arguments are in the opposite order from the
            # MicroAvg call above; kept as-is -- confirm Fscore is symmetric
            # when its third argument is 1.
            f1 = metrics.Fscore(recall, precision, 1)
            res_str += '{};'.format(f1)
            print('MacroAvg:', precision, recall, f1)

            acc = metrics.AvgAcc(conf_mat)
            res_str += '{};'.format(acc)
            print('AvgAcc:', acc)

            f.write(res_str + '\n')

            conf_mat = metrics.NormalizeConfusion(conf_mat)
            if verbose:
                print('ConfMat:\n', np.array_str(conf_mat, max_line_width=300, precision=4))
                print('----------------------------------------------')
Exemple #2
0
def search_hparams():
    """Search every non-empty subset of ``CRF.WORD_FTRS`` and log the scores.

    For each feature combination (sizes 1 up to ``len(CRF.WORD_FTRS)``) a CRF
    is trained and evaluated 7 times. The values returned by ``eval()`` are
    summed and treated as one confusion matrix, from which the micro-averaged
    F-score, the macro-averaged F-score and the average accuracy are computed.

    One line per configuration is written to ``crf_word_res.csv`` in the form
    ``<config>;<micro f1>;<macro f1>;<accuracy>;``; the same figures are
    printed to stdout.
    """
    verbose = False
    with open('crf_word_res.csv', 'w') as f:
        # Subset sizes start at 1; the range bound replaces rebinding the
        # loop variable inside the loop body.
        for num_ftrs in range(1, len(CRF.WORD_FTRS) + 1):
            for ftrs in combinations(CRF.WORD_FTRS, num_ftrs):
                config = {'ftrs': ftrs}

                # Explicit sentinel instead of probing locals() / del: the
                # accumulator is reset for every configuration.
                conf_mat = None
                for _ in range(7):
                    crf = CRF(config)
                    run_mat = crf.prep_data().shuffle(None).split(0.1).train().eval()
                    if conf_mat is None:
                        conf_mat = run_mat
                    else:
                        conf_mat += run_mat

                res_str = '{};'.format(config)
                print("Configuration = {}: ".format(config))

                precision, recall = metrics.MicroAvg(conf_mat)
                f1 = metrics.Fscore(precision, recall, 1)
                res_str += '{};'.format(f1)
                print('MicroAvg:', precision, recall, f1)

                precision, recall = metrics.MacroAvg(conf_mat)
                # NOTE(review): arguments are in the opposite order from the
                # MicroAvg call above; kept as-is -- confirm Fscore is
                # symmetric when its third argument is 1.
                f1 = metrics.Fscore(recall, precision, 1)
                res_str += '{};'.format(f1)
                print('MacroAvg:', precision, recall, f1)

                acc = metrics.AvgAcc(conf_mat)
                res_str += '{};'.format(acc)
                print('AvgAcc:', acc)

                f.write(res_str + '\n')

                conf_mat = metrics.NormalizeConfusion(conf_mat)
                if verbose:
                    print('ConfMat:\n', np.array_str(conf_mat, max_line_width=300, precision=4))
                    print('----------------------------------------------')
Exemple #3
0
def PrintConfMat(conf_mat):
    """Print micro/macro precision, recall, F-score and average accuracy.

    Args:
        conf_mat: value passed unchanged to ``metrics.MicroAvg``,
            ``metrics.MacroAvg`` and ``metrics.AvgAcc``.
    """
    template = '   Precision = {}\n   Recall = {}\n   F1 = {}'

    micro_p, micro_r = metrics.MicroAvg(conf_mat)
    micro_f1 = metrics.Fscore(micro_p, micro_r, 1)
    print('MicroAvg:')
    print(template.format(micro_p, micro_r, micro_f1))

    macro_p, macro_r = metrics.MacroAvg(conf_mat)
    # NOTE(review): recall is passed before precision here, unlike the
    # micro-average call above; order preserved from the original.
    macro_f1 = metrics.Fscore(macro_r, macro_p, 1)
    print('MacroAvg:')
    print(template.format(macro_p, macro_r, macro_f1))

    avg_acc = metrics.AvgAcc(conf_mat)
    print('Avg Accuracy:', avg_acc)
Exemple #4
0
def check_seeds():
    """Print the average accuracy of a fixed HMM configuration per seed.

    For each seed 0 through 10, a trigram HMM with add-delta estimation
    (delta 0.3) is trained and evaluated 7 times with that seed passed to
    ``shuffle``. The values returned by ``eval()`` are summed and
    ``metrics.AvgAcc`` of the sum is printed next to the seed.
    """
    config = {'ngram': 3, 'est': 'add-delta', 'delta': 0.3}
    print("seed, accuracy")
    for seed in range(11):
        # Explicit sentinel instead of probing locals() / del: the
        # accumulator is reset for every seed.
        conf_mat = None
        for _ in range(7):
            hmm = HMM(config)
            run_mat = hmm.prep_data().shuffle(seed).split(0.1).train().eval()
            if conf_mat is None:
                conf_mat = run_mat
            else:
                conf_mat += run_mat
        acc = metrics.AvgAcc(conf_mat)
        print(seed, acc)
Exemple #5
0
def check_seeds():
    """Print the average accuracy of a fixed CRF configuration per seed.

    For each seed 0 through 10, a CRF using the ten listed word features is
    trained and evaluated 7 times with that seed passed to ``shuffle``. The
    values returned by ``eval()`` are summed and ``metrics.AvgAcc`` of the sum
    is printed next to the seed.
    """
    config = {'ftrs': ('IS_FIRST', 'IS_LAST', 'IDX', 'VAL', 'PRV_VAL', 'NXT_VAL', 'FRST_VAL', 'LST_VAL', 'SCND_VAL', 'SCND_LST_VAL')}
    print("seed, accuracy")
    for seed in range(11):
        # Explicit sentinel instead of probing locals() / del: the
        # accumulator is reset for every seed.
        conf_mat = None
        for _ in range(7):
            crf = CRF(config)
            run_mat = crf.prep_data().shuffle(seed).split(0.1).train().eval()
            if conf_mat is None:
                conf_mat = run_mat
            else:
                conf_mat += run_mat
        acc = metrics.AvgAcc(conf_mat)
        print(seed, acc)
def search_hparams():
    """Search CRF word-window, word-feature and sentence-feature settings.

    Iterates over four candidate ``extr_word_idx`` offset lists, every
    combination of 7 or more entries of ``CRF.WORD_FTRS``, and every subset
    (including the empty one) of ``CRF.STC_FTRS``. Each configuration is
    trained and evaluated 7 times with ``shuffle(0)``. The values returned by
    ``eval()`` are summed and treated as one confusion matrix, from which the
    micro-averaged F-score, the macro-averaged F-score and the average
    accuracy are computed.

    One line per configuration is written to ``crf_sentence_res.csv`` in the
    form ``<config>;<micro f1>;<macro f1>;<accuracy>;``; the same figures are
    printed to stdout.
    """
    verbose = False
    with open('crf_sentence_res.csv', 'w') as f:
        poss_words_ids = [[0], [-1, 0, 1], [-2, 0, 2], [-2, -1, 0, 1, 2]]
        for poss_words_id in poss_words_ids:
            # The same dict is updated in place by the inner loops, as in the
            # original, so key order in the logged config stays unchanged.
            config = {'extr_word_idx': poss_words_id}
            # Sizes 7 .. len(WORD_FTRS); the range bound replaces rebinding
            # the loop variable inside the loop body.
            for num_word_ftrs in range(7, len(CRF.WORD_FTRS) + 1):
                for word_ftrs in combinations(CRF.WORD_FTRS, num_word_ftrs):
                    config['word_ftrs'] = word_ftrs
                    for num_stc_ftrs in range(len(CRF.STC_FTRS) + 1):
                        for stc_ftrs in combinations(CRF.STC_FTRS,
                                                     num_stc_ftrs):
                            config['stc_ftrs'] = stc_ftrs

                            # Explicit sentinel instead of probing locals() /
                            # del: reset for every configuration.
                            conf_mat = None
                            for _ in range(7):
                                crf = CRF(config)
                                run_mat = crf.prep_data().shuffle(
                                    0).split(0.1).train().eval()
                                if conf_mat is None:
                                    conf_mat = run_mat
                                else:
                                    conf_mat += run_mat

                            res_str = '{};'.format(config)
                            print("Configuration = {}: ".format(config))

                            precision, recall = metrics.MicroAvg(conf_mat)
                            f1 = metrics.Fscore(precision, recall, 1)
                            res_str += '{};'.format(f1)
                            print('MicroAvg:', precision, recall, f1)

                            precision, recall = metrics.MacroAvg(conf_mat)
                            # NOTE(review): arguments are in the opposite
                            # order from the MicroAvg call above; kept as-is
                            # -- confirm Fscore is symmetric when its third
                            # argument is 1.
                            f1 = metrics.Fscore(recall, precision, 1)
                            res_str += '{};'.format(f1)
                            print('MacroAvg:', precision, recall, f1)

                            acc = metrics.AvgAcc(conf_mat)
                            res_str += '{};'.format(acc)
                            print('AvgAcc:', acc)

                            f.write(res_str + '\n')

                            conf_mat = metrics.NormalizeConfusion(conf_mat)
                            if verbose:
                                print(
                                    'ConfMat:\n',
                                    np.array_str(conf_mat,
                                                 max_line_width=300,
                                                 precision=4))
                                print(
                                    '----------------------------------------------'
                                )
Exemple #7
0
def search_hparams():
    """Grid-search RNN encoder depth, width and embedding; append scores.

    Every combination of layer count (1 through 6), hidden size (32, 64, 128),
    embedding ('one-hot', 'mds', 'nn') and window length (4) is trained and
    evaluated 5 times. The values returned by ``eval()`` are summed and
    treated as one confusion matrix, from which the micro-averaged F-score,
    the macro-averaged F-score and the average accuracy are computed.

    One line per configuration is appended to ``rnn_res.csv`` in the form
    ``<config>;<micro f1>;<macro f1>;<accuracy>;`` and the file is flushed
    after each configuration; the same figures are printed to stdout.
    """
    verbose = False
    with open('rnn_res.csv', 'a') as f:
        num_layers = [1, 2, 3, 4, 5, 6]
        n_hidden = [32, 64, 128]
        emb = ['one-hot', 'mds', 'nn']
        win_len = [4]
        for layers, hidden, embedding, window in product(
                num_layers, n_hidden, emb, win_len):
            config = {
                'n_layers': layers,
                'hidden_dim': hidden,
                'embedding': embedding,
                'win_len': window
            }

            # Explicit sentinel instead of probing locals() / del: the
            # accumulator is reset for every configuration.
            conf_mat = None
            for _ in range(5):
                rnn = Encoder(config)
                run_mat = rnn.prep_model().shuffle(None).split(
                    0.05).train(epochs=10000, lr=1e-2, alg='adamw').eval()
                if conf_mat is None:
                    conf_mat = run_mat
                else:
                    conf_mat += run_mat

            res_str = '{};'.format(config)
            print("Configuration = {}: ".format(config))

            precision, recall = metrics.MicroAvg(conf_mat)
            f1 = metrics.Fscore(precision, recall, 1)
            res_str += '{};'.format(f1)
            print('MicroAvg:', precision, recall, f1)

            precision, recall = metrics.MacroAvg(conf_mat)
            # NOTE(review): arguments are in the opposite order from the
            # MicroAvg call above; kept as-is -- confirm Fscore is symmetric
            # when its third argument is 1.
            f1 = metrics.Fscore(recall, precision, 1)
            res_str += '{};'.format(f1)
            print('MacroAvg:', precision, recall, f1)

            acc = metrics.AvgAcc(conf_mat)
            res_str += '{};'.format(acc)
            print('AvgAcc:', acc)

            f.write(res_str + '\n')

            conf_mat = metrics.NormalizeConfusion(conf_mat)
            if verbose:
                print('ConfMat:\n',
                      np.array_str(conf_mat, max_line_width=300, precision=4))
                print('----------------------------------------------')
            # Flush after every configuration so results already computed
            # reach the file before the next training run starts.
            f.flush()
Exemple #8
0
def check_seeds():
    """Print the average accuracy of a fixed RNN configuration per seed.

    For each seed 0 through 10, an ``Encoder`` with 3 layers, hidden size 32,
    'mds' embedding and window length 4 is trained and evaluated 5 times with
    that seed passed to ``shuffle``. The values returned by ``eval()`` are
    summed and ``metrics.AvgAcc`` of the sum is printed next to the seed.
    """
    config = {
        'n_layers': 3,
        'hidden_dim': 32,
        'embedding': 'mds',
        'win_len': 4
    }
    print("seed, accuracy")
    for seed in range(11):
        # Explicit sentinel instead of probing locals() / del: the
        # accumulator is reset for every seed.
        conf_mat = None
        for _ in range(5):
            rnn = Encoder(config)
            run_mat = rnn.prep_model().shuffle(seed).split(0.05).train(
                epochs=10000, lr=1e-2, alg='adamw').eval()
            if conf_mat is None:
                conf_mat = run_mat
            else:
                conf_mat += run_mat
        acc = metrics.AvgAcc(conf_mat)
        print(seed, acc)