def supernatural_rs():
    """Run ``Tuner.random_search_cv`` on the Supernatural dataset file.

    Loads the corpus, prints the sum of the labels and the corpus size,
    prepares a ``KFold`` over the prepared data and hands it to the tuner.
    Nothing is returned; progress is reported through ``print``.
    """
    vocab_path = 'twitter_hashtag/twitterhashtags.vocab'
    dataset_path = 'DataSetsEraldo/dataSetSupernatural.txt'

    files = FilesConfig(vocab_file=vocab_path,
                        dataset_file=dataset_path,
                        task='supernatural')
    corpus = CorpusTE(train_file=dataset_path, vocab_file=vocab_path)
    x, y = corpus.prepare()

    # Quick summary of the full dataset: label sum, then corpus size.
    labels = np.array(y)
    print('exemplos positivos, todo dataset')
    print(labels.sum())
    print(corpus.size)

    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(x, y)

    tuner = Tuner(corpus, files)
    search_options = dict(
        execs=6,
        epoch_limits=(100, 0),
        lr_limits=(1e-5, 1e-1),
        cv=10,
        folds=folds,
        freeze_epochs=True,
        freeze_lr=False,
    )
    tuner.random_search_cv(**search_options)
    print("RS finished!\n")
# Example #2
# 0
def test_kfold():
    """Train a new ``TextCNN`` on each item yielded by a ``KFold``, five times.

    The corpus is loaded from the Supernatural dataset file and split with
    ``KFold(corpus, 3, rand=1)``. For every pass and every fold a fresh
    model is built and trained through ``Trainer.train()``; the values it
    returns are unpacked but not used afterwards. Nothing is returned.
    """
    corpus = CorpusTE(
        train_file='DataSetsEraldo/dataSetSupernatural.txt',
        vocab_file='twitter_hashtag/twitterhashtags.vocab',
    )
    x, y = corpus.prepare()
    print(corpus.size)
    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(x, y)

    model_config = TCNNConfig()
    model_config.num_epochs = 10

    # NOTE(review): this vocab path has no 'twitter_hashtag/' prefix and the
    # dataset file is the Bahia one, unlike the corpus above - confirm intent.
    files = FilesConfig(
        vocab_file='twitterhashtags.vocab',
        dataset_file='DataSetsEraldo/dataSetBahia.txt',
    )

    for _ in range(5):
        for fold_corpus in folds:
            model = TextCNN(model_config)

            # These calls act on the full corpus object, not on the fold
            # that is handed to the Trainer below.
            print(corpus.train_distribution())
            corpus.prepare_sample(x, y, size=300)
            corpus.sub_sampling(size=300)
            print(corpus.x_train.shape)

            trainer = Trainer(
                corpus=fold_corpus,
                model=model,
                config=model_config,
                file_config=files,
                verbose=True,
            )
            # train() is expected to yield exactly five values here.
            train_acc, train_loss, val_acc, val_loss, best_epoch = trainer.train()
# Example #3
# 0
def train_cv():
    """Train one ``Trainer`` per item yielded by a ``KFold`` and print the results.

    The corpus is read from the Supernatural dataset file, split with
    ``KFold(corpus, 3, rand=1)``, and each fold is trained with
    ``Trainer.train``. The collected return values are printed as a list.
    Nothing is returned.
    """
    vocab_path = 'twitter_hashtag/twitterhashtags.vocab'
    dataset_path = 'DataSetsEraldo/dataSetSupernatural.txt'

    results = []
    # The same list object is passed to every train() call.
    shared_arg = []

    files = FilesConfig(vocab_file=vocab_path, dataset_file=dataset_path)
    corpus = CorpusTE(train_file=dataset_path, vocab_file=vocab_path)
    x, y = corpus.prepare()
    print(corpus.size)

    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(x, y)

    for fold_corpus in folds:
        trainer = Trainer(corpus=fold_corpus, file_config=files, verbose=True)
        outcome = trainer.train(shared_arg)
        results.append(outcome)

    print(results)
    print(':)')
def pre_rs_supernatural():
    """Run a single ``Tuner.random_search_cv`` call with both freeze flags set.

    Loads the Supernatural corpus, prints its size, prepares a ``KFold`` and
    invokes the tuner with ``execs=1`` and ``cv=1``. Nothing is returned;
    completion is reported with ``print``.
    """
    vocab_path = 'twitter_hashtag/twitterhashtags.vocab'

    files = FilesConfig(vocab_file=vocab_path,
                        dataset_file='twitter_hashtag/out.txt',
                        task='supernatural')
    corpus = CorpusTE(train_file='DataSetsEraldo/dataSetSupernatural.txt',
                      vocab_file=vocab_path)
    x, y = corpus.prepare()
    print(corpus.size)

    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(x, y)

    tuner = Tuner(corpus, files)
    search_options = dict(
        execs=1,
        epoch_limits=(100, 6),
        lr_limits=(1e-5, 1e-2),
        cv=1,
        folds=folds,
        freeze_lr=True,
        freeze_epochs=True,
    )
    tuner.random_search_cv(**search_options)
    print("PRS finished!\n")
def supernatural_lltrain():
    """Run ``Tuner.random_search_rsplit`` using a ``model_load`` callback.

    Loads the Supernatural corpus, builds a two-class ``TCNNConfig`` and
    passes it, together with two checkpoint paths (``.emb`` and ``.convs``),
    as ``args`` for the ``model_load`` callback given to the ``Tuner``. The
    search runs over a ``RandomSplit`` of the corpus. Nothing is returned.
    """
    vocab_path = 'twitter_hashtag/twitterhashtags.vocab'

    files = FilesConfig(vocab_file=vocab_path,
                        dataset_file='twitter_hashtag/out.txt',
                        task='10llsupernatural')
    corpus = CorpusTE(train_file='DataSetsEraldo/dataSetSupernatural.txt',
                      vocab_file=vocab_path)
    x, y = corpus.prepare()
    print(corpus.size)

    # NOTE(review): this KFold is never used afterwards; the splitter that
    # reaches the tuner is the RandomSplit below. It is kept because
    # prepare_fold may touch the corpus - confirm whether it can be dropped.
    KFold(corpus, 3, rand=1).prepare_fold(x, y)

    model_config = TCNNConfig()
    model_config.num_classes = 2

    callback_args = [
        model_config,
        '../experiments/1kthashtag.2019-10-21/checkpoints/model21102019-211333epc200lr0.0001.emb',
        '../experiments/1kthashtag.2019-10-21/checkpoints/model21102019-211333epc200lr0.0001.convs',
    ]

    # The prepared data is attached to the splitter by attribute assignment.
    splits = RandomSplit(corpus=corpus, n=10, sub=350)
    splits.x = x
    splits.y = y

    tuner = Tuner(corpus, files, callback=model_load, args=callback_args,
                  rand=False)
    search_options = dict(
        execs=4,
        rsplits=splits,
        epoch_limits=(5, 6),
        lr_limits=(1e-5, 1e-2),
        freeze_epochs=True,
        freeze_lr=False,
        r=10,
    )
    tuner.random_search_rsplit(**search_options)

    print("RS finished!\n")
# Example #6
# 0
def test_kfold_rs():
    """Exercise ``Tuner.random_search_cv`` with a ``KFold`` over the corpus.

    Loads the Supernatural corpus, prints its size, prepares the folds and
    calls the tuner with ``execs=5``, ``cv=4`` and ``freeze_lr=True``.
    Nothing is returned; completion is reported with ``print``.
    """
    # NOTE(review): this config is built but never passed to anything
    # below - confirm whether it is still needed.
    model_config = TCNNConfig()
    model_config.num_epochs = 4

    vocab_path = 'twitter_hashtag/twitterhashtags.vocab'
    files = FilesConfig(vocab_file=vocab_path,
                        dataset_file='twitter_hashtag/out.txt')
    corpus = CorpusTE(train_file='DataSetsEraldo/dataSetSupernatural.txt',
                      vocab_file=vocab_path)
    x, y = corpus.prepare()
    print(corpus.size)

    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(x, y)

    tuner = Tuner(corpus, files)
    search_options = dict(
        execs=5,
        epoch_limits=(10, 30),
        lr_limits=(0.0001, 0.01),
        cv=4,
        folds=folds,
        freeze_lr=True,
    )
    tuner.random_search_cv(**search_options)
    print("RS finished!\n")