def supernatural_rs():
    """Run a random hyper-parameter search (epochs/lr) with 10-fold CV on the
    Supernatural dataset, printing the positive-example count first."""
    file_config = FilesConfig(
        vocab_file='twitter_hashtag/twitterhashtags.vocab',
        dataset_file='DataSetsEraldo/dataSetSupernatural.txt',
        task='supernatural')
    corpus = CorpusTE(
        train_file='DataSetsEraldo/dataSetSupernatural.txt',
        vocab_file='twitter_hashtag/twitterhashtags.vocab')
    samples, labels = corpus.prepare()

    # Report how many positive examples the whole dataset contains.
    label_arr = np.array(labels)
    print('exemplos positivos, todo dataset')
    print(label_arr.sum())
    print(corpus.size)

    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(samples, labels)

    tuner = Tuner(corpus, file_config)
    epoch_limits = (100, 0)
    lr_limits = (1e-5, 1e-1)
    # Epochs are frozen; only the learning rate is sampled across 6 runs.
    tuner.random_search_cv(execs=6, epoch_limits=epoch_limits,
                           lr_limits=lr_limits, cv=10, folds=folds,
                           freeze_epochs=True, freeze_lr=False)
    print("RS finished!\n")
def test_kfold():
    """Smoke-test k-fold training: for each of 5 repetitions, train a fresh
    TextCNN on every fold after sub-sampling the corpus to 300 examples."""
    corpus = CorpusTE(
        train_file='DataSetsEraldo/dataSetSupernatural.txt',
        vocab_file='twitter_hashtag/twitterhashtags.vocab')
    samples, labels = corpus.prepare()
    print(corpus.size)

    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(samples, labels)

    cnn_config = TCNNConfig()
    cnn_config.num_epochs = 10
    file_config = FilesConfig(
        vocab_file='twitterhashtags.vocab',
        dataset_file='DataSetsEraldo/dataSetBahia.txt')

    for _ in range(5):
        for fold in folds:
            # A brand-new model per fold so runs do not share weights.
            model = TextCNN(cnn_config)
            print(corpus.train_distribution())
            corpus.prepare_sample(samples, labels, size=300)
            corpus.sub_sampling(size=300)
            print(corpus.x_train.shape)
            trainer = Trainer(corpus=fold, model=model, config=cnn_config,
                              file_config=file_config, verbose=True)
            # Metrics are computed but intentionally not aggregated here.
            train_acc, train_loss, val_acc, val_loss, best_epoch = trainer.train()
def train_cv():
    """Train once per cross-validation fold on the Supernatural dataset and
    print the list of per-fold training results."""
    last_results = []
    details = []
    file_config = FilesConfig(
        vocab_file='twitter_hashtag/twitterhashtags.vocab',
        dataset_file='DataSetsEraldo/dataSetSupernatural.txt')
    corpus = CorpusTE(
        train_file='DataSetsEraldo/dataSetSupernatural.txt',
        vocab_file='twitter_hashtag/twitterhashtags.vocab')
    samples, labels = corpus.prepare()
    print(corpus.size)

    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(samples, labels)

    for fold in folds:
        # NOTE(review): Trainer is built without model/config here, unlike
        # the other call sites — presumably Trainer supplies defaults; verify.
        trainer = Trainer(corpus=fold, file_config=file_config, verbose=True)
        last_results.append(trainer.train(details))

    print(last_results)
    print(':)')
def pre_rs_supernatural():
    """Run a single preliminary random-search execution (everything frozen)
    as a sanity check before the full search on the Supernatural task."""
    file_config = FilesConfig(
        vocab_file='twitter_hashtag/twitterhashtags.vocab',
        dataset_file='twitter_hashtag/out.txt',
        task='supernatural')
    corpus = CorpusTE(
        train_file='DataSetsEraldo/dataSetSupernatural.txt',
        vocab_file='twitter_hashtag/twitterhashtags.vocab')
    samples, labels = corpus.prepare()
    print(corpus.size)

    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(samples, labels)

    tuner = Tuner(corpus, file_config)
    epoch_limits = (100, 6)
    lr_limits = (1e-5, 1e-2)
    # Both epochs and lr frozen, one exec, one CV split: a dry run only.
    tuner.random_search_cv(execs=1, epoch_limits=epoch_limits,
                           lr_limits=lr_limits, cv=1, folds=folds,
                           freeze_lr=True, freeze_epochs=True)
    print("PRS finished!\n")
def supernatural_lltrain():
    """Random-search the learning rate for last-layer training, loading
    pretrained embedding/conv weights via the model_load callback and using
    random sub-sampled splits instead of k-fold CV."""
    file_config = FilesConfig(
        vocab_file='twitter_hashtag/twitterhashtags.vocab',
        dataset_file='twitter_hashtag/out.txt',
        task='10llsupernatural')
    corpus = CorpusTE(
        train_file='DataSetsEraldo/dataSetSupernatural.txt',
        vocab_file='twitter_hashtag/twitterhashtags.vocab')
    samples, labels = corpus.prepare()
    print(corpus.size)

    # NOTE(review): this KFold is prepared and then immediately shadowed by
    # the RandomSplit below — looks like dead work unless prepare_fold has
    # side effects on the corpus; confirm before removing.
    f = KFold(corpus, 3, rand=1)
    f.prepare_fold(samples, labels)

    cnn_config_s = TCNNConfig()
    cnn_config_s.num_classes = 2
    # Checkpoint paths for the pretrained embedding and conv layers.
    args = [
        cnn_config_s,
        '../experiments/1kthashtag.2019-10-21/checkpoints/model21102019-211333epc200lr0.0001.emb',
        '../experiments/1kthashtag.2019-10-21/checkpoints/model21102019-211333epc200lr0.0001.convs'
    ]

    f = RandomSplit(corpus=corpus, n=10, sub=350)
    f.x = samples
    f.y = labels

    tuner = Tuner(corpus, file_config, callback=model_load, args=args,
                  rand=False)
    epoch_limits = (5, 6)
    lr_limits = (1e-5, 1e-2)
    # Epochs frozen; lr sampled across 4 executions, 10 repetitions each.
    tuner.random_search_rsplit(execs=4, rsplits=f, epoch_limits=epoch_limits,
                               lr_limits=lr_limits, freeze_epochs=True,
                               freeze_lr=False, r=10)
    print("RS finished!\n")
def test_kfold_rs():
    """Smoke-test the random-search-over-CV path with a small budget
    (5 executions, 4 CV splits, learning rate frozen)."""
    cnn_config = TCNNConfig()
    cnn_config.num_epochs = 4
    file_config = FilesConfig(
        vocab_file='twitter_hashtag/twitterhashtags.vocab',
        dataset_file='twitter_hashtag/out.txt')
    corpus = CorpusTE(
        train_file='DataSetsEraldo/dataSetSupernatural.txt',
        vocab_file='twitter_hashtag/twitterhashtags.vocab')
    samples, labels = corpus.prepare()
    print(corpus.size)

    folds = KFold(corpus, 3, rand=1)
    folds.prepare_fold(samples, labels)

    tuner = Tuner(corpus, file_config)
    epoch_limits = (10, 30)
    lr_limits = (0.0001, 0.01)
    tuner.random_search_cv(execs=5, epoch_limits=epoch_limits,
                           lr_limits=lr_limits, cv=4, folds=folds,
                           freeze_lr=True)
    print("RS finished!\n")