"""Train a QRNN classifier on the IMDB dataset with GloVe pretrained vectors.

Flat example script: resolves dataset/vector paths, then declares the
experiment hyperparameters. (Model construction/training presumably follows
in a later chunk of this file — not visible here.)
"""
import logging  # FIX: `logging.basicConfig` below was used without importing logging

import torch
from torch import nn

from nlplay.data.cache import WordVectorsManager, WV, DS, DSManager
from nlplay.models.pytorch.classifiers.qrnn import QRNN
from nlplay.models.pytorch.pretrained import get_pretrained_vecs
from nlplay.features.text_cleaner import *
from nlplay.models.pytorch.trainer import PytorchModelTrainer
from nlplay.models.pytorch.dataset import DSGenerator
from nlplay.utils import utils

logging.basicConfig(
    format='%(asctime)s %(message)s',
    level=logging.DEBUG,
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Input data files: IMDB train/test/val partitions plus GloVe 6B-100d vectors
ds = DSManager(DS.IMDB.value)
train_csv, test_csv, val_csv = ds.get_partition_paths()
lm = WordVectorsManager(WV.GLOVE_EN6B_100.value)
pretrained_vec = lm.get_wv_path()

# Model Parameters
num_epochs = 10
batch_size = 64
ngram_range = (1, 1)      # unigrams only
max_features = 20000      # vocabulary cap
max_seq = 80              # max tokens per document
embedding_size = 100      # must match GLOVE_EN6B_100 dimensionality
dropout = 0.3
lr = 0.001
num_workers = 1
import logging import torch from torch import nn from nlplay.data.cache import DSManager, DS from nlplay.features.text_cleaner import base_cleaner from nlplay.models.pytorch.classifiers.linear import SMLinearModel from nlplay.models.pytorch.dataset import CSRDatasetGenerator from nlplay.models.pytorch.trainer import PytorchModelTrainer logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG, datefmt="%Y-%m-%d %H:%M:%S") # Input data files ds = DSManager(DS.IMDB.value) train_csv, test_csv, val_csv = ds.get_partition_paths() # Model Parameters batch_size = 512 learning_rate = 0.0075 weight_decay = 0.000005 ngram_range = (1, 2) min_df = 5 max_df = 0.87 max_features = 50000 sublinear_tf = True stop_words = None num_epochs = 8 num_workers = 1 # Data preparation ds = CSRDatasetGenerator() train_ds, val_ds = ds.from_csv(train_file=train_csv, val_file=test_csv, ngram_range=ngram_range,
"""Train an EXAM classifier on the AG_NEWS dataset.

Flat example script: configures logging, resolves the AG_NEWS partitions,
declares the experiment hyperparameters, and instantiates the dataset
generator (data preparation continues past this chunk).
"""
import logging

import torch
from torch import nn

from nlplay.data.cache import DSManager, DS
from nlplay.features.text_cleaner import *
from nlplay.models.pytorch.classifiers.exam import EXAM
from nlplay.models.pytorch.dataset import DSGenerator
from nlplay.models.pytorch.trainer import PytorchModelTrainer

logging.basicConfig(
    format="%(asctime)s %(message)s",
    level=logging.DEBUG,
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Input data files
ds = DSManager(DS.AG_NEWS.value)
train_csv, test_csv, val_csv = ds.get_partition_paths()

# Inputs & Model Parameters
num_epochs = 3
batch_size = 16
ngram_range = (1, 1)     # unigrams only
region_size = 7          # EXAM region-embedding window
max_features = 100000    # vocabulary cap
max_seq = 256            # max tokens per document
embedding_size = 128
dropout = 0.2
lr = 0.0001
num_workers = 1

# Data preparation
ds = DSGenerator()
"""Train Zhang et al.'s character-level CNN on the IMDB dataset.

Flat example script: builds the character vocabulary and index maps, then
declares the experiment parameters (training presumably follows in a later
chunk of this file — not visible here).
"""
import logging  # FIX: `logging.basicConfig` below was used without importing logging
import string   # FIX: `string.ascii_lowercase` etc. were used without importing string

import pandas as pd
from torch import nn
from torch.utils.data import TensorDataset

from nlplay.data.cache import DSManager, DS
from nlplay.features.text_cleaner import base_cleaner
from nlplay.models.pytorch.classifiers.charcnn import CharCNN_Zhang
from nlplay.models.pytorch.trainer import PytorchModelTrainer
from nlplay.models.pytorch.utils import char_vectorizer
from nlplay.utils.parlib import parallelApply

logging.basicConfig(
    format="%(asctime)s %(message)s",
    level=logging.DEBUG,
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Input data files
ds = DSManager(DS.IMDB.value)
train_csv, test_csv, val_csv = ds.get_partition_paths()

# Vocabulary Setup: lowercase letters, digits, punctuation, and newline
vocab = (
    list(string.ascii_lowercase)
    + list(string.digits)
    + list(string.punctuation)
    + ["\n"]
)
char2idx = {}
idx2char = {}
# FIX: was `list(set(vocab))`, whose ordering varies between interpreter runs,
# making char2idx non-reproducible; sorting gives a deterministic index map.
vocab = sorted(set(vocab))
for idx, t in enumerate(vocab):
    char2idx[t] = idx

# Experiment parameters
max_seq = 1014  # fixed input length from Zhang et al. (char-CNN paper)
vocabulary_size = len(vocab)
num_epochs = 100