Example #1
# (Excerpt: torch, torch.nn as nn, torch.optim as optim, the parsed `args`,
# `dataloader`, and `RNNModel` are assumed to be defined earlier in the script.)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

train_data = dataloader('train_shuf.txt', args.batch_size, args.bptt)
val_data = dataloader('val.txt', args.batch_size, args.bptt)

eval_batch_size = args.batch_size

###############################################################################
# Build the model
###############################################################################

ntokens = 27  # character-level vocabulary size (presumably 26 letters plus one extra symbol)
model = RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied)

optimizer = optim.SGD(model.parameters(), lr=args.lr)

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
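
# A hedged sketch of how repackage_hidden is typically used in a
# truncated-BPTT training loop; `get_batch` and `model.init_hidden` are
# assumed helpers, not defined in this excerpt.
def train_one_epoch():
    model.train()
    hidden = model.init_hidden(args.batch_size)
    for i in range(0, train_data.size(0) - 1, args.bptt):
        data, targets = get_batch(train_data, i)
        # Detach the hidden state so gradients stop at the window boundary.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        optimizer.step()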

Example #2

import sys

import torch
from torch import nn

sys.path.append("../d2l_func/")
from data_prepare import load_data_jay_song, data_iter_random, data_iter_consecutive, to_onehot
from model_train import train_rnn_pytorch
from predict import predict_rnn_pytorch
from rnn_model import RNNModel

if __name__ == "__main__":
    # load data
    corpus_index, char_to_idx, vocab_set, vocab_size = load_data_jay_song()
    # model
    hidden_num = 256
    rnn_layer = nn.LSTM(vocab_size, hidden_num)
    model = RNNModel(rnn_layer, vocab_size)
    model = model.cuda()
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    params = {
        "epoch_num": 10,
        "model": model,
        "loss": loss,
        "optimizer": optimizer,
        "batch_size": 64,
        "num_step": 32,
        "corpus_index": corpus_index,
        "data_iter": data_iter_consecutive,
        "char_to_idx": char_to_idx,
        "vocab_set": vocab_set,
        "vocab_size": vocab_size,
        "predict_rnn_pytorch": predict_rnn_pytorch,
        "pred_num": 50,
Example #3
class RNNClassifier(BaseClassifier):
    def __init__(self):
        super().__init__('RNN')
        self.hyper_parameters = {}  # dictionary of the chosen hyper-parameters
        self.model = None
        self.criterion = None
        self.optimizer = None
        self.sequence_length = None

    def get_hyper_parameters_grid(self):
        grid = {
            'lr': [0.001, 0.01, 0.1],
            'epochs': [10, 50, 100],
            'n_neurons_fc': [64, 128, 256],
            'hidden_dim': [64, 128, 256]
        }
        return grid

    def set_hyper_parameters(self, hyper_parameters_dict):
        self.hyper_parameters = hyper_parameters_dict

    def set_best_hyper_parameters(self):
        self.hyper_parameters = {
            'lr': 0.01,
            'epochs': 50,
            'n_neurons_fc': 128,
            'hidden_dim': 64
        }

    def fit(self, X, y):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        y_tensor = self.convert_to_tensor(y, target=True)
        n_neurons_fc = self.hyper_parameters['n_neurons_fc']
        hidden_dim = self.hyper_parameters['hidden_dim']
        self.model = RNNModel(num_features=len(X.columns),
                              num_class=2,
                              hidden_dim=hidden_dim,
                              n_neurons_fc=n_neurons_fc,
                              sequence_length=self.sequence_length)
        self.init_loss_and_optimizer()
        epochs = self.hyper_parameters['epochs']
        n_batches = 20
        for epoch in range(epochs):
            for i in range(n_batches):
                # Local batches and labels
                local_X1, local_X2, local_y = self.get_batch(
                    X_tweet_text_tensor, X_other_features_tensor, y_tensor,
                    n_batches, i)
                self.optimizer.zero_grad()

                y_pred = self.model(local_X1, local_X2)
                loss = self.criterion(y_pred, local_y)
                loss.backward()
                self.optimizer.step()

    def predict(self, X):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        outputs = self.model(X_tweet_text_tensor, X_other_features_tensor)

        _, predictions = torch.max(outputs, 1)
        return predictions

    def predict_proba(self, X):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        outputs = self.model(X_tweet_text_tensor, X_other_features_tensor)

        # The model returns raw logits (CrossEntropyLoss applies log-softmax
        # internally), so map them to probabilities before returning.
        predictions = torch.softmax(outputs, dim=1).detach().numpy()
        return predictions

    def init_loss_and_optimizer(self):
        """ Initializes the loss and optimizer for the current .fit
        """
        self.criterion = CrossEntropyLoss()
        self.optimizer = Adam(self.model.parameters(),
                              lr=self.hyper_parameters['lr'])

    def convert_to_tensor(self, df, target=False):
        """ converts the given DataFrame to a tensor.

        :param df: the DataFrame to convert
        :type df: pd.DataFrame
        :param target: indicates whether we are converting the features df (False) or the target df (True). Defaults to False
        :type target: bool
        :return: the converted tensor
        :rtype: torch.Tensor
        """
        if target:
            return torch.LongTensor(df.values)
        return torch.FloatTensor(df.values)

    def get_X_tensors(self, X):
        """ splits the given X df to tweet text indexes for embedding and other extracted features.

        :param X: the df to split
        :type X: pd.DataFrame
        :return: X_tweet_text_tensor, X_other_features_tensor
        :rtype: tuple
        """
        X_tweet_text = X['tweet text']
        X_other_features = X.drop(labels=['tweet text'], axis=1)
        X_tensor_other_features = self.convert_to_tensor(X_other_features,
                                                         target=False)
        indices_list = []
        for words_list in X_tweet_text.values:
            # word_to_ix is assumed to be a module-level word-to-index mapping
            # built during preprocessing.
            indices_list.append([word_to_ix[w] for w in words_list])
        X_tensor_tweet_text = torch.LongTensor(indices_list)
        return X_tensor_tweet_text, X_tensor_other_features

    def get_batch(self, X_tweet_text_tensor, X_other_features_tensor, y_tensor,
                  n_batches, i):
        """ Creates the i'th batch from the given data.

        :param X_tweet_text_tensor: data to get batch from
        :type X_tweet_text_tensor: torch.Tensor
        :param X_other_features_tensor: data to get batch from
        :type X_other_features_tensor: torch.Tensor
        :param y_tensor: data to get batch from
        :type y_tensor: torch.Tensor
        :param n_batches: the amount of total batches we need
        :type n_batches: int
        :param i: the current batch we want to take
        :type i: int
        :return: a tuple of the batched data
        :rtype: tuple
        """
        # n_batches is the number of batches, so derive the batch size from it
        # before slicing (slicing by n_batches directly would yield fixed
        # batches of n_batches samples and skip most of the data).
        batch_size = X_tweet_text_tensor.shape[0] // n_batches
        X1_batch = X_tweet_text_tensor[i * batch_size:(i + 1) * batch_size]
        X2_batch = X_other_features_tensor[i * batch_size:(i + 1) * batch_size]
        y_batch = y_tensor[i * batch_size:(i + 1) * batch_size]
        return X1_batch, X2_batch, y_batch
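
# A hypothetical usage sketch (commented out because BaseClassifier, RNNModel,
# word_to_ix, and the preprocessed DataFrames come from the surrounding
# project and are not defined in this excerpt):
#
#     clf = RNNClassifier()
#     clf.sequence_length = 30            # assumed padded tweet length
#     clf.set_best_hyper_parameters()     # or search get_hyper_parameters_grid()
#     clf.fit(X_train, y_train)
#     probabilities = clf.predict_proba(X_test)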
Example #4
    X_val, Y_val = tensor_loader.load_X_Y_rnn(logger, args.val_table_name, chunk=0, total_chunks=total_chunks, no_gpu=args.no_gpu, validation_set=True)

    N, seq_length, D_in = X_train.shape  # number of samples, sequence length, number of features
    # Hidden-layer width and output dimension depend on the label set size.
    if args.top100_labels:
        H, D_out = 1000, 100
    else:
        H, D_out = 100, 10

    model = RNNModel(D_in, H, D_out)

    if not args.no_gpu:
        model.cuda()

    loss_fn = torch.nn.BCEWithLogitsLoss(reduction='mean')  # size_average is deprecated
    learning_rate, decay, momentum = 0.01, 1e-6, 0.9
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=decay, momentum=momentum, nesterov=True)

    tb_logger_train = tensorboardX.SummaryWriter(log_dir='../tensorboard_logs/rnn_train_' + str(experiment_id))
    tb_logger_val = tensorboardX.SummaryWriter(log_dir='../tensorboard_logs/rnn_val_' + str(experiment_id))
    metrics_train = defaultdict(list)
    metrics_val = defaultdict(list)
    metrics_test = defaultdict(list)

    epochs = 3  # TODO move to program args
    for chunk in range(total_chunks):
        if chunk > 0:  # Load next chunk (first chunk will already be loaded).
            X_train, Y_train = tensor_loader.load_X_Y_rnn(logger, args.train_table_name, chunk=chunk, total_chunks=total_chunks, no_gpu=args.no_gpu)
            X_val, Y_val = tensor_loader.load_X_Y_rnn(logger, args.val_table_name, chunk=chunk, total_chunks=total_chunks, no_gpu=args.no_gpu, validation_set=True)

        for epoch in range(epochs):
            # First of all, train the model using the training set.
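            # The snippet is truncated here; a minimal assumed training step
            # for this setup would look roughly like this:
            model.train()
            optimizer.zero_grad()
            y_pred = model(X_train)
            loss = loss_fn(y_pred, Y_train)
            loss.backward()
            optimizer.step()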