    # Load the training data.
    train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

    # Build the model.
    model = LSTMClassifier(args.embedding_dim, args.hidden_dim,
                           args.vocab_size).to(device)

    with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
        model.word_dict = pickle.load(f)

    print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".
          format(args.embedding_dim, args.hidden_dim, args.vocab_size))

    # Train the model.
    optimizer = optim.Adam(model.parameters())
    loss_fn = torch.nn.BCELoss()

    train(model, train_loader, args.epochs, optimizer, loss_fn, device)

    # Save the parameters used to construct the model
    model_info_path = os.path.join(args.model_dir, 'model_info.pth')
    with open(model_info_path, 'wb') as f:
        model_info = {
            'embedding_dim': args.embedding_dim,
            'hidden_dim': args.hidden_dim,
            'vocab_size': args.vocab_size,
        }
        torch.save(model_info, f)

# Save the word_dict
Example #2
# Instantiate the model
model = LSTMClassifier(12, 20, len(all_words), len(tags)).to(device)

# Print the model architecture
print(model)

# No. of trainable parameters
# def count_parameters(model):
#     return sum(p.numel() for p in model.parameters() if p.requires_grad)

# print(f'The model has {count_parameters(model):,} trainable parameters')

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(dtype=torch.long).to(device)
        labels = labels.to(dtype=torch.long).to(device)
        # Forward pass
        outputs = model(words)
        # print("output_shape")
        # print(outputs.shape)
        # if y would be one-hot, we must apply
        # labels = torch.max(labels, 1)[1]
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example #3
def main():
    data, labels = read_imdb_data()
    train_X, test_X, train_y, test_y = prepare_imdb_data(data, labels)
    # Store the preprocessed data as a cache
    cache_dir = os.path.join(
        "cache", "sentiment_analysis")  # where to store cache files
    os.makedirs(cache_dir, exist_ok=True)  # ensure cache directory exists
    # Preprocess data
    train_X, test_X, train_y, test_y = preprocess_data(train_X, test_X,
                                                       train_y, test_y,
                                                       cache_dir)
    # Build the word dict from the reviews
    word_dict = build_dict(train_X)
    # Store the word dict for future reference
    data_dir = 'data/pytorch'  # The folder we will use for storing data
    if not os.path.exists(data_dir):  # Make sure that the folder exists
        os.makedirs(data_dir)
    with open(os.path.join(data_dir, 'word_dict.pkl'), "wb") as f:
        pickle.dump(word_dict, f)

    train_X, train_X_len = convert_and_pad_data(word_dict, train_X)
    test_X, test_X_len = convert_and_pad_data(word_dict, test_X)

    # Store the processed data
    pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X_len), pd.DataFrame(train_X)], axis=1) \
        .to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)

    loadEnv()
    # Accessing variables.
    access_key_id = os.getenv('ACCESS_KEY_ID')
    secret_key = os.getenv('SECRET_KEY')
    region = os.getenv('AWS_REGION')
    execution_role = os.getenv('EXEC_ROLE')
    # create sagemaker session
    session = boto3.Session(aws_access_key_id=access_key_id,
                            aws_secret_access_key=secret_key,
                            region_name=region)

    sagemaker_session = sagemaker.Session(boto_session=session)

    # Upload the data to the S3 bucket
    bucket = sagemaker_session.default_bucket()
    prefix = 'sagemaker/sentiment_rnn'
    role = execution_role
    input_data = sagemaker_session.upload_data(path=data_dir,
                                               bucket=bucket,
                                               key_prefix=prefix)

    # Read in only the first 250 rows
    train_sample = pd.read_csv(os.path.join(data_dir, 'train.csv'),
                               header=None,
                               names=None,
                               nrows=250)

    # Turn the input pandas dataframe into tensors
    train_sample_y = torch.from_numpy(
        train_sample[[0]].values).float().squeeze()
    train_sample_X = torch.from_numpy(train_sample.drop([0],
                                                        axis=1).values).long()

    # Build the dataset
    train_sample_ds = torch.utils.data.TensorDataset(train_sample_X,
                                                     train_sample_y)
    # Build the dataloader
    train_sample_dl = torch.utils.data.DataLoader(train_sample_ds,
                                                  batch_size=50)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    lstm_model = LSTMClassifier(32, 100, 5000).to(device)
    optimizer = optim.Adam(lstm_model.parameters())
    loss_fn = torch.nn.BCELoss()

    train(lstm_model, train_sample_dl, 5, optimizer, loss_fn, device)

    estimator = PyTorch(entry_point="train.py",
                        source_dir="train",
                        role=role,
                        framework_version='0.4.0',
                        train_instance_count=1,
                        train_instance_type='ml.m4.xlarge',
                        hyperparameters={
                            'epochs': 10,
                            'hidden_dim': 200,
                        })
    estimator.fit({'training': input_data})

    # Deploy the trained model
    class StringPredictor(RealTimePredictor):
        def __init__(self, endpoint_name, sagemaker_session):
            super(StringPredictor, self).__init__(endpoint_name,
                                                  sagemaker_session,
                                                  content_type='text/plain')

    py_model = PyTorchModel(model_data=estimator.model_data,
                            role=role,
                            framework_version='0.4.0',
                            entry_point='predict.py',
                            source_dir='serve',
                            predictor_cls=StringPredictor)
    pytorch_predictor = py_model.deploy(initial_instance_count=1,
                                        instance_type='ml.m4.xlarge')
    print(pytorch_predictor.endpoint)
    return
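Example #3 relies on build_dict and convert_and_pad_data, which are not shown in this excerpt. A minimal sketch of what they might look like, assuming the reviews are already tokenized into word lists and a fixed pad length of 500 (both assumptions, not taken from the code above):

import numpy as np

def build_dict(data, vocab_size=5000):
    # Count word frequencies across all tokenized reviews (assumed helper).
    word_count = {}
    for review in data:
        for word in review:
            word_count[word] = word_count.get(word, 0) + 1
    # Most frequent words first; IDs 0 and 1 are reserved for padding and
    # infrequent (out-of-vocabulary) words respectively.
    sorted_words = sorted(word_count, key=word_count.get, reverse=True)
    return {word: idx + 2 for idx, word in enumerate(sorted_words[:vocab_size - 2])}

def convert_and_pad(word_dict, sentence, pad=500):
    NOWORD = 0  # padding token
    INFREQ = 1  # out-of-vocabulary token
    working_sentence = [NOWORD] * pad
    for i, word in enumerate(sentence[:pad]):
        working_sentence[i] = word_dict.get(word, INFREQ)
    return working_sentence, min(len(sentence), pad)

def convert_and_pad_data(word_dict, data, pad=500):
    # Apply convert_and_pad to every review; return arrays of IDs and lengths.
    result, lengths = [], []
    for sentence in data:
        converted, length = convert_and_pad(word_dict, sentence, pad)
        result.append(converted)
        lengths.append(length)
    return np.array(result), np.array(lengths)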
Example #4
    # Load the training data.
    train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

    # Build the model.
    model = LSTMClassifier(args.embedding_dim, args.hidden_dim,
                           args.vocab_size).to(device)

    with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
        model.word_dict = pickle.load(f)

    print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".
          format(args.embedding_dim, args.hidden_dim, args.vocab_size))

    # Train the model.
    optimizer = optim.Adam(model.parameters(), lr=0.007)
    loss_fn = torch.nn.BCELoss()

    train(model, train_loader, args.epochs, optimizer, loss_fn, device)

    # Save the parameters used to construct the model
    model_info_path = os.path.join(args.model_dir, 'model_info.pth')
    with open(model_info_path, 'wb') as f:
        model_info = {
            'embedding_dim': args.embedding_dim,
            'hidden_dim': args.hidden_dim,
            'vocab_size': args.vocab_size,
        }
        torch.save(model_info, f)

# Save the word_dict
Example #5
# Dimensionality of the word vectors
EMBEDDING_DIM = 10
# Dimensionality of the hidden layer
HIDDEN_DIM = 128
# Number of words in the whole dataset
VOCAB_SIZE = len(word2index)
# Number of target categories
TAG_SIZE = len(classes)

# Declare the model
model = LSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, VOCAB_SIZE, TAG_SIZE)
# Use NLLLoss() as the loss function (NLLLoss goes with LogSoftmax outputs)
loss_function = nn.NLLLoss()
# Optimizer; the loss decreases slowly, so this choice may need revisiting
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Store the total loss for each epoch
losses = []

for epoch in range(100):
    all_loss = 0
    for text, cls in zip(traindata['Text'], traindata['Class']):
        # Reset the gradients held by the model
        model.zero_grad()
        # Convert the sentence into a sequence of word IDs (a form the model can consume)
        inputs = sentence2index(text)
        # Get the result of the forward pass
        out = model(inputs)
        # Turn the correct category into a tensor
        answer = class2tensor(cls)
        # Compute the loss, backpropagate, and update the parameters
        loss = loss_function(out, answer)
        loss.backward()
        optimizer.step()
        all_loss += loss.item()
    losses.append(all_loss)
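Example #5 also calls sentence2index and class2tensor, which are not part of the excerpt. A minimal sketch, assuming word2index maps tokens to integer IDs, classes is a list of label names, and the text is whitespace-tokenized (the original likely uses a morphological analyzer):

import torch

def sentence2index(sentence):
    # Convert a whitespace-tokenized sentence into a LongTensor of word IDs
    # (assumed implementation).
    return torch.tensor([word2index[w] for w in sentence.split()], dtype=torch.long)

def class2tensor(cls):
    # Convert a class label into a one-element LongTensor of its index.
    return torch.tensor([classes.index(cls)], dtype=torch.long)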
Example #6
class Trainer:
    def __init__(self,
                 config,
                 n_gpu,
                 vocab,
                 train_loader=None,
                 val_loader=None):
        self.config = config
        self.vocab = vocab
        self.n_gpu = n_gpu
        self.train_loader = train_loader
        self.val_loader = val_loader

        # Build model
        vocab_size = self.vocab.vocab_size()

        self.model = LSTMClassifier(self.config, vocab_size,
                                    self.config.n_label)
        self.model.to(device)

        if self.n_gpu > 1:
            self.model = nn.DataParallel(self.model)

        # Build optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr)

        # Build criterion
        self.criterion = nn.CrossEntropyLoss()

    def train(self):
        best_f1 = 0.0
        best_acc = 0.0
        global_step = 0
        batch_f1 = []
        batch_acc = []
        for epoch in range(self.config.num_epoch):
            batch_loss = []
            for step, batch in enumerate(self.train_loader):
                self.model.train()
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(
                    outputs['logits'].view(-1, self.config.n_label),
                    labels.view(-1))

                f1, acc = ic_metric(labels.cpu(),
                                    outputs['predicted_intents'].cpu())

                if self.n_gpu > 1:
                    loss = loss.mean()

                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

                global_step += 1
                batch_loss.append(loss.float().item())
                batch_f1.append(f1)
                batch_acc.append(acc)

                if (global_step
                        == 1) or (global_step % self.config.log_interval == 0):
                    mean_loss = np.mean(batch_loss)
                    mean_f1 = np.mean(batch_f1)
                    mean_acc = np.mean(batch_acc)
                    batch_loss = []
                    nsml.report(summary=True,
                                scope=locals(),
                                epoch=epoch,
                                train_loss=mean_loss,
                                step=global_step)

                if (global_step > 0) and (global_step %
                                          self.config.val_interval == 0):
                    val_loss, val_f1, val_acc = self.evaluation()
                    nsml.report(summary=True,
                                scope=locals(),
                                epoch=epoch,
                                val_loss=val_loss,
                                val_f1=val_f1,
                                val_acc=val_acc,
                                step=global_step)

                    if val_f1 > best_f1:
                        best_f1 = val_f1
                        best_acc = val_acc
                        nsml.save(global_step)

    def evaluation(self):
        self.model.eval()
        total_loss = []
        preds = []
        targets = []
        with torch.no_grad():
            for step, batch in enumerate(self.val_loader):
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(
                    outputs['logits'].view(-1, self.config.n_label),
                    labels.view(-1))

                pred = outputs['predicted_intents'].squeeze(
                    -1).cpu().numpy().tolist()
                target = labels.cpu().numpy().tolist()

                preds.extend(pred)
                targets.extend(target)
                total_loss.append(loss.float().item())

        mean_loss = np.mean(total_loss)
        mean_f1, mean_acc = ic_metric(targets, preds)
        return mean_loss, mean_f1, mean_acc
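The Trainer above calls a sort_batch helper that is not shown. A minimal sketch, assuming it reorders each (input_ids, input_lengths, labels) batch by descending sequence length, as pack_padded_sequence-style LSTM models typically require:

import torch

def sort_batch(batch):
    # Sort a (input_ids, input_lengths, labels) batch by descending length
    # (assumed behavior; not taken from the original example).
    input_ids, input_lengths, labels = batch
    sorted_lengths, sorted_idx = input_lengths.sort(descending=True)
    return input_ids[sorted_idx], sorted_lengths, labels[sorted_idx]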
Example #7
    # Load the training data.
    train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

    # Build the model.
    model = LSTMClassifier(args.embedding_dim, args.hidden_dim,
                           args.vocab_size).to(device)

    with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
        model.word_dict = pickle.load(f)

    print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".
          format(args.embedding_dim, args.hidden_dim, args.vocab_size))

    # Train the model.
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    loss_fn = torch.nn.BCELoss()

    train(model, train_loader, args.epochs, optimizer, loss_fn, device)

    # Save the parameters used to construct the model
    model_info_path = os.path.join(args.model_dir, 'model_info.pth')
    with open(model_info_path, 'wb') as f:
        model_info = {
            'embedding_dim': args.embedding_dim,
            'hidden_dim': args.hidden_dim,
            'vocab_size': args.vocab_size,
        }
        torch.save(model_info, f)

# Save the word_dict
Example #8
def train(model, train_loader, epochs, optimizer, loss_fn, device):
    """
    This is the training method that is called by the PyTorch training script. The parameters
    passed are as follows:
    model        - The PyTorch model that we wish to train.
    train_loader - The PyTorch DataLoader that should be used during training.
    epochs       - The total number of epochs to train for.
    optimizer    - The optimizer to use during training.
    loss_fn      - The loss function used for training.
    device       - Where the model and data should be loaded (gpu or cpu).
    """
    
    # Training loop, as developed in the notebook.
    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0
        for batch in train_loader:
            batch_X, batch_y = batch

            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            out = model(batch_X)
            loss = loss_fn(out, batch_y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        print("Epoch: {}, BCELoss: {}".format(epoch, total_loss / len(train_loader)))


if __name__ == '__main__':
    # All of the model parameters and training parameters are sent as arguments when the script
    # is executed. Here we set up an argument parser to easily access the parameters.

    parser = argparse.ArgumentParser()

    # Training Parameters
    parser.add_argument('--batch-size', type=int, default=512, metavar='N',
                        help='input batch size for training (default: 512)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')

    # Model Parameters
    parser.add_argument('--embedding_dim', type=int, default=32, metavar='N',
                        help='size of the word embeddings (default: 32)')
    parser.add_argument('--hidden_dim', type=int, default=100, metavar='N',
                        help='size of the hidden dimension (default: 100)')
    parser.add_argument('--vocab_size', type=int, default=5000, metavar='N',
                        help='size of the vocabulary (default: 5000)')

    # SageMaker Parameters
    parser.add_argument('--hosts', type=list, default=json.loads(os.environ['SM_HOSTS']))
    parser.add_argument('--current-host', type=str, default=os.environ['SM_CURRENT_HOST'])
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--data-dir', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
    parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])

    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device {}.".format(device))

    torch.manual_seed(args.seed)

    # Load the training data.
    train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

    # Build the model.
    model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)

    with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
        model.word_dict = pickle.load(f)

    print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
        args.embedding_dim, args.hidden_dim, args.vocab_size
    ))

    # Train the model.
    optimizer = optim.Adam(model.parameters())
    loss_fn = torch.nn.BCELoss()

    train(model, train_loader, args.epochs, optimizer, loss_fn, device)

    # Save the parameters used to construct the model
    model_info_path = os.path.join(args.model_dir, 'model_info.pth')
    with open(model_info_path, 'wb') as f:
        model_info = {
            'embedding_dim': args.embedding_dim,
            'hidden_dim': args.hidden_dim,
            'vocab_size': args.vocab_size,
        }
        torch.save(model_info, f)

    # Save the word_dict
    word_dict_path = os.path.join(args.model_dir, 'word_dict.pkl')
    with open(word_dict_path, 'wb') as f:
        pickle.dump(model.word_dict, f)

    # Save the model parameters
    model_path = os.path.join(args.model_dir, 'model.pth')
    with open(model_path, 'wb') as f:
        torch.save(model.cpu().state_dict(), f)
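
The _get_train_data_loader helper used throughout these snippets is never shown. A minimal sketch, consistent with how Example #3 reads sample rows back from train.csv (label in column 0, followed by the review length and the padded word IDs):

import os
import pandas as pd
import torch
import torch.utils.data

def _get_train_data_loader(batch_size, training_dir):
    print("Get train data loader.")

    # train.csv has no header: column 0 is the label, the remaining columns
    # hold the review length and the padded word IDs.
    train_data = pd.read_csv(os.path.join(training_dir, "train.csv"),
                             header=None, names=None)

    train_y = torch.from_numpy(train_data[[0]].values).float().squeeze()
    train_X = torch.from_numpy(train_data.drop([0], axis=1).values).long()

    train_ds = torch.utils.data.TensorDataset(train_X, train_y)

    return torch.utils.data.DataLoader(train_ds, batch_size=batch_size)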