# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# Build the model and attach the word dictionary produced during preprocessing.
model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)
with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
    model.word_dict = pickle.load(f)

print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
    args.embedding_dim, args.hidden_dim, args.vocab_size))

# Train the model with Adam (default learning rate) and binary cross-entropy.
optimizer = optim.Adam(model.parameters())
loss_fn = torch.nn.BCELoss()
train(model, train_loader, args.epochs, optimizer, loss_fn, device)

# Save the parameters used to construct the model
model_info_path = os.path.join(args.model_dir, 'model_info.pth')
with open(model_info_path, 'wb') as f:
    model_info = {
        'embedding_dim': args.embedding_dim,
        'hidden_dim': args.hidden_dim,
        'vocab_size': args.vocab_size,
    }
    torch.save(model_info, f)

# Save the word_dict
# Instantiate the model and print its architecture.
model = LSTMClassifier(12, 20, len(all_words), len(tags)).to(device)
print(model)

# Loss and optimizer.
# NOTE(review): `criterion` (BCELoss) is never used in the visible code — the
# loop below optimises F.cross_entropy instead; confirm which loss is intended.
criterion = nn.BCELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Train the model.
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        # Move the batch to the target device as integer (long) tensors.
        words = words.to(dtype=torch.long).to(device)
        labels = labels.to(dtype=torch.long).to(device)

        # Forward pass: labels are class indices, so cross-entropy applies directly.
        outputs = model(words)
        loss = F.cross_entropy(outputs, labels)

        # Backward and optimize
def main():
    """Prepare the IMDB data, train a small model locally, then train and deploy on SageMaker."""
    data, labels = read_imdb_data()
    train_X, test_X, train_y, test_y = prepare_imdb_data(data, labels)

    # Directory where preprocessing results are cached between runs.
    cache_dir = os.path.join("cache", "sentiment_analysis")
    os.makedirs(cache_dir, exist_ok=True)

    # Preprocess (or load cached) train/test splits.
    train_X, test_X, train_y, test_y = preprocess_data(
        train_X, test_X, train_y, test_y, cache_dir)

    # Build the word dictionary from the training reviews and persist it.
    word_dict = build_dict(train_X)
    data_dir = 'data/pytorch'  # The folder we will use for storing data
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    with open(os.path.join(data_dir, 'word_dict.pkl'), "wb") as f:
        pickle.dump(word_dict, f)

    # Convert reviews to fixed-length integer sequences.
    train_X, train_X_len = convert_and_pad_data(word_dict, train_X)
    test_X, test_X_len = convert_and_pad_data(word_dict, test_X)

    # Store the processed data as: label, length, padded review tokens.
    pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X_len), pd.DataFrame(train_X)], axis=1) \
        .to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)

    loadEnv()

    # Read AWS credentials/role from the environment.
    access_key_id = os.getenv('ACCESS_KEY_ID')
    secret_key = os.getenv('SECRET_KEY')
    region = os.getenv('AWS_REGION')
    execution_role = os.getenv('EXEC_ROLE')

    # Create a SageMaker session backed by an explicit boto3 session.
    session = boto3.Session(aws_access_key_id=access_key_id,
                            aws_secret_access_key=secret_key,
                            region_name=region)
    sagemaker_session = sagemaker.Session(boto_session=session)

    # Upload the processed data to the default S3 bucket.
    bucket = sagemaker_session.default_bucket()
    prefix = 'sagemaker/sentiment_rnn'
    role = execution_role
    input_data = sagemaker_session.upload_data(path=data_dir, bucket=bucket, key_prefix=prefix)

    # Read in only the first 250 rows for a quick local sanity-check run.
    train_sample = pd.read_csv(os.path.join(data_dir, 'train.csv'),
                               header=None, names=None, nrows=250)

    # Turn the input pandas dataframe into tensors: column 0 is the label.
    train_sample_y = torch.from_numpy(train_sample[[0]].values).float().squeeze()
    train_sample_X = torch.from_numpy(train_sample.drop([0], axis=1).values).long()

    # Build the dataset and dataloader for the local sample run.
    train_sample_ds = torch.utils.data.TensorDataset(train_sample_X, train_sample_y)
    train_sample_dl = torch.utils.data.DataLoader(train_sample_ds, batch_size=50)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    lstm_model = LSTMClassifier(32, 100, 5000).to(device)
    optimizer = optim.Adam(lstm_model.parameters())
    loss_fn = torch.nn.BCELoss()
    train(lstm_model, train_sample_dl, 5, optimizer, loss_fn, device)

    # Launch the full training job on SageMaker.
    estimator = PyTorch(entry_point="train.py",
                        source_dir="train",
                        role=role,
                        framework_version='0.4.0',
                        train_instance_count=1,
                        train_instance_type='ml.m4.xlarge',
                        hyperparameters={
                            'epochs': 10,
                            'hidden_dim': 200,
                        })
    estimator.fit({'training': input_data})

    # Deploy the trained model behind a text/plain endpoint.
    class StringPredictor(RealTimePredictor):
        def __init__(self, endpoint_name, sagemaker_session):
            super(StringPredictor, self).__init__(endpoint_name, sagemaker_session,
                                                  content_type='text/plain')

    py_model = PyTorchModel(model_data=estimator.model_data,
                            role=role,
                            framework_version='0.4.0',
                            entry_point='predict.py',
                            source_dir='serve',
                            predictor_cls=StringPredictor)
    pytorch_predictor = py_model.deploy(initial_instance_count=1,
                                        instance_type='ml.m4.xlarge')
    print(pytorch_predictor.endpoint)
    return
# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# Build the model, then restore the word dictionary saved during preprocessing.
model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)
with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
    model.word_dict = pickle.load(f)

print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
    args.embedding_dim, args.hidden_dim, args.vocab_size))

# Train the model (fixed learning rate of 0.007) with binary cross-entropy.
optimizer = optim.Adam(model.parameters(), lr=0.007)
loss_fn = torch.nn.BCELoss()
train(model, train_loader, args.epochs, optimizer, loss_fn, device)

# Save the parameters used to construct the model
model_info_path = os.path.join(args.model_dir, 'model_info.pth')
with open(model_info_path, 'wb') as f:
    model_info = {
        'embedding_dim': args.embedding_dim,
        'hidden_dim': args.hidden_dim,
        'vocab_size': args.vocab_size,
    }
    torch.save(model_info, f)

# Save the word_dict
# Dimensionality of the word embedding vectors.
EMBEDDING_DIM = 10
# Dimensionality of the hidden layer.
HIDDEN_DIM = 128
# Total number of distinct words in the data.
VOCAB_SIZE = len(word2index)
# Number of target categories.
TAG_SIZE = len(classes)

# Declare the model.
model = LSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, VOCAB_SIZE, TAG_SIZE)
# NLLLoss is used because the model applies LogSoftmax to its outputs.
loss_function = nn.NLLLoss()
# Optimizer — loss decreases slowly with SGD, so the choice may need revisiting.
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Accumulated total loss per epoch.
losses = []
for epoch in range(100):
    all_loss = 0
    for text, cls in zip(traindata['Text'], traindata['Class']):
        # Reset the gradients held by the model.
        model.zero_grad()
        # Convert the sentence into a sequence of word IDs the model can consume.
        inputs = sentence2index(text)
        # Run the forward pass.
        out = model(inputs)
        # Tensorize the gold category label.
        answer = class2tensor(cls)
class Trainer:
    """Trains and evaluates an LSTMClassifier intent model, reporting metrics via NSML."""

    def __init__(self, config, n_gpu, vocab, train_loader=None, val_loader=None):
        self.config = config
        self.vocab = vocab
        self.n_gpu = n_gpu
        self.train_loader = train_loader
        self.val_loader = val_loader

        # Build model (wrapped in DataParallel when more than one GPU is available).
        vocab_size = self.vocab.vocab_size()
        self.model = LSTMClassifier(self.config, vocab_size, self.config.n_label)
        self.model.to(device)
        if self.n_gpu > 1:
            self.model = nn.DataParallel(self.model)

        # Build optimizer.
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr)
        # Build criterion.
        self.criterion = nn.CrossEntropyLoss()

    def train(self):
        """Run the full training loop, validating and checkpointing on best val F1."""
        best_f1 = 0.0
        best_acc = 0.0
        global_step = 0
        # NOTE(review): batch_f1/batch_acc are never cleared, so the logged means
        # are running averages over all steps so far (unlike batch_loss, which is
        # reset at each log interval) — confirm this is intended.
        batch_f1 = []
        batch_acc = []
        for epoch in range(self.config.num_epoch):
            batch_loss = []
            for step, batch in enumerate(self.train_loader):
                self.model.train()
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(
                    outputs['logits'].view(-1, self.config.n_label),
                    labels.view(-1))
                f1, acc = ic_metric(labels.cpu(), outputs['predicted_intents'].cpu())

                if self.n_gpu > 1:
                    # DataParallel returns a per-device loss vector; reduce it.
                    loss = loss.mean()
                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

                global_step += 1
                batch_loss.append(loss.float().item())
                batch_f1.append(f1)
                batch_acc.append(acc)

                # Periodic training-metric report.
                if (global_step == 1) or (global_step % self.config.log_interval == 0):
                    mean_loss = np.mean(batch_loss)
                    mean_f1 = np.mean(batch_f1)
                    mean_acc = np.mean(batch_acc)
                    batch_loss = []
                    nsml.report(summary=True, scope=locals(), epoch=epoch,
                                train_loss=mean_loss, step=global_step)

                # Periodic validation; checkpoint when val F1 improves.
                if (global_step > 0) and (global_step % self.config.val_interval == 0):
                    val_loss, val_f1, val_acc = self.evaluation()
                    nsml.report(summary=True, scope=locals(), epoch=epoch,
                                val_loss=val_loss, val_f1=val_f1, val_acc=val_acc,
                                step=global_step)
                    if val_f1 > best_f1:
                        best_f1 = val_f1
                        best_acc = val_acc
                        nsml.save(global_step)

    def evaluation(self):
        """Evaluate on the validation loader; return (mean loss, F1, accuracy)."""
        self.model.eval()
        total_loss = []
        preds = []
        targets = []
        with torch.no_grad():
            for step, batch in enumerate(self.val_loader):
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(
                    outputs['logits'].view(-1, self.config.n_label),
                    labels.view(-1))

                pred = outputs['predicted_intents'].squeeze(-1).cpu().numpy().tolist()
                target = labels.cpu().numpy().tolist()
                preds.extend(pred)
                targets.extend(target)
                total_loss.append(loss.float().item())

        mean_loss = np.mean(total_loss)
        mean_f1, mean_acc = ic_metric(targets, preds)
        return mean_loss, mean_f1, mean_acc
# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# Build the model and load the preprocessing word dictionary onto it.
model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)
with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
    model.word_dict = pickle.load(f)

print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
    args.embedding_dim, args.hidden_dim, args.vocab_size))

# Train the model; the learning rate comes from the command-line arguments.
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
loss_fn = torch.nn.BCELoss()
train(model, train_loader, args.epochs, optimizer, loss_fn, device)

# Save the parameters used to construct the model
model_info_path = os.path.join(args.model_dir, 'model_info.pth')
with open(model_info_path, 'wb') as f:
    model_info = {
        'embedding_dim': args.embedding_dim,
        'hidden_dim': args.hidden_dim,
        'vocab_size': args.vocab_size,
    }
    torch.save(model_info, f)

# Save the word_dict
def train(model, train_loader, epochs, optimizer, loss_fn, device):
    """
    This is the training method that is called by the PyTorch training script. The parameters
    passed are as follows:
    model        - The PyTorch model that we wish to train.
    train_loader - The PyTorch DataLoader that should be used during training.
    epochs       - The total number of epochs to train for.
    optimizer    - The optimizer to use during training.
    loss_fn      - The loss function used for training.
    device       - Where the model and data should be loaded (gpu or cpu).
    """
    # Bug fix: the original pasted a second nested `def train(...)` inside this
    # one, so calling train() only defined the inner function and did nothing.
    # The two definitions are merged into this single implementation.
    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0
        for batch in train_loader:
            batch_X, batch_y = batch
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            out = model(batch_X)  # idiomatic callable form instead of model.forward()
            loss = loss_fn(out, batch_y)
            loss.backward()
            optimizer.step()

            # loss.item() replaces the deprecated loss.data.item()
            total_loss += loss.item()
        print("Epoch: {}, BCELoss: {}".format(epoch, total_loss / len(train_loader)))


if __name__ == '__main__':
    # All of the model parameters and training parameters are sent as arguments when the script
    # is executed. Here we set up an argument parser to easily access the parameters.
    parser = argparse.ArgumentParser()

    # Training Parameters
    parser.add_argument('--batch-size', type=int, default=512, metavar='N',
                        help='input batch size for training (default: 512)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')

    # Model Parameters
    parser.add_argument('--embedding_dim', type=int, default=32, metavar='N',
                        help='size of the word embeddings (default: 32)')
    parser.add_argument('--hidden_dim', type=int, default=100, metavar='N',
                        help='size of the hidden dimension (default: 100)')
    parser.add_argument('--vocab_size', type=int, default=5000, metavar='N',
                        help='size of the vocabulary (default: 5000)')

    # SageMaker Parameters (defaults come from the SageMaker container environment).
    parser.add_argument('--hosts', type=list, default=json.loads(os.environ['SM_HOSTS']))
    parser.add_argument('--current-host', type=str, default=os.environ['SM_CURRENT_HOST'])
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--data-dir', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
    parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])

    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device {}.".format(device))

    torch.manual_seed(args.seed)

    # Load the training data.
    train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

    # Build the model and attach the word dictionary saved during preprocessing.
    model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)
    with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
        model.word_dict = pickle.load(f)

    print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
        args.embedding_dim, args.hidden_dim, args.vocab_size
    ))

    # Train the model.
    optimizer = optim.Adam(model.parameters())
    loss_fn = torch.nn.BCELoss()
    train(model, train_loader, args.epochs, optimizer, loss_fn, device)

    # Save the parameters used to construct the model
    model_info_path = os.path.join(args.model_dir, 'model_info.pth')
    with open(model_info_path, 'wb') as f:
        model_info = {
            'embedding_dim': args.embedding_dim,
            'hidden_dim': args.hidden_dim,
            'vocab_size': args.vocab_size,
        }
        torch.save(model_info, f)

    # Save the word_dict
    word_dict_path = os.path.join(args.model_dir, 'word_dict.pkl')
    with open(word_dict_path, 'wb') as f:
        pickle.dump(model.word_dict, f)

    # Save the model parameters
    model_path = os.path.join(args.model_dir, 'model.pth')
    with open(model_path, 'wb') as f:
        torch.save(model.cpu().state_dict(), f)