parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--data-dir', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])

args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device {}.".format(device))

torch.manual_seed(args.seed)

# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# Build the model.
model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)

with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
    model.word_dict = pickle.load(f)

print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
    args.embedding_dim, args.hidden_dim, args.vocab_size
))

# Train the model.
optimizer = optim.Adam(model.parameters())
loss_fn = torch.nn.BCELoss()

train(model, train_loader, args.epochs, optimizer, loss_fn, device)

# Save the parameters used to construct the model
parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])

args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device {}.".format(device))

torch.manual_seed(args.seed)

# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# Build the model.
model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)

with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
    model.word_dict = pickle.load(f)

print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
    args.embedding_dim, args.hidden_dim, args.vocab_size
))

# Train the model.
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.BCELoss()

print(model)
print(len(list(model.parameters())))
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())
import torch.nn as nn
import pandas as pd

# Split the original data 7:3
train_data, test_data = train_test_split(datasets, train_size=0.7)

# Dimensionality of the word vectors
EMBEDDING_DIM = 10
# Dimensionality of the hidden layer
HIDDEN_DIM = 128
# Number of words in the whole dataset (vocabulary size)
VOCAB_SIZE = len(word2index)
# Number of target categories
TAG_SIZE = len(categories)

# Declare the model
model = LSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, VOCAB_SIZE, TAG_SIZE)
# The loss function is NLLLoss(); it is commonly paired with LogSoftmax.
loss_function = nn.NLLLoss()
# Optimize with SGD; the loss takes a little while to come down.
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Store the total loss of each epoch
losses = []

for epoch in range(100):
    all_loss = 0
    for title, cat in zip(train_data["title"], train_data["category"]):
        # Reset the gradients held by the model
        model.zero_grad()
        # Convert the sentence into a sequence of word IDs (so the model can read it)
        inputs = sentence2index(title)
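        # Hypothetical continuation of the loop, not in the original snippet: it assumes
        # category2index maps a category name to its label id and that the model returns
        # log-probabilities, which is what NLLLoss expects.
        out = model(inputs)
        answer = torch.tensor([category2index[cat]])
        loss = loss_function(out, answer)
        loss.backward()
        optimizer.step()
        all_loss += loss.item()
    losses.append(all_loss)
    print("epoch", epoch, "\t", "loss", all_loss)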
def main():
    data, labels = read_imdb_data()
    train_X, test_X, train_y, test_y = prepare_imdb_data(data, labels)

    # Store the preprocessed data as a cache
    cache_dir = os.path.join("cache", "sentiment_analysis")  # where to store cache files
    os.makedirs(cache_dir, exist_ok=True)  # ensure cache directory exists

    # Preprocess data
    train_X, test_X, train_y, test_y = preprocess_data(train_X, test_X, train_y, test_y, cache_dir)

    # Build the word dict from the reviews
    word_dict = build_dict(train_X)

    # Store the word dict for future reference
    data_dir = 'data/pytorch'  # The folder we will use for storing data
    if not os.path.exists(data_dir):  # Make sure that the folder exists
        os.makedirs(data_dir)
    with open(os.path.join(data_dir, 'word_dict.pkl'), "wb") as f:
        pickle.dump(word_dict, f)

    train_X, train_X_len = convert_and_pad_data(word_dict, train_X)
    test_X, test_X_len = convert_and_pad_data(word_dict, test_X)

    # Store the processed data
    pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X_len), pd.DataFrame(train_X)], axis=1) \
        .to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)

    loadEnv()

    # Accessing variables.
    access_key_id = os.getenv('ACCESS_KEY_ID')
    secret_key = os.getenv('SECRET_KEY')
    region = os.getenv('AWS_REGION')
    execution_role = os.getenv('EXEC_ROLE')

    # Create a SageMaker session
    session = boto3.Session(aws_access_key_id=access_key_id,
                            aws_secret_access_key=secret_key,
                            region_name=region)
    sagemaker_session = sagemaker.Session(boto_session=session)

    # Upload the data to an S3 bucket
    bucket = sagemaker_session.default_bucket()
    prefix = 'sagemaker/sentiment_rnn'
    role = execution_role
    input_data = sagemaker_session.upload_data(path=data_dir, bucket=bucket, key_prefix=prefix)

    # Read in only the first 250 rows
    train_sample = pd.read_csv(os.path.join(data_dir, 'train.csv'), header=None, names=None, nrows=250)

    # Turn the input pandas dataframe into tensors
    train_sample_y = torch.from_numpy(train_sample[[0]].values).float().squeeze()
    train_sample_X = torch.from_numpy(train_sample.drop([0], axis=1).values).long()

    # Build the dataset
    train_sample_ds = torch.utils.data.TensorDataset(train_sample_X, train_sample_y)
    # Build the dataloader
    train_sample_dl = torch.utils.data.DataLoader(train_sample_ds, batch_size=50)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    lstm_model = LSTMClassifier(32, 100, 5000).to(device)
    optimizer = optim.Adam(lstm_model.parameters())
    loss_fn = torch.nn.BCELoss()

    train(lstm_model, train_sample_dl, 5, optimizer, loss_fn, device)

    estimator = PyTorch(entry_point="train.py",
                        source_dir="train",
                        role=role,
                        framework_version='0.4.0',
                        train_instance_count=1,
                        train_instance_type='ml.m4.xlarge',
                        hyperparameters={
                            'epochs': 10,
                            'hidden_dim': 200,
                        })
    estimator.fit({'training': input_data})

    # Deploy the trained model
    class StringPredictor(RealTimePredictor):
        def __init__(self, endpoint_name, sagemaker_session):
            super(StringPredictor, self).__init__(endpoint_name, sagemaker_session,
                                                  content_type='text/plain')

    py_model = PyTorchModel(model_data=estimator.model_data,
                            role=role,
                            framework_version='0.4.0',
                            entry_point='predict.py',
                            source_dir='serve',
                            predictor_cls=StringPredictor)
    pytorch_predictor = py_model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

    print(pytorch_predictor.endpoint)
    return
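A small smoke test of the deployed endpoint is sketched below; it is an assumption rather than part of the original script (the review text is made up, and the cleanup call simply avoids leaving the endpoint running).

# Hypothetical usage once main() has deployed the endpoint.
test_review = "The simplest pleasures in life are the best, and this film is one of them."
print(pytorch_predictor.predict(test_review))

# Tear the endpoint down when finished to avoid ongoing charges.
pytorch_predictor.delete_endpoint()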
def main():
    parser = argparse.ArgumentParser(description='PyTorch Gambler\'s Loss Runner')

    parser.add_argument('--result_dir', type=str, help='directory to save result txt files', default='results')
    parser.add_argument('--noise_rate', type=float, help='corruption rate, should be less than 1', default=0.5)
    parser.add_argument('--noise_type', type=str, help='[pairflip, symmetric]', default='symmetric')
    parser.add_argument('--dataset', type=str, help='mnist, cifar10, or imdb', default='mnist')
    parser.add_argument('--n_epoch', type=int, default=10)
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--num_workers', type=int, default=4, help='how many subprocesses to use for data loading')
    parser.add_argument('--epoch_decay_start', type=int, default=80)
    parser.add_argument('--load_model', type=str, default="")
    parser.add_argument('--model', type=str, default='default')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR', help='learning rate (default: 0.001)')
    parser.add_argument('--eps', type=float, help='set lambda for lambda type \'gmblers\' only', default=1000.0)
    parser.add_argument('--lambda_type', type=str, help='[nll, euc, mid, exp, gmblers]', default="euc")
    parser.add_argument('--start_gamblers', type=int, help='number of epochs before starting gamblers', default=0)

    # label smoothing args
    parser.add_argument('--smoothing', type=float, default=1.0, help='smoothing parameter (default: 1)')

    args = parser.parse_args()
    args.use_scheduler = False

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    if args.dataset == 'mnist':
        input_channel = 1
        num_classes = 10
        train_dataset = MNIST(root='./data/', download=True, train=True,
                              transform=transforms.ToTensor(),
                              noise_type=args.noise_type, noise_rate=args.noise_rate)
        test_dataset = MNIST(root='./data/', download=True, train=False,
                             transform=transforms.ToTensor(),
                             noise_type=args.noise_type, noise_rate=args.noise_rate)
        print('loading dataset...')
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, drop_last=True, shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                                                  num_workers=args.num_workers, drop_last=True, shuffle=False)

    if args.dataset == 'cifar10':
        input_channel = 3
        num_classes = 10
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        train_dataset = CIFAR10(root='./data/', download=True, train=True,
                                transform=transform_train,
                                noise_type=args.noise_type, noise_rate=args.noise_rate)
        test_dataset = CIFAR10(root='./data/', download=True, train=False,
                               transform=transform_test,
                               noise_type=args.noise_type, noise_rate=args.noise_rate)
        print('loading dataset...')
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, drop_last=True, shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                                                  num_workers=args.num_workers, drop_last=True, shuffle=False)

    if args.dataset == 'cifar100':
        input_channel = 3
        num_classes = 100
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        train_dataset = CIFAR100(root='./data/', download=True, train=True,
                                 transform=transform_train,
                                 noise_type=args.noise_type, noise_rate=args.noise_rate)
        test_dataset = CIFAR100(root='./data/', download=True, train=False,
                                transform=transform_test,
                                noise_type=args.noise_type, noise_rate=args.noise_rate)
        print('loading dataset...')
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, drop_last=True, shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                                                  num_workers=args.num_workers, drop_last=True, shuffle=False)

    if args.dataset == 'imdb':
        num_classes = 2
        embedding_length = 300
        hidden_size = 256
        print('loading dataset...')
        TEXT, vocab_size, word_embeddings, train_loader, valid_iter, test_loader = load_data.load_dataset(
            rate=args.noise_rate, batch_size=args.batch_size)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("using {}".format(device))

    print('building model...')
    if args.dataset == 'mnist':
        model = CNN_basic(num_classes=num_classes).to(device)
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    if args.dataset == 'cifar10':
        if args.model == 'small':
            model = CNN_small(num_classes=num_classes).to(device)
            optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
        else:
            model = resnet.ResNet18(num_classes=num_classes).to(device)
            change_lr = lambda epoch: 0.1 if epoch >= 50 else 1.0
            optimizer = LaProp(filter(lambda p: p.requires_grad, model.parameters()), lr=4e-4)
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=change_lr)
            args.use_scheduler = True
    if args.dataset == 'cifar100':
        if args.model == 'small':
            model = CNN_small(num_classes=num_classes).to(device)
            optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
        else:
            model = resnet.ResNet18(num_classes=num_classes).to(device)
            change_lr = lambda epoch: 0.1 if epoch >= 50 else 1.0
            optimizer = LaProp(filter(lambda p: p.requires_grad, model.parameters()), lr=4e-4)
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=change_lr)
            args.use_scheduler = True
    if args.dataset == 'imdb':
        model = LSTMClassifier(args.batch_size, num_classes, hidden_size, vocab_size,
                               embedding_length, word_embeddings).to(device)
        optimizer = LaProp(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)

    test_accs = []
    train_losses = []
    test_losses = []
    out = []

    name = "{}_{}_{:.2f}_{:.2f}_{}_{}".format(args.dataset, args.noise_type, args.smoothing,
                                              args.noise_rate, args.eps, args.seed)

    if not os.path.exists(args.result_dir):
        os.system('mkdir -p %s' % args.result_dir)

    save_file = args.result_dir + "/" + name + ".json"
    if os.path.exists(save_file):
        print('case processed')
        exit()

    for epoch in range(1, args.n_epoch + 1):
        for param_group in optimizer.param_groups:
            print(epoch, param_group['lr'])
        print(name)
        train_loss = train(args, model, device, train_loader, optimizer, epoch,
                           num_classes=num_classes,
                           use_gamblers=(epoch >= args.start_gamblers),
                           text=(args.dataset == 'imdb'))
        train_losses.append(train_loss)

        test_acc, test_loss = test(args, model, device, test_loader, num_classes,
                                   text=(args.dataset == 'imdb'))
        test_accs.append(test_acc)
        test_losses.append(test_loss)

        if args.use_scheduler:
            scheduler.step()

        # torch.save({
        #     'model_state_dict': model.state_dict(),
        #     'optimizer_state_dict': optimizer.state_dict(),
        #     'loss': loss,
        #     'test_acc': acc
        # }, args.result_dir + "/" + name + "_model.npy")

    save_data = {
        "train_loss": train_losses,
        "test_loss": test_losses,
        "test_acc": test_accs
    }
    json.dump(save_data, open(save_file, 'w'))
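Once a run completes, the metrics written above can be reloaded for a quick check. A minimal sketch, assuming the same `save_file` naming scheme as in `main()`:

with open(save_file) as f:
    metrics = json.load(f)
print("final train loss: {:.4f}".format(metrics["train_loss"][-1]))
print("best test accuracy: {:.4f}".format(max(metrics["test_acc"])))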
#!/usr/bin/env python
# coding: utf-8

from prepare_data import PrepareData
from model import LSTMClassifier
import pandas as pd
from sklearn.model_selection import train_test_split

ppd = PrepareData()
data = ppd.get_data()

lstm = LSTMClassifier()
X = lstm.get_matrix(data)
Y = pd.get_dummies(data['label']).values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

model = lstm.get_model(X.shape[1])
history = lstm.fit_model(model, X_train, Y_train)

validation_size = 1500
X_validate = X_test[-validation_size:]
Y_validate = Y_test[-validation_size:]
X_test = X_test[:-validation_size]
Y_test = Y_test[:-validation_size]

score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size)
shuffle=True, num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# define hyperparameters
size_of_vocab = len(all_words)
embedding_dim = 20
num_hidden_nodes = 8
num_output_nodes = len(tags)
num_layers = 2
bidirection = True
dropout = 0.2

# instantiate the model
model = LSTMClassifier(12, 20, len(all_words), len(tags)).to(device)

# architecture
print(model)

# No. of trainable parameters
# def count_parameters(model):
#     return sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(f'The model has {count_parameters(model):,} trainable parameters')

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Train the model
# Load data generators
train_data_loader = get_train_loader(cities=cities, labels=labels, batch_size=batch_size,
                                     shuffle=False, collate_fn=collate_fn, sampler=train_sampler)
valid_data_loader = get_train_loader(cities=cities, labels=labels, batch_size=batch_size,
                                     shuffle=False, collate_fn=collate_fn, sampler=valid_sampler)

# Initialize the model to train
model = LSTMClassifier(27, 10, 14)

# Loss and Optimizer
criterion = nn.NLLLoss()
learning_rate = 0.8
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# train
losses = []
num_epochs = 10

# Train the Model
for epoch in range(num_epochs):
    print("##### epoch {:2d}".format(epoch + 1))
    for i, batch in enumerate(train_data_loader):
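        # Hypothetical loop body, not in the original snippet: it assumes collate_fn
        # yields (sequences, labels) pairs and that the model returns log-probabilities,
        # which is what NLLLoss expects.
        sequences, labels = batch
        model.zero_grad()
        output = model(sequences)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())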
def main():
    os.chdir('./')
    global args, word2vec, batch_size, train_set_idx
    global weight_scale, phenotypedictinverse

    phenotypedict = dict({
        "Cancer": 11,
        "Heart": 4,
        "Lung": 5,
        "Neuro": 10,
        "Pain": 9,
        "Alcohol": 7,
        "Substance": 8,
        "Obesity": 1,
        "Disorders": 6,
        "Depression": 12
    })

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    #parser.add_argument('clean_summaries0209.csv', help="Source Input file", type=str)
    #parser.add_argument('word2vec_50d.txt', help="word2vec file", type=str)
    parser.add_argument('--padding', help="padding around each text", type=int, default=4)
    parser.add_argument('--max_note_len', help="Cut off all notes longer than this (0 = no cutoff).",
                        type=int, default=0)
    parser.add_argument('--filename', help="File name for output file", type=str, default="data.h5")
    parser.add_argument('-predict_label', type=int, default=phenotypedict["Depression"],
                        help='Choose which type of phenotyping to detect')
    parser.add_argument('-topred', type=str, default="Depression",
                        help='Choose which type of phenotyping to detect')
    parser.add_argument('-epochs', type=int, default=10,
                        help='number of epochs for train [default: 10]')
    parser.add_argument('-batch_size', type=int, default=8,
                        help='batch size for training [default: 8]')
    parser.add_argument('-output_size', type=int, default=2,
                        help='final output dim [default: 2]')
    parser.add_argument('-hidden_size', type=int, default=256,
                        help='output dim of the cell [default: 256]')
    parser.add_argument('-embedding_length', type=int, default=50,
                        help='number of embedding dimension [default: 50]')
    parser.add_argument('-learning_rate', type=float, default=0.005,
                        help='initial learning rate [default: 0.005]')
    parser.add_argument('-vocab_size', type=int, default=48849,
                        help='vocabulary size [default: 48849]')
    parser.add_argument('-optimizer', type=str, default='Adam',
                        help='optimizer for the gradient descent: Adadelta, Adam')
    parser.add_argument('-cuda', type=int, default=-1, help='CUDA device id (-1 for CPU)')
    parser.add_argument('-debug', type=int, default=0, help='debug mode to print')
    parser.add_argument('-l2s', type=float, default=3, help='l2 norm')

    # with open("conditions.dict", 'w') as f:
    #     for i, c in enumerate(conditions):
    #         print(f, i + 1, c)

    args = parser.parse_args()

    phenotypedictinverse = dict({
        11: "Cancer",
        4: "Heart",
        5: "Lung",
        10: "Neuro",
        9: "Pain",
        7: "Alcohol",
        8: "Substance",
        1: "Obesity",
        6: "Disorders",
        12: "Depression"
    })

    phenotypedictsamples = dict({
        "Cancer": 161,
        "Heart": 275,
        "Lung": 167,
        "Neuro": 368,
        "Pain": 321,
        "Alcohol": 196,
        "Substance": 155,
        "Obesity": 126,
        "Disorders": 295,
        "Depression": 460
    })

    weight_scale = [
        1 / (1610 - phenotypedictsamples[phenotypedictinverse[args.predict_label]]),
        1 / phenotypedictsamples[phenotypedictinverse[args.predict_label]]
    ]
    #weight_scale = [ phenotypedictsamples[phenotypedictinverse[args.predict_label]]/1610*10, (1610 - phenotypedictsamples[phenotypedictinverse[args.predict_label]])/1610*10]
    if args.cuda > -1:
        weight_scale = torch.FloatTensor(weight_scale).cuda()
    print('Weight Scale is: ', weight_scale)

    # LOAD THE WORD2VEC FILE
    word2vec, emb_size, v_large = load_bin_vec("word2vec_50d.txt")  # word2vec whole dataset (label + unlabeled) 470260
    print('WORD2VEC POINTS:', v_large)

    # first step
    # lbl, targets, ids, subj, time, embed = preprocess(args, emb_size, word2vec)
    # lbl_train, lbl_train_target, lbl_test, lbl_test_target, phenotypedict = cross_validation(lbl, targets, ids, subj, time, args.topred, phenotypedict, phenotypedictsamples)

    fold = 1

    # put data of each fold into a .h5py file
    '''
    for i in range(0, fold):
        with h5py.File('data_biased_'+args.topred+'_cv{0}_occ'.format(i+1) + '0'+'.h5', "w") as f:
            xtrain = np.array(lbl_train[i], dtype=int)
            xtraintarget = np.array(lbl_train_target[i], dtype=int)
            xtest = np.array(lbl_test[i], dtype=int)
            xtesttarget = np.array(lbl_test_target[i], dtype=int)
            f["w2v"] = np.array(embed)
            f['train'] = xtrain
            f['train_label'] = xtraintarget[:, phenotypedict[args.topred]]
            f['test'] = xtest
            f['test_label'] = xtesttarget[:, phenotypedict[args.topred]]
    '''

    if args.cuda > -1:
        torch.cuda.set_device(args.cuda)
        torch.backends.cudnn.benchmark = True

    for i in range(0, fold):
        train, test, y_test, w2v = readh5todata(
            args, 'data_biased_' + phenotypedictinverse[args.predict_label] +
            '_cv{0}'.format(i + 1) + '_occ' + '0' + '.h5')
        args.w2v = w2v
        train_loader = torch.utils.data.DataLoader(train, batch_size=args.batch_size,
                                                   sampler=None, shuffle=False)
        test_loader = torch.utils.data.DataLoader(test, batch_size=args.batch_size,
                                                  sampler=None, shuffle=False)
        LSTM = LSTMClassifier(args)
        print(LSTM)
        train_model(args, LSTM, args.learning_rate, args.batch_size, args.epochs, train_loader)
class Trainer:
    def __init__(self, config, n_gpu, vocab, train_loader=None, val_loader=None):
        self.config = config
        self.vocab = vocab
        self.n_gpu = n_gpu
        self.train_loader = train_loader
        self.val_loader = val_loader

        # Build model
        vocab_size = self.vocab.vocab_size()
        self.model = LSTMClassifier(self.config, vocab_size, self.config.n_label)
        self.model.to(device)
        if self.n_gpu > 1:
            self.model = nn.DataParallel(self.model)

        # Build optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr)

        # Build criterion
        self.criterion = nn.CrossEntropyLoss()

    def train(self):
        best_f1 = 0.0
        best_acc = 0.0
        global_step = 0
        batch_f1 = []
        batch_acc = []

        for epoch in range(self.config.num_epoch):
            batch_loss = []
            for step, batch in enumerate(self.train_loader):
                self.model.train()
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(outputs['logits'].view(-1, self.config.n_label), labels.view(-1))
                f1, acc = ic_metric(labels.cpu(), outputs['predicted_intents'].cpu())

                if self.n_gpu > 1:
                    loss = loss.mean()
                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

                global_step += 1
                batch_loss.append(loss.float().item())
                batch_f1.append(f1)
                batch_acc.append(acc)

                if (global_step == 1) or (global_step % self.config.log_interval == 0):
                    mean_loss = np.mean(batch_loss)
                    mean_f1 = np.mean(batch_f1)
                    mean_acc = np.mean(batch_acc)
                    batch_loss = []
                    nsml.report(summary=True, scope=locals(), epoch=epoch,
                                train_loss=mean_loss, step=global_step)

                if (global_step > 0) and (global_step % self.config.val_interval == 0):
                    val_loss, val_f1, val_acc = self.evaluation()
                    nsml.report(summary=True, scope=locals(), epoch=epoch, val_loss=val_loss,
                                val_f1=val_f1, val_acc=val_acc, step=global_step)
                    if val_f1 > best_f1:
                        best_f1 = val_f1
                        best_acc = val_acc
                        nsml.save(global_step)

    def evaluation(self):
        self.model.eval()
        total_loss = []
        preds = []
        targets = []
        with torch.no_grad():
            for step, batch in enumerate(self.val_loader):
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(outputs['logits'].view(-1, self.config.n_label), labels.view(-1))

                pred = outputs['predicted_intents'].squeeze(-1).cpu().numpy().tolist()
                target = labels.cpu().numpy().tolist()
                preds.extend(pred)
                targets.extend(target)
                total_loss.append(loss.float().item())

        mean_loss = np.mean(total_loss)
        mean_f1, mean_acc = ic_metric(targets, preds)
        return mean_loss, mean_f1, mean_acc
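A minimal sketch of how this Trainer might be driven, assuming a `config` object with the fields referenced above (`n_label`, `lr`, `num_epoch`, `log_interval`, `val_interval`), a `vocab` exposing `vocab_size()`, and prepared DataLoaders; none of these objects are defined in the snippet itself:

n_gpu = torch.cuda.device_count()
trainer = Trainer(config, n_gpu, vocab, train_loader=train_loader, val_loader=val_loader)
trainer.train()
val_loss, val_f1, val_acc = trainer.evaluation()
print("val loss {:.4f}, f1 {:.4f}, acc {:.4f}".format(val_loss, val_f1, val_acc))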
def main():
    parser = argparse.ArgumentParser("Script to train model on a GPU")
    parser.add_argument("--checkpoint", type=str, default=None,
                        help="Optional path to saved model, if none provided, the model is trained from scratch.")
    parser.add_argument("--n_epochs", type=int, default=5, help="Number of training epochs.")
    args = parser.parse_args()

    sampling_rate = 125
    n_velocity_bins = 32
    seq_length = 1024
    n_tokens = 256 + sampling_rate + n_velocity_bins
    #early_stopping = 100000  # very high value to basically turn it off
    early_stopping = 200  # regular value

    # transformer = MusicTransformer(n_tokens, seq_length,
    #                                d_model=64, n_heads=8, d_feedforward=256,
    #                                depth=4, positional_encoding=True, relative_pos=True, xavier_init=True)
    # set xavier_init = True to run xavier_init optimization
    # transformer = LongMusicTransformer(n_tokens, seq_length,
    #                                    d_model=64, n_heads=8, d_feedforward=256,
    #                                    depth=4, positional_encoding=True, relative_pos=False,
    #                                    xavier_init=True)
    transformer = LSTMClassifier(input_dim=1, hidden_dim=413, label_size=413,
                                 n_tokens=n_tokens, xavier_init=True)

    if args.checkpoint is not None:
        state = torch.load(args.checkpoint)
        transformer.load_state_dict(state)
        print(f"Successfully loaded checkpoint at {args.checkpoint}")

    # rule of thumb: 1 minute is roughly 2k tokens
    pipeline = PreprocessingPipeline(input_dir="data",
                                     stretch_factors=[0.975, 1, 1.025],
                                     split_size=30,
                                     sampling_rate=sampling_rate,
                                     n_velocity_bins=n_velocity_bins,
                                     transpositions=range(-2, 3),
                                     training_val_split=0.9,
                                     max_encoded_length=seq_length + 1,
                                     min_encoded_length=257)
    pipeline_start = time.time()
    pipeline.run()
    runtime = time.time() - pipeline_start
    print(f"MIDI pipeline runtime: {runtime / 60 : .1f}m")

    today = datetime.date.today().strftime('%m%d%Y')
    t = str(time.time())
    # checkpoint = f"saved_models/tf_{today}_{t}"
    checkpoint = f"saved_models/tf_lstm_both"

    training_sequences = pipeline.encoded_sequences['training']
    validation_sequences = pipeline.encoded_sequences['validation']

    batch_size = 16

    train(transformer, training_sequences, validation_sequences,
          epochs=args.n_epochs, evaluate_per=1, batch_size=batch_size,
          batches_per_print=100, padding_index=0, checkpoint_path=checkpoint,
          early_stopping_value=early_stopping)
                    help='number of layers (default: 2)')

# args holds all passed-in arguments
args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device {}.".format(device))

torch.manual_seed(args.seed)

# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# To get params from the parser, call args.argument_name, e.g. args.epochs or args.hidden_dim
# Don't forget to move your model .to(device) to move to GPU, if appropriate
model = LSTMClassifier(args.input_dim, args.hidden_dim, args.num_layers, args.output_dim).to(device)

## TODO: Define an optimizer and loss function for training
optimizer = optim.Adam(model.parameters())
criterion = torch.nn.MSELoss()

# Trains the model (given line of code, which calls the above training function)

# Keep the keys of this dictionary as they are
model_info_path = os.path.join(args.model_dir, 'model_info.pth')
with open(model_info_path, 'wb') as f:
    model_info = {
        'num_layers': args.num_layers,
        'hidden_dim': args.hidden_dim,
        'output_dim': args.output_dim,
        'input_dim': args.input_dim,
if not os.path.exists(save_root):
    os.makedirs(save_root)
#print('writing results to ' + save_root)

# create data generators
generator_train = get_batch_transform(model, train_data)
generator_val = get_batch_transform(model, val_data)
generator_test = get_batch_transform(model, test_data)

# train a predictor model
if exp_model == 'LR':
    model_snt = LRClassifier()
elif exp_model == 'LSTM':
    model_snt = LSTMClassifier()
else:
    raise NotImplementedError
if args.cuda:
    model_snt.cuda()

iters_max = 4000
lr_base = 0.001
lr_final = 0.00005
lr_new = lr_base

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_snt.parameters(), lr=lr_base)

iters_val = []
accus_val = []
loss_val = []
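The fragment stops after the bookkeeping lists are created. A minimal sketch of the loop those values appear to set up, assuming `generator_train` yields `(inputs, targets)` batches (that interface is an assumption, not taken from the source):

for it in range(iters_max):
    inputs, targets = next(generator_train)  # assumed generator interface
    if args.cuda:
        inputs, targets = inputs.cuda(), targets.cuda()

    optimizer.zero_grad()
    loss = criterion(model_snt(inputs), targets)
    loss.backward()
    optimizer.step()

    # linearly anneal the learning rate from lr_base towards lr_final
    lr_new = lr_base - (lr_base - lr_final) * (it + 1) / iters_max
    for group in optimizer.param_groups:
        group['lr'] = lr_new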
def test(args):
    dataset_test = FirmaData_select_subjects(args.data_dir, 30, args.subset_par[0], args.subset_par[1],
                                             args.subset_par[2], args.subjects_list, subset='test',
                                             pre_process=False)
    dat_loader_test = DataLoader(dataset_test, batch_size=args.batch_size, shuffle=True)
    if args.test_all:
        for loadid in range(args.num_epochs):
            saved_model = os.path.join(args.save_path, 'model_' + str(loadid) + '.tar')
            checkpoint = torch.load(saved_model)
            model = LSTMClassifier(dataset_test[0][0].shape[1], args.hidden_dim, output_size=3)
            model.cuda()
            model.load_state_dict(checkpoint['model_state_dict'])
            acc, f1, _ = evaluate_test_set(model, dat_loader_test)
            print('model {} test_accuracy:{:5.4f}, f1_score:{:5.4f}'.format(loadid, acc, f1))
    else:
        loadid = args.test_id
        saved_model = os.path.join(args.save_path, 'model_' + str(loadid) + '.tar')
        checkpoint = torch.load(saved_model)
        model = LSTMClassifier(dataset_test[0][0].shape[1], args.hidden_dim, output_size=3)
        model.cuda()
        model.load_state_dict(checkpoint['model_state_dict'])
        acc, f1, _ = evaluate_test_set(model, dat_loader_test)
        print('model {} test_accuracy:{:5.4f}, f1_score:{:5.4f}'.format(loadid, acc, f1))
def train(model, train_loader, epochs, optimizer, loss_fn, device):
    """
    This is the training method that is called by the PyTorch training script. The parameters
    passed are as follows:
    model        - The PyTorch model that we wish to train.
    train_loader - The PyTorch DataLoader that should be used during training.
    epochs       - The total number of epochs to train for.
    optimizer    - The optimizer to use during training.
    loss_fn      - The loss function used for training.
    device       - Where the model and data should be loaded (gpu or cpu).
    """
    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0
        for batch in train_loader:
            batch_X, batch_y = batch

            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)

            # Forward pass, backward pass, and parameter update.
            optimizer.zero_grad()
            out = model(batch_X)
            loss = loss_fn(out, batch_y)
            loss.backward()
            optimizer.step()

            total_loss += loss.data.item()
        print("Epoch: {}, BCELoss: {}".format(epoch, total_loss / len(train_loader)))


if __name__ == '__main__':
    # All of the model parameters and training parameters are sent as arguments when the script
    # is executed. Here we set up an argument parser to easily access the parameters.
    parser = argparse.ArgumentParser()

    # Training Parameters
    parser.add_argument('--batch-size', type=int, default=512, metavar='N',
                        help='input batch size for training (default: 512)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')

    # Model Parameters
    parser.add_argument('--embedding_dim', type=int, default=32, metavar='N',
                        help='size of the word embeddings (default: 32)')
    parser.add_argument('--hidden_dim', type=int, default=100, metavar='N',
                        help='size of the hidden dimension (default: 100)')
    parser.add_argument('--vocab_size', type=int, default=5000, metavar='N',
                        help='size of the vocabulary (default: 5000)')

    # SageMaker Parameters
    parser.add_argument('--hosts', type=list, default=json.loads(os.environ['SM_HOSTS']))
    parser.add_argument('--current-host', type=str, default=os.environ['SM_CURRENT_HOST'])
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--data-dir', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
    parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])

    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device {}.".format(device))

    torch.manual_seed(args.seed)

    # Load the training data.
    train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

    # Build the model.
    model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)

    with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
        model.word_dict = pickle.load(f)

    print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
        args.embedding_dim, args.hidden_dim, args.vocab_size
    ))

    # Train the model.
    optimizer = optim.Adam(model.parameters())
    loss_fn = torch.nn.BCELoss()

    train(model, train_loader, args.epochs, optimizer, loss_fn, device)

    # Save the parameters used to construct the model
    model_info_path = os.path.join(args.model_dir, 'model_info.pth')
    with open(model_info_path, 'wb') as f:
        model_info = {
            'embedding_dim': args.embedding_dim,
            'hidden_dim': args.hidden_dim,
            'vocab_size': args.vocab_size,
        }
        torch.save(model_info, f)

    # Save the word_dict
    word_dict_path = os.path.join(args.model_dir, 'word_dict.pkl')
    with open(word_dict_path, 'wb') as f:
        pickle.dump(model.word_dict, f)

    # Save the model parameters
    model_path = os.path.join(args.model_dir, 'model.pth')
    with open(model_path, 'wb') as f:
        torch.save(model.cpu().state_dict(), f)
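For completeness, a minimal sketch of the `model_fn` a SageMaker PyTorch serving script would typically pair with the save logic above; this function is an assumption rather than part of the original script, and simply reverses the three saves.

def model_fn(model_dir):
    """Load the saved artifacts back into an LSTMClassifier for inference (sketch)."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Rebuild the model with the saved constructor arguments.
    with open(os.path.join(model_dir, 'model_info.pth'), 'rb') as f:
        model_info = torch.load(f)
    model = LSTMClassifier(model_info['embedding_dim'], model_info['hidden_dim'], model_info['vocab_size'])

    # Restore the vocabulary and the trained weights.
    with open(os.path.join(model_dir, 'word_dict.pkl'), 'rb') as f:
        model.word_dict = pickle.load(f)
    with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
        model.load_state_dict(torch.load(f, map_location=device))

    model.to(device).eval()
    return model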