# --- SageMaker training entry-point fragment ---
# NOTE(review): this fragment starts mid-way through the argparse setup; the
# parser object is created earlier in the (unseen) part of the file.
parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])

args = parser.parse_args()

# Prefer GPU when available (SageMaker GPU instances expose CUDA).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device {}.".format(device))

# Seed torch for reproducible initialization/shuffling.
torch.manual_seed(args.seed)

# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# Build the model.
model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)

# Attach the preprocessing word dictionary so it travels with the model
# when it is saved/deployed.
with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
    model.word_dict = pickle.load(f)

print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
    args.embedding_dim, args.hidden_dim, args.vocab_size))

# Train the model.
optimizer = optim.Adam(model.parameters())
# BCELoss: presumably the model emits a sigmoid probability -- TODO confirm.
loss_fn = torch.nn.BCELoss()
train(model, train_loader, args.epochs, optimizer, loss_fn, device)

# Save the parameters used to construct the model
model_info_path = os.path.join(args.model_dir, 'model_info.pth')
#class2tensor = data_preparation.class2tensor # 元データを7:3に分割(7->学習、3->テスト) traindata, testdata = train_test_split(datasets, train_size=0.7) # 単語のベクトル次元数 EMBEDDING_DIM = 10 # 隠れ層の次元数 HIDDEN_DIM = 128 # データ全体の単語数 VOCAB_SIZE = len(word2index) # 分類先のカテゴリの数 TAG_SIZE = len(classes) # モデル宣言 model = LSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, VOCAB_SIZE, TAG_SIZE) # 損失関数はNLLLoss()を使用 LogSoftmaxを使う時はNLLLoss loss_function = nn.NLLLoss() # 最適化手法 lossの減少に時間がかかるため要検討 optimizer = optim.SGD(model.parameters(), lr=0.01) # 各エポックの合計loss値を格納 losses = [] for epoch in range(100): all_loss = 0 for text, cls in zip(traindata['Text'], traindata['Class']): # モデルが持ってる勾配の情報をリセット model.zero_grad() # 文章を単語IDの系列に変換(modelに食わせられる形に変換) inputs = sentence2index(text)
shuffle=True, num_workers=0)  # NOTE(review): fragment -- tail of a DataLoader(...) call whose start precedes this chunk

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#define hyperparameters
size_of_vocab = len(all_words)
embedding_dim = 20
num_hidden_nodes = 8
num_output_nodes = len(tags)
# NOTE(review): num_layers / bidirection / dropout are defined but never used
# in the visible code -- verify they are consumed elsewhere or remove.
num_layers = 2
bidirection = True
dropout = 0.2

#instantiate the model
# NOTE(review): the literals 12 and 20 ignore the hyperparameters defined
# above (embedding_dim=20, num_hidden_nodes=8) -- confirm which values are
# actually intended before "fixing" either side.
model = LSTMClassifier(12, 20, len(all_words), len(tags)).to(device)

#architecture
print(model)

#No. of trainable parameters
# def count_parameters(model):
#     return sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(f'The model has {count_parameters(model):,} trainable parameters')

# Loss and optimizer
# NOTE(review): BCELoss with len(tags) output nodes reads as a multi-label
# setup; for single-label multi-class, CrossEntropyLoss is usual -- confirm.
criterion = nn.BCELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Train the model
def main():
    """Train an LSTM phenotype classifier on clinical notes.

    Parses command-line arguments, derives inverse-frequency class weights
    for the chosen phenotype, loads pretrained word2vec embeddings, then for
    each cross-validation fold reads the preprocessed .h5 data and trains an
    LSTMClassifier.

    Relies on helpers defined elsewhere in this file: load_bin_vec,
    readh5todata, LSTMClassifier, train_model.
    """
    os.chdir('./')
    global args, word2vec, batch_size, train_set_idx
    global weight_scale, phenotypedictinverse

    # Phenotype name -> label column index in the dataset.
    phenotypedict = dict({
        "Cancer": 11, "Heart": 4, "Lung": 5, "Neuro": 10, "Pain": 9,
        "Alcohol": 7, "Substance": 8, "Obesity": 1, "Disorders": 6,
        "Depression": 12
    })

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    #parser.add_argument('clean_summaries0209.csv', help="Source Input file", type=str)
    #parser.add_argument('word2vec_50d.txt', help="word2vec file", type=str)
    parser.add_argument('--padding', help="padding around each text",
                        type=int, default=4)
    parser.add_argument(
        '--max_note_len',
        help="Cut off all notes longer than this (0 = no cutoff).",
        type=int, default=0)
    parser.add_argument('--filename', help="File name for output file",
                        type=str, default="data.h5")
    parser.add_argument('-predict_label', type=int,
                        default=phenotypedict["Depression"],
                        help='Choose which type of phenotyping to detect')
    parser.add_argument('-topred', type=str, default="Depression",
                        help='Choose which type of phenotyping to detect')
    parser.add_argument('-epochs', type=int, default=10,
                        help='number of epochs for train [default: 10]')
    # FIX: help text claimed "[default: 64]" while the actual default is 8.
    parser.add_argument('-batch_size', type=int, default=8,
                        help='batch size for training [default: 8]')
    parser.add_argument('-output_size', type=int, default=2,
                        help='final output dim [default: 2]')
    parser.add_argument('-hidden_size', type=int, default=256,
                        help='output dim of the cell [default: 256]')
    parser.add_argument('-embedding_length', type=int, default=50,
                        help='number of embedding dimension [default: 50]')
    # FIX: help text claimed "[default: 0.5]" while the actual default is 0.005.
    parser.add_argument('-learning_rate', type=float, default=0.005,
                        help='initial learning rate [default: 0.005]')
    # FIX: vocab_size is a count, not a rate -- was type=float with a
    # copy-pasted "initial learning rate" help string.
    parser.add_argument('-vocab_size', type=int, default=48849,
                        help='vocabulary size [default: 48849]')
    parser.add_argument(
        '-optimizer', type=str, default='Adam',
        help='optimizer for the gradient descent: Adadelta, Adam')
    # FIX: help text was garbage ("CUUUUUUUUUUUUDA").
    parser.add_argument('-cuda', type=int, default=-1,
                        help='CUDA device index to use (-1 = CPU) [default: -1]')
    parser.add_argument('-debug', type=int, default=0,
                        help='debug mode to print')
    parser.add_argument('-l2s', type=float, default=3, help='l2 norm')
    # with open("conditions.dict", 'w') as f:
    #     for i, c in enumerate(conditions):
    #         print (f, i + 1, c)
    args = parser.parse_args()

    # Label column index -> phenotype name (inverse of phenotypedict).
    phenotypedictinverse = dict({
        11: "Cancer", 4: "Heart", 5: "Lung", 10: "Neuro", 9: "Pain",
        7: "Alcohol", 8: "Substance", 1: "Obesity", 6: "Disorders",
        12: "Depression"
    })
    # Positive-sample counts per phenotype (out of 1610 notes total).
    phenotypedictsamples = dict({
        "Cancer": 161, "Heart": 275, "Lung": 167, "Neuro": 368, "Pain": 321,
        "Alcohol": 196, "Substance": 155, "Obesity": 126, "Disorders": 295,
        "Depression": 460
    })

    # Inverse-frequency class weights [negative, positive] to counter the
    # class imbalance of the selected phenotype.
    weight_scale = [
        1 / (1610 - phenotypedictsamples[phenotypedictinverse[args.predict_label]]),
        1 / phenotypedictsamples[phenotypedictinverse[args.predict_label]]
    ]
    #weight_scale = [ phenotypedictsamples[phenotypedictinverse[args.predict_label]]/1610*10, (1610 - phenotypedictsamples[phenotypedictinverse[args.predict_label]])/1610*10]
    if args.cuda > -1:
        weight_scale = torch.FloatTensor(weight_scale).cuda()
    print('Weight Scale is: ', weight_scale)

    # LOAD THE WORD2VEC FILE
    word2vec, emb_size, v_large = load_bin_vec(
        "word2vec_50d.txt")  # word2vec whole dataset(label+unlabeled) 470260
    print('WORD2VEC POINTS:', v_large)

    # first step
    # lbl, targets, ids, subj, time, embed = preprocess(args, emb_size, word2vec)
    # lbl_train, lbl_train_target, lbl_test, lbl_test_target, phenotypedict = cross_validation(lbl, targets, ids, subj, time, args.topred, phenotypedict, phenotypedictsamples)
    fold = 1
    # put data of each fold in to a .h5py file
    '''
    for i in range(0,fold):
        with h5py.File('data_biased_'+args.topred+'_cv{0}_occ'.format(i+1) + '0'+'.h5',"w") as f:
            xtrain = np.array(lbl_train[i], dtype=int)
            xtraintarget = np.array(lbl_train_target[i], dtype=int)
            xtest = np.array(lbl_test[i], dtype=int)
            xtesttarget = np.array(lbl_test_target[i], dtype=int)
            f["w2v"] = np.array(embed)
            f['train'] = xtrain
            f['train_label'] = xtraintarget[:,phenotypedict[args.topred]]
            f['test'] = xtest
            f['test_label'] = xtesttarget[:,phenotypedict[args.topred]]
    '''
    if args.cuda > -1:
        torch.cuda.set_device(args.cuda)
        torch.backends.cudnn.benchmark = True

    for i in range(0, fold):
        # Load the preprocessed fold from disk.
        train, test, y_test, w2v = readh5todata(
            args, 'data_biased_' + phenotypedictinverse[args.predict_label] +
            '_cv{0}'.format(i + 1) + '_occ' + '0' + '.h5')
        args.w2v = w2v
        # shuffle=False: sample order is fixed by the preprocessing step.
        train_loader = torch.utils.data.DataLoader(train,
                                                   batch_size=args.batch_size,
                                                   sampler=None,
                                                   shuffle=False)
        test_loader = torch.utils.data.DataLoader(test,
                                                  batch_size=args.batch_size,
                                                  sampler=None,
                                                  shuffle=False)
        LSTM = LSTMClassifier(args)
        print(LSTM)
        train_model(args, LSTM, args.learning_rate, args.batch_size,
                    args.epochs, train_loader)
#!/usr/bin/env python
# coding: utf-8
"""Train an LSTM text classifier end to end.

Loads the dataset via PrepareData, vectorizes it through LSTMClassifier,
trains on a 67/33 split, carves a validation slice off the end of the test
set, and evaluates on the remaining test rows.
"""
from prepare_data import PrepareData
from model import LSTMClassifier
import pandas as pd
from sklearn.model_selection import train_test_split

# FIX: batch_size was referenced in model.evaluate() below but never defined
# anywhere in this script, raising NameError at runtime. 32 is a placeholder
# default -- TODO confirm the intended value.
batch_size = 32

ppd = PrepareData()
data = ppd.get_data()

lstm = LSTMClassifier()
# Feature matrix and one-hot label matrix.
X = lstm.get_matrix(data)
Y = pd.get_dummies(data['label']).values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

model = lstm.get_model(X.shape[1])
history = lstm.fit_model(model, X_train, Y_train)

# Hold out the last `validation_size` test rows as a validation set; the
# remainder stays as the final test set.
validation_size = 1500
X_validate = X_test[-validation_size:]
Y_validate = Y_test[-validation_size:]
X_test = X_test[:-validation_size]
Y_test = Y_test[:-validation_size]

score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size)
def main():
    # Train an LSTM classifier on encoded MIDI sequences, optionally resuming
    # from a saved checkpoint.
    parser = argparse.ArgumentParser("Script to train model on a GPU")
    parser.add_argument(
        "--checkpoint",
        type=str,
        default=None,
        help=
        "Optional path to saved model, if none provided, the model is trained from scratch."
    )
    parser.add_argument("--n_epochs", type=int, default=5,
                        help="Number of training epochs.")
    args = parser.parse_args()

    # Token vocabulary: 256 base tokens + time tokens (sampling_rate) +
    # velocity-bin tokens -- presumably; confirm against the encoding in
    # PreprocessingPipeline.
    sampling_rate = 125
    n_velocity_bins = 32
    seq_length = 1024
    n_tokens = 256 + sampling_rate + n_velocity_bins

    #early_stopping = 100000 # very high value to basically turn it off
    early_stopping = 200  # regular value

    # transformer = MusicTransformer(n_tokens, seq_length,
    #     d_model = 64, n_heads = 8, d_feedforward=256,
    #     depth = 4, positional_encoding=True, relative_pos=True, xavier_init=True)
    # set xavier_init = True to run xavier_init optimization
    # transformer = LongMusicTransformer(n_tokens, seq_length,
    #     d_model=64, n_heads=8, d_feedforward=256,
    #     depth=4, positional_encoding=True, relative_pos=False,
    #     xavier_init=True)
    # NOTE(review): 413 equals n_tokens (256 + 125 + 32) here, so hidden_dim
    # and label_size are sized to the vocabulary -- confirm this is intended.
    transformer = LSTMClassifier(input_dim=1,
                                 hidden_dim=413,
                                 label_size=413,
                                 n_tokens=n_tokens,
                                 xavier_init=True)
    if args.checkpoint is not None:
        # Resume from a previously saved state dict.
        state = torch.load(args.checkpoint)
        transformer.load_state_dict(state)
        print(f"Successfully loaded checkpoint at {args.checkpoint}")

    #rule of thumb: 1 minute is roughly 2k tokens
    pipeline = PreprocessingPipeline(input_dir="data",
                                     stretch_factors=[0.975, 1, 1.025],
                                     split_size=30,
                                     sampling_rate=sampling_rate,
                                     n_velocity_bins=n_velocity_bins,
                                     transpositions=range(-2, 3),
                                     training_val_split=0.9,
                                     max_encoded_length=seq_length + 1,
                                     min_encoded_length=257)
    pipeline_start = time.time()
    pipeline.run()
    runtime = time.time() - pipeline_start
    print(f"MIDI pipeline runtime: {runtime / 60 : .1f}m")

    today = datetime.date.today().strftime('%m%d%Y')
    t = str(time.time())
    # checkpoint = f"saved_models/tf_{today}_{t}"
    checkpoint = f"saved_models/tf_lstm_both"

    training_sequences = pipeline.encoded_sequences['training']
    validation_sequences = pipeline.encoded_sequences['validation']
    batch_size = 16
    train(transformer,
          training_sequences,
          validation_sequences,
          epochs=args.n_epochs,
          evaluate_per=1,
          batch_size=batch_size,
          batches_per_print=100,
          padding_index=0,
          checkpoint_path=checkpoint,
          early_stopping_value=early_stopping)
#Load data generators train_data_loader = get_train_loader(cities=cities, labels=labels, batch_size=batch_size, shuffle=False, collate_fn=collate_fn, sampler=train_sampler) valid_data_loader = get_train_loader(cities=cities, labels=labels, batch_size=batch_size, shuffle=False, collate_fn=collate_fn, sampler=valid_sampler) #Initialize the model to train model = LSTMClassifier(27, 10, 14) # Loss and Optimizer criterion = nn.NLLLoss() learning_rate = 0.8 optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # train losses = [] num_epochs = 10 # Train the Model for epoch in range(num_epochs): print("##### epoch {:2d}".format(epoch + 1)) for i, batch in enumerate(train_data_loader):
type=int, default=os.environ['SM_NUM_GPUS'])  # NOTE(review): fragment -- tail of a parser.add_argument('--num-gpus', ...) call
args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device {}.".format(device))

# Seed torch for reproducibility.
torch.manual_seed(args.seed)

# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# Build the model.
# This variant additionally takes a layer count and dropout probability.
model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size,
                       args.n_layers, args.drop_prob).to(device)
#model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)

# Attach the preprocessing word dictionary so it is saved with the model.
with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
    model.word_dict = pickle.load(f)

print(
    "Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}, n_layers {}, drop_prob {}."
    .format(args.embedding_dim, args.hidden_dim, args.vocab_size,
            args.n_layers, args.drop_prob))
#print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
#args.embedding_dim, args.hidden_dim, args.vocab_size
#))

# Train the model.
help='number of layers (default: 2)')  # NOTE(review): fragment -- tail of a parser.add_argument(...) call for the layer count

# args holds all passed-in arguments
args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device {}.".format(device))

# Seed torch for reproducibility.
torch.manual_seed(args.seed)

# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
# Don't forget to move your model .to(device) to move to GPU , if appropriate
model = LSTMClassifier(args.input_dim, args.hidden_dim, args.num_layers,
                       args.output_dim).to(device)

## TODO: Define an optimizer and loss function for training
optimizer = optim.Adam(model.parameters())
# MSE loss: this variant is trained as a regressor -- presumably; confirm
# against the model's output layer.
criterion = torch.nn.MSELoss()

# Trains the model (given line of code, which calls the above training function)

# Keep the keys of this dictionary as they are
model_info_path = os.path.join(args.model_dir, 'model_info.pth')
with open(model_info_path, 'wb') as f:
    model_info = {
        'num_layers': args.num_layers,
        'hidden_dim': args.hidden_dim,
        'output_dim': args.output_dim,
        'input_dim': args.input_dim,
# Ensure the results directory exists.
# FIX: the exists()/makedirs() pair was racy (TOCTOU: another process could
# create the directory between the check and the call, raising FileExistsError);
# exist_ok=True does the same thing atomically.
os.makedirs(save_root, exist_ok=True)
#print('writing results to '+save_root)

# create data generators
generator_train = get_batch_transform(model, train_data)
generator_val = get_batch_transform(model, val_data)
generator_test = get_batch_transform(model, test_data)

# train a predictor model
if exp_model == 'LR':
    model_snt = LRClassifier()
elif exp_model == 'LSTM':
    model_snt = LSTMClassifier()
else:
    raise NotImplementedError

if args.cuda:
    model_snt.cuda()

# Learning-rate schedule endpoints; lr_new tracks the current rate.
iters_max = 4000
lr_base = 0.001
lr_final = 0.00005
lr_new = lr_base

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_snt.parameters(), lr=lr_base)

# Validation-history accumulators, filled during training.
iters_val = []
accus_val = []
loss_val = []
# --- SageMaker entry-point fragment: environment-derived arguments + setup ---
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--data-dir', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])

args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device {}.".format(device))

# Seed torch for reproducibility.
torch.manual_seed(args.seed)

# Load the training data.
train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

# Build the model.
model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)

# Attach the preprocessing word dictionary so it is saved with the model.
with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
    model.word_dict = pickle.load(f)

print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
    args.embedding_dim, args.hidden_dim, args.vocab_size
))

# Train the model.
optimizer = optim.Adam(model.parameters())
# BCELoss: presumably the model emits a sigmoid probability -- TODO confirm.
loss_fn = torch.nn.BCELoss()
train(model, train_loader, args.epochs, optimizer, loss_fn, device)

# Save the parameters used to construct the model
def train(model, train_loader, epochs, optimizer, loss_fn, device):
    """
    This is the training method that is called by the PyTorch training script. The parameters
    passed are as follows:
    model        - The PyTorch model that we wish to train.
    train_loader - The PyTorch DataLoader that should be used during training.
    epochs       - The total number of epochs to train for.
    optimizer    - The optimizer to use during training.
    loss_fn      - The loss function used for training.
    device       - Where the model and data should be loaded (gpu or cpu).
    """
    # FIX: the original pasted the notebook's train() as a *nested* def inside
    # this one, so calling train(...) merely defined the inner function and
    # returned without ever training. The loop now lives directly in this body.
    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0
        for batch in train_loader:
            batch_X, batch_y = batch
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            out = model(batch_X)  # forward pass via __call__, not .forward()
            loss = loss_fn(out, batch_y)
            loss.backward()
            optimizer.step()

            total_loss += loss.data.item()
        # Average loss over the number of batches in this epoch.
        print("Epoch: {}, BCELoss: {}".format(epoch, total_loss / len(train_loader)))


if __name__ == '__main__':
    # All of the model parameters and training parameters are sent as arguments when the script
    # is executed. Here we set up an argument parser to easily access the parameters.
    parser = argparse.ArgumentParser()

    # Training Parameters
    parser.add_argument('--batch-size', type=int, default=512, metavar='N',
                        help='input batch size for training (default: 512)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')

    # Model Parameters
    parser.add_argument('--embedding_dim', type=int, default=32, metavar='N',
                        help='size of the word embeddings (default: 32)')
    parser.add_argument('--hidden_dim', type=int, default=100, metavar='N',
                        help='size of the hidden dimension (default: 100)')
    parser.add_argument('--vocab_size', type=int, default=5000, metavar='N',
                        help='size of the vocabulary (default: 5000)')

    # SageMaker Parameters (injected via environment by the training job)
    parser.add_argument('--hosts', type=list, default=json.loads(os.environ['SM_HOSTS']))
    parser.add_argument('--current-host', type=str, default=os.environ['SM_CURRENT_HOST'])
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--data-dir', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
    parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])

    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device {}.".format(device))

    torch.manual_seed(args.seed)

    # Load the training data.
    train_loader = _get_train_data_loader(args.batch_size, args.data_dir)

    # Build the model.
    model = LSTMClassifier(args.embedding_dim, args.hidden_dim, args.vocab_size).to(device)

    # Attach the preprocessing word dictionary so it is saved with the model.
    with open(os.path.join(args.data_dir, "word_dict.pkl"), "rb") as f:
        model.word_dict = pickle.load(f)

    print("Model loaded with embedding_dim {}, hidden_dim {}, vocab_size {}.".format(
        args.embedding_dim, args.hidden_dim, args.vocab_size
    ))

    # Train the model.
    optimizer = optim.Adam(model.parameters())
    loss_fn = torch.nn.BCELoss()
    train(model, train_loader, args.epochs, optimizer, loss_fn, device)

    # Save the parameters used to construct the model
    model_info_path = os.path.join(args.model_dir, 'model_info.pth')
    with open(model_info_path, 'wb') as f:
        model_info = {
            'embedding_dim': args.embedding_dim,
            'hidden_dim': args.hidden_dim,
            'vocab_size': args.vocab_size,
        }
        torch.save(model_info, f)

    # Save the word_dict
    word_dict_path = os.path.join(args.model_dir, 'word_dict.pkl')
    with open(word_dict_path, 'wb') as f:
        pickle.dump(model.word_dict, f)

    # Save the model parameters
    model_path = os.path.join(args.model_dir, 'model.pth')
    with open(model_path, 'wb') as f:
        torch.save(model.cpu().state_dict(), f)