import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from tqdm import tqdm

import helper  # project-local module providing one_hot_encode and Standardize_Seismic


def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001):
    clip = 5
    val_frac = 0.1  # fraction of the data held out for validation
    print_every = 10

    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # split the encoded text into training and validation sets
    val_idx = int(len(data) * (1 - val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    if torch.cuda.is_available():
        net = net.cuda()

    counter = 0
    n_chars = len(net.chars)

    for e in range(epochs):
        h = net.init_hidden(batch_size)
        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            x = one_hot_encode(x, n_chars)
            inputs = torch.from_numpy(x)
            targets = torch.from_numpy(y)
            if torch.cuda.is_available():
                inputs = inputs.cuda()
                targets = targets.long().cuda()

            # detach the hidden state so gradients do not flow across batches
            h = tuple([each.data for each in h])

            net.zero_grad()
            output, h = net(inputs, h)
            loss = criterion(output, targets.view(batch_size * seq_length))
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            if counter % print_every == 0:
                # evaluate on the held-out split without updating weights
                val_h = net.init_hidden(batch_size)
                val_losses = []
                for x, y in get_batches(val_data, batch_size, seq_length):
                    x = one_hot_encode(x, n_chars)
                    x = torch.from_numpy(x)
                    y = torch.from_numpy(y)
                    val_h = tuple([each.data for each in val_h])
                    inputs, targets = x, y
                    if torch.cuda.is_available():
                        inputs = inputs.cuda()
                        targets = targets.long().cuda()
                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size * seq_length))
                    val_losses.append(val_loss.item())
                print('epoch: {}/{}'.format(e + 1, epochs),
                      'steps: {}'.format(counter),
                      'Loss {:.4f}'.format(loss.item()),
                      'val_loss {:.4f}'.format(np.mean(val_losses)))
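# The two helpers below are called throughout this file but never defined in
# it. The sketches that follow are assumptions reconstructed only from how
# train() and predict() call them: one_hot_encode(arr, n_labels) must turn an
# integer array into float32 one-hot vectors, and get_batches(data, batch_size,
# seq_length) must yield (x, y) pairs where y is x shifted one step to the left.

def one_hot_encode(arr, n_labels):
    # flatten, scatter ones at the label indices, then restore the batch shape
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    one_hot[np.arange(arr.size), arr.flatten()] = 1.0
    return one_hot.reshape((*arr.shape, n_labels))


def get_batches(data, batch_size, seq_length):
    # trim the encoded text so it divides evenly into batch_size rows
    n_batches = len(data) // (batch_size * seq_length)
    data = data[:n_batches * batch_size * seq_length]
    data = data.reshape((batch_size, -1))
    for n in range(0, data.shape[1], seq_length):
        x = data[:, n:n + seq_length]
        # targets are the inputs shifted one step; wrap the final character
        y = np.zeros_like(x)
        y[:, :-1], y[:, -1] = x[:, 1:], data[:, (n + seq_length) % data.shape[1]]
        yield x, y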
def __init__(self, text, vocabulary, batch_size=10, seq_length=50, device="cpu"):
    self.original_text = text

    # character length of the trimmed text
    batch_char_size = batch_size * seq_length
    self.text_length = (len(text) // batch_char_size) * batch_char_size

    self.batch_size = batch_size
    self.seq_length = seq_length

    # character tokenization
    self.vocabulary = vocabulary

    # character-to-integer translation dictionaries
    self.int2char = dict(enumerate(vocabulary))
    self.char2int = {ch: i for i, ch in self.int2char.items()}

    # character-to-integer translation
    encoded_text = np.array([self.char2int[ch] for ch in self.original_text])

    # we prepare Xs (inputs) and Ys (targets) beforehand to save computation
    # time: inputs are one-hot encoded, while targets remain integer indices
    # indicating the correct classes
    self.x_y = [
        (torch.tensor(one_hot_encode(batch[0], len(vocabulary))).to(device),
         torch.tensor(batch[1]).to(device))
        for batch in get_batches(encoded_text, batch_size, seq_length)
    ]
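# A minimal round-trip sketch of the mappings built in __init__ above. This is
# a hypothetical demo, not part of the original class; it assumes `vocabulary`
# is simply the collection of unique characters in the text.
def _vocabulary_roundtrip_demo():
    text = "hello world"
    vocabulary = tuple(set(text))
    int2char = dict(enumerate(vocabulary))
    char2int = {ch: i for i, ch in int2char.items()}
    encoded = np.array([char2int[ch] for ch in text])
    # decoding the encoded array must reproduce the original text exactly
    assert "".join(int2char[i] for i in encoded) == text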
def predict(net, char, h=None, top_k=None):
    # encode the input character and one-hot encode it
    x = np.array([[net.char2int[char]]])
    x = one_hot_encode(x, len(net.chars))
    inputs = torch.from_numpy(x)
    if torch.cuda.is_available():
        inputs = inputs.cuda()

    # start from a fresh hidden state if none was passed in
    if h is None:
        h = net.init_hidden(1)
    # detach the hidden state from its history
    h = tuple([each.data for each in h])

    out, h = net(inputs, h)

    # turn the output logits into a probability distribution
    p = F.softmax(out, dim=1).data
    if torch.cuda.is_available():
        p = p.cpu()

    # restrict sampling to the top-k most likely characters if requested
    if top_k is None:
        top_ch = np.arange(len(net.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()

    # sample the next character from the (renormalized) distribution
    p = p.numpy().squeeze()
    char = np.random.choice(top_ch, p=p / p.sum())
    return net.int2char[char], h
def predict(self, char, h=None, cuda=False, top_k=None):
    ''' Given a character, predict the next character.
        Returns the predicted character and the hidden state.
    '''
    if cuda:
        self.cuda()
    else:
        self.cpu()

    # start from a fresh hidden state if none was passed in
    if h is None:
        h = self.init_hidden(1)

    x = np.array([[self.char2int[char]]])
    x = helper.one_hot_encode(x, len(self.chars))
    inputs = torch.from_numpy(x)
    if cuda:
        inputs = inputs.cuda()

    # detach the hidden state from its history
    h = tuple([each.data for each in h])
    out, h = self.forward(inputs, h)

    # turn the output logits into a probability distribution
    p = F.softmax(out, dim=1).data
    if cuda:
        p = p.cpu()

    # restrict sampling to the top-k most likely characters if requested
    if top_k is None:
        top_ch = np.arange(len(self.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()

    # sample the next character from the (renormalized) distribution
    p = p.numpy().squeeze()
    char = np.random.choice(top_ch, p=p / p.sum())
    return self.int2char[char], h
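# A hedged sketch of how the predict() method above is typically driven to
# generate text: prime the hidden state with a seed string, then feed each
# predicted character back in. `sample` is a hypothetical helper, not part of
# the original file; it relies only on the predict() signature shown above.

def sample(net, size, prime='The', cuda=False, top_k=None):
    net.eval()  # disable dropout while generating
    chars = list(prime)
    h = net.init_hidden(1)
    # run the priming characters through the network to build up state
    for ch in prime:
        char, h = net.predict(ch, h, cuda=cuda, top_k=top_k)
    chars.append(char)
    # generate the remaining characters one at a time
    for _ in range(size):
        char, h = net.predict(chars[-1], h, cuda=cuda, top_k=top_k)
        chars.append(char)
    return ''.join(chars)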
def make_samples(patches_list, labels_list, guidance, num_classes=2,
                 shuffle=True, seismic_standardize=True):
    import imgaug.augmenters as iaa  # augmenters themselves arrive via `guidance`

    samples_list = []
    label_list = []
    label_note_list = []  # records which augmentations hit each patch; kept only for monitoring and wastes memory

    for i in np.arange(len(patches_list)):
        samples_list.extend(patches_list[i])
        label_list.extend(labels_list[i])

    # ---------------------------- sample corruption --------------------------- #
    # iaa.Sometimes is deliberately avoided so the augmenter status can be
    # recorded manually for each patch
    num_patch = len(samples_list)
    patch_size = samples_list[0].shape[0]
    patches_tensor = np.zeros((num_patch, patch_size, patch_size))
    labels_tensor = np.zeros((num_patch, num_classes))

    print("\nProcessing samples ...")
    for i in tqdm(np.arange(num_patch)):
        temp_array = samples_list[i]

        # background noise: randomly dim the patch brightness
        temp_array = temp_array * np.random.uniform(low=0.2, high=1)
        label_note = "Bright; "

        # apply each augmenter from the guidance list with its own probability
        for aug in guidance:
            if aug[0] >= np.random.random():
                temp_array = aug[-1](images=np.float32(temp_array))
                label_note += aug[1] + "; "

        patches_tensor[i, :] = temp_array
        labels_tensor[i, :] = helper.one_hot_encode(label_list[i], num_classes=num_classes)
        label_note_list.append(label_note)

    # -------------------------- seismic standardization ----------------------- #
    if seismic_standardize:
        patches_tensor = helper.Standardize_Seismic(patches_tensor,
                                                    method="Keep Polarity",
                                                    feature_range=(-1, 1),
                                                    SourceData_Bound=None)

    # --------------------------------- shuffle --------------------------------- #
    # reuse the same RNG state so patches, labels, and notes stay aligned
    if shuffle:
        state = np.random.get_state()
        np.random.shuffle(patches_tensor)
        np.random.set_state(state)
        np.random.shuffle(labels_tensor)
        np.random.set_state(state)
        np.random.shuffle(label_note_list)

    return patches_tensor, labels_tensor, label_note_list
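# A hedged example of the `guidance` structure make_samples() expects, inferred
# from how it is indexed above: aug[0] is the application probability, aug[1]
# the note recorded for the patch, and aug[-1] a callable imgaug augmenter
# (invoked as augmenter(images=...)). The specific augmenters and values here
# are illustrative assumptions, not the original configuration.

def example_guidance():
    import imgaug.augmenters as iaa
    return [
        (0.5, "GaussNoise", iaa.AdditiveGaussianNoise(scale=0.05)),
        (0.3, "Blur", iaa.GaussianBlur(sigma=(0.0, 1.5))),
    ]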
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001):
    """ This function is responsible for training the LSTM model.

    Args:
        net: the LSTM model
        data: the integer-encoded input data the model is trained on
        epochs: number of full passes over the training data
        batch_size: number of sequences per mini-batch
        seq_length: number of characters per sequence (default 50)
        lr: learning rate
    Returns:
        None
    """
    clip = 5
    val_frac = 0.1
    print_every = 10

    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # split the encoded text into training and validation sets
    val_idx = int(len(data) * (1 - val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    # keep a handle on the underlying module: nn.DataParallel hides custom
    # attributes such as .chars and .init_hidden behind .module
    base_net = net
    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            net = nn.DataParallel(net)
        print("using GPU {}".format(torch.cuda.get_device_name()))
        net = net.cuda()
    else:
        print("using CPU")
        net = net.cpu()

    counter = 0
    n_chars = len(base_net.chars)

    for e in range(epochs):
        # initialize the hidden state
        h = base_net.init_hidden(batch_size)
        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            x = one_hot_encode(x, n_chars)
            inputs = torch.from_numpy(x)
            targets = torch.from_numpy(y)
            # if a GPU is available, use it
            if torch.cuda.is_available():
                inputs = inputs.cuda()
                targets = targets.long().cuda()

            # detach the hidden state so gradients do not flow across batches
            h = tuple([each.data for each in h])

            net.zero_grad()
            output, h = net(inputs, h)
            loss = criterion(output, targets.view(batch_size * seq_length))
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            if counter % print_every == 0:
                # evaluate on the held-out split without updating weights
                val_h = base_net.init_hidden(batch_size)
                val_losses = []
                for x, y in get_batches(val_data, batch_size, seq_length):
                    x = one_hot_encode(x, n_chars)
                    x = torch.from_numpy(x)
                    y = torch.from_numpy(y)
                    val_h = tuple([each.data for each in val_h])
                    inputs, targets = x, y
                    if torch.cuda.is_available():
                        inputs = inputs.cuda()
                        targets = targets.long().cuda()
                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size * seq_length))
                    val_losses.append(val_loss.item())
                print('epoch: {}/{}'.format(e + 1, epochs),
                      'steps: {}'.format(counter),
                      'Loss {:.4f}'.format(loss.item()),
                      'val_loss {:.4f}'.format(np.mean(val_losses)))
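# A minimal usage sketch for train(), kept as comments because the model class
# is not defined in this file. `CharRNN` is a hypothetical class name and
# 'input.txt' a placeholder path; the only grounded assumptions are that the
# model exposes .chars and .init_hidden(), as the attribute accesses above
# require, and that `data` is the integer-encoded text get_batches() consumes.
#
#   with open('input.txt') as f:
#       text = f.read()
#   chars = tuple(set(text))
#   char2int = {ch: i for i, ch in enumerate(chars)}
#   data = np.array([char2int[ch] for ch in text])
#   net = CharRNN(chars)  # hypothetical constructor
#   train(net, data, epochs=20, batch_size=128, seq_length=100, lr=0.001)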