def fit(self, data, labels, validate=0.0, n_epochs=100, batch_size=128,
        converging_threshold=-1., path='./', periodic_save=0):
    history = []
    if type(data) is not torch.Tensor:
        data = torch.tensor(data)
        labels = torch.tensor(labels)
    set_size = data.shape[0]
    best_perform = math.inf
    for n in range(n_epochs):
        print('STARTING EPOCH {}'.format(n))
        epoch_loss = []
        for i in range(0, set_size, batch_size):
            self.print_progress(((i + batch_size) / set_size) * 100,
                                progress='[{}/{}]'.format(min(i + batch_size, set_size), set_size))
            x, y = data[i:i + batch_size], labels[i:i + batch_size]
            self.optimizer.zero_grad()
            # forward
            y_pred, (z_mu, z_var) = self.forward(x)
            # evaluate
            loss = self.bvae_loss(y_pred, y, z_mu, z_var)
            epoch_loss.append(loss.item())  # store the scalar only, so the graph can be freed
            # backprop
            loss.backward()
            self.optimizer.step()
            self.print_progress(x=((i + batch_size) / set_size) * 100,
                                progress='[{}/{}][loss {}]'.format(min(i + batch_size, set_size),
                                                                   set_size, loss.item() / batch_size))
        self.epochs_trained += 1
        history.append(torch.mean(torch.tensor(epoch_loss)) / set_size)
        loss_delta = history[-2] - history[-1] if len(history) > 1 else -1
        if validate > 0:
            num_validation_samples = int(len(data) * validate)  # floors
            idxs = np.random.randint(0, len(data), num_validation_samples)
            x, y = data[idxs], labels[idxs]
            print('\n evaluate on {} samples; '.format(len(idxs)), end='')
            performance = self.evaluate(x, y)
            print('avg loss {}'.format(performance))
            if performance < best_perform:
                best_perform = performance
                save_model(self, path, '{}'.format(self.__class__.__name__))
        if periodic_save > 0 and self.epochs_trained % periodic_save == 0:
            print('saving model')
            save_model(self, path, '{}'.format(self.__class__.__name__))
        if 0 <= loss_delta <= converging_threshold:
            print('\n RETURN AFTER {} EPOCHS with loss_delta: {} <= {} '.format(n, loss_delta, converging_threshold))
            break
        print('\nFINISHED EPOCH {}, avg loss: {}\n\n'.format(n, history[-1]))
    return history
def fit(model, data):
    loss_fun = F.binary_cross_entropy
    optimizer = optim.Adam(params=model.parameters())
    epochs = 100
    # add a feature dimension and convert every sequence to a float tensor
    data = [np.expand_dims(x, 1) for x in data]
    data = [torch.tensor(d, dtype=torch.float32) for d in data]
    num_samples = len(data)
    for epoch in range(epochs):
        for i, seq in enumerate(data):
            # feed the sequence one step at a time, predicting the next element
            model.reset_state()
            optimizer.zero_grad()
            for xi, yi in zip(seq[:-1], seq[1:]):
                y_pred, new_state = model.forward(xi)
                model.update_state(new_state)
                loss = loss_fun(y_pred, yi)
                loss.backward(retain_graph=True)
            print('epoch {}/{}, seq {}/{}, loss {}'.format(epoch, epochs, i, num_samples, loss))
            optimizer.step()
        if epoch % 10 == 0:
            model.epochs_trained = epoch
            model.optimizer = optimizer
            save_model(model, modelname='werther_rnn')
def main(datapath, modelpath):
    print(f'loading data from {datapath}')
    filename = datapath.split('/')[-1].split('.')[0]
    datadir = datapath.split('/')[:-1]
    datadir = os.path.join(*datadir)
    dataset = load_pkl(datapath)  # Dataset from asr.util
    print('looking for idx files')
    # idxs_train = load_pkl(os.path.join('/', datadir, 'idxs_train.pkl'))
    # idxs_test = load_pkl(os.path.join('/', datadir, 'idxs_test.pkl'))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # additional info when using cuda
    if device.type == 'cuda':
        print(torch.cuda.get_device_name(0))
        print('Memory Usage:')
        print('Allocated:', round(torch.cuda.memory_allocated(0) / 1024**3, 1), 'GB')
        print('Reserved: ', round(torch.cuda.memory_reserved(0) / 1024**3, 1), 'GB')

    x = dataset.data
    Y = dataset.get_labels_categorical()
    seqlen, nfeatures = x.shape[1:]

    print('splitting up data into train and test sets')
    idxs = list(range(len(x)))
    np.random.shuffle(idxs)
    split_percent = 0.90
    split = int(split_percent * len(idxs))
    idxs_train = idxs[:split]
    idxs_test = idxs[split:]
    idx_path = '/' + datadir + f'/idxs_{filename}.pkl'
    print(f'saving idxs to {idx_path}')
    with open(idx_path, 'wb') as f:
        pkl.dump({'train': idxs_train, 'test': idxs_test}, f)

    x_train = torch.tensor(x[idxs_train], dtype=torch.float)
    y_train = torch.tensor(Y[idxs_train], dtype=torch.float)
    x_test = torch.tensor(x[idxs_test], dtype=torch.float)
    y_test = torch.tensor(Y[idxs_test], dtype=torch.float)
    del x, Y, dataset
    n_samples, num_classes = y_train.shape
    x_train = x_train.to(device)
    y_train = y_train.to(device)
    x_test = x_test.to(device)
    y_test = y_test.to(device)

    # hidden_size = [25, 50, 75, 100, 150, 200, 250, 300, 400]
    hidden_size = [25, 250, 50]
    print(f'starting training of classifiers with hidden dims of {hidden_size}')
    for hid_s in hidden_size:
        print(f'training model with {hid_s} hidden dims')
        network = GRUEncoder(input_size=nfeatures, hidden_size=hid_s,
                             out_dim=num_classes, act_out=nn.Sigmoid, num_layers=1)
        if device.type == 'cuda':
            network = network.to(device)
        optim = Adam(network.parameters())  # lr=0.005; default is 0.001
        loss_fun = nn.BCELoss()
        histories = []
        test_errors = []
        target_error = 57e-3
        train = True
        batch_size = 256
        n_epochs, max_epochs = 0, 200
        print(f'test performance without training: '
              f'{test_classifier(network.forward, x_test, y_test, batch_size)}')
        while train:
            history = []
            for i in range(0, n_samples, batch_size):
                x, y = x_train[i:i + batch_size], y_train[i:i + batch_size]
                optim.zero_grad()
                y_pred, _ = network.forward(x)
                loss = loss_fun(y_pred, y)
                loss.backward()
                history.append(loss.item())  # keep the scalar only, not the graph
                optim.step()
                if (i // batch_size) % 10 == 0:
                    print(f'epoch {n_epochs} {i}:{i+batch_size}/{n_samples} loss {loss}',
                          end='\r', flush=True)
            # train_error = test_classifier(network.forward, x_train, y_train, 512)
            # print(f'\ntrain error: {train_error}')
            current_error = test_classifier(network.forward, x_test, y_test, 512)
            print(f'\ntest error: {current_error} \n')
            test_errors.append(current_error)
            n_epochs += 1
            train = target_error < current_error and n_epochs < max_epochs
            histories.append(history)
        modelname = '_'.join([network.__class__.__name__, filename, str(hid_s),
                              str(n_epochs), str(current_error)])
        network.optimizer = optim
        network.history = histories
        network.test_errors = test_errors
        network.epochs_trained = n_epochs
        save_model(network, path=modelpath, modelname=modelname)
def main(save_prefix='./model'):
    # load dataset
    dataset_zip = np.load('./data/test/dsprites.npz')
    imgs = dataset_zip['imgs']
    imgs = np.expand_dims(imgs, 1)  # add a channel dimension
    num_imgs = len(imgs)
    idxs = np.arange(num_imgs)
    np.random.shuffle(idxs)
    train_perc = 0.8
    train_idxs, test_idxs = idxs[:int(train_perc * num_imgs)], idxs[int(train_perc * num_imgs):]
    with open(save_prefix + '/shuffle_idxs.pkl', 'wb') as f:
        pkl.dump({'train': train_idxs, 'test': test_idxs}, f)
    x_train, x_test = imgs[train_idxs], imgs[test_idxs]

    # the dataset was generated from 6 latent factors; we want to see that 3 dimensions stay unused
    latent_dim = 9
    data_shape = (1, 64, 64)
    encoder = ConvEncoder(in_shape=data_shape,
                          out_dim=latent_dim)  # out_dim == dim of mu and dim of log_var
    decoder = ConvDecoder(in_dim=latent_dim, out_shape=data_shape)
    print(encoder.extra_repr())
    print(decoder.extra_repr())

    print("build beta vae")
    device = check_for_gpu()
    x_train = x_train / 255

    def cast_data(x, device=device):
        if device.type == 'cuda':
            print('data to {}'.format(device))
            x = torch.tensor(x).to(device)
        else:
            x = torch.tensor(x).to(torch.float64)
        return x

    l = F.binary_cross_entropy
    # l = F.hinge_embedding_loss
    beta = 4.0
    bvae = bVAE(encoder, decoder, latent_dim=latent_dim, recon_loss=l, beta=beta)
    bvae.to(device)
    print(bvae.extra_repr())

    print("fit bvae")
    # fit on successive slices of the training set; each slice is moved to the device separately
    set_splits = 4
    num_samples = len(x_train)
    for i in range(1, set_splits + 1):
        start = 1 - (1 / (i + 1))
        end = start + (1 / set_splits)
        start = int(start * num_samples)
        end = int(end * num_samples)
        d = x_train[start:end]
        d = cast_data(d)
        history = bvae.fit(d, d, n_epochs=200, batch_size=512)

    print("saving model")
    path = save_prefix
    modelname = 'bvae_dsprite_{}'.format(beta)
    save_model(bvae, path=path, modelname=modelname)
print(encoder.extra_repr())
print(decoder.extra_repr())

print("build beta vae")
l = F.mse_loss  # F.binary_cross_entropy
# l = F.hinge_embedding_loss
bvae = bVAE(encoder, decoder, latent_dim=latent_dim, recon_loss=l, beta=1,
            activation_function=None)
if device.type == 'cuda':
    print('upload to {}'.format(device))
    bvae = bvae.to(device)
    x_train = torch.tensor(x_train)
    x_train = x_train.to(device)
print(bvae.extra_repr())

print("fit bvae")
history = bvae.fit(x_train, x_train, n_epochs=200, batch_size=256)

print("saving model")
path = '/dev/shm/semueller/asr/models_test'
modelname = 'bvae_test'
save_model(bvae, path=path, modelname=modelname)

# del bvae
# print("test loading")
# bvae = load_model(path=path, modelname=modelname)
# print("test bvae")
# test_out = bvae.predict(x_test[:25])
if __name__ == '__main__':
    data = load_werther()
    in_dim = data[0][0].shape[0]
    model = RNN(in_dim, hidden_dim=250, out_dim=in_dim,
                activation_function_out=torch.sigmoid)
    fit(model, data)
    save_model(model, modelname='werther_rnn')
def main(data, model):
    print('loading data from {}'.format(data))
    filename = data.split('/')[-1].split('.')[0]
    with open(data, 'rb') as f:
        data = pkl.load(f)
    print(data.keys())

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # additional info when using cuda
    if device.type == 'cuda':
        print(torch.cuda.get_device_name(0))
        print('Memory Usage:')
        print('Allocated:', round(torch.cuda.memory_allocated(0) / 1024**3, 1), 'GB')
        print('Reserved: ', round(torch.cuda.memory_reserved(0) / 1024**3, 1), 'GB')

    train_percentage = 0.8
    x = data['X']
    seqlen, nfeatures = x.shape[1:]
    idxs = list(range(len(x)))
    np.random.shuffle(idxs)
    split = int(np.ceil(len(idxs) * train_percentage))
    idxs_train, idxs_test = idxs[:split], idxs[split:]
    with open(filename + '_idxs.dat', 'w') as f:
        f.write('train: \n {} \n test: \n {} \n'.format(idxs_train, idxs_test))
    x_train = torch.tensor(x[idxs_train], dtype=torch.float)
    del x

    latent_dim = 100
    hidden_size = 500
    n_epochs = 1000
    encoder = GRUEncoder(input_size=nfeatures, hidden_size=hidden_size,
                         out_dim=int(latent_dim * 1.5))
    decoder = GRUDecoder(input_size=latent_dim, hidden_size=hidden_size,
                         out_dim=nfeatures)
    rbvae = RbVAE(encoder=encoder, decoder=decoder, latent_dim=latent_dim, beta=1.1)
    print('num params: {}'.format(rbvae.num_params))

    if device.type == 'cuda':
        x_train = x_train.to(device)
        rbvae = rbvae.to(device)

    # don't validate for now: validation duplicates data on the gpu,
    # which raises a memory exception (and is inefficient)
    history = rbvae.fit(x_train, x_train, n_epochs=n_epochs, batch_size=256,
                        validate=0.0, path=model, periodic_save=10)

    modelname = '_'.join([rbvae.__class__.__name__, filename,
                          str(latent_dim), str(hidden_size), str(n_epochs)])
    save_model(rbvae, path=model, modelname=modelname)