def train_VAE():
    data = load_hyp_spectral_preprocessed()
    print("n:", sum([len(data[k]) for k in data]))
    model = VAE()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in range(num_epochs):
        # iterate over the preprocessed samples of every class
        for key in data:
            for sample in data[key]:
                sample = np.asarray(sample)
                # normalize the input, because the loss would explode otherwise
                sample = (sample - np.average(sample)) / np.std(sample)
                img = Variable(torch.from_numpy(sample.astype(float)).float())
                # ===================forward=====================
                recon, mu, logvar = model(img)
                loss = loss_function(recon, img, mu, logvar)
                # ===================backward====================
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        # ===================log========================
        print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, loss.data))
    #torch.save(model.state_dict(), './sim_autoencoder.pth')
    return model
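# train_VAE() calls loss_function(recon, img, mu, logvar), which is not defined
# in this file. A minimal sketch of the standard VAE objective (reconstruction
# error plus KL divergence) under that assumption; the project's actual
# implementation may weight or reduce the terms differently:
def loss_function(recon, img, mu, logvar):
    # reconstruction term: squared error between input and decoder output
    recon_loss = torch.sum((recon - img) ** 2)
    # KL term: pull the approximate posterior N(mu, sigma^2) toward N(0, 1)
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + kld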
def train_MNIST():
    data = load_hyp_spectral_preprocessed()
    #data, _ = load_MNIST_raw()
    batch_size = 3  # 3 for mnist
    # note: np.split(arr, n) yields n equal chunks, so batch_size is really the
    # number of batches here, and len(data) must be divisible by it
    data_batches = np.split(np.asarray(data), batch_size)
    model = autoencoder()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 weight_decay=1e-5)
    print("start")
    for epoch in range(num_epochs):
        for batch in data_batches:
            for sample in batch:
                img = Variable(torch.from_numpy(sample.astype(float)).float())
                # ===================forward=====================
                output = model(img)
                loss = criterion(output, img)
                # ===================backward====================
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        # ===================log========================
        print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, loss.data))
    #torch.save(model.state_dict(), './sim_autoencoder.pth')
    return model
def VAE_features(model):
    data = load_hyp_spectral_preprocessed()
    avgs = {}
    for c in range(1, 17):
        results = []
        for row in data[c]:
            row = Variable(torch.from_numpy(row.astype(float)).float())
            result, _ = model.encode(row)
            results.append(result.data.numpy())
        avg = np.average(results, axis=0)
        print(c, avg[0:5])
        avgs[str(c)] = avg
    #print(avgs)
    print([list(avgs[key]) for key in avgs])
    heatmap([list(avgs[key]) for key in avgs])
    save_object(avgs, "obj/VAE_features_10_e50.pkl")
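# save_object() is used above and below but not defined in this file. A
# minimal pickle-based sketch under that assumption:
def save_object(obj, path):
    import pickle
    with open(path, 'wb') as f:
        pickle.dump(obj, f)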
def test(model):
    data = load_hyp_spectral_preprocessed()
    for c in data:
        #print(data[c][0])
        plt.figure(c)
        plt.subplot(2, 1, 1)
        heatmap([data[c][0][0:10]], title="original", x_size=10, y_size=0.2)
        row = Variable(torch.from_numpy(data[c][0].astype(float)).float())
        #print(model.encode(row))
        #print(model(row))
        #print("data", np.asarray(model(row).data[0:10]))
        plt.subplot(2, 1, 2)
        heatmap([np.asarray(model(row).data[0:10])], title="reconstruction",
                x_size=10, y_size=0.2)
def autoencoder_features2(model):
    data = load_hyp_spectral_preprocessed()
    avgs = {}
    print(data.keys())
    for c in range(1, 17):
        #c = str(c)
        avg = np.zeros(220)
        if c in data.keys():
            for row in data[c]:
                avg += np.asarray(row)
            avg /= len(data[c])
            print(avg[0:5])
            avg = Variable(torch.from_numpy(avg.astype(float)).float())
            result = model.encode(avg).data.numpy()
            #print(c, avg[0:5])
            avgs[str(c)] = result
    #print(avgs)
    #print([list(avgs[key]) for key in avgs])
    print(avgs)
    heatmap([list(avgs[key]) for key in avgs])
    save_object(avgs, "obj/autoencoder_features_10_e50.pkl")
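# heatmap() is another helper assumed from elsewhere in the project. Judging
# from the call sites, it renders a list of 1-D vectors as a 2-D heat map;
# x_size and y_size presumably control the drawing size. A minimal sketch that
# also works inside plt.subplot() (the sizing parameters are accepted but
# ignored here):
def heatmap(rows, title="", x_size=8, y_size=4):
    arr = np.asarray(rows)
    plt.imshow(arr, aspect='auto', cmap='viridis')  # draw into the current axes
    plt.title(title)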
def show_data_distr():
    # show the class distribution before and after preprocessing
    # NOTE: `data` is not defined in this function; it is assumed to be the
    # raw (unpreprocessed) dataset held in a module-level variable
    data_size = np.sum([len(data[x]) for x in data])
    distr = {}
    for key in sorted(data):
        print(key, len(data[key]) / data_size)
        distr[int(key)] = len(data[key]) / data_size
    distr = [distr[k] for k in sorted(distr)]
    plt.subplot(2, 1, 1)
    plt.bar(range(1, 17), distr)
    plt.title("data distribution before preprocessing")

    data_p = load_hyp_spectral_preprocessed()
    distr = {}
    data_size = np.sum([len(data_p[x]) for x in data_p])
    for key in data_p:
        print("--", key, len(data_p[key]) / data_size)
        distr[int(key)] = len(data_p[key]) / data_size
    distr = [distr[k] for k in sorted(distr)]
    plt.subplot(2, 1, 2)
    plt.bar(range(1, 17), distr)
    plt.title("data distribution after preprocessing")
    plt.show()
def train():
    data = load_hyp_spectral_preprocessed()
    print("n:", sum([len(data[k]) for k in data]))
    #data = load_MNIST_raw()
    model = autoencoder()
    criterion = nn.MSELoss()
    """
    SGD mostly finds the same features for all classes as the optimal
    solution, but sometimes it does not; in those cases the feature map
    looks promising. Adam finds solutions with a smaller loss, and the
    features also look more promising.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    print("start")
    losses = []
    for epoch in range(num_epochs):
        epoch_loss = []
        for key in data:
            batch = data[key]
            for sample in batch:
                img = Variable(torch.from_numpy(sample.astype(float)).float())
                # ===================forward=====================
                output = model(img)
                loss = criterion(output, img)
                # ===================backward====================
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                epoch_loss.append(loss.data)
        # ===================log========================
        print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, loss.data))
        losses.append(np.average(epoch_loss))
    plt.plot(losses)
    #torch.save(model.state_dict(), './sim_autoencoder.pth')
    return model
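# Minimal usage sketch. Assumption: the module-level imports and
# hyperparameters used throughout this file look roughly like
#   import numpy as np
#   import torch
#   import torch.nn as nn
#   import matplotlib.pyplot as plt
#   from torch.autograd import Variable
#   learning_rate = 1e-3
#   num_epochs = 50  # the saved feature files are tagged "e50"
if __name__ == "__main__":
    model = train()
    test(model)
    autoencoder_features2(model)
    plt.show()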