def main():
    """Train the network from ``build_model`` on MNIST and report progress.

    Runs 50 epochs of mini-batch training (batch size 100) using SGD with
    momentum and a categorical cross-entropy loss on logits. After every
    epoch the mean of the values returned by ``train`` and the accuracy of
    ``predict`` on the test split are printed.
    """
    tf.random.set_seed(42)

    train_x, test_x, train_y, test_y = load_mnist(onehot=True)
    train_x = tf.convert_to_tensor(train_x, dtype=tf.float32)
    test_x = tf.convert_to_tensor(test_x, dtype=tf.float32)
    train_y = tf.convert_to_tensor(train_y, dtype=tf.float32)
    # test_y is left exactly as load_mnist returned it; it is only used
    # through .argmax(axis=1) when scoring the predictions below.

    n_examples, n_features = train_x.shape
    n_classes = 10

    model = build_model(n_features, n_classes)
    model.summary()

    loss = CategoricalCrossentropy(from_logits=True)
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    batch_size = 100

    for epoch in range(1, 51):
        cost = 0.
        num_batches = n_examples // batch_size
        # Walk over the start offset of every full batch; a trailing
        # partial batch is never visited.
        for start in range(0, num_batches * batch_size, batch_size):
            stop = start + batch_size
            cost += train(model, loss, optimizer,
                          train_x[start:stop], train_y[start:stop])

        predicted = predict(model, test_x)
        expected = test_y.argmax(axis=1)
        accuracy = 100. * np.mean(predicted == expected)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (epoch, cost / num_batches, accuracy))
def main():
    """Train ``ConvNet`` on MNIST for 20 epochs and print per-epoch metrics.

    Images are reshaped to ``(N, 1, 28, 28)`` before being converted to
    tensors. Training uses SGD with momentum on mini-batches of 100; after
    each epoch the mean of the values returned by ``train`` and the accuracy
    of ``predict`` on the test split are printed.
    """
    torch.manual_seed(42)

    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples = len(trX)
    n_classes = 10
    model = ConvNet(output_dim=n_classes)
    # The default reduction already averages over the batch; the explicit
    # `size_average=True` argument is deprecated and has been dropped.
    loss = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 100
    # Floor division: range() needs an int, and `/` yields a float on
    # Python 3. A trailing partial batch is ignored.
    num_batches = n_examples // batch_size

    for i in range(20):
        cost = 0.
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer,
                          trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))
def main():
    """Train the model from ``build_model`` on MNIST under 8-bit fixed precision.

    The global quantizer precision is configured first, then the model is
    trained for 100 epochs with SGD plus momentum on mini-batches of 100.
    After each epoch the mean of the values returned by ``train`` and the
    accuracy of ``predict`` on the test split are printed.
    """
    torch.manual_seed(42)

    # Sets the global precision level (if you set per layer this is
    # overridden for said layer).
    # Floating-point alternative:
    #   sim.nn.Quantizer.set_float_precision(num_bits=32, num_mantissa_bits=11)
    sim.nn.Quantizer.set_fixed_precision(scale_factor=1e-2, num_bits=8)

    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)
    # The default reduction already averages over the batch; the explicit
    # `size_average=True` argument is deprecated and has been dropped.
    loss = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 100
    # A trailing partial batch is ignored.
    num_batches = n_examples // batch_size

    for i in range(100):
        cost = 0.
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer,
                          trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))
def _save_digit_images(data, labels, indices, out_dir):
    """Write the rows of ``data`` selected by ``indices`` as square PNG images.

    Each row is scaled by 255, cast to uint8 and reshaped to a square whose
    side is the integer square root of the row length. Files are named
    ``<index>_<label>.png`` inside ``out_dir``.
    """
    # reshape() requires integer dimensions; np.sqrt returns a float.
    side = int(round(np.sqrt(data.shape[1])))
    for i in indices:
        pixels = (data[i, :].reshape(side, -1) * 255).astype(np.uint8)
        file_name = str(i) + '_' + str(labels[i]) + '.png'
        Image.fromarray(pixels).save(os.path.join(out_dir, file_name))


def mnist_class_conditional_visualize(folder_name, data_type, prob, class_conditional=None):
    """Export the lowest- and highest-scoring MNIST samples as PNG images.

    Per-sample scores are read from ``output/<folder_name>/<data_type>/pxz.csv``
    and negated. Samples whose negated score is below the ``prob``-th
    percentile are written to an ``anomaly`` sub-directory; samples above the
    ``(100 - prob)``-th percentile are written to ``good``. Both directories
    live under ``output/<folder_name>/<data_type>/<prob>[/<class_conditional>]``.

    Args:
        folder_name: Name of the run directory below ``output``.
        data_type: ``'train'`` or ``'test'``; selects the MNIST split and
            the score file.
        prob: Percentile (0-100) used for both thresholds.
        class_conditional: When not None, only samples whose label equals
            this value are considered.

    Raises:
        ValueError: If ``data_type`` is neither ``'train'`` nor ``'test'``.
    """
    [x_train, t_train, x_valid, t_valid, x_test, t_test] = load_mnist()
    if data_type == 'train':
        images, targets = x_train, t_train
    elif data_type == 'test':
        images, targets = x_test, t_test
    else:
        raise ValueError(
            "data_type must be 'train' or 'test', got %r" % (data_type,))

    pxz = np.loadtxt(
        os.path.join('output', str(folder_name), data_type, 'pxz.csv'),
        delimiter=',')

    if class_conditional is not None:
        mask = np.where(targets == class_conditional)[0]
        pxz = pxz[mask]
    else:
        mask = np.arange(len(pxz))
    pxz = pxz * -1

    data = images[mask, :]
    # Labels must come from the same split as the images; the file names
    # previously always used the training labels, even for 'test'.
    labels = targets[mask]

    percentage = prob
    threshold = np.percentile(pxz, percentage)
    threshold_2 = np.percentile(pxz, 100 - percentage)
    anomaly_list = np.flatnonzero(pxz < threshold)
    good_list = np.flatnonzero(pxz > threshold_2)

    path_parts = ['output', str(folder_name), data_type, str(percentage)]
    if class_conditional is not None:
        path_parts.append(str(class_conditional))
    save_dir = os.path.join(*path_parts)

    for subdir, indices in (('anomaly', anomaly_list), ('good', good_list)):
        target_dir = os.path.join(save_dir, subdir)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        _save_digit_images(data, labels, indices, target_dir)
def main():
    """Train ``LSTMNet`` on MNIST, feeding each image as a sequence of rows.

    Every image is reshaped to 28 steps of 28 features. The model is trained
    for 20 epochs with SGD plus momentum on mini-batches of 100, using a
    categorical cross-entropy loss on logits. After each epoch the mean of
    the values returned by ``train`` and the accuracy of ``predict`` on the
    test split are printed.
    """
    tf.random.set_seed(42)

    train_x, test_x, train_y, test_y = load_mnist(onehot=True)
    train_size = len(train_y)

    n_classes = 10
    seq_length = 28
    input_dim = 28
    hidden_dim = 128
    batch_size = 100
    epochs = 20

    # Bring both splits to the shape (num_samples, seq_length, input_dim).
    sequence_shape = (-1, seq_length, input_dim)
    train_x = tf.convert_to_tensor(train_x.reshape(*sequence_shape),
                                   dtype=tf.float32)
    test_x = tf.convert_to_tensor(test_x.reshape(*sequence_shape),
                                  dtype=tf.float32)
    train_y = tf.convert_to_tensor(train_y, dtype=tf.float32)

    model = LSTMNet(input_dim, hidden_dim, n_classes)
    # model.summary() needs a built model, so build it explicitly. The first
    # component of input_shape is the batch size; 1 is only a placeholder.
    model.build(input_shape=(1, seq_length, input_dim))
    model.summary()

    loss = CategoricalCrossentropy(from_logits=True)
    optimizer = SGD(learning_rate=0.01, momentum=0.9)

    for epoch in range(1, epochs + 1):
        cost = 0.
        num_batches = train_size // batch_size
        # Walk over the start offset of every full batch; a trailing
        # partial batch is never visited.
        for start in range(0, num_batches * batch_size, batch_size):
            stop = start + batch_size
            cost += train(model, loss, optimizer,
                          train_x[start:stop, :, :], train_y[start:stop])

        predicted = predict(model, test_x)
        expected = test_y.argmax(axis=1)
        accuracy = 100. * np.mean(predicted == expected)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (epoch, cost / num_batches, accuracy))
def main():
    """Train the model from ``build_model`` on MNIST with Adam for 100 epochs.

    Training runs on mini-batches of 100. After each epoch the mean of the
    values returned by ``train`` and the accuracy of ``predict`` on the test
    split are printed.
    """
    torch.manual_seed(42)

    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)
    # The default reduction is the batch mean. The former explicit
    # reduction='elementwise_mean' is a deprecated spelling of the same
    # behavior and only produced a warning.
    loss = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())
    batch_size = 100
    # A trailing partial batch is ignored.
    num_batches = n_examples // batch_size

    for i in range(100):
        cost = 0.
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer,
                          trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))
def main():
    """Train ``LSTMNet`` on MNIST, feeding each image as a sequence of rows.

    Every image is reshaped to 28 steps of 28 features and the arrays are
    laid out sequence-first. The model is trained for 20 epochs with SGD
    plus momentum on mini-batches of 100. After each epoch the mean of the
    values returned by ``train`` and the accuracy of ``predict`` on the test
    split are printed.
    """
    torch.manual_seed(42)

    trX, teX, trY, teY = load_mnist(onehot=False)
    train_size = len(trY)

    n_classes = 10
    seq_length = 28
    input_dim = 28
    hidden_dim = 128
    batch_size = 100
    epochs = 20

    trX = trX.reshape(-1, seq_length, input_dim)
    teX = teX.reshape(-1, seq_length, input_dim)

    # Convert to the shape (seq_length, num_samples, input_dim)
    trX = np.swapaxes(trX, 0, 1)
    teX = np.swapaxes(teX, 0, 1)

    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    model = LSTMNet(input_dim, hidden_dim, n_classes)
    # The default reduction is the batch mean. The former explicit
    # reduction='elementwise_mean' is a deprecated spelling of the same
    # behavior and only produced a warning.
    loss = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # A trailing partial batch is ignored.
    num_batches = train_size // batch_size

    for i in range(epochs):
        cost = 0.
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            # Samples live on axis 1 after the swap, so batch along it.
            cost += train(model, loss, optimizer,
                          trX[:, start:end, :], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))
def main():
    """Train a ``DeepNeuralDecisionForest`` on the first 1024 MNIST images.

    Per-tree weights and leaf distributions are initialised for ``N_TREE``
    trees, the model is optimised with RMSprop for 50 epochs on mini-batches
    of ``N_BATCH``, and after every epoch the leaf probabilities are
    refreshed through ``update_leaf_p`` using a full forward pass over the
    training subset. Test accuracy is not evaluated; the printed accuracy is
    a constant 0 placeholder.
    """
    # forest weights
    w4_ensemble = []
    w_d_ensemble = []
    leaf_p_e = []
    softmax = nn.Softmax()

    # parameter initialization
    print('# parameter initialization')
    for _ in range(N_TREE):
        w4_ensemble.append(init_weights([128 * 3 * 3, 625]))
        w_d_ensemble.append(init_prob_weights([625, N_LEAF], -1, 1))

        pi = init_prob_weights([N_LEAF, N_LABEL], 0, 1)
        pi = softmax.forward(Variable(pi))
        if cuda:
            pi = pi.cuda()
        leaf_p_e.append(pi)

    # network hyperparameters
    p_conv_keep = 0.5
    p_full_keep = 0.4

    model = DeepNeuralDecisionForest(w4_e=w4_ensemble, w_d_e=w_d_ensemble,
                                     p_keep_conv=p_conv_keep,
                                     p_keep_hidden=p_full_keep)
    if cuda:
        model.cuda()

    ################ Load dataset #######################
    print('# data loading')
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)

    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    # Only a small prefix of the training set is used.
    trX = trX[:1024]
    trY = trY[:1024]
    n_examples = len(trX)

    if cuda:
        trX = trX.cuda()
        teX = teX.cuda()
        trY = trY.cuda()

    optimizer = optim.RMSprop(model.parameters(), lr=1e-2, alpha=0.99,
                              weight_decay=0)
    batch_size = N_BATCH
    # Floor division: range() needs an int, and `/` yields a float on
    # Python 3. A trailing partial batch is ignored.
    num_batches = n_examples // batch_size

    print('# begin training')
    # The default reduction already averages over the batch; the explicit
    # `size_average=True` argument is deprecated and has been dropped.
    loss = nn.NLLLoss()
    pi_update = Variable(torch.zeros(leaf_p_e[0].size()))
    # print() with a single argument behaves the same on Python 2 and 3.
    print(leaf_p_e)

    for i in range(50):
        cost = 0.
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer,
                          trX[start:end], trY[start:end], leaf_p_e)

        # Accuracy evaluation is disabled, hence the constant 0 below.
        # To re-enable: predY = predict(model, teX[:2000], 1) and report
        # 100. * np.mean(predY == teY[:2000]).
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 0))

        mu_e, py_x = full_forward(model, trX, leaf_p_e)
        print('Epoch %d, updating leaf probabilities!' % (i + 1))
        print(mu_e[0][0])
        leaf_p_e = update_leaf_p(mu_e, leaf_p_e, py_x, trY, pi_update)
        # Drop the forward-pass results before the next epoch starts.
        del mu_e
        del py_x
        print(leaf_p_e)
def run_tsne(data_type, train_test, mode, data_class=None):
    """Plot a 2-D t-SNE embedding of a sub-sample of the selected data.

    For ``'mnist'`` the data comes from ``load_mnist`` and training samples
    whose label equals ``data_class`` are removed. For ``'synthetic'`` and
    ``'kdd'`` inputs, labels and latent means are read from CSV files under a
    hard-coded output directory. A random tenth of the points, plus every
    point labelled 0, is embedded and shown as a scatter plot with one colour
    for each of the labels 0 and 1.

    Args:
        data_type: ``'mnist'``, ``'synthetic'`` or ``'kdd'``.
        train_test: ``'train'`` or ``'test'``; which split to embed.
        mode: ``'normal'`` to embed the inputs, ``'latent'`` to embed the
            latent means.
        data_class: Label excluded from the MNIST training split.

    Raises:
        ValueError: If ``data_type``, ``train_test`` or ``mode`` is not one
            of the supported values, or if ``mode == 'latent'`` is requested
            for a data type that has no latent codes loaded (MNIST).
    """
    z_train = z_test = None
    if data_type == 'mnist':
        [x_train, t_train, x_valid, t_valid, x_test, t_test] = load_mnist()
        mask = np.where(t_train != data_class)[0]
        x_train = x_train[mask, :]
        t_train = t_train[mask]
    # The former test `data_type == 'synthetic' or 'kdd'` was always true,
    # because the non-empty string 'kdd' is truthy.
    elif data_type in ('synthetic', 'kdd'):
        print('loaded data')
        output_dir = '/home/jinwon/PycharmProjects/autoencoder/src/output/kdd/a_ipsweep._z_10_h_20_e_50'
        # Alternative run directory for the synthetic data:
        # output_dir = '/home/jinwon/PycharmProjects/autoencoder/src/output/synthetic/d_[10000, 10001]_z_10_h_10_ds_[10000, 10001]_e_200'
        z_train = np.loadtxt(os.path.join(output_dir, 'train', 'mu.csv'), delimiter=',')
        z_test = np.loadtxt(os.path.join(output_dir, 'test', 'mu.csv'), delimiter=',')
        x_train = np.loadtxt(os.path.join(output_dir, 'x_train.csv'), delimiter=',')
        x_test = np.loadtxt(os.path.join(output_dir, 'x_test.csv'), delimiter=',')
        t_train = np.loadtxt(os.path.join(output_dir, 'y_train.csv'), delimiter=',')
        t_test = np.loadtxt(os.path.join(output_dir, 'y_test.csv'), delimiter=',')
    else:
        raise ValueError(
            "data_type must be 'mnist', 'synthetic' or 'kdd', got %r"
            % (data_type,))

    if train_test == 'train':
        inputs, latents, color = x_train, z_train, t_train
    elif train_test == 'test':
        inputs, latents, color = x_test, z_test, t_test
    else:
        raise ValueError(
            "train_test must be 'train' or 'test', got %r" % (train_test,))

    if mode == 'normal':
        X = inputs
    elif mode == 'latent':
        if latents is None:
            raise ValueError(
                "mode 'latent' is not available for data_type %r"
                % (data_type,))
        X = latents
    else:
        raise ValueError(
            "mode must be 'normal' or 'latent', got %r" % (mode,))

    # Embed a random tenth of the points (drawn with replacement) together
    # with every point labelled 0. Floor division keeps `size` an int.
    mask = np.random.choice(X.shape[0], size=X.shape[0] // 10)
    mask_2 = np.where(color == 0)[0]
    mask = np.unique(np.hstack((mask, mask_2)))
    X = X[mask]
    color = color[mask]

    n_components = 2
    n_classes = 2

    t0 = time()
    tsne = manifold.TSNE(n_components=n_components, init='random',
                         random_state=0, method='barnes_hut')
    Y = tsne.fit_transform(X)
    t1 = time()
    print("t-SNE: %.2g sec" % (t1 - t0))

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    color_spectrum = plt.cm.Spectral(np.linspace(0, 1, n_classes))
    for color_select in range(n_classes):
        mask = np.where(color == color_select)[0]
        plt.scatter(Y[mask, 0], Y[mask, 1],
                    color=[color_spectrum[color_select]] * len(mask),
                    label=color_select)

    plt.title("t-SNE (%.2g sec)" % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.legend()
    plt.axis('tight')
    plt.show()
def run_mnist():
    """Run ``run_VAE`` on MNIST once for each of the classes 0 through 3.

    The dataset is loaded a single time and shared by all four runs; every
    run uses dimZ=20, epoch=50 and HU_size=200.
    """
    mnist = load_mnist()
    for digit in range(4):
        run_VAE(data_mode='mnist', dimZ=20, epoch=50, HU_size=200,
                data=mnist, data_class=digit)
action="store", default=10, type=int, help="Number of classes for classification.") parser.add_argument( "--solver", action="store", default="all", type=str, choices=["sgd", "svrg", "lp-sgd", "lp-svrg", "bc-sgd", "bc-svrg", "all"], help="Solver/optimization algorithm.") args = parser.parse_args() print(args) utils.set_seed(args.seed) x_train, x_test, y_train, y_test = load_mnist(onehot=False) model = model.LogisticRegression(n_samples=x_train.shape[0], batch_size=args.batch_size, n_bits=args.n_bits, fwd_scale_factor=args.lin_fwd_sf, bck_scale_factor=args.lin_bck_sf, loss_scale_factor=args.loss_sf, in_features=x_train.shape[1], out_features=args.n_classes, lr=args.alpha) in_data = utils.OptimizerData(args, x_train, x_test, y_train, y_test) if args.solver == "sgd" or args.solver == "all": print("\nRunning SGD...")