def train_conv_net(epochs=15, weights_file=MODEL_PATH,
                   dictionary_file=DICT_TRANSFORM_PATH):
    data = get_tokenized_list_of_dicts()
    train, test = train_test_split(data, test_size=0.1, random_state=0)
    cnet = ConvNet(train, test, filter_sizes=[1, 2, 3, 4], n_filters=10,
                   dropout_prob=.7, pool_size=10, hidden_dims=12,
                   embedding_size=64, fc_l2=.05, conv_l2=0.0,
                   balance_classes=True)
    batch_size = 64
    cnet.fit(batch_size, epochs, weights_file)

    predict_proba = cnet.get_predict_proba()
    probs = predict_proba([r['content'] for r in test])
    preds = (probs[:, 0] < .5).astype(int)
    y_test = [row['label'] for row in test]

    print('Mean prediction: {}'.format(np.mean(preds)))
    print(classification_report(y_test, preds))
    print(confusion_matrix(y_test, preds))

    cnet.transform.serialize(dictionary_file)
def __init__(self, seed, n_ensembles, batch_size, root):
    super(MnistOptimizee, self).__init__()

    self.random_state = np.random.RandomState(seed=seed)
    self.n_ensembles = n_ensembles
    self.batch_size = batch_size
    self.root = root

    self.conv_net = ConvNet().to(device)
    self.criterion = nn.CrossEntropyLoss()

    self.data_loader = DataLoader()
    self.data_loader.init_iterators(self.root, self.batch_size)
    self.dataiter_mnist = self.data_loader.dataiter_mnist
    self.testiter_mnist = self.data_loader.testiter_mnist

    self.generation = 0
    self.inputs, self.labels = self.dataiter_mnist()
    self.inputs = self.inputs.to(device)
    self.labels = self.labels.to(device)
    self.test_input, self.test_label = self.testiter_mnist()
    self.test_input = self.test_input.to(device)
    self.test_label = self.test_label.to(device)

    self.timings = {
        'shape_parameters': [],
        'set_parameters': [],
        'set_parameters_cnn': [],
        'shape_parameters_ens': []
    }

    self.length = 0
    for key in self.conv_net.state_dict().keys():
        self.length += self.conv_net.state_dict()[key].nelement()
def __init__(self, seed, n_ensembles, batch_size, root, path):
    self.random_state = np.random.RandomState(seed=seed)
    self.n_ensembles = n_ensembles
    self.batch_size = batch_size
    self.root = root
    self.path = path

    self.conv_net = ConvNet().to(device)
    self.criterion = nn.CrossEntropyLoss()

    self.data_loader = DataLoader()
    self.data_loader.init_iterators(self.root, self.path, self.batch_size)
    self.dataiter_mnist = self.data_loader.dataiter_mnist
    self.testiter_mnist = self.data_loader.testiter_mnist
    self.dataiter_notmnist = self.data_loader.dataiter_notmnist
    self.testiter_notmnist = self.data_loader.testiter_notmnist

    self.generation = 0
    self.inputs, self.labels = self.dataiter_mnist()
    self.test_input, self.test_label = self.testiter_mnist()

    # For plotting
    self.train_pred = []
    self.test_pred = []
    self.train_acc = []
    self.test_acc = []
    self.train_cost = []
    self.test_cost = []
    self.targets = []
    self.output_activity_train = []
    self.output_activity_test = []
    self.targets.append(self.labels.numpy())
class Model():
    def __init__(self, num_epochs=5, num_classes=10, batch_size=100,
                 learning_rate=0.001):
        self.num_epochs = num_epochs
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.model = ConvNet(num_classes)
        # Loss and optimizer
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=learning_rate)

    def train(self, train_loader):
        total_step = len(train_loader)
        for epoch in range(self.num_epochs):
            for i, (images, labels) in enumerate(train_loader):
                # Forward pass
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)

                # Backward and optimize
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                if (i + 1) % 100 == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                        epoch + 1, self.num_epochs, i + 1, total_step,
                        loss.item()))

    def eval(self, test_loader):
        self.model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in test_loader:
                outputs = self.model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            # report the accuracy accumulated over the whole test set
            print('Test accuracy: {} %'.format(100 * correct / total))

    def save(self):
        # Save the model checkpoint
        torch.save(self.model.state_dict(), 'model.ckpt')
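# A minimal usage sketch for the Model wrapper above (not part of the original
# snippet). It assumes the ConvNet used here expects 1-channel 28x28 inputs
# (MNIST-style) and that torchvision is installed.
import torch
import torchvision
import torchvision.transforms as transforms

train_set = torchvision.datasets.MNIST('data', train=True, download=True,
                                       transform=transforms.ToTensor())
test_set = torchvision.datasets.MNIST('data', train=False,
                                      transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=100,
                                          shuffle=False)

m = Model(num_epochs=5, num_classes=10)
m.train(train_loader)   # prints the running loss every 100 steps
m.eval(test_loader)     # prints accuracy over the test set
m.save()                # writes model.ckpt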
def __init__(self, seed, n_ensembles, batch_size, root):
    super(MnistFashionOptimizee, self).__init__()

    self.random_state = np.random.RandomState(seed=seed)
    self.n_ensembles = n_ensembles
    self.batch_size = batch_size
    self.root = root

    self.conv_net = ConvNet().to(device)
    self.criterion = nn.CrossEntropyLoss()

    self.data_loader = DataLoader()
    self.data_loader.init_iterators(self.root, self.batch_size)
    self.dataiter_fashion = self.data_loader.dataiter_fashion
    self.dataiter_mnist = self.data_loader.dataiter_mnist
    self.testiter_fashion = self.data_loader.testiter_fashion
    self.testiter_mnist = self.data_loader.testiter_mnist

    self.generation = 0
    # if self.generation % 2 == 0:
    #     self.inputs, self.labels = self.dataiter_fashion()
    # else:
    #     self.inputs, self.labels = self.dataiter_mnist()
    self.inputs, self.labels = self.dataiter_mnist()
    self.inputs = self.inputs.to(device)
    self.labels = self.labels.to(device)
    self.test_input, self.test_label = self.testiter_mnist()
    self.test_input = self.test_input.to(device)
    self.test_label = self.test_label.to(device)

    # For plotting
    self.train_pred = []
    self.test_pred = []
    self.train_acc = []
    self.test_acc = []
    self.train_cost = []
    self.test_cost = []
    self.targets = []
    self.output_activity_train = []
    self.output_activity_test = []
    self.targets.append(self.labels.cpu().numpy())

    # Covariance noise matrix
    self.cov = 0.0
    self.length = 0
    for key in self.conv_net.state_dict().keys():
        self.length += self.conv_net.state_dict()[key].nelement()
def __init__(self, seed, n_ensembles, batch_size, root):
    super(MnistOptimizee, self).__init__()

    self.random_state = np.random.RandomState(seed=seed)
    self.n_ensembles = n_ensembles
    self.batch_size = batch_size
    self.root = root

    self.conv_net = ConvNet().to(device)
    self.criterion = nn.CrossEntropyLoss()

    self.data_loader = DataLoader()
    self.data_loader.init_iterators(self.root, self.batch_size)
    self.dataiter_mnist = self.data_loader.dataiter_mnist
    self.testiter_mnist = self.data_loader.testiter_mnist

    self.generation = 0
    self.inputs, self.labels = self.dataiter_mnist()
    self.inputs = self.inputs.to(device)
    self.labels = self.labels.to(device)
    self.test_input, self.test_label = self.testiter_mnist()
    self.test_input = self.test_input.to(device)
    self.test_label = self.test_label.to(device)

    # For plotting
    self.train_pred = []
    self.test_pred = []
    self.train_acc = []
    self.test_acc = []
    self.train_cost = []
    self.test_cost = []
    self.train_loss = []
    self.test_loss = []
    self.targets = []
    self.output_activity_train = []
    self.output_activity_test = []
    self.act_func = {'act1': [], 'act2': [], 'act1_mean': [],
                     'act2_mean': [], 'act1_std': [], 'act2_std': [],
                     'act3': [], 'act3_mean': [], 'act3_std': []}
    self.test_act_func = {'act1': [], 'act2': [], 'act1_mean': [],
                          'act2_mean': [], 'act1_std': [], 'act2_std': [],
                          'act3': [], 'act3_mean': [], 'act3_std': []}
    self.targets.append(self.labels.cpu().numpy())

    self.length = 0
    for key in self.conv_net.state_dict().keys():
        self.length += self.conv_net.state_dict()[key].nelement()
def __init__(self, num_epochs=5, num_classes=10, batch_size=100,
             learning_rate=0.001):
    self.num_epochs = num_epochs
    self.num_classes = num_classes
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.model = ConvNet(num_classes)
    # Loss and optimizer
    self.criterion = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=learning_rate)
def run_experiment(average_gradients, batch_size, iterations, verbose):
    batch_size = batch_size
    tf.reset_default_graph()
    net = ConvNet()

    validation_batch = mnist.test.images
    val_count = validation_batch.shape[0]
    validation_batch = np.reshape(validation_batch, (val_count, 28, 28, 1))
    validation_labels = mnist.test.labels

    net.setup_train(average_gradients=average_gradients)
    training_log = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(iterations):
            batch = mnist.train.next_batch(batch_size)
            input_batch = np.reshape(batch[0], (batch_size, 28, 28, 1))
            loss = net.train(sess, input_batch, batch[1])
            if (i + 1) % 100 == 0:
                accuracy = net.evaluate(sess, validation_batch,
                                        validation_labels)
                training_log.append((accuracy, i + 1))
                if verbose:
                    print('[{:d}/{:d}] loss: {:.3g}, accuracy: {:.3g}%'.format(
                        i + 1, iterations, loss, accuracy))

        accuracy = net.evaluate(sess, validation_batch, validation_labels)
        training_log.append((accuracy, iterations))
        best = sorted(training_log, key=lambda x: x[0], reverse=True)[0]
        print('Training finished. Best accuracy: {:.3g} at iteration {:d}.'.format(
            best[0], best[1]))
        return best[0]
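# A possible driver for run_experiment, comparing the two gradient-averaging
# modes (not part of the original snippet); the batch size and iteration count
# below are illustrative choices, not values from the original script.
if __name__ == '__main__':
    for average_gradients in (False, True):
        best = run_experiment(average_gradients=average_gradients,
                              batch_size=64, iterations=2000, verbose=True)
        print('average_gradients={}: best accuracy {:.3g}'.format(
            average_gradients, best))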
def __init__(self, char_num, embedding_size, channels, kernel_size,
             padding_idx, dropout, emb_dropout):
    super(CharEncoder, self).__init__()
    self.embed = nn.Embedding(char_num, embedding_size,
                              padding_idx=padding_idx)
    self.drop = nn.Dropout(emb_dropout)
    self.conv_net = ConvNet(channels, kernel_size, dropout=dropout)
    self.init_weights()
def __init__(self, weight, channels, kernel_size, dropout, emb_dropout):
    super(WordEncoder, self).__init__()
    self.embed = nn.Embedding.from_pretrained(weight, freeze=False)
    self.drop = nn.Dropout(emb_dropout)
    self.conv_net = ConvNet(channels, kernel_size, dropout,
                            dilated=True, residual=False)
def __init__(self, char_num, embedding_size, channels, kernel_size,
             padding_idx, dropout, emb_dropout):
    ### Parameters supplied at initialization; called when ce = CharEncoder(...)
    super(CharEncoder, self).__init__()
    ### torch.nn.Embedding(m, n): m is the vocabulary size, n is the embedding dimension
    self.embed = nn.Embedding(char_num, embedding_size,
                              padding_idx=padding_idx)
    self.drop = nn.Dropout(emb_dropout)
    self.conv_net = ConvNet(channels, kernel_size, dropout=dropout)
    self.init_weights()
def perform_inference_on_camera_input(args):
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FPS, 1)
    haar_face_cascade = cv2.CascadeClassifier(args.haar_classifier_path)
    model = ConvNet()
    loader = tf.train.Saver()
    with tf.Session() as sess:
        # Restore variables from disk.
        sess.run(tf.global_variables_initializer())
        model.restore_from_checkpoint(sess, loader, args.checkpoint_path)
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            faces = haar_face_cascade.detectMultiScale(frame, scaleFactor=1.1,
                                                       minNeighbors=5)
            if len(faces) == 1 and ret:
                (x, y, w, h) = faces[0]
                cropped_img = frame[y:y + h, x:x + w]
                if cropped_img.shape[0] > 0 and cropped_img.shape[1] > 0:
                    input_image = np.expand_dims(
                        cv2.resize(cropped_img, (IMG_HEIGHT, IMG_WIDTH)),
                        axis=0)
                    feed_dict_inference = {
                        model.input_images: input_image,
                        model.is_training: False,
                    }
                    predictions = sess.run(model.prediction,
                                           feed_dict_inference)
                    print(predictions)
                    cv2.imshow("Checking images", cropped_img)
                    if cv2.waitKey(1) & 0xFF == ord("q"):
                        break
            sleep(1)

    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
def experiment(threshold, iterations, train_loss, n_conv, optimizer,
               batch_size=1, batch_norm=False, learning_rate=1e-3,
               summary_dir=None):
    model = ConvNet(filters=4, n_conv=n_conv, train_loss=train_loss,
                    batch_norm=batch_norm, optimizer=optimizer,
                    learning_rate=learning_rate, summary_dir=summary_dir)
    print('train_loss:', train_loss.value, 'optimizer:', optimizer.value,
          'n_conv:', n_conv, 'batch_norm:', batch_norm,
          'batch_size:', batch_size, 'learning_rate:', learning_rate)

    ret = dict()
    val_input_batch, val_output_batch = get_data(threshold, 100, verbose=True)
    best_accuracy = (0.0, 0)
    for i in tqdm(range(iterations)):
        input_batch, output_batch = get_data(threshold, batch_size)
        out = model.train(input_batch, output_batch)
        if i == 0:
            for k in out.keys():
                ret[k] = []
            ret['accuracy'] = []
        for k, v in out.items():
            ret[k].append(v)
        if i % 250 == 0:
            accuracy = model.accuracy(val_input_batch, val_output_batch)
            if accuracy > best_accuracy[0]:
                best_accuracy = (accuracy, i)
            ret['accuracy'].append((i, accuracy))
            # print('[%d] accuracy: %.3g' % (i, accuracy))
    print('Best accuracy %.3g at iteration %d.' % best_accuracy)
    return ret
def __init__(self, model):
    """Loads the specified model for training

    Args:
        model (str): Can be rnn, cnn, or hybrid. Specifies the model to load.
    """
    if model == 'rnn':
        from rnn import Bidirectional_GRU
        self.model = Bidirectional_GRU()
    elif model == 'cnn':
        from conv_net import ConvNet
        self.model = ConvNet()
    elif model == 'hybrid':
        from hybrid_model_attention import HybridModel
        self.model = HybridModel()
def __init__(self):
    super(Discriminator, self).__init__()
    ConvNet.add_conv(self, 3, 64, 4, 4, stride=2, pad=1, wscale=0.02)
    ConvNet.add_batch_normalization(self, 64 * 48 * 48, "Elu")
    ConvNet.add_conv(self, 64, 128, 4, 4, stride=2, pad=1, wscale=0.02)
    ConvNet.add_batch_normalization(self, 128 * 24 * 24, "Elu")
    ConvNet.add_conv(self, 128, 256, 4, 4, stride=2, pad=1, wscale=0.02)
    ConvNet.add_batch_normalization(self, 256 * 12 * 12, "Elu")
    ConvNet.add_conv(self, 256, 512, 4, 4, stride=2, pad=1, wscale=0.02)
    ConvNet.add_batch_normalization(self, 512 * 6 * 6, "Elu")
    ConvNet.add_affine(self, 512 * 6 * 6, 2)
    ConvNet.add_softmax(self)
def __init__(self, nz):
    super(Generator, self).__init__()
    ConvNet.add_affine(self, nz, 512 * 6 * 6, output_shape=(512, 6, 6))
    ConvNet.add_batch_normalization(self, 512 * 6 * 6, "Relu")
    ConvNet.add_deconv(self, 512, 256, 4, 4, stride=2, pad=1, wscale=0.02)
    ConvNet.add_batch_normalization(self, 256 * 12 * 12, "Relu")
    ConvNet.add_deconv(self, 256, 128, 4, 4, stride=2, pad=1, wscale=0.02)
    ConvNet.add_batch_normalization(self, 128 * 24 * 24, "Relu")
    ConvNet.add_deconv(self, 128, 64, 4, 4, stride=2, pad=1, wscale=0.02)
    ConvNet.add_batch_normalization(self, 64 * 48 * 48, "Relu")
    ConvNet.add_deconv(self, 64, 3, 4, 4, stride=2, pad=1, wscale=0.02)
    ConvNet.add_tanh(self)
def __init__(self):
    super(SimpleConv, self).__init__()
    ConvNet.add_conv(self, 1, 30, 5, 5)
    ConvNet.add_batch_normalization(self, 30 * 24 * 24, "Relu")
    ConvNet.add_pooling(self, 2, 2, stride=2)
    ConvNet.add_affine(self, 30 * 12 * 12, 200)
    ConvNet.add_batch_normalization(self, 200, "Relu")
    ConvNet.add_affine(self, 200, 10)
    ConvNet.add_softmax(self)
BATCH_SIZE = 500
BATCHES = 20
TEST_SIZE = 5
IMG_SHAPE = (32, 32, 3)

images = []
for i in range(BATCHES * BATCH_SIZE):
    images.append(load_image(i + 1))

puncher = HolePuncher(IMG_SHAPE)
X_train, Y_train = puncher.split_fill_batch(images)

with ConvNet() as conv:
    conv.fit(X_train, Y_train, width=IMG_SHAPE[0], height=IMG_SHAPE[1],
             batch_size=BATCH_SIZE, training_epochs=300, learning_rate=0.0001)

    test_images = []
    for i in range(BATCHES * BATCH_SIZE, BATCHES * BATCH_SIZE + TEST_SIZE):
        test_images.append(load_image(i + 1))

    X_test, Y_test = puncher.split_fill_batch(test_images)
    for i in range(X_test.shape[1]):
        x_test, y_test = X_test[:, i], Y_test[:, i]
test_compose = transforms.Compose(common_trans)
d_func = dset.MNIST
train_set = dset.MNIST('data', train=True, transform=train_compose,
                       download=True)
test_set = dset.MNIST('data', train=False, transform=test_compose)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=256,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=256,
                                          shuffle=False)

model = ConvNet(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


def test_model():
    # Test the model
    # eval mode (batchnorm uses moving mean/variance instead of
    # mini-batch mean/variance)
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
def __init__(self):
    super(SimpleConv, self).__init__()
    ConvNet.add_conv(self, 3, 64, 3, 3, pad=1)
    ConvNet.add_batch_normalization(self, 64 * 96 * 96, "Relu")
    ConvNet.add_pooling(self, 2, 2, stride=2)
    ConvNet.add_conv(self, 64, 16, 3, 3, pad=1)
    ConvNet.add_batch_normalization(self, 16 * 48 * 48, "Relu")
    ConvNet.add_pooling(self, 2, 2, stride=2)
    ConvNet.add_affine(self, 16 * 24 * 24, 200)
    ConvNet.add_batch_normalization(self, 200, "Relu")
    ConvNet.add_affine(self, 200, 2)
    ConvNet.add_softmax(self)
class MnistOptimizee:
    def __init__(self, seed, n_ensembles, batch_size, root, path):
        self.random_state = np.random.RandomState(seed=seed)
        self.n_ensembles = n_ensembles
        self.batch_size = batch_size
        self.root = root
        self.path = path

        self.conv_net = ConvNet().to(device)
        self.criterion = nn.CrossEntropyLoss()

        self.data_loader = DataLoader()
        self.data_loader.init_iterators(self.root, self.path, self.batch_size)
        self.dataiter_mnist = self.data_loader.dataiter_mnist
        self.testiter_mnist = self.data_loader.testiter_mnist
        self.dataiter_notmnist = self.data_loader.dataiter_notmnist
        self.testiter_notmnist = self.data_loader.testiter_notmnist

        self.generation = 0
        self.inputs, self.labels = self.dataiter_mnist()
        self.test_input, self.test_label = self.testiter_mnist()

        # For plotting
        self.train_pred = []
        self.test_pred = []
        self.train_acc = []
        self.test_acc = []
        self.train_cost = []
        self.test_cost = []
        self.targets = []
        self.output_activity_train = []
        self.output_activity_test = []
        self.targets.append(self.labels.numpy())

    def create_individual(self):
        # get weights, biases from networks and flatten them
        # convolutional network parameters ###
        conv_ensembles = []
        with torch.no_grad():
            # weights for layers conv1, conv2, fc1
            # conv1_weights = self.conv_net.state_dict()['conv1.weight'].view(-1).numpy()
            # conv2_weights = self.conv_net.state_dict()['conv2.weight'].view(-1).numpy()
            # fc1_weights = self.conv_net.state_dict()['fc1.weight'].view(-1).numpy()
            # bias
            # conv1_bias = self.conv_net.state_dict()['conv1.bias'].numpy()
            # conv2_bias = self.conv_net.state_dict()['conv2.bias'].numpy()
            # fc1_bias = self.conv_net.state_dict()['fc1.bias'].numpy()
            # stack everything into a vector of
            # conv1_weights, conv1_bias, conv2_weights, conv2_bias,
            # fc1_weights, fc1_bias
            # params = np.hstack((conv1_weights, conv1_bias, conv2_weights,
            #                     conv2_bias, fc1_weights, fc1_bias))
            length = 0
            for key in self.conv_net.state_dict().keys():
                length += self.conv_net.state_dict()[key].nelement()
            # conv_ensembles.append(params)
            # l = np.random.uniform(-.5, .5, size=length)
            for _ in range(self.n_ensembles):
                # stacked = []
                # for key in self.conv_net.state_dict().keys():
                #     stacked.extend(
                #         self._he_init(self.conv_net.state_dict()[key]))
                # conv_ensembles.append(stacked)
                # tmp = []
                # for j in l:
                #     jitter = np.random.uniform(-0.1, 0.1) + j
                #     tmp.append(jitter)
                # conv_ensembles.append(tmp)
                conv_ensembles.append(np.random.uniform(-1, 1, size=length))
            # convert targets to numpy()
            targets = tuple([label.numpy() for label in self.labels])
            return dict(conv_params=np.array(conv_ensembles),
                        targets=targets,
                        input=self.inputs.squeeze().numpy())

    @staticmethod
    def _he_init(weights, gain=0):
        r"""
        He (Kaiming) initialization as in He et al., "Delving deep into
        rectifiers: Surpassing human-level performance on ImageNet
        classification". Values are sampled from :math:`\mathcal{N}(0, \text{std})`
        where

        .. math:: \text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan\_in}}}

        Note: only for the case that the non-linearity of the network
        activation is `relu`.

        :param weights: tensor
        :param gain: additional scaling factor, default is 0
        :return: numpy ndarray, random array of the same size as `weights`
        """
        fan_in = torch.nn.init._calculate_correct_fan(weights, 'fan_in')
        stddev = np.sqrt(2. / fan_in * (1 + gain**2))
        return np.random.normal(0, stddev, weights.numel())

    def set_parameters(self, ensembles):
        # set the new parameters for the network
        conv_params = np.mean(ensembles, axis=0)
        ds = self._shape_parameter_to_conv_net(conv_params)
        self.conv_net.set_parameter(ds)
        print('---- Train -----')
        print('Generation ', self.generation)
        generation_change = 8
        dataset_change = 500
        with torch.no_grad():
            inputs = self.inputs
            labels = self.labels
            if self.generation % generation_change == 0 and self.generation < dataset_change:
                self.inputs, self.labels = self.dataiter_mnist()
                print('New MNIST set used at generation {}'.format(
                    self.generation))
                # append the outputs
                self.targets.append(self.labels.numpy())
            elif self.generation >= dataset_change and self.generation % generation_change == 0:
                # switch the test set once the training data changes to notMNIST
                if self.generation == dataset_change:
                    self.test_input, self.test_label = self.testiter_notmnist()
                self.inputs, self.labels = self.dataiter_notmnist()
                print('New notMNIST set used at generation {}'.format(
                    self.generation))
                # append the outputs
                self.targets.append(self.labels.numpy())

            outputs = self.conv_net(inputs)
            self.output_activity_train.append(outputs.numpy())
            conv_loss = self.criterion(outputs, labels).item()
            train_cost = _calculate_cost(_encode_targets(labels, 10),
                                         outputs.numpy(), 'MSE')
            train_acc = score(labels.numpy(), np.argmax(outputs.numpy(), 1))
            print('Cost: ', train_cost)
            print('Accuracy: ', train_acc)
            print('Loss:', conv_loss)
            self.train_cost.append(train_cost)
            self.train_acc.append(train_acc)
            self.train_pred.append(np.argmax(outputs.numpy(), 1))

            print('---- Test -----')
            test_output = self.conv_net(self.test_input)
            test_output = test_output.numpy()
            test_acc = score(self.test_label.numpy(),
                             np.argmax(test_output, 1))
            test_cost = _calculate_cost(_encode_targets(self.test_label, 10),
                                        test_output, 'MSE')
            print('Test accuracy', test_acc)
            self.test_acc.append(test_acc)
            self.test_pred.append(np.argmax(test_output, 1))
            self.test_cost.append(test_cost)
            self.output_activity_test.append(test_output)
            print('-----------------')

            conv_params = []
            for c in ensembles:
                ds = self._shape_parameter_to_conv_net(c)
                self.conv_net.set_parameter(ds)
                conv_params.append(self.conv_net(inputs).numpy().T)
            outs = {
                'conv_params': np.array(conv_params),
                'conv_loss': float(conv_loss),
                'input': self.inputs.squeeze().numpy(),
                'targets': self.labels.numpy()
            }
            return outs

    def _shape_parameter_to_conv_net(self, params):
        param_dict = dict()
        start = 0
        for key in self.conv_net.state_dict().keys():
            shape = self.conv_net.state_dict()[key].shape
            length = self.conv_net.state_dict()[key].nelement()
            end = start + length
            param_dict[key] = params[start:end].reshape(shape)
            start = end
        return param_dict
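# Illustration (not from the original code) of the flatten/reshape round trip
# that _shape_parameter_to_conv_net performs: every state_dict tensor is laid
# out consecutively in one flat vector and can be recovered by slicing and
# reshaping in the same key order. The tiny network below is only a stand-in.
import numpy as np
import torch
import torch.nn as nn

tiny = nn.Sequential(nn.Conv2d(1, 2, 3), nn.Flatten(),
                     nn.Linear(2 * 26 * 26, 10))
state = tiny.state_dict()

# flatten: concatenate all parameters into a single 1-D vector
flat = np.concatenate([t.cpu().numpy().ravel() for t in state.values()])

# unflatten: slice the vector back into tensors of the original shapes
restored, start = {}, 0
for key, t in state.items():
    end = start + t.nelement()
    restored[key] = torch.from_numpy(flat[start:end].reshape(t.shape))
    start = end

assert all(torch.equal(restored[k], state[k].cpu()) for k in state)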
import pandas as pd
from numpy import array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

from clean_text import CleanText
from encode_text import EncodeText
from conv_net import ConvNet

df = pd.read_pickle('./data.pkl')

ct = CleanText()
encoder = EncodeText()
df['clean_text'] = df['ticket_text'].apply(lambda x: ct.prepare_text(x))

model = ConvNet()
trainLines, trainLabels = df['clean_text'], df['issue']

lb = LabelBinarizer()
transformed_labels = lb.fit_transform(trainLabels)

X_train, X_test, y_train, y_test = train_test_split(trainLines,
                                                    transformed_labels,
                                                    test_size=.2,
                                                    random_state=42,
                                                    stratify=transformed_labels)

length = encoder.max_length(X_train)
vocab_size = encoder.vocab_size(X_train)
X_train = encoder.encode_text(X_train)
X_test = encoder.encode_text(X_test, test_data=True)
encoder.save_encoder('./encoder.pkl')
encoder.save_encoder_variables('./encoder_variables')
class MnistOptimizee(torch.nn.Module):
    def __init__(self, seed, n_ensembles, batch_size, root):
        super(MnistOptimizee, self).__init__()

        self.random_state = np.random.RandomState(seed=seed)
        self.n_ensembles = n_ensembles
        self.batch_size = batch_size
        self.root = root

        self.conv_net = ConvNet().to(device)
        self.criterion = nn.CrossEntropyLoss()

        self.data_loader = DataLoader()
        self.data_loader.init_iterators(self.root, self.batch_size)
        self.dataiter_mnist = self.data_loader.dataiter_mnist
        self.testiter_mnist = self.data_loader.testiter_mnist

        self.generation = 0
        self.inputs, self.labels = self.dataiter_mnist()
        self.inputs = self.inputs.to(device)
        self.labels = self.labels.to(device)
        self.test_input, self.test_label = self.testiter_mnist()
        self.test_input = self.test_input.to(device)
        self.test_label = self.test_label.to(device)

        self.timings = {
            'shape_parameters': [],
            'set_parameters': [],
            'set_parameters_cnn': [],
            'shape_parameters_ens': []
        }

        self.length = 0
        for key in self.conv_net.state_dict().keys():
            self.length += self.conv_net.state_dict()[key].nelement()

    def create_individual(self):
        # get weights, biases from networks and flatten them
        # convolutional network parameters ###
        conv_ensembles = []
        with torch.no_grad():
            # weights for layers conv1, conv2, fc1
            # conv1_weights = self.conv_net.state_dict()['conv1.weight'].view(-1).numpy()
            # conv2_weights = self.conv_net.state_dict()['conv2.weight'].view(-1).numpy()
            # fc1_weights = self.conv_net.state_dict()['fc1.weight'].view(-1).numpy()
            # bias
            # conv1_bias = self.conv_net.state_dict()['conv1.bias'].numpy()
            # conv2_bias = self.conv_net.state_dict()['conv2.bias'].numpy()
            # fc1_bias = self.conv_net.state_dict()['fc1.bias'].numpy()
            # stack everything into a vector of
            # conv1_weights, conv1_bias, conv2_weights, conv2_bias,
            # fc1_weights, fc1_bias
            # params = np.hstack((conv1_weights, conv1_bias, conv2_weights,
            #                     conv2_bias, fc1_weights, fc1_bias))
            # length = 0
            # for key in self.conv_net.state_dict().keys():
            #     length += self.conv_net.state_dict()[key].nelement()
            # conv_ensembles.append(params)
            # l = np.random.uniform(-.5, .5, size=length)
            for _ in range(self.n_ensembles):
                # stacked = []
                # for key in self.conv_net.state_dict().keys():
                #     stacked.extend(
                #         self._he_init(self.conv_net.state_dict()[key]))
                # conv_ensembles.append(stacked)
                # tmp = []
                # for j in l:
                #     jitter = np.random.uniform(-0.1, 0.1) + j
                #     tmp.append(jitter)
                # conv_ensembles.append(tmp)
                conv_ensembles.append(
                    np.random.normal(0, 0.1, size=self.length))
            return dict(conv_params=torch.as_tensor(conv_ensembles,
                                                    device=device),
                        targets=self.labels,
                        input=self.inputs.squeeze())

    def load_model(self, path='conv_params.npy'):
        print('Loading model from path: {}'.format(path))
        conv_params = np.load(path).item()
        conv_ensembles = conv_params.get('ensemble')
        return dict(conv_params=torch.as_tensor(conv_ensembles, device=device),
                    targets=self.labels,
                    input=self.inputs.squeeze())

    @staticmethod
    def _he_init(weights, gain=0):
        r"""
        He (Kaiming) initialization as in He et al., "Delving deep into
        rectifiers: Surpassing human-level performance on ImageNet
        classification". Values are sampled from :math:`\mathcal{N}(0, \text{std})`
        where

        .. math:: \text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan\_in}}}

        Note: only for the case that the non-linearity of the network
        activation is `relu`.

        :param weights: tensor
        :param gain: additional scaling factor, default is 0
        :return: numpy ndarray, random array of the same size as `weights`
        """
        fan_in = torch.nn.init._calculate_correct_fan(weights, 'fan_in')
        stddev = np.sqrt(2. / fan_in * (1 + gain**2))
        return np.random.normal(0, stddev, weights.numel())

    def set_parameters(self, ensembles):
        # set the new parameters for the network
        conv_params = ensembles.mean(0)
        t = time.time()
        ds = self._shape_parameter_to_conv_net(conv_params)
        self.timings['shape_parameters_ens'].append(time.time() - t)
        t = time.time()
        self.conv_net.set_parameter(ds)
        self.timings['set_parameters_cnn'].append(time.time() - t)
        print('---- Train -----')
        print('Generation ', self.generation)
        generation_change = 1
        with torch.no_grad():
            inputs = self.inputs.to(device)
            labels = self.labels.to(device)
            if self.generation % generation_change == 0:
                self.inputs, self.labels = self.dataiter_mnist()
                self.inputs = self.inputs.to(device)
                self.labels = self.labels.to(device)
                print('New MNIST set used at generation {}'.format(
                    self.generation))

            outputs, act1, act2 = self.conv_net(inputs)
            conv_loss = self.criterion(outputs, labels).item()
            train_cost = _calculate_cost(_encode_targets(labels, 10),
                                         F.softmax(outputs, dim=1), 'MSE')
            train_acc = score(labels,
                              torch.argmax(F.softmax(outputs, dim=1), 1))
            print('Cost: ', train_cost)
            print('Accuracy: ', train_acc)
            print('Loss:', conv_loss)

            print('---- Test -----')
            test_output, act1, act2 = self.conv_net(self.test_input)
            test_loss = self.criterion(test_output, self.test_label).item()
            test_acc = score(self.test_label, torch.argmax(test_output, 1))
            test_cost = _calculate_cost(_encode_targets(self.test_label, 10),
                                        test_output, 'MSE')
            print('Test accuracy', test_acc)
            print('Test loss: {}'.format(test_loss))
            print('-----------------')

            conv_params = []
            for idx, c in enumerate(ensembles):
                t = time.time()
                ds = self._shape_parameter_to_conv_net(c)
                self.timings['shape_parameters'].append(time.time() - t)
                t = time.time()
                self.conv_net.set_parameter(ds)
                self.timings['set_parameters'].append(time.time() - t)
                params, _, _ = self.conv_net(inputs)
                conv_params.append(params.t())
            conv_params = torch.stack(conv_params)
            outs = {
                'conv_params': conv_params,
                'conv_loss': float(conv_loss),
                'input': self.inputs.squeeze(),
                'targets': self.labels
            }
            return outs

    def _shape_parameter_to_conv_net(self, params):
        param_dict = dict()
        start = 0
        for key in self.conv_net.state_dict().keys():
            shape = self.conv_net.state_dict()[key].shape
            length = self.conv_net.state_dict()[key].nelement()
            end = start + length
            param_dict[key] = params[start:end].reshape(shape)
            start = end
        return param_dict
defn_len = 20
context_len = 50
dropout = 0.75  # Dropout, probability to keep units

import tensorflow as tf

slabel = tf.placeholder("float", [None, label_len, n_input])
tlabel = tf.placeholder("float", [None, label_len, n_input])
sdefn = tf.placeholder("float", [None, defn_len, n_input])
tdefn = tf.placeholder("float", [None, defn_len, n_input])
scontext = tf.placeholder("float", [None, context_len, n_input])
tcontext = tf.placeholder("float", [None, context_len, n_input])
y = tf.placeholder("float", [None, 1])
keep_prob = tf.placeholder(tf.float32)

from conv_net import ConvNet

cn = ConvNet()
src_label_net = cn.conv_net(slabel)
tar_label_net = cn.conv_net(tlabel)
src_defn_net = cn.conv_net(sdefn)
tar_defn_net = cn.conv_net(tdefn)
src_cont_net = cn.conv_net(scontext)
tar_cont_net = cn.conv_net(tcontext)

print(src_label_net.get_shape())

with tf.name_scope("dropout"):
    src_label_rep = tf.nn.dropout(src_label_net, keep_prob)
    tar_label_rep = tf.nn.dropout(tar_label_net, keep_prob)
    src_defn_rep = tf.nn.dropout(src_defn_net, keep_prob)
    tar_defn_rep = tf.nn.dropout(tar_defn_net, keep_prob)
    src_cont_rep = tf.nn.dropout(src_cont_net, keep_prob)
import numpy as np

# SGD and AdaGrad are assumed to live in the same optimizer module as Adam
from optimizer import SGD, AdaGrad, Adam
from conv_net import ConvNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  flatten=False,
                                                  one_hot_label=True)

train_loss_list = []
train_acc_list = []
test_acc_list = []

iters_num = 10000
batch_size = 100
train_size = x_train.shape[0]
iter_per_epoch = max(train_size / batch_size, 1)

net = ConvNet()
optim = SGD(net.params, lr=0.1, momentum=0.9)
# optim = AdaGrad(net.params)
# optim = Adam(net.params)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = net.gradient(x_batch, t_batch)
    net.params = optim.update(net.params, grad)

    loss = net.loss(x_batch, t_batch)
    train_loss_list.append(loss)
num_images = 150
quot = int(len(training_files) / num_images)
print('quot:', quot)

# Neural network declarations
num_epochs = 2
num_classes = 2
batch_size = 64  # TO BE CHANGED
learning_rate = 0.001

# Neural net framework
model = ConvNet()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                                       factor=0.9)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for j in range(0, quot):
    input_tensor_training = []
    block_labels_training = []
    print('This is the ', j + 1, ' set of ', num_images)
    if j > 0:
        model = ConvNet()
w = compute_class_weight('balanced', np.unique(df['issue']), df['issue'])
weights = {
    0: w[0],
    1: w[1],
    2: w[2],
    3: w[3],
    4: w[4],
    5: w[5],
    6: w[6],
    7: w[7],
    8: w[8],
    9: w[9]
}

model = ConvNet()
trainLines, trainLabels = df['clean_text'], df['issue']
labels = pd.get_dummies(trainLabels)

X_train, X_test, y_train, y_test = train_test_split(trainLines, labels,
                                                    test_size=.2,
                                                    random_state=42,
                                                    stratify=labels)

# test_data = pd.concat([pd.DataFrame(X_test), pd.DataFrame(y_test)], axis=1)
# test_data.to_pickle('./test_data.pkl')

length = encoder.max_length(X_train)
        loss = criterion(output, target)
        test_loss += loss.item()
        # network prediction
        pred = output.argmax(1, keepdim=True)
        # count how many images are correctly classified, compared with the targets
        ta = pred.eq(target.view_as(pred)).sum().item()
        test_accuracy += ta
        test_acc = score(target.cpu().numpy(),
                         np.argmax(output.cpu().numpy(), 1))
        if idx % 10 == 0:
            print('Test Loss {}, idx {}'.format(loss.item(), idx))
    print('Test accuracy: {} Average test loss: {}'.format(
        100 * test_accuracy / len(test_loader_mnist.dataset),
        test_loss / len(test_loader_mnist.dataset)))


if __name__ == '__main__':
    conv_params = torch.load('conv_params.pt', map_location='cpu')
    ensemble = conv_params['ensemble'].mean(0)
    ensemble = torch.from_numpy(ensemble)
    conv_net = ConvNet()
    ds = shape_parameter_to_conv_net(conv_net, ensemble)
    conv_net.set_parameter(ds)
    criterion = torch.nn.CrossEntropyLoss()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch = 64
    train_loader, test_loader = get_data(batch, device)
    test(conv_net, test_loader)
    shuffle=False,
    sampler=torch.utils.data.SubsetRandomSampler(list(range(1000))))
# test_loader = torch.utils.data.DataLoader(
#     datasets.MNIST('../data', train=False, download=True,
#                    transform=transforms.Compose([
#                        transforms.ToTensor(),
#                    ])),
#     batch_size=1, shuffle=False,
#     sampler=torch.utils.data.SubsetRandomSampler(list(range(100))))

# Define what device we are using
print("CUDA Available: ", torch.cuda.is_available())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

pretrained_model = "models/cnn_mnist.ckpt"
model = ConvNet().to(device)
# pretrained_model = "models/lenet_mnist_model.pth"
# model = Net().to(device)
model.load_state_dict(torch.load(pretrained_model, map_location='cpu'))
model.eval()

gp_model, likelihood = load_combined_model('models/gp_mnist.dat')
gp_model.eval()
likelihood.eval()


# FGSM attack code
def fgsm_attack(image, epsilon, data_grad):
    # Collect the element-wise sign of the data gradient
    sign_data_grad = data_grad.sign()
    # Create the perturbed image by adjusting each pixel of the input image
def train_model(train_data_dir: str, val_data_dir: str, save_model_path: str):
    """
    Args:
        train_data_dir: Where the training dataset is.
        val_data_dir: Where the validation dataset is.
        save_model_path: Where the best checkpoint is saved.
    """
    all_train_image_paths = load_all_image_paths_convnet(train_data_dir)
    all_val_image_paths = load_all_image_paths_convnet(val_data_dir)
    log.info(
        f"{len(all_train_image_paths)} images belonging to the train set...")
    log.info(
        f"{len(all_val_image_paths)} images belonging to the validation set..."
    )

    model = ConvNet()
    log.info("Model built...")

    BEST_VAL_LOSS = sys.maxsize
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for e in range(EPOCHS):
            train_loss_epoch = 0.0
            validation_loss_epoch = 0.0
            random.shuffle(all_train_image_paths)

            full_epoch_train = trange(0, len(all_train_image_paths),
                                      BATCH_SIZE)
            for step in full_epoch_train:
                train_image_batch, train_label_batch = load_batch_of_data_convnet(
                    all_train_image_paths[step:step + BATCH_SIZE])
                feed_dict_train = {
                    model.input_images: train_image_batch,
                    model.labels: train_label_batch,
                    model.is_training: True,
                }
                _, train_loss = sess.run([model.train_step, model.loss_fun],
                                         feed_dict_train)
                train_loss_epoch += train_loss
                full_epoch_train.set_description(
                    f"Loss for epoch {e+1}: %g" % train_loss_epoch)

            for step in range(0, len(all_val_image_paths), BATCH_SIZE):
                # note: the original indexed the train paths here, which
                # mismatches the loop over the validation paths
                val_image_batch, val_label_batch = load_batch_of_data_convnet(
                    all_val_image_paths[step:step + BATCH_SIZE])
                feed_dict_val = {
                    model.input_images: val_image_batch,
                    model.labels: val_label_batch,
                    model.is_training: False,
                }
                val_loss = sess.run(model.loss_fun, feed_dict_val)
                validation_loss_epoch += val_loss

            print(
                f"The validation loss for epoch {e+1} is: {validation_loss_epoch}"
            )
            if validation_loss_epoch < BEST_VAL_LOSS:
                print("===============================================")
                print(f"Found new best! Saving model on epoch {e+1}...")
                print("===============================================")
                saver.save(sess, f"{save_model_path}")
                BEST_VAL_LOSS = validation_loss_epoch
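# Hypothetical entry point for train_model (not part of the original snippet);
# the directory and checkpoint paths are placeholders, not paths from the
# original project.
if __name__ == "__main__":
    train_model(train_data_dir="data/train",
                val_data_dir="data/val",
                save_model_path="checkpoints/convnet_best.ckpt")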
class MnistOptimizee(torch.nn.Module):
    def __init__(self, n_ensembles, batch_size, root):
        super(MnistOptimizee, self).__init__()

        self.n_ensembles = n_ensembles
        self.batch_size = batch_size
        self.root = root
        self.length = 0

        self.conv_net = ConvNet().to(device)
        self.criterion = nn.CrossEntropyLoss()

        self.data_loader = DataLoader()
        self.data_loader.init_iterators(self.root, self.batch_size)
        self.dataiter_mnist = self.data_loader.dataiter_mnist
        self.testiter_mnist = self.data_loader.testiter_mnist

        self.generation = 0  # iterations
        self.inputs, self.labels = self.dataiter_mnist()
        self.inputs = self.inputs.to(device)
        self.labels = self.labels.to(device)
        self.test_input, self.test_label = self.testiter_mnist()
        self.test_input = self.test_input.to(device)
        self.test_label = self.test_label.to(device)

        # For plotting
        self.train_pred = []
        self.test_pred = []
        self.train_acc = []
        self.test_acc = []
        self.train_cost = []
        self.test_cost = []
        self.train_loss = []
        self.test_loss = []
        self.targets = []
        self.output_activity_train = []
        self.output_activity_test = []
        self.act_func = {
            'act1': [], 'act2': [], 'act1_mean': [], 'act2_mean': [],
            'act1_std': [], 'act2_std': [], 'act3': [], 'act3_mean': [],
            'act3_std': []
        }
        self.test_act_func = {
            'act1': [], 'act2': [], 'act1_mean': [], 'act2_mean': [],
            'act1_std': [], 'act2_std': [], 'act3': [], 'act3_mean': [],
            'act3_std': []
        }
        self.targets.append(self.labels.cpu().numpy())

        for key in self.conv_net.state_dict().keys():
            self.length += self.conv_net.state_dict()[key].nelement()

    def create_individual(self):
        # get weights, biases from networks and flatten them
        # convolutional network parameters
        conv_ensembles = []
        sigma = config['sigma']
        with torch.no_grad():
            for _ in range(self.n_ensembles):
                conv_ensembles.append(
                    np.random.normal(0, sigma, size=self.length))
            return dict(conv_params=torch.as_tensor(np.array(conv_ensembles),
                                                    device=device),
                        targets=self.labels,
                        input=self.inputs.squeeze())

    def load_model(self, path='conv_params.npy'):
        print('Loading model from path: {}'.format(path))
        conv_params = np.load(path).item()
        conv_ensembles = conv_params.get('ensemble')
        return dict(conv_params=torch.as_tensor(np.array(conv_ensembles),
                                                device=device),
                    targets=self.labels,
                    input=self.inputs.squeeze())

    def set_parameters(self, ensembles):
        # set the new parameters for the network
        conv_params = ensembles.mean(0)
        ds = self._shape_parameter_to_conv_net(conv_params)
        self.conv_net.set_parameter(ds)
        print('---- Train -----')
        print('Iteration ', self.generation)
        generation_change = config['repetitions']
        with torch.no_grad():
            inputs = self.inputs.to(device)
            labels = self.labels.to(device)
            if self.generation % generation_change == 0:
                self.inputs, self.labels = self.dataiter_mnist()
                self.inputs = self.inputs.to(device)
                self.labels = self.labels.to(device)
                print('New MNIST set used at generation {}'.format(
                    self.generation))
                # append the outputs
                self.targets.append(self.labels.cpu().numpy())

            # get network predictions
            outputs, act1, act2 = self.conv_net(inputs)
            act3 = outputs
            # save all important calculations
            self.act_func['act1'] = act1.cpu().numpy()
            self.act_func['act2'] = act2.cpu().numpy()
            self.act_func['act3'] = act3.cpu().numpy()
            self.act_func['act1_mean'].append(act1.mean().item())
            self.act_func['act2_mean'].append(act2.mean().item())
            self.act_func['act3_mean'].append(act3.mean().item())
            self.act_func['act1_std'].append(act1.std().item())
            self.act_func['act2_std'].append(act2.std().item())
            self.act_func['act3_std'].append(act3.std().item())
            self.output_activity_train.append(
                F.softmax(outputs, dim=1).cpu().numpy())

            conv_loss = self.criterion(outputs, labels).item()
            self.train_loss.append(conv_loss)
            train_cost = _calculate_cost(
                _encode_targets(labels, 10),
                F.softmax(outputs, dim=1).cpu().numpy(), 'MSE')
            train_acc = score(
                labels.cpu().numpy(),
                np.argmax(F.softmax(outputs, dim=1).cpu().numpy(), 1))
            print('Cost: ', train_cost)
            print('Accuracy: ', train_acc)
            print('Loss:', conv_loss)
            self.train_cost.append(train_cost)
            self.train_acc.append(train_acc)
            self.train_pred.append(
                np.argmax(F.softmax(outputs, dim=1).cpu().numpy(), 1))

            print('---- Test -----')
            test_output, act1, act2 = self.conv_net(self.test_input)
            test_loss = self.criterion(test_output, self.test_label).item()
            self.test_act_func['act1'] = act1.cpu().numpy()
            self.test_act_func['act2'] = act2.cpu().numpy()
            self.test_act_func['act1_mean'].append(act1.mean().item())
            self.test_act_func['act2_mean'].append(act2.mean().item())
            self.test_act_func['act3_mean'].append(test_output.mean().item())
            self.test_act_func['act1_std'].append(act1.std().item())
            self.test_act_func['act2_std'].append(act2.std().item())
            self.test_act_func['act3_std'].append(test_output.std().item())
            test_output = test_output.cpu().numpy()
            self.test_act_func['act3'] = test_output
            test_acc = score(self.test_label.cpu().numpy(),
                             np.argmax(test_output, 1))
            test_cost = _calculate_cost(
                _encode_targets(self.test_label.cpu().numpy(), 10),
                test_output, 'MSE')
            print('Test accuracy', test_acc)
            print('Test loss: {}'.format(test_loss))
            self.test_acc.append(test_acc)
            self.test_pred.append(np.argmax(test_output, 1))
            self.test_cost.append(test_cost)
            self.output_activity_test.append(test_output)
            self.test_loss.append(test_loss)
            print('-----------------')

            conv_params = []
            for idx, c in enumerate(ensembles):
                ds = self._shape_parameter_to_conv_net(c)
                self.conv_net.set_parameter(ds)
                params, _, _ = self.conv_net(inputs)
                conv_params.append(params.t().cpu().numpy())
            outs = {
                'conv_params': torch.tensor(conv_params).to(device),
                'conv_loss': float(conv_loss),
                'input': self.inputs.squeeze(),
                'targets': self.labels
            }
            return outs

    def _shape_parameter_to_conv_net(self, params):
        """
        Create a dictionary which includes the shapes of the network
        architecture per layer.
        """
        param_dict = dict()
        start = 0
        for key in self.conv_net.state_dict().keys():
            shape = self.conv_net.state_dict()[key].shape
            length = self.conv_net.state_dict()[key].nelement()
            end = start + length
            param_dict[key] = params[start:end].reshape(shape)
            start = end
        return param_dict