Example #1
def train_model(dataset,
                mod_hypers,
                opt_hypers,
                dat_hypers,
                model_params=None):
    """Stochastic gradient descent optimization of a Discriminative RBM.

    Input
    -----
    dataset : tuple(tuple(np.ndarray))
      Training and validation sets
    mod_hypers : dictionary
      Model hyperparameters
    opt_hypers : dictionary
      Optimization hyperparameters
    dat_hypers : dictionary
      Data hyperparameters (e.g. n_input and n_class)
    model_params : list(np.ndarray)
      A list of model parameter values (optional)

    Output
    ------
    model_params : list(np.ndarray)
      List of all the model parameters (see class definition)
    valid_nll : float
      Validation set negative log-likelihood
    """
    # Read training, validation and test sets
    X_train, y_train = dataset[0]
    X_valid, y_valid = dataset[1]
    if opt_hypers['eval_test']:
        X_test, y_test = dataset[2]

    # We need these as well, and note that n_visible = n_input + n_class
    n_input = dat_hypers['n_input']
    n_class = dat_hypers['n_class']

    # Train the DRBM
    model = DRBM(n_input=n_input,
                 n_class=n_class,
                 hypers=mod_hypers,
                 init_params=model_params)

    # The actual learning step
    if opt_hypers['opt_type'] == 'batch-gd':  # Batch learning
        X_train, y_train = make_batches(X_train, y_train,
                                        opt_hypers['batch_size'])
        X_valid, y_valid = make_batches(X_valid, y_valid, X_valid.shape[0])
        if opt_hypers['eval_test']:
            X_test, y_test = make_batches(X_test, y_test, X_test.shape[0])
            dataset = ((X_train, y_train), (X_valid, y_valid), (X_test,
                                                                y_test))
        else:
            dataset = ((X_train, y_train), (X_valid, y_valid))

        optimizer = sgd(opt_hypers)
        params, valid_score = optimizer.optimize(model, dataset)
    else:
        raise NotImplementedError

    return params, valid_score
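A note on signatures: the snippets collected here call several unrelated helpers that all happen to be named make_batches. The variant used above splits the feature and label arrays into aligned lists of fixed-size chunks; a minimal sketch under that assumption (illustrative only, not the original implementation):

import numpy as np

def make_batches(X, y, batch_size):
    # Illustrative sketch inferred from the call sites above; the real helper may differ.
    # Splits X and y into aligned lists of batch_size-sized chunks (last chunk may be shorter).
    X_batches = [X[i:i + batch_size] for i in range(0, X.shape[0], batch_size)]
    y_batches = [y[i:i + batch_size] for i in range(0, y.shape[0], batch_size)]
    return X_batches, y_batches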
Example #2
    def _samples_h(self,
                   filepath,
                   x_batches,
                   num_samples=100,
                   tile_shape=(10, 10)):
        '''
        Save an image grid of samples produced by the Mover (`self.h`) to `filepath`.
        '''
        if not os.path.exists(os.path.dirname(filepath)):
            os.makedirs(os.path.dirname(filepath))

        num = ((num_samples - 1) // self.batch_size + 1) * self.batch_size
        x = np.zeros(
            [num, self.img_size[0], self.img_size[1], self.img_size[2]],
            dtype=np.float32)
        batches = make_batches(num, self.batch_size)
        for batch_idx, (batch_start, batch_end) in enumerate(batches):
            x_batch = x_batches[batch_start:batch_end]
            x[batch_start:batch_end] = self.tf_session.run(
                self.h, feed_dict={self.x: x_batch})
        idx = np.random.permutation(num)[:num_samples]
        x = (x[idx] + 1.0) / 2.0
        imgs = create_image_grid(x,
                                 img_size=self.img_size,
                                 tile_shape=tile_shape)
        spm.imsave(filepath, imgs)
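Many of the other snippets (Examples 2-4, 9-10, 15, 21, 26 and 28-29) expect a Keras-style make_batches(size, batch_size) that returns only (start, end) index pairs, which the callers then use to slice their own arrays. A minimal sketch of that variant, assuming it mirrors the helper of the same name in older Keras versions:

def make_batches(size, batch_size):
    # Return a list of (start, end) index tuples covering `size` items;
    # the final batch is shorter when size is not a multiple of batch_size.
    num_batches = (size + batch_size - 1) // batch_size
    return [(i * batch_size, min(size, (i + 1) * batch_size))
            for i in range(num_batches)]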
Example #3
    def test_and_compute_score(self, x_test_opcode, x_test_assembly, x_test_seq_len, y_test):
        with tf.Session(config=utils.get_default_config()) as sess:
            saver = tf.train.Saver(tf.global_variables())

            check_point = tf.train.get_checkpoint_state(self.checkpoint_path)
            if check_point and tf.train.checkpoint_exists(check_point.model_checkpoint_path):
                message = "Load model parameters from %s\n" % check_point.model_checkpoint_path
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode, self.datetime)
                saver.restore(sess, check_point.model_checkpoint_path)
            else:
                raise Exception('Saved model not found.')

            testing_set = x_test_opcode.shape[0] - x_test_opcode.shape[0] % self.batch_size
            testing_batches = utils.make_batches(testing_set, self.batch_size)

            average_test_loss = 0.0
            average_accuracy_rnn = 0.0
            full_y_pred = np.array([])
            for batch_idx, (batch_start, batch_end) in enumerate(testing_batches):
                batch_x_opcode = x_test_opcode[batch_start:batch_end]
                batch_x_assembly = x_test_assembly[batch_start:batch_end]

                batch_y = y_test[batch_start:batch_end]
                batch_sequence_length = x_test_seq_len[batch_start:batch_end]

                feed_dict = {
                    self.X_opcode: batch_x_opcode,
                    self.X_assembly: batch_x_assembly,
                    self.Y: batch_y,
                    self.sequence_length: batch_sequence_length,
                }

                # batch_test_loss, accuracy_rnn = sess.run(
                #     [self.loss, self.accuracy_bi_rnn],
                #     feed_dict=feed_dict)

                batch_test_loss = sess.run(self.loss, feed_dict=feed_dict)

                batch_y_pred = sess.run(self.y_pred_svm, feed_dict=feed_dict)
                full_y_pred = np.append(full_y_pred, batch_y_pred)

                average_test_loss += batch_test_loss / len(testing_batches)
                # average_accuracy_rnn += accuracy_rnn / len(testing_batches)

            full_accuracy_score = mt.accuracy_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
            full_pre_score = mt.precision_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
            full_f1_score = mt.f1_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
            full_recall_score = mt.recall_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
            full_auc_score = mt.roc_auc_score(y_true=y_test[:testing_set], y_score=full_y_pred)

            message = "testing loss %.5f\n" % average_test_loss
            message += "accuracy %.2f\n" % (full_accuracy_score * 100)
            message += "compute score:\n"
            message += '\tprecision score %.5f\n' % (full_pre_score * 100)
            message += '\tf1 score %.5f\n' % (full_f1_score * 100)
            message += '\trecall score %.5f\n' % (full_recall_score * 100)
            message += '\tAUC score %.5f\n' % (full_auc_score * 100)
            message += "-----------------------------------------------------\n"
            message += "Finish computing score process.\n"
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode, self.datetime)
Example #4
 def _generate(self, num_samples=100):
     sess = self.tf_session
     batch_size = self.g_batch_size * self.num_gens
     num = ((num_samples - 1) // batch_size + 1) * batch_size
     z = self.z_prior.sample([num, self.num_z]).astype(np.float32)
     x = np.zeros(
         [num, self.img_size[0], self.img_size[1], self.img_size[2]],
         dtype=np.float32)
     batches = make_batches(num, batch_size)
     for batch_idx, (batch_start, batch_end) in enumerate(batches):
         z_batch = z[batch_start:batch_end]
         x[batch_start:batch_end] = sess.run(self.sampler,
                                             feed_dict={self.z: z_batch})
     f_x = np.reshape(x, [
         self.num_gens, -1, self.img_size[0], self.img_size[1],
         self.img_size[2]
     ])
     f_x = f_x[:, 0::7, :, :, :]
     f_x = np.reshape(f_x, [
         num_samples, self.img_size[0], self.img_size[1], self.img_size[2]
     ])
     # idx = np.random.permutation(num)[:num_samples]
     # x = (x[idx] + 1.0) / 2.0
     # x = x[idx]
     x = (f_x + 1) / 2
     return x
Example #5
 def setUpClass(self):
     # VertexFrequencyCluster
     # Custom window sizes
     self.window_sizes = np.array([2, 4, 8, 24])
     data, self.labels = make_batches(n_pts_per_cluster=100)
     self.G = gt.Graph(data, sample_idx=self.labels, use_pygsp=True)
     meld_op = meld.MELD()
     self.EES = meld_op.fit_transform(G=self.G, RES=self.labels)
Example #6
def test_mnn():
    data, labels = make_batches(n_pts_per_cluster=250)
    meld_op = meld.MELD(verbose=0)
    sample_densities = meld_op.fit_transform(data, labels, sample_idx=labels)
    sample_likelihoods = meld.utils.normalize_densities(sample_densities)
    meld.VertexFrequencyCluster().fit_transform(
        G=meld_op.graph,
        sample_indicator=meld_op.sample_indicators["expt"],
        likelihood=sample_likelihoods["expt"],
    )
Example #7
    def train(self, x_train, y_train, loss_fn, optimiser, metric, *, batch_size, epochs):
        for epoch in range(epochs):
            batches, labels = make_batches(x_train, y_train, batch_size, shuffle_data=True)

            for batch, label in zip(batches, labels):
                result = self(batch)
                self.backpropagate(result, label, loss_fn, optimiser)
            
            train_metric = metric(y_train, self(x_train))
            print(f'Epoch {epoch + 1}: {train_metric}')
Example #8
 def setUpClass(self):
     # VertexFrequencyCluster
     # Custom window sizes
     self.window_sizes = np.array([2, 4, 8, 24])
     self.data, self.sample_labels = make_batches(n_pts_per_cluster=100)
     meld_op = meld.MELD(verbose=0)
     self.densities = meld_op.fit_transform(
         self.data, sample_labels=self.sample_labels)
     self.sample_indicators = meld_op.sample_indicators
     self.likelihoods = meld.utils.normalize_densities(self.densities)
     self.G = meld_op.graph
Example #9
 def generate(self, num_samples=1000):
     zs = np.random.normal(0.0, 1.0, [num_samples, self.num_z])
     g = np.zeros([num_samples, 2])
     batches = make_batches(num_samples, self.batch_size)
     for batch_idx, (batch_start, batch_end) in enumerate(batches):
         g[batch_start:batch_end] = self.tf_session.run(
             self.x_g,
             feed_dict={
                 self.z:
                 np.reshape(zs[batch_start:batch_end],
                            [batch_end - batch_start, self.num_z])
             })
     return g
Example #10
 def _generate(self, num_samples=100):
     num = ((num_samples - 1) // self.batch_size + 1) * self.batch_size
     z = self.z_prior.sample([num, self.num_z]).astype(np.float32)
     x = np.zeros(
         [num, self.img_size[0], self.img_size[1], self.img_size[2]],
         dtype=np.float32)
     batches = make_batches(num, self.batch_size)
     for batch_idx, (batch_start, batch_end) in enumerate(batches):
         z_batch = z[batch_start:batch_end]
         x[batch_start:batch_end] = self.tf_session.run(
             self.g, feed_dict={self.z: z_batch})
     idx = np.random.permutation(num)[:num_samples]
     return (x[idx] + 1.0) / 2.0
Example #11
    def preproc_predict(self, imgs, batch_size=32, augmentation_seed=None):
        """Preprocess images and predict with the model (no batch processing for first step)
        Input:
        imgs: 4D float or int array of images
        batch_size: integer, size of the batch
        Returns:
        predictions: numpy array with predictions (num_images, len_model_output)
        """
        print('base_model preproc_predict!')
        # import utool as ut
        # ut.embed()
        batch_idx = make_batches(imgs.shape[0], batch_size)
        imgs_preds = np.zeros((imgs.shape[0], ) +
                              self.model.get_output_shape_at(0)[1:])
        print('Computing predictions with the shape {}'.format(
            imgs_preds.shape))

        # do some augmentation here
        use_augmentation = augmentation_seed is not None
        print('use_augmentation = %s and augmentation_seed = %s' %
              (use_augmentation, augmentation_seed))
        if use_augmentation:
            gen_args = dict(
                rotation_range=30,
                width_shift_range=0.15,
                height_shift_range=0.15,
                shear_range=0.1,
                zoom_range=0.15,
                channel_shift_range=0.15,
                data_format=K.image_data_format(),
                fill_mode='reflect',
                preprocessing_function=self.backend_class.normalize,
            )
            aug_gen = ImageDataGenerator(**gen_args)

        for sid, eid in batch_idx:
            if use_augmentation:
                # [0] found experimentally
                preproc = aug_gen.flow(imgs[sid:eid],
                                       batch_size=batch_size,
                                       seed=augmentation_seed)
                assert len(preproc) == 1
                assert len(preproc[0]) <= batch_size
                preproc = preproc[0]
            else:
                preproc = self.backend_class.normalize(imgs[sid:eid])
            imgs_preds[sid:eid] = self.model.predict_on_batch(preproc)

        print('imgs_preds = %s' % imgs_preds)

        return imgs_preds
Example #12
def eval_step(model, criterion, data, cell_line_info, drug_fingerprint_info):
	
	model.eval()

	step = 0
	loss = .0
	l1, l2, CD, ED, pearson, r_squared = .0, .0, .0, .0, .0, .0

	if len(data) % parameters.batch_size == 0:
		batch_num = int(len(data)/parameters.batch_size)
	else:
		batch_num = int(len(data)/parameters.batch_size) + 1

	batches = utils.make_batches(data, cell_line_info, 
								 drug_fingerprint_info,
								 parameters.batch_size, False)
	for batch in batches:
		cell_lines, drugs, targets, doses, times = batch

		with torch.no_grad():
			input_cell_lines = torch.cuda.FloatTensor(cell_lines)
			input_drugs = torch.cuda.FloatTensor(drugs)
			input_targets = torch.cuda.FloatTensor(targets)
			input_doses = torch.cuda.FloatTensor(doses)
			input_times = torch.cuda.FloatTensor(times)

			# Forward pass and metric accumulation (no parameter updates during evaluation)
			predicted = model(input_cell_lines, input_drugs,  input_doses, 
				input_times)
			l1 += torch.abs(predicted - input_targets).sum()
			l2 += criterion(predicted, input_targets)
			CD += nn.functional.cosine_similarity(predicted, input_targets).sum()
			ED += torch.sqrt(torch.mul(predicted-input_targets,
				predicted-input_targets).sum(dim=1)).sum()

			for i in range(len(input_targets)):
				pearson += stats.pearsonr(input_targets[i], predicted[i])[0]
			r_squared += rsquared(input_targets, predicted).sum()
		
		step+=1

		sys.stdout.write("\033[F")
		sys.stdout.write("\033[K")
		print("Process Validation Batch: [{}/{}]".format(step, batch_num))                     

	return l1/len(data), l2/len(data), 1 - CD/len(data), ED/len(data), \
	pearson/len(data), r_squared/len(data)
Example #13
    def train(self, ktrain, kval, n_epochs, batch_size, no_improve_lim):
        self.tloss, self.tacc, self.vloss, self.vacc = [], [], [], []
        self.min_val_loss = 100000
        self.no_improve_lim = no_improve_lim
        self.no_improve = 0
        self.max_val_acc = 0
        best_model = self.optimizer.target.copy()

        for epoch in range(n_epochs):
            kbtrain = utils.make_batches(ktrain, batch_size=batch_size, shuffle=True)
            self.n_batches = len(kbtrain)
            self.epoch = epoch
            self.tloss.append([])
            self.tacc.append([])
            for i_batch in range(self.n_batches):
                loss = self.model(Variable(kbtrain[i_batch][0]), Variable(kbtrain[i_batch][1]), test=False).loss
                self.tloss[epoch].append(loss.data[()])
                self.tacc[epoch].append(F.accuracy(self.model.y, kbtrain[i_batch][1]).data[()])
                self.print_report('train')
                self.model.cleargrads()
                loss.backward()
                self.optimizer.update()

            self.vloss.append(self.model(Variable(kval[0], volatile=True), Variable(kval[1], volatile=True), test=True).loss.data[()])
            self.vacc.append(F.accuracy(self.model.y, kval[1]).data[()])
            self.vcorrect = np.sum(F.argmax(self.model.y, axis=1).data == kval[1])
            self.vall = len(kval[1])

            if (self.vloss[-1] < self.min_val_loss): ## | (self.vacc[-1] > self.max_val_acc):
                best_model = self.optimizer.target.copy()
                self.min_val_loss = self.vloss[-1]
                self.max_val_acc = self.vacc[-1]
                self.no_improve = 0

            self.print_report('val')
            if self.no_improve >= self.no_improve_lim:
                print()
                print("Validation loss did not reduce in " + str(self.no_improve_lim) + " iterations")
                print("Quit iteration loop")
                break

            self.no_improve += 1
            self.optimizer.new_epoch()

        self.model = best_model
Example #14
def create_badges(data, layout):
    for batch in make_batches(layout.ordering_function(data), layout.badge_per_sheet):

        if settings.printout.show_guidelines:
            draw_margins(layout)
            draw_guidelines(layout)

        draw_cutlines(layout)
        draw_page_borders(layout)

        layout.canvas.translate(0, layout.height_offset)
        for ticket_index, attendee in batch:
            write_verso(attendee, ticket_index, layout)
            layout.canvas.translate(layout.section_width, 0)
            write_recto(attendee, layout)
            layout.canvas.translate(-layout.section_width, -layout.height_offset)
        layout.canvas.showPage()  # finish the page, next statements should go next page
    layout.canvas.save()
Example #15
    def fit(self, x):
        with tf.device('/gpu:0'):
            if (not hasattr(self, 'epoch')) or self.epoch == 0:
                self._init()
                with self.tf_graph.as_default():
                    self._build_model()
                    self.tf_session.run(tf.global_variables_initializer())

            num_data = x.shape[0] - x.shape[0] % self.d_batch_size
            batches = make_batches(num_data, self.d_batch_size)
            best_is = 0.0
            while (self.epoch < self.num_epochs):
                print('Epoch %d/%d' % (self.epoch + 1, self.num_epochs))
                for batch_idx, (batch_start, batch_end) in enumerate(batches):
                    batch_size = batch_end - batch_start

                    x_batch = x[batch_start:batch_end]
                    if self.same_input:
                        z_batch = self.z_prior.sample([self.g_batch_size, self.num_z]).astype(np.float32)
                        z_batch = np.vstack([z_batch] * self.num_gens)
                    else:
                        z_batch = self.z_prior.sample([self.g_batch_size * self.num_gens, self.num_z]).astype(np.float32)

                    # update discriminator D
                    d_bin_loss, d_mul_loss, d_loss, _ = self.tf_session.run(
                        [self.d_bin_loss, self.d_mul_loss, self.d_loss, self.d_opt],
                        feed_dict={self.x: x_batch, self.z: z_batch})

                    # update generator G
                    g_bin_loss, g_mul_loss, g_loss, _ = self.tf_session.run(
                        [self.g_bin_loss, self.g_mul_loss, self.g_loss, self.g_opt],
                        feed_dict={self.z: z_batch})
                    print('-----'+str(batch_idx)+'/'+str(len(batches)))

                self.epoch += 1
                print("Epoch: [%4d/%4d] d_bin_loss: %.5f, d_mul_loss: %.5f, d_loss: %.5f,"
                      " g_bin_loss: %.5f, g_mul_loss: %.5f, g_loss: %.5f" % (self.epoch, self.num_epochs,
                                    d_bin_loss, d_mul_loss, d_loss, g_bin_loss, g_mul_loss, g_loss))
                self._samples(self.sample_fp.format(epoch=self.epoch+1))
                self._samples_by_gen(self.sample_by_gen_fp.format(epoch=self.epoch+1))
Example #16
def train_step(model, criterion, optimizer, data, pre_treatment, 
		 	   drug_fingerprint_info):
	
	model.train()

	step = 0
	loss = .0

	if len(data) % parameters.batch_size == 0:
		batch_num = int(len(data)/parameters.batch_size)
	else:
		batch_num = int(len(data)/parameters.batch_size) + 1

	batches = utils.make_batches(data, pre_treatment, 
								 drug_fingerprint_info,
								 parameters.batch_size)
	for batch in batches:
		pre_treatments, drugs, targets, doses, times = batch
		input_pre_treatments = torch.cuda.FloatTensor(pre_treatments)
		input_drugs = torch.cuda.FloatTensor(drugs)
		input_targets = torch.cuda.FloatTensor(targets)
		input_doses = torch.cuda.FloatTensor(doses)
		input_times = torch.cuda.FloatTensor(times)

		# Optimizing
		optimizer.zero_grad()
		predicted = model(input_pre_treatments, input_drugs, input_doses,
						  input_times)
		_loss = criterion(predicted, input_targets).mean()
		_loss.backward()
		loss+=_loss.item()
		optimizer.step()
		
		step+=1

		sys.stdout.write("\033[F")
		sys.stdout.write("\033[K")
		print("Process Training Batch: [{}/{}]".format(step, batch_num))                     

	return loss/batch_num
Example #17
def run_actor_critic_model(args, model, session, dataset, file_writer, saver, epoch_num):
    with tf.device('/cpu:0'):
        batched_data, batched_labels, batched_seq_lens,  batched_bbox = utils.make_batches(dataset, batch_size=BATCH_SIZE)

    batch_accuracies = []
    for j in range(len(batched_data)):
        data_batch = batched_data[j]
        label_batch = batched_labels[j]
        seq_lens_batch = batched_seq_lens[j]
        bbox_batch =  batched_bbox[j]

        if 'actor' in args.model:
            summary, loss, rewards, area_accuracy = model.run_pretrain_actor_batch(args, session, data_batch, 
                                                            label_batch, seq_lens_batch, bbox_batch)
            print("Loss of the current batch is {0}".format(loss))
            print("Finished batch {0}/{1}".format(j,len(batched_data)))
            print("Total rewards: {0}".format(rewards))
            print("Average area accuracy per sequence per batch: {0}".format(area_accuracy))
            batch_accuracies.append(area_accuracy)
        if 'critic' in args.model:
            summary, loss = model.run_pretrain_critic_batch(args, session, data_batch, 
                                                label_batch, seq_lens_batch, bbox_batch, seq_lens_batch, seq_lens_batch)
            print("Loss of the current batch is {0}".format(loss))

        # if 'complete' in args.model:
            

        file_writer.add_summary(summary, j)

        # # Record batch accuracies for test code
       

    if args.train == "train":
        # Checkpoint model - every epoch
        utils.save_checkpoint(args, session, saver, epoch_num)
    else: # val or test
        test_accuracy = np.mean(batch_accuracies)
        print "Model {0} accuracy: {1}".format(args.train, test_accuracy)
Example #18
def test_utils():
    data, labels = make_batches(n_pts_per_cluster=250)
    G = gt.Graph(data, sample_idx=labels, use_pygsp=True)
    meld_op = meld.MELD()
    sample_densities = meld_op.fit_transform(G, labels)
    sample_likelihoods = meld.utils.normalize_densities(sample_densities)

    meld.VertexFrequencyCluster().fit_predict(
        G=G,
        sample_indicator=meld_op.sample_indicators["expt"],
        likelihood=sample_likelihoods["expt"],
    )

    meld.utils.get_meld_cmap()

    # Test normalize_densities
    # Three samples
    densities = np.ones([100, 3])
    meld.utils.normalize_densities(sample_densities=densities)

    # Two samples
    densities = np.ones([100, 2])
    meld.utils.normalize_densities(sample_densities=densities)
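In the MELD test snippets (Examples 5, 6, 8, 18 and 27), make_batches is a test fixture rather than a batching utility: it fabricates a small synthetic point cloud together with a per-point sample label that is then passed as sample_idx or sample_labels. A stand-in with the same call signature and return shape, assuming one Gaussian blob per sample (the actual fixture in the MELD test suite will differ in its geometry):

import numpy as np

def make_batches(n_pts_per_cluster=100, seed=42):
    # Hypothetical stand-in: one 2-D Gaussian blob per sample ('ctrl' and 'expt'),
    # returned as (data, sample_labels); the real test fixture may differ.
    rng = np.random.default_rng(seed)
    ctrl = rng.normal(loc=0.0, scale=1.0, size=(n_pts_per_cluster, 2))
    expt = rng.normal(loc=2.0, scale=1.0, size=(n_pts_per_cluster, 2))
    data = np.vstack([ctrl, expt])
    labels = np.array(['ctrl'] * n_pts_per_cluster + ['expt'] * n_pts_per_cluster)
    return data, labels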
Example #19
    def preproc_predict(self, imgs, batch_size=32):
        """Preprocess images and predict with the model (no batch processing for first step)
        Input:
        imgs: 4D float or int array of images
        batch_size: integer, size of the batch
        Returns:
        predictions: numpy array with predictions (num_images, len_model_output)
        """
        print('base_model preproc_predict!')
        # import utool as ut
        # ut.embed()
        batch_idx = make_batches(imgs.shape[0], batch_size)
        imgs_preds = np.zeros((imgs.shape[0], ) +
                              self.model.get_output_shape_at(0)[1:])
        print('Computing predictions with the shape {}'.format(
            imgs_preds.shape))

        for sid, eid in batch_idx:
            preproc = self.backend_class.normalize(imgs[sid:eid])
            imgs_preds[sid:eid] = self.model.predict_on_batch(preproc)

        print('imgs_preds = %s' % imgs_preds)

        return imgs_preds
Example #20
INIT_LEARNING_RATE = 0.001
WEIGHT_DECAY_RATE = 0.0005
MOMENTUM = 0.9
IMAGE_HEIGHT  = 224
IMAGE_WIDTH   = 224
NUM_CHANNELS  = 3
BATCH_SIZE = 50
N_LABELS = 2
DROPOUT = 0.50  
LOGS_PATH = './tensorflow_logs/'
WEIGHT_PATH = 'vgg16_weights.npz'
TRAINSET_PATH = 'train.csv'
VALSET_PATH ='val.csv'
ckpt_dir = "./ckpt_dir_config1"

train_image_batch, train_label_batch = make_batches(TRAINSET_PATH, N_EPOCHS, IMAGE_HEIGHT,
                                                    IMAGE_WIDTH, BATCH_SIZE, shuffle=False, training=True)
val_image_batch, val_label_batch = make_batches(VALSET_PATH, 1, IMAGE_HEIGHT,
                                                IMAGE_WIDTH, BATCH_SIZE, shuffle=False, training=False)

# Count the number of training and test examples
num_train = count_images(TRAINSET_PATH)
num_val = count_images(VALSET_PATH)
print ('Train_images: ', num_train)
print ('Validation_images: ', num_val)

# Placeholders for tensorflow graph
learning_rate = tf.placeholder( tf.float32, [])
images_tf = tf.placeholder( tf.float32, [None, IMAGE_HEIGHT, IMAGE_WIDTH, 3], name="images")
labels_tf = tf.placeholder( tf.int64, [None], name='labels')  # the dimensions could be [None,N_CLASSES]

network = VGG16(N_LABELS, WEIGHT_PATH)
Example #21
    def train(self, x_train_opcode, x_train_assembly, x_train_seq_len, y_train, x_valid_opcode, x_valid_assembly,
                x_valid_seq_len, y_valid, x_test_opcode, x_test_assembly, x_test_seq_len, y_test):
        outFile = open(self.OutName, 'w')    
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)

        with tf.Session(config=utils.get_default_config()) as sess:
            writer = tf.summary.FileWriter(self.graph_path, sess.graph)

            check_point = tf.train.get_checkpoint_state(self.checkpoint_path)
            if check_point and tf.train.checkpoint_exists(check_point.model_checkpoint_path):
                message = "Load model parameters from %s\n" % check_point.model_checkpoint_path
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
                saver.restore(sess, check_point.model_checkpoint_path)
            else:
                message = "Create the model with fresh parameters\n"
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
                sess.run(tf.global_variables_initializer())

            #### Separate dataset
            x_train_opcode_0 = []
            x_train_opcode_1 = []
            x_train_assembly_0 = []
            x_train_assembly_1 = []
            y_train_0 = []
            y_train_1 = []
            x_train_seq_len_0 = []
            x_train_seq_len_1 = []

            for index, aLabel in enumerate(y_train):
                if aLabel == 0.0:
                    x_train_opcode_0.append(x_train_opcode[index, :, :, :])
                    x_train_assembly_0.append(x_train_assembly[index, :, :, :])
                    y_train_0.append(y_train[index])
                    x_train_seq_len_0.append(x_train_seq_len[index])
                else:
                    x_train_opcode_1.append(x_train_opcode[index, :, :, :])
                    x_train_assembly_1.append(x_train_assembly[index, :, :, :])
                    y_train_1.append(y_train[index])
                    x_train_seq_len_1.append(x_train_seq_len[index])

            x_train_opcode_0 = np.array(x_train_opcode_0)
            x_train_opcode_1 = np.array(x_train_opcode_1)
            x_train_assembly_0 = np.array(x_train_assembly_0)
            x_train_assembly_1 = np.array(x_train_assembly_1)

            min_train_0_1 = min(x_train_opcode_0.shape[0], x_train_opcode_1.shape[0])
            training_set = min_train_0_1 - min_train_0_1 % (self.batch_size // 2)
            training_batches = utils.make_batches(training_set, (self.batch_size // 2))
            #### Separate dataset

            step_loss = 0.0  # average loss per epoch
            step_time = 0.0
            full_train_accuracy_score = []
            full_train_pre_score = []
            full_train_f1_score = []
            full_train_recall_score = []
            full_train_auc_score = []
            initial_step = self.global_step.eval()
            for step in range(initial_step, initial_step + self.num_train_steps):

                loss_per_batch = 0.0
                start_time = time.time()
                full_y_predic_train = np.array([])
                full_y_target_train = np.array([])
                for batch_idx, (batch_start, batch_end) in enumerate(training_batches):

                    #### Separate batch
                    batch_x_opcode_0 = x_train_opcode_0[batch_start:batch_end]
                    batch_x_assembly_0 = x_train_assembly_0[batch_start:batch_end]
                    batch_y_0 = y_train_0[batch_start:batch_end]
                    batch_sequence_length_0 = x_train_seq_len_0[batch_start:batch_end]

                    batch_x_opcode_1 = x_train_opcode_1[batch_start:batch_end]
                    batch_x_assembly_1 = x_train_assembly_1[batch_start:batch_end]
                    batch_y_1 = y_train_1[batch_start:batch_end]
                    batch_sequence_length_1 = x_train_seq_len_1[batch_start:batch_end]

                    batch_x_opcode = np.concatenate((batch_x_opcode_0, batch_x_opcode_1), axis=0)
                    batch_x_assembly = np.concatenate((batch_x_assembly_0, batch_x_assembly_1), axis=0)
                    batch_y = batch_y_0 + batch_y_1
                    batch_sequence_length = batch_sequence_length_0 + batch_sequence_length_1
                    #### Separate batch
                    full_y_target_train = np.append(full_y_target_train, batch_y)

                    feed_dict = {
                        self.X_opcode: batch_x_opcode,
                        self.X_assembly: batch_x_assembly,
                        self.Y: batch_y,
                        self.sequence_length: batch_sequence_length,
                    }

                    soutputs, sstates, sphi_x_tilde = sess.run(
                        [self.outputs, self.states, self.phi_x_tilde],
                        feed_dict=feed_dict)

                    # print("Hello")
                    # print(soutputs.shape)
                    # print(sstates.shape)
                    # print(sphi_x_tilde.shape)
                    # print(np.sum(soutputs[:,0,:]))
                    # print(np.sum(sphi_x_tilde[:,:256]))
                    # print(np.sum(soutputs[:,-1,:]))
                    # print(np.sum(sstates))
                    # print(np.sum(sphi_x_tilde[:,-256:]))
                    # sys.exit()

                    _, summary, batch_loss, batch_y_pred_train = sess.run(
                        [self.training_op, self.summary_op, self.loss, self.y_pred_svm],
                        feed_dict=feed_dict)
                    full_y_predic_train = np.append(full_y_predic_train, batch_y_pred_train)

                    if (batch_idx + 1) % (len(training_batches) // 10) == 0:
                        writer.add_summary(summary, global_step=step)

                    loss_per_batch += batch_loss / len(training_batches)

                batch_train_accuracy_score = mt.accuracy_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_pre_score = mt.precision_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_f1_score = mt.f1_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_recall_score = mt.recall_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_auc_score = mt.roc_auc_score(y_true=full_y_target_train, y_score=full_y_predic_train)
                full_y_predic_train = np.array([])
                full_y_target_train = np.array([])
                full_train_accuracy_score.append(batch_train_accuracy_score)
                full_train_pre_score.append(batch_train_pre_score)
                full_train_f1_score.append(batch_train_f1_score)
                full_train_recall_score.append(batch_train_recall_score)
                full_train_auc_score.append(batch_train_auc_score)

                step_time += (time.time() - start_time)
                step_loss += loss_per_batch

                # if (step + 1) % 10 == 0:
                #     # Save checkpoint and zero timer and loss.
                #     checkpoint_path = os.path.join(self.checkpoint_path, "rnn_classifier_" + self.data_size + ".ckpt")
                #     saver.save(sess, checkpoint_path, global_step=step)

                if (step + 1) % self.display_step == 0:

                    # Train plot
                    ave_train_accuracy_score = np.mean(full_train_accuracy_score)
                    ave_train_pre_score = np.mean(full_train_pre_score)
                    ave_train_f1_score = np.mean(full_train_f1_score)
                    ave_train_recall_score = np.mean(full_train_recall_score)
                    ave_train_auc_score = np.mean(full_train_auc_score)

                    full_train_accuracy_score = []
                    full_train_pre_score = []
                    full_train_f1_score = []
                    full_train_recall_score = []
                    full_train_auc_score = []
                    message = "global step %d/%d step-time %.2fs average loss %.5f acc %.2f pre %.2f f1 %.2f rec %.2f auc %.2f\n" % (
                        step, self.num_train_steps - 1, step_time, step_loss, ave_train_accuracy_score, ave_train_pre_score, ave_train_f1_score, ave_train_recall_score, ave_train_auc_score)
                    utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)

                    outFile.write("%.2f\n" %(ave_train_accuracy_score * 100))
                    outFile.write("%.2f\n" %(ave_train_pre_score * 100))
                    outFile.write("%.2f\n" %(ave_train_f1_score * 100))
                    outFile.write("%.2f\n" %(ave_train_recall_score * 100))
                    outFile.write("%.2f\n" %(ave_train_auc_score * 100))
                    #Train plot

                    #Dev plot

                    step_time, step_loss = 0.0, 0.0

                    dev_set = x_valid_opcode.shape[0] - x_valid_opcode.shape[0] % self.batch_size
                    dev_batches = utils.make_batches(dev_set, self.batch_size)
                    #### Separate dataset

                    average_dev_loss = 0.0
                    full_y_pred_svm = np.array([])
                    for batch_idx, (batch_start, batch_end) in enumerate(dev_batches):

                        valid_x_opcode = x_valid_opcode[batch_start:batch_end]
                        valid_x_assembly = x_valid_assembly[batch_start:batch_end]

                        valid_y = y_valid[batch_start:batch_end]
                        valid_seq_len = x_valid_seq_len[batch_start:batch_end]
                        #### Separate batch

                        feed_dict = {
                            self.X_opcode: valid_x_opcode,
                            self.X_assembly: valid_x_assembly,
                            self.Y: valid_y,
                            self.sequence_length: valid_seq_len,
                        }

                        batch_dev_loss, batch_y_pred = sess.run([self.loss, self.y_pred_svm], feed_dict=feed_dict)
                        full_y_pred_svm = np.append(full_y_pred_svm, batch_y_pred)

                        average_dev_loss += batch_dev_loss / len(dev_batches)
                    message = "eval: accuracy_svm %.2f\n" % (
                                mt.accuracy_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                    message += "eval: precision_svm %.2f\n" % (
                                mt.precision_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                    message += "eval: f1_svm %.2f\n" % (
                                mt.f1_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                    message += "eval: recall_svm %.2f\n" % (
                                mt.recall_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                    message += "eval: roc_auc_svm %.2f\n" % (
                                mt.roc_auc_score(y_true=y_valid[:dev_set], y_score=full_y_pred_svm) * 100)
                    message += "-----------------------------------------------------\n"
                    outFile.write("%.2f\n" %(mt.accuracy_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                    outFile.write("%.2f\n" %(mt.precision_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                    outFile.write("%.2f\n" %(mt.f1_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                    outFile.write("%.2f\n" %(mt.recall_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                    outFile.write("%.2f\n" %(mt.roc_auc_score(y_true=y_valid[:dev_set], y_score=full_y_pred_svm) * 100))
                    utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
                    #Dev plot

                    #Test plot
                    #x_train_opcode, x_train_assembly, x_train_seq_len, y_train, 
                    #x_valid_opcode, x_valid_assembly, x_valid_seq_len, y_valid, 
                    #x_test_opcode, x_test_assembly, x_test_seq_len, y_test
                    step_time, step_loss = 0.0, 0.0

                    test_set = x_test_opcode.shape[0] - x_test_opcode.shape[0] % self.batch_size
                    test_batches = utils.make_batches(test_set, self.batch_size)
                    #### Separate dataset

                    average_test_loss = 0.0
                    full_y_pred_svm_test = np.array([])
                    for batch_idx, (batch_start, batch_end) in enumerate(test_batches):

                        test_x_opcode = x_test_opcode[batch_start:batch_end]
                        test_x_assembly = x_test_assembly[batch_start:batch_end]

                        test_y = y_test[batch_start:batch_end]
                        test_seq_len = x_test_seq_len[batch_start:batch_end]
                        #### Separate batch

                        feed_dict = {
                            self.X_opcode: test_x_opcode,
                            self.X_assembly: test_x_assembly,
                            self.Y: test_y,
                            self.sequence_length: test_seq_len,
                        }

                        batch_test_loss, batch_y_pred_test = sess.run([self.loss, self.y_pred_svm], feed_dict=feed_dict)
                        full_y_pred_svm_test = np.append(full_y_pred_svm_test, batch_y_pred_test)

                        average_test_loss += batch_test_loss / len(test_batches)

                    message = "test: accuracy_svm %.2f\n" % (
                                mt.accuracy_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                    message += "test: precision_svm %.2f\n" % (
                                mt.precision_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                    message += "test: f1_svm %.2f\n" % (
                                mt.f1_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                    message += "test: recall_svm %.2f\n" % (
                                mt.recall_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                    message += "test: roc_auc_svm %.2f\n" % (
                                mt.roc_auc_score(y_true=y_test[:test_set], y_score=full_y_pred_svm_test) * 100)
                    message += "-----------------------------------------------------\n"
                    outFile.write("%.2f\n" %(mt.accuracy_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                    outFile.write("%.2f\n" %(mt.precision_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                    outFile.write("%.2f\n" %(mt.f1_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                    outFile.write("%.2f\n" %(mt.recall_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                    outFile.write("%.2f\n" %(mt.roc_auc_score(y_true=y_test[:test_set], y_score=full_y_pred_svm_test) * 100))
                    utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
                    #Test plot                    
            writer.close()
        message = "Finish training process.\n"
        utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
        outFile.close()
Example #22
    bkgPerBatch = bkgPerBatch.astype(int)

    # fill lists of all events and files
    b_events, b_files = [], []
    for file, nEvents in bkgCounts.items():
        for evt in range(nEvents):
            b_events.append(evt)
            b_files.append(file)
    e_events, e_files = [], []
    for file, nEvents in eCounts.items():
        for evt in range(nEvents):
            e_events.append(evt)
            e_files.append(file)

    # make batches
    bkg_event_batches, bkg_file_batches = utils.make_batches(
        b_events, b_files, bkgPerBatch, num_batches)
    e_event_batches, e_file_batches = utils.make_batches(
        e_events, e_files, ePerBatch, num_batches)

    # create the discriminator
    discriminator = build_discriminator(img_shape=(40, 40, 3), n_classes=2)
    # create the generator
    generator = build_generator(latent_dim)
    # create the gan
    gan_model = build_gan(generator, discriminator)

    num_examples = num_batches * batch_size

    print(utils.bcolors.YELLOW + 'Training on:' + utils.bcolors.ENDC)
    print(utils.bcolors.GREEN + 'Number of examples: ' + utils.bcolors.ENDC,
          num_examples)
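Examples 22, 24 and 30 rely on yet another utils.make_batches, one that slices two parallel lists (event indices and file names) into batches of prescribed sizes. A minimal sketch consistent with those call sites, assuming per_batch holds one batch size per batch (the real utility in that repository may differ):

def make_batches(events, files, per_batch, n_batches):
    # Slice the parallel `events`/`files` lists into n_batches chunks,
    # where chunk i contains per_batch[i] consecutive entries.
    event_batches, file_batches = [], []
    start = 0
    for i in range(n_batches):
        end = start + int(per_batch[i])
        event_batches.append(events[start:end])
        file_batches.append(files[start:end])
        start = end
    return event_batches, file_batches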
Example #23
from utils import load_mnist, make_batches, display_mnist_image
from FullyConnectedNet import FullyConnectedNet
import numpy as np
from tqdm import tqdm

batch_size = 32
learning_rate = 0.005
n_epochs = 20

X_train, X_test, y_train, y_test = load_mnist()
X_train = X_train / 255
X_test = X_test / 255
np.random.seed(42)
train_batches = make_batches(X_train, y_train, batch_size)
test_batches = make_batches(X_test, y_test, batch_size)

model = FullyConnectedNet(learning_rate)

for i in tqdm(range(n_epochs)):
    epoch_loss = 0
    for x_batch, y_batch in train_batches:
        pred, loss = model.forward(x_batch.T, target=y_batch)
        model.backward()
        model.update_parameters()
        epoch_loss += loss
    print("training loss for epoch", (i + 1), "is", epoch_loss / y_train.size)

total_correct = 0
for x_batch, y_batch in test_batches:
    predicted, loss = model.forward(x_batch.T, target=y_batch)
    predicted_classes = np.argmax(predicted, axis=0)
    total_correct += np.sum(predicted_classes == y_batch)

print("test accuracy:", total_correct / y_test.size)
Example #24
	bkgPerBatch = bkgPerBatch.astype(int)

	# fill lists of all events and files
	b_events, b_files = [], []
	for file, nEvents in bkgCounts.items():
		for evt in range(nEvents):
			b_events.append(evt)
			b_files.append(file)
	e_events, e_files = [], []
	for file, nEvents in eCounts.items():
		for evt in range(nEvents):
			e_events.append(evt)
			e_files.append(file)

	# make batches
	bkg_event_batches, bkg_file_batches = utils.make_batches(b_events, b_files, bkgPerBatch, nBatches)
	e_event_batches, e_file_batches = utils.make_batches(e_events, e_files, ePerBatch, nBatches)

	# train/validation split
	train_e_event_batches, val_e_event_batches, train_e_file_batches, val_e_file_batches = train_test_split(e_event_batches, e_file_batches, test_size=val_size, random_state=42)
	train_bkg_event_batches, val_bkg_event_batches, train_bkg_file_batches, val_bkg_file_batches = train_test_split(bkg_event_batches, bkg_file_batches, test_size=val_size, random_state=42)

	# count events in each batch
	nSavedETrain = utils.count_events(train_e_file_batches, train_e_event_batches, eCounts)
	nSavedEVal = utils.count_events(val_e_file_batches, val_e_event_batches, eCounts)
	nSavedBkgTrain = utils.count_events(train_bkg_file_batches, train_bkg_event_batches, bkgCounts)
	nSavedBkgVal = utils.count_events(val_bkg_file_batches, val_bkg_event_batches, bkgCounts)

	# add background events to validation data
	# to keep ratio e/bkg equal to that in original dataset
	if(nSavedEVal*1.0/(nSavedEVal+nSavedBkgVal) > fE):
Example #25
## load data
train_x, train_y = ds.load('train')
valid_x, valid_y = ds.load('valid')

## setup model
idim = len(train_x[0][0])
odim = max(train_y) + 1
model = RNN(idim, 300, odim, 'lstm')

## setup optimizer
#train_w = utils.balance_prior(train_y)
params = model.get_theta()
#args, n_batches = utils.make_batches([train_x, train_y], None)
#opt = climin.Rprop(params, model.opt_fprime, args=args, init_step=0.0001)
args, n_batches = utils.make_batches([train_x, train_y], 30)
opt = climin.Adadelta(params, model.opt_fprime, offset=1e-6, args=args)
#args, n_batches = utils.make_batches([train_x, train_y], 30)
#opt = climin.rmsprop.RmsProp(params, model.opt_fprime, step_rate=0.01, args=args)

## perform optimization
epoch = 0
start = time.time()
for info in opt:
    if info['n_iter'] % n_batches == 0:
        epoch += 1
        # end
        if epoch == 100:
            break
        # print performance
        if epoch % 1 == 0:
Example #26
    def fit(self, x):
        if (not hasattr(self, 'epoch')) or self.epoch == 0:
            self._init()
            with self.tf_graph.as_default():
                self._build_model()
                self.tf_session.run(tf.global_variables_initializer())
                if self.load():
                    print('load the checkpoint!')
                else:
                    print('cannot load a checkpoint; initializing all variables')

        num_data = x.shape[0] - x.shape[0] % self.d_batch_size
        batches = make_batches(num_data, self.d_batch_size)
        best_is = 0.0
        while (self.epoch < self.num_epochs):
            for batch_idx, (batch_start, batch_end) in enumerate(batches):
                batch_size = batch_end - batch_start

                x_batch = x[batch_start:batch_end]
                if self.same_input:
                    z_batch = self.z_prior.sample(
                        [self.g_batch_size, self.num_z]).astype(np.float32)
                    z_batch = np.vstack([z_batch] * self.num_gens)
                else:
                    z_batch = self.z_prior.sample(
                        [self.g_batch_size * self.num_gens,
                         self.num_z]).astype(np.float32)

                # update discriminator D
                d_bin_loss, d_mul_loss, d_loss, _ = self.tf_session.run(
                    [
                        self.d_bin_loss, self.d_mul_loss, self.d_loss,
                        self.d_opt
                    ],
                    feed_dict={
                        self.x: x_batch,
                        self.z: z_batch
                    })

                # update generator G
                g_bin_loss, g_mul_loss, g_loss, _ = self.tf_session.run(
                    [
                        self.g_bin_loss, self.g_mul_loss, self.g_loss,
                        self.g_opt
                    ],
                    feed_dict={self.z: z_batch})

            self.epoch += 1
            print(
                "Epoch: [%4d/%4d] d_bin_loss: %.5f, d_mul_loss: %.5f, d_loss: %.5f,"
                " g_bin_loss: %.5f, g_mul_loss: %.5f, g_loss: %.5f" %
                (self.epoch, self.num_epochs, d_bin_loss, d_mul_loss, d_loss,
                 g_bin_loss, g_mul_loss, g_loss))
            # print("Epoch: [%4d/%4d] d_bin_loss: %.5f,g_bin_loss: %.5f" % (self.epoch, self.num_epochs,
            #                     d_bin_loss, g_bin_loss))
            if self.epoch % 10 == 0:
                self._samples(self.sample_fp.format(epoch=self.epoch + 1))

            if not os.path.exists(self.checkpoint_dir):
                os.makedirs(self.checkpoint_dir)
            self.saver.save(
                self.tf_session,
                os.path.join(self.checkpoint_dir, "classifier_mode_checkpoint")
            )  #+str(self.num_gens)+"epoch_"+str(self.num_epochs)+"num_g_maps_"+str(self.num_gen_feature_maps)))
        self._samples_by_gen(self.sample_by_gen_fp)
Example #27
def test_mnn():
    data, labels = make_batches(n_pts_per_cluster=250)
    G = gt.Graph(data, sample_idx=labels, use_pygsp=True)
    meld_op = meld.MELD()
    EES = meld_op.fit_transform(G, labels)
    meld.VertexFrequencyCluster().fit_transform(G=G, RES=labels, EES=EES)
Example #28
    def fit(self, x):
        if (not hasattr(self, 'epoch')) or self.epoch == 0:
            self._init()
            with self.tf_graph.as_default():
                self._build_model()
                self.tf_session.run(tf.global_variables_initializer())

        batch_idx_step = max(self.num_training_mover,
                             self.num_training_generator,
                             self.num_training_critic)
        num_data = x.shape[0] - x.shape[0] % (self.batch_size * batch_idx_step)
        if num_data > 150000:
            num_data = self.num_iter_per_epoch

        batches = make_batches(num_data, self.batch_size * batch_idx_step)

        iter = 0.0
        while (self.epoch < self.num_epochs):
            for batch_idx in np.arange(0, len(batches), batch_idx_step):
                iter += 1.0
                if self.decay:
                    lr = self.learning_rate - self.learning_rate * iter / self.decay_steps
                else:
                    lr = self.learning_rate

                if self.epoch < self.gamma_steps:
                    gamma = self.gamma0 + self.epoch * (
                        self.gamma1 - self.gamma0) / self.gamma_steps
                else:
                    gamma = self.gamma1

                # update Mover
                for it in range(self.num_training_mover):
                    z_batch = self.z_prior.sample(
                        [self.batch_size, self.num_z]).astype(np.float32)
                    batch_start, batch_end = batches[batch_idx + it]
                    x_batch = x[batch_start:batch_end]
                    self.tf_session.run(self.m_opt,
                                        feed_dict={
                                            self.gamma: gamma,
                                            self.lr: lr,
                                            self.x: x_batch,
                                            self.z: z_batch
                                        })

                # update Critic
                for it in range(self.num_training_critic):
                    z_batch = self.z_prior.sample(
                        [self.batch_size, self.num_z]).astype(np.float32)
                    batch_start, batch_end = batches[batch_idx + it]
                    x_batch = x[batch_start:batch_end]
                    self.tf_session.run(self.c_opt,
                                        feed_dict={
                                            self.gamma: gamma,
                                            self.lr: lr,
                                            self.x: x_batch,
                                            self.z: z_batch
                                        })

                # update Generator
                for _ in range(self.num_training_generator):
                    z_batch = self.z_prior.sample(
                        [self.batch_size, self.num_z]).astype(np.float32)
                    self.tf_session.run(self.g_opt,
                                        feed_dict={
                                            self.gamma: gamma,
                                            self.lr: lr,
                                            self.z: z_batch
                                        })

            self._samples(self.samples_fp.format(epoch=self.epoch + 1),
                          num_samples=100)
            idx = np.random.randint(x.shape[0],
                                    size=(100 // self.batch_size + 1) *
                                    self.batch_size)
            self._samples_h(self.samples_h_fp.format(epoch=self.epoch + 1),
                            x[idx],
                            num_samples=100)
            print('Epoch %d: done.' % (self.epoch + 1))
            self.epoch += 1
Example #29
def train(model, x, y, x_adv, aux_adv, batch_size=None, epochs=1, verbose=1, callbacks=None,
          validation_split=0., shuffle=True, class_weight=None, sample_weight=None):

  # Validate user data.
  x, y, sample_weights = model._standardize_user_data(
    x, y,
    sample_weight=sample_weight,
    class_weight=class_weight,
    batch_size=batch_size)
  ins = x + y + sample_weights

  # Prepare validation data.
  do_validation = False
  if validation_split and 0. < validation_split < 1.:
    do_validation = True
    if hasattr(x[0], 'shape'):
      split_at = int(x[0].shape[0] * (1. - validation_split))
    else:
      split_at = int(len(x[0]) * (1. - validation_split))
    x, val_x = (slice_arrays(x, 0, split_at),
                slice_arrays(x, split_at))
    y, val_y = (slice_arrays(y, 0, split_at),
                slice_arrays(y, split_at))
    sample_weights, val_sample_weights = (
      slice_arrays(sample_weights, 0, split_at),
      slice_arrays(sample_weights, split_at))
    val_ins = val_x + val_y + val_sample_weights
  else:
    val_ins = []


  # ____________________________________________________________________________
  # Fit

  num_train_samples = x[0].shape[0]
  index_array = np.arange(num_train_samples)

  #sess = K.get_session()
  #tf_x = K.placeholder(shape=(None, x[0].shape[1]))
  #tf_y = K.placeholder(shape=(None, y[0].shape[1]))
  #sess.run(tf.initialize_all_variables())

  # Loop over epochs
  for epoch in range(epochs):
    print('Epoch {0}'.format(epoch))

    if shuffle:
      np.random.shuffle(index_array)

    batches = make_batches(num_train_samples, batch_size)

    # Loop over batches
    for batch_index, (batch_start, batch_end) in enumerate(batches):
      print('.. batch {0}'.format(batch_index))

      batch_ids = index_array[batch_start:batch_end]
      ins_batch = slice_arrays(ins, batch_ids)
      assert isinstance(ins_batch, list) and len(ins_batch) == 1 + 2 + 2

      # Add noise
      if add_noise:
        noise = x_adv[np.random.randint(0, x_adv.shape[0], ins_batch[0].shape[0])]
        noise_reg = np.zeros_like(ins_batch[1]) + 100.  # mask_value is set to 100
        noise_discr = np.zeros_like(ins_batch[2])
        noise_reg_w = np.ones_like(ins_batch[3])
        noise_discr_w = np.ones_like(ins_batch[3])
        ins_noise = [noise, noise_reg, noise_discr, noise_reg_w, noise_discr_w]
        ins_batch = merge_arrays(ins_batch, ins_noise)

      model._make_train_function()
      f = model.train_function
      outs = f(ins_batch)
Example #30
		b_files.append(file)
#e_events, e_files = [], []
#for file, nEvents in eCounts.items():
#	for evt in range(nEvents):
#		e_events.append(evt)
#		e_files.append(file)

availableBkg = sum(list(bkgCounts.values()))
nBatches = int(availableBkg / batch_size)

bkgPerBatch = np.asarray([batch_size]*nBatches)
bkgPerBatch = bkgPerBatch.astype(int)

print(availableBkg, nBatches)
# make batches
bkg_event_batches, bkg_file_batches = utils.make_batches(b_events, b_files, bkgPerBatch, nBatches)
#e_event_batches, e_file_batches = utils.make_batches(e_events, e_files, ePerBatch, nBatches)
val_e_file_batches = [list(set([-1])) for _ in range(nBatches)]
val_e_event_batches = np.array([[0,0]]*nBatches)

print(bkg_file_batches)
print(bkg_event_batches)

#reversed to save electrons
np.save(outputDir + 'e_files_testBatches', bkg_file_batches)
np.save(outputDir + 'e_events_testBatches', bkg_event_batches)
np.save(outputDir + 'bkg_files_testBatches', val_e_file_batches)
np.save(outputDir + 'bkg_events_testBatches', val_e_event_batches)