Example #1
    def encode_texts(self, texts, include_oov=False, verbose=1, **kwargs):
        """Encodes the given texts using internal vocabulary with optionally applied encoding options. See
        `apply_encoding_options` to set various options.

        Args:
            texts: The list of text items to encode.
            include_oov: True to map unknown (out of vocab) tokens to 0. False to exclude the token.
            verbose: The verbosity level for progress. Can be 0, 1, 2. (Default value = 1)
            **kwargs: The kwargs for `token_generator`.

        Returns:
            The encoded texts.
        """
        if not self.has_vocab:
            raise ValueError("You need to build the vocabulary using `build_vocab` before using `encode_texts`")

        progbar = Progbar(len(texts), verbose=verbose, interval=0.25)
        encoded_texts = []
        for token_data in self.token_generator(texts, **kwargs):
            indices, token = token_data[:-1], token_data[-1]

            token_idx = self._token2idx.get(token)
            if token_idx is None and include_oov:
                token_idx = 0

            if token_idx is not None:
                _append(encoded_texts, indices, token_idx)

            # Update progressbar per document level.
            progbar.update(indices[0])

        # All done. Finalize progressbar.
        progbar.update(len(texts), force=True)
        return encoded_texts
Example #2
    def _load_text_word2vec(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            line = f.readline()
            num_vectors, self.vector_len = (int(x) for x in line.split())
            if self.verbose:
                self.logger.info(
                    f'Loading {num_vectors} word vectors from file '
                    f'"{file_path}".')
            oov_id = self._init_vectors()
            found = 1
            progbar = Progbar(len(self.vocab) - 1, verbose=self.verbose)
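            # Read each vector line and keep only the words that are present in the vocabulary.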
            for i in range(num_vectors):
                line = f.readline()
                values = line.split()
                word = values[0]
                word_vector = np.asarray(values[1:], dtype='float32')
                if word in self.vocab:
                    word_id = self.vocab[word]
                    self.vectors[word_id] = word_vector
                    found += 1
                progbar.update(found)
            if found < len(self.vocab):
                progbar.update(len(self.vocab) - 1)

            if oov_id is not None:
                self._replace_oov_tokens(oov_id)
Example #3
    def _test_loop(self, f, ins, batch_size=128, verbose=0):
        '''
            Abstract method to loop over some data in batches.
        '''
        nb_sample = len(ins[0])
        outs = []
        if verbose == 1:
            progbar = Progbar(target=nb_sample)
        batches = make_batches(nb_sample, batch_size)
        index_array = np.arange(nb_sample)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            ins_batch = slice_X(ins, batch_ids)

            batch_outs = f(*ins_batch)
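            # Accumulate each output as a batch-size-weighted sum; dividing by nb_sample after the loop yields the mean.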
            if type(batch_outs) == list:
                if batch_index == 0:
                    for _ in batch_outs:
                        outs.append(0.)
                for i, batch_out in enumerate(batch_outs):
                    outs[i] += batch_out * len(batch_ids)
            else:
                if batch_index == 0:
                    outs.append(0.)
                outs[0] += batch_outs * len(batch_ids)

            if verbose == 1:
                progbar.update(batch_end)
        for i, out in enumerate(outs):
            outs[i] /= nb_sample
        return outs
Example #4
    def epoch_end_callback(self, sess, sv, epoch_num):
        # Evaluate val loss
        validation_iou = 0
        print("\nComputing Validation IoU")
        progbar = Progbar(target=self.val_steps_per_epoch)

        for i in range(self.val_steps_per_epoch):
            loss_iou = sess.run(self.val_iou,
                                feed_dict={self.is_training: False})
            validation_iou += loss_iou
            progbar.update(i)
        validation_iou /= self.val_steps_per_epoch * self.config.batch_size

        # Log to Tensorflow board
        val_sum = sess.run(self.val_sum,
                           feed_dict={self.val_iou_ph: validation_iou})

        sv.summary_writer.add_summary(val_sum, epoch_num)

        print("Epoch [{}] Validation IoU: {}".format(epoch_num,
                                                     validation_iou))
        # Model Saving
        if validation_iou > self.min_val_iou:
            self.save(sess, self.config.checkpoint_dir, 'best')
            self.min_val_iou = validation_iou
        if epoch_num % self.config.save_freq == 0:
            self.save(sess, self.config.checkpoint_dir, epoch_num)
Example #5
    def _predict_loop(self, f, ins, batch_size=128, verbose=0):
        '''
            Abstract method to loop over some data in batches.
        '''
        nb_sample = len(ins[0])
        outs = []
        if verbose == 1:
            progbar = Progbar(target=nb_sample)
        batches = make_batches(nb_sample, batch_size)
        index_array = np.arange(nb_sample)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            ins_batch = slice_X(ins, batch_ids)

            batch_outs = f(*ins_batch)
            if type(batch_outs) != list:
                batch_outs = [batch_outs]
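            # On the first batch, allocate full-size output arrays to be filled slice by slice.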
            if batch_index == 0:
                for batch_out in batch_outs:
                    shape = (nb_sample,) + batch_out.shape[1:]
                    outs.append(np.zeros(shape))

            for i, batch_out in enumerate(batch_outs):
                outs[i][batch_start:batch_end] = batch_out
            if verbose == 1:
                progbar.update(batch_end)
        return outs
Example #6
def test_progbar():
    n = 2
    input_arr = np.random.random((n, n, n))
    bar = Progbar(n)

    for i, arr in enumerate(input_arr):
        bar.update(i, list(arr))
Example #7
    def load_subsets(self, subsets):
        """
        Loads specified subsets of the data for the code jam.
        Returns tuple: ( images, labels, subset membership number )
        You can use the subset membership number to select the data from particular subset:
        e.g. result[(indices == 4).flatten()]
        """
        result = None
        resultLabels = None
        indices = None
        n_of_subsets = len(subsets)
        p = Progbar(n_of_subsets)
        p.update(0)
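        # Stack images and labels from each subset file, recording the subset index for every row.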
        for index, subsetIndex in enumerate(subsets):
            data = np.load("{}/{}.npz".format(self.root_path, subsetIndex))
            if result is None:
                result = data['images']
            else:
                result = np.vstack([result, data['images']])

            if resultLabels is None:
                resultLabels = data['labels']
            else:
                resultLabels = np.vstack([resultLabels, data['labels']])

            tmp = np.ones(data['labels'].shape) * subsetIndex
            if indices is None:
                indices = tmp
            else:
                indices = np.vstack([indices, tmp])
            p.update(index + 1)
        return (result, resultLabels, indices)
Example #8
def test_extractor_in_generator(intervals, extractor, batch_size=128):
    """
    Extracts data in bulk, then in streaming batches and checks its the same data.
    """
    from keras.utils.generic_utils import Progbar

    X_in_memory = extractor(intervals)
    samples_per_epoch = len(intervals)
    batches_per_epoch = int(samples_per_epoch / batch_size) + 1
    batch_array = np.zeros((batch_size, 1, 4, intervals[0].length),
                           dtype=np.float32)
    batch_generator = generate_from_intervals(intervals,
                                              extractor,
                                              batch_size=batch_size,
                                              indefinitely=False,
                                              batch_array=batch_array)
    progbar = Progbar(target=samples_per_epoch)
    for batch_indx in xrange(1, batches_per_epoch + 1):
        X_batch = next(batch_generator)
        start = (batch_indx - 1) * batch_size
        stop = batch_indx * batch_size
        if stop > samples_per_epoch:
            stop = samples_per_epoch
        # assert streamed sequences and labels match data in memory
        assert (X_in_memory[start:stop] - X_batch).sum() == 0
        progbar.update(stop)
Example #9
    def training(self, x_train_hw, y_train_hw, x_test, y_test):
        """Alternatively training models"""
        # get a batch of real images
        nb_train_hw = x_train_hw.shape[0]
        num_truncate = nb_train_hw % self.batch_size
        hw_data_used_num = nb_train_hw - num_truncate

        for epoch in range(self.nb_epochs):
            print('Epoch {} of {}'.format(epoch + 1, self.nb_epochs))
            nb_batches = int(nb_train_hw / self.batch_size)
            progress_bar = Progbar(target=nb_batches)

            epoch_label_predictor_loss = []

            for index in range(nb_batches):
                progress_bar.update(index)

                # get a batch of handwritten data
                hw_data_index_start = index * self.batch_size % hw_data_used_num
                hw_data_index_end = hw_data_index_start + self.batch_size
                img_hw = x_train_hw[hw_data_index_start:hw_data_index_end]
                cls_labels_hw = y_train_hw[
                    hw_data_index_start:hw_data_index_end]

                # updating parameters of label_predictor
                epoch_label_predictor_loss.append(
                    self.character_classifier.train_on_batch([img_hw],
                                                             [cls_labels_hw]))

            score = self.test(x_test, y_test)
            weights_output_dir = os.path.join(
                self.output_dir, 'pre_weights%02d-%04f.h5' % (epoch, score[1]))
            self.save_weights(weights_output_dir)
            print('\nTesting for epoch %02d: accuracy %04f' %
                  (epoch + 1, score[1]))
Example #10
def seq_to_preds(rows, seq_x, pred_probs, idx_to_attr_id, dilate=False):
    '''
    rows: List of dicts, N elements
    seq_x: Tokens, N x L matrix
    pred_probs: Predicted probabilities, N x L x C matrix
    '''

    pred_labels = np.argmax(pred_probs, axis=2)
    # Set as -1 when x_i as PAD_IDX
    pad_idxs = np.where(seq_x == PAD_IDX)
    pred_labels[pad_idxs] = -1

    predictions = []

    n_rows = seq_x.shape[0]
    n_attr = len(idx_to_attr_id)

    progbar = Progbar(n_rows)

    for row_idx, row in enumerate(rows):
        row = rows[row_idx]

        gt_attr_ids = row['attr_id_seq']
        rle_seq = row['rle_seq']
        image_id = row['image_id']

        pred_all_attr_idxs = pred_labels[
            row_idx]  # A sequence of size L, including predictions at PAD_IDX positions
        pred_seq_idxs = np.where(pred_all_attr_idxs >= 0)[0]
        pred_attr_idxs = pred_all_attr_idxs[pred_seq_idxs]

        pred_attr_ids = [idx_to_attr_id[a] for a in pred_attr_idxs]

        assert len(pred_attr_ids) == len(gt_attr_ids)

        for w_idx, pred_seq_idx in enumerate(pred_seq_idxs):
            for attr_idx in range(1, n_attr):  # Exclude SAFE
                this_attr_id = idx_to_attr_id[attr_idx]
                this_score = pred_probs[row_idx, pred_seq_idx, attr_idx]
                w_rle = rle_seq[w_idx]

                if this_score > 1e-5 and w_rle is not None:

                    if this_score > 0.05 and dilate:
                        predictions.append({
                            'image_id': image_id,
                            'attr_id': this_attr_id,
                            'segmentation': dilate_rle(w_rle),
                            'score': float(this_score),
                        })
                    else:
                        predictions.append({
                            'image_id': image_id,
                            'attr_id': this_attr_id,
                            'segmentation': w_rle,
                            'score': float(this_score),
                        })
        progbar.update(row_idx)

    return predictions
Example #11
def img_to_features(X, image_list, model, batch_size=64):
    n_img, n_h, n_w, n_c = X.shape
    n_batches = n_img // batch_size + 1
    n_feat = model.output_shape[-1]

    feat_mat = np.zeros((n_img, n_feat))

    pbar = Progbar(n_batches)

    for b_idx, start_idx in enumerate(range(0, n_img, batch_size)):
        end_idx = min(start_idx + batch_size, n_img)
        this_batch_size = end_idx - start_idx

        bx = X[start_idx:end_idx]
        bx = preprocess_input(bx)
        batch_feat = model.predict(bx)

        feat_mat[start_idx:end_idx] = batch_feat
        pbar.update(b_idx)

    # Create a dict: image_id -> feat
    image_id_to_visfeat = dict()
    for i, (image_id, image_path) in enumerate(image_list):
        image_id_to_visfeat[image_id] = feat_mat[i]

    return image_id_to_visfeat
Example #12
def train_population(population,
                     x,
                     y,
                     batch_size,
                     steps,
                     steps_save=100,
                     validation_split=0.3):
    # Split data in train and validation. Set seed to get same splits in
    # consequent calls
    x_train, x_val, y_train, y_val = train_test_split(
        x, y, test_size=validation_split, random_state=42)

    population_size = len(population)
    batch_generator = BatchGenerator(x_train, y_train, batch_size)

    results = defaultdict(lambda: [])
    stateful_metrics = ['min_loss', 'max_loss', 'mean_loss']
    for metric, _ in population[0].eval_metrics:
        stateful_metrics.extend(
            [m.format(metric) for m in ['min_{}', 'max_{}', 'mean_{}']])
    progbar = Progbar(steps, stateful_metrics=stateful_metrics)

    for step in range(1, steps + 1):
        x, y = batch_generator.next()
        for idx, member in enumerate(population):
            # One step of optimisation using hyperparameters of 'member'
            member.step_on_batch(x, y)
            # Model evaluation
            loss = member.eval_on_batch(x_val, y_val)
            # If optimised for 'STEPS_READY' steps
            if member.ready():
                # Use the rest of population to find better solutions
                exploited = member.exploit(population)
                # If new weights != old weights
                if exploited:
                    # Produce new hyperparameters for 'member'
                    member.explore()
                    loss = member.eval_on_batch(x_val, y_val)

            if step % steps_save == 0 or step == steps:
                results['model_id'].append(str(member))
                results['step'].append(step)
                results['loss'].append(loss)
                results['loss_smoothed'].append(member.loss_smoothed())
                for metric, value in member.eval_metrics:
                    results[metric].append(value)
                for h, v in member.get_hyperparameter_config().items():
                    results[h].append(v)

        # Get recently added losses to show in the progress bar
        all_losses = results['loss']
        recent_losses = all_losses[-population_size:]
        if recent_losses:
            metrics = _statistics(recent_losses, 'loss')
            for metric, _ in population[0].eval_metrics:
                metrics.extend(
                    _statistics(results[metric][-population_size:], metric))
            progbar.update(step, metrics)

    return pd.DataFrame(results)
Example #13
    def _predict_loop(self, f, ins, batch_size=128, verbose=0):
        '''
            Abstract method to loop over some data in batches.
        '''
        nb_sample = len(ins[0])
        outs = []
        if verbose == 1:
            progbar = Progbar(target=nb_sample)
        batches = make_batches(nb_sample, batch_size)
        index_array = np.arange(nb_sample)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            ins_batch = slice_X(ins, batch_ids)

            batch_outs = f(*ins_batch)
            if type(batch_outs) != list:
                batch_outs = [batch_outs]
            if batch_index == 0:
                for batch_out in batch_outs:
                    shape = (nb_sample, ) + batch_out.shape[1:]
                    outs.append(np.zeros(shape))

            for i, batch_out in enumerate(batch_outs):
                outs[i][batch_start:batch_end] = batch_out
            if verbose == 1:
                progbar.update(batch_end)
        return outs
Example #14
def image_list_to_arr(image_list):
    target_img_size = (250, 250)
    n_items = len(image_list)

    X = np.zeros(shape=(n_items, target_img_size[0], target_img_size[1], 3))

    pbar = Progbar(n_items)

    for idx, (image_id, this_image_path) in enumerate(image_list):
        # ----- Image -> Mat
        resized_img_path = this_image_path.replace('images', 'images_250')
        resized_img_path = osp.join('/BS/orekondy2/work/datasets/VISPR2017',
                                    resized_img_path)

        if osp.exists(resized_img_path):
            this_image_path = resized_img_path
        else:
            this_image_path = osp.join(SEG_ROOT, this_image_path)

        img = load_img(this_image_path, target_size=target_img_size)
        img_arr = img_to_array(img)
        X[idx] = img_arr
        pbar.update(idx)

    return X
Example #15
class TrainIntervalLogger(Callback):
    def __init__(self, interval=10000):
        self.interval = interval
        self.step = 0
        self.reset()

    def reset(self):
        self.interval_start = timeit.default_timer()
        self.progbar = Progbar(target=self.interval)
        self.metrics = []
        self.infos = []
        self.info_names = None
        self.episode_rewards = []

    def on_train_begin(self, logs):
        self.train_start = timeit.default_timer()
        self.metrics_names = self.model.metrics_names
        print('Training for {} steps ...'.format(self.params['nb_steps']))

    def on_train_end(self, logs):
        duration = timeit.default_timer() - self.train_start
        print('done, took {:.3f} seconds'.format(duration))

    def on_step_begin(self, step, logs):
        if self.step % self.interval == 0:
            if len(self.episode_rewards) > 0:
                metrics = np.array(self.metrics)
                assert metrics.shape == (self.interval, len(self.metrics_names))
                formatted_metrics = ''
                if not np.isnan(metrics).all():  # not all values are means
                    means = np.nanmean(self.metrics, axis=0)
                    assert means.shape == (len(self.metrics_names),)
                    for name, mean in zip(self.metrics_names, means):
                        formatted_metrics += ' - {}: {:.3f}'.format(name, mean)
                
                formatted_infos = ''
                if len(self.infos) > 0:
                    infos = np.array(self.infos)
                    if not np.isnan(infos).all():  # not all values are means
                        means = np.nanmean(self.infos, axis=0)
                        assert means.shape == (len(self.info_names),)
                        for name, mean in zip(self.info_names, means):
                            formatted_infos += ' - {}: {:.3f}'.format(name, mean)
                print('{} episodes - episode_reward: {:.3f} [{:.3f}, {:.3f}]{}{}'.format(len(self.episode_rewards), np.mean(self.episode_rewards), np.min(self.episode_rewards), np.max(self.episode_rewards), formatted_metrics, formatted_infos))
                print('')
            self.reset()
            print('Interval {} ({} steps performed)'.format(self.step // self.interval + 1, self.step))

    def on_step_end(self, step, logs):
        if self.info_names is None:
            self.info_names = logs['info'].keys()
        values = [('reward', logs['reward'])]
        self.progbar.update((self.step % self.interval) + 1, values=values, force=True)
        self.step += 1
        self.metrics.append(logs['metrics'])
        if len(self.info_names) > 0:
            self.infos.append([logs['info'][k] for k in self.info_names])

    def on_episode_end(self, episode, logs):
        self.episode_rewards.append(logs['episode_reward'])
Example #16
def _save_predictions_to_xmls(model, batch_size, embeddings, label2ind,
                              ind2label, test_set, predictions_dir,
                              binary_classification, hipaa_only,
                              extra_features, require_argmax):
    if not os.path.isdir(predictions_dir):
        os.mkdir(predictions_dir)

    print('Saving test XMLs to', predictions_dir)
    progress_bar = Progbar(target=TestSet.number_of_test_sets(test_set),
                           verbose=env.keras_verbose)

    for i, te in enumerate(TestSet.test_sets(
            embeddings,
            test_set=test_set,
            label2ind=label2ind,
            binary_classification=binary_classification,
            hipaa_only=hipaa_only,
            extra_features=extra_features),
                           start=1):
        preds = model.predict([te.X, te.X_extra], batch_size=batch_size)
        if require_argmax:
            preds = np.argmax(preds, axis=-1)
        xml = prediction_to_xml(te.X, preds, te.text, te.sents, ind2label)
        filename = os.path.basename(te.filename)[:-4] + '.xml'
        with open(os.path.join(predictions_dir, filename), 'w') as f:
            f.write(xml)

        progress_bar.update(i)
Example #17
 def run_train_epoch(self,
                     session,
                     x_inputs,
                     batch_size,
                     shuffle=True,
                     verbose=1):
     num_samples = len(x_inputs)
     index_array = np.arange(num_samples)
     if shuffle:
         np.random.shuffle(index_array)
     batches = self.make_batches(num_samples, batch_size)
     nb_batch = len(batches)
     progbar = Progbar(nb_batch)
     avg_total_loss = 0.
     total_samples = 0.
     for batch_index, (batch_start, batch_end) in enumerate(batches):
         batch_ids = index_array[batch_start:batch_end]
         x_batch = x_inputs[batch_ids]
         _, loss = session.run([self.train_op, self.loss],
                               {self.x: x_batch})
         if np.isnan(loss) or np.isinf(loss):
             raise ValueError("nan or inf loss")
         cur_batch_size = (batch_end - batch_start)
         total_samples += cur_batch_size
         avg_total_loss += (loss * cur_batch_size / num_samples)
         if verbose == 1:
             progbar.update(batch_index + 1,
                            values=[("avg loss per 1000 samples",
                                     avg_total_loss * 1000. / total_samples)
                                    ],
                            force=True)
     print("avg total loss = %d" % avg_total_loss)
Example #18
 def evaluate_by_datasets(self, model):
     from keras.utils.generic_utils import Progbar
     results = []
     for i, single in enumerate(self.single_datasets):
         ys = [np.zeros(s.y_valid.shape[1:])
               for s in self.single_datasets]  # makes blank ys
         result = []
         print('Evaluating', single.name)
         progbar = Progbar(len(single.X_valid))
         for j in range(len(single.X_valid)):
             X, y = next(single.valid_generator)
             Xtemp = []
             for x_one in X:
                 x_normed = util.random_unify_3d_mels(x_one, self.duration)
                 Xtemp.append(x_normed)
             Xtemp = np.array(Xtemp)
             result.append(
                 np.argmax(y) == np.argmax(model.predict(Xtemp)[i]))
             progbar.update(j)
         results.append(result)
         progbar.update(len(single.X_valid))
         print(' =', np.sum(result) / len(result))
     accuracies = [np.sum(result) / len(result) for result in results]
     for s, acc in zip(self.single_datasets, accuracies):
         print('Accuracy with %s = %f' % (s.name, acc))
     return accuracies
Example #19
def test_progbar():
    n = 2
    input_arr = np.random.random((n, n, n))
    bar = Progbar(n)

    for i, arr in enumerate(input_arr):
        bar.update(i, list(arr))
Example #20
    def build_vocab(self, texts, verbose=1, **kwargs):
        """Builds the internal vocabulary and computes various statistics.

        Args:
            texts: The list of text items to encode.
            verbose: The verbosity level for progress. Can be 0, 1, 2. (Default value = 1)
            **kwargs: The kwargs for `token_generator`.
        """
        if self.has_vocab:
            logger.warn(
                "Tokenizer already has existing vocabulary. Overriding and building new vocabulary."
            )

        progbar = Progbar(len(texts), verbose=verbose, interval=0.25)
        count_tracker = _CountTracker()

        self._token_counts.clear()
        self._num_texts = len(texts)

        for token_data in self.token_generator(texts, **kwargs):
            indices, token = token_data[:-1], token_data[-1]
            count_tracker.update(indices)
            self._token_counts[token] += 1

            # Update progressbar per document level.
            progbar.update(indices[0])

        # Generate token2idx and idx2token.
        self.create_token_indices(self._token_counts.keys())

        # All done. Finalize progressbar update and count tracker.
        count_tracker.finalize()
        self._counts = count_tracker.counts
        progbar.update(len(texts), force=True)
Example #21
def trainer_on_batch(model,
                     train_x,
                     train_y,
                     batch_size=_default_batch_size,
                     epochs=_default_epochs):

    for epoch in range(epochs):
        print('Epoch {} of {}'.format(epoch + 1, epochs))

        nb_batches = int(train_x.shape[0] / batch_size)
        # progress_bar display
        progress_bar = Progbar(target=train_x.shape[0])

        batch_res = [None, None]
        history = []
        start_epoch = time.time()
        for iter in range(nb_batches):
            # get a batch train_set
            train_x_batch = train_x[iter * batch_size:(iter + 1) * batch_size]
            train_y_batch = train_y[iter * batch_size:(iter + 1) * batch_size]

            batch_res = model.train_on_batch(x=train_x_batch, y=train_y_batch)
            history.append(batch_res)
            # update the progress_bar
            progress_bar.update((iter + 1) * batch_size)
        end_epoch = time.time()
        print(' epoch_loss: {}   epoch_acc: {}   epoch_time:{}'.format(
            str(batch_res[0]), str(batch_res[1]), end_epoch - start_epoch))

    return model, history
Example #22
class TrainIntervalLogger(Callback):
    def __init__(self, interval=10000):
        self.interval = interval
        self.step = 0
        self.reset()

    def reset(self):
        self.interval_start = timeit.default_timer()
        self.progbar = Progbar(target=self.interval)
        self.metrics = []
        self.infos = []
        self.info_names = None
        self.episode_rewards = []

    def on_train_begin(self, logs):
        self.train_start = timeit.default_timer()
        self.metrics_names = self.model.metrics_names
        print('Training for {} steps ...'.format(self.params['nb_steps']))

    def on_train_end(self, logs):
        duration = timeit.default_timer() - self.train_start
        print('done, took {:.3f} seconds'.format(duration))

    def on_step_begin(self, step, logs):
        if self.step % self.interval == 0:
            if len(self.episode_rewards) > 0:
                metrics = np.array(self.metrics)
                assert metrics.shape == (self.interval, len(self.metrics_names))
                formatted_metrics = ''
                if not np.isnan(metrics).all():  # not all values are means
                    means = np.nanmean(self.metrics, axis=0)
                    assert means.shape == (len(self.metrics_names),)
                    for name, mean in zip(self.metrics_names, means):
                        formatted_metrics += ' - {}: {:.3f}'.format(name, mean)
                
                formatted_infos = ''
                if len(self.infos) > 0:
                    infos = np.array(self.infos)
                    if not np.isnan(infos).all():  # not all values are means
                        means = np.nanmean(self.infos, axis=0)
                        assert means.shape == (len(self.info_names),)
                        for name, mean in zip(self.info_names, means):
                            formatted_infos += ' - {}: {:.3f}'.format(name, mean)
                print('{} episodes - episode_reward: {:.3f} [{:.3f}, {:.3f}]{}{}'.format(len(self.episode_rewards), np.mean(self.episode_rewards), np.min(self.episode_rewards), np.max(self.episode_rewards), formatted_metrics, formatted_infos))
                print('')
            self.reset()
            print('Interval {} ({} steps performed)'.format(self.step // self.interval + 1, self.step))

    def on_step_end(self, step, logs):
        if self.info_names is None:
            self.info_names = logs['info'].keys()
        values = [('reward', logs['reward'])]
        self.progbar.update((self.step % self.interval) + 1, values=values, force=True)
        self.step += 1
        self.metrics.append(logs['metrics'])
        if len(self.info_names) > 0:
            self.infos.append([logs['info'][k] for k in self.info_names])

    def on_episode_end(self, episode, logs):
        self.episode_rewards.append(logs['episode_reward'])
Example #23
    def fit_model(self, X, y):
        """
        fits a model to some data
        """

        for e in range(self.nb_epoch):
            print('Epoch: ', e, ' of ', self.nb_epoch)
            progbar = Progbar(target=X.shape[0], verbose=True)

            # batch train with realtime data augmentation
            total_accuracy = 0
            total_loss = 0
            current = 0
            for X_batch, y_batch in self.datagen.flow(X, y, self.batch_size):

                # prepare the batch with random augmentations
                X_batch, y_batch = self.batch_warp(X_batch, y_batch)

                # train on the batch
                loss, accuracy = self.model.train(X_batch, y_batch, accuracy = True)
                
                # update the progress bar
                total_loss += loss * self.batch_size
                total_accuracy += accuracy * self.batch_size
                current += self.batch_size
                if current > X.shape[0]:
                    current = X.shape[0]
                else:
                    progbar.update(current, [('loss', loss), ('acc.', accuracy)])
            progbar.update(current, [('loss', total_loss/current), ('acc.', total_accuracy/current)])
            
            # checkpoints between epochs
            self.model.save_weights(self.save_weights_file, overwrite = True)
Example #24
    def _test_loop(self, f, ins, batch_size=128, verbose=0):
        '''
            Abstract method to loop over some data in batches.
        '''
        nb_sample = len(ins[0])
        outs = []
        if verbose == 1:
            progbar = Progbar(target=nb_sample)
        batches = make_batches(nb_sample, batch_size)
        index_array = np.arange(nb_sample)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            ins_batch = slice_X(ins, batch_ids)

            batch_outs = f(*ins_batch)
            if type(batch_outs) == list:
                if batch_index == 0:
                    for _ in batch_outs:
                        outs.append(0.)
                for i, batch_out in enumerate(batch_outs):
                    outs[i] += batch_out * len(batch_ids)
            else:
                if batch_index == 0:
                    outs.append(0.)
                outs[0] += batch_outs * len(batch_ids)

            if verbose == 1:
                progbar.update(batch_end)
        for i, out in enumerate(outs):
            outs[i] /= nb_sample
        return outs
Example #25
    def test(self,
             model,
             queue,
             batch_size=1000,
             verbose=True,
             test_size=None):
        iterator = None
        process = psutil.Process(os.getpid())

        def get_rss_prop():  # this is quite expensive
            return (process.memory_info().rss -
                    process.memory_info().shared) / 10**6

        rss_minus_shr_memory = get_rss_prop()

        try:
            iterator = gf_io_utils.ExampleQueueIterator(
                queue,
                num_exs_batch=batch_size,
                num_epochs=1,
                allow_smaller_final_batch=True)
            if test_size is not None:
                num_examples = min(test_size, iterator.num_examples)
            else:
                num_examples = iterator.num_examples
            num_batches = int(np.ceil(num_examples / batch_size))

            if verbose:
                progbar = Progbar(target=num_examples)

            predictions = []
            labels = []
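            # Collect predictions and labels batch by batch, reporting non-shared RSS memory on the progress bar.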

            for batch_indx, batch in enumerate(iterator):
                if batch_indx == num_batches:
                    break
                predictions.append(
                    np.vstack(model.model.predict_on_batch(batch)))
                labels.append(batch['labels'])
                if verbose:
                    if batch_indx % BATCH_FREQ_UPDATE_MEM_USAGE == 0:
                        rss_minus_shr_memory = get_rss_prop()
                    if batch_indx % BATCH_FREQ_UPDATE_PROGBAR == 0:
                        progbar.update(batch_indx * batch_size,
                                       values=[("Non-shared RSS (Mb)",
                                                rss_minus_shr_memory)])
            iterator.close()
            del iterator

        except Exception as e:
            if iterator is not None:  # NOQA
                iterator.close()  # NOQA
            raise e

        predictions = np.vstack(predictions)
        labels = np.vstack(labels)
        return ClassificationResult(labels,
                                    predictions,
                                    task_names=self.task_names)
Example #26
def train_model(train_X, val_X, mu, sigma):
    assert train_X.ndim == 3, train_X.shape
    total_X, time_steps, out_shape = train_X.shape
    trainer = GANTrainer(out_shape)
    epochs = 0

    # GAN predictions will be put in here
    try:
        mkdir('gan-conv-out')
    except FileExistsError:
        pass

    print('Training generator')

    while True:
        copy_X = train_X.copy()
        np.random.shuffle(copy_X)
        total_X, _, _ = copy_X.shape
        to_fetch = BATCH_SIZE // 2
        epochs += 1
        print('Epoch %d' % epochs)
        bar = Progbar(total_X)
        bar.update(0)
        epoch_fetched = 0

        while epoch_fetched < total_X:
            # Fetch some ground truth to train the discriminator
            for i in range(K):
                if epoch_fetched >= total_X:
                    break
                fetched = copy_X[epoch_fetched:epoch_fetched + to_fetch]
                dloss, dacc = trainer.disc_train_step(fetched)
                epoch_fetched += len(fetched)
                bar.update(epoch_fetched,
                           values=[('d_loss', dloss), ('d_acc', dacc)])

            # Train the generator (don't worry about loss)
            trainer.gen_train_step(BATCH_SIZE)

        # End of an epoch, so let's validate models (doesn't work so great,
        # TBH)
        print('\nValidating')
        disc_loss, disc_acc = trainer.disc_val(val_X, BATCH_SIZE)
        gen_loss, gen_acc = trainer.gen_val(100, BATCH_SIZE)
        print('\nDisc loss/acc:   %g/%g' % (disc_loss, disc_acc))
        print('Gen loss/acc:    %g/%g' % (gen_loss, gen_acc))

        # Also save some predictions so that we can monitor training
        print('Saving predictions')
        poses = trainer.generate_poses(16) * sigma + mu
        poses = insert_junk_entries(poses)
        savemat('gan-conv-out/gan-conv-preds-epoch-%d.mat' % epochs,
                {'poses': poses})

        # Sometimes we save a model
        if not (epochs - 1) % 5:
            dest_dir = 'saved-conv-gans/'
            print('Saving model to %s' % dest_dir)
            trainer.save(dest_dir)
Example #27
    def train_model(self):
        cbs = []
        cbs.append(EarlyStopping(patience=2))
        cbs.append(LearningRateScheduler(lambda e: self.lr * 0.999**(e / 10)))
        cb = CallBacks(cbs)
        cb.set_model(self.model)

        print('Start training chatbot...')
        train_num = len(self.en_ipt)
        cb.on_train_begin()
        for itr in range(self.epoch):
            print('Epoch %s/%s' % (itr + 1, self.epoch))
            cb.on_epoch_begin(itr)
            indexes = np.random.permutation(train_num)
            progbar = Progbar(train_num)
            losses = []
            for idx in range(int(0.8 * train_num / self.bs)):
                batch_idx = indexes[idx * self.bs:(idx + 1) * self.bs]
                en_ipt_bc = self.en_ipt[batch_idx]
                de_ipt_bc = self.de_ipt[batch_idx]
                de_opt_bc = self.de_opt[batch_idx]
                if np.random.rand() < self.tfr:  # apply teacher forcing
                    bc_loss = self.model.train_on_batch([en_ipt_bc, de_ipt_bc],
                                                        de_opt_bc)
                else:  # do not apply teacher forcing
                    ipt_len = [sum(i) for i in np.any(de_opt_bc, axis=-1)]
                    de_ipt_nt = np.zeros((self.max_de_seq, self.bs),
                                         dtype='int64')
                    en_out, h, c = self.encoder_model.predict(
                        en_ipt_bc, batch_size=self.bs)
                    de_in = np.asarray([[self.word2idx['bos']]] * self.bs)
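                    # Greedily decode step by step, feeding the model's own predictions back in.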
                    for i in range(self.max_de_seq):
                        de_out, h, c = self.decoder_model.predict(
                            [en_out, de_in, h, c], batch_size=self.bs)
                        sampled_idxs = np.argmax(de_out[:, -1, :], axis=-1)
                        de_ipt_nt[i] = sampled_idxs
                        de_in = sampled_idxs.reshape((-1, 1))
                    de_ipt_nt = de_ipt_nt.T
                    for i in range(self.bs):
                        de_ipt_nt[i, ipt_len[i]:] = 0
                    bc_loss = self.model.train_on_batch([en_ipt_bc, de_ipt_nt],
                                                        de_opt_bc)
                losses.append(bc_loss)
                progbar.add(self.bs, [('loss', np.mean(losses))])
            val_idx = indexes[-int(0.2 * train_num):]
            val_loss = self.model.evaluate(
                [self.en_ipt[val_idx], self.de_ipt[val_idx]],
                self.de_opt[val_idx],
                batch_size=self.bs,
                verbose=0)
            progbar.update(train_num, [('val_loss', np.mean(val_loss))])
            cb.on_epoch_end(itr,
                            logs={
                                'loss': np.mean(losses),
                                'val_loss': np.mean(val_loss)
                            })
            self.model.save_weights(self.ckpt_dir + 'weights.hdf5')
        cb.on_train_end()
        print('Chatbot training complete.')
Example #28
def predict(model, generator, steps):
    prog = Progbar(steps)
    preds = []
    for i, batch in enumerate(generator):
        preds.append(model.predict_on_batch(batch))
        prog.update(i + 1)
    print("")
    return preds
Example #29
    def train(self, epochs, batch_size, X_train, save_result_dir,
              save_weight_dir):
        g_losses = []
        d_losses = []
        # Keep the noise generation fixed.
        # create random noise -> N latent vectors
        np.random.seed(0)
        noise = np.random.uniform(-1, 1, size=(batch_size, self.input_dim))
        # np.save('seed50.npy', noise)
        # Start of the epoch loop
        for epoch in range(epochs):
            np.random.shuffle(X_train)
            # The training data is split into batch_size chunks, so n_iter = train // batch
            n_iter = X_train.shape[0] // batch_size
            progress_bar = Progbar(target=n_iter)
            # Start of the iteration loop
            for index in range(n_iter):
                # load real data & generate fake data
                image_batch = X_train[index * batch_size:(index + 1) *
                                      batch_size]

                for i in range(batch_size):
                    if np.random.random() > 0.5:
                        image_batch[i] = np.fliplr(image_batch[i])
                    if np.random.random() > 0.5:
                        image_batch[i] = np.flipud(image_batch[i])
                generated_images = self.g.predict(noise, verbose=0)
                # print(generated_images.shape)
                # attach label for training discriminator
                X = np.concatenate((image_batch, generated_images))
                y = np.array([1] * batch_size + [0] * batch_size)

                # training discriminator
                d_loss = self.d.train_on_batch(X, y)

                # training generator
                g_loss = self.dcgan.train_on_batch(noise,
                                                   np.array([1] * batch_size))

                progress_bar.update(index,
                                    values=[('g', g_loss), ('d', d_loss)])
            g_losses.append(g_loss)
            d_losses.append(d_loss)
            image = self.combine_images(generated_images)
            image = (image + 1) / 2.0 * 255.0
            cv2.imwrite(os.path.join(save_result_dir,
                                     str(epoch) + ".png"), image)
            print('\nEpoch ' + str(epoch) + ' end')

            # save weights for each epoch
            if (epoch + 1) % 50 == 0:
                self.g.save_weights(
                    os.path.join(save_weight_dir,
                                 "generator_" + str(epoch) + '.h5'), True)
                self.d.save_weights(
                    os.path.join(save_weight_dir,
                                 "discriminator_" + str(epoch) + '.h5'), True)
        return g_losses, d_losses
Example #30
def train(BATCH_SIZE, X_train, n_EPOCH):

    ### model define
    d = discriminator_model()
    g = generator_model()
    #d.summary()
    #g.summary()
    d_on_g = generator_containing_discriminator(g, d)
    d_optim = RMSprop(lr=0.0004)
    g_optim = RMSprop(lr=0.0002)
    g.compile(loss='mse', optimizer=g_optim)
    d_on_g.compile(loss='mse', optimizer=g_optim)
    d.trainable = True
    d.compile(loss='mse', optimizer=d_optim)

    for epoch in range(n_EPOCH):
        print("Epoch is", epoch)
        n_iter = int(X_train.shape[0] / BATCH_SIZE)
        progress_bar = Progbar(target=n_iter)

        for index in range(n_iter):
            # create random noise -> U(0,1) 10 latent vectors
            noise = np.random.uniform(0, 1, size=(BATCH_SIZE, 10))

            # load real data & generate fake data
            image_batch = X_train[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]
            generated_images = g.predict(noise, verbose=0)

            # visualize training results
            if index % 20 == 0:
                image = combine_images(generated_images)
                image = image * 127.5 + 127.5
                cv2.imwrite(
                    './result/' + str(epoch) + "_" + str(index) + ".png",
                    image)

            # attach label for training discriminator
            #print("image_batch shape", image_batch.shape)
            #print("generated_images shape", generated_images.shape)
            X = np.concatenate((image_batch, generated_images))
            y = np.array([1] * BATCH_SIZE + [0] * BATCH_SIZE)

            # training discriminator
            d_loss = d.train_on_batch(X, y)

            # training generator
            d.trainable = False
            g_loss = d_on_g.train_on_batch(noise, np.array([1] * BATCH_SIZE))
            d.trainable = True

            progress_bar.update(index, values=[('g', g_loss), ('d', d_loss)])
        print('')

        # save weights for each epoch
        g.save_weights('weights/generator.h5', True)
        d.save_weights('weights/discriminator.h5', True)
    return d, g
Example #31
def input_fn(tf_glob,
             one_hot=True,
             classes=None,
             is_training=None,
             batch_shape=[32, 224, 224, 3],
             parallelism=1):
    """ Return tensor to read from TFRecord """
    print('Creating graph for loading %s TFRecords...' % tf_glob)
    with tf.variable_scope("TFRecords"):
        record_input = data_flow_ops.RecordInput(tf_glob,
                                                 batch_size=batch_shape[0],
                                                 parallelism=parallelism)
        records_op = record_input.get_yield_op()
        records_op = tf.split(records_op, batch_shape[0], 0)
        records_op = [tf.reshape(record, []) for record in records_op]
        progbar = Progbar(len(records_op))

        images = []
        labels = []
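        # Decode, resize, and optionally one-hot encode each serialized example in the batch.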
        for i, serialized_example in enumerate(records_op):
            progbar.update(i)
            with tf.variable_scope("parse_images", reuse=True):
                features = tf.parse_single_example(
                    serialized_example,
                    features={
                        'image': tf.FixedLenFeature([], tf.string),
                        'label': tf.FixedLenFeature([], tf.int64),
                    })
                image_decoded = tf.image.decode_jpeg(features['image'],
                                                     channels=3)
                image = tf.image.convert_image_dtype(image_decoded, tf.float32)
                resized_image = tf.image.resize_images(
                    image, [batch_shape[1], batch_shape[2]])
                label = tf.cast(features['label'], tf.int32)
                if one_hot and classes:
                    label = tf.one_hot(label, classes)

                images.append(resized_image)
                labels.append(label)

        images = tf.parallel_stack(images, 0)
        labels = tf.parallel_stack(labels, 0)
        #         images = tf.cast(images, tf.float32)

        #         images = tf.reshape(images, shape=batch_shape)

        # StagingArea will store tensors
        # across multiple steps to
        # speed up execution
        images_shape = images.get_shape()
        labels_shape = labels.get_shape()
        copy_stage = data_flow_ops.StagingArea(
            [tf.float32, tf.float32], shapes=[images_shape, labels_shape])
        copy_stage_op = copy_stage.put([images, labels])
        staged_images, staged_labels = copy_stage.get()

        return images, labels
Example #32
def train(run_name,
          digit,
          nepochs,
          batch_size,
          latent_dim,
          noise_type,
          k,
          optimizer,
          model_dir,
          gen_intermediate_dims=None,
          disc_intermediate_dims=None):

    # load data and params
    Xtr, Xte, _, _ = load_data(digit)
    Xte = Xte[:(Xte.shape[0] / batch_size) *
              batch_size]  # correct for batch_size
    original_dim = Xtr.shape[-1]
    nbatches = int(Xtr.shape[0] / batch_size)
    fnm_gen, fnm_disc, fnm_hist, fnm_samp = get_filenames(model_dir, run_name)

    # load models
    generator = build_generator(batch_size, latent_dim, gen_intermediate_dims,
                                original_dim, optimizer)
    discriminator = build_discriminator(2 * batch_size, original_dim,
                                        disc_intermediate_dims, optimizer)
    combined = build_combined(2 * batch_size, latent_dim, generator,
                              discriminator, optimizer)

    # train and test
    history = init_history()
    for i in xrange(nepochs):
        print('Epoch {} of {}'.format(i + 1, nepochs))
        progress_bar = Progbar(target=nbatches)

        # train on mini-batches of train data
        epoch_losses = []
        for j in xrange(nbatches):
            progress_bar.update(j)
            batch_loss = train_on_batch(Xtr, j, batch_size, latent_dim,
                                        noise_type, k, generator,
                                        discriminator, combined)
            epoch_losses.append(batch_loss)

        # evaluate on test data
        print('\nTesting epoch {}:'.format(i + 1))
        test_loss = evaluate(Xte, batch_size, latent_dim, noise_type,
                             generator, discriminator, combined,
                             fnm_samp.format(i + 1))
        train_loss = np.mean(np.array(epoch_losses), axis=0)
        history = update_history(history, train_loss, test_loss, do_print=True)

        # save weights
        generator.save_weights(fnm_gen.format(i), True)
        discriminator.save_weights(fnm_disc.format(i), True)

    # save history
    save_history(fnm_hist, history)
Example #33
def _test_masks():
    assert os.path.exists(FLAGS.filename)
    assert os.path.exists(FLAGS.flow_dir)
    assert os.path.exists(FLAGS.root_dir)

    learner = AdversarialLearner()
    learner.setup_inference(FLAGS, aug_test=False)
    saver = tf.train.Saver([var for var in tf.trainable_variables()])

    # manages multi-threading
    sv = tf.train.Supervisor(logdir=FLAGS.test_save_dir,
                             save_summaries_secs=0,
                             saver=None)

    with open(FLAGS.filename, 'r') as f:
        samples = f.readlines()

    with sv.managed_session() as sess:
        checkpoint = FLAGS.ckpt_file
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("Resume model from checkpoint {}".format(checkpoint))
        else:
            raise IOError("Checkpoint file not found")

        sess.run(learner.test_iterator.initializer)

        n_steps = len(samples)
        progbar = Progbar(target=n_steps)

        i = 0

        for step in range(n_steps):
            if sv.should_stop():
                break
            try:
                inference = learner.inference(sess)
            except tf.errors.OutOfRangeError:
                print("End of testing dataset")  # ==> "End of dataset"
                break
            # Now write images in the test folder

            # select mask
            generated_mask = inference['mask']

            # Verbose image generation
            save_dir = os.path.join(
                FLAGS.test_save_dir,
                os.path.dirname(samples[step].split(' ')[0]))
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            filename = os.path.join(
                save_dir, os.path.basename(samples[step].split(' ')[0]))
            cv2.imwrite(filename, generated_mask.squeeze() * 255.)
            i += 1
            progbar.update(step)
Example #34
def train(generator, discriminator, combined, epochs):

    # Load and normalize data:
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = np.concatenate((x_train, x_test), axis=0)
    x_train = x_train.reshape((NUM_IMGS, IMG_ROWS, IMG_COLS, CHANNELS))
    y_train = np.concatenate((y_train, y_test), axis=0)
    y_train = y_train.reshape((-1, 1))
    x_train = (x_train.astype(np.float32) - 127.5) / 127.5

    # Label arrays for positive/negative examples
    positive_examples = np.ones((BATCH_SIZE, 1))
    negative_examples = np.zeros((BATCH_SIZE, 1))

    # Number of batch loops:
    batch_loops = int(NUM_IMGS // BATCH_SIZE)

    # Train cGAN:
    for epoch in range(epochs):
        progress_bar = Progbar(target=batch_loops)

        shuffle_idx = np.random.permutation(NUM_IMGS)
        real_imgs = x_train[shuffle_idx]
        labels = y_train[shuffle_idx]

        for batch_i in range(batch_loops):
            progress_bar.update(batch_i)

            # Discriminator:
            img_batch = real_imgs[batch_i * BATCH_SIZE:(batch_i + 1) *
                                  BATCH_SIZE]
            label_batch = labels[batch_i * BATCH_SIZE:(batch_i + 1) *
                                 BATCH_SIZE]
            noise = np.random.normal(0, 1, (BATCH_SIZE, LATENT_SIZE))
            fake_img_batch = generator.predict([noise, label_batch])

            d_loss_real = discriminator.train_on_batch(
                [img_batch, label_batch], positive_examples)
            d_loss_fake = discriminator.train_on_batch(
                [fake_img_batch, label_batch], negative_examples)
            d_loss_total = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Generator:
            noise = np.random.normal(0, 1, (2 * BATCH_SIZE, LATENT_SIZE))
            fake_labels = np.random.randint(0, NUM_CLASSES,
                                            (2 * BATCH_SIZE, 1))
            positive = np.concatenate((positive_examples, positive_examples),
                                      axis=0)

            g_loss = combined.train_on_batch([noise, fake_labels], positive)

        print("Epoch: %d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
              (epoch, d_loss_total[0], 100 * d_loss_total[1], g_loss))
        save_images(generator, epoch)

    return generator
Example #35
    def predict(self, model, queue, batch_size=1000, verbose=True):
        iterator = None
        process = psutil.Process(os.getpid())

        def get_rss_prop():  # this is quite expensive
            return (process.memory_info().rss -
                    process.memory_info().shared) / 10**6

        rss_minus_shr_memory = get_rss_prop()

        try:
            iterator = gf_io_utils.ExampleQueueIterator(
                queue,
                num_exs_batch=batch_size,
                num_epochs=1,
                allow_smaller_final_batch=True)

            if verbose:
                progbar = Progbar(target=iterator.num_examples)

            chroms = []
            starts = []
            ends = []
            predictions = []

            for batch_indx, batch in enumerate(iterator):
                chroms.append(batch['intervals/chrom'])
                starts.append(batch['intervals/start'])
                ends.append(batch['intervals/end'])
                predictions.append(
                    np.vstack(model.model.predict_on_batch(batch)))

                if verbose:
                    if batch_indx % BATCH_FREQ_UPDATE_MEM_USAGE == 0:
                        rss_minus_shr_memory = get_rss_prop()
                    if batch_indx % BATCH_FREQ_UPDATE_PROGBAR == 0:
                        progbar.update(batch_indx * batch_size,
                                       values=[("Non-shared RSS (Mb)",
                                                rss_minus_shr_memory)])

            iterator.close()
            del iterator

        except Exception as e:
            if iterator is not None:  # NOQA
                iterator.close()  # NOQA
            raise e

        # concatenate intervals and predictions
        intervals = {
            'chrom': np.concatenate(chroms),
            'start': np.concatenate(starts),
            'end': np.concatenate(ends)
        }
        predictions = np.vstack(predictions)
        return intervals, predictions
Example #36
def test_progbar():
    values_s = [None,
                [['key1', 1], ['key2', 1e-4]],
                [['key3', 1], ['key2', 1e-4]]]

    for target in (len(values_s) - 1, None):
        for verbose in (0, 1, 2):
            bar = Progbar(target, width=30, verbose=verbose, interval=0.05)
            for current, values in enumerate(values_s):
                bar.update(current, values=values)
Example #37
class TrainIntervalLogger(Callback):
    def __init__(self, interval=10000):
        self.interval = interval
        self.step = 0
        self.reset()

    def reset(self):
        """ Reset statistics """
        self.interval_start = timeit.default_timer()
        self.progbar = Progbar(target=self.interval)
        self.metrics = []
        self.infos = []
        self.info_names = None
        self.episode_rewards = []

    def on_train_begin(self, logs):
        """ Initialize training statistics at beginning of training """
        self.train_start = timeit.default_timer()
        self.metrics_names = metrics_names()
        print('Training for {} steps ...'.format(self.params['nb_steps']))

    def on_train_end(self, logs):
        """ Print training duration at end of training """
        duration = timeit.default_timer() - self.train_start
        print('done, took {:.3f} seconds'.format(duration))

    def on_step_begin(self, step, logs):
        """ Print metrics if interval is over """
        if self.step % self.interval == 0:
            if len(self.episode_rewards) > 0:
                metrics = np.array(self.metrics)
                assert metrics.shape == (self.interval, len(self.metrics_names))
                formatted_metrics = ''
                if not np.isnan(metrics).all():  # not all values are NaN
                    means = np.nanmean(self.metrics, axis=0)
                    assert means.shape == (len(self.metrics_names),)
                    for name, mean in zip(self.metrics_names, means):
                        formatted_metrics += ' - {}: {:.3f}'.format(name, mean)

                formatted_infos = ''
                print('{} episodes - episode_reward: {:.3f} [{:.3f}, {:.3f}]{}{}'.format(len(self.episode_rewards), np.mean(self.episode_rewards), np.min(self.episode_rewards), np.max(self.episode_rewards), formatted_metrics, formatted_infos))
                print('')
            self.reset()
            print('Interval {} ({} steps performed)'.format(self.step // self.interval + 1, self.step))

    def on_step_end(self, step, logs):
        """ Update progression bar at the end of each step """
        values = [('reward', logs['reward'])]
        self.progbar.update((self.step % self.interval) + 1, values=values)
        self.step += 1
        self.metrics.append(logs['metrics'])

    def on_episode_end(self, episode, logs):
        """ Update reward value at the end of each episode """
        self.episode_rewards.append(logs['episode_reward'])
    def predict_general_(self, model, X, size, load_func):

        queue = Queue.Queue()

        #generate the progress bar
        if self.verbose>0:
            progbar = Progbar(size, width=80, verbose=self.verbose)

        batch_idx = range(min(size, self.memory_batch_size))
        self.matrix_load_into_queue(X, batch_idx, queue, load_func)
        X_batch, _, _ = queue.get()

        p = []
        samples = 0
        last_update = time.time()-1000
        for _, i in enumerate(xrange(0, size, self.memory_batch_size)):
            
            next_start = i+len(batch_idx)
            next_end = min(size, next_start+self.memory_batch_size)
            if next_end>next_start:
                #spin the thread up                
                batch_idx_next = range(next_start,next_end);
                
                thread = threading.Thread(target=self.matrix_load_into_queue, args=(X,batch_idx_next,queue,load_func))
                thread.start()
            else:         
                batch_idx_next = None
                thread = None

            #predict the value
            if X_batch.shape[0]>0:
                p_curr = model.predict(X_batch, batch_size=self.batch_size, verbose=0)
                p.append(p_curr)
            
            #increment the counter
            samples+= len(batch_idx)
            
            curr_update = time.time()
            if  self.verbose>0 and (curr_update-last_update>=0.5 or (samples)>=size):
                progbar.update(samples, [])
                last_update = curr_update
                    
            #wait for the next load to happen                
            if thread is not None:
                thread.join()
                X_batch,_,_ = queue.get()
                
            #now add the next batch
            batch_idx = batch_idx_next
        
        p = np.vstack(p)
        
        return p
Exemple #39
0
    def positive(self, set_positive=1.):
        """
        Create a list of positive data points, expand them to square disk of radius `self.radius` pixels.
        :param set_positive: Set positive to max(1., p) where p is given probability
        :return: Dictionary(key: filename, value: [x, y, (p, 1-p)])
        """
        if self.positives is not None:  # If already calculated then return it.
            return self.positives, self.positives_count

        if self.files is None:  # Curate list of files if not done already.
            self.files = read_all_files(self.path)

        bar = Progbar(len(self.files))  # Create instance of progress bar.
        if self.verbose:  # Verbose output for debugging.
            print TT.info("> Collecting positive samples from dataset...")
            bar.update(0)

        index = 0  # File index - to update state of progress bar.
        count = 0  # Holds total number of positive samples.
        expanded = {}  # Holds list of files and positive pixels in flattened image with mitosis probability.
        normal = {}  # Holds list of files and positive pixel (y, x) along with class probabilities.
        #              (0: Mitotic, 1: Non-Mitotic)
        total = 0
        for data_image, target_csv in self.files:
            labels = csv2np(os.path.join(self.path, target_csv))  # Load CSV annotations into numpy array.
            expanded[data_image] = {}  # Initialize list for file
            normal[data_image] = []
            total += len(labels)
            for (y, x, p) in labels:  # Iterate over annotated pixel values.
                x = int(x)
                y = int(y)
                p = max(set_positive, float(p))
                # Image position, horizontal -> y, vertical -> x
                # Image size, (y, x)
                # @see http://www.scipy-lectures.org/advanced/image_processing/#basic-manipulations
                range_x = xrange(max(0, x - self.radius), min(x + self.radius, self.image_size[1]))
                range_y = xrange(max(0, y - self.radius), min(y + self.radius, self.image_size[0]))
                for i in range_x:
                    for j in range_y:
                        expanded[data_image][i * self.image_size[0] + j] = p  # TODO: Verify this. `x * width + y`
                        normal[data_image].append([i, j, p])  # (x, y) => (row, column)
                        count += 1
            index += 1
            if self.verbose:
                bar.update(index)
        self.positives = normal
        self.positives_sorted = expanded
        self.positives_count = count
        TT.success("> Total", count, "positive pixels from", total, "annotations.")
        return normal, count
Exemple #40
0
class TrainIntervalLogger(Callback):
    def __init__(self, interval=10000):
        self.interval = interval
        self.step = 0
        self.reset()

    def reset(self):
        self.interval_start = timeit.default_timer()
        self.progbar = Progbar(target=self.interval)
        self.metrics = []

    def on_train_begin(self, logs):
        self.train_start = timeit.default_timer()
        self.metrics_names = self.model.metrics_names
        print('Training for {} steps ...'.format(self.params['nb_steps']))

    def on_train_end(self, logs):
        duration = timeit.default_timer() - self.train_start
        print('done, took {:.3f} seconds'.format(duration))

    def on_step_begin(self, step, logs):
        if self.step % self.interval == 0:
            self.reset()
            print('Interval {} ({} steps performed)'.format(self.step // self.interval + 1, self.step))

    def on_step_end(self, step, logs):
        # TODO: work around nan's in metrics. This isn't really great yet and probably not 100% accurate
        filtered_metrics = []
        means = None
        for idx, value in enumerate(logs['metrics']):
            if not np.isnan(value):
                filtered_metrics.append(value)
            else:
                mean = np.nan
                if len(self.metrics) > 0 and not np.isnan(self.metrics).all():
                    if means is None:
                        means = np.nanmean(self.metrics, axis=0)
                        assert means.shape == (len(self.metrics_names),)
                    mean = means[idx]
                filtered_metrics.append(mean)

        values = [('reward', logs['reward'])]
        if not np.isnan(filtered_metrics).any():
            values += list(zip(self.metrics_names, filtered_metrics))
        self.progbar.update((self.step % self.interval) + 1, values=values, force=True)
        self.step += 1
        self.metrics.append(logs['metrics'])
def run(tag_dist, output_fname, force, nb_samples):
    os.makedirs(os.path.dirname(output_fname), exist_ok=True)
    if os.path.exists(output_fname) and force:
        print("Deleted {}".format(output_fname))
        os.remove(output_fname)
    else:
        assert not os.path.exists(output_fname), \
            "File {} already exists. Use --force to override it".format(output_fname)
    basename, _ = os.path.splitext(output_fname)
    anit_name = basename + "_anti_{}.png"
    hist_name = basename + "_hist_{}.png"
    plot_anitaliasing(tag_dist, anit_name, 1)
    plot_anitaliasing(tag_dist, anit_name, 2)
    plot_anitaliasing(tag_dist, anit_name, 4)
    plot_anitaliasing(tag_dist, anit_name, 8)

    labels, masks, _ = next(generator(tag_dist, 10000, antialiasing=2))
    for key in labels.dtype.names:
        m = labels[key].mean()
        s = labels[key].std()
        print("{}: {:.3f}, {:.3f}".format(key, m, s))
        assert abs(m) <= 0.03

    for label_name in sorted(set(labels.dtype.names) - set(['bits'])):
        x = labels[label_name]
        plt.hist(x.flatten(), bins=40, normed=True)
        plt.savefig(hist_name.format(label_name))
        plt.clf()

    dset = DistributionHDF5Dataset(output_fname, distribution=tag_dist,
                                   nb_samples=nb_samples, mode='w')
    progbar = Progbar(nb_samples)
    batch_size = min(25000, nb_samples)

    for labels, tags, depth_map in generator(tag_dist, batch_size, antialiasing=4):
        pos = dset.append(labels=labels, tag3d=tags, depth_map=depth_map)
        progbar.update(pos)
        if pos == nb_samples:
            break

    print("Saved tag 3d dataset to: {}".format(output_fname))
    dist_fname = basename + "_distribution.json"
    with open(dist_fname, "w+") as dist_f:
        dist_f.write(tag_dist.to_json())
        print("Saved distribution to: {}".format(dist_fname))
Exemple #42
0
    def sample(self, batch_size=100):
        if self.sampled is not None and self.batch == batch_size:
            return self.sampled, batch_size

        self.batch = batch_size

        self.positive()

        if self.verbose:
            TT.info("> Creating a random dataset...")

        if self.files is None:
            self.files = read_all_files(self.path)

        TT.info("> Sampling from", len(self), "pixels.")
        indices = xrange(len(self))
        sampled = {}
        bar = Progbar(self.batch)
        count = 0
        positives = 0
        if self.verbose:
            bar.update(count)
        for index in random.sample(indices, self.batch):
            file_id = index // self.pixels_per_image
            image, csv = self.files[file_id]
            if image not in sampled:
                sampled[image] = []
            pixel = index % self.pixels_per_image
            if image in self.positives_sorted and pixel in self.positives_sorted[image]:
                p = 1.
                positives += 1
            else:
                p = 0.
            (x, y) = self.pixel_to_xy(pixel)
            sampled[image].append([x, y, p])
            count += 1
            if self.verbose:
                bar.update(count)
        self.sampled = sampled
        if positives > 0:
            TT.warn("> Out of", batch_size, "sampled pixels,", positives, "pixels are positive.")

        return sampled, count
Exemple #43
0
    def fit(self, X, y, M, batch_size=128, nb_epoch=100, verbose=1,
            validation_split=0., lr=None, shuffle=True):
        y = standardize_y(y)

        # If a validation split size is given (e.g. validation_split=0.2)
        # then split X into smaller X and X_val,
        # and split y into smaller y and y_val.
        do_validation = False
        if validation_split > 0 and validation_split < 1:
            do_validation = True
            split_at = int(len(X) * (1 - validation_split))
            (X, X_val) = (X[0:split_at], X[split_at:])
            (y, y_val) = (y[0:split_at], y[split_at:])
            (M, M_val) = (M[0:split_at], M[split_at:])
            if verbose:
                print "Train on %d samples, validate on %d samples" % (len(y), len(y_val))
        
        index_array = numpy.arange(len(X))
        for epoch in range(nb_epoch):
            if verbose:
                print 'Epoch', epoch
            if shuffle:
                numpy.random.shuffle(index_array)

            nb_batch = int(numpy.ceil(len(X)/float(batch_size)))
            progbar = Progbar(target=len(X))
            for batch_index in range(0, nb_batch):
                batch_start = batch_index*batch_size
                batch_end = min(len(X), (batch_index+1)*batch_size)
                batch_ids = index_array[batch_start:batch_end]

                X_batch = X[batch_ids]
                y_batch = y[batch_ids]
                M_batch = M[batch_ids]
                loss = self._train(X_batch, y_batch, M_batch, lr)
                
                if verbose:
                    is_last_batch = (batch_index == nb_batch - 1)
                    if not is_last_batch or not do_validation:
                        progbar.update(batch_end, [('loss', loss)])
                    else:
                        progbar.update(batch_end, [('loss', loss), ('val. loss', self.test(X_val, y_val, M_val))])
Exemple #44
0
        # pdb.set_trace()
        # if not os.path.isfile(os.path.join(jsonpath,str(indtrain[i])+'.json')):
            # print('no file exist')
            # continue
            start = time.time()

            objvec = fget_activs(RGBX_train,OFX_train)
            
            model.fit({'objvec':objvec,'out':cur_label},batch_size=128,\
                      nb_epoch=1,shuffle=False,verbose=0,callbacks=[MPLC])
  
            endfit = time.time()
    
        # pdb.set_trace()
        
            progbar.update(tidx+passed+1)
    
            reterr = model.predict({'objvec':objvec},verbose=0)
            batchtotAP = 0
            reterr = reterr['out']
            
            curAP, acc = getAP(reterr,cur_label)
            
            batchtotAP = batchtotAP + curAP
                   
            totactAP = totactAP + batchtotAP

            lastacc = lastacc + acc
            
            curtotAP = float(totactAP) / (tidx+passed+1)
            endap = time.time()
Exemple #45
0
    def fit(self,
            X, Y, M,
            X_val, Y_val, M_val,
            lr_A=None, lr_B=None,
            batch_size=128,
            verbose=1, shuffle=True,
            epoch_start=0,
            continue_training=lambda *_: True,
            epoch_callback=lambda *_: None):

        # Require validation set to be explicitly passed in (to prevent risk
        # of dataset contamination with the embedding step)
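        # m is read here as a display-only scaling factor for the losses reported to the
        # progress bar below (an assumption based on how it is divided back out after the loop)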
        m = 10000
        do_validation = False
        if X_val is not None and Y_val is not None and M_val is not None:
            do_validation = True
            if verbose:
                print "Train on {0} samples, validate on {1} samples".format(len(Y), len(Y_val))
                print "epoch callback: {0}".format("yes" if epoch_callback else "no")
        
        index_array = numpy.arange(len(X))
        epoch = epoch_start
        callback_result = None
        while continue_training(epoch, callback_result):
            if verbose:
                print 'Epoch', epoch
            if shuffle:
                numpy.random.shuffle(index_array)

            nb_batch = int(numpy.ceil(len(X)/float(batch_size)))
            progbar = Progbar(target=len(X))
            for batch_index in range(0, nb_batch):
                batch_start = batch_index*batch_size
                batch_end = min(len(X), (batch_index+1)*batch_size)
                batch_ids = index_array[batch_start:batch_end]

                X_batch = X[batch_ids]
                Y_batch = Y[batch_ids]
                M_batch = M[batch_ids]
                loss = self._train(X_batch, Y_batch, M_batch, lr_A, lr_B)
                
                if verbose:
                    is_last_batch = (batch_index == nb_batch - 1)
                    if not is_last_batch or not do_validation:
                        progbar.update(batch_end, [('loss', loss*m)])
                    else:
                        #val_loss = self.test(X_val, Y_val, M_val)
                        i_vt = random.sample(xrange(len(X_val)), batch_size)
                        X_vt = numpy.array([X_val[i] for i in i_vt])
                        Y_vt = numpy.array([Y_val[i] for i in i_vt])
                        M_vt = numpy.array([M_val[i] for i in i_vt])
                        val_loss = self.test(X_vt, Y_vt, M_vt)
                        progbar.update(batch_end, [('loss', loss*m), ('val. loss', val_loss*m)])

            loss = progbar.sum_values['loss'][0] / (m * progbar.sum_values['loss'][1])
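            # (Progbar.sum_values['loss'] stores [weighted sum, sample count], so this recovers
            #  the mean training loss for the epoch and strips the display scaling m)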

            # call epoch callback after each round
            round_stats = {}
            round_stats['train'] = {'loss': float(loss)}
            if do_validation:
                round_stats['validate'] = {'loss': float(val_loss)}

            callback_result = epoch_callback(round_stats, epoch)

            epoch += 1
Exemple #46
0
        # y_train = readjson.to_categorical_dual(y_train,10)
        endread = time.time()
        
        if isGraph:
            model.fit({'input1':X_train,'actout':act_label,'objout':obj_label},batch_size=128,\
                        nb_epoch=1,shuffle=False,verbose=0,callbacks=[MPLC])
        else:
            # print('model fit')
            model.fit(X_train,act_label,batch_size=128,nb_epoch=1,show_accuracy=True,shuffle=False,\
                      verbose=0,callbacks=[MPLC])

        endfit = time.time()

        # pdb.set_trace()
        
        progbar.update(i)

        if showAP:
            x_each = X_train.transpose(1,0,2)
            if isGraph:
                reterr = model.predict({'input1':x_each},verbose=0)
                predobj = reterr['objout']
                predact = reterr['actout']
                batchobjAP = 0
                batchactAP = 0
                batchacc = 0
                for batidx in range(len(predobj)):    
                    sortedobjerr = [si[0] for si in sorted(enumerate(predobj[batidx]),reverse=True,key=lambda xy:xy[1])]
                    itemobjidx = np.where(obj_label[batidx]==1)
                    # sortederr = [61,32,51,...] ( 0 ~ 154 )
                    # itemidx[0] = array([34,51,61]) ( 0 ~ 154 )
Exemple #47
0
    nb_train, nb_test = X_train.shape[0], X_test.shape[0]

    train_history = defaultdict(list)
    test_history = defaultdict(list)

    for epoch in range(nb_epochs):
        print('Epoch {} of {}'.format(epoch + 1, nb_epochs))

        nb_batches = int(X_train.shape[0] / batch_size)
        progress_bar = Progbar(target=nb_batches)

        epoch_gen_loss = []
        epoch_disc_loss = []

        for index in range(nb_batches):
            progress_bar.update(index)
            # generate a new batch of noise
            noise = np.random.uniform(-1, 1, (batch_size, latent_size))

            # get a batch of real images
            image_batch = X_train[index * batch_size:(index + 1) * batch_size]
            label_batch = y_train[index * batch_size:(index + 1) * batch_size]

            # sample some labels from p_c
            sampled_labels = np.random.randint(0, 10, batch_size)

            # generate a batch of fake images, using the generated labels as a
            # conditioner. We reshape the sampled labels to be
            # (batch_size, 1) so that we can feed them into the embedding
            # layer as a length one sequence
            generated_images = generator.predict(
Exemple #48
0
def run_training(trainfile, testfile, embeddings_file, epochs,
                 static=False,
                 maxlen=100,
                 batch_size=32):
    print('Loading data...')
    sents_train, truths_train, unique_words_train, unique_tags_train = \
        P.retrieve_sentences_tags(trainfile, maxlen=maxlen)
    sents_test, truths_test, unique_word_test, unique_tags_test = \
        P.retrieve_sentences_tags(testfile, maxlen=maxlen, allowedtags=unique_tags_train)

    alltags = unique_tags_train.union(unique_tags_test)
    uniqueWords = unique_words_train.union(unique_word_test)

    gsm_mod = gensim.models.Word2Vec.load_word2vec_format(embeddings_file)
    vocab_dim = len(gsm_mod['word'])

    tagDict = {}
    for n, t in enumerate(alltags):
        tagDict[t] = n + 1

    index_dict = {}
    for n, word in enumerate(uniqueWords):
        index_dict[word] = n + 1

    nb_classes = len(tagDict)

    X_train, Y_train = P.create_input_data(sents_train, truths_train, index_dict,
                                           tagDict, maxlen=maxlen)
    X_test, Y_test = P.create_input_data(sents_test, truths_test, index_dict,
                                         tagDict, maxlen=maxlen)

    # makes output classes binary vectors instead of class numbers
    Y_train_cat = np.array([to_categorical(y, nb_classes=nb_classes + 1) for y in Y_train])
    Y_test_cat = np.array([to_categorical(y, nb_classes=nb_classes + 1) for y in Y_test])

    print(Y_train_cat.shape)
    print(X_train.shape)

    n_symbols = len(uniqueWords) + 1  # adding 1 to account for 0th index (for masking)
    embedding_weights = np.zeros((n_symbols, vocab_dim))
    for word, index in index_dict.items():
        embedding_weights[index, :] = gsm_mod[word]

    # assemble the model
    model = Sequential()
    if not static:
        model.add(Embedding(output_dim=vocab_dim, input_dim=n_symbols, mask_zero=False,
                            weights=[embedding_weights]))
    model.add(LSTM(128, return_sequences=True, input_shape=(maxlen, vocab_dim)))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(nb_classes + 1)))
    model.add(Activation('softmax'))

    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    keep_iterating = True
    count = 0
    cwd = os.getcwd()
    while keep_iterating:
        # making sure to not save the weights as the same name as
        # another is using
        count += 1
        tmpweights = "{}/tmp/weights{}.hdf5".format(cwd, count)
        if not os.path.isfile(tmpweights):
            keep_iterating = False

    print('============Training Params============\n'
          'Training file: {}\nTesting file: {}\nEmbeddings file: {}\n'
          'Epochs: {}\nStatic: {}\nWord embedding dimensions: {}\n'
          'Batch size: {}\nMax length of sentence: {}\n'
          '======================================='
          .format(trainfile, testfile, embeddings_file, epochs,
                  static, vocab_dim, batch_size, maxlen))

    print('Train...')
    best_yet = 0
    accs = []
    for e in range(epochs):
        print("Training epoch {}".format(e + 1))
        pbar = Progbar(1 + len(X_train) // batch_size)
        batch_count = 0
        for xt, yt in batch(X_train, Y_train_cat, vocab_dim, embedding_weights,
                            static, n=batch_size, shuffle=True):
            batch_count += 1
            model.fit(xt, yt, batch_size=batch_size, nb_epoch=1, verbose=False)
            pbar.update(batch_count)

        # free up some space? maybe python automatically garbage collects already
        xt = None
        yt = None

        validation_size = 1024
        print("Training finished, evaluating on {} validation samples".format(validation_size))
        # take a random subset of validation data
        for X_test_subset, Y_test_subset in batch(X_test, Y_test, vocab_dim, embedding_weights,
                                                  static, n=validation_size, shuffle=True):
            hypo = model.predict_classes(X_test_subset, batch_size=1)
            break

        correct, incorrect = custom_accuracy(y_true=Y_test_subset, y_pred=hypo)
        acc = correct / float(correct + incorrect)
        accs.append(acc)
        print("Correct: {}\nIncorrect: {}\n Accuracy: {}"
              .format(correct, incorrect, acc))
        if acc > best_yet:
            print('Improved from {} to {}, saving weights to {}\nEpoch {} finished.'
                  .format(best_yet, acc, tmpweights, e + 1))
            best_yet = acc
            model.save_weights(tmpweights, overwrite=True)

    model.load_weights(tmpweights)
    # evaluate on model's best weights

    first = True
    for xt, yt in batch(X_test, Y_test_cat, vocab_dim, embedding_weights, static, n=validation_size):
        hypo = model.predict_classes(xt, batch_size=1)
        if first:
            Y_hypo = hypo
            first = False
        else:
            Y_hypo = np.concatenate((Y_hypo, hypo))

    correct, incorrect = custom_accuracy(y_true=Y_test, y_pred=Y_hypo)
    print("Finished! Final Score\nCorrect: {}\nIncorrect: {}\n Accuracy: {}"
          .format(correct, incorrect, float(correct) / (correct + incorrect)))

    log = '{}/tmp/log_{}.txt'.format(cwd, count)
    f = open(log, 'w')
    f.write('Embeddings file: {}\n'.format(embeddings_file))
    f.write('Accuracy for each epoch: {}\n'.format(str(accs)))
    f.close()
    print('Log saved as {}'.format(log))
        losses = []
        batch_loss = []
        for i, seq in enumerate(tokenizer.texts_to_sequences_generator(text_generator())):
            # get skipgram couples for one text in the dataset
            couples, labels = skipgrams_l2c_fast(seq, vocab_size, num_senses =num_senses, window_size=4, negative_samples=1., sampling_table=sampling_table)
            if couples:
                # one gradient update per sentence (one sentence = a few 1000s of word couples)
                # print couples
                X = np.array(couples, dtype="int32")
                labels= np.array(labels, dtype="int32")
                loss = model.train_on_batch(X, labels)
                losses.append(loss)
                batch_loss.append(loss)
                if len(losses) % 10 == 0:
                    print ('\nBatch Loss: '+str(np.mean(batch_loss)))
                    progbar.update(i, values=[("loss", np.mean(losses))])
                    batch_loss = []
                samples_seen += len(labels)

                if (i and i % 10000 == 0):
                    global_weights, sense_weights = model.layers[0].get_weights()
                    avgSim, avgSimC = evaluator.get_scores(global_weights, sense_weights)
                    print("scores after %d texts:" % i)
                    print("\t avg-sim: %5.3f" % avgSim)
                    print("\t global-sim: %5.3f" % avgSimC)

        print('Samples seen:', samples_seen)



Exemple #50
0
        # Train ConvNet
        from keras.utils.generic_utils import Progbar
        batch_losses = np.zeros(epoch_size / batch_size)
        loss_history = []
        mean_loss = float("inf")
        for epoch_id in xrange(n_epochs):
            dataflow = datagen.flow(batch_size=batch_size,
                                    epoch_size=epoch_size)
            print "\nEpoch ", 1 + epoch_id
            progbar = Progbar(epoch_size)
            batch_id = 0
            for (X_batch, Y_batch) in dataflow:
                loss = di.learning.train_on_batch(graph, X_batch,
                                                  Y_batch, Q, js, offsets)
                batch_losses[batch_id] = loss[0]
                progbar.update(batch_id * batch_size)
                batch_id += 1
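            # simple early stopping: keep training while the mean batch loss improves,
            # otherwise stop and report the best epoch statistics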
            if np.mean(batch_losses) < mean_loss:
                mean_loss = np.mean(batch_losses)
                std_loss = np.std(batch_losses)
            else:
                break
            print "\nTraining loss = ", mean_loss, " +/- ", std_loss

        # Measure test accuracies
        class_probs = di.learning.predict(graph, X_test, Q, js, offsets)
        y_predicted = np.argmax(class_probs, axis=1)
        chunk_accuracies = di.singlelabel.chunk_accuracies(y_predicted, y_test)
        file_accuracies = di.singlelabel.file_accuracies(test_paths,
            class_probs, y_test, method="geometric_mean")
        mean_file_accuracy = np.mean(file_accuracies)
    def update_general(self, X, y, w, load_func, reset=False):
        
        log = logging.getLogger(__name__)
        
        data_size = y.shape[0]
          
        #figure out the weights
        if w is None and len(y.shape)==1:
            y_unique = np.unique(y)
            weights = class_weight.compute_class_weight(self.class_weight, y_unique, y)
            self.class_actual_weight_ = {}
            for i, y_val in enumerate(y_unique):
                self.class_actual_weight_[y_val] = weights[i]
                
            w = np.zeros(data_size)
            for i,v in enumerate(y):
                w[i] = self.class_actual_weight_[v]
        elif w is None:
            log.warn('Do not know how to make class weights for multidimensional output. If needed, specify weights directly. Assuming uniform weights.')
            w = np.ones(data_size)
        else:
            assert w.shape[0]==data_size, 'Weight size should match data size.'    

        if self.background:        
            queue = multiprocessing.Queue()
        else:
            queue = Queue.Queue()
        

        log.info('Starting to fit the NN model.')
        
        if self.callbacks is not None:
            for callback in self.callbacks:
                if callback is not None:
                    callback.on_train_begin(self)
                
        for epoch in xrange(self.nb_epoch):
            
            last_update = time.time()-1000
            start_time = time.time()

            #generate the progress bar
            if self.verbose>0:
                progbar = Progbar(data_size, width=80, verbose=self.verbose)

            #get random permutation
            p = np.random.permutation(range(data_size))            
                        
            #load the first batch
            batch_idx = p[0:self.memory_batch_size]
            self.matrix_load_into_queue(X, batch_idx, queue, load_func, y, w)
            X_batch,y_batch,w_batch = queue.get()
            
            if reset and epoch==0:
                
                n_features = self.get_dimensions(X_batch)
                log.info('Compiling the NN model with {} dimensions.'.format(n_features))
                self.generate_and_compile_model_(n_features)
            
            samples = 0
            for batch, i in enumerate(xrange(0, len(p), self.memory_batch_size)):
                
                #compute indicies for next batch
                next_start = i+len(batch_idx)
                next_end = min(len(p), next_start+self.memory_batch_size)
                if next_end>next_start:
                    #spin the thread up                
                    batch_idx_next = p[next_start:next_end];

                    #load data in background
                    thread = 0                   
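                    # 0 is a sentinel meaning "a next batch exists"; it is replaced by a Process
                    # when self.background is set, otherwise the batch is loaded synchronously below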
                    if self.background:
                        thread = multiprocessing.Process(target=self.matrix_load_into_queue, args=(X,batch_idx_next,queue,load_func, y, w))
                        thread.start()
                else:         
                    batch_idx_next = None
                    thread = None
                
                #perform update
                loss = self.batch_update(X_batch, y_batch, w_batch)
                
                #increment the counter
                samples+= len(batch_idx)

                curr_update = time.time()
                if  self.verbose>0 and (curr_update-last_update>=0.5 or (samples)>=len(p)):
                    progbar.update(samples, [('Loss', loss)])
                    last_update = curr_update

                if self.callbacks is not None:
                    for callback in self.callbacks:
                        if callback is not None:
                            r = callback.on_batch_end(self, epoch+1, batch+1)

                #wait for the next load to happen                
                if thread is not None:
                    #if no background, load the data now
                    if not self.background:
                        self.matrix_load_into_queue(X, batch_idx_next, queue, load_func, y, w)
                    X_batch,y_batch,w_batch = queue.get()
                    
                    #if loading a background process, do a join
                    if self.background:
                        thread.join()
                    
                #now add the next batch
                batch_idx = batch_idx_next

            finish_time = time.time()-start_time
            if self.verbose>0:
                log.info('Finished epoch {}/{}. Time per epoch (s): {:0.2f}, Time per sample (s): {}.'.format(epoch+1, self.nb_epoch,finish_time,finish_time/len(p)))
            
            #process the end of epoch, and see if need to quit out
            quit_now = False
            if self.callbacks is not None:
                for callback in self.callbacks:
                    if callback is not None:
                        r = callback.on_epoch_end(self, epoch+1)
                        if r is not None and r is True:
                            quit_now = True
                        
            
            if quit_now:
                break    
        
        return self   
Exemple #52
0
class LangModelLogger(BaseLogger):
    def __init__(self):
        super(LangModelLogger, self).__init__()
        self.verbose = None
        self.nb_epoch = None
        self.seen = 0
        self.totals = {}
        self.progbar = None
        self.log_values = []

    # def on_train_begin(self, logs=None):
    #     logger.debug('Begin training...')
    #     self.verbose = self.params['verbose']
    #     self.nb_epoch = self.params['nb_epoch']
    #
    # def on_epoch_begin(self, epoch, logs=None):
    #     # print('Epoch %d/%d' % (epoch + 1, self.nb_epoch))
    #     self.progbar = Progbar(target=self.params['nb_sample'], verbose=1)
    #     self.seen = 0
    #     self.totals = {}
    #
    # def on_batch_begin(self, batch, logs=None):
    #     if self.seen < self.params['nb_sample']:
    #         self.log_values = []
    #         self.params['metrics'] = ['loss', 'ppl', 'val_loss', 'val_ppl']

    def on_batch_end(self, batch, logs=None):
        logs = {} if logs is None else logs
        batch_size = logs.get('size', 0)
        self.seen += batch_size

        for k, v in logs.items():
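            # raw counts (encode_len, nb_words) are summed directly; every other metric is
            # weighted by batch size so on_epoch_end can average it over samples seen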
            if k in ('encode_len', 'nb_words'):
                try:
                    self.totals[k] += v
                except KeyError:
                    self.totals[k] = v
                continue

            try:
                self.totals[k] += v * batch_size
            except KeyError:
                self.totals[k] = v * batch_size

        if 'encode_len' in self.totals and 'nb_words' in self.totals and 'ppl' in self.params['metrics']:
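            # running perplexity: exp(total encoding length / total number of words) so far this epoch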
            self.totals['ppl'] = math.exp(self.totals['encode_len']/float(self.totals['nb_words']))
            self.log_values.append(('ppl', self.totals['ppl']))
        for k in self.params['metrics']:
            if k in logs:
                self.log_values.append((k, logs[k]))

        # skip progbar update for the last batch; will be handled by on_epoch_end
        if self.seen < self.params['nb_sample']:
            self.progbar.update(self.seen, self.log_values)

    def on_epoch_begin(self, epoch, logs=None):
        if self.verbose:
            self.progbar = Progbar(target=self.params['nb_sample'],
                                   verbose=self.verbose)
        self.seen = 0
        self.totals = {}

    def on_epoch_end(self, epoch, logs=None):
        logs = {} if logs is None else logs
        # logger.debug('log keys: %s' % str(logs.keys()))
        for k in self.params['metrics']:
            if k in self.totals:
                if k != 'ppl':
                    self.log_values.append((k, self.totals[k] / self.seen))
                else:
                    self.totals['ppl'] = math.exp(self.totals['encode_len']/float(self.totals['nb_words']))
                    self.log_values.append((k, self.totals['ppl']))
            if k in logs:
                self.log_values.append((k, logs[k]))
        if 'val_encode_len' in logs and 'val_nb_words' in logs:
            val_ppl = math.exp(logs['val_encode_len']/float(logs['val_nb_words']))
            self.log_values.append(('val_ppl', val_ppl))

        self.progbar.update(self.seen, self.log_values)