Code Example #1
File: generic_utils_test.py Project: wrongu/keras
def test_progbar():
    n = 2
    input_arr = np.random.random((n, n, n))

    bar = Progbar(n)
    for i, arr in enumerate(input_arr):
        bar.update(i, list(arr))

    bar = Progbar(None)
    for i, arr in enumerate(input_arr):
        bar.update(i, list(arr))
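The test above exercises both Progbar modes: a fixed target and an unknown one (None). A minimal standalone sketch, assuming the same keras.utils.generic_utils.Progbar import as in the test:

import numpy as np
from keras.utils.generic_utils import Progbar

steps = 10
bar = Progbar(steps)  # known target: renders a bar out of 10 steps
for i in range(steps):
    loss = float(np.random.random())
    bar.update(i + 1, values=[('loss', loss)])

bar = Progbar(None)  # unknown target: only the running step count is shown
for i in range(steps):
    bar.update(i + 1)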
Code Example #2
File: seq.py Project: weipingpei/visual_redactions
def seq_to_preds(rows, seq_x, pred_probs, idx_to_attr_id, dilate=False):
    '''
    rows: List of dicts, N elements
    seq_x: Tokens, N x L matrix
    pred_probs: Predicted idxs, N x L x C matrix
    '''

    pred_labels = np.argmax(pred_probs, axis=2)
    # Set to -1 where x_i is PAD_IDX
    pad_idxs = np.where(seq_x == PAD_IDX)
    pred_labels[pad_idxs] = -1

    predictions = []

    n_rows = seq_x.shape[0]
    n_attr = len(idx_to_attr_id)

    progbar = Progbar(n_rows)

    for row_idx, row in enumerate(rows):

        gt_attr_ids = row['attr_id_seq']
        rle_seq = row['rle_seq']
        image_id = row['image_id']

        # Length-L sequence, still containing predictions at PAD_IDX positions
        pred_all_attr_idxs = pred_labels[row_idx]
        pred_seq_idxs = np.where(pred_all_attr_idxs >= 0)[0]
        pred_attr_idxs = pred_all_attr_idxs[pred_seq_idxs]

        pred_attr_ids = [idx_to_attr_id[a] for a in pred_attr_idxs]

        assert len(pred_attr_ids) == len(gt_attr_ids)

        for w_idx, pred_seq_idx in enumerate(pred_seq_idxs):
            for attr_idx in range(1, n_attr):  # Exclude SAFE
                this_attr_id = idx_to_attr_id[attr_idx]
                this_score = pred_probs[row_idx, pred_seq_idx, attr_idx]
                w_rle = rle_seq[w_idx]

                if this_score > 1e-5 and w_rle is not None:

                    if this_score > 0.05 and dilate:
                        predictions.append({
                            'image_id': image_id,
                            'attr_id': this_attr_id,
                            'segmentation': dilate_rle(w_rle),
                            'score': float(this_score),
                        })
                    else:
                        predictions.append({
                            'image_id': image_id,
                            'attr_id': this_attr_id,
                            'segmentation': w_rle,
                            'score': float(this_score),
                        })
        progbar.update(row_idx)

    return predictions
Code Example #3
File: seq.py Project: weipingpei/visual_redactions
def image_list_to_arr(image_list):
    target_img_size = (250, 250)
    n_items = len(image_list)

    X = np.zeros(shape=(n_items, target_img_size[0], target_img_size[1], 3))

    pbar = Progbar(n_items)

    for idx, (image_id, this_image_path) in enumerate(image_list):
        # ----- Image -> Mat
        resized_img_path = this_image_path.replace('images', 'images_250')
        resized_img_path = osp.join('/BS/orekondy2/work/datasets/VISPR2017',
                                    resized_img_path)

        if osp.exists(resized_img_path):
            this_image_path = resized_img_path
        else:
            this_image_path = osp.join(SEG_ROOT, this_image_path)

        img = load_img(this_image_path, target_size=target_img_size)
        img_arr = img_to_array(img)
        X[idx] = img_arr
        pbar.update(idx)

    return X
Code Example #4
def train_population(population,
                     x,
                     y,
                     batch_size,
                     steps,
                     steps_save=100,
                     validation_split=0.3):
    # Split data in train and validation. Set seed to get same splits in
    # consequent calls
    x_train, x_val, y_train, y_val = train_test_split(
        x, y, test_size=validation_split, random_state=42)

    population_size = len(population)
    batch_generator = BatchGenerator(x_train, y_train, batch_size)

    results = defaultdict(list)
    stateful_metrics = ['min_loss', 'max_loss', 'mean_loss']
    for metric, _ in population[0].eval_metrics:
        stateful_metrics.extend(
            [m.format(metric) for m in ['min_{}', 'max_{}', 'mean_{}']])
    progbar = Progbar(steps, stateful_metrics=stateful_metrics)

    for step in range(1, steps + 1):
        x, y = batch_generator.next()
        for idx, member in enumerate(population):
            # One step of optimisation using hyperparameters of 'member'
            member.step_on_batch(x, y)
            # Model evaluation
            loss = member.eval_on_batch(x_val, y_val)
            # If optimised for 'STEPS_READY' steps
            if member.ready():
                # Use the rest of population to find better solutions
                exploited = member.exploit(population)
                # If new weights != old weights
                if exploited:
                    # Produce new hyperparameters for 'member'
                    member.explore()
                    loss = member.eval_on_batch(x_val, y_val)

            if step % steps_save == 0 or step == steps:
                results['model_id'].append(str(member))
                results['step'].append(step)
                results['loss'].append(loss)
                results['loss_smoothed'].append(member.loss_smoothed())
                for metric, value in member.eval_metrics:
                    results[metric].append(value)
                for h, v in member.get_hyperparameter_config().items():
                    results[h].append(v)

        # Get recently added losses to show in the progress bar
        all_losses = results['loss']
        recent_losses = all_losses[-population_size:]
        if recent_losses:
            metrics = _statistics(recent_losses, 'loss')
            for metric, _ in population[0].eval_metrics:
                metrics.extend(
                    _statistics(results[metric][-population_size:], metric))
            progbar.update(step, metrics)

    return pd.DataFrame(results)
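For reference, a minimal sketch (assuming the keras Progbar) of what stateful_metrics does above: metrics listed there are displayed exactly as last reported, while any other value is averaged over the steps seen so far.

from keras.utils.generic_utils import Progbar

steps = 5
progbar = Progbar(steps, stateful_metrics=['min_loss'])
for step in range(1, steps + 1):
    # 'loss' is averaged across updates; 'min_loss' is shown as given
    progbar.update(step, values=[('loss', 1.0 / step), ('min_loss', 1.0 / step)])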
Code Example #5
def make_predictions(conf, shot_list, loader, custom_path=None):
    generator = loader.inference_batch_generator_full_shot(shot_list)
    inference_model = build_torch_model(conf)

    if custom_path is None:
        model_path = get_model_path(conf)
    else:
        model_path = custom_path
    inference_model.load_state_dict(torch.load(model_path))
    # shot_list = shot_list.random_sublist(10)

    y_prime = []
    y_gold = []
    disruptive = []
    num_shots = len(shot_list)

    pbar = Progbar(num_shots)
    while True:
        x, y, mask, disr, lengths, num_so_far, num_total = next(generator)
        # x, y, mask = Variable(torch.from_numpy(x_).float()),
        # Variable(torch.from_numpy(y_).float()),
        # Variable(torch.from_numpy(mask_).byte())
        output = apply_model_to_np(inference_model, x)
        for batch_idx in range(x.shape[0]):
            curr_length = lengths[batch_idx]
            y_prime += [output[batch_idx, :curr_length, 0]]
            y_gold += [y[batch_idx, :curr_length, 0]]
            disruptive += [disr[batch_idx]]
            pbar.add(1.0)
        if len(disruptive) >= num_shots:
            y_prime = y_prime[:num_shots]
            y_gold = y_gold[:num_shots]
            disruptive = disruptive[:num_shots]
            break
    return y_prime, y_gold, disruptive
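Note the per-shot pbar.add(1.0) calls: add(n) advances the bar by n relative to its current position, whereas update(i) jumps to an absolute position. A minimal sketch of the equivalence, assuming the keras Progbar:

from keras.utils.generic_utils import Progbar

pbar = Progbar(100)
seen = 0
for _ in range(10):
    batch_size = 10
    pbar.add(batch_size)   # relative: advance by batch_size
    seen += batch_size
    # pbar.update(seen)    # absolute: would land on the same position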
Code Example #6
 def evaluate_by_datasets(self, model):
     from keras.utils.generic_utils import Progbar
     results = []
     for i, single in enumerate(self.single_datasets):
         ys = [np.zeros(s.y_valid.shape[1:])
               for s in self.single_datasets]  # makes blank ys
         result = []
         print('Evaluating', single.name)
         progbar = Progbar(len(single.X_valid))
         for j in range(len(single.X_valid)):
             X, y = next(single.valid_generator)
             Xtemp = []
             for x_one in X:
                 x_normed = util.random_unify_3d_mels(x_one, self.duration)
                 Xtemp.append(x_normed)
             Xtemp = np.array(Xtemp)
             result.append(
                 np.argmax(y) == np.argmax(model.predict(Xtemp)[i]))
             progbar.update(j)
         results.append(result)
         progbar.update(len(single.X_valid))
         print(' =', np.sum(result) / len(result))
     accuracies = [np.sum(result) / len(result) for result in results]
     for s, acc in zip(self.single_datasets, accuracies):
         print('Accuracy with %s = %f' % (s.name, acc))
     return accuracies
Code Example #7
File: testcodejam.py Project: lkolezhuk/LunaCapsNet
    def load_subsets(self, subsets):
        """
        Loads specified subsets of the data for the code jam.
        Returns tuple: ( images, labels, subset membership number )
        You can use the subset membership number to select the data from particular subset:
        e.g. result[(indices == 4).flatten()]
        """
        result = None
        resultLabels = None
        indices = None
        n_of_subsets = len(subsets)
        p = Progbar(n_of_subsets)
        p.update(0)
        for index, subsetIndex in enumerate(subsets):
            data = np.load("{}/{}.npz".format(self.root_path, subsetIndex))
            if result is None:
                result = data['images']
            else:
                result = np.vstack([result, data['images']])

            if resultLabels is None:
                resultLabels = data['labels']
            else:
                resultLabels = np.vstack([resultLabels, data['labels']])

            tmp = np.ones(data['labels'].shape) * subsetIndex
            if indices is None:
                indices = tmp
            else:
                indices = np.vstack([indices, tmp])
            p.update(index + 1)
        return (result, resultLabels, indices)
Code Example #8
    def training(self, x_train_hw, y_train_hw, x_test, y_test):
        """Alternatively training models"""
        # get a batch of real images
        nb_train_hw = x_train_hw.shape[0]
        num_truncate = nb_train_hw % self.batch_size
        hw_data_used_num = nb_train_hw - num_truncate

        for epoch in range(self.nb_epochs):
            print('Epoch {} of {}'.format(epoch + 1, self.nb_epochs))
            nb_batches = int(nb_train_hw / self.batch_size)
            progress_bar = Progbar(target=nb_batches)

            epoch_label_predictor_loss = []

            for index in range(nb_batches):
                progress_bar.update(index)

                # get a batch of handwritten data
                hw_data_index_start = index * self.batch_size % hw_data_used_num
                hw_data_index_end = hw_data_index_start + self.batch_size
                img_hw = x_train_hw[hw_data_index_start:hw_data_index_end]
                cls_labels_hw = y_train_hw[
                    hw_data_index_start:hw_data_index_end]

                # updating parameters of label_predictor
                epoch_label_predictor_loss.append(
                    self.character_classifier.train_on_batch([img_hw],
                                                             [cls_labels_hw]))

            score = self.test(x_test, y_test)
            weights_output_dir = os.path.join(
                self.output_dir, 'pre_weights%02d-%04f.h5' % (epoch, score[1]))
            self.save_weights(weights_output_dir)
            print('\nTesting for epoch %02d: accuracy %04f' %
                  (epoch + 1, score[1]))
Code Example #9
 def dl_progress(count, block_size, total_size):
     if ProgressTracker.progbar is None:
         if total_size == -1:
             total_size = None
         ProgressTracker.progbar = Progbar(total_size)
     else:
         ProgressTracker.progbar.update(count * block_size)
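dl_progress(count, block_size, total_size) matches the urllib reporthook signature. A hedged sketch of how such a callback is typically wired up; the ProgressTracker holder and the URL below are illustrative, not taken from the source:

from urllib.request import urlretrieve

from keras.utils.generic_utils import Progbar


class ProgressTracker(object):
    progbar = None  # shared slot so the hook can lazily create the bar


# urlretrieve calls the reporthook once per downloaded chunk
urlretrieve('https://example.com/archive.tar.gz', 'archive.tar.gz',
            reporthook=dl_progress)
ProgressTracker.progbar = None  # reset before any further download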
Code Example #10
def test_extractor_in_generator(intervals, extractor, batch_size=128):
    """
    Extracts data in bulk, then in streaming batches, and checks it's the same data.
    """
    from keras.utils.generic_utils import Progbar

    X_in_memory = extractor(intervals)
    samples_per_epoch = len(intervals)
    batches_per_epoch = int(samples_per_epoch / batch_size) + 1
    batch_array = np.zeros((batch_size, 1, 4, intervals[0].length),
                           dtype=np.float32)
    batch_generator = generate_from_intervals(intervals,
                                              extractor,
                                              batch_size=batch_size,
                                              indefinitely=False,
                                              batch_array=batch_array)
    progbar = Progbar(target=samples_per_epoch)
    for batch_indx in xrange(1, batches_per_epoch + 1):
        X_batch = next(batch_generator)
        start = (batch_indx - 1) * batch_size
        stop = batch_indx * batch_size
        if stop > samples_per_epoch:
            stop = samples_per_epoch
        # assert streamed sequences and labels match data in memory
        assert (X_in_memory[start:stop] - X_batch).sum() == 0
        progbar.update(stop)
Code Example #11
def trainer_on_batch(model,
                     train_x,
                     train_y,
                     batch_size=_default_batch_size,
                     epochs=_default_epochs):

    for epoch in range(epochs):
        print('Epoch {} of {}'.format(epoch + 1, epochs))

        nb_batches = int(train_x.shape[0] / batch_size)
        # progress_bar display
        progress_bar = Progbar(target=train_x.shape[0])

        batch_res = [None, None]
        history = []
        start_epoch = time.time()
        for iter in range(nb_batches):
            # get a batch train_set
            train_x_batch = train_x[iter * batch_size:(iter + 1) * batch_size]
            train_y_batch = train_y[iter * batch_size:(iter + 1) * batch_size]

            batch_res = model.train_on_batch(x=train_x_batch, y=train_y_batch)
            history.append(batch_res)
            # update the progress_bar
            progress_bar.update((iter + 1) * batch_size)
        end_epoch = time.time()
        print(' epoch_loss: {}   epoch_acc: {}   epoch_time:{}'.format(
            str(batch_res[0]), str(batch_res[1]), end_epoch - start_epoch))

    return model, history
Code Example #12
def _save_predictions_to_xmls(model, batch_size, embeddings, label2ind,
                              ind2label, test_set, predictions_dir,
                              binary_classification, hipaa_only,
                              extra_features, require_argmax):
    if not os.path.isdir(predictions_dir):
        os.mkdir(predictions_dir)

    print('Saving test XMLs to', predictions_dir)
    progress_bar = Progbar(target=TestSet.number_of_test_sets(test_set),
                           verbose=env.keras_verbose)

    for i, te in enumerate(TestSet.test_sets(
            embeddings,
            test_set=test_set,
            label2ind=label2ind,
            binary_classification=binary_classification,
            hipaa_only=hipaa_only,
            extra_features=extra_features),
                           start=1):
        preds = model.predict([te.X, te.X_extra], batch_size=batch_size)
        if require_argmax:
            preds = np.argmax(preds, axis=-1)
        xml = prediction_to_xml(te.X, preds, te.text, te.sents, ind2label)
        filename = os.path.basename(te.filename)[:-4] + '.xml'
        with open(os.path.join(predictions_dir, filename), 'w') as f:
            f.write(xml)

        progress_bar.update(i)
Code Example #13
File: processing.py Project: zuoxiaolei/keras-text
    def encode_texts(self, texts, include_oov=False, verbose=1, **kwargs):
        """Encodes the given texts using internal vocabulary with optionally applied encoding options. See
        ``apply_encoding_options`` to set various options.

        Args:
            texts: The list of text items to encode.
            include_oov: True to map unknown (out of vocab) tokens to 0. False to exclude the token.
            verbose: The verbosity level for progress. Can be 0, 1, 2. (Default value = 1)
            **kwargs: The kwargs for `token_generator`.

        Returns:
            The encoded texts.
        """
        if not self.has_vocab:
            raise ValueError("You need to build the vocabulary using `build_vocab` before using `encode_texts`")

        progbar = Progbar(len(texts), verbose=verbose, interval=0.25)
        encoded_texts = []
        for token_data in self.token_generator(texts, **kwargs):
            indices, token = token_data[:-1], token_data[-1]

            token_idx = self._token2idx.get(token)
            if token_idx is None and include_oov:
                token_idx = 0

            if token_idx is not None:
                _append(encoded_texts, indices, token_idx)

            # Update progressbar per document level.
            progbar.update(indices[0])

        # All done. Finalize progressbar.
        progbar.update(len(texts), force=True)
        return encoded_texts
Code Example #14
    def epoch_end_callback(self, sess, sv, epoch_num):
        # Evaluate val loss
        validation_iou = 0
        print("\nComputing Validation IoU")
        progbar = Progbar(target=self.val_steps_per_epoch)

        for i in range(self.val_steps_per_epoch):
            loss_iou = sess.run(self.val_iou,
                                feed_dict={self.is_training: False})
            validation_iou += loss_iou
            progbar.update(i)
        validation_iou /= self.val_steps_per_epoch * self.config.batch_size

        # Log to Tensorflow board
        val_sum = sess.run(self.val_sum,
                           feed_dict={self.val_iou_ph: validation_iou})

        sv.summary_writer.add_summary(val_sum, epoch_num)

        print("Epoch [{}] Validation IoU: {}".format(epoch_num,
                                                     validation_iou))
        # Model Saving
        if validation_iou > self.min_val_iou:
            self.save(sess, self.config.checkpoint_dir, 'best')
            self.min_val_iou = validation_iou
        if epoch_num % self.config.save_freq == 0:
            self.save(sess, self.config.checkpoint_dir, epoch_num)
Code Example #15
 def run_train_epoch(self,
                     session,
                     x_inputs,
                     batch_size,
                     shuffle=True,
                     verbose=1):
     num_samples = len(x_inputs)
     index_array = np.arange(num_samples)
     if shuffle:
         np.random.shuffle(index_array)
     batches = self.make_batches(num_samples, batch_size)
     nb_batch = len(batches)
     progbar = Progbar(nb_batch)
     avg_total_loss = 0.
     total_samples = 0.
     for batch_index, (batch_start, batch_end) in enumerate(batches):
         batch_ids = index_array[batch_start:batch_end]
         x_batch = x_inputs[batch_ids]
         _, loss = session.run([self.train_op, self.loss],
                               {self.x: x_batch})
         if np.isnan(loss) or np.isinf(loss):
             raise ValueError("nan or inf loss")
         cur_batch_size = (batch_end - batch_start)
         total_samples += cur_batch_size
         avg_total_loss += (loss * cur_batch_size / num_samples)
         if verbose == 1:
             progbar.update(batch_index + 1,
                            values=[("avg loss per 1000 samples",
                                     avg_total_loss * 1000. / total_samples)
                                    ],
                            force=True)
     print("avg total loss = %d" % avg_total_loss)
Code Example #16
    def detect_defects(self, validation_generator, verbose=1):

        total_samples = validation_generator.samples
        batch_size = validation_generator.batch_size

        results = list()
        labels = list()

        if (verbose != 0):
            progress_bar = Progbar(target=total_samples)

        for _ in range(np.ceil(total_samples / batch_size).astype(np.int32)):

            image_batch, lbls = validation_generator.next()

            labels = np.append(labels, lbls.reshape(lbls.shape[0]))
            image_batch = (image_batch.astype(np.float32) - 127.5) / 127.5

            tmp_rslt = self.discriminator.model.predict(
                x=image_batch, batch_size=image_batch.shape[0], verbose=0)

            if (verbose != 0):
                progress_bar.add(image_batch.shape[0])

            results = np.append(results, tmp_rslt.reshape(tmp_rslt.shape[0]))

        results = [1 if x >= 0.5 else 0 for x in results]

        tn, fp, fn, tp = confusion_matrix(labels, results).ravel()

        #################### NON DEFECT SITUATIONS ####################

        # Probability of Detecting a Non-Defect: (tp / (tp + fn))
        if ((tp + fn) != 0):
            recall = tp / (tp + fn)
        else:
            recall = 0.0

        # Probability of Correctly Detecting a Non-Defect: (tp / (tp + fp))

        if ((tp + fp) != 0):
            precision = tp / (tp + fp)
        else:
            precision = 0.0

        ###################### DEFECT SITUATIONS ######################

        # Probability of Detecting a Defect: (tn / (tn + fp))
        if ((tn + fp) != 0):
            specificity = tn / (tn + fp)
        else:
            specificity = 0.0

        # Probability of Correctly Detecting a Defect: (tn / (tn + fn))
        if ((tn + fn) != 0):
            negative_predictive_value = tn / (tn + fn)
        else:
            negative_predictive_value = 0.0

        return precision, recall, specificity, negative_predictive_value
Code Example #17
 def reset(self):
     self.interval_start = timeit.default_timer()
     self.progbar = Progbar(target=self.interval)
     self.metrics = []
     self.infos = []
     self.info_names = None
     self.episode_rewards = []
Code Example #18
def build_doc_id_to_url_map(profile_name='cogcomp',
                            bucket_name='finer-annotation',
                            prefix='annotation/by_length'):
    connection = boto.connect_s3(profile_name=profile_name)
    bucket = connection.get_bucket(bucket_name)
    bucket.list()
    url_paths = []
    for keyObj in bucket.list(prefix):
        url_path = os.path.join('https://s3.amazonaws.com/',
                                keyObj.bucket.name, keyObj.name)
        url_paths.append(url_path)
    print('Found %d docs. Fetching, parsing jsons and building map... ' %
          len(url_paths))
    progbar = Progbar(len(url_paths))
    errors = 0
    doc_id_to_url_map = {}
    for url_path in url_paths:
        try:
            response = urllib2.urlopen(url_path)
            doc_id = json.loads(response.read())['doc_id']
            doc_id_to_url_map[doc_id] = url_path
        except Exception:
            errors += 1
        progbar.add(1)
    print('Done with %d errors' % errors)

    return doc_id_to_url_map
Code Example #19
    def build_vocab(self, texts, verbose=1, **kwargs):
        """Builds the internal vocabulary and computes various statistics.

        Args:
            texts: The list of text items to encode.
            verbose: The verbosity level for progress. Can be 0, 1, 2. (Default value = 1)
            **kwargs: The kwargs for `token_generator`.
        """
        if self.has_vocab:
            logger.warn(
                "Tokenizer already has existing vocabulary. Overriding and building new vocabulary."
            )

        progbar = Progbar(len(texts), verbose=verbose, interval=0.25)
        count_tracker = _CountTracker()

        self._token_counts.clear()
        self._num_texts = len(texts)

        for token_data in self.token_generator(texts, **kwargs):
            indices, token = token_data[:-1], token_data[-1]
            count_tracker.update(indices)
            self._token_counts[token] += 1

            # Update progressbar per document level.
            progbar.update(indices[0])

        # Generate token2idx and idx2token.
        self.create_token_indices(self._token_counts.keys())

        # All done. Finalize progressbar update and count tracker.
        count_tracker.finalize()
        self._counts = count_tracker.counts
        progbar.update(len(texts), force=True)
Code Example #20
File: word_vectors.py Project: afergadis/keras-nlp
    def _load_text_word2vec(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            line = f.readline()
            num_vectors, self.vector_len = (int(x) for x in line.split())
            if self.verbose:
                self.logger.info(
                    f'Loading {num_vectors} word vectors from file '
                    f'"{file_path}".')
            oov_id = self._init_vectors()
            found = 1
            progbar = Progbar(len(self.vocab) - 1, verbose=self.verbose)
            for i in range(num_vectors):
                line = f.readline()
                values = line.split()
                word = values[0]
                word_vector = np.asarray(values[1:], dtype='float32')
                if word in self.vocab:
                    word_id = self.vocab[word]
                    self.vectors[word_id] = word_vector
                    found += 1
                progbar.update(found)
            if found < len(self.vocab):
                progbar.update(len(self.vocab) - 1)

            if oov_id is not None:
                self._replace_oov_tokens(oov_id)
Code Example #21
def make_predictions(conf, shot_list, loader, custom_path=None):
    feature_extractor = FeatureExtractor(loader)
    # save_prepath = feature_extractor.get_save_prepath()
    if custom_path is None:
        model_path = conf['paths']['model_save_path'] + \
            model_filename  # save_prepath + model_filename
    else:
        model_path = custom_path
    model = joblib.load(model_path)
    # shot_list = shot_list.random_sublist(10)

    y_prime = []
    y_gold = []
    disruptive = []

    pbar = Progbar(len(shot_list))
    fn = partial(predict_single_shot,
                 model=model,
                 feature_extractor=feature_extractor)
    pool = mp.Pool()
    print('predicting in parallel on {} processes'.format(pool._processes))
    # for (y_p, y, disr) in map(fn, shot_list):
    for (y_p, y, disr) in pool.imap(fn, shot_list):
        # y_p, y, disr = predict_single_shot(model, feature_extractor,shot)
        y_prime += [np.expand_dims(y_p, axis=1)]
        y_gold += [np.expand_dims(y, axis=1)]
        disruptive += [disr]
        pbar.add(1.0)

    pool.close()
    pool.join()
    return y_prime, y_gold, disruptive
Code Example #22
    def load_shots(self,
                   shot_list,
                   is_inference=False,
                   as_list=False,
                   num_samples=np.Inf):
        X = []
        Y = []
        Disr = []
        print("loading...")
        pbar = Progbar(len(shot_list))

        sample_prob_d, sample_prob_nd = self.get_sample_probs(
            shot_list, num_samples)
        fn = partial(self.load_shot,
                     is_inference=is_inference,
                     sample_prob_d=sample_prob_d,
                     sample_prob_nd=sample_prob_nd)
        pool = mp.Pool()
        print('loading data in parallel on {} processes'.format(
            pool._processes))
        for x, y, disr in pool.imap(fn, shot_list):
            X.append(x)
            Y.append(y)
            Disr.append(disr)
            pbar.add(1.0)
        pool.close()
        pool.join()
        return X, Y, np.array(Disr)
Code Example #23
    def _test_loop(self, f, ins, batch_size=128, verbose=0):
        '''
            Abstract method to loop over some data in batches.
        '''
        nb_sample = len(ins[0])
        outs = []
        if verbose == 1:
            progbar = Progbar(target=nb_sample)
        batches = make_batches(nb_sample, batch_size)
        index_array = np.arange(nb_sample)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            ins_batch = slice_X(ins, batch_ids)

            batch_outs = f(*ins_batch)
            if type(batch_outs) == list:
                if batch_index == 0:
                    for _ in batch_outs:
                        outs.append(0.)
                for i, batch_out in enumerate(batch_outs):
                    outs[i] += batch_out * len(batch_ids)
            else:
                if batch_index == 0:
                    outs.append(0.)
                outs[0] += batch_outs * len(batch_ids)

            if verbose == 1:
                progbar.update(batch_end)
        for i, out in enumerate(outs):
            outs[i] /= nb_sample
        return outs
Code Example #24
    def _predict_loop(self, f, ins, batch_size=128, verbose=0):
        '''
            Abstract method to loop over some data in batches.
        '''
        nb_sample = len(ins[0])
        outs = []
        if verbose == 1:
            progbar = Progbar(target=nb_sample)
        batches = make_batches(nb_sample, batch_size)
        index_array = np.arange(nb_sample)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            ins_batch = slice_X(ins, batch_ids)

            batch_outs = f(*ins_batch)
            if type(batch_outs) != list:
                batch_outs = [batch_outs]
            if batch_index == 0:
                for batch_out in batch_outs:
                    shape = (nb_sample, ) + batch_out.shape[1:]
                    outs.append(np.zeros(shape))

            for i, batch_out in enumerate(batch_outs):
                outs[i][batch_start:batch_end] = batch_out
            if verbose == 1:
                progbar.update(batch_end)
        return outs
Code Example #25
def img_to_features(X, image_list, model, batch_size=64):
    n_img, n_h, n_w, n_c = X.shape
    n_batches = n_img // batch_size + 1  # integer batch count
    n_feat = model.output_shape[-1]

    feat_mat = np.zeros((n_img, n_feat))

    pbar = Progbar(n_batches)

    for b_idx, start_idx in enumerate(range(0, n_img, batch_size)):
        end_idx = min(start_idx + batch_size, n_img)
        this_batch_size = end_idx - start_idx

        bx = X[start_idx:end_idx]
        bx = preprocess_input(bx)
        batch_feat = model.predict(bx)

        feat_mat[start_idx:end_idx] = batch_feat
        pbar.update(b_idx)

    # Create a dict: image_id -> feat
    image_id_to_visfeat = dict()
    for i, (image_id, image_path) in enumerate(image_list):
        image_id_to_visfeat[image_id] = feat_mat[i]

    return image_id_to_visfeat
Code Example #26
    def train_model(self):
        cbs = []
        cbs.append(EarlyStopping(patience=2))
        cbs.append(LearningRateScheduler(lambda e: self.lr * 0.999**(e / 10)))
        cb = CallBacks(cbs)
        cb.set_model(self.model)

        print('Start training chatbot...')
        train_num = len(self.en_ipt)
        cb.on_train_begin()
        for itr in range(self.epoch):
            print('Epoch %s/%s' % (itr + 1, self.epoch))
            cb.on_epoch_begin(itr)
            indexes = np.random.permutation(train_num)
            progbar = Progbar(train_num)
            losses = []
            for idx in range(int(0.8 * train_num / self.bs)):
                batch_idx = indexes[idx * self.bs:(idx + 1) * self.bs]
                en_ipt_bc = self.en_ipt[batch_idx]
                de_ipt_bc = self.de_ipt[batch_idx]
                de_opt_bc = self.de_opt[batch_idx]
                if np.random.rand() < self.tfr:  # apply teacher forcing
                    bc_loss = self.model.train_on_batch([en_ipt_bc, de_ipt_bc],
                                                        de_opt_bc)
                else:  # do not apply teacher forcing
                    ipt_len = [sum(i) for i in np.any(de_opt_bc, axis=-1)]
                    de_ipt_nt = np.zeros((self.max_de_seq, self.bs),
                                         dtype='int64')
                    en_out, h, c = self.encoder_model.predict(
                        en_ipt_bc, batch_size=self.bs)
                    de_in = np.asarray([[self.word2idx['bos']]] * self.bs)
                    for i in range(self.max_de_seq):
                        de_out, h, c = self.decoder_model.predict(
                            [en_out, de_in, h, c], batch_size=self.bs)
                        sampled_idxs = np.argmax(de_out[:, -1, :], axis=-1)
                        de_ipt_nt[i] = sampled_idxs
                        de_in = sampled_idxs.reshape((-1, 1))
                    de_ipt_nt = de_ipt_nt.T
                    for i in range(self.bs):
                        de_ipt_nt[i, ipt_len[i]:] = 0
                    bc_loss = self.model.train_on_batch([en_ipt_bc, de_ipt_nt],
                                                        de_opt_bc)
                losses.append(bc_loss)
                progbar.add(self.bs, [('loss', np.mean(losses))])
            val_idx = indexes[-int(0.2 * train_num):]
            val_loss = self.model.evaluate(
                [self.en_ipt[val_idx], self.de_ipt[val_idx]],
                self.de_opt[val_idx],
                batch_size=self.bs,
                verbose=0)
            progbar.update(train_num, [('val_loss', np.mean(val_loss))])
            cb.on_epoch_end(itr,
                            logs={
                                'loss': np.mean(losses),
                                'val_loss': np.mean(val_loss)
                            })
            self.model.save_weights(self.ckpt_dir + 'weights.hdf5')
        cb.on_train_end()
        print('Chatbot training complete.')
Code Example #27
def tensorise_smiles_mp(smiles,
                        max_degree=5,
                        max_atoms=None,
                        workers=cpu_count() - 1,
                        chunksize=3000,
                        verbose=True):
    ''' Multiprocess implementation of `tensorise_smiles`

    # Arguments:
        See `tensorise_smiles` documentation

    # Additional arguments:
        workers: int, num parallel processes
        chunksize: int, num molecules tensorised per worker, bigger chunksize is
            preferred as each process will preallocate np.arrays

    # Returns:
        See `tensorise_smiles` documentation

    # TODO:
        - fix python keyboardinterrupt bug:
          https://noswap.com/blog/python-multiprocessing-keyboardinterrupt
        - replace progbar with proper logging
    '''

    pool = Pool(processes=workers)

    # Create an iterator
    #http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
    def chunks(l, n):
        """Yield successive n-sized chunks from l."""
        for i in range(0, len(l), n):
            yield l[i:i + n]

    smiles_chunks = chunks(smiles, chunksize)

    # MAP: Tensorise in parallel
    map_function = partial(tensorise_smiles,
                           max_degree=max_degree,
                           max_atoms=max_atoms)
    if verbose:
        print('Tensorising molecules in batches...')
        pbar = Progbar(len(smiles), width=50)
        tensor_list = []
        for tensors in pool.imap(map_function, smiles_chunks):
            pbar.add(tensors[0].shape[0])
            tensor_list.append(tensors)
        print('Merging batch tensors...    ', end='')
    else:
        tensor_list = pool.map(map_function, smiles_chunks)
    if verbose:
        print('[DONE]')

    # REDUCE: Concatenate the obtained tensors
    pool.close()
    pool.join()
    return concat_mol_tensors(tensor_list,
                              match_degree=max_degree != None,
                              match_max_atoms=max_atoms != None)
Code Example #28
def train_model(train_X, val_X, mu, sigma):
    assert train_X.ndim == 3, train_X.shape
    total_X, time_steps, out_shape = train_X.shape
    trainer = GANTrainer(out_shape)
    epochs = 0

    # GAN predictions will be put in here
    try:
        mkdir('gan-conv-out')
    except FileExistsError:
        pass

    print('Training generator')

    while True:
        copy_X = train_X.copy()
        np.random.shuffle(copy_X)
        total_X, _, _ = copy_X.shape
        to_fetch = BATCH_SIZE // 2
        epochs += 1
        print('Epoch %d' % epochs)
        bar = Progbar(total_X)
        bar.update(0)
        epoch_fetched = 0

        while epoch_fetched < total_X:
            # Fetch some ground truth to train the discriminator
            for i in range(K):
                if epoch_fetched >= total_X:
                    break
                fetched = copy_X[epoch_fetched:epoch_fetched + to_fetch]
                dloss, dacc = trainer.disc_train_step(fetched)
                epoch_fetched += len(fetched)
                bar.update(epoch_fetched,
                           values=[('d_loss', dloss), ('d_acc', dacc)])

            # Train the generator (don't worry about loss)
            trainer.gen_train_step(BATCH_SIZE)

        # End of an epoch, so let's validate models (doesn't work so great,
        # TBH)
        print('\nValidating')
        disc_loss, disc_acc = trainer.disc_val(val_X, BATCH_SIZE)
        gen_loss, gen_acc = trainer.gen_val(100, BATCH_SIZE)
        print('\nDisc loss/acc:   %g/%g' % (disc_loss, disc_acc))
        print('Gen loss/acc:    %g/%g' % (gen_loss, gen_acc))

        # Also save some predictions so that we can monitor training
        print('Saving predictions')
        poses = trainer.generate_poses(16) * sigma + mu  # un-normalise with the passed-in mu/sigma
        poses = insert_junk_entries(poses)
        savemat('gan-conv-out/gan-conv-preds-epoch-%d.mat' % epochs,
                {'poses': poses})

        # Sometimes we save a model
        if not (epochs - 1) % 5:
            dest_dir = 'saved-conv-gans/'
            print('Saving model to %s' % dest_dir)
            trainer.save(dest_dir)
Code Example #29
    def test(self,
             model,
             queue,
             batch_size=1000,
             verbose=True,
             test_size=None):
        iterator = None
        process = psutil.Process(os.getpid())

        def get_rss_prop():  # this is quite expensive
            return (process.memory_info().rss -
                    process.memory_info().shared) / 10**6

        rss_minus_shr_memory = get_rss_prop()

        try:
            iterator = gf_io_utils.ExampleQueueIterator(
                queue,
                num_exs_batch=batch_size,
                num_epochs=1,
                allow_smaller_final_batch=True)
            if test_size is not None:
                num_examples = min(test_size, iterator.num_examples)
            else:
                num_examples = iterator.num_examples
            num_batches = int(np.ceil(num_examples / batch_size))

            if verbose:
                progbar = Progbar(target=num_examples)

            predictions = []
            labels = []

            for batch_indx, batch in enumerate(iterator):
                if batch_indx == num_batches:
                    break
                predictions.append(
                    np.vstack(model.model.predict_on_batch(batch)))
                labels.append(batch['labels'])
                if verbose:
                    if batch_indx % BATCH_FREQ_UPDATE_MEM_USAGE == 0:
                        rss_minus_shr_memory = get_rss_prop()
                    if batch_indx % BATCH_FREQ_UPDATE_PROGBAR == 0:
                        progbar.update(batch_indx * batch_size,
                                       values=[("Non-shared RSS (Mb)",
                                                rss_minus_shr_memory)])
            iterator.close()
            del iterator

        except Exception as e:
            if iterator is not None:  # NOQA
                iterator.close()  # NOQA
            raise e

        predictions = np.vstack(predictions)
        labels = np.vstack(labels)
        return ClassificationResult(labels,
                                    predictions,
                                    task_names=self.task_names)
Code Example #30
File: utils.py Project: zjjlivein/kaggle
def predict(model, generator, steps):
    prog = Progbar(steps)
    preds = []
    for i, batch in enumerate(generator):
        preds.append(model.predict_on_batch(batch))
        prog.update(i + 1)
    print("")
    return preds
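A hedged usage sketch for the helper above; the model and generator here are placeholders built only to make the call self-contained, not part of the original project:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([Dense(1, input_shape=(10,))])
model.compile(optimizer='sgd', loss='mse')


def batch_generator(X, batch_size=32):
    # Finite generator of input batches; predict() iterates it to exhaustion
    for start in range(0, len(X), batch_size):
        yield X[start:start + batch_size]


X = np.random.random((100, 10))
steps = int(np.ceil(len(X) / 32))  # 4 batches -> Progbar target of 4
preds = predict(model, batch_generator(X), steps)
y_pred = np.vstack(preds)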