Example #1
def preprocess_batch(batch):
    # Add a leading episode axis to a single 4-D episode so downstream code
    # can treat it as a batch of episodes.
    if len(batch.x_train.shape) == 4:
        x_train = np.expand_dims(batch.x_train, 0)
        y_train = np.expand_dims(batch.y_train, 0)
        x_test = np.expand_dims(batch.x_test, 0)
        y_test = np.expand_dims(batch.y_test, 0)
        if batch.x_unlabel is not None:
            x_unlabel = np.expand_dims(batch.x_unlabel, 0)
        else:
            x_unlabel = None
        if hasattr(batch, 'y_unlabel') and batch.y_unlabel is not None:
            y_unlabel = np.expand_dims(batch.y_unlabel, 0)
        else:
            y_unlabel = None

        return Episode(x_train,
                       y_train,
                       x_test,
                       y_test,
                       x_unlabel=x_unlabel,
                       y_unlabel=y_unlabel,
                       y_train_str=batch.y_train_str,
                       y_test_str=batch.y_test_str)
    else:
        return batch
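Every example on this page constructs an Episode object whose definition is not shown. A minimal sketch of such a container, assuming only the keyword arguments used in Example #1 (the real class in each repository may differ and, as in Examples #2 and #4, may also take index arguments):

class Episode(object):
    """Minimal few-shot episode container (illustrative sketch only)."""

    def __init__(self, x_train, y_train, x_test, y_test,
                 x_unlabel=None, y_unlabel=None,
                 y_train_str=None, y_test_str=None):
        # Labeled support set and query set.
        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        # Optional unlabeled pool (may include distractor classes).
        self.x_unlabel, self.y_unlabel = x_unlabel, y_unlabel
        # Optional human-readable class names.
        self.y_train_str, self.y_test_str = y_train_str, y_test_str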
Example #2
    def _process_batch(self, batch, super_classes=False):
        """Convert np arrays to variable"""
        x_train = Variable(torch.from_numpy(batch.x_train).type(
            torch.FloatTensor),
                           requires_grad=False).cuda()
        x_test = Variable(torch.from_numpy(batch.x_test).type(
            torch.FloatTensor),
                          requires_grad=False).cuda()

        if batch.x_unlabel is not None and batch.x_unlabel.size > 0:
            x_unlabel = Variable(torch.from_numpy(batch.x_unlabel).type(
                torch.FloatTensor),
                                 requires_grad=False).cuda()
            y_unlabel = Variable(torch.from_numpy(
                batch.y_unlabel.astype(np.int64)),
                                 requires_grad=False).cuda()
        else:
            x_unlabel = None
            y_unlabel = None

        if super_classes:
            labels_train = Variable(torch.from_numpy(
                batch.y_train_str[:, 1]).type(torch.LongTensor),
                                    requires_grad=False).unsqueeze(0).cuda()
            labels_test = Variable(torch.from_numpy(
                batch.y_test_str[:, 1]).type(torch.LongTensor),
                                   requires_grad=False).unsqueeze(0).cuda()
        else:
            labels_train = Variable(torch.from_numpy(
                batch.y_train.astype(np.int64)[:, :, 1]),
                                    requires_grad=False).cuda()
            labels_test = Variable(torch.from_numpy(
                batch.y_test.astype(np.int64)[:, :, 1]),
                                   requires_grad=False).cuda()

        return Episode(x_train,
                       labels_train,
                       np.expand_dims(batch.train_indices, 0),
                       x_test,
                       labels_test,
                       np.expand_dims(batch.test_indices, 0),
                       x_unlabel=x_unlabel,
                       y_unlabel=y_unlabel,
                       unlabel_indices=np.expand_dims(batch.unlabel_indices,
                                                      0),
                       y_train_str=batch.y_train_str,
                       y_test_str=batch.y_test_str)
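torch.autograd.Variable has been deprecated since PyTorch 0.4, so on current versions the same conversion can be written directly on tensors. A rough equivalent of the conversion above (the array shapes and device selection are illustrative, not taken from the original code):

import numpy as np
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def to_tensor(array, device, dtype=torch.float32):
    # Convert a numpy array to a tensor on the chosen device; gradients are
    # not tracked by default, matching requires_grad=False above.
    return torch.from_numpy(array).to(device=device, dtype=dtype)

x_train = to_tensor(np.zeros((1, 5, 3, 84, 84), dtype=np.float32), device)
labels_train = to_tensor(np.zeros((1, 5), dtype=np.int64), device,
                         dtype=torch.long)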
Example #3
    def next_episode(self, within_category=False, catcode=None):
        """
      (1) Pick random set of classes
      (2) Pick random partitioning into train, test, unlabeled
      """

        sel_classes = np.random.choice(range(len(self.class_dict.keys())),
                                       size=self.al_instance.n_class +
                                       self.al_instance.n_distractor,
                                       replace=False)
        k_per_class = [
            None for i in range(self.al_instance.n_class +
                                self.al_instance.n_distractor)
        ]

        total_train = None
        total_test = None
        total_unlbl = None
        for idx, cl in enumerate(sel_classes[:self.al_instance.n_class]):
            train, test, unlbl = self._get_rand_partition(
                list(self.class_dict.keys())[cl], idx, k_per_class[idx])
            total_train = self._concat_or_identity(total_train, train)
            total_test = self._concat_or_identity(total_test, test)
            total_unlbl = self._concat_or_identity(total_unlbl, unlbl)

        for idx, cl in enumerate(sel_classes[self.al_instance.n_class:]):
            unlbl = self._get_rand_partition(
                list(self.class_dict.keys())[cl],
                self.al_instance.n_class + idx, k_per_class[idx])
            total_unlbl = self._concat_or_identity(total_unlbl, unlbl)

        assert self._check_shape(total_train, self.al_instance.n_class,
                                 self.al_instance.k_train)
        assert self._check_shape(total_test, self.al_instance.n_class,
                                 self.al_instance.k_test)
        assert self._check_shape(
            total_unlbl,
            self.al_instance.n_class + self.al_instance.n_distractor,
            self.al_instance.k_unlbl)

        return Episode(x_train=total_train.data,
                       y_train=total_train.labels,
                       x_test=total_test.data,
                       y_test=total_test.labels,
                       x_unlabel=total_unlbl.data,
                       y_unlabel=total_unlbl.labels)
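The helper _concat_or_identity is not shown here; from its use it presumably returns the new partition on the first class and otherwise appends it to the running episode. A hypothetical reimplementation under that assumption, written as a free function for brevity (the .data/.labels attributes follow the access pattern in the return statement above):

import numpy as np

def _concat_or_identity(total, part):
    # First class: nothing accumulated yet, so the partition becomes the total.
    if total is None:
        return part
    # Otherwise stack the new class's images and labels onto the accumulator.
    total.data = np.concatenate([total.data, part.data], axis=0)
    total.labels = np.concatenate([total.labels, part.labels], axis=0)
    return total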
Example #4
def preprocess_batch(batch):
    # Add a leading episode axis to a single 4-D episode, then move the
    # channel axis so the images are NCHW instead of NHWC.
    if len(batch.x_train.shape) == 4:
        x_train = np.expand_dims(batch.x_train, 0)
        y_train = np.expand_dims(batch.y_train, 0)
        x_test = np.expand_dims(batch.x_test, 0)
        y_test = np.expand_dims(batch.y_test, 0)
        if batch.x_unlabel is not None:
            x_unlabel = np.expand_dims(batch.x_unlabel, 0)
            x_unlabel = np.rollaxis(x_unlabel, 4, 2)
        else:
            x_unlabel = None

        if hasattr(batch, 'y_unlabel') and batch.y_unlabel is not None:
            y_unlabel = np.expand_dims(batch.y_unlabel, 0)
        else:
            y_unlabel = None

        x_train = np.rollaxis(x_train, 4, 2)
        x_test = np.rollaxis(x_test, 4, 2)

        return Episode(x_train,
                       y_train,
                       batch.train_indices,
                       x_test,
                       y_test,
                       batch.test_indices,
                       x_unlabel=x_unlabel,
                       y_unlabel=y_unlabel,
                       unlabel_indices=batch.unlabel_indices,
                       y_train_str=batch.y_train_str,
                       y_test_str=batch.y_test_str)
    # Already has an episode axis; return unchanged.
    return batch
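The np.rollaxis(x, 4, 2) calls move the channel axis of the 5-D arrays from last position to position 2, i.e. they convert the images from NHWC to NCHW layout once the leading episode axis has been added. A small standalone illustration (the sizes are arbitrary):

import numpy as np

x = np.zeros((1, 25, 84, 84, 3))  # (episode, examples, H, W, C)
x = np.rollaxis(x, 4, 2)          # roll axis 4 (channels) back to position 2
print(x.shape)                    # (1, 25, 3, 84, 84)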
Example #5
  def next_episode(self, within_category=False, catcode=None):
    """Gets a new episode.

    within_category: bool. Whether or not to restrict the episode's classes to
    belong to the same more general category (only applicable for datasets with
    self._category_labels defined, e.g. JakeImageNet). If True, a random
    general category is chosen unless catcode is given.

    catcode: str, e.g. 'n02795169'. If catcode is not None, the classes chosen
    for this episode are restricted to synsets belonging to the more general
    category with code catcode.
    """

    if within_category or catcode is not None:
      assert hasattr(self, "_category_labels")
      assert hasattr(self, "_category_label_str")
      if catcode is None:
        # Choose a category for this episode's classes
        cat_idx = np.random.randint(len(self._category_label_str))
        catcode = list(self._catcode_to_syncode.keys())[cat_idx]
      cat_synsets = self._catcode_to_syncode[catcode]
      cat_synsets_str = [self._syncode_to_str[code] for code in cat_synsets]
      allowable_inds = []
      for syn_str in cat_synsets_str:
        allowable_inds.append(np.where(np.array(self._label_str) == syn_str)[0])
      class_seq = np.array(allowable_inds).reshape((-1))
    else:
      num_label_cls = len(self._label_str)
      class_seq = np.arange(num_label_cls)

    self._rnd.shuffle(class_seq)

    train_img_ids = []
    train_labels = []
    test_img_ids = []
    test_labels = []

    train_unlabel_img_ids = []
    non_distractor = []

    train_labels_str = []
    test_labels_str = []

    is_training = self._split in ["train", "trainval"]
    assert is_training or self._split in ["val", "test"]

    for ii in range(self._nway + self._num_distractor):

      cc = class_seq[ii]
      # print(cc, ii < self._nway)
      _ids = self._label_idict[cc]

      # Split the image IDs into labeled and unlabeled.
      _label_ids = list(
          filter(lambda _id: _id in self._label_split_idx_set, _ids))
      _unlabel_ids = list(
          filter(lambda _id: _id not in self._label_split_idx_set, _ids))
      self._rnd.shuffle(_label_ids)
      self._rnd.shuffle(_unlabel_ids)

      # Add support set and query set (not for distractors).
      if ii < self._nway:
        train_img_ids.extend(_label_ids[:self._nshot])

        # Use the rest of the labeled image as queries, if num_test = -1.
        QUERY_SIZE_LARGE_ERR_MSG = (
            "Query + reference should be less than labeled examples." +
            "Num labeled {} Num test {} Num shot {}".format(
                len(_label_ids), self._num_test, self._nshot))
        assert self._nshot + self._num_test <= len(
            _label_ids), QUERY_SIZE_LARGE_ERR_MSG

        if self._num_test == -1:
          if is_training:
            num_test = len(_label_ids) - self._nshot
          else:
            num_test = len(_label_ids) - self._nshot - self._num_unlabel
        else:
          num_test = self._num_test
          if is_training:
            assert num_test <= len(_label_ids) - self._nshot
          else:
            assert num_test <= len(_label_ids) - self._num_unlabel - self._nshot

        test_img_ids.extend(_label_ids[self._nshot:self._nshot + num_test])
        train_labels.extend([ii] * self._nshot)
        train_labels_str.extend([self._label_str[cc]] * self._nshot)
        test_labels.extend([ii] * num_test)
        test_labels_str.extend([self._label_str[cc]] * num_test)
        non_distractor.extend([1] * self._num_unlabel)
      else:
        non_distractor.extend([0] * self._num_unlabel)

      # Add unlabeled images here.
      if is_training:
        # Use labeled, unlabeled split here for refinement.
        train_unlabel_img_ids.extend(_unlabel_ids[:self._num_unlabel])

      else:
        # Copy test set for refinement.
        # This will only work if the test procedure is rolled out in a sequence.
        train_unlabel_img_ids.extend(_label_ids[
            self._nshot + num_test:self._nshot + num_test + self._num_unlabel])

    train_img = self.get_images(train_img_ids) / 255.0
    train_unlabel_img = self.get_images(train_unlabel_img_ids) / 255.0
    test_img = self.get_images(test_img_ids) / 255.0
    train_labels = np.array(train_labels)
    test_labels = np.array(test_labels)
    train_labels_str = np.array(train_labels_str)
    test_labels_str = np.array(test_labels_str)
    non_distractor = np.array(non_distractor)

    test_ids_set = set(test_img_ids)
    for _id in train_unlabel_img_ids:
      assert _id not in test_ids_set

    if self._shuffle_episode:
      # log.fatal('')
      # Shuffle the sequence order in an episode. Very important for RNN based
      # meta learners.
      train_idx = np.arange(train_img.shape[0])
      self._rnd.shuffle(train_idx)
      train_img = train_img[train_idx]
      train_labels = train_labels[train_idx]

      train_unlabel_idx = np.arange(train_unlabel_img.shape[0])
      self._rnd.shuffle(train_unlabel_idx)
      train_unlabel_img = train_unlabel_img[train_unlabel_idx]

      test_idx = np.arange(test_img.shape[0])
      self._rnd.shuffle(test_idx)
      test_img = test_img[test_idx]
      test_labels = test_labels[test_idx]

    return Episode(
        train_img,
        train_labels,
        test_img,
        test_labels,
        x_unlabel=train_unlabel_img,
        y_unlabel=non_distractor,
        y_train_str=train_labels_str,
        y_test_str=test_labels_str)
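The per-class bookkeeping above boils down to splitting each class's image IDs into a labeled pool (from which support and query examples are drawn) and an unlabeled pool. The same idea in isolation, with made-up IDs and sizes (purely illustrative, not the original data):

import numpy as np

rnd = np.random.RandomState(0)
label_split_idx_set = {0, 1, 2, 3, 4}   # IDs designated as labeled
ids = list(range(10))                   # all image IDs of one class

label_ids = [i for i in ids if i in label_split_idx_set]
unlabel_ids = [i for i in ids if i not in label_split_idx_set]
rnd.shuffle(label_ids)
rnd.shuffle(unlabel_ids)

nshot, num_test, num_unlabel = 1, 2, 2
support = label_ids[:nshot]                   # goes into x_train
query = label_ids[nshot:nshot + num_test]     # goes into x_test
unlabeled = unlabel_ids[:num_unlabel]         # goes into x_unlabel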
Example #6
    def next(self, within_category=False, catcode=None):
        """
    (1) Pick random set of classes
    (2) Pick random partitioning into train, test, unlabeled
    """
        sel_classes = np.random.choice(range(len(self.class_dict.keys())),
                                       size=self.al_instance.n_class +
                                       self.al_instance.n_distractor,
                                       replace=False)
        # if len(self.class_dict.keys()) < 50:
        k_per_class = [
            None for i in range(self.al_instance.n_class +
                                self.al_instance.n_distractor)
        ]

        total_train = None
        total_test = None
        total_unlbl = None
        n_class = self.al_instance.n_class
        n_distractor = self.al_instance.n_distractor
        k_train = self.al_instance.k_train
        k_test = self.al_instance.k_test
        k_unlbl = self.al_instance.k_unlbl
        total_train = np.zeros([n_class * k_train, 84, 84, 3],
                               dtype=np.float32)
        total_test = np.zeros([n_class * k_test, 84, 84, 3], dtype=np.float32)
        total_unlbl = np.zeros([(n_class + n_distractor) * k_unlbl, 84, 84, 3],
                               dtype=np.float32)
        total_train_label = np.zeros([n_class * k_train], dtype=np.int64)
        total_test_label = np.zeros([n_class * k_test], dtype=np.int64)
        # Covers the distractor classes too, so y_unlabel matches x_unlabel;
        # distractor entries keep label 0 and real classes are set to 1 below.
        total_unlbl_label = np.zeros([(n_class + n_distractor) * k_unlbl],
                                     dtype=np.int64)
        y_train_str = []
        y_test_str = []
        for idx, cl in enumerate(sel_classes[:self.al_instance.n_class]):
            train, test, unlbl = self._get_rand_partition(
                list(self.class_dict.keys())[cl], idx, k_per_class[idx])
            total_train[idx * k_train:(idx + 1) * k_train] = train
            total_test[idx * k_test:(idx + 1) * k_test] = test
            y_train_str.extend([cl] * k_train)
            y_test_str.extend([cl] * k_test)
            total_unlbl[idx * k_unlbl:(idx + 1) * k_unlbl] = unlbl
            total_train_label[idx * k_train:(idx + 1) * k_train] = idx
            total_test_label[idx * k_test:(idx + 1) * k_test] = idx
            total_unlbl_label[idx * k_unlbl:(idx + 1) * k_unlbl] = 1

        for idx, cl in enumerate(sel_classes[self.al_instance.n_class:]):
            unlbl = self._get_rand_partition(
                list(self.class_dict.keys())[cl],
                self.al_instance.n_class + idx, k_per_class[idx])
            total_unlbl[(idx + n_class) * k_unlbl:(idx + n_class + 1) *
                        k_unlbl] = unlbl

        if self._split == 'train_phase_train':
            y_sel = sel_classes[:self.al_instance.n_class]
        else:
            y_sel = None  # No need to forbid here.

        if self._split == 'train_phase_train':
            for jj in range(total_train.shape[0]):
                total_train[jj] = self._sess.run(
                    self._rnd_process,
                    feed_dict={self._rnd_process_plh: total_train[jj]})
            for jj in range(total_test.shape[0]):
                total_test[jj] = self._sess.run(
                    self._rnd_process,
                    feed_dict={self._rnd_process_plh: total_test[jj]})

        total_train = self.normalize(total_train)
        total_unlbl = self.normalize(total_unlbl)
        total_test = self.normalize(total_test)

        return Episode(x_train=total_train,
                       y_train=total_train_label,
                       x_test=total_test,
                       y_test=total_test_label,
                       x_unlabel=total_unlbl,
                       y_unlabel=total_unlbl_label,
                       y_train_str=y_train_str,
                       y_test_str=y_test_str,
                       y_sel=y_sel)
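Example #6 preallocates flat arrays and fills one contiguous block per class, so the episode labels come out as 0,...,0, 1,...,1, and so on. The block-filling pattern in isolation (sizes arbitrary):

import numpy as np

n_class, k_train = 3, 2
total_train_label = np.zeros([n_class * k_train], dtype=np.int64)
for idx in range(n_class):
    # Each class occupies a contiguous slice of length k_train.
    total_train_label[idx * k_train:(idx + 1) * k_train] = idx
print(total_train_label)  # [0 0 1 1 2 2]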
Example #7
    def next_episode(self, within_category=False):
        """Gets a new episode.
    within_category: bool. Whether or not to choose classes
    which all belong to the same more general category.
    (Only applicable for datasets with self._category_labels defined).
    """

        num_label_cls = len(self._label_str)

        if self._mode_ratio < 1.0:
            if self._train_modes:
                self.class_seq = list(
                    filter(lambda _id: _id in self._class_train_set,
                           range(0, num_label_cls)))
            else:
                self.class_seq = list(
                    filter(lambda _id: _id not in self._class_train_set,
                           range(0, num_label_cls)))
        else:
            self.class_seq = np.arange(num_label_cls)

        train_img_ids = []
        train_labels = []
        test_img_ids = []
        test_labels = []

        train_unlabel_img_ids = []
        non_distractor = []

        train_labels_str = []
        test_labels_str = []

        self._rnd.shuffle(self.class_seq)

        # Get a list of class indices (class_seq_i) restricted to cat_way
        # (the number of general categories per episode) randomly selected
        # categories.
        if within_category and self._cat_way != -1:
            assert hasattr(self, "_category_labels")
            cat_labels = np.unique(self._category_labels)
            num_cats = len(cat_labels)

            cat_idxs = self._rnd.choice(cat_labels,
                                        min(self._cat_way, num_cats),
                                        replace=False)
            allowable_inds = np.empty((1))
            for cat_idx in cat_idxs:
                current_inds = np.where(
                    np.array(self._category_labels) == cat_idx)[0]
                filtered_inds = list(
                    filter(lambda _id: _id in self.class_seq, current_inds))
                self._rnd.shuffle(filtered_inds)
                allowable_inds = np.concatenate(
                    (allowable_inds,
                     filtered_inds[0:min(self._nway, len(filtered_inds))]))
            class_seq_i = (allowable_inds[1:]).astype(np.int64)
            self._rnd.shuffle(class_seq_i)
            total_way = len(class_seq_i)

        else:
            total_way = self._nway
            class_seq_i = self.class_seq

        is_training = self._split in ["train", "trainval"]
        assert is_training or self._split in ["val", "test"]

        for ii in range(total_way + self._num_distractor):
            cc = class_seq_i[ii]

            _ids = self._label_idict[cc]

            # Split the image IDs into labeled and unlabeled.
            _label_ids = list(
                filter(lambda _id: _id in self._label_split_idx_set, _ids))
            _unlabel_ids = list(
                filter(lambda _id: _id not in self._label_split_idx_set, _ids))

            self._rnd.shuffle(_label_ids)
            self._rnd.shuffle(_unlabel_ids)
            if not is_training:
                train_idx = self._nshot + self._num_unlabel
            else:
                train_idx = self._nshot

            _label_train_ids = _label_ids[:train_idx]
            _label_test_ids = _label_ids[train_idx:]
            self._rnd.shuffle(_label_train_ids)
            self._rnd.shuffle(_label_test_ids)

            test_end_idx = self._nshot

            class_idx = [cc, ii]

            if self._num_test == -1:
                if is_training:
                    num_test = len(_label_test_ids)
                else:
                    num_test = len(_label_test_ids) - self._num_unlabel - 1
            else:
                num_test = self._num_test
                if is_training:
                    assert num_test <= len(_label_test_ids)
                else:
                    assert num_test <= len(_label_test_ids) - self._num_unlabel

            # Add support set and query set (not for distractors).
            if hasattr(
                    self,
                    "_category_labels") and self._category_labels is not None:
                label_strs = self._category_labels
            else:
                label_strs = self._label_str

            if ii < total_way:
                train_img_ids.extend(_label_train_ids[:self._nshot])

                # Use the rest of the labeled image as queries, if num_test = -1.
                QUERY_SIZE_LARGE_ERR_MSG = (
                    "Query + reference should be less than labeled examples." +
                    "Num labeled {} Num test {} Num shot {}".format(
                        len(_label_ids), self._num_test, self._nshot))
                assert self._nshot + self._num_test <= len(
                    _label_ids), QUERY_SIZE_LARGE_ERR_MSG

                test_img_ids.extend(_label_test_ids[:num_test])

                train_labels.extend([class_idx] * self._nshot)
                train_labels_str.extend([label_strs[cc]] * self._nshot)
                test_labels.extend([class_idx] * num_test)
                test_labels_str.extend([label_strs[cc]] * num_test)
                non_distractor.extend([class_idx] * self._num_unlabel)
            else:
                non_distractor.extend([[-1, -1]] * self._num_unlabel)

            # Add unlabeled images here.
            if is_training:
                # Use labeled, unlabeled split here for refinement.
                train_unlabel_img_ids.extend(_unlabel_ids[:self._num_unlabel])

            else:
                train_unlabel_img_ids.extend(
                    _label_train_ids[self._nshot:self._nshot +
                                     self._num_unlabel])

        train_img = self.get_images(train_img_ids) / 255.0
        train_unlabel_img = self.get_images(train_unlabel_img_ids) / 255.0
        test_img = self.get_images(test_img_ids) / 255.0
        train_labels = np.array(train_labels)
        test_labels = np.array(test_labels)

        if hasattr(self, "_category_labels"):
            train_labels_str = np.hstack(
                (np.array(train_labels_str)[:, None],
                 np.array(self.episodic_labels(train_labels_str))[:, None]))
            test_labels_str = np.hstack(
                (np.array(test_labels_str)[:, None],
                 np.array(self.episodic_labels(test_labels_str))[:, None]))
        else:
            train_labels_str = np.array(train_labels_str)
            test_labels_str = np.array(test_labels_str)

        non_distractor = np.array(non_distractor)

        test_ids_set = set(test_img_ids)
        for _id in train_unlabel_img_ids:
            assert _id not in test_ids_set

        return Episode(x_train=train_img,
                       train_indices=train_img_ids,
                       y_train=train_labels,
                       x_test=test_img,
                       test_indices=test_img_ids,
                       y_test=test_labels,
                       x_unlabel=train_unlabel_img,
                       y_unlabel=non_distractor,
                       unlabel_indices=train_unlabel_img_ids,
                       y_train_str=train_labels_str,
                       y_test_str=test_labels_str)
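Examples #2 and #7 appear to come from the same codebase: Example #7 stores each label as a [global_class_index, episode_class_index] pair, and Example #2 selects column 1 (y_train.astype(np.int64)[:, :, 1]) to recover the episode-local labels. A small illustration of that convention (the index values are made up):

import numpy as np

# Rows as built in Example #7: [global class index cc, episode class index ii].
y_train = np.array([[52, 0], [52, 0], [317, 1], [317, 1]], dtype=np.int64)

# After adding a leading episode axis, column 1 gives the episode-local labels
# that Example #2 feeds to the model.
episode_labels = y_train[None, :, :][:, :, 1]
print(episode_labels)  # [[0 0 1 1]]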