コード例 #1
0
    def hard_negative_multilabel(self):
        """Hard negative sampling based on the multilabel assumption.

        Draws ``self._k`` candidate negatives from a randomly chosen class
        and keeps the candidate whose label similarity to the anchor is
        the smallest (i.e. the largest distance).

        While ``self._iteration <= self._n`` the method delegates to
        ``self.random_multilabel()`` instead.

        Returns:
            tuple: ``(anchor_id, positive_id, negative_id, margin)`` where
            ``margin`` is the anchor/positive similarity minus the
            anchor/negative similarity.
        """
        # During early iterations of sampling, use random sampling instead
        if self._iteration <= self._n:
            return self.random_multilabel()

        # np.random.choice needs a real 1-D sequence.  On Python 3
        # ``dict.keys()`` is a view object, which NumPy wraps as a 0-d
        # object array and then rejects, so materialize the keys first.
        class_ids = list(self._index.keys())
        anchor_class_id, negative_class_id = np.random.choice(class_ids, 2)
        anchor_id, positive_id = np.random.choice(
            self._index[anchor_class_id], 2)
        negative_ids = np.random.choice(
            self._index[negative_class_id], self._k)

        # Calculate the similarity of the anchor to the positive and to
        # every negative candidate.
        anchor_label = parse_label(self._labels[anchor_id])
        positive_label = parse_label(self._labels[positive_id])
        negative_labels = [parse_label(self._labels[negative_id]) for
                           negative_id in negative_ids]
        p_sim = intersect_sim(anchor_label, positive_label)
        n_sims = np.array(
            [intersect_sim(anchor_label, negative_label) for
             negative_label in negative_labels])

        # Keep the candidate with the smallest similarity to the anchor.
        min_sim_id = np.argmin(n_sims)
        negative_id = negative_ids[min_sim_id]
        n_sim = n_sims[min_sim_id]
        margin = p_sim - n_sim
        return (anchor_id, positive_id, negative_id, margin)
コード例 #2
0
    def hard_negative_multilabel(self):
        """Hard negative sampling based on the multilabel assumption.

        Among ``self._k`` negative candidates drawn from a randomly chosen
        class, select the one with the smallest label similarity (largest
        distance) to the anchor.

        While ``self._iteration <= self._n`` the method falls back to
        ``self.random_multilabel()``.

        Returns:
            tuple: ``(anchor_id, positive_id, negative_id, margin)`` with
            ``margin = sim(anchor, positive) - sim(anchor, negative)``.
        """
        # During early iterations of sampling, use random sampling instead
        if self._iteration <= self._n:
            return self.random_multilabel()

        # ``dict.keys()`` is a view on Python 3 and is not accepted by
        # np.random.choice; pass an explicit list of the class ids.
        anchor_class_id, negative_class_id = np.random.choice(
            list(self._index.keys()), 2)
        anchor_id, positive_id = np.random.choice(self._index[anchor_class_id],
                                                  2)
        negative_ids = np.random.choice(self._index[negative_class_id],
                                        self._k)

        # Calculate the similarity of the anchor to the positive and to
        # each of the negative candidates.
        anchor_label = parse_label(self._labels[anchor_id])
        positive_label = parse_label(self._labels[positive_id])
        negative_labels = [
            parse_label(self._labels[negative_id])
            for negative_id in negative_ids
        ]
        p_sim = intersect_sim(anchor_label, positive_label)
        n_sims = np.array([
            intersect_sim(anchor_label, negative_label)
            for negative_label in negative_labels
        ])

        # The hardest negative is the candidate least similar to the anchor.
        min_sim_id = np.argmin(n_sims)
        negative_id = negative_ids[min_sim_id]
        n_sim = n_sims[min_sim_id]
        margin = p_sim - n_sim
        return (anchor_id, positive_id, negative_id, margin)
コード例 #3
0
    def random_multilabel(self):
        """Random triplet sampling under the multilabel assumption.

        The triplet itself comes from ``self.random_sampling()``.  In
        addition, a margin is computed from label similarities: the
        similarity between anchor and positive minus the similarity
        between anchor and negative.

        Returns:
            tuple: ``(anchor_id, positive_id, negative_id, margin)``.
        """
        triplet = self.random_sampling()
        anchor_id, positive_id, negative_id = triplet

        # Parse the label of each triplet member, in triplet order.
        anchor_label, positive_label, negative_label = [
            parse_label(self._labels[sample_id])
            for sample_id in (anchor_id, positive_id, negative_id)
        ]

        # Margin = similarity to the positive minus similarity to the negative.
        margin = (intersect_sim(anchor_label, positive_label)
                  - intersect_sim(anchor_label, negative_label))
        return (anchor_id, positive_id, negative_id, margin)
コード例 #4
0
    def random_multilabel(self):
        """Draw a random triplet and attach a label-similarity margin.

        Sampling is delegated to ``self.random_sampling()``.  The extra
        fourth element of the result is the difference between the
        anchor/positive similarity and the anchor/negative similarity.

        Returns:
            tuple: ``(anchor_id, positive_id, negative_id, margin)``.
        """
        anchor_id, positive_id, negative_id = self.random_sampling()

        def label_of(sample_id):
            # Look up and parse the stored label of one sample.
            return parse_label(self._labels[sample_id])

        anchor_label = label_of(anchor_id)
        positive_label = label_of(positive_id)
        negative_label = label_of(negative_id)

        positive_similarity = intersect_sim(anchor_label, positive_label)
        negative_similarity = intersect_sim(anchor_label, negative_label)
        return (anchor_id, positive_id, negative_id,
                positive_similarity - negative_similarity)
コード例 #5
0
 def calculate_label_dim(self):
     """Calculate the dimension of the labels.

     Parses every entry of ``self._label`` and stores the number of
     distinct label elements in ``self._label_dim``.
     """
     distinct_labels = set()
     for raw_label in self._label:
         # Each parsed label contributes all of its elements to the set.
         distinct_labels.update(parse_label(raw_label))
     self._label_dim = len(distinct_labels)
コード例 #6
0
ファイル: layer.py プロジェクト: ifp-uiuc/Image-Retrieval-IFP
 def calculate_label_dim(self):
     """Set ``self._label_dim`` to the size of the label set.

     The label set is the collection of distinct elements obtained by
     parsing every entry of ``self._label``.
     """
     seen = set()
     for entry in self._label:
         parsed = parse_label(entry)
         seen.update(parsed)
     self._label_dim = len(seen)
コード例 #7
0
    def _build_index(self):
        """Build the index used to randomly fetch samples from the data.

        The index is a plain dict of the form
        ``{label: [list of sample ids]}``.  A sample whose parsed label has
        several elements is listed under each of them.  Also records the
        number of samples in ``self._sample_count``.
        """
        self._sample_count = len(self._labels)
        self._index = dict()
        for sample_id in range(self._sample_count):
            for label_ in parse_label(self._labels[sample_id]):
                # Create the bucket on first sight of a label, then append.
                self._index.setdefault(label_, []).append(sample_id)
コード例 #8
0
 def get_a_datum(self):
     """Return the sample at the current cursor and advance the cursor.

     When ``self._compressed`` is set, the stored entry is passed through
     ``extract_sample`` together with ``self._mean`` and ``self._resize``;
     otherwise the stored entry is returned as is.

     The label is a zero vector of length ``self._label_dim``.  In
     multilabel mode every parsed label element is used as an index that
     is set to 1; otherwise the first parsed element is stored in slot 0.

     Returns:
         tuple: ``(datum, label)``.
     """
     position = self._cur
     if not self._compressed:
         datum = self._data[position]
     else:
         datum = extract_sample(self._data[position], self._mean,
                                self._resize)

     # Build the label vector from the parsed label elements.
     elems = parse_label(self._label[position])
     label = np.zeros(self._label_dim)
     if self._multilabel:
         for elem in elems:
             label[elem] = 1
     else:
         label[0] = elems[0]

     # Advance the cursor, wrapping around at the end of the data.
     self._cur = (position + 1) % self._sample_count
     return datum, label
コード例 #9
0
ファイル: layer.py プロジェクト: ifp-uiuc/Image-Retrieval-IFP
 def get_a_datum(self):
     """Fetch one ``(datum, label)`` pair and move to the next sample.

     The datum is ``self._data[self._cur]``, run through
     ``extract_sample`` with ``self._mean`` and ``self._resize`` when
     ``self._compressed`` is set.  The label is a zero-initialised vector
     of length ``self._label_dim``: in multilabel mode each parsed label
     element indexes a position set to 1, otherwise position 0 receives
     the first parsed element.  The cursor wraps modulo
     ``self._sample_count``.

     Returns:
         tuple: ``(datum, label)``.
     """
     index = self._cur
     if self._compressed:
         stored = self._data[index]
         datum = extract_sample(stored, self._mean, self._resize)
     else:
         datum = self._data[index]

     # Start parsing labels.
     parsed = parse_label(self._label[index])
     label = np.zeros(self._label_dim)
     if self._multilabel:
         for slot in parsed:
             label[slot] = 1
     else:
         label[0] = parsed[0]

     next_index = (index + 1) % self._sample_count
     self._cur = next_index
     return datum, label