Python select_subset Examples, dataset.utils.select_subset Python Examples

Example #1

0

Show file

File: sampler.py Project: hccngu/Stable-PROTO2

    def worker(self, done_queue, sampled_classes, source_classes):
        '''
            Produce tasks forever and push each one onto done_queue.

            A task is a (support, query) tuple. For every class in
            sampled_classes the positions of self.idx_list[class] are
            shuffled; the first args.shot picks go to the support set and
            the next args.query picks go to the query set.

            Whenever done_queue reports more than 100 pending items the
            worker sleeps for one second instead of producing more.

            source_classes is accepted but not read by this method.
            The loop has no exit, so this method never returns.
        '''
        while True:
            # Throttle: do not pile up more tasks than the consumer needs.
            if done_queue.qsize() > 100:
                time.sleep(1)
                continue

            # Draw support/query example indices class by class.
            support_parts = []
            query_parts = []
            for cls in sampled_classes:
                pool = self.idx_list[cls]
                order = np.random.permutation(len(pool))
                n_shot = self.args.shot
                support_parts.append(pool[order[:n_shot]])
                query_parts.append(pool[order[n_shot:n_shot + self.args.query]])

            support_idx = np.concatenate(support_parts)
            query_idx = np.concatenate(query_parts)

            # Longest text in each split; handed to select_subset below
            # (presumably the length the batch is cut/padded to -- confirm
            # against utils.select_subset).
            text_len = self.data['text_len']
            max_support_len = np.max(text_len[support_idx])
            max_query_len = np.max(text_len[query_idx])

            support = utils.select_subset(
                self.data, {}, ['text', 'text_len', 'label'],
                support_idx, max_support_len)
            query = utils.select_subset(
                self.data, {}, ['text', 'text_len', 'label'],
                query_idx, max_query_len)

            task = (support, query)
            done_queue.put(task)

Example #2

0

Show file

    def worker(self, done_queue, sampled_classes, source_classes):
        '''
            Produce tasks and push each one onto done_queue until the queue
            holds more than self.num_episodes items.

            A task is a (support, query) tuple built from args.shot support
            examples and args.query query examples per class in
            sampled_classes.

            Sampling depends on self.example_prob_metrix (read once, before
            the loop):
              * None: positions are shuffled uniformly; if the class has
                fewer than shot + query examples, positions are instead
                drawn with replacement.
              * otherwise: positions are drawn without replacement using
                example_prob_metrix[class][0] as the probabilities.

            Once done_queue reports more than self.num_episodes items the
            worker sleeps for one second and then returns.

            source_classes is accepted but not read by this method.
        '''
        prob_table = self.example_prob_metrix
        while True:
            # Queue is full enough: pause briefly, then stop this worker.
            if done_queue.qsize() > self.num_episodes:
                time.sleep(1)
                return

            # Draw support/query example indices class by class.
            support_parts = []
            query_parts = []
            for cls in sampled_classes:
                pool = self.idx_list[cls]
                if prob_table is None:
                    order = np.random.permutation(len(pool))
                    if len(order) < self.args.shot + self.args.query:
                        # Too few examples for disjoint splits: resample
                        # positions allowing repeats.
                        order = np.random.choice(
                            len(pool),
                            self.args.shot + self.args.query,
                            replace=True)
                    query_pick = order[self.args.shot:
                                       self.args.shot + self.args.query]
                else:
                    order = np.random.choice(
                        len(pool),
                        self.args.shot + self.args.query,
                        p=prob_table[cls][0],
                        replace=False)
                    query_pick = order[self.args.shot:]

                support_parts.append(pool[order[:self.args.shot]])
                query_parts.append(pool[query_pick])

            support_idx = np.concatenate(support_parts)
            query_idx = np.concatenate(query_parts)

            # Longest text in each split; handed to select_subset below
            # (presumably the length the batch is cut/padded to -- confirm
            # against utils.select_subset).
            text_len = self.data['text_len']
            max_support_len = np.max(text_len[support_idx])
            max_query_len = np.max(text_len[query_idx])

            support = utils.select_subset(
                self.data, {}, ['text', 'text_len', 'label'],
                support_idx, max_support_len)
            query = utils.select_subset(
                self.data, {}, ['text', 'text_len', 'label'],
                query_idx, max_query_len)

            task = (support, query)
            done_queue.put(task)

Example #3

0

Show file

File: sampler.py Project: hccngu/MLADA

    def worker(self, done_queue):
        '''
            Produce tasks forever and push each one onto done_queue.

            For every task, args.way class positions are drawn at random
            out of self.num_classes; for each drawn class the positions of
            self.idx_list[class] are shuffled, the first args.shot picks go
            to the support set and the next args.query picks go to the
            query set. The task is put on the queue as (support, query).

            In 'finetune' mode an empty query set is replaced by the
            support set.

            Whenever done_queue reports more than 100 pending items the
            worker sleeps for one second instead of producing more.
            The loop has no exit, so this method never returns.
        '''
        while True:
            # Throttle: do not pile up more tasks than the consumer needs.
            if done_queue.qsize() > 100:
                time.sleep(1)
                continue

            # Pick which classes take part in this task.
            shuffled_classes = np.random.permutation(self.num_classes)
            sampled_classes = shuffled_classes[:self.args.way]

            # Classes left out of the task, sorted. Not consumed further
            # down in this method.
            source_classes = sorted(
                self.all_classes[j]
                for j in range(self.num_classes)
                if j not in sampled_classes)

            # Draw support/query example indices class by class.
            support_parts = []
            query_parts = []
            for cls in sampled_classes:
                pool = self.idx_list[cls]
                order = np.random.permutation(len(pool))
                n_shot = self.args.shot
                support_parts.append(pool[order[:n_shot]])
                query_parts.append(pool[order[n_shot:n_shot + self.args.query]])

            support_idx = np.concatenate(support_parts)
            query_idx = np.concatenate(query_parts)
            if self.args.mode == 'finetune' and len(query_idx) == 0:
                # No query examples were drawn: reuse the support ones.
                query_idx = support_idx

            # Longest text in each split; handed to select_subset below
            # (presumably the length the batch is cut/padded to -- confirm
            # against utils.select_subset).
            text_len = self.data['text_len']
            max_support_len = np.max(text_len[support_idx])
            max_query_len = np.max(text_len[query_idx])

            support = utils.select_subset(
                self.data, {}, ['text', 'text_len', 'label'],
                support_idx, max_support_len)
            query = utils.select_subset(
                self.data, {}, ['text', 'text_len', 'label'],
                query_idx, max_query_len)

            task = (support, query)
            done_queue.put(task)

Example #4

0

Show file

    def worker(self, done_queue):
        '''
            Produce tasks forever and push each one onto done_queue.

            For every task, args.way class positions are drawn at random
            out of self.num_classes; for each drawn class the positions of
            self.idx_list[class] are shuffled, the first args.shot picks go
            to the support set and the next args.query picks go to the
            query set. The task is put on the queue as (support, query).

            In 'finetune' mode an empty query set is replaced by the
            support set.

            Optional extras attached to both support and query:
              * 'idf' when args.embedding is 'idf', 'meta' or 'meta_mlp'
              * 'iwf' when args.embedding is 'iwf', 'meta' or 'meta_mlp'
              * 'bert_id' when args.bert is truthy
              * 'head' and 'tail' when 'pos' is in args.auxiliary

            Whenever done_queue reports more than 100 pending items the
            worker sleeps for one second instead of producing more.
            The loop has no exit, so this method never returns.
        '''
        while True:
            # Throttle: do not pile up more tasks than the consumer needs.
            if done_queue.qsize() > 100:
                time.sleep(1)
                continue

            # Pick which classes take part in this task.
            shuffled_classes = np.random.permutation(self.num_classes)
            sampled_classes = shuffled_classes[:self.args.way]

            # Classes left out of the task, sorted; fed to the idf/iwf
            # statistics below.
            source_classes = sorted(
                self.all_classes[j]
                for j in range(self.num_classes)
                if j not in sampled_classes)

            # Draw support/query example indices class by class.
            support_parts = []
            query_parts = []
            for cls in sampled_classes:
                pool = self.idx_list[cls]
                order = np.random.permutation(len(pool))
                n_shot = self.args.shot
                support_parts.append(pool[order[:n_shot]])
                query_parts.append(pool[order[n_shot:n_shot + self.args.query]])

            support_idx = np.concatenate(support_parts)
            query_idx = np.concatenate(query_parts)
            if self.args.mode == 'finetune' and len(query_idx) == 0:
                # No query examples were drawn: reuse the support ones.
                query_idx = support_idx

            # Longest text in each split; handed to select_subset below
            # (presumably the length the batch is cut/padded to -- confirm
            # against utils.select_subset).
            text_len = self.data['text_len']
            max_support_len = np.max(text_len[support_idx])
            max_query_len = np.max(text_len[query_idx])

            support = utils.select_subset(
                self.data, {}, ['text', 'text_len', 'label'],
                support_idx, max_support_len)
            query = utils.select_subset(
                self.data, {}, ['text', 'text_len', 'label'],
                query_idx, max_query_len)

            if self.args.embedding in ('idf', 'meta', 'meta_mlp'):
                # idf statistics computed from the classes outside this task
                idf = stats.get_idf(self.data, source_classes)
                for part in (support, query):
                    part['idf'] = idf

            if self.args.embedding in ('iwf', 'meta', 'meta_mlp'):
                # iwf statistics computed from the classes outside this task
                iwf = stats.get_iwf(self.data, source_classes)
                for part in (support, query):
                    part['iwf'] = iwf

            if self.args.bert:
                # bert token ids: two extra positions because bert_id
                # includes [CLS] and [SEP]
                support = utils.select_subset(
                    self.data, support, ['bert_id'],
                    support_idx, max_support_len + 2)
                query = utils.select_subset(
                    self.data, query, ['bert_id'],
                    query_idx, max_query_len + 2)

            if 'pos' in self.args.auxiliary:
                support = utils.select_subset(
                    self.data, support, ['head', 'tail'], support_idx)
                query = utils.select_subset(
                    self.data, query, ['head', 'tail'], query_idx)

            task = (support, query)
            done_queue.put(task)