Ejemplo n.º 1
0
    def run(self):
        """Register the configured tasks, then poll them in an endless loop.

        When ``self.params.config_file`` is set, its tasks are merged into
        ``self.config`` before polling starts, and the file is re-read on every
        pass of the loop so that new tasks are added and existing ones are
        refreshed. The ``while True`` loop has no exit condition, so this
        method does not return on its own.
        """
        time_now = datetime.datetime.now()
        if self.params.config_file is not None:
            config = Config(file_py=self.params.config_file)
            for name in config.config:
                self.config[name] = config.config[name]
                self.config[name]['execute_num'] = 0
                # Bundle everything passed to self._start() for this task.
                self.config[name]['runner'] = (self.config[name]['function'],
                                               self.config[name]['args'],
                                               self.config[name]['kwargs'],
                                               name)
                self.config[name]['time_init'] = time_now

        for name in self.config:
            # presumably computes the task's 'time_next' -- _reset_time is not
            # visible here; TODO confirm.
            self._reset_time(name, time_now)
            if self.params.logger.params.log_file != '':
                self.params.logger.write(f'New task {name} has been added.')
            if self.params.verbose:
                self.params.logger.info(f'New task {name} has been added.')

        # NOTE(review): no sleep/back-off is visible in this loop, so it appears
        # to spin and re-read the config file continuously -- confirm intent.
        while True:
            time_now = datetime.datetime.now()

            for name in self.config:
                if self.config[name]['time_next'] > time_now:
                    # Not due yet: only remember when the task was last checked.
                    self.config[name]['time_record'] = time_now
                else:
                    # Due: hand the runner tuple to _start(), then reschedule.
                    self._start(self.config[name]['runner'])
                    self._reset_time(name, time_now)
                    self.config[name]['execute_num'] += 1

            try:
                if self.params.config_file is not None:
                    config = Config(file_py=self.params.config_file)
                    for name in config.config:
                        if name not in self.config:
                            # Task appeared in the file since the last pass.
                            self.config[name] = config.config[name]
                            self.config[name]['execute_num'] = 0
                            self.config[name]['time_init'] = time_now
                            self._reset_time(name, time_now)
                            if self.params.logger.params.log_file != '':
                                self.params.logger.write(
                                    f'New task {name} has been added.')
                            if self.params.verbose:
                                self.params.logger.info(
                                    f'New task {name} has been added.')
                        # Copy every field from the freshly loaded entry onto the
                        # stored one; keys present only on the stored entry
                        # (e.g. 'execute_num') are left as they are.
                        for i, j in config.config[name].items():
                            self.config[name][i] = j
                        # Rebuild the runner so it reflects any changed
                        # function/args/kwargs.
                        self.config[name]['runner'] = (
                            self.config[name]['function'],
                            self.config[name]['args'],
                            self.config[name]['kwargs'], name)
            except Exception as msg:
                # A failed reload is reported and the loop carries on with the
                # tasks already registered.
                if self.params.logger.params.log_file != '':
                    self.params.logger.write(str(msg))
                if self.params.verbose:
                    self.params.logger.info(str(msg))
Ejemplo n.º 2
0
 def _params_init(self):
     """Create ``self._params`` and fill it with the pipeline defaults.

     The attributes are assigned in a fixed order: counters and sizes first,
     then the tensor mode and the per-operation ``options`` registry.
     """
     params = Config()
     self._params = params
     defaults = (
         ('batch', 0),
         ('batch_size', 1),
         ('skip_size', None),
         ('take_size', -1),
         ('shuffle_size', 1),
         ('prefetch_size', 1),
         ('sample', 0),
         ('step', 1),
         ('tensor_mode', 'numpy'),
     )
     for attr_name, attr_value in defaults:
         setattr(params, attr_name, attr_value)
     # Maps an operation name to {step: settings}; missing names start empty.
     params.options = defaultdict(dict)
Ejemplo n.º 3
0
 def _params_init(self):
     """Create ``self._params`` and fill it with the dataset defaults.

     Scalars describe the current step/mode; the remaining attributes are
     containers keyed by mode or operation name.
     """
     state = Config()
     self._params = state

     # Scalar settings.
     state.step = 1
     state.tensor = 'numpy'
     state.mode = 'total'
     state.mode1 = 'total'

     # Index bookkeeping; the plain defaultdict() has no default factory.
     state.index_data = defaultdict()
     state.index_data['total'] = 'total'
     state.data_from = 'tensor'
     state.data = defaultdict()

     # Per-mode containers.
     state.index = defaultdict(list)
     state.map = defaultdict(list)
     state.batch = defaultdict(list)
     state.batch[state.mode] = [0, False, 0]
     state.enumerate = defaultdict(int)
     state.options = defaultdict(dict)
Ejemplo n.º 4
0
    def __init__(self,
                 nrows,
                 ncols,
                 left=None,
                 bottom=None,
                 right=None,
                 top=None,
                 wspace=None,
                 hspace=None,
                 width_ratios=None,
                 height_ratios=None):
        """Describe a grid layout used to place subplots within a figure.

        The arguments are only recorded here, under ``self._grid.grid``; the
        figure defaults to a ``figsize`` of ``(10, 6)``.

        Args:
            nrows, ncols : int
                Number of rows and columns of the grid.

            left, right, top, bottom : float, optional
                Extent of the subplots as a fraction of figure width or height.
                Left cannot be larger than right, and bottom cannot be larger
                than top. If omitted, the values are inferred from a figure or
                rcParams at draw time.

            wspace : float, optional
                Width reserved for space between subplots, as a fraction of the
                average axis width. Inferred from a figure or rcParams when
                omitted.

            hspace : float, optional
                Height reserved for space between subplots, as a fraction of
                the average axis height. Inferred from a figure or rcParams
                when omitted.

            width_ratios : array-like of length *ncols*, optional
                Relative widths of the columns. Each column gets a relative
                width of ``width_ratios[i] / sum(width_ratios)``. All columns
                are equally wide when omitted.

            height_ratios : array-like of length *nrows*, optional
                Relative heights of the rows. Each row gets a relative height
                of ``height_ratios[i] / sum(height_ratios)``. All rows are
                equally tall when omitted.
        """
        grid_spec = dict(
            nrows=nrows,
            ncols=ncols,
            left=left,
            bottom=bottom,
            right=right,
            top=top,
            wspace=wspace,
            hspace=hspace,
            width_ratios=width_ratios,
            height_ratios=height_ratios,
        )
        layout = Config()
        self._grid = layout
        layout.figure = dict(figsize=(10, 6))
        layout.grid = grid_spec
        # Starts empty; nothing in this constructor populates it.
        layout.grid_id = {}
Ejemplo n.º 5
0
 def __init__(self, model, hp=None, name=None, method='random'):
     """Resolve the search name, the model factory and the hyperparameter space.

     Args:
         model: one of the built-in names 'XGBClassifier', 'XGBRegressor',
             'LGBMClassifier', 'LGBMRegressor', or any other object, which is
             stored unchanged as ``self.params.model_init``.
         hp: hyperparameter object. For a built-in name it is merged into the
             preset returned by ``model_hp`` through ``from_HyperParameters``;
             otherwise it becomes ``self.hp`` as-is.
         name: label stored in ``self.params.name``. Defaults to 'RS' when
             ``method`` is 'random' and 'GS' when it is 'grid'.
         method: search strategy, 'random' or 'grid'.
     """
     self.params = Config()
     if name is not None:
         self.params.name = name
     elif method=='random':
         self.params.name = 'RS'
     elif method=='grid':
         self.params.name = 'GS'
     self.params.model_init = model
     self.params.model_name = ''
     self.params.method = method
     if model in ['XGBClassifier', 'XGBRegressor', 'LGBMClassifier', 'LGBMRegressor']:
         self.hp = model_hp(model=model, method=method)
         if hp is not None:
             self.hp.from_HyperParameters(hp)
         self.params.model_name = model
         # NOTE(review): the version checks below compare strings, which
         # mis-orders multi-digit components such as '1.10' vs '1.9'.
         if model=='XGBClassifier':
             import xgboost as xgb
             assert xgb.__version__>=__xgboost_version__, f'xgboost version should be >={__xgboost_version__}.'
             self.params.model_init = xgb.XGBClassifier
         elif model=='XGBRegressor':
             # This branch used to test 'LGBMClassifier', so 'XGBRegressor' was
             # never resolved and 'LGBMClassifier' received the xgboost regressor.
             import xgboost as xgb
             assert xgb.__version__>=__xgboost_version__, f'xgboost version should be >={__xgboost_version__}.'
             self.params.model_init = xgb.XGBRegressor
         elif model=='LGBMClassifier':
             import lightgbm as lgb
             assert lgb.__version__>=__lightgbm_version__, f'lightgbm version should be >={__lightgbm_version__}.'
             self.params.model_init = lgb.LGBMClassifier
         elif model=='LGBMRegressor':
             import lightgbm as lgb
             assert lgb.__version__>=__lightgbm_version__, f'lightgbm version should be >={__lightgbm_version__}.'
             self.params.model_init = lgb.LGBMRegressor
     else:
         self.hp = hp
     self.best_params = dict()
     self.best_params_history = dict()
Ejemplo n.º 6
0
 def __init__(self, target, width=25, verbose=1, unit_name='step'):
     """Record the progress-bar settings and start its clock.

     Args:
         target: Total number of steps expected, None if unknown.
         width: Progress bar width on screen.
         verbose: Verbosity mode, 0 (silent), 1 (verbose)
         unit_name: Display name for step counts (usually "step" or "sample").
     """
     state = Config()
     self.param = state
     state.width = width
     state.target = target
     # Creation timestamp, taken once when the bar is built.
     state.time = time.time()
     for attr_name, attr_value in (
         ('n', 0),
         ('unit_name', unit_name),
         ('verbose', verbose),
         ('current', 0),
     ):
         setattr(state, attr_name, attr_value)
     if not verbose:
         return
     # A logger is only created for a verbose bar.
     state.logger = Logger()
Ejemplo n.º 7
0
 def __init__(self, logger=None, verbose=0, config_file=None):
     """Set up an empty task table and the scheduler's runtime settings.

     Args:
         logger: Logger object, linora.utils.Logger() class. A new ``Logger()``
                 is created when omitted.
         verbose: Verbosity mode, 0 (silent), 1 (verbose).
         config_file: job task config file, if .py file.
                      example: .py file name is schedulers_config.py, contain a dict,
                      config = {'hhh':{'mode':'every_minute', 'time':50, 'function':function, 'args':[], 'kwargs':{}}}
     """
     self.config = {}
     settings = Config()
     self.params = settings
     settings.verbose = verbose
     settings.logger = Logger() if logger is None else logger
     settings.config_file = config_file
     # The tracker dict comes from a multiprocessing manager; the runner dict
     # is a defaultdict with no default factory.
     manager = multiprocessing.Manager()
     settings.tracker_dict = manager.dict()
     settings.runner_dict = defaultdict()
Ejemplo n.º 8
0
    def __init__(self):
        """Seed ``self._params`` with the default plot state.

        Covers the series store (``ydata``), theme, figure size, axis and
        label settings, legend, spines, title, and the ``set_label`` /
        ``colorbar`` values.
        """
        params = Config()
        self._params = params

        params.ydata = defaultdict(defaultdict)
        params.theme = 'ggplot'
        params.figure = dict(figsize=(10, 6))

        params.axis = dict(
            axis=None,
            xinvert=False,
            yinvert=False,
            xtick={},
            ytick={},
            xlabel=None,
            ylabel=None,
            xtickposition=None,
            ytickposition=None,
        )

        # One nested dict per axis label, keyed by the label name itself.
        params.label = {
            label_name: {label_name: None}
            for label_name in ('xlabel', 'ylabel')
        }

        params.legend = dict(loc=None)
        # A separate empty dict for every spine property.
        params.spine = {
            prop: {}
            for prop in ('show', 'color', 'width', 'style', 'position')
        }
        params.title = dict(label=None)

        params.set_label = True
        params.colorbar = set()
Ejemplo n.º 9
0
class BaseSearch():
    """Hyperparameter search driver for the 'random' and 'grid' strategies."""

    def __init__(self, model, hp=None, name=None, method='random'):
        """Resolve the search name, the model factory and the hyperparameter space.

        Args:
            model: one of the built-in names 'XGBClassifier', 'XGBRegressor',
                'LGBMClassifier', 'LGBMRegressor', or any other object, which is
                stored unchanged as ``self.params.model_init``.
            hp: hyperparameter object. For a built-in name it is merged into the
                preset returned by ``model_hp`` through ``from_HyperParameters``;
                otherwise it becomes ``self.hp`` as-is.
            name: label stored in ``self.params.name``. Defaults to 'RS' when
                ``method`` is 'random' and 'GS' when it is 'grid'.
            method: search strategy, 'random' or 'grid'.
        """
        self.params = Config()
        if name is not None:
            self.params.name = name
        elif method=='random':
            self.params.name = 'RS'
        elif method=='grid':
            self.params.name = 'GS'
        self.params.model_init = model
        self.params.model_name = ''
        self.params.method = method
        if model in ['XGBClassifier', 'XGBRegressor', 'LGBMClassifier', 'LGBMRegressor']:
            self.hp = model_hp(model=model, method=method)
            if hp is not None:
                self.hp.from_HyperParameters(hp)
            self.params.model_name = model
            # NOTE(review): the version checks below compare strings, which
            # mis-orders multi-digit components such as '1.10' vs '1.9'.
            if model=='XGBClassifier':
                import xgboost as xgb
                assert xgb.__version__>=__xgboost_version__, f'xgboost version should be >={__xgboost_version__}.'
                self.params.model_init = xgb.XGBClassifier
            elif model=='XGBRegressor':
                # This branch used to test 'LGBMClassifier', so 'XGBRegressor'
                # was never resolved and 'LGBMClassifier' got the wrong class.
                import xgboost as xgb
                assert xgb.__version__>=__xgboost_version__, f'xgboost version should be >={__xgboost_version__}.'
                self.params.model_init = xgb.XGBRegressor
            elif model=='LGBMClassifier':
                import lightgbm as lgb
                assert lgb.__version__>=__lightgbm_version__, f'lightgbm version should be >={__lightgbm_version__}.'
                self.params.model_init = lgb.LGBMClassifier
            elif model=='LGBMRegressor':
                import lightgbm as lgb
                assert lgb.__version__>=__lightgbm_version__, f'lightgbm version should be >={__lightgbm_version__}.'
                self.params.model_init = lgb.LGBMRegressor
        else:
            self.hp = hp
        self.best_params = dict()
        self.best_params_history = dict()

    def search(self, train_data, metrics, valid_data=None,
               iter_num=None, cv=3, metrics_min=True, 
               speedy=True, speedy_param=(20000, 0.3), 
               save_model_dir=None, save_model_name=None):
        """model params search method.

        Args:
            train_data: A list of (X, y, sample_weight) tuple pairs to use as train sets.
            metrics: model metrics function.
            valid_data: A list of (X, y, sample_weight) tuple pairs to use as validation sets.
            iter_num: search count. Defaults to the grid cardinality for
                      method='grid' and to 100 otherwise.
            cv: cross validation fold.
            metrics_min: metrics value whether the smaller the better.
            speedy: whether use speedy method.
            speedy_param: if use speedy method, test_size will be set, 
                          test_size = 1-round(min(speedy_param[0], feature.shape[0]*speedy_param[1])/feature.shape[0], 2).
            save_model_dir: str, save model folder, only work with model='XGBClassifier' or 'XGBRegressor'.
            save_model_name: str, save model name prefix, only work with model='XGBClassifier' or 'XGBRegressor'.
        Returns:
            a best model params dict.
        Raises:
            params error.
        """
        logger = Logger(name=self.params.name)
        logger.info(f"Start hyperparameter {self.params.method} search.")
        import warnings
        # NOTE(review): this silences warnings process-wide and is never undone.
        warnings.filterwarnings("ignore")
        if speedy:
            test_size = 1-round(min(speedy_param[0], len(train_data[1])*speedy_param[1])/len(train_data[1]), 2)
        if self.params.model_name=='XGBClassifier':
            self._xgb_weight(train_data[1])
        
        if valid_data is not None:
            cv_score_list = []
            
        if self.params.method=='grid':
            if iter_num is None:
                iter_num = self.hp.cardinality()
            else:
                iter_num = min(iter_num, self.hp.cardinality())
        if iter_num is None:
            iter_num = 100
        # Start from the worst possible score so the first usable result always
        # wins and the progress/final logs never read an unbound name (which
        # previously happened when iteration 1 was skipped or iter_num was 0).
        scoring = float('inf') if metrics_min else float('-inf')
        for i in range(1, iter_num+1):
            self.hp.update(self.best_params)
            self.params.model = self.params.model_init(**self.hp.params)
            score = []
            if speedy:
                # Repeated random hold-out splits instead of a full k-fold.
                for _ in range(cv):
                    index = train_test_split(train_data[0], train_data[1], test_size, seed=np.random.choice(range(100), 1)[0])
                    score.append(self._model_fit_predict(train_data, metrics, index, mode=1))
            else:
                index_list = kfold(train_data[0], train_data[1], n_splits=cv, seed=np.random.choice(range(100), 1)[0])
                for index in index_list:
                    score.append(self._model_fit_predict(train_data, metrics, index, mode=1))
            cv_score = np.mean(score)
            if valid_data is not None:
                # Only candidates whose CV score is on the good side of the
                # 20%/80% position among all CV scores so far are scored on
                # the validation set.
                cv_score_list.append(cv_score)
                cv_score_list.sort()
                threshold = cv_score_list[int(len(cv_score_list)*(0.2 if metrics_min else 0.8))]
                promising = threshold>=cv_score if metrics_min else threshold<=cv_score
                if promising:
                    cv_score = self._model_fit_predict(valid_data, metrics, index=None, mode=0)
                else:
                    logger.info(f"Model {self.params.method} search progress: {i/iter_num*100:.1f}%, best score: {scoring:.4f}", enter=(i>=iter_num))
                    continue
            improved = cv_score<=scoring if metrics_min else cv_score>=scoring
            if improved:
                scoring = cv_score
                self.best_params = self.hp.params.copy()
                self.best_params_history[i] = {'score':scoring, 'best_params':self.best_params.copy()}
                if self.params.model_name in ['XGBClassifier', 'XGBRegressor']:
                    if save_model_dir is not None:
                        if save_model_name is None:
                            save_model_name = self.params.name
                        # The fitted estimator lives on self.params.model and the
                        # winning parameters on self.best_params; the bare names
                        # `model` / `best_params` used here before were undefined.
                        self.params.model.save_model(os.path.join(save_model_dir, f"{save_model_name}_model.json"))
                        with open(os.path.join(save_model_dir, f"{save_model_name}_params.json"),'w') as f:
                            json.dump(self.best_params, f)
            logger.info(f"Model {self.params.method} search progress: {i/iter_num*100:.1f}%, best score: {scoring:.4f}", enter=(i>=iter_num))
        logger.info(f"Model {self.params.method} search best score: {scoring:.4f}", close=True, time_mode=1)
        return self.best_params

    def _model_fit_predict(self, data, metrics, index=None, mode=1):
        """Optionally fit ``self.params.model``, then score its predictions.

        Args:
            data: (X, y) or (X, y, sample_weight); a third element is passed
                as ``sample_weight`` to both ``fit`` and ``metrics``.
            metrics: callable invoked as ``metrics(y_true, y_pred[, sample_weight=...])``.
            index: pair ``(train_index, test_index)`` used to select rows, or
                None to predict on all of ``data``. Must not be None when
                ``mode`` is truthy.
            mode: truthy to fit on ``index[0]`` before predicting.
        Returns:
            whatever ``metrics`` returns.
        """
        if mode:
            if len(data)==2:
                self.params.model.fit(data[0].loc[index[0]], data[1][index[0]])
            else:
                self.params.model.fit(data[0].loc[index[0]], data[1][index[0]], sample_weight=data[2][index[0]])
        if index is None:
            cv_pred = pd.Series(self.params.model.predict(data[0]), index=data[1].index)
        else:
            cv_pred = pd.Series(self.params.model.predict(data[0].loc[index[1]]), index=data[1][index[1]].index)
        if len(data)==2:
            if index is None:
                return metrics(data[1], cv_pred)
            else:
                return metrics(data[1][index[1]], cv_pred)
        else:
            if index is None:
                return metrics(data[1], cv_pred, sample_weight=data[2])
            else:
                return metrics(data[1][index[1]], cv_pred, sample_weight=data[2][index[1]])
    
    def _xgb_weight(self, label):
        """Register a ``scale_pos_weight`` choice derived from the class balance.

        For two classes the weight is ceil(count of the smaller label value /
        count of the larger label value). For any other number of classes it
        is ceil(largest class count / smallest class count).
        """
        counts = Counter(label)
        if len(counts)==2:
            weight = int(np.ceil(counts[min(counts)]/counts[max(counts)]))
        else:
            # The previous code inverted the counter and divided the class
            # *labels*, which is meaningless (and fails for label 0 or
            # non-numeric labels); use the counts themselves.
            weight = int(np.ceil(max(counts.values())/min(counts.values())))
        if self.params.method=='grid':
            self.hp.Choice('scale_pos_weight', [1, weight], weight, rank=6)
        else:
            self.hp.Choice('scale_pos_weight', [1, weight])
Ejemplo n.º 10
0
class DataSet():
    """Chainable, chunked CSV reader configured through fluent option calls.

    NOTE(review): ``data``, ``sep``, ``header`` and ``batch_file`` are read by
    this class but never assigned in it; presumably a subclass sets them
    before iteration -- confirm against the subclasses.
    """

    def __init__(self):
        self._params_init()
    
    def _params_init(self):
        """Create ``self._params`` with the default pipeline settings."""
        self._params = Config()
        self._params.batch = 0
        self._params.batch_size = 1
        self._params.skip_size = None
        self._params.take_size = -1
        self._params.shuffle_size = 1
        self._params.prefetch_size = 1
        self._params.sample = 0
        self._params.step = 1
        self._params.tensor_mode = 'numpy'
        # Maps an operation name to {step: settings}.
        self._params.options = defaultdict(dict)
        
    def batch(self, batch_size, drop_remainder=False):
        """Combines consecutive elements of this dataset into batches.
        
        Args:
            batch_size: representing the number of consecutive elements of this dataset to combine in a single batch.
            drop_remainder: representing whether the last batch should be dropped in the case it has fewer than batch_size elements; 
                            the default behavior is not to drop the smaller batch.
        """
        assert 'batch' not in self._params.options, '`batch` already exists.'
        assert isinstance(batch_size, int) and batch_size>0, '`batch_size` type should be int and greater than 0.'
        self._params.batch_size = batch_size
        self._params.drop_remainder = drop_remainder
        self._params.options['batch'].update({self._params.step: {'batch_size':batch_size, 'drop_remainder':drop_remainder}})
        self._params.step += 1
        return self
    
    def concatenate(self, dataset):
        """Creates a Dataset by concatenating the given dataset with this dataset.
        
        Args:
            dataset: la.data.TextLineDataset object, Dataset to be concatenated.
        """
        # `data` is indexed one source at a time during iteration, so the other
        # dataset's sources must be added individually; append() nested the
        # whole list as a single element.
        self._params.data.extend(dataset._params.data)
        self._params.options['concatenate'].update({self._params.step: None})
        self._params.step += 1
        return self
        
    def enumerate(self, start=0):
        """Enumerates the elements of this dataset.
        
        Args:
            start: int, representing the start value for enumeration.
        """
        assert 'enumerate' not in self._params.options, '`enumerate` already exists.'
        self._params.enumerate = start
        self._params.options['enumerate'].update({self._params.step: {'start':start}})
        self._params.step += 1
        return self
    
#     def filter(self, filter_func):
#         """A transformation that filter dataset based on a filter_func.
        
#         Args:
#             filter_func: A function that return True or False
#         """
#         if self.params.data_mode=='list':
#             filter_list = [i for i in range(len(self.params.data[0])) if filter_func([j[i] for j in self.params.data])]
#         else:
#             filter_list = [r for r, i in enumerate(self.params.data) if filter_func(i)]
#         if filter_list:
#             self.params.data_index = [i for i in self.params.data_index if i not in filter_list]
#         self.params.options['filter'].append((self.params.step, filter_func))
#         self.params.step += 1
#         return self
    
    def map(self, map_func):
        """Maps map_func across the elements of this dataset.
        
        Args:
            map_func: A function mapping a dataset element to another dataset element.
        """
        assert 'map' not in self._params.options, '`map` already exists.'
        self._params.map_func = map_func
        self._params.options['map'].update({self._params.step: {'map_func':map_func}})
        self._params.step += 1
        return self
    
    def options(self):
        """Returns the options for this dataset and its inputs."""
        return self._params.options
    
    def prefetch(self, prefetch_size):
        """Creates a Dataset that prefetches elements from this dataset.
        
        Args:
            prefetch_size: representing the maximum number of elements that will be buffered when prefetching.
        """
        assert 'prefetch' not in self._params.options, '`prefetch` already exists.'
        assert 'take_while' not in self._params.options, '`prefetch` must be placed in `take_while` front.'
        assert isinstance(prefetch_size, int) and prefetch_size>0, '`prefetch_size` type should be int and greater than 0.'
        self._params.prefetch_size = prefetch_size
        self._params.options['prefetch'].update({self._params.step: {'prefetch_size':prefetch_size}})
        self._params.step += 1
        return self
        
#     def reduce(self, reduce_func):
#         """Reduces the input dataset to a single element.
        
#         Args:
#             reduce_func: A function that maps to new_state. It must take two arguments and return a new element
#         """
#         if self.params.data_mode=='list':
#             return [functools.reduce(reduce_func, i[self.params.data_index]) for i in self.params.data]
#         return functools.reduce(reduce_func, self.params.data[self.params.data_index])
    
    def repeat(self, repeat_size):
        """Repeats this dataset by replicating its source list.
        
        The source list is multiplied by ``repeat_size+1``, i.e. the original
        pass plus ``repeat_size`` extra passes.
        
        Args:
            repeat_size: representing the number of times the dataset should be repeated.
        """
        assert 'take_while' not in self._params.options, '`repeat` must be placed in `take_while` front.'
        assert isinstance(repeat_size, int) and repeat_size>0, '`repeat_size` type should be int and greater than 0.'
        self._params.data = self._params.data*(repeat_size+1)
        self._params.options['repeat'].update({self._params.step: {'repeat_size':repeat_size}})
        self._params.step += 1
        return self
        
#     def shard(self, shard_size, shard_index):
#         """Creates a Dataset that includes only 1/num_shards of this dataset.
        
#         Args:
#             shard_size: representing the number of shards operating in parallel.
#             shard_index: representing the worker index.
#         """
#         assert 'take_while' not in self.params.options, '`shard` must be placed in `take_while` front.'
#         assert isinstance(shard_size, int) and shard_size>0, '`shard_size` type should be int and greater than 0.'
#         assert isinstance(shard_index, int) and shard_index>=0, '`shard_index` type should be int and greater than or equal to 0.'
#         self.params.data_index = [self.params.data_index[i] for i in range(shard_index, len(self.params.data_index), shard_size)]
#         self.params.options['shard'].append((self.params.step, shard_size, shard_index))
#         self.params.step += 1
#         return self
    
    def shuffle(self, shuffle_size, seed=None):
        """Randomly shuffles the elements of this dataset.
        
        Args:
            shuffle_size: representing the number of elements from this dataset from which the new dataset will sample.
            seed: representing the random seed that will be used to create the distribution.
        """
        assert 'shuffle' not in self._params.options, '`shuffle` already exists.'
        assert 'take_while' not in self._params.options, '`shuffle` must be placed in `take_while` front.'
        assert isinstance(shuffle_size, int) and shuffle_size>-2 and shuffle_size!=0, '`shuffle_size` type should be int and greater than 0 or equal to -1.'
        self._params.shuffle_size = shuffle_size
        # Iteration reads the seed from `shuffle_seed`; it used to be recorded
        # only inside `options`, leaving that attribute unset.
        self._params.shuffle_seed = seed
        self._params.options['shuffle'].update({self._params.step: {'shuffle_size':shuffle_size, 'seed':seed}})
        self._params.step += 1
        return self
    
    def skip(self, skip_size):
        """Creates a Dataset that skips count elements from this dataset.
        
        Skip all data for the first file at most.
        
        Args:
            skip_size: representing the number of elements of this dataset that should be skipped to form the new dataset. 
                       If count is greater than the size of this dataset, the new dataset will contain no elements.
        """
        assert 'skip' not in self._params.options, '`skip` already exists.'
        assert 'take_while' not in self._params.options, '`skip` must be placed in `take_while` front.'
        assert isinstance(skip_size, int) and skip_size>0, '`skip_size` type should be int and greater than 0.'
        self._params.skip_size = skip_size
        self._params.options['skip'].update({self._params.step: {'skip_size':skip_size}})
        self._params.step += 1
        return self
        
    def take(self, take_size):
        """Creates a Dataset with at most count elements from this dataset.
        
        Args:
            take_size: representing the number of elements of this dataset that should be taken to form the new dataset. 
                       If count is -1, or if count is greater than the size of this dataset, 
                       the new dataset will contain all elements of this dataset.
        """
        assert 'take' not in self._params.options, '`take` already exists.'
        assert 'take_while' not in self._params.options, '`take` must be placed in `take_while` front.'
        assert isinstance(take_size, int) and take_size>-2 and take_size!=0, '`take_size` type should be int and greater than 0 or equal to -1.'
        self._params.take_size = take_size
        self._params.options['take'].update({self._params.step: {'take_size':take_size}})
        self._params.step += 1
        return self
    
#     def take_while(self, take_func):
#         """A transformation that stops dataset iteration based on a take_func.
        
#         Args:
#             take_func: A function that return True or False
#         """
#         temp = set()
#         index = self.params.data_index[:max([self.params.data_index.index(i) for i in range(len(self.params.data))])+1]
#         for r, i in enumerate(index):
#             if i in temp:
#                 continue
#             temp.add(i)
#             if self.params.data_mode=='list':
#                 if take_func([j[i] for j in self.params.data]):
#                     self.params.data_index = self.params.data_index[:r]
#                     break
#             else:
#                 if take_func(self.params.data[i]):
#                     self.params.data_index = self.params.data_index[:r]
#                     break
#         self.params.options['take_while'].append((self.params.step, take_func))
#         self.params.step += 1
#         return self

    def to_tensor(self, mode='tf'):
        """Transform data from numpy array to tensor.
        
        Args:
            mode: Deep learning framework name, one of ['tf', 'pytorch', 'paddle', 'mxnet', 'mindspore'].
        Raises:
            ValueError: if ``mode`` is not a recognised framework name.
        """
        assert 'to_tensor' not in self._params.options, '`to_tensor` already exists.'
        # The message used to name `take`, which pointed at the wrong call.
        assert 'take_while' not in self._params.options, '`to_tensor` must be placed in `take_while` front.'
        if mode in ['tf', 'tensorflow']:
            from tensorflow import convert_to_tensor
            self._params.framework = convert_to_tensor
        elif mode in ['pytorch', 'torch']:
            from torch import as_tensor
            self._params.framework = as_tensor
        elif mode in ['paddle', 'paddlepaddle']:
            from paddle import to_tensor
            self._params.framework = to_tensor
        elif mode in ['mx', 'mxnet']:
            from mxnet.ndarray import array
            self._params.framework = array
        elif mode in ['mindspore']:
            from mindspore.numpy import array
            self._params.framework = array
        else:
            raise ValueError('`mode` value error.')
        self._params.tensor_mode = mode
        self._params.options['to_tensor'].update({self._params.step: {'mode':mode}})
        self._params.step += 1
        return self
    
    def _to_tensor(self, data):
        """Return ``data`` unchanged in 'numpy' mode, else convert it with the chosen framework."""
        if self._params.tensor_mode=='numpy':
            return data
        return self._params.framework(data)
    
    def __iter__(self):
        """Open the current source file and load its first chunk."""
        # Round the chunk size up to a whole number of batches.
        self._params.shuffle_size = np.ceil(max(self._params.shuffle_size, self._params.prefetch_size, 1)/self._params.batch_size)*self._params.batch_size
        self._params.df = pd.read_csv(self._params.data[self._params.batch_file], sep=self._params.sep, 
                                     iterator=True, header=self._params.header, skiprows=self._params.skip_size)
        self._params.values = self._params.df.get_chunk(self._params.shuffle_size)
        if 'shuffle' in self._params.options:
            self._params.values = self._params.values.sample(frac=1, random_state=self._params.shuffle_seed).reset_index(drop=True)
        self._params.batch_file += 1
        self._params.batch_index = 0
        return self
        
    def __next__(self):
        """Return the next batch, refilling from the reader or the next file as needed.

        Raises:
            StopIteration: when every file is exhausted or ``take_size`` rows
                have already been produced.
        """
        # Positional slicing: `.loc[a:b]` includes label b, which produced
        # batches of batch_size+1 rows that overlapped by one row.
        start = self._params.batch_size*self._params.batch_index
        values = self._params.values.iloc[start:start+self._params.batch_size]
        if len(values)<self._params.batch_size:
            # Current chunk is used up (or only a partial tail is left): load
            # the next chunk and restart batch numbering within it.
            self._params.batch_index = 0
            try:
                self._params.values = self._params.df.get_chunk(self._params.shuffle_size).reset_index(drop=True)
            except StopIteration:
                if self._params.batch_file==len(self._params.data):
                    raise StopIteration
                # Move on to the next file; skiprows is not applied here.
                self._params.df = pd.read_csv(self._params.data[self._params.batch_file], sep=self._params.sep, header=self._params.header, iterator=True)
                self._params.values = self._params.df.get_chunk(self._params.shuffle_size)
                self._params.batch_file += 1
            if 'shuffle' in self._params.options:
                self._params.values = self._params.values.sample(frac=1, random_state=self._params.shuffle_seed).reset_index(drop=True)
            values = self._params.values.iloc[:self._params.batch_size]
            
        self._params.batch += 1
        self._params.batch_index += 1
        if self._params.take_size>0:
            if self._params.sample>=self._params.take_size:
                raise StopIteration
            self._params.sample += len(values)
        if 'map' in self._params.options:
            return self._to_tensor(values.apply(self._params.map_func, axis=1).values)
        return self._to_tensor(values.values)
Ejemplo n.º 11
0
import matplotlib.pyplot as plt

from linora.utils._config import Config

__all__ = ['Options']

# Lookup tables that map attribute-friendly names to the literal style strings
# they stand for (presumably the values matplotlib expects - confirm at call sites).
Options = Config()
Options.cmap = Config(**{'viridis':'viridis', 'jet':'jet'})
Options.dash_capstyle = Config(**{'butt': 'butt', 'projecting': 'projecting', 'round': 'round'})
Options.dash_joinstyle = Config(**{'miter': 'miter', 'round': 'round', 'bevel': 'bevel'})
Options.linelink = Config(**{'steps':'steps', 'steps_pre':'steps-pre', 'steps_mid':'steps-mid', 'steps_post':'steps-post'})
Options.linestyle = Config(**{'solid':'-', 'dashed':'--', 'dashdot':'-.', 'dotted':':'})
# Font size names are hyphenated; the smallest one was previously misspelled
# as 'xx_small', which is not a recognised size name.
Options.fontsize = Config(**{
    'small_xx':'xx-small', 
    'small_x':'x-small', 
    'small':'small', 
    'medium':'medium', 
    'large':'large', 
    'large_x':'x-large', 
    'large_xx':'xx-large'})
Options.fontweight = Config(**{
    'book': 'book', 
    'normal': 'normal',
    'bold': 'bold',
    'demi': 'demi',
    'semibold': 'semibold',
    'roman': 'roman',
    'black': 'black',
    'extra bold': 'extra bold',
    'light': 'light',
    'regular': 'regular',
Ejemplo n.º 12
0
class DataSet():
    def __init__(self):
        """Create the dataset and populate its default state via `_params_init`."""
        self._params_init()

    def _params_init(self):
        self._params = Config()
        self._params.step = 1
        self._params.tensor = 'numpy'
        self._params.mode = 'total'
        self._params.mode1 = 'total'
        self._params.index_data = defaultdict()
        self._params.index_data['total'] = 'total'
        self._params.data_from = 'tensor'
        self._params.data = defaultdict()
        self._params.index = defaultdict(list)
        self._params.map = defaultdict(list)
        self._params.batch = defaultdict(list)
        self._params.batch[self._params.mode] = [0, False, 0]
        self._params.enumerate = defaultdict(int)
        self._params.options = defaultdict(dict)

    def batch(self, batch_size, drop_remainder=False):
        """Combines consecutive elements of this dataset into batches.
        
        Args:
            batch_size: representing the number of consecutive elements of this dataset to combine in a single batch.
            drop_remainder: representing whether the last batch should be dropped in the case it has fewer than batch_size elements; 
                            the default behavior is not to drop the smaller batch.
        """
        assert isinstance(
            batch_size, int
        ) and batch_size > 0, '`batch_size` type should be int and greater than 0.'
        self._params.batch[self._params.mode][0] = batch_size
        self._params.batch[self._params.mode][1] = drop_remainder
        self._params.options['batch'].update({
            self._params.step: {
                'batch_size': batch_size,
                'drop_remainder': drop_remainder
            }
        })
        self._params.step += 1
        return self

    def cardinality(self):
        """Returns the cardinality of the dataset, if known."""
        return len(self._params.index[self._params.mode])

    def concatenate(self, datasets):
        """Creates a Dataset by concatenating the given dataset with this dataset.
        
        Args:
            datasets: la.data.Dataset or list of la.data.Dataset to be concatenated.
        """
        assert 'take_while' not in self._params.options, '`concatenate` must be placed in `take_while` front.'
        if not isinstance(datasets, list):
            self._concatenate(datasets)
        else:
            for dataset in datasets:
                assert self._params.data_mode == dataset._params.data_mode, 'The data types of the two data sets are inconsistent.'
            for dataset in datasets:
                self._concatenate(dataset)
        self._params.options['concatenate'].update({self._params.step: None})
        self._params.step += 1
        return self

    def _concatenate(self, dataset):
        if 'list' in self._params.data_mode:
            t = len(self._params.data[self._params.mode1][0])
        else:
            t = len(self._params.data[self._params.mode1])
        if 'list' in self._params.data_mode:
            assert len(self._params.data[self._params.mode1]) == len(
                dataset._params.data[self._params.mode1]
            ), 'Width needs to be consistent between data.'
            self._params.data[self._params.mode1] = [
                np.concatenate([
                    self._params.data[self._params.mode1][i],
                    dataset._params.data[self._params.mode1][i]
                ]) for i in range(len(self._params.data[self._params.mode1]))
            ]
        else:
            self._params.data[self._params.mode1] = np.concatenate([
                self._params.data[self._params.mode1],
                dataset._params.data[self._params.mode1]
            ])
        self._params.index[self._params.mode] += [
            i + t for i in dataset._params.index[dataset._params.mode]
        ]

    def drop(self, names):
        """Drop current dataset.
        
        Args:
            name: str or list, drop dataset name.
        """
        if isinstance(names, str):
            names = [names]
        for name in names:
            assert name != 'total', "`name` can't be 'total'."
        for name in names:
            if name in self._params.index_data:
                self._params.index_data.pop(name)
            if name in self._params.data:
                if name in [j for i, j in self._params.index_data.items()]:
                    name1 = str(time.time()).split('.')[0]
                    self._params.data[name1] = self._params.data.pop(name)
                    for i, j in self._params.index_data.items():
                        if name == j:
                            self._params.index_data[i] = name1
                else:
                    self._params.data.pop(name)

            if name in self._params.index:
                self._params.index.pop(name)

            if name in self._params.map:
                self._params.map.pop(name)

            if name in self._params.batch:
                self._params.batch.pop(name)

            if name in self._params.enumerate:
                self._params.enumerate.pop(name)

            if self._params.mode == name:
                self._params.mode = 'total'
                self._params.mode1 = 'total'

            for i in list(self._params.data):
                if i not in [j for k, j in self._params.index_data.items()]:
                    self._params.data.pop(i)
        return self

    def enumerate(self, start=0):
        """Enumerates the elements of this dataset.
        
        Args:
            start: int, representing the start value for enumeration.
        """
        self._params.enumerate[self._params.mode] = start
        self._params.options['enumerate'].update(
            {self._params.step: {
                'start': start
            }})
        self._params.step += 1
        return self

    def filter(self, filter_func):
        """A transformation that filter dataset based on a filter_func.
        
        Args:
            filter_func: A function that return True or False, datasets that are kept as True.
        """
        if self._params.data_mode == 'list':
            filter_list = [
                i for i in range(len(self._params.data[self._params.mode1][0]))
                if filter_func(
                    [j[i] for j in self._params.data[self._params.mode1]])
            ]
        else:
            filter_list = [
                r for r, i in enumerate(self._params.data[self._params.mode1])
                if filter_func(i)
            ]
        if filter_list:
            self._params.index[self._params.mode] = [
                i for i in self._params.index[self._params.mode]
                if i in filter_list
            ]
        self._params.options['filter'].update(
            {self._params.step: {
                'filter_func': filter_func
            }})
        self._params.step += 1
        return self

    def get(self, name):
        """Select current dataset.
        
        Args:
            name: split dataset name.
        """
        assert name in self._params.index, '`name` not in split dataset.'
        if self._params.batch[name][2] == -1:
            self._params.batch[name][0] = 0
        self._params.batch[name][2] = 0

        self._params.mode = name
        self._params.mode1 = self._params.index_data[name]

        for i in self._params.data:
            if i not in [j for k, j in self._params.index_data.items()]:
                self._params.data.pop(i)
        return self

    def join(self, join_dict, drop_exist_dataset=True):
        """Join Dataset.
        
        Args:
            join_dict: dict, {name: Dataset}, eg.{'train':la.data.Dataset.from_tensor()}.
            drop_exist_dataset: bool, If the name of the dataset is repeated, drop self exist dataset.
        """
        for name in join_dict:
            assert name != 'total', "`name` can't be 'total'."
        for name in join_dict:
            if name in self._params.index:
                if drop_exist_dataset:
                    self.drop(name)
                    self._join(name, join_dict)
            else:
                self._join(name, join_dict)
        return self

    def _join(self, name, join_dict):
        self._params.data[name] = join_dict[name]._params.data[
            join_dict[name]._params.mode1].copy()
        self._params.index[name] = join_dict[name]._params.index[
            join_dict[name]._params.mode].copy()

        self._params.map[name] = join_dict[name]._params.map[
            join_dict[name]._params.mode].copy()
        self._params.batch[name] = join_dict[name]._params.batch[
            join_dict[name]._params.mode].copy()

        if join_dict[name]._params.mode in join_dict[name]._params.enumerate:
            self._params.enumerate[name] = join_dict[name]._params.enumerate[
                join_dict[name]._params.mode].copy()

        self._params.index_data[name] = name

    def list_names(self):
        """list datasets name."""
        return [i for i in self._params.index]

    def map(self, map_func, map_size=8):
        """Maps map_func across the elements of this dataset.
        
        Args:
            map_func: A function mapping a dataset element to another dataset element.
            map_size: representing the number elements to process asynchronously in parallel. 
        """
        assert isinstance(
            map_size, int
        ) and map_size > 0, '`map_size` type should be int and greater than 0.'
        self._params.map[self._params.mode] = [map_func, map_size]
        self._params.options['map'].update(
            {self._params.step: {
                'map_func': map_func,
                'map_size': map_size
            }})
        self._params.step += 1
        return self

    def options(self):
        """Returns the options for this dataset and its inputs."""
        return self._params.options

    def prefetch(self, prefetch_size):
        """Creates a Dataset that prefetches elements from this dataset.
        
        Args:
            prefetch_size: representing the maximum number of elements that will be buffered when prefetching.
        """
        assert 'take_while' not in self._params.options, '`prefetch` must be placed in `take_while` front.'
        assert isinstance(
            prefetch_size, int
        ) and prefetch_size > 0, '`prefetch_size` type should be int and greater than 0.'
        self._params.options['prefetch'].update(
            {self._params.step: {
                'prefetch_size': prefetch_size
            }})
        self._params.step += 1
        return self

    def reduce(self, reduce_func):
        """Reduces the input dataset to a single element.
        
        Args:
            reduce_func: A function that maps to new_state. It must take two arguments and return a new element
        """
        if self._params.data_mode == 'list':
            return [
                functools.reduce(reduce_func,
                                 i[self._params.index[self._params.mode]])
                for i in self._params.data[self._params.mode1]
            ]
        return functools.reduce(
            reduce_func, self._params.data[self._params.mode1][
                self._params.index[self._params.mode]])

    def rename(self, name_dict):
        """Rename current dataset.
        
        Args:
            name_dict: rename dataset name dict, eg.{'train':'train_set'}.
        """
        for name in name_dict:
            assert name != 'total', "`name` can't be 'total'."
            assert name_dict[name] != 'total', "`name` can't be 'total'."
            assert name in self._params.index, "name not exist."
            assert name_dict[
                name] not in self._params.index, "name already exist."
        for name in name_dict:
            if name in self._params.data:
                self._params.data[name_dict[name]] = self._params.data.pop(
                    name)

            if name in self._params.index:
                self._params.index[name_dict[name]] = self._params.index.pop(
                    name)

            if name in self._params.index_data:
                self._params.index_data[
                    name_dict[name]] = self._params.index_data.pop(name)
            for i, j in self._params.index_data.items():
                if name == j:
                    self._params.index_data[i] = [name_dict[name]]

            if name in self._params.map:
                self._params.map[name_dict[name]] = self._params.map.pop(name)

            if name in self._params.batch:
                self._params.batch[name_dict[name]] = self._params.batch.pop(
                    name)

            if name in self._params.enumerate:
                self._params.enumerate[
                    name_dict[name]] = self._params.enumerate.pop(name)

            if self._params.mode == name:
                self._params.mode = name_dict[name]
                self._params.mode1 = self._params.index_data[self._params.mode]
        return self

    def repeat(self, repeat_size):
        """Repeats this dataset so each original value is seen count times.
        
        Args:
            repeat_size: representing the number of times the dataset should be repeated.
        """
        assert 'take_while' not in self._params.options, '`repeat` must be placed in `take_while` front.'
        assert isinstance(
            repeat_size, int
        ) and repeat_size > 0, '`repeat_size` type should be int and greater than 0.'
        self._params.index[self._params.mode] = self._params.index[
            self._params.mode] * (repeat_size + 1)
        self._params.options['repeat'].update(
            {self._params.step: {
                'repeat_size': repeat_size
            }})
        self._params.step += 1
        return self

    def shard(self, shard_size, shard_index):
        """Creates a Dataset that includes only 1/num_shards of this dataset.
        
        Args:
            shard_size: representing the number of shards operating in parallel.
            shard_index: representing the worker index.
        """
        assert 'take_while' not in self._params.options, '`shard` must be placed in `take_while` front.'
        assert isinstance(
            shard_size, int
        ) and shard_size > 0, '`shard_size` type should be int and greater than 0.'
        assert isinstance(
            shard_index, int
        ) and shard_index >= 0, '`shard_index` type should be int and greater than or equal to 0.'
        self._params.index[self._params.mode] = [
            self._params.index[self._params.mode][i] for i in range(
                shard_index, len(self._params.index[self._params.mode]),
                shard_size)
        ]
        self._params.options['shard'].update({
            self._params.step: {
                'shard_size': shard_size,
                'shard_index': shard_index
            }
        })
        self._params.step += 1
        return self

    def shuffle(self, shuffle_size, seed=None):
        """Randomly shuffles the elements of this dataset.
        
        Args:
            shuffle_size: representing the number of elements from this dataset from which the new dataset will sample.
            seed: representing the random seed that will be used to create the distribution.
        """
        assert 'take_while' not in self._params.options, '`shuffle` must be placed in `take_while` front.'
        assert isinstance(
            shuffle_size, int
        ) and shuffle_size > -2 and shuffle_size != 0, '`shuffle_size` type should be int and greater than 0 or equal to -1.'
        if isinstance(self._params.index[self._params.mode], list):
            self._params.index[self._params.mode] = pd.Series(
                index=self._params.index[self._params.mode], data=1).index
        if shuffle_size > 0:
            t = [
                self._params.index[self._params.mode][shuffle_size *
                                                      i:shuffle_size *
                                                      (i + 1)].to_list()
                for i in range(
                    len(self._params.index[self._params.mode]) //
                    shuffle_size + 1)
            ]
            [
                random.shuffle(
                    i,
                    random=lambda:
                    ((seed if seed is not None else random.randint(1, 99)) +
                     self._params.batch[self._params.mode][2]) % 10 / 10)
                for i in t
            ]
            self._params.index[self._params.mode] = list(
                itertools.chain.from_iterable(t))
        else:
            self._params.index[self._params.mode] = self._params.index[
                self._params.mode].to_series().sample(
                    frac=1, random_state=seed).tolist()
        self._params.options['shuffle'].update(
            {self._params.step: {
                'shuffle_size': shuffle_size,
                'seed': seed
            }})
        self._params.step += 1
        return self

    def skip(self, skip_size):
        """Creates a Dataset that skips count elements from this dataset.
        
        Args:
            skip_size: representing the number of elements of this dataset that should be skipped to form the new dataset. 
                       If count is greater than the size of this dataset, the new dataset will contain no elements.
        """
        assert 'take_while' not in self._params.options, '`skip` must be placed in `take_while` front.'
        assert isinstance(
            skip_size, int
        ) and skip_size > 0, '`skip_size` type should be int and greater than 0.'
        self._params.index[self._params.mode] = self._params.index[
            self._params.mode][skip_size:]
        self._params.options['skip'].update(
            {self._params.step: {
                'skip_size': skip_size
            }})
        self._params.step += 1
        return self

    def split(self, split_dict, shuffle=True, seed=None):
        """Split Dataset.
        
        Args:
            split_dict: dict, {data_name:data_rate}, eg.{'train':0.7, 'test':0.3}.
            shuffle: whether randomly shuffles the elements of this dataset.
            seed: random seed.
        """
        for i in split_dict:
            assert i not in self._params.index, f"`{i}` has exist."
            assert i != 'total', "`split_dict` key can't be 'total'."
        t = sum(split_dict[i] for i in split_dict)
        t = {i: split_dict[i] / t for i in split_dict}
        if self._params.data_from in ['from_folder', 'from_class_folder']:
            if isinstance(self._params.data[self._params.mode1], list):
                label = self._params.data[self._params.mode1][1][
                    self._params.index[self._params.mode]]
                index = np.array(self._params.index[self._params.mode])
                for i in np.unique(label):
                    index1 = index[label == i].tolist()
                    n = 0
                    for j in t:
                        self._params.index[j] += index1[n:n + int(t[j] *
                                                                  len(index1))]
                        n += int(t[j] * len(index1))
                if shuffle:
                    for i in t:
                        self._params.index[i] = pd.Series(
                            self._params.index[i]).sample(
                                frac=1, random_state=seed).tolist()
            else:
                self._split(t, shuffle, seed)
        else:
            self._split(t, shuffle, seed)

        for i in split_dict:
            self._params.batch[i] = [0, False, 0]
            self._params.index_data[i] = self._params.mode1
        return self

    def _split(self, t, shuffle, seed):
        if shuffle:
            index = pd.Series(self._params.index[self._params.mode]).sample(
                frac=1, random_state=seed).tolist()
        else:
            index = self._params.index[self._params.mode]
        n = 0
        for i in t:
            self._params.index[i] += index[n:n + int(t[i] * len(index))]
            n += int(t[i] * len(index))

    def take(self, take_size):
        """Creates a Dataset with at most count elements from this dataset.
        
        Args:
            take_size: representing the number of elements of this dataset that should be taken to form the new dataset. 
                       If count is -1, or if count is greater than the size of this dataset, 
                       the new dataset will contain all elements of this dataset.
        """
        assert 'take_while' not in self._params.options, '`take` must be placed in `take_while` front.'
        assert isinstance(
            take_size, int
        ) and take_size > -2 and take_size != 0, '`take_size` type should be int and greater than 0 or equal to -1.'
        if take_size != -1:
            self._params.index[self._params.mode] = self._params.index[
                self._params.mode][:take_size]
        self._params.options['take'].update(
            {self._params.step: {
                'take_size': take_size
            }})
        self._params.step += 1
        return self

    def take_while(self, take_func):
        """A transformation that stops dataset iteration based on a take_func.
        
        Args:
            take_func: A function that return True or False
        """
        temp = set()
        index = self._params.index[self._params.mode][:max([
            self._params.index[self._params.mode].index(i)
            for i in range(len(self._params.data[self._params.mode1]))
        ]) + 1]
        for r, i in enumerate(index):
            if i in temp:
                continue
            temp.add(i)
            if 'list' in self._params.data_mode:
                if take_func(
                    [j[i] for j in self._params.data[self._params.mode1]]):
                    self._params.index[self._params.mode] = self._params.index[
                        self._params.mode][:r]
                    break
            else:
                if take_func(self._params.data[self._params.mode1][i]):
                    self._params.index[self._params.mode] = self._params.index[
                        self._params.mode][:r]
                    break
        self._params.options['take_while'].update(
            {self._params.step: {
                'take_func': take_func
            }})
        self._params.step += 1
        return self

    def to_tensor(self, mode='tf'):
        """Transform data from numpy array to tensor.
        
        Args:
            mode: Deep learning framework name, one of ['tf', 'pytorch', 'paddle', 'mxnet', 'mindspore'].
        """
        assert 'to_tensor' not in self._params.options, '`to_tensor` already exists.'
        assert 'take_while' not in self._params.options, '`take` must be placed in `take_while` front.'
        if mode in ['tf', 'tensorflow']:
            from tensorflow import convert_to_tensor
            self._params.framework = convert_to_tensor
        elif mode in ['pytorch', 'torch']:
            from torch import as_tensor
            self._params.framework = as_tensor
        elif mode in ['paddle', 'paddlepaddle']:
            from paddle import to_tensor
            self._params.framework = to_tensor
        elif mode in ['mx', 'mxnet']:
            from mxnet.ndarray import array
            self._params.framework = array
        elif mode in ['mindspore']:
            from mindspore.numpy import array
            self._params.framework = array
        else:
            raise ValueError('`mode` value error.')
        self._params.tensor = mode
        self._params.options['to_tensor'].update(
            {self._params.step: {
                'mode': mode
            }})
        self._params.step += 1
        return self

    def unbatch(self):
        """Splits elements of a dataset into multiple elements."""
        assert not isinstance(self._params.data[self._params.mode1],
                              list), 'Input data cannot be a tuple.'
        assert self._params.mode == 'total', f'{self._params.mode} dataset not supported.'
        self._params.data[self._params.mode1] = np.array(
            list(
                itertools.chain.from_iterable(
                    self._params.data[self._params.mode1])))
        self._params.index[self._params.mode] = list(
            range(len(self._params.data[self._params.mode1])))
        return self

    def unique(self):
        """A transformation that discards duplicate elements of a Dataset."""
        if isinstance(self._params.data[self._params.mode1], list):
            return tuple([
                np.unique(i) for i in self._params.data[self._params.mode1][
                    self._params.index[self._params.mode]]
            ])
        else:
            return np.unique(self._params.data[self._params.mode1][
                self._params.index[self._params.mode]])

    def _to_tensor(self, data):
        if self._params.tensor == 'numpy':
            return data
        return self._params.framework(data)

    def _data_mode(self):
        self._params.data_mode = 'list_array' if isinstance(
            self._params.data[self._params.mode1], list) else 'array'
        if isinstance(self._params.data[self._params.mode1], list):
            t = [i[0] for i in self._params.data[self._params.mode1]]
        else:
            t = self._params.data[self._params.mode1][0]
        if isinstance(t, str):
            if isfile(t):
                if t.split('.')[-1] in [
                        'png', 'jpg', 'jpeg', 'bmp', 'rgb', 'tif', 'tiff',
                        'webp'
                ]:
                    self._params.data_mode = 'image'
        elif isinstance(t, list):
            for i in t:
                if isinstance(i, str):
                    if isfile(i):
                        if i.split('.')[-1] in [
                                'png', 'jpg', 'jpeg', 'bmp', 'rgb', 'tif',
                                'tiff', 'webp'
                        ]:
                            self._params.data_mode = 'list_image'

    def __iter__(self):
        if 'list' in self._params.data_mode:
            if self._params.mode in self._params.map:
                self._batch_func = self._batch_list_map
            else:
                self._batch_func = self._batch_list
        elif self._params.mode in self._params.map:
            self._batch_func = self._batch_map
        else:
            self._batch_func = self._batch
        return self

    def __next__(self):
        if self._params.batch[self._params.mode][0] == 0:
            self._params.batch[self._params.mode][0] = 1
            self._params.batch[self._params.mode][2] = -1
            if self._params.mode in self._params.enumerate:
                self._params.enumerate[self._params.mode] += 1
                return (self._params.enumerate[self._params.mode] - 1,
                        self._to_tensor(
                            self._batch_func(
                                self._params.index[self._params.mode])))
            return self._to_tensor(
                self._batch_func(self._params.index[self._params.mode]))
        loc = self._params.index[
            self._params.mode][self._params.batch[self._params.mode][0] *
                               self._params.batch[self._params.mode][2]:self.
                               _params.batch[self._params.mode][0] *
                               (self._params.batch[self._params.mode][2] + 1)]
        if len(loc) == 0:
            raise StopIteration
        elif len(loc) < self._params.batch[self._params.mode][0]:
            if self._params.batch[self._params.mode][1]:
                raise StopIteration
        self._params.batch[self._params.mode][2] += 1
        if self._params.mode in self._params.enumerate:
            self._params.enumerate[self._params.mode] += 1
            return (self._params.enumerate[self._params.mode] - 1,
                    self._to_tensor(self._batch_func(loc)))
        return self._to_tensor(self._batch_func(loc))