Beispiel #1
0
    def stop(self, event=''):
        """Report the time elapsed since the stored start mark and restart it.

        The elapsed value is ``self.gettime() - self._start``, in whatever
        unit ``gettime`` reports. It is written to the debug log together
        with ``event``; afterwards ``self._start`` is set to a fresh
        ``gettime()`` reading so the next call measures from this point.

        Args:
            event: Label placed in front of the elapsed time in the log line.

        Returns:
            The elapsed value that was logged.
        """
        # Take the reading first so the import/log overhead is not measured.
        now = self.gettime()
        elapsed = now - self._start

        import src.experiment.logger as logging
        logging.debug('{} cost time {}', event, elapsed)

        # Restart the measurement only after logging has finished.
        self._start = self.gettime()
        return elapsed
Beispiel #2
0
def process_df_by_sections(df, n_section, func, tmp_output=None, ignore_index=True):
    """Apply ``func`` to a DataFrame section by section and concatenate the results.

    Row positions ``range(len(df))`` are handed to the ``chunks`` helper with
    a section size of ``ceil(len(df) / n_section)``; each resulting section
    is selected with ``df.iloc`` and passed to ``func``. A section whose
    processing raises is logged and skipped, so the returned frame may be
    missing the rows of that section.

    Args:
        df: DataFrame to process; rows are selected positionally via ``iloc``.
        n_section: Desired number of sections; must be non-zero.
        func: Callable applied to each sub-frame; its return value is kept.
        tmp_output: Optional callable invoked as ``tmp_output(i, result)``
            after each section's result has been collected.
        ignore_index: Forwarded to ``pd.concat``.

    Returns:
        The concatenation of the results of all collected sections.

    Raises:
        ValueError: From ``pd.concat`` when no section produced a result.
    """
    import src.experiment.logger as logging

    size = df.shape[0]
    # Ceiling division. Floor division keeps the section size an integer on
    # Python 3, where ``/`` would yield a float.
    section_size = size // n_section + (size % n_section > 0)
    results = []
    n_attempted = 0
    n_success = 0
    for i, section in enumerate(chunks(range(size), section_size)):
        n_attempted += 1
        try:
            logging.debug('processing section {} of {}...', i, n_section)
            sub = func(df.iloc[section])
            results.append(sub)
            if tmp_output is not None:
                tmp_output(i, sub)
            n_success += 1
        except Exception as e:
            # Best effort: one failing section must not abort the whole run.
            logging.debug('exception when processing section {}'.format(i))
            logging.debug('jump this section for now and remember to come back to it')
            logging.debug('{}', e)

    # Make skipped sections visible; they are otherwise silently absent
    # from the concatenated result.
    logging.debug('finished processing {} sections, {} succeeded',
                  n_attempted, n_success)
    logging.debug('concatenating results...')
    return pd.concat(results, ignore_index=ignore_index)
Beispiel #3
0
def process_data_by_sections(data, n_section, func, tmp_output=None, 
                             merge_results=True, ignore_index=True):
    """Apply ``func`` to ``data`` section by section, optionally merging results.

    Positions ``range(len(data))`` are handed to the ``chunks`` helper with a
    section size of ``ceil(len(data) / n_section)``. Each section is selected
    with ``data.iloc[...]`` for a DataFrame and ``data[...]`` otherwise, then
    passed to ``func``. A section whose processing raises is logged and
    skipped.

    Args:
        data: DataFrame or other container supporting ``len`` and indexing
            by whatever ``chunks`` yields.
        n_section: Desired number of sections; must be non-zero.
        func: Callable applied to each section.
        tmp_output: Optional callable invoked as ``tmp_output(i, result)``
            after each section has been processed.
        merge_results: When true, collect every section result and return
            their ``pd.concat``; when false, results are not kept.
        ignore_index: Forwarded to ``pd.concat`` when merging.

    Returns:
        The concatenated results when ``merge_results`` is true; otherwise
        ``True`` if every attempted section succeeded, else ``False``.

    Raises:
        ValueError: From ``pd.concat`` when merging and no section produced
            a result.
    """
    import src.experiment.logger as logging

    size = len(data)
    # Ceiling division. Floor division keeps the section size an integer on
    # Python 3, where ``/`` would yield a float.
    section_size = size // n_section + (size % n_section > 0)
    results = [] if merge_results else None
    # DataFrames are sliced positionally; anything else is indexed directly.
    source = data.iloc if isinstance(data, pd.DataFrame) else data

    n_attempted = 0
    n_success = 0
    for i, section in enumerate(chunks(range(size), section_size)):
        n_attempted += 1
        try:
            logging.debug('processing section {} of {}...', i, n_section)
            sub = func(source[section])
            if merge_results:
                results.append(sub)
            if tmp_output is not None:
                tmp_output(i, sub)
            n_success += 1
        except Exception as e:
            # Best effort: one failing section must not abort the whole run.
            logging.debug('exception when processing section {}'.format(i))
            logging.debug('jump this section for now and remember to come back to it')
            logging.debug('{}', e)

    # Report and compare against the sections actually produced: their count
    # can differ from the requested n_section once the size is rounded up.
    logging.debug('finished processing {} sections, {} successes, remember to process skpped sections.',
                  n_attempted, n_success)
    if merge_results:
        logging.debug('concatenating results...')
        return pd.concat(results, ignore_index=ignore_index)
    return n_attempted == n_success