def stop(self, event=''):
    """Report the time elapsed since the last mark and restart the timer.

    The elapsed value is ``self.gettime() - self._start``, expressed in
    whatever unit ``self.gettime()`` returns. It is written to the project
    logger at debug level together with ``event``, after which
    ``self._start`` is reset to a fresh ``self.gettime()`` reading.

    Args:
        event: Label placed in front of the elapsed time in the log line.

    Returns:
        The elapsed value that was logged.
    """
    # Read the clock before importing the logger so that a first-time
    # import is not counted in the measured span.
    now = self.gettime()
    elapsed = now - self._start

    import src.experiment.logger as logging

    logging.debug('{} cost time {}', event, elapsed)

    # A second reading is taken (rather than reusing ``now``) so the next
    # span starts after the logging call has finished.
    self._start = self.gettime()
    return elapsed
def process_df_by_sections(df, n_section, func, tmp_output=None,
                           ignore_index=True):
    """Apply ``func`` to ``df`` one section at a time and merge the results.

    Row positions ``0 .. len(df) - 1`` are split by ``chunks`` into pieces of
    ``ceil(len(df) / n_section)`` positions each; every piece is selected
    with ``df.iloc`` and handed to ``func``. A section whose processing
    raises is logged and skipped so that the remaining sections still run.

    Args:
        df: DataFrame to process.
        n_section: Requested number of sections. Must be non-zero.
        func: Callable that receives a sub-DataFrame and returns the
            processed result for that section.
        tmp_output: Optional callable invoked as ``tmp_output(i, result)``
            after each processed section, where ``i`` is the section index.
            An exception raised here marks the section as failed, although
            its result has already been collected.
        ignore_index: Forwarded to ``pd.concat`` when merging the results.

    Returns:
        The ``pd.concat`` of the collected section results.

    Raises:
        ValueError: Propagated from ``pd.concat`` when no result was
            collected (for example, every section failed).
    """
    import src.experiment.logger as logging

    size = df.shape[0]
    # Ceiling division. ``//`` keeps the section size an int on Python 3,
    # where ``/`` would produce a float that cannot be used as a chunk size.
    section_size = size // n_section + (size % n_section > 0)

    results = []
    n_seen = 0
    n_success = 0
    for i, section in enumerate(chunks(range(size), section_size)):
        n_seen += 1
        try:
            logging.debug('processing section {} of {}...', i, n_section)
            sub = func(df.iloc[section])
            results.append(sub)
            if tmp_output is not None:
                tmp_output(i, sub)
            n_success += 1
        except Exception as e:
            # Deliberately best-effort: record the failure and carry on with
            # the next section instead of aborting the whole run.
            logging.debug('exception when processing section {}', i)
            logging.debug('jump this section for now and remember to come back to it')
            logging.debug('{}', e)

    # Failures are swallowed above, so summarise them for the caller's log.
    logging.debug(
        'finished processing {} sections, {} successes, '
        'remember to process skipped sections.',
        n_seen, n_success)
    logging.debug('concatenating results...')
    return pd.concat(results, ignore_index=ignore_index)
def process_data_by_sections(data, n_section, func, tmp_output=None,
                             merge_results=True, ignore_index=True):
    """Apply ``func`` to ``data`` one section at a time.

    Positions ``0 .. len(data) - 1`` are split by ``chunks`` into pieces of
    ``ceil(len(data) / n_section)`` positions each. A DataFrame is sliced
    with ``data.iloc[section]``; any other container is sliced with
    ``data[section]`` and therefore has to accept whatever ``chunks`` yields
    as an index. A section whose processing raises is logged and skipped so
    that the remaining sections still run.

    Args:
        data: DataFrame or other sized, indexable container to process.
        n_section: Requested number of sections. Must be non-zero.
        func: Callable that receives one section and returns its result.
        tmp_output: Optional callable invoked as ``tmp_output(i, result)``
            after each processed section, where ``i`` is the section index.
            An exception raised here marks the section as failed.
        merge_results: When true, collect every section result and return
            their ``pd.concat``; when false, nothing is kept in memory and a
            success flag is returned instead.
        ignore_index: Forwarded to ``pd.concat`` when merging the results.

    Returns:
        With ``merge_results`` true, the concatenated section results.
        Otherwise ``True`` if every section that was iterated succeeded and
        ``False`` if at least one was skipped.

    Raises:
        ValueError: Propagated from ``pd.concat`` when ``merge_results`` is
            true and no result was collected.
    """
    import src.experiment.logger as logging

    size = len(data)
    # Ceiling division. ``//`` keeps the section size an int on Python 3,
    # where ``/`` would produce a float that cannot be used as a chunk size.
    section_size = size // n_section + (size % n_section > 0)
    is_frame = isinstance(data, pd.DataFrame)

    results = []
    n_seen = 0
    n_success = 0
    for i, section in enumerate(chunks(range(size), section_size)):
        n_seen += 1
        try:
            logging.debug('processing section {} of {}...', i, n_section)
            sub = data.iloc[section] if is_frame else data[section]
            sub = func(sub)
            if merge_results:
                results.append(sub)
            if tmp_output is not None:
                tmp_output(i, sub)
            n_success += 1
        except Exception as e:
            # Deliberately best-effort: record the failure and carry on with
            # the next section instead of aborting the whole run.
            logging.debug('exception when processing section {}', i)
            logging.debug('jump this section for now and remember to come back to it')
            logging.debug('{}', e)

    logging.debug('finished processing {} sections, {} successes, remember to process skpped sections.', n_seen, n_success)

    if not merge_results:
        # Compare against the sections actually iterated: rounding the
        # section size up can yield fewer sections than ``n_section``, which
        # would otherwise report failure for a fully successful run.
        return n_success == n_seen

    logging.debug('concatenating results...')
    return pd.concat(results, ignore_index=ignore_index)