def fetch_data(task_target_date, period=datetime.timedelta(days=7)):
    """Fetch wine-quality data for each date in the period and combine it.

    Args:
        task_target_date: Reference date passed to ``period_dates``.
        period: Window passed to ``period_dates``; defaults to seven days.

    Returns:
        Whatever ``data_combine`` returns for the list of per-date fetches,
        called with ``sort=True``.
    """
    per_date_results = [
        fetch_wine_quality(task_target_date=day)
        for day in period_dates(task_target_date, period)
    ]
    return data_combine(per_date_results, sort=True)
def pipe1(value=1.0):
    # type: (float)-> Any
    """Run ``calc_1`` and ``calc_2`` on ``value`` and sum the combined result.

    Args:
        value: Input forwarded unchanged to both calculations.

    Returns:
        The result of ``sum_values`` applied to ``data_combine`` of the two
        calculation outputs (``calc_1`` first, then ``calc_2``).
    """
    # List elements are evaluated left to right, so calc_1 still runs first.
    partial_results = [calc_1(value), calc_2(value)]
    return sum_values(data_combine(partial_results))
def band(self):
    """Build one ``TComplicatedTask`` per date and publish the outputs.

    For every date yielded by ``period_dates(self.task_target_date,
    self.period)`` a ``TDataSource`` is created and fed into a
    ``TComplicatedTask``.  The method then sets:

    * ``self.combined_output`` - ``data_combine`` of every task's
      ``simplest_output``;
    * ``self.list_output`` - the list of those ``simplest_output`` values;
    * ``self.list_tasks_output`` - a list of the task objects themselves;
    * ``self.nested`` - ``some_output`` of two ``TNestedPipeline1`` instances;
    * ``self.nested2`` - ``some_output`` of ``TNestedPipeline2``;
    * ``self.nested3`` - ``some_output`` of ``TSuperNestedPipeline`` built
      with ``self.list_param``.
    """
    config = SomeConfig()
    tasks = []
    for day in period_dates(self.task_target_date, self.period):
        day_source = TDataSource(task_target_date=day)
        tasks.append(
            TComplicatedTask(
                specific_input=day_source,
                some_param=config.some_param,
                task_input=day_source.logs,
                task_target_date=day,
            )
        )

    # The outputs are collected twice on purpose: once for the combined
    # value and once for the list attribute, exactly as separate lists.
    self.combined_output = data_combine([task.simplest_output for task in tasks])
    self.list_output = [task.simplest_output for task in tasks]
    self.list_tasks_output = list(tasks)

    first_nested = TNestedPipeline1(task_name="MyNewPipe").some_output
    second_nested = TNestedPipeline1(task_name="custom_task_name").some_output
    self.nested = [first_nested, second_nested]

    self.nested2 = TNestedPipeline2().some_output
    self.nested3 = TSuperNestedPipeline(list_parameter=self.list_param).some_output
def band(self):
    """Fetch ids and data for each date and combine them per kind.

    For every date yielded by ``period_dates(self.task_target_date,
    self.period)`` the ids come from ``FetchIds`` (with ``period=one_day``)
    and the data from ``FetchData`` given those ids.  Results are stored
    under the date formatted as ``YYYY-MM-DD``, so a repeated date keeps
    only its last result.  Finally ``self.ids`` and ``self.data`` are set to
    ``data_combine(..., sort=True)`` over the collected values.
    """
    # NOTE: the original carried a disabled (commented-out) branch that read
    # "ids" and "data" through cb_data_dump_path when
    # self.task_env == TaskEnv.prod and not self.run_on_prod.
    ids_by_day = {}
    data_by_day = {}
    for day in period_dates(self.task_target_date, self.period):
        fetched_ids = FetchIds(task_target_date=day, period=one_day).ids
        fetched_data = FetchData(task_target_date=day, ids=fetched_ids).data

        day_key = day.strftime("%Y-%m-%d")
        ids_by_day[day_key] = fetched_ids
        data_by_day[day_key] = fetched_data

    self.ids = data_combine(ids_by_day.values(), sort=True)
    self.data = data_combine(data_by_day.values(), sort=True)
def band(self):
    """Project the raw device logs of each date and combine the projections.

    For every date yielded by ``period_dates(self.task_target_date,
    self.period)`` the ``logs`` of a ``RawDeviceLog`` are passed to a
    ``DeviceLogProjection``; the ``projected_logs`` of all projections are
    merged with ``data_combine`` and stored in ``self.projected``.
    """
    # Keyword arguments are evaluated left to right, so the raw logs are
    # still obtained before the projection is constructed.
    per_day_projections = [
        DeviceLogProjection(
            raw_logs=RawDeviceLog(task_target_date=day).logs,
            task_target_date=day,
        ).projected_logs
        for day in period_dates(self.task_target_date, self.period)
    ]
    self.projected = data_combine(per_day_projections)
def top_artists_report(task_target_date, period=timedelta(days=2)):
    """Build the top-artists report from one stream per date.

    Args:
        task_target_date: Reference date passed to ``period_dates``.
        period: Window passed to ``period_dates``; defaults to two days.

    Returns:
        The result of ``top_n_artists`` applied to the artists aggregated
        from the combined streams.
    """
    logging.info("top_artists_report")

    streams = []
    for index, day in enumerate(period_dates(task_target_date, period)):
        # Each stream is named after its position in the period.
        streams.append(
            stream(task_name="Stream_%s" % index, task_target_date=day)
        )

    combined_streams = data_combine(streams)
    artists = aggregate_artists(stream=combined_streams)
    return top_n_artists(artists=artists)
def output_per_id_report(partners):
    # type: (List[int]) -> ...
    """Produce two reports from per-partner slices of the ``my_table`` dump.

    Args:
        partners: Partner identifiers; each one is used to filter the dump
            with ``filter_by_id``.

    Returns:
        A tuple ``(report_one_by_one, report_from_dataframe)``: the first is
        built from the partner-to-slice mapping, the second from the
        ``data_combine`` of all slices.
    """
    dump = dump_table_from_db(table="my_table")

    partners_info = {}
    for partner in partners:
        partners_info[partner] = filter_by_id(dump, partner)

    report_one_by_one = build_report_one_by_one(partners_files=partners_info)

    combined_partners = data_combine(partners_info.values())
    report_from_dataframe = build_report_from_dataframe(
        partners_df=combined_partners
    )
    return report_one_by_one, report_from_dataframe
def fetch_partner_data(
    task_target_date, selected_partners: List[str], period=datetime.timedelta(days=7)
) -> List[pd.DataFrame]:
    """Ingest and combine the data of each selected partner over the period.

    Args:
        task_target_date: Reference date passed to ``period_dates``.
        selected_partners: Partner codes; ``"a"``, ``"b"`` and ``"c"`` are
            recognised.
        period: Window passed to ``period_dates``; defaults to seven days.

    Returns:
        One ``data_combine(..., sort=True)`` result per selected partner, in
        the order the partners were given.

    Raises:
        Exception: When a date is processed for a partner code other than
            ``"a"``, ``"b"`` or ``"c"``.  The check happens per date, so it
            does not fire if ``period_dates`` yields nothing.
    """

    def _ingest(partner, day):
        # Early-return dispatch on the partner code.
        if partner == "a":
            return ingest_partner_a(task_target_date=day)
        if partner == "b":
            return ingest_partner_b(task_target_date=day)
        if partner == "c":
            return ingest_partner_c(task_target_date=day)
        raise Exception("Partner not found!")

    combined_per_partner = []
    for partner in selected_partners:
        per_date = [
            _ingest(partner, day)
            for day in period_dates(task_target_date, period)
        ]
        combined_per_partner.append(data_combine(per_date, sort=True))
    return combined_per_partner
def partners_report(partners):
    # type: (List[int]) -> ...
    """Build the partner report, per-type split and published graphs.

    Args:
        partners: Partner identifiers; each one is used to filter the
            ``"aa"`` table of the dump with ``filter_partner``.

    Returns:
        A tuple ``(report, by_type, published)``:

        * ``report`` - ``build_report`` over the partner slices and their
          ``data_combine``;
        * ``by_type`` - ``separate_data_by_type`` of the dumped table;
        * ``published`` - the ``published`` attribute of
          ``publish_results`` called with the per-partner graphs.
    """
    table = "aa"
    dump = dump_db(tables=[table])

    partners_info = {}
    for partner in partners:
        partners_info[partner] = filter_partner(dump[table], partner)

    by_type = separate_data_by_type(dump[table])

    combined_partners = data_combine(partners_info.values())
    report = build_report(
        partners_files=partners_info,
        partners_df=combined_partners,
    )

    graphs = {}
    for partner, partner_slice in partners_info.items():
        graphs[partner] = build_graphs(partner_slice)

    publication = publish_results(graphs)
    return report, by_type, publication.published