def fetch_data(task_target_date, period=datetime.timedelta(days=7)):
    """Fetch data for every date in the period and combine the results.

    ``fetch_wine_quality`` is called once for each date yielded by
    ``period_dates(task_target_date, period)``; the collected results are
    handed to ``data_combine`` with ``sort=True`` and its result is returned.
    """
    daily_results = [
        fetch_wine_quality(task_target_date=day)
        for day in period_dates(task_target_date, period)
    ]
    return data_combine(daily_results, sort=True)
Example #2
0
def pipe1(value=1.0):
    # type: (float)-> Any
    """Feed ``value`` to ``calc_1`` and ``calc_2`` and sum the combined result.

    The two calculation outputs are merged with ``data_combine`` and the
    merged data is passed to ``sum_values``, whose result is returned.
    """
    branches = [calc_1(value), calc_2(value)]
    combined = data_combine(branches)
    return sum_values(combined)
Example #3
0
    def band(self):
        """Build one ``TComplicatedTask`` per date and expose the outputs.

        For every date from ``period_dates(self.task_target_date,
        self.period)`` a ``TDataSource`` is created and fed to a
        ``TComplicatedTask``. The tasks' ``simplest_output`` values are
        published both combined and as a list, the tasks themselves are
        published as a list, and the outputs of several nested pipelines are
        assigned to ``nested``, ``nested2`` and ``nested3``.
        """
        config = SomeConfig()
        tasks = []
        for day in period_dates(self.task_target_date, self.period):
            day_source = TDataSource(task_target_date=day)
            tasks.append(
                TComplicatedTask(
                    specific_input=day_source,
                    some_param=config.some_param,
                    task_input=day_source.logs,
                    # empty_input=None,
                    task_target_date=day,
                )
            )

        # Same per-task outputs, exposed once combined and once as a list.
        self.combined_output = data_combine(
            [task.simplest_output for task in tasks])
        self.list_output = [task.simplest_output for task in tasks]
        self.list_tasks_output = list(tasks)

        # Two instances of the same nested pipeline under different names.
        self.nested = [
            TNestedPipeline1(task_name=pipe_name).some_output
            for pipe_name in ("MyNewPipe", "custom_task_name")
        ]

        self.nested2 = TNestedPipeline2().some_output
        super_nested = TSuperNestedPipeline(list_parameter=self.list_param)
        self.nested3 = super_nested.some_output
Example #4
0
    def band(self):
        """Fetch ids and data for each date and publish the combined results.

        For every date from ``period_dates(self.task_target_date,
        self.period)`` the ids come from ``FetchIds`` and are passed to
        ``FetchData``. Both results are stored under the date formatted as
        ``%Y-%m-%d``; the stored values are then combined (``sort=True``)
        into ``self.ids`` and ``self.data``.
        """
        ids_by_day = {}
        data_by_day = {}
        for day in period_dates(self.task_target_date, self.period):
            # Disabled alternative that reads pre-dumped data instead:
            # if self.task_env == TaskEnv.prod and not self.run_on_prod:
            #     ids = cb_data_dump_path(task_target_date=d, name="ids")
            #     data = cb_data_dump_path(task_target_date=d, name="data")
            # else:
            day_ids = FetchIds(task_target_date=day, period=one_day).ids
            day_data = FetchData(task_target_date=day, ids=day_ids).data

            day_key = day.strftime("%Y-%m-%d")
            ids_by_day[day_key] = day_ids
            data_by_day[day_key] = day_data

        self.ids = data_combine(ids_by_day.values(), sort=True)
        self.data = data_combine(data_by_day.values(), sort=True)
Example #5
0
 def band(self):
     """Project the raw device logs of each date and combine the projections.

     For every date from ``period_dates(self.task_target_date,
     self.period)`` the ``logs`` of a ``RawDeviceLog`` are passed to a
     ``DeviceLogProjection``; the resulting ``projected_logs`` are combined
     with ``data_combine`` and stored on ``self.projected``.
     """
     per_day_projections = [
         DeviceLogProjection(
             raw_logs=RawDeviceLog(task_target_date=day).logs,
             task_target_date=day,
         ).projected_logs
         for day in period_dates(self.task_target_date, self.period)
     ]
     self.projected = data_combine(per_day_projections)
Example #6
0
def top_artists_report(task_target_date, period=timedelta(days=2)):
    """Aggregate the streams of each date in the period and return the top artists.

    One ``stream`` call is made per date from
    ``period_dates(task_target_date, period)``, named ``Stream_<index>``.
    The combined streams go through ``aggregate_artists`` and the result of
    ``top_n_artists`` on that aggregation is returned.
    """
    logging.info("top_artists_report")

    daily_streams = []
    for index, day in enumerate(period_dates(task_target_date, period)):
        daily_streams.append(
            stream(task_name="Stream_%s" % index, task_target_date=day)
        )

    combined_streams = data_combine(daily_streams)
    artists = aggregate_artists(stream=combined_streams)
    return top_n_artists(artists=artists)
def output_per_id_report(partners):
    # type: (List[int]) -> ...
    """Build two reports from a table dump filtered per partner.

    The ``"my_table"`` dump is filtered once per entry in ``partners``.
    The per-partner mapping feeds ``build_report_one_by_one``; the mapping's
    values, combined with ``data_combine``, feed
    ``build_report_from_dataframe``. Both reports are returned as a tuple.
    """
    table_dump = dump_table_from_db(table="my_table")

    partners_info = {}
    for partner in partners:
        partners_info[partner] = filter_by_id(table_dump, partner)

    one_by_one = build_report_one_by_one(partners_files=partners_info)

    combined = data_combine(partners_info.values())
    from_dataframe = build_report_from_dataframe(partners_df=combined)
    return one_by_one, from_dataframe
Example #8
0
def fetch_partner_data(
    task_target_date,
    selected_partners: List[str],
    period=datetime.timedelta(days=7)
) -> List[pd.DataFrame]:
    """Ingest and combine the per-date data of every selected partner.

    Args:
        task_target_date: Reference date passed to ``period_dates``.
        selected_partners: Partner keys to ingest; ``"a"``, ``"b"`` and
            ``"c"`` are supported.
        period: Time span passed to ``period_dates`` together with
            ``task_target_date``.

    Returns:
        A list with one ``data_combine(..., sort=True)`` result per entry of
        ``selected_partners``, in the same order.

    Raises:
        ValueError: If a partner key is not one of the supported ones. The
            check happens before any date is processed for that partner, so
            an unknown key is rejected even when the period yields no dates.
    """
    partner_data = []
    for partner in selected_partners:
        # The ingest function depends only on the partner, so it is
        # resolved once here instead of once per date.
        if partner == "a":
            ingest = ingest_partner_a
        elif partner == "b":
            ingest = ingest_partner_b
        elif partner == "c":
            ingest = ingest_partner_c
        else:
            raise ValueError("Partner not found!")

        all_data = [
            ingest(task_target_date=d)
            for d in period_dates(task_target_date, period)
        ]
        partner_data.append(data_combine(all_data, sort=True))
    return partner_data
Example #9
0
def partners_report(partners):
    # type: (List[int]) -> ...
    """Build a report, a by-type split and published graphs for the partners.

    Table ``"aa"`` is dumped via ``dump_db`` and filtered once per entry in
    ``partners``. The per-partner mapping and its combined values feed
    ``build_report``; the unfiltered table feeds ``separate_data_by_type``;
    one ``build_graphs`` result per partner is passed to
    ``publish_results``. Returns ``(report, by_type, published)``.
    """
    table_name = "aa"

    db_dump = dump_db(tables=[table_name])
    partners_info = {}
    for partner in partners:
        partners_info[partner] = filter_partner(db_dump[table_name], partner)
    by_type = separate_data_by_type(db_dump[table_name])

    combined = data_combine(partners_info.values())
    report = build_report(partners_files=partners_info, partners_df=combined)

    graphs = {}
    for partner, info in partners_info.items():
        graphs[partner] = build_graphs(info)

    published = publish_results(graphs).published

    return report, by_type, published