Python rows_header_trim Examples, starthinker.util.csv.rows_header_trim Python Examples

Example #1

0

Show file

File: run.py Project: google/starthinker

def floodlight_rows(
    config, task: dict, report_id: int
) -> Generator[list[str, str, str, str, str, str, int], None, None]:
    """Wait for a floodlight report to complete and return its cleaned rows.

    Args:
      config - passed through unchanged to report_file.
      task - task settings; the 'auth' and 'account' entries are read here.
      report_id - the report created earlier for a specific floodlight id.

    Returns:
      A stream of rows, see FLOODLIGHT_* constants for definitions.
    """

    wait_minutes = 10  # wait up to 10 minutes for report to complete

    # fetch report file if it exists ( None: no report name is supplied )
    _, report_data = report_file(
        config, task['auth'], task['account'], report_id, None, wait_minutes)

    # clean-up pipeline: parse -> clean -> trim header -> type column 6
    parsed = report_to_rows(report_data)
    cleaned = report_clean(parsed)
    body_only = rows_header_trim(cleaned)
    return rows_to_type(body_only, column=6)

Example #2

0

Show file

File: run.py Project: Gregorfran/starthinker-gregor

def floodlight_rows(report_id):
    """Fetch the report identified by report_id and return its cleaned rows.

    Credentials and account are read from the module-level project.task.

    Args:
      report_id - identifier handed to report_file to locate the report.

    Returns:
      The rows after report_to_rows, report_clean, rows_header_trim and
      rows_to_type ( column 6 ) have been applied, in that order.
    """

    auth = project.task['auth']
    account = project.task['account']

    # fetch report file if it exists
    # NOTE(review): None / 10 presumably mean "no name" and "wait 10 minutes";
    # confirm against report_file's signature.
    _, report_data = report_file(auth, account, report_id, None, 10)

    # clean up rows
    return rows_to_type(
        rows_header_trim(report_clean(report_to_rows(report_data))),
        column=6)

Example #3

0

Show file

File: run.py Project: hernanperalta/starthinker

def floodlight_monitor():
    """Run the floodlight check for every trigger listed in the sheet.

    Reads ( floodlight id, email ) pairs from the configured sheet range,
    pulls and analyses the report for each floodlight id, groups the
    resulting rows by email and, if there is anything to report, hands the
    grouped alerts to floodlight_email together with the latest day seen.
    """
    if project.verbose:
        print("FLOODLIGHT MONITOR")

    sheet_cfg = project.task['sheet']

    # make sure tab exists in sheet ( deprecated, use sheet task instead )
    if 'template' in sheet_cfg:
        template = sheet_cfg['template']
        sheets_tab_copy(project.task['auth'], template['sheet'],
                        template['tab'], sheet_cfg['sheet'], sheet_cfg['tab'])

    # read peers from sheet, each entry is:
    #   0 - Floodlight Id
    #   1 - email
    triggers = sheets_read(project.task['auth'], sheet_cfg['sheet'],
                           sheet_cfg['tab'], sheet_cfg['range'])

    if project.verbose and len(triggers) == 0:
        print("FLOODLIGHT MONITOR: No floodlight ids specified in sheet.")

    alerts = {}
    day = None

    for trigger in triggers:
        floodlight_id, email = trigger[0], trigger[1]

        # get report data for each floodlight
        rows = rows_to_type(
            rows_header_trim(
                report_clean(report_to_rows(floodlight_report(floodlight_id)))),
            column=6)

        # calculate outliers
        last_day, rows = floodlight_analysis(rows)

        # nothing to record when the analysis yields no last day
        if not last_day:
            continue

        # find last day report ran
        if day is None:
            day = last_day
        else:
            day = max(day, last_day)

        # group alerts by email
        alerts.setdefault(email, []).extend(rows)

    if alerts:
        floodlight_email(day, alerts)

Example #4

0

Show file

File: run.py Project: quesadillasraclette/starthinker

def sov_process_client(report_name):
    """Aggregate the client DBM report into share-of-voice rows.

    Rows that share the same dimension values ( date through the column just
    before impressions ) are merged by summing their impressions into the
    Client_Impressions slot. Peer_Impressions is always 0 for these rows.

    Written to run unchanged on Python 2 and Python 3: print is called with
    a single pre-formatted string and int() replaces long().

    Args:
      report_name - name of the DBM report handed to report_file.

    Returns:
      A list of rows, each laid out as:
        0 Report_Day, 1 Year_Month, 2 Advertiser_Type ( 'Client' ),
        3 Platform_Type, 4 Creative_Type, 5 State_Region,
        6 Designated_Market_Area, 7 Client_Impressions, 8 Peer_Impressions.
      The list is empty when the report is not available yet.
    """
    sov_rows = {}

    # DBM report columns, by index:
    #   0 Advertiser, 1 Advertiser ID, 2 Advertiser Status,
    #   3 Advertiser Integration Code, 4 Date, 5 Month, 6 Device Type,
    #   7 Creative Type, 8 Region, 9 Region ID, 10 City, 11 City ID,
    #   12 Impressions
    filename, report = report_file(project.task['auth'], None, report_name, 60,
                                   DBM_CHUNKSIZE)

    # if a report exists
    if report:
        if project.verbose:
            # one formatted argument prints the same text on Python 2 and 3
            print('CLIENT FILE %s' % (filename,))

        # convert report to array
        rows = report_to_rows(report)
        rows = report_clean(rows, datastudio=True, nulls=True)
        rows = rows_header_trim(rows)

        # pull only needed fields ( see: SCHEMA )
        for row in rows:
            # grouping key: date through the column before impressions
            key = ''.join(row[4:-1])
            impressions = int(row[12])

            # if row is already in sov, then just add the impressions
            if key in sov_rows:
                sov_rows[key][7] += impressions

            # otherwise, create a new client row
            else:
                sov_rows[key] = [
                    row[4],  # 0 Report_Day
                    row[5],  # 1 Year_Month
                    'Client',  # 2 Advertiser_Type
                    row[6],  # 3 Platform_Type
                    row[7],  # 4 Creative_Type
                    RE_STATE.sub('', row[8]),  # 5 State_Region
                    row[10],  # 6 Designated_Market_Area
                    impressions,  # 7 Client_Impressions
                    0  # 8 Peer_Impressions
                ]

    else:
        if project.verbose:
            print('SOV REPORT NOT READY YET')

    # return only row values, hash key no longer necessary
    # ( list() keeps the return type a list on Python 3 as well )
    return list(sov_rows.values())

Example #5

0

Show file

File: run.py Project: quesadillasraclette/starthinker

def sov_process_peer(report_name):
    """Aggregate the peer DBM report into anonymised share-of-voice rows.

    Rows that share the same dimension values ( date through the column just
    before impressions ) are merged by summing their impressions into the
    Peer_Impressions slot. Client_Impressions is always 0 for these rows.
    The per-advertiser impression mix is then validated so no single peer
    dominates the blended data.

    Written to run unchanged on Python 2 and Python 3: print is called with
    a single pre-formatted string, int() replaces long() and the percentage
    uses explicit floor division.

    Args:
      report_name - name of the DBM report handed to report_file.

    Returns:
      A list of rows, each laid out as:
        0 Report_Day, 1 Year_Month, 2 Advertiser_Type ( 'Peer' ),
        3 Platform_Type, 4 Creative_Type, 5 State_Region,
        6 Designated_Market_Area, 7 Client_Impressions, 8 Peer_Impressions.
      The list is empty when the report is not available yet.

    Raises:
      Exception - when any advertiser holds more than 50% of the impressions
        or the report contains fewer than 5 advertisers; all problems are
        joined into one message.
    """
    sov_rows = {}
    sov_mix = {}
    mix_ratio_high = 50
    warnings = []
    errors = []

    # DBM report columns, by index:
    #   0 Advertiser, 1 Advertiser ID, 2 Advertiser Status,
    #   3 Advertiser Integration Code, 4 Date, 5 Month, 6 Device Type,
    #   7 Creative Type, 8 Region, 9 Region ID, 10 City, 11 City ID,
    #   12 Impressions
    filename, report = report_file(project.task['auth'], None, report_name, 60,
                                   DBM_CHUNKSIZE)

    # if a report exists
    if report:
        if project.verbose:
            # one formatted argument prints the same text on Python 2 and 3
            print('CLIENT FILE %s' % (filename,))

        # convert report to array
        rows = report_to_rows(report)
        rows = report_clean(rows, datastudio=True, nulls=True)
        rows = rows_header_trim(rows)

        for row in rows:
            # grouping key: date through the column before impressions
            key = ''.join(row[4:-1])
            impressions = int(row[12])

            # track advertiser level mix ( keyed by Advertiser ID )
            sov_mix[row[1]] = sov_mix.get(row[1], 0) + impressions

            # if peer is in sov, then just add the impressions
            if key in sov_rows:
                sov_rows[key][8] += impressions

            # otherwise, create a new anonymous peer row
            else:
                sov_rows[key] = [
                    row[4],  # 0 Report_Day
                    row[5],  # 1 Year_Month
                    'Peer',  # 2 Advertiser_Type
                    row[6],  # 3 Platform_Type
                    row[7],  # 4 Creative_Type
                    RE_STATE.sub('', row[8]),  # 5 State_Region
                    row[10],  # 6 Designated_Market_Area
                    0,  # 7 Client_Impressions
                    impressions  # 8 Peer_Impressions
                ]

        # CHECK: Mix must be right, make sure we've got obfuscated data, no peer has more than 50%
        mix_total = sum(sov_mix.values())

        for account, impressions in sov_mix.items():
            # A zero total means every advertiser has 0 impressions; report
            # 0% rather than dividing by zero so the warnings below are
            # still produced. Floor division keeps the integer percentage.
            percent = (100 * impressions) // mix_total if mix_total else 0
            if project.verbose:
                print('EXPECTED MIX %d%% ACTUAL MIX: %s %d%%' % (
                    mix_ratio_high, account, percent))

            if impressions == 0:
                warnings.append('Warning advertiser %s has no impressions.' %
                                account)
            elif percent > mix_ratio_high:
                errors.append(
                    'Advertiser %s has too much weight %d%%, expected under %d%%, add other big peers!'
                    % (account, percent, mix_ratio_high))

        # warnings only become errors when there are too few advertisers
        if len(sov_mix) < 5:
            errors.extend(warnings)
            errors.append(
                'Need at least 5 DBM advertisers with impressions to ensure anonymity!'
            )

        # raise all errors at once so user can clean up multiple errors at once
        if errors:
            raise Exception('\n'.join(errors))

    else:
        if project.verbose:
            print('SOV REPORT NOT READY YET')

    # return only row values, hash key no longer necessary
    # ( list() keeps the return type a list on Python 3 as well )
    return list(sov_rows.values())