Example #1
        input_file.seek(0)
        schema = field_list_to_schema(header)
        output_file_name = '/tmp/%s.csv' % str(uuid.uuid1())
        processor.clean_csv(input_file,
                            output_file_name,
                            len(header),
                            header=True)
        input_file.close()

        output_file = open(output_file_name, 'rb')
        io_to_table(project.task['auth'],
                    project.id,
                    project.task['to'].get('dataset'),
                    project.task['to'].get('table'),
                    output_file,
                    'CSV',
                    schema,
                    skip_rows=0,
                    disposition=project.task['to'].get('write_disposition',
                                                       'WRITE_TRUNCATE'))
        output_file.close()

        os.remove(input_file_name)

    os.remove(output_file_name)


if __name__ == "__main__":
    project.load('sftp')
    sftp()
Example #2
def conversion_upload():

  rows = conversions_download()

  if project.verbose: print 'CONVERSION UPLOAD'

  statuses = conversions_upload(
    project.task['auth'], 
    project.task['account_id'], 
    project.task['activity_id'], 
    project.task['conversion_type'], 
    rows, 
    project.task['encryptionInfo']
  )

  has_rows = False
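  # any status returned flips this flag; if none come back, report NO ROWS below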
  for status in statuses:
    has_rows = True
    if 'errors' in status: 
      if project.verbose: print 'ERROR:', status['conversion']['ordinal'], '\n'.join([e['message'] for e in status['errors']])
    else:
      if project.verbose: print 'OK:', status['conversion']['ordinal'] 
      
  if not has_rows:
    if project.verbose: print 'NO ROWS'


if __name__ == "__main__":
  project.load('conversion_upload')
  conversion_upload()
Example #3
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
###########################################################################

from util.project import project
from util.google_api import API
from util.data import put_rows


def google_api():
    if project.verbose:
        print 'GOOGLE_API', project.task['api'], project.task['version']

    results = API(project.task).execute()

    put_rows(
        project.task['auth'], project.task['out'], '%s_%s.json' %
        (project.task['function'].replace('.', '_'), project.date), results)


if __name__ == "__main__":
    project.load('google_api')
    google_api()
Example #4
            "endDate": str(inputs['End Date']),
        }
    else:
        date_range = {
            "kind": "dfareporting#dateRange",
            "relativeDateRange": str(inputs['Relative Date Range'])
        }

    combos_table = report_combos(unique_name, date_range,
                                 inputs['Main Advertiser ID'],
                                 inputs['Main Campaign ID'],
                                 inputs['Dynamic Profile ID'])

    main_table = report_main(unique_name, date_range,
                             inputs['Main Advertiser ID'],
                             inputs['Main Campaign ID'], shadow)

    if shadow:
        shadow_table = report_shadow(unique_name, date_range,
                                     inputs['Shadow Advertiser ID'],
                                     inputs['Shadow Campaign ID'])
    else:
        shadow_table = None

    view_combine(unique_name, combos_table, main_table, shadow_table)


if __name__ == "__main__":
    project.load("dynamic_costs")
    dynamic_costs()
Example #5
        rows = query_to_rows(project.task['auth'], project.id,
                             project.task['bigquery']['dataset'],
                             project.task['bigquery']['query'])

        object_compare(sorted(rows),
                       sorted(project.task['bigquery']['values']))

    # simple table check ( unless query given )
    elif 'values' in project.task['bigquery']:
        rows = table_to_rows(project.task['auth'], project.id,
                             project.task['bigquery']['dataset'],
                             project.task['bigquery']['table'])

        object_compare(sorted(rows),
                       sorted(project.task['bigquery']['values']))


# decide which test to run
def test():
    if 'sheets' in project.task: sheets()
    elif 'bigquery' in project.task: bigquery()


# test should be run like any other task
# one test per task ( otherwise it gets confusing )
# calling script already indicates which test is being run
# print only PASS or FAIL
if __name__ == "__main__":
    project.load('test')
    test()
Example #6
          count += 1

      # if offers for this account and solution exist, keep only the top ones ( largest impact to lowest )
      if offers:
        offers.sort(key=itemgetter('Impact'), reverse=True)
        offers = offers[:project.task['offers']]
        owner['Solutions'].append({'Solution':solution, 'Offers':offers})

  if project.verbose: print 'ASSEMBLED OFFERS', count

  if project.verbose: print 'SENDING OFFERS'

  # send emails
  count = 0
  for owner in owners:
    if owner['Solutions']: 
      compose_email_solution_centric(owner)
      count += 1
      if count == 10: exit()

  if project.verbose: print 'SENT OFFERS', count


def marketing():
  assemble_offers_solution_centric()


if __name__ == "__main__":
  project.load('marketing')
  marketing()
Example #7
  if project.verbose and len(triggers) == 0: print "FLOODLIGHT MONITOR: No floodlight ids specified in sheet."

  alerts = {}
  day = None

  for trigger in triggers:

    # get report data for each floodlight
    report = floodlight_report(trigger[0])
    rows = report_to_rows(report)
    rows = report_clean(rows)
    rows = rows_header_trim(rows)
    rows = rows_to_type(rows, column=6)
 
    # calculate outliers
    last_day, rows = floodlight_analysis(rows)

    # find last day report ran
    day = last_day if day is None else max(day, last_day)

    # group alerts by email
    alerts.setdefault(trigger[1], [])
    alerts[trigger[1]].extend(rows)

  floodlight_email(day, alerts)


if __name__ == "__main__":
  project.load('floodlight_monitor')
  floodlight_monitor()
Example #8
    # moving a report
    if 'out' in project.task:

        filename, report = report_file(
            project.task['auth'],
            project.task['report'].get('report_id', None),
            project.task['report'].get('name', None),
            project.task['report'].get('timeout', 10), DBM_CHUNKSIZE)

        # if a report exists
        if report:
            if project.verbose: print 'DBM FILE', filename

            # clean up the report
            rows = report_to_rows(report)
            rows = report_clean(rows,
                                datastudio=project.task.get(
                                    'datastudio', False),
                                nulls=True)

            # write rows using standard out block in json ( allows customization across all scripts )
            if rows:
                put_rows(project.task['auth'], project.task['out'], filename,
                         rows)


if __name__ == "__main__":
    project.load('dbm')
    dbm()
Example #9
    print "The task name must match a directory with a run.py inside it."
    print "For example, 'hello' is a task which will executed by 'hello/run.py'."
    print ''
    print 'PROJECT JSON:'
    pprint.PrettyPrinter(depth=20).pprint(project.configuration)
    print ''
    print ''

    print '-' * 80
    print "Each task is passed a nested subset of json."
    print "Different tasks should NOT share json. Security and readability reasons."
    print "Each task can execute as a service or a user independently."
    print "Access structure data within a task as..."
    print ''
    print 'PROJECT TASK:', project.task
    print 'PROJECT TASK AUTH:', project.task['auth']
    print 'PROJECT TASK SAY:', project.task['say']
    print ''
    print ''

    print '-' * 80
    print "Take a look inside 'hello/run.py'."
    print "Its a great skeleton for your first project."
    print ''
    print ''


if __name__ == "__main__":
    project.load('hello')
    hello()
Example #10
            rows = report_clean(rows, project.task.get('datastudio', False))
            rows = rows_column_add(rows, 'Account_Id', account_id)
            rows = rows_column_add(
                rows, 'Account_Name',
                get_account_name(project.task['auth'], account_id))

            # if BigQuery output is set, apply the append disposition ( storage destinations namespace by file automatically )
            if project.task.get('out', {}).get('bigquery', {}).get('table'):
                project.task['out']['bigquery']['disposition'] = disposition

            # write rows using standard out block in json ( allows customization across all scripts )
            if rows:
                put_rows(project.task['auth'], project.task['out'], filename,
                         rows)


def dcm_bulk():
    if project.verbose: print 'DCM BULK'
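    # first account resets the table; every subsequent account appends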
    disposition = 'WRITE_TRUNCATE'
    for count, account in enumerate(project.task['accounts']):
        if project.verbose:
            print 'DCM BULK %d of %d' % (count, len(project.task['accounts']))
        dcm(account, disposition)
        disposition = 'WRITE_APPEND'
        sleep(3)


if __name__ == "__main__":
    project.load('dcm_bulk')
    dcm_bulk()
Example #11
            # NOT RECOMMENDED: determine schema if missing
            else:
                if project.verbose:
                    print 'SHEETS SCHEMA DETECT ( Not Recommended - Define Schema In JSON )'
                # cast rows to types ( for schema detection )
                rows = rows_to_type(rows)
                rows, schema = get_schema(rows,
                                          project.task.get('header', False),
                                          infer_type=project.task.get(
                                              'infer_type', True))

            # write to table ( not using put because no use cases for other destinations )
            rows_to_table(
                auth=project.task['auth'],
                project_id=project.id,
                dataset_id=project.task['out']['bigquery']['dataset'],
                table_id=project.task['out']['bigquery']['table'],
                rows=rows,
                schema=schema,
                skip_rows=1 if project.task.get('header', False) else 0,
                disposition=project.task['out']['bigquery'].get(
                    'disposition', 'WRITE_TRUNCATE'))

        else:
            print 'SHEET EMPTY'


if __name__ == "__main__":
    project.load('sheets')
    sheets()
Example #12
            project.task['auth'],
            project.task['report']['account'],
            project.task['report'].get('report_id', None),
            project.task['report'].get('name', None)
            or project.task['report'].get('body', {}).get('name', None),
            project.task['report'].get('timeout', 10),
        )

        if report:
            if project.verbose: print 'DCM FILE', filename

            # clean up the report
            rows = report_to_rows(report)
            rows = report_clean(rows, project.task.get('datastudio', False))

            # if bigquery, remove header and determine schema
            if 'bigquery' in project.task['out']:
                schema = report_schema(rows.next())
                project.task['out']['bigquery']['schema'] = schema
                project.task['out']['bigquery']['skip_rows'] = 0

            # write rows using standard out block in json ( allows customization across all scripts )
            if rows:
                put_rows(project.task['auth'], project.task['out'], filename,
                         rows)


if __name__ == "__main__":
    project.load('dcm')
    dcm()
Example #13
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
###########################################################################

from util.project import project
from util.storage import bucket_create, bucket_access


def bucket():
    if project.verbose: print "BUCKET", project.task['bucket']

    # create bucket
    bucket_create(project.task['auth'], project.id, project.task['bucket'])
    bucket_access(project.task['auth'],
                  project.id,
                  project.task['bucket'],
                  emails=project.task.get('emails', []),
                  groups=project.task.get('groups', []))


if __name__ == "__main__":
    project.load('bucket')
    bucket()
Example #14
  schema = []
  for h in header:
    h = column_header_sanitize(h)
    schema.append({ 
      'name':h, 
      'type':DT_Field_Lookup.get(h, 'STRING'), 
      'mode':'NULLABLE' 
    }) 
  return schema
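
# For illustration only ( hypothetical headers; DT_Field_Lookup contents assumed ):
# dt_schema(['Event Time', 'User ID']) returns one NULLABLE field per sanitized
# header, defaulting to type 'STRING' when a header is not in DT_Field_Lookup.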


def dt():
  if project.verbose: print "DT TO TABLE", project.task['to']['table']

  storage_to_table(
    project.task['auth'],
    project.id,
    project.task['to']['dataset'],
    project.task['to']['table'],
    '%s:%s*' % (project.task['from']['bucket'], project.task['from']['path']), # append * to match all files with prefix
    dt_schema(dt_header()), # fetch schema from first dt file
    1,
    'CSV',
    'WRITE_TRUNCATE'
  )


if __name__ == "__main__":
  project.load('dt')
  dt()
Example #15
        if project.verbose: print 'ENTITY:', entity

        # write public files only once
        if entity in PUBLIC_FILES:
            path = 'gdbm-public:entity/%s.0.%s.json' % (
                project.date.strftime('%Y%m%d'), entity)
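            # yields e.g. 'gdbm-public:entity/20200101.0.Campaign.json' ( hypothetical date and entity )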
            schema = Entity_Schema_Lookup[entity]
            move_entity(project, path, entity, schema, 'WRITE_TRUNCATE')

        # supports multiple partners, first one resets table, others append
        else:
            disposition = 'WRITE_TRUNCATE'
            for account in get_rows('user', project.task['partners']):

                #for account in project.task['accounts']:
                # if advertiser given do not run it ( SAFETY )
                if ':' in str(account):
                    print 'WARNING: Skipping advertiser: ', account
                    continue
                if project.verbose: print 'PARTNER:', account
                path = 'gdbm-%s:entity/%s.0.%s.json' % (
                    account, project.date.strftime('%Y%m%d'), entity)
                schema = Entity_Schema_Lookup[entity]
                move_entity(project, path, entity, schema, disposition)
                disposition = 'WRITE_APPEND'


if __name__ == '__main__':
    project.load('entity')
    entity()
Example #16
                logger.flush()
                store.save_id_map()

    except Exception as error:
        stack = traceback.format_exc()
        print stack

        logger.log(str(error))
        logger.flush()


def test():
    """For development purposes when debugging a specific entity, this function is handy to run just that entity."""
    setup()
    init_daos()
    creatives()


if __name__ == '__main__':
    """Main entry point of Bulkdozer."""
    timer.start_timer('bulkdozer job')
    project.load('traffic')
    traffic()
    timer.check_timer('bulkdozer job')

    #test()
Example #17
        if filename.endswith('.gz'):
            data = gzip.GzipFile(fileobj=data, mode='rb')
            filename = filename[:-3]

        # if excel file, save each sheet individually
        if filename.endswith('.xlsx'):

            for sheet, rows in excel_to_rows(data):
                rows = rows_trim(rows)
                rows = rows_header_sanitize(rows)

                if project.verbose: print 'EMAIL WRITE', filename
                put_rows(project.task['auth'], project.task['out'], filename,
                         rows, column_header_sanitize(sheet))

        # if csv, save directly
        elif filename.endswith('.csv'):
            rows = csv_to_rows(data)
            rows = rows_header_sanitize(rows)

            if project.verbose: print 'EMAIL WRITE', filename
            put_rows(project.task['auth'], project.task['out'], filename, rows)

        else:
            if project.verbose: print 'UNSUPPORTED FILE:', filename


if __name__ == "__main__":
    project.load('email')
    email()
Example #18
of role granting and you need it to grant the role to your user.  So there is
NO SECURITY benefit, just a convenience.

### UI Security

In a UI environment such as a web application, where users DO NOT have access to
the service credentials, but the server does have access to user credentials, this
handler allows the service to securely grant additional roles to users.

### Good Practice

Using roles is a better practice than assigning permissions to user accounts
individually because it allows for better tracking and quicker revocation. 
Changing a role's permissions changes all users at once without having to track
down individuals.  We highly recommend using roles.

"""

from util.project import project
from util.auth import set_iam


def iam():
    set_iam(project.task['auth'], project.id, project.task['role'],
            project.task['email'])
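
    # A minimal sketch of the task JSON this handler might read; the key names
    # match the lookups above, but the role, email, and exact wrapper format
    # are assumptions:
    #
    # { "iam": {
    #     "auth": "service",
    #     "role": "roles/editor",
    #     "email": "user@example.com"
    # } }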


if __name__ == "__main__":
    project.load('iam')
    iam()
Example #19
    elif 'line_items' in project.task['read']:
      line_items = get_rows(project.task['auth'], project.task['read']['line_items'])

    rows = lineitem_read(
      project.task['auth'],
      advertisers,
      insertion_orders,
      line_items
    )

    if rows: 
      filename = 'lineitems_%s.csv' % project.date
      if 'bigquery' in project.task['read']['out']: 
        project.task['read']['out']['bigquery']['schema'] = LineItem_Read_Schema
        project.task['read']['out']['bigquery']['skip_rows'] = 0

      put_rows(project.task['auth'], project.task['read']['out'], filename, rows)

  elif 'write' in project.task:
    rows = get_rows(project.task['auth'], project.task['write'])
    lineitem_write(
      project.task['auth'], 
      rows,
      project.task['write'].get('dry_run', True)
    )
  

if __name__ == "__main__":
  project.load('lineitem')
  lineitem()
Example #20
    # construct query
    query = 'SELECT\n  *,\n'
    for dimension, tags in dimensions.items():
        query += '  CASE\n'
        for tag, columns in tags.items():
            query += '    WHEN '
            for column, keywords in columns.items():
                for count, keyword in enumerate(keywords):
                    if count != 0: query += 'OR '
                    query += '%s CONTAINS "%s" ' % (column, keyword)
            query += 'THEN "%s"\n' % tag
        query += '    ELSE "%s"\n  END AS %s,\n' % (defaults.get(
            dimension, ''), dimension)
    query += 'FROM [%s.%s]' % (project.task['in']['dataset'],
                               project.task['in']['table'])
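
    # For illustration ( hypothetical dimension, tag, and column names ), an input of
    #   dimensions = {'Category': {'Brand': {'Page': ['shoes', 'bags']}}}
    #   defaults   = {'Category': 'Other'}
    # emits the clause:
    #   CASE
    #     WHEN Page CONTAINS "shoes" OR Page CONTAINS "bags" THEN "Brand"
    #     ELSE "Other"
    #   END AS Category,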

    if project.verbose: print 'QUERY: ', query

    # write to view
    query_to_view(project.task['out']['auth'],
                  project.id,
                  project.task['out']['dataset'],
                  project.task['out']['view'],
                  query,
                  replace=True)


if __name__ == "__main__":
    project.load('mapping')
    mapping()
Example #21
                if project.verbose: print "QUERY TO SFTP"
                put_rows(project.task['auth'], project.task['to'], '', rows)
        else:
            if project.verbose:
                print "QUERY TO VIEW", project.task['to']['view']
            query_to_view(
                project.task['auth'],
                project.id,
                project.task['to']['dataset'],
                project.task['to']['view'],
                query_parameters(project.task['from']['query'],
                                 project.task['from'].get('parameters')),
                project.task['from'].get('legacy', project.task['from'].get(
                    'useLegacySql', True))  # DEPRECATED: useLegacySql
            )
    else:
        if project.verbose:
            print "STORAGE TO TABLE", project.task['to']['table']
        storage_to_table(
            project.task['auth'], project.id, project.task['to']['dataset'],
            project.task['to']['table'], project.task['from']['bucket'] + ':' +
            project.task['from']['path'], project.task.get('schema', []),
            project.task.get('skip_rows', 1),
            project.task.get('structure', 'CSV'),
            project.task.get('disposition', 'WRITE_TRUNCATE'))


if __name__ == "__main__":
    project.load('bigquery')
    bigquery()
Example #22
from datetime import timedelta

from util.project import project
from util.regexp import parse_yyyymmdd
from util.storage import object_list, object_move, object_delete


def archive():
    if project.verbose: print 'ARCHIVE'

    day = project.date - timedelta(days=abs(project.task['days']))
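    # files dated on or before this cutoff day will be archived or deleted below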

    if 'storage' in project.task:
        for file_name in object_list(project.task['auth'],
                                     project.task['storage']['bucket'] + ':' +
                                     project.task['storage']['path'],
                                     files_only=True):
            file_day = parse_yyyymmdd(file_name)
            if file_day and file_day <= day:
                if project.task.get('delete', False) == False:
                    if project.verbose: print 'ARCHIVING FILE:', file_name
                    object_move(project.task['auth'], file_name,
                                file_name.replace(':', ':archive/'))
                else:
                    if project.verbose: print 'DELETING FILE:', file_name
                    object_delete(project.task['auth'], file_name)


if __name__ == "__main__":
    project.load('archive')
    archive()
Example #23
See SCOPES in util/auth/__init__.py or review util/auth/README.md

Arguments

  --client / -c - path to client credentials file used to authenticate
  --user / -u - path to user credentials file to be created if it does not exist.

Example

  python auth/helper.py -u [user credentials path] -c [client credentials path]

"""

import json
import argparse

from util.project import project
from util.auth import get_profile

if __name__ == "__main__":

    # all parameters come from project ( forces ignore of json file )
    parser = argparse.ArgumentParser()

    # initialize project
    project.load(parser=parser)

    # get profile
    print 'Profile:', json.dumps(get_profile(), indent=2, sort_keys=True)
Example #24
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
###########################################################################

from util.project import project
from util.bigquery import datasets_create, datasets_access


def dataset():
    if project.verbose: print "DATASET", project.id, project.task['dataset']

    # create dataset
    datasets_create(project.task['auth'], project.id, project.task['dataset'])
    datasets_access(project.task['auth'],
                    project.id,
                    project.task['dataset'],
                    emails=project.task.get('emails', []),
                    groups=project.task.get('groups', []))


if __name__ == "__main__":
    project.load('dataset')
    dataset()
Example #25
def twitter():
    if project.verbose: print 'TWITTER'

    rows = None

    if 'trends' in project.task:
        if 'places' in project.task['trends']:
            rows = twitter_trends_places()
            project.task['out']['bigquery'][
                'schema'] = TWITTER_TRENDS_PLACE_SCHEMA
            project.task['out']['bigquery']['skip_rows'] = 0
        elif 'closest' in project.task['trends']:
            rows = twitter_trends_closest()
            project.task['out']['bigquery'][
                'schema'] = TWITTER_TRENDS_CLOSEST_SCHEMA
            project.task['out']['bigquery']['skip_rows'] = 0
        else:
            rows = twitter_trends_available()
            project.task['out']['bigquery'][
                'schema'] = TWITTER_TRENDS_AVAILABLE_SCHEMA
            project.task['out']['bigquery']['skip_rows'] = 0

    if rows:
        put_rows(project.task['auth'], project.task['out'],
                 'twitter_%s.csv' % project.date, rows)


if __name__ == "__main__":
    project.load('twitter')
    twitter()
Example #26
    # if a report exists
    for report in reports:
        for report_frag in report:
            if project.verbose: print 'DS FILE', report_frag['name']

            # read data and clean up the report
            # TODO change to fully streaming @jfno
            rows = report_to_rows(
                report_read_data(project.task['auth'],
                                 report_frag['report_id'],
                                 report_frag['report_fragment']))

            # upload to cloud if data
            if rows:
                #put_rows(project.task['auth'], project.task['out'], report_frag['name'], rows)
                put_rows(project.task['auth'], project.task['out'], None, rows)


def ds():
    if project.verbose: print 'DS'

    if 'report' in project.task:
        day = project.date - timedelta(days=abs(project.task['days']))

        _one_report(day)


if __name__ == "__main__":
    project.load('ds')
    ds()