Example #1
0
def main():
  """Command line tool converting Excel workbook sheets to CSV on STDOUT."""

  parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description=textwrap.dedent("""\
    Command line to transform excel sheets into csv files.

    Prints to STDOUT, user is expected to pipe output into file.
    Typically used for BigQuery data imports.

    Examples:
      List sheets in workbook: python helper.py [EXCEL FILE] --list
      Convert excel to CSV: python helper.py [EXCEL FILE] --sheet [SHEET NAME] > results.csv

  """))

  parser.add_argument('workbook', help='name of file to pull the rows.')
  parser.add_argument('--sheet', help='Sheet to pull the rows.', default=None)
  parser.add_argument('--list', help='List reports.', action='store_true')

  # initialize project
  # BUG FIX: ('-v') is just the string '-v', so commandline_parser would
  # iterate the characters '-' and 'v'.  A one-element tuple needs a
  # trailing comma.
  parser = commandline_parser(parser, arguments=('-v',))
  args = parser.parse_args()
  config = Configuration(
    verbose=args.verbose
  )

  # Excel files are binary; open in 'rb' and let the helpers parse them.
  with open(args.workbook, 'rb') as excel_file:
    if args.list:
      for sheet in excel_to_sheets(excel_file):
        print(sheet)
    elif args.sheet:
      for sheet, row in excel_to_rows(excel_file, args.sheet):
        print(rows_to_csv(row).read())
def lineitem_write(auth, rows, dry_run=True):
    """Uploads a list of lineitem configurations to DBM.

  Bulletproofing: https://developers.google.com/bid-manager/v1/lineitems/uploadlineitems

  Args:
    * auth: (string) Either user or service.
    * rows: (iterator) List of lineitems: https://developers.google.com/bid-manager/guides/entity-write/format
    * dry_run: (boolean) If set to True no write will occur, only a test of the upload for errors.

  Returns:
    * Results of upload.

  """

    service = get_service('doubleclickbidmanager', API_VERSION, auth)

    # Derive the CSV header row from the write schema's field names.
    column_names = ','.join(field['name'] for field in LineItem_Write_Schema)
    payload = '%s\n%s' % (column_names, rows_to_csv(rows).read())

    body = {
        "lineItems": payload,
        "format": 'CSV',
        "dryRun": dry_run,
    }

    # API_Retry wraps the request with the project's standard retry policy.
    return API_Retry(service.lineitems().uploadlineitems(body=body))
Example #3
0
def lineitem_write(auth, rows, dry_run=True):
    """ Writes a list of lineitem configurations to DBM.

  Bulletproofing:
  https://developers.google.com/bid-manager/v1/lineitems/uploadlineitems

  Args:
    * auth: (string) Either user or service.
    * rows: (iterator) List of lineitems:
      https://developers.google.com/bid-manager/guides/entity-write/format
    * dry_run: (boolean) If set to True no write will occur, only a test of the
      upload for errors.

  Returns:
    * Results of upload.

  """

    header = [s['name'] for s in LineItem_Write_Schema]

    body = {
        'lineItems': '%s\n%s' %
        (','.join(header), rows_to_csv(rows).read()),  # add header row
        'format': 'CSV',
        'dryRun': dry_run
    }

    result = API_DBM(auth).lineitems().uploadlineitems(body=body).execute()
    return result
Example #4
0
def send_email(auth, email_to, email_from, email_cc, subject, text, html=None, attachment_filename=None, attachment_rows=None):
  """Sends an email through the Gmail API.

  Builds a multipart MIME message with a plain text part, an optional HTML
  part, and an optional CSV attachment generated from rows, then submits it
  via the authenticated Gmail service.
  """
  if project.verbose: print('SENDING EMAIL', email_to)

  service = get_service('gmail', 'v1', auth)

  message = MIMEMultipart('alternative')
  message.set_charset('utf8')
  message['to'] = email_to
  message['cc'] = email_cc
  message['from'] = email_from
  message['subject'] = subject

  # Plain text body is always present.
  message.attach(MIMEText(text, 'plain', 'UTF-8'))

  # Optional HTML alternative.
  if html:
    message.attach(MIMEText(html, 'html', 'UTF-8'))

  # Optional CSV attachment built from the supplied rows.
  if attachment_filename and attachment_rows:
    part = MIMEBase("text", "csv")
    part.set_payload(rows_to_csv(attachment_rows).read())
    part.add_header('Content-Disposition', 'attachment', filename=attachment_filename)
    encode_base64(part)
    message.attach(part)

  # Gmail API expects the raw RFC 2822 message as url-safe base64 text.
  raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode()
  API_Retry(service.users().messages().send(userId='me', body={'raw': raw_message}))
Example #5
0
def send_email(config,
               auth,
               email_to,
               email_from,
               email_cc,
               subject,
               text,
               html=None,
               attachment_filename=None,
               attachment_rows=None):
    """Sends an email through the Gmail API.

    Assembles a multipart MIME message (plain text, optional HTML
    alternative, optional CSV attachment built from rows) and submits it
    via the project's authenticated Gmail service wrapper.
    """
    if config.verbose:
        print('SENDING EMAIL', email_to)

    message = MIMEMultipart('alternative')
    message.set_charset('utf8')

    # Standard envelope headers.
    for header_name, header_value in (('to', email_to), ('cc', email_cc),
                                      ('from', email_from),
                                      ('subject', subject)):
        message[header_name] = header_value

    message.attach(MIMEText(text, 'plain', 'UTF-8'))

    if html:
        message.attach(MIMEText(html, 'html', 'UTF-8'))

    if attachment_filename and attachment_rows:
        part = MIMEBase('text', 'csv')
        part.set_payload(rows_to_csv(attachment_rows).read())
        part.add_header('Content-Disposition',
                        'attachment',
                        filename=attachment_filename)
        encode_base64(part)
        message.attach(part)

    # Gmail API expects the raw RFC 2822 message as url-safe base64 text.
    raw_payload = base64.urlsafe_b64encode(message.as_bytes()).decode()
    API_Gmail(config, auth).users().messages().send(
        userId='me', body={'raw': raw_payload}).execute()
Example #6
0
def put_rows(auth, destination, rows, variant=''):
    """Processes standard write JSON block for dynamic export of data.

  Allows us to quickly write the results of a script to a destination.  For
  example write the results of a DCM report into BigQuery.

  - Will write to multiple destinations if specified.
  - Extensible, add a handler to define a new destination ( be kind update the
  documentation json ).

  Include the following JSON in a recipe, then in the run.py handler when
  encountering that block pass it to this function and use the returned results.

    from utils.data import put_rows

    var_json = {
      "out":{
        "bigquery":{
          "dataset": [ string ],
          "table": [ string ],
          "schema": [ json - standard bigquery schema json ],
          "skip_rows": [ integer - for removing header ],
          "disposition": [ string - same as BigQuery documentation ]
        },
        "sheets":{
          "sheet":[ string - full URL, suggest using share link ],
          "tab":[ string ],
          "range":[ string - A1:A notation ],
          "delete": [ boolean - if sheet range should be cleared before writing ]
        },
        "storage":{
          "bucket": [ string ],
          "path": [ string ]
        },
        "file":[ string - full path to place to write file ]
      }
    }

    values = put_rows('user', var_json)

  Or you can use it directly with project singleton.

    from util.project import project
    from utils.data import put_rows

    @project.from_parameters
    def something():
      values = get_rows(project.task['auth'], project.task['out'])

    if __name__ == "__main__":
      something()

  Args:
    auth: (string) The type of authentication to use, user or service.
    destination: (json) A json block resembling var_json described above.
    rows: (list) The data being written as a list object.
    variant: (string) Appended to destination to differentiate multiple objects.

  Returns:
    If single_cell is False: Returns a list of row values [[v1], [v2], ... ]
    If single_cell is True: Returns a list of values [v1, v2, ...]
  """

    if 'bigquery' in destination:

        if destination['bigquery'].get('format', 'CSV') == 'JSON':
            json_to_table(
                destination['bigquery'].get('auth', auth),
                destination['bigquery'].get('project_id', project.id),
                destination['bigquery']['dataset'],
                destination['bigquery']['table'] + variant,
                rows,
                destination['bigquery'].get('schema', []),
                destination['bigquery'].get('disposition', 'WRITE_TRUNCATE'),
            )

        elif destination['bigquery'].get('is_incremental_load', False) == True:
            incremental_rows_to_table(
                destination['bigquery'].get('auth', auth),
                destination['bigquery'].get('project_id', project.id),
                destination['bigquery']['dataset'],
                destination['bigquery']['table'] + variant,
                rows,
                destination['bigquery'].get('schema', []),
                destination['bigquery'].get(
                    'skip_rows',
                    1),  #0 if 'schema' in destination['bigquery'] else 1),
                destination['bigquery'].get('disposition', 'WRITE_APPEND'),
                billing_project_id=project.id)

        else:
            rows_to_table(
                destination['bigquery'].get('auth', auth),
                destination['bigquery'].get('project_id', project.id),
                destination['bigquery']['dataset'],
                destination['bigquery']['table'] + variant,
                rows,
                destination['bigquery'].get('schema', []),
                destination['bigquery'].get(
                    'skip_rows',
                    1),  #0 if 'schema' in destination['bigquery'] else 1),
                destination['bigquery'].get('disposition', 'WRITE_TRUNCATE'),
            )

    if 'sheets' in destination:
        if destination['sheets'].get('delete', False):
            sheets_clear(
                auth,
                destination['sheets']['sheet'],
                destination['sheets']['tab'] + variant,
                destination['sheets']['range'],
            )

        sheets_write(auth, destination['sheets']['sheet'],
                     destination['sheets']['tab'] + variant,
                     destination['sheets']['range'], rows)

    if 'file' in destination:
        path_out, file_ext = destination['file'].rsplit('.', 1)
        file_out = path_out + variant + '.' + file_ext
        if project.verbose:
            print('SAVING', file_out)
        makedirs_safe(parse_path(file_out))
        with open(file_out, 'w') as save_file:
            save_file.write(rows_to_csv(rows).read())

    if 'storage' in destination and destination['storage'].get(
            'bucket') and destination['storage'].get('path'):
        # create the bucket
        bucket_create(auth, project.id, destination['storage']['bucket'])

        # put the file
        file_out = destination['storage']['bucket'] + ':' + destination[
            'storage']['path'] + variant
        if project.verbose:
            print('SAVING', file_out)
        object_put(auth, file_out, rows_to_csv(rows))

    if 'sftp' in destination:
        try:
            cnopts = pysftp.CnOpts()
            cnopts.hostkeys = None

            # NOTE(review): rsplit('.', 1) drops the '.' separator, so the
            # uploaded name is e.g. 'reportcsv' not 'report.csv' — confirm
            # whether downstream consumers depend on this exact name.
            path_out, file_out = destination['sftp']['file'].rsplit('.', 1)
            file_out = path_out + variant + file_out

            sftp = pysftp.Connection(host=destination['sftp']['host'],
                                     username=destination['sftp']['username'],
                                     password=destination['sftp']['password'],
                                     port=destination['sftp']['port'],
                                     cnopts=cnopts)

            if '/' in file_out:
                dir_out, file_out = file_out.rsplit('/', 1)
                sftp.cwd(dir_out)

            sftp.putfo(rows_to_csv(rows), file_out)

        # BUG FIX: 'except e:' is invalid Python 3 — it treats 'e' as an
        # (undefined) exception class instead of binding the exception.
        except Exception as e:
            print(str(e))
            traceback.print_exc()
Example #7
0
 def handle(self, *args, **kwargs):
   """Print the scripts returned by get_scripts as CSV on stdout."""
   csv_buffer = rows_to_csv(self.get_scripts())
   print(csv_buffer.read())
Example #8
0
def put_rows(auth, destination, filename, rows, variant=''):
    """Processes standard write JSON block for dynamic export of data.

  Allows us to quickly write the results of a script to a destination.  For example
  write the results of a DCM report into BigQuery.

  - Will write to multiple destinations if specified.
  - Extensible, add a handler to define a new destination ( be kind update the documentation json ).

  Include the following JSON in a recipe, then in the run.py handler when
  encountering that block pass it to this function and use the returned results.

    from utils.data import put_rows

    var_json = {
      "out":{
        "bigquery":{
          "dataset": [ string ],
          "table": [ string ],
          "schema": [ json - standard bigquery schema json ],
          "skip_rows": [ integer - for removing header ],
          "disposition": [ string - same as BigQuery documentation ]
        },
        "sheets":{
          "url":[ string - full URL, suggest using share link ],
          "tab":[ string ],
          "range":[ string - A1:A notation ],
          "delete": [ boolean - if sheet range should be cleared before writing ]
        },
        "storage":{
          "bucket": [ string ],
          "path": [ string ]
        },
        "directory":[ string - full path to place to write file ]
      }
    }

    values = put_rows('user', var_json)

  Or you can use it directly with project singleton.

    from util.project import project
    from utils.data import put_rows

    @project.from_parameters
    def something():
      values = get_rows(project.task['auth'], project.task['out'])

    if __name__ == "__main__":
      something()

  Args:
    auth: (string) The type of authentication to use, user or service.
    destination: (json) A json block resembling var_json described above.
    filename: (string) A unique filename if writing to medium requiring one, usually generated by script.
    rows: (list) The data being written as a list object.
    variant: (string) Appends this to the destination name to create a variant ( for example when downloading multiple tabs in a sheet ).

  Returns:
    If single_cell is False: Returns a list of row values [[v1], [v2], ... ]
    If single_cell is True: Returns a list of values [v1, v2, ...]
  """

    if 'bigquery' in destination:

        if destination['bigquery'].get('format', 'CSV') == 'JSON':
            json_to_table(
                destination['bigquery'].get('auth', auth),
                destination['bigquery'].get('project_id', project.id),
                destination['bigquery']['dataset'],
                destination['bigquery']['table'] + variant,
                rows,
                destination['bigquery'].get('schema', []),
                destination['bigquery'].get('disposition', 'WRITE_TRUNCATE'),
            )

        elif destination['bigquery'].get('is_incremental_load', False) == True:
            incremental_rows_to_table(
                destination['bigquery'].get('auth', auth),
                destination['bigquery'].get('project_id', project.id),
                destination['bigquery']['dataset'],
                destination['bigquery']['table'] + variant,
                rows,
                destination['bigquery'].get('schema', []),
                destination['bigquery'].get(
                    'skip_rows',
                    1),  #0 if 'schema' in destination['bigquery'] else 1),
                destination['bigquery'].get('disposition', 'WRITE_APPEND'),
                billing_project_id=project.id)

        else:
            rows_to_table(
                destination['bigquery'].get('auth', auth),
                destination['bigquery'].get('project_id', project.id),
                destination['bigquery']['dataset'],
                destination['bigquery']['table'] + variant,
                rows,
                destination['bigquery'].get('schema', []),
                destination['bigquery'].get(
                    'skip_rows',
                    1),  #0 if 'schema' in destination['bigquery'] else 1),
                destination['bigquery'].get('disposition', 'WRITE_TRUNCATE'),
            )

    if 'sheets' in destination:
        if destination['sheets'].get('delete', False):
            sheets_clear(auth, destination['sheets']['sheet'],
                         destination['sheets']['tab'] + variant,
                         destination['sheets']['range'])
        sheets_write(auth, destination['sheets']['sheet'],
                     destination['sheets']['tab'] + variant,
                     destination['sheets']['range'], rows)

    if 'directory' in destination:
        file_out = destination['directory'] + variant + filename
        # BUG FIX: Python 2 print statement converted to a function call.
        if project.verbose: print('SAVING', file_out)
        makedirs_safe(parse_path(file_out))
        # BUG FIX: rows_to_csv(...).read() yields str in Python 3; writing
        # str to a file opened 'wb' raises TypeError, so open in text mode.
        with open(file_out, 'w') as save_file:
            save_file.write(rows_to_csv(rows).read())

    if 'storage' in destination and destination['storage'].get(
            'bucket') and destination['storage'].get('path'):
        # create the bucket
        bucket_create(auth, project.id, destination['storage']['bucket'])

        # put the file
        file_out = destination['storage']['bucket'] + ':' + destination[
            'storage']['path'] + variant + filename
        # BUG FIX: Python 2 print statement converted to a function call.
        if project.verbose: print('SAVING', file_out)
        object_put(auth, file_out, rows_to_csv(rows))

    # deprecated do not use
    if 'trix' in destination:
        trix_update(auth, destination['trix']['sheet_id'],
                    destination['trix']['sheet_range'], rows_to_csv(rows),
                    destination['trix']['clear'])

    if 'email' in destination:
        pass

    if 'sftp' in destination:
        try:
            # Silence pysftp's noisy stderr chatter during the transfer.
            sys.stderr = StringIO()

            cnopts = pysftp.CnOpts()
            cnopts.hostkeys = None

            file_prefix = 'report'
            if 'file_prefix' in destination['sftp']:
                file_prefix = destination['sftp'].get('file_prefix')
                del destination['sftp']['file_prefix']

            #sftp_configs = destination['sftp']
            #sftp_configs['cnopts'] = cnopts
            #sftp = pysftp.Connection(**sftp_configs)

            sftp = pysftp.Connection(host=destination['sftp']['host'],
                                     username=destination['sftp']['username'],
                                     password=destination['sftp']['password'],
                                     port=destination['sftp']['port'],
                                     cnopts=cnopts)

            if 'directory' in destination['sftp']:
                sftp.cwd(destination['sftp']['directory'])

            tmp_file_name = '/tmp/%s_%s.csv' % (
                file_prefix,
                datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S'))

            # BUG FIX: text mode ('w' not 'wb') since the CSV payload is str.
            tmp_file = open(tmp_file_name, 'w')
            tmp_file.write(rows_to_csv(rows).read())
            tmp_file.close()

            sftp.put(tmp_file_name)

            os.remove(tmp_file_name)

            sys.stderr = sys.__stderr__
        # BUG FIX: 'except e:' / 'print e' are Python 2 leftovers that are
        # invalid in Python 3.
        except Exception as e:
            print(e)
            traceback.print_exc()
Example #9
0
def put_rows(config, auth, destination, rows, schema=None, variant=''):
  """Processes standard write JSON block for dynamic export of data.

  Allows us to quickly write the results of a script to a destination.  For
  example write the results of a DCM report into BigQuery.

  - Will write to multiple destinations if specified.
  - Extensible, add a handler to define a new destination ( be kind update the
  documentation json ).

  Include the following JSON in a recipe, then in the run.py handler when
  encountering that block pass it to this function and use the returned results.

    from utils.data import put_rows

    var_json = {
      "out":{
        "bigquery":{
          "auth":"[ user or service ]",
          "dataset": [ string ],
          "table": [ string ],
          "schema": [ json - standard bigquery schema json ],
          "header": [ boolean - true if header exists in rows ],
          "disposition": [ string - same as BigQuery documentation ]
        },
        "sheets":{
          "auth":"[ user or service ]",
          "sheet":[ string - full URL, suggest using share link ],
          "tab":[ string ],
          "range":[ string - A1:A notation ],
          "append": [ boolean - if sheet range should be appended to ],
          "delete": [ boolean - if sheet range should be cleared before writing ]
        },
        "storage":{
          "auth":"[ user or service ]",
          "bucket": [ string ],
          "path": [ string ]
        },
        "file":[ string - full path to place to write file ]
      }
    }

    values = put_rows('user', var_json)

  Args:
    config: (Configuration) Project configuration; provides verbose and project.
    auth: (string) The type of authentication to use, user or service.
    destination: (json) A json block resembling var_json described above.
    rows: (iterator) The list of rows to be written, if None no action is performed.
    schema: (json) A bigquery schema definition.
    variant: (string) Appended to destination to differentiate multiple objects.

  Returns:
    If unnest is False: Returns a list of row values [[v1], [v2], ... ]
    If unnest is True: Returns a list of values [v1, v2, ...]
  """

  if rows is None:
    if config.verbose:
      print('PUT ROWS: Rows is None, ignoring write.')
    return

  if 'bigquery' in destination:

    if not schema:
      schema = destination['bigquery'].get('schema')

    # Only skip the header row when a schema supplies the column names.
    skip_rows = 1 if destination['bigquery'].get('header') and schema else 0

    if destination['bigquery'].get('format', 'CSV') == 'JSON':
      json_to_table(
          config,
          destination['bigquery'].get('auth', auth),
          destination['bigquery'].get('project_id', config.project),
          destination['bigquery']['dataset'],
          destination['bigquery']['table'] + variant,
          rows,
          schema,
          destination['bigquery'].get('disposition', 'WRITE_TRUNCATE'),
      )

    elif destination['bigquery'].get('is_incremental_load', False) == True:
      incremental_rows_to_table(
          config,
          destination['bigquery'].get('auth', auth),
          destination['bigquery'].get('project_id', config.project),
          destination['bigquery']['dataset'],
          destination['bigquery']['table'] + variant,
          rows,
          schema,
          destination['bigquery'].get('skip_rows', skip_rows),
          destination['bigquery'].get('disposition', 'WRITE_APPEND'),
          billing_project_id=config.project)

    else:

      rows_to_table(
          config,
          destination['bigquery'].get('auth', auth),
          destination['bigquery'].get('project_id', config.project),
          destination['bigquery']['dataset'],
          destination['bigquery']['table'] + variant,
          rows,
          schema,
          destination['bigquery'].get('skip_rows', skip_rows),
          destination['bigquery'].get('disposition', 'WRITE_TRUNCATE'),
      )

  if 'sheets' in destination:
    if destination['sheets'].get('delete', False):
      sheets_clear(
        config,
        destination['sheets'].get('auth', auth),
        destination['sheets']['sheet'],
        destination['sheets']['tab'] + variant,
        destination['sheets']['range'],
      )

    sheets_write(
      config,
      destination['sheets'].get('auth', auth),
      destination['sheets']['sheet'],
      destination['sheets']['tab'] + variant,
      destination['sheets']['range'],
      rows_to_type(rows),
      destination['sheets'].get('append', False),
    )

  if 'file' in destination:
    path_out, file_ext = destination['file'].rsplit('.', 1)
    file_out = path_out + variant + '.' + file_ext
    if config.verbose:
      print('SAVING', file_out)
    makedirs_safe(parse_path(file_out))
    with open(file_out, 'w') as save_file:
      save_file.write(rows_to_csv(rows).read())

  if 'storage' in destination and destination['storage'].get(
      'bucket') and destination['storage'].get('path'):
    bucket_create(
      config,
      destination['storage'].get('auth', auth),
      config.project,
      destination['storage']['bucket']
    )

    # put the file
    file_out = destination['storage']['bucket'] + ':' + destination['storage'][
        'path'] + variant
    if config.verbose:
      print('SAVING', file_out)
    object_put(config, auth, file_out, rows_to_csv(rows))

  if 'sftp' in destination:
    try:
      cnopts = pysftp.CnOpts()
      cnopts.hostkeys = None

      # NOTE(review): rsplit('.', 1) drops the '.' separator, so the uploaded
      # name is e.g. 'reportcsv' not 'report.csv' — confirm with consumers.
      path_out, file_out = destination['sftp']['file'].rsplit('.', 1)
      file_out = path_out + variant + file_out

      sftp = pysftp.Connection(
          host=destination['sftp']['host'],
          username=destination['sftp']['username'],
          password=destination['sftp']['password'],
          port=destination['sftp']['port'],
          cnopts=cnopts)

      if '/' in file_out:
        dir_out, file_out = file_out.rsplit('/', 1)
        sftp.cwd(dir_out)

      sftp.putfo(rows_to_csv(rows), file_out)

    # BUG FIX: 'except e:' is invalid Python 3 — it treats 'e' as an
    # (undefined) exception class instead of binding the exception.
    except Exception as e:
      print(str(e))
      traceback.print_exc()