Python get_schema 예제들, starthinker.util.bigquery.get_schema Python 예제들

예제 #1

0

파일 보기

def main():

  parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=textwrap.dedent('''\
      Command line to help debug DV360 reports and build reporting tools.

      Examples:
        To get list of reports: python helper.py --list -u [user credentials path]
        To get report json: python helper.py --report [id] -u [user credentials path]
        To get report schema: python helper.py --schema [id] -u [user credentials path]
        To get report sample: python helper.py --sample [id] -u [user credentials path]

  '''))

  # create parameters
  parser.add_argument('--report', help='report ID to pull json definition', default=None)
  parser.add_argument('--schema', help='report ID to pull schema format', default=None)
  parser.add_argument('--sample', help='report ID to pull sample data', default=None)
  parser.add_argument('--list', help='list reports', action='store_true')

  # initialize project
  project.from_commandline(parser=parser, arguments=('-u', '-c', '-s', '-v'))
  auth = 'service' if project.args.service else 'user'

  # get report
  if project.args.report:
    report = API_DBM(auth).queries().getquery(queryId=project.args.report).execute()
    print(json.dumps(report, indent=2, sort_keys=True))

  # get schema
  elif project.args.schema:
    filename, report = report_file(auth, project.args.schema, None, 10)
    rows = report_to_rows(report)
    rows = report_clean(rows)
    rows = rows_to_type(rows)
    print(json.dumps(get_schema(rows)[1], indent=2, sort_keys=True))

  # get sample
  elif project.args.sample:
    filename, report = report_file(auth, project.args.sample, None, 10)
    rows = report_to_rows(report)
    rows = report_clean(rows)
    rows = rows_to_type(rows)
    for r in rows_print(rows, row_min=0, row_max=20): pass

  # get list
  else:
    for report in API_DBM(auth, iterate=True).queries().listqueries().execute():
      print(json.dumps(report, indent=2, sort_keys=True))

예제 #2

0

파일 보기

파일: helper.py 프로젝트: quesadillasraclette/starthinker

  # initialize project
  project.from_commandline(parser=parser)
  auth = 'service' if project.args.service else 'user'

  # get report
  if project.args.report:
    report = API_DBM(auth).queries().getquery(queryId=project.args.report).execute()
    print json.dumps(report, indent=2, sort_keys=True)

  # get schema
  elif project.args.schema:
    filename, report = report_file(auth, project.args.schema, None, 10)
    rows = report_to_rows(report)
    rows = report_clean(rows)
    rows = rows_to_type(rows)
    print json.dumps(get_schema(rows)[1], indent=2, sort_keys=True)

  # get sample
  elif project.args.sample:
    filename, report = report_file(auth, project.args.sample, None, 10)
    rows = report_to_rows(report)
    rows = report_clean(rows)
    rows = rows_to_type(rows)
    for r in rows_print(rows, row_min=0, row_max=20): pass

  # get list
  else:
    for report in API_DBM(auth, iterate=True).queries().listqueries().execute():
      print json.dumps(report, indent=2, sort_keys=True)

예제 #3

0

파일 보기

파일: run.py 프로젝트: Gregorfran/starthinker-gregor

def sheets():
    if project.verbose: print('SHEETS')

    # if sheet or tab is missing, don't do anything
    if not project.task.get('sheet') or not project.task.get('tab'):
        if project.verbose: print('Missing Sheet and/or Tab, skipping task.')
        return

    # delete if specified, will delete sheet if no more tabs remain
    if project.task.get('delete', False):
        sheets_tab_delete(project.task['auth'], project.task['sheet'],
                          project.task['tab'])

    # create a sheet and tab if specified, if template
    if 'template' in project.task:
        sheets_create(
            project.task['auth'],
            project.task['sheet'],
            project.task['tab'],
            project.task['template'].get('sheet'),
            project.task['template'].get('tab'),
        )

    # copy template if specified ( clear in this context means overwrite )
    #if project.task.get('template', {}).get('sheet'):
    #  sheets_tab_copy(
    #    project.task['auth'],
    #    project.task['template']['sheet'],
    #    project.task['template']['tab'],
    #    project.task['sheet'],
    #    project.task['tab'],
    #    project.task.get('clear', False)
    #  )

    # if no template at least create tab
    #else:
    #  sheets_tab_create(
    #    project.task['auth'],
    #    project.task['sheet'],
    #    project.task['tab']
    #  )

    # clear if specified
    if project.task.get('clear', False):
        sheets_clear(project.task['auth'], project.task['sheet'],
                     project.task['tab'], project.task.get('range', 'A1'))

    # write data if specified
    if 'write' in project.task:
        rows = get_rows(project.task['auth'], project.task['write'])
        sheets_write(project.task['auth'],
                     project.task['sheet'],
                     project.task['tab'],
                     project.task['range'],
                     rows,
                     append=False)

    # append data if specified
    if 'append' in project.task:
        rows = get_rows(project.task['auth'], project.task['append'])
        sheets_write(project.task['auth'],
                     project.task['sheet'],
                     project.task['tab'],
                     project.task['range'],
                     rows,
                     append=True)

    # move data if specified
    # move data if specified
    if 'out' in project.task:
        rows = sheets_read(project.task['auth'],
                           project.task['sheet'], project.task['tab'],
                           project.task.get('range', 'A1'))

        if rows:
            schema = None

            # RECOMMENDED: define schema in json
            if project.task['out']['bigquery'].get('schema'):
                if project.verbose: print('SHEETS SCHEMA DEFINED')
                schema = project.task['out']['bigquery']['schema']

            # NOT RECOMMENDED: determine schema if missing
            else:
                if project.verbose:
                    print(
                        'SHEETS SCHEMA DETECT ( Note Recommended - Define Schema In JSON )'
                    )
                # cast rows to types ( for schema detection )
                rows = rows_to_type(rows)
                rows, schema = get_schema(rows,
                                          project.task.get('header', False),
                                          infer_type=project.task.get(
                                              'infer_type', True))

            # write to table ( not using put because no use cases for other destinations )
            rows_to_table(
                auth=project.task['out'].get('auth', project.task['auth']),
                project_id=project.id,
                dataset_id=project.task['out']['bigquery']['dataset'],
                table_id=project.task['out']['bigquery']['table'],
                rows=rows,
                schema=schema,
                skip_rows=1 if project.task.get('header', False) else 0,
                disposition=project.task['out']['bigquery'].get(
                    'disposition', 'WRITE_TRUNCATE'))

        else:
            print('SHEET EMPTY')

예제 #4

0

파일 보기

파일: run.py 프로젝트: dvandra/starthinker

def sheets():
    if project.verbose: print 'SHEETS'

    # clear if specified
    if project.task.get('clear', False):
        sheets_clear(project.task['auth'], project.task['sheet'],
                     project.task['tab'], project.task['range'])

    # delete if specified ( after clear to prevent errors in case both are given )
    if project.task.get('delete', False):
        sheets_tab_delete(project.task['auth'], project.task['sheet'],
                          project.task['tab'])

    # create or copy if specified
    if 'template' in project.task:
        sheets_tab_copy(project.task['auth'],
                        project.task['template']['sheet'],
                        project.task['template']['tab'], project.task['sheet'],
                        project.task['tab'])
    else:
        sheets_tab_create(project.task['auth'], project.task['sheet'],
                          project.task['tab'])

    # write data if specified
    if 'write' in project.task:
        rows = get_rows(project.task['auth'], project.task['write'])
        sheets_write(project.task['auth'], project.task['sheet'],
                     project.task['tab'], project.task['range'], rows)

    # move if specified
    if 'out' in project.task:
        rows = sheets_read(project.task['auth'], project.task['sheet'],
                           project.task['tab'], project.task['range'])

        if rows:
            schema = None

            # RECOMMENDED: define schema in json
            if project.task['out']['bigquery'].get('schema'):
                if project.verbose: print 'SHEETS SCHEMA DEFINED'
                schema = project.task['out']['bigquery']['schema']
            # NOT RECOMMENDED: determine schema if missing
            else:
                if project.verbose:
                    print 'SHEETS SCHEMA DETECT ( Note Recommended - Define Schema In JSON )'
                # cast rows to types ( for schema detection )
                rows = rows_to_type(rows)
                rows, schema = get_schema(rows,
                                          project.task.get('header', False),
                                          infer_type=project.task.get(
                                              'infer_type', True))

            # write to table ( not using put because no use cases for other destinations )
            rows_to_table(
                auth=project.task['out'].get('auth', project.task['auth']),
                project_id=project.id,
                dataset_id=project.task['out']['bigquery']['dataset'],
                table_id=project.task['out']['bigquery']['table'],
                rows=rows,
                schema=schema,
                skip_rows=1 if project.task.get('header', False) else 0,
                disposition=project.task['out']['bigquery'].get(
                    'disposition', 'WRITE_TRUNCATE'))

        else:
            print 'SHEET EMPTY'

예제 #5

0

파일 보기

def main():
    # get parameters
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""\
    Command line to get table schema from BigQuery.

    Helps developers upload data to BigQuery and pull schemas.  These are the
    most common BigQuery tasks when developing solutions.

    Examples:
      Display table schema: `python helper.py --project [id] --dataset [name] --table [name] -s [credentials]`
      Upload csv table: `python helper.py --project [id] --dataset [name] --table [name] --csv [file] --schema [file] -s [credentials]`
      Upload excel sheet: `python helper.py --project [id] --dataset [name] --table [name] --excel_file [file] --excel_sheet [name] --schema [file] -s [credentials]`

  """))

    parser.add_argument('--dataset',
                        help='name of BigQuery dataset',
                        default=None)
    parser.add_argument('--table', help='name of BigQuery table', default=None)
    parser.add_argument('--csv', help='CSV file path', default=None)
    parser.add_argument('--schema', help='SCHEMA file path', default=None)
    parser.add_argument('--excel_workbook',
                        help='Excel file path',
                        default=None)
    parser.add_argument('--excel_sheet', help='Excel sheet name', default=None)

    # initialize project
    parser = commandline_parser(parser,
                                arguments=('-u', '-c', '-s', '-v', '-p'))
    args = parser.parse_args()
    config = Configuration(user=args.user,
                           client=args.client,
                           service=args.service,
                           verbose=args.verbose,
                           project=args.project)

    auth = 'service' if args.service else 'user'

    schema = json.loads(args.schema) if args.schema else None

    if args.csv:

        with open(args.csv, 'r') as csv_file:
            rows = csv_to_rows(csv_file.read())

            if not schema:
                rows, schema = get_schema(rows)
                print('DETECETED SCHEMA', json.dumps(schema))
                print('Please run again with the above schema provided.')
                exit()

            rows_to_table(config, auth, config.project, args.dataset,
                          args.table, rows, schema)

    elif args.excel_workbook and args.excel_sheet:
        with open(args.excel_workbook, 'r') as excel_file:
            rows = excel_to_rows(excel_file, args.excel_sheet)

            if not schema:
                rows, schema = get_schema(rows)
                print('DETECETED SCHEMA', json.dumps(schema))
                print('Please run again with the above schema provided.')
                exit()

            rows_to_table(config, auth, config.project, args.dataset,
                          args.table, rows, schema)

    else:
        # print schema
        print(
            json.dumps(table_to_schema(config, auth, config.project,
                                       args.dataset, args.table),
                       indent=2))