Example #1
0
def bigquery():
  """Validate a BigQuery table against the task's expected schema/query/values."""

  bq = project.task['bigquery']

  # Compare the live table schema to the expected one when provided
  # ( schema validation runs independently of any value checks ).
  if 'schema' in bq:
    found = table_to_schema(
      project.task['auth'], project.id, bq['dataset'], bq['table'])
    object_compare(found['fields'], bq['schema'])

  # A query takes precedence: run it and compare the rows it returns.
  if 'query' in bq:
    found = query_to_rows(
      project.task['auth'], project.id, bq['dataset'], bq['query'])
    object_compare(sorted(found), sorted(bq['values']))

  # Otherwise fall back to comparing the raw table contents ( sorted so
  # row order does not matter ).
  elif 'values' in bq:
    found = table_to_rows(
      project.task['auth'], project.id, bq['dataset'], bq['table'])
    object_compare(sorted(found), sorted(bq['values']))
Example #2
0
def main():
  """Command line entry point: print a BigQuery table schema as JSON."""

  # build the helper's command line interface
  arg_parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description=textwrap.dedent("""\
      Command line to get table schema from BigQuery.
      This is a helper to help developers debug and create tables.

      Example: `python helper.py --project [id] --dataset [name] --table [name] -s [credentials]`

  """))

  arg_parser.add_argument(
      '--dataset', '-d', default=None, help='name of BigQuery dataset')
  arg_parser.add_argument(
      '--table', '-t', default=None, help='name of BigQuery table')

  # attach the standard credential / project flags and parse everything
  project.from_commandline(
      parser=arg_parser, arguments=('-u', '-c', '-s', '-v', '-p'))

  # service credentials take precedence over user credentials
  auth = 'service' if project.args.service else 'user'

  # fetch the schema and emit just its field list as pretty printed JSON
  fields = table_to_schema(
      auth, project.id, project.args.dataset, project.args.table)['fields']
  print(json.dumps(fields, indent=2))
Example #3
0
def anonymize_table(table_id):
    """Copy one table, anonymizing configured columns along the way."""

    if project.verbose:
        print('ANONYMIZE TABLE', project.task['bigquery']['to']['dataset'],
              table_id)

    src = project.task['bigquery']['from']
    dst = project.task['bigquery']['to']
    auth = project.task['auth']

    # pull the source schema so the destination table matches it exactly
    schema = table_to_schema(auth, src['project'], src['dataset'], table_id)

    # read rows as objects so columns can be addressed by name
    raw_rows = table_to_rows(
        auth, src['project'], src['dataset'], table_id, as_object=True)

    # scrub only the columns listed in the destination configuration
    scrubbed = anonymize_rows(raw_rows, dst.get('columns', []))

    # overwrite the destination table with the anonymized rows
    json_to_table(
        auth,
        dst['project'],
        dst['dataset'],
        table_id,
        scrubbed,
        schema,
        disposition='WRITE_TRUNCATE')
Example #4
0
def bigquery():
    """Test task: verify a BigQuery table against an expected schema, query
    result, or value list, reporting pass/fail through the test helpers."""
    print('TEST: bigquery')

    bq = project.task['bigquery']

    # schema comparison runs independently of any value checks
    if 'schema' in bq:
        actual = table_to_schema(project.task['auth'], project.id,
                                 bq['dataset'], bq['table'])
        deltas = schema_compare(bq['schema'], actual, path='')

        if not deltas:
            test_passed()
        else:
            # print every schema mismatch between expected and actual
            print(
                '\nFAILED *******************************************************\n'
            )
            for delta in deltas.values():
                print('%(path)s: %(error)s ( %(expected)s - %(actual)s)' %
                      delta)
            print(
                '\n**************************************************************\n'
            )
            test_failed()

    # a query takes precedence: run it ( with substituted parameters ) and
    # compare the rows it returns
    if 'query' in bq:
        found = query_to_rows(
            project.task['auth'],
            project.id,
            bq['dataset'],
            query_parameters(bq['query'], bq.get('parameters')),
            legacy=bq.get('legacy', True))

        object_compare(sorted(found), sorted(bq['values']))

    # otherwise compare the raw table contents ( sorted, order-insensitive )
    elif 'values' in bq:
        found = table_to_rows(project.task['auth'], project.id,
                              bq['dataset'], bq['table'])

        object_compare(sorted(found), sorted(bq['values']))
Example #5
0
def _print_detected_schema_and_exit(rows):
    """Detect a schema from *rows*, print it for reuse, and stop the program.

    Called when --schema was not supplied: the user is expected to re-run
    with the printed schema so the upload uses an explicit, reviewed schema.
    """
    rows, schema = get_schema(rows)
    # fixed typo: was 'DETECETED SCHEMA'
    print('DETECTED SCHEMA', json.dumps(schema))
    print('Please run again with the above schema provided.')
    exit()


def main():
    """Command line entry point: upload CSV/Excel data to a BigQuery table,
    or print the table's schema as JSON when no upload source is given."""

    # get parameters
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""\
    Command line to get table schema from BigQuery.

    Helps developers upload data to BigQuery and pull schemas.  These are the
    most common BigQuery tasks when developing solutions.

    Examples:
      Display table schema: `python helper.py --project [id] --dataset [name] --table [name] -s [credentials]`
      Upload csv table: `python helper.py --project [id] --dataset [name] --table [name] --csv [file] --schema [file] -s [credentials]`
      Upload excel sheet: `python helper.py --project [id] --dataset [name] --table [name] --excel_file [file] --excel_sheet [name] --schema [file] -s [credentials]`

  """))

    parser.add_argument('--dataset',
                        help='name of BigQuery dataset',
                        default=None)
    parser.add_argument('--table', help='name of BigQuery table', default=None)
    parser.add_argument('--csv', help='CSV file path', default=None)
    parser.add_argument('--schema', help='SCHEMA file path', default=None)
    parser.add_argument('--excel_workbook',
                        help='Excel file path',
                        default=None)
    parser.add_argument('--excel_sheet', help='Excel sheet name', default=None)

    # initialize project: add the standard credential flags and build config
    parser = commandline_parser(parser,
                                arguments=('-u', '-c', '-s', '-v', '-p'))
    args = parser.parse_args()
    config = Configuration(user=args.user,
                           client=args.client,
                           service=args.service,
                           verbose=args.verbose,
                           project=args.project)

    # service credentials take precedence over user credentials
    auth = 'service' if args.service else 'user'

    # --schema is passed as inline JSON on the command line
    schema = json.loads(args.schema) if args.schema else None

    if args.csv:
        # upload a CSV file, detecting a schema first if none was given
        with open(args.csv, 'r') as csv_file:
            rows = csv_to_rows(csv_file.read())

            if not schema:
                _print_detected_schema_and_exit(rows)

            rows_to_table(config, auth, config.project, args.dataset,
                          args.table, rows, schema)

    elif args.excel_workbook and args.excel_sheet:
        # NOTE(review): workbook is opened in text mode ('r'); binary Excel
        # formats usually need 'rb' — confirm against excel_to_rows.
        with open(args.excel_workbook, 'r') as excel_file:
            rows = excel_to_rows(excel_file, args.excel_sheet)

            if not schema:
                _print_detected_schema_and_exit(rows)

            rows_to_table(config, auth, config.project, args.dataset,
                          args.table, rows, schema)

    else:
        # no upload source given: print the table schema as pretty JSON
        print(
            json.dumps(table_to_schema(config, auth, config.project,
                                       args.dataset, args.table),
                       indent=2))
Example #6
0
This is a utility to help developers debug and create tables.

- To get table schema: `python bigquery/helper.py --project [id] --dataset [name] --table [name] -u [credentials] -s [credentials]`

"""

import json
import argparse

from starthinker.util.project import project
from starthinker.util.bigquery import table_to_schema

if __name__ == "__main__":

    # collect the helper-specific command line flags
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--dataset',
                            default=None,
                            help='name of BigQuery dataset')
    arg_parser.add_argument('--table',
                            default=None,
                            help='name of BigQuery table')

    # attach the standard StarThinker flags and parse everything
    project.from_commandline(parser=arg_parser)

    # service credentials take precedence over user credentials
    auth = 'service' if project.args.service else 'user'

    # fetch the schema and pretty print just its field list
    fields = table_to_schema(auth, project.id, project.args.dataset,
                             project.args.table)['fields']
    print(json.dumps(fields, indent=2))