def bigquery():
  """Validate a BigQuery table against expectations declared in the task.

  Reads ``project.task['bigquery']`` and performs, in order:
    * 'schema' check - compares the live table schema fields against the
      expected 'schema' (runs independently of any value checks).
    * 'query' check  - compares sorted rows returned by the query against
      the expected 'values'.
    * 'values' check - compares sorted rows read straight from the table
      against 'values'; only runs when no 'query' is given.

  Comparison failures are reported via object_compare.
  """
  bq = project.task['bigquery']
  auth = project.task['auth']

  # Schema check is independent of the value checks below.
  if 'schema' in bq:
    live_schema = table_to_schema(auth, project.id, bq['dataset'], bq['table'])
    object_compare(live_schema['fields'], bq['schema'])

  # A query takes precedence over a straight table read for value checks.
  if 'query' in bq:
    fetched = query_to_rows(auth, project.id, bq['dataset'], bq['query'])
    object_compare(sorted(fetched), sorted(bq['values']))
  elif 'values' in bq:
    fetched = table_to_rows(auth, project.id, bq['dataset'], bq['table'])
    object_compare(sorted(fetched), sorted(bq['values']))
def main():
  """Command-line entry point: print the schema of a BigQuery table.

  Parses --dataset/--table plus the standard StarThinker credential flags,
  initializes the project, then dumps the table's schema fields as JSON.
  """
  # Build the argument parser with a dedented multi-line description.
  parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description=textwrap.dedent("""\
      Command line to get table schema from BigQuery.

      This is a helper to help developers debug and create tables.

      Example: `python helper.py --project [id] --dataset [name] --table [name] -s [credentials]`

  """))

  parser.add_argument(
      '--dataset', '-d', help='name of BigQuery dataset', default=None)
  parser.add_argument(
      '--table', '-t', help='name of BigQuery table', default=None)

  # Initialize the project from the combined command line (credentials flags
  # -u/-c/-s plus verbosity -v and project -p are handled by StarThinker).
  project.from_commandline(
      parser=parser, arguments=('-u', '-c', '-s', '-v', '-p'))

  # Service credentials win when supplied; otherwise fall back to user auth.
  auth = 'service' if project.args.service else 'user'

  # Fetch and pretty-print only the schema's field list.
  fields = table_to_schema(
      auth, project.id, project.args.dataset, project.args.table)['fields']
  print(json.dumps(fields, indent=2))
def anonymize_table(table_id):
  """Copy one table from the 'from' dataset to the 'to' dataset, anonymized.

  Reads the schema and rows of ``table_id`` from the source project/dataset,
  anonymizes the configured columns, and truncate-writes the result to the
  destination project/dataset under the same table name.

  Args:
    table_id: name of the table to copy and anonymize.
  """
  auth = project.task['auth']
  source = project.task['bigquery']['from']
  target = project.task['bigquery']['to']

  if project.verbose:
    print('ANONYMIZE TABLE', target['dataset'], table_id)

  # Preserve the source schema so the destination table matches exactly.
  schema = table_to_schema(auth, source['project'], source['dataset'], table_id)

  # Read rows as objects so anonymize_rows can address columns by name.
  rows = table_to_rows(
      auth, source['project'], source['dataset'], table_id, as_object=True)
  rows = anonymize_rows(rows, target.get('columns', []))

  # WRITE_TRUNCATE: destination is fully replaced on every run.
  json_to_table(
      auth,
      target['project'],
      target['dataset'],
      table_id,
      rows,
      schema,
      disposition='WRITE_TRUNCATE')
def bigquery():
  """Test task: validate a BigQuery table's schema and/or values.

  Reads ``project.task['bigquery']`` and performs, in order:
    * 'schema' check - diffs the expected schema against the live table
      schema via schema_compare, printing each delta and recording
      pass/fail via test_passed/test_failed.
    * 'query' check  - runs the (parameterized) query and compares sorted
      rows against the expected 'values'.
    * 'values' check - compares sorted rows read straight from the table
      against 'values'; only runs when no 'query' is given.
  """
  print('TEST: bigquery')

  bq = project.task['bigquery']
  auth = project.task['auth']

  # Schema check is independent of the value checks below.
  if 'schema' in bq:
    live_schema = table_to_schema(auth, project.id, bq['dataset'], bq['table'])
    deltas = schema_compare(bq['schema'], live_schema, path='')

    if deltas:
      print(
          '\nFAILED *******************************************************\n'
      )
      for delta in deltas.values():
        print('%(path)s: %(error)s ( %(expected)s - %(actual)s)' % delta)
      print(
          '\n**************************************************************\n'
      )
      test_failed()
    else:
      test_passed()

  # A query takes precedence over a straight table read for value checks.
  if 'query' in bq:
    fetched = query_to_rows(
        auth,
        project.id,
        bq['dataset'],
        query_parameters(bq['query'], bq.get('parameters')),
        legacy=bq.get('legacy', True))
    object_compare(sorted(fetched), sorted(bq['values']))
  elif 'values' in bq:
    fetched = table_to_rows(auth, project.id, bq['dataset'], bq['table'])
    object_compare(sorted(fetched), sorted(bq['values']))
def _schema_or_exit(rows, schema):
  """Return (rows, schema); if schema is missing, detect, print, and exit.

  get_schema consumes the row iterator while inferring types, so the
  (possibly re-materialized) rows it returns must be used by the caller.
  """
  if schema:
    return rows, schema
  rows, detected = get_schema(rows)
  # Fixed typo: was 'DETECETED SCHEMA'.
  print('DETECTED SCHEMA', json.dumps(detected))
  print('Please run again with the above schema provided.')
  exit()


def main():
  """Command-line entry point: upload CSV/Excel data or print a table schema.

  Behavior by flags:
    * --csv given: load the CSV into the table (schema required, else
      detected, printed, and the program exits).
    * --excel_workbook and --excel_sheet given: same, from an Excel sheet.
    * otherwise: print the table's schema as JSON.
  """
  parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description=textwrap.dedent("""\
      Command line to get table schema from BigQuery.

      Helps developers upload data to BigQuery and pull schemas.

      These are the most common BigQuery tasks when developing solutions.

      Examples:
        Display table schema: `python helper.py --project [id] --dataset [name] --table [name] -s [credentials]`
        Upload csv table: `python helper.py --project [id] --dataset [name] --table [name] --csv [file] --schema [file] -s [credentials]`
        Upload excel sheet: `python helper.py --project [id] --dataset [name] --table [name] --excel_workbook [file] --excel_sheet [name] --schema [file] -s [credentials]`

  """))

  parser.add_argument('--dataset', help='name of BigQuery dataset', default=None)
  parser.add_argument('--table', help='name of BigQuery table', default=None)
  parser.add_argument('--csv', help='CSV file path', default=None)
  parser.add_argument('--schema', help='SCHEMA file path', default=None)
  parser.add_argument('--excel_workbook', help='Excel file path', default=None)
  parser.add_argument('--excel_sheet', help='Excel sheet name', default=None)

  # Initialize configuration from the standard StarThinker credential flags.
  parser = commandline_parser(parser, arguments=('-u', '-c', '-s', '-v', '-p'))
  args = parser.parse_args()
  config = Configuration(
      user=args.user,
      client=args.client,
      service=args.service,
      verbose=args.verbose,
      project=args.project)

  # Service credentials win when supplied; otherwise fall back to user auth.
  auth = 'service' if args.service else 'user'

  # NOTE(review): --schema is parsed directly as JSON text despite the help
  # text saying 'file path' — callers pass the schema inline; kept as-is.
  schema = json.loads(args.schema) if args.schema else None

  if args.csv:
    with open(args.csv, 'r') as csv_file:
      rows = csv_to_rows(csv_file.read())
    rows, schema = _schema_or_exit(rows, schema)
    rows_to_table(config, auth, config.project, args.dataset, args.table,
                  rows, schema)

  elif args.excel_workbook and args.excel_sheet:
    # Open in binary mode: .xlsx workbooks are zip containers and text-mode
    # decoding corrupts/raises on the binary payload.
    with open(args.excel_workbook, 'rb') as excel_file:
      rows = excel_to_rows(excel_file, args.excel_sheet)
    rows, schema = _schema_or_exit(rows, schema)
    rows_to_table(config, auth, config.project, args.dataset, args.table,
                  rows, schema)

  else:
    # No upload requested: just print the table schema.
    print(
        json.dumps(
            table_to_schema(config, auth, config.project, args.dataset,
                            args.table),
            indent=2))
This is a helper to help developers debug and create tables.

- To get table schema:

`python bigquery/helper.py --project [id] --dataset [name] --table [name] -u [credentials] -s [credentials]`

"""

import json
import argparse

from starthinker.util.project import project
from starthinker.util.bigquery import table_to_schema


if __name__ == "__main__":

  # get parameters: dataset and table identify what to inspect; credential
  # flags are added by project.from_commandline below
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--dataset', help='name of BigQuery dataset', default=None)
  parser.add_argument(
      '--table', help='name of BigQuery table', default=None)

  # initialize project (parses the command line, loads credentials)
  project.from_commandline(parser=parser)

  # service credentials win when supplied; otherwise fall back to user auth
  auth = 'service' if project.args.service else 'user'

  # print schema: only the 'fields' list of the table schema, pretty-printed
  print(
      json.dumps(
          table_to_schema(auth, project.id, project.args.dataset,
                          project.args.table)['fields'],
          indent=2))