def _get_api_client(args, commcarehq_base_url):
    return CommCareHqClient(
        url=commcarehq_base_url,
        project=args.project,
        username=args.username,
        password=args.password,
        auth_mode=args.auth_mode,
        version=args.api_version,
    )
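
# A minimal usage sketch (not part of the original module) of how the helper
# above might be driven. The Namespace fields mirror exactly what
# _get_api_client reads; the values are hypothetical stand-ins for what the
# CLI argument parser would normally provide.
def _example_build_client():
    from argparse import Namespace
    args = Namespace(
        project='my-project',          # hypothetical project space
        username='user@example.com',   # hypothetical username
        password='secret',             # hypothetical password or API key
        auth_mode='password',          # assumed auth mode value
        api_version='0.5',             # assumed API version
    )
    return _get_api_client(args, 'https://www.commcarehq.org')
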
def _test_iterate(self, session, paginator):
    client = CommCareHqClient(
        '/fake/commcare-hq/url', project='fake-project', session=session
    )

    # Iteration should do two "gets" because the first will have
    # something in the "next" metadata field
    paginator.init()
    results = list(client.iterate('/fake/uri', paginator))
    self.assertEqual(len(results), 2)
    self.assertEqual(results[0]['foo'], 1)
    self.assertEqual(results[1]['foo'], 2)
def _test_iterate(self, session, paginator, expected_count, expected_vals):
    client = CommCareHqClient('/fake/commcare-hq/url', 'fake-project', None, None)
    client.session = session

    # Iteration should do two "gets" because the first will have
    # something in the "next" metadata field
    paginator.init()
    checkpoint_manager = CheckpointManagerWithSince(None, None)
    results = list(
        client.iterate(
            '/fake/uri', paginator, checkpoint_manager=checkpoint_manager
        )
    )
    self.assertEqual(len(results), expected_count)
    self.assertEqual([result['foo'] for result in results], expected_vals)
def main_with_args(args):
    # Grab the timestamp here so that anything that comes in while this runs
    # will be grabbed next time.
    run_start = datetime.utcnow()

    # Reads as excel if it is a file name that looks like excel, otherwise
    # reads as JSON, falling back to parsing the arg directly as JSON, and
    # finally parsing stdin as JSON
    if args.query:
        if os.path.exists(args.query):
            query_file_md5 = misc.digest_file(args.query)
            if os.path.splitext(args.query)[1] in ['.xls', '.xlsx']:
                import openpyxl
                workbook = openpyxl.load_workbook(args.query)
                query = excel_query.compile_workbook(workbook, args.missing_value)
            else:
                with io.open(args.query, encoding='utf-8') as fh:
                    query = MiniLinq.from_jvalue(json.loads(fh.read()))
        else:
            print('Query file not found: %s' % args.query)
            exit(1)
    else:
        query = MiniLinq.from_jvalue(json.loads(sys.stdin.read()))

    if args.dump_query:
        print(json.dumps(query.to_jvalue(), indent=4))
        exit(0)

    if not args.username:
        args.username = input('Please provide a username: ')

    # The source is redacted here ("******"); the following lines are an
    # assumed reconstruction of the password prompt and API client setup,
    # based on the surrounding code.
    if not args.password:
        import getpass
        args.password = getpass.getpass()

    commcarehq_base_url = args.commcare_hq  # assumed; original derivation redacted
    api_client = _get_api_client(args, commcarehq_base_url)

    if args.output_format == 'xlsx':
        writer = writers.Excel2007TableWriter(args.output)
    elif args.output_format == 'xls':
        writer = writers.Excel2003TableWriter(args.output)
    elif args.output_format == 'csv':
        if not args.output.endswith(".zip"):
            print("WARNING: csv output is a zip file, but "
                  "will be written to %s" % args.output)
            print("Consider appending .zip to the file name to avoid confusion.")
        writer = writers.CsvTableWriter(args.output)
    elif args.output_format == 'json':
        writer = writers.JValueTableWriter()
    elif args.output_format == 'markdown':
        writer = writers.StreamingMarkdownTableWriter(sys.stdout)
    elif args.output_format == 'sql':
        # Output should be a connection URL
        # Writer had bizarre issues so we use a full connection instead of
        # passing in a URL or engine
        import sqlalchemy
        engine = sqlalchemy.create_engine(args.output)
        is_mysql = 'mysql' in args.output
        collation = 'utf8_bin' if is_mysql else None
        writer = writers.SqlTableWriter(engine.connect(),
                                        args.strict_types,
                                        collation=collation)

        if not args.since and not args.start_over and os.path.exists(args.query):
            connection = sqlalchemy.create_engine(args.output)

            # Grab the current list of tables to see if we have already run
            # & written to it
            metadata = sqlalchemy.MetaData()
            metadata.bind = connection
            metadata.reflect()

            if 'commcare_export_runs' in metadata.tables:
                cursor = connection.execute(
                    sqlalchemy.sql.text(
                        'SELECT time_of_run FROM commcare_export_runs '
                        'WHERE query_file_md5 = :query_file_md5 '
                        'ORDER BY time_of_run DESC'
                    ),
                    query_file_md5=query_file_md5
                )
                for row in cursor:
                    args.since = row[0]
                    logger.debug('Last successful run was %s', args.since)
                    break
                cursor.close()
            else:
                logger.warning(
                    'No successful runs found, and --since not specified: '
                    'will import ALL data'
                )

    if args.since:
        logger.debug('Starting from %s', args.since)

    since = dateutil.parser.parse(args.since) if args.since else None
    until = dateutil.parser.parse(args.until) if args.until else None
    env = BuiltInEnv({'commcarehq_base_url': commcarehq_base_url}) \
        | CommCareHqEnv(api_client, since=since, until=until) \
        | JsonPathEnv({})
    results = query.eval(env)

    # Assume that if any tables were emitted, that is the idea,
    # otherwise print the output
    if len(list(env.emitted_tables())) > 0:
        with writer:
            api_client.set_checkpointer(writer,
                                        query=args.query,
                                        query_md5=query_file_md5)
            for table in env.emitted_tables():
                logger.debug('Writing %s', table['name'])
                if table['name'] != table['name'].lower():
                    logger.warning("Caution: Using upper case letters in a "
                                   "table name is not advised: {}".format(
                                       table['name']))
                writer.write_table(table)

            if os.path.exists(args.query):
                writer.set_checkpoint(args.query, query_file_md5, run_start, True)

        if args.output_format == 'json':
            print(json.dumps(writer.tables,
                             indent=4,
                             default=RepeatableIterator.to_jvalue))
    else:
        print(json.dumps(list(results),
                         indent=4,
                         default=RepeatableIterator.to_jvalue))
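
# A minimal sketch (not part of the original module) of driving main_with_args
# directly. The attribute names mirror those read above; every value is a
# hypothetical placeholder for what this module's argparse parser would supply,
# and commcare_hq reflects the assumed reconstruction noted in the function.
def _example_run():
    from argparse import Namespace
    main_with_args(Namespace(
        query='queries/example.xlsx',              # hypothetical query workbook
        missing_value=None,
        dump_query=False,
        username='user@example.com',
        password='secret',
        commcare_hq='https://www.commcarehq.org',
        project='my-project',
        auth_mode='password',
        api_version='0.5',
        output_format='sql',
        output='sqlite:///export.db',              # any SQLAlchemy connection URL
        strict_types=False,
        since=None,
        until=None,
        start_over=False,
    ))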