def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug',
        action='store_true',
        help="enable debugging interface")
    parser.add_argument('--host-url',
        default="http://localhost:9411/v0",
        help="connect to this host/port")
    subparsers = parser.add_subparsers()

    sub_uuid2fcid = subparsers.add_parser('uuid2fcid')
    sub_uuid2fcid.set_defaults(func=run_uuid2fcid)
    sub_uuid2fcid.add_argument('uuid',
        help="UUID to transform")

    sub_fcid2uuid = subparsers.add_parser('fcid2uuid')
    sub_fcid2uuid.set_defaults(func=run_fcid2uuid)
    sub_fcid2uuid.add_argument('fcid',
        help="FCID to transform (into UUID)")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.host_url)
    args.func(args)
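# A minimal sketch of the ident transform the subcommands above likely wrap:
# fatcat "fcid" identifiers are lowercase, unpadded base32 encodings of the 16
# UUID bytes (26 characters). The helper names below are illustrative, not the
# module's own run_uuid2fcid/run_fcid2uuid implementations.
import base64
import uuid

def uuid2fcid_sketch(s: str) -> str:
    # 16 UUID bytes -> 32 base32 chars; strip the 6 '=' padding chars
    return base64.b32encode(uuid.UUID(s).bytes).decode("ascii").lower().rstrip("=")

def fcid2uuid_sketch(fcid: str) -> str:
    # re-add padding so b32decode() accepts the 26-char ident
    raw = base64.b32decode(fcid.upper() + "======")
    return str(uuid.UUID(bytes=raw))

# e.g., fcid2uuid_sketch("aaaaaaaaaaaaarceaaaaaaaaai") round-trips through
# uuid2fcid_sketch() back to the same ident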
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug',
        action='store_true',
        help="enable debugging interface")
    parser.add_argument('--host-url',
        default="http://localhost:9411/v0",
        help="connect to this host/port")
    subparsers = parser.add_subparsers()

    sub_elasticsearch_releases = subparsers.add_parser('elasticsearch-releases')
    sub_elasticsearch_releases.set_defaults(func=run_elasticsearch_releases)
    sub_elasticsearch_releases.add_argument('json_input',
        help="JSON-per-line of release entities",
        default=sys.stdin, type=argparse.FileType('r'))
    sub_elasticsearch_releases.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))

    sub_elasticsearch_containers = subparsers.add_parser('elasticsearch-containers')
    sub_elasticsearch_containers.set_defaults(func=run_elasticsearch_containers)
    sub_elasticsearch_containers.add_argument('json_input',
        help="JSON-per-line of container entities",
        default=sys.stdin, type=argparse.FileType('r'))
    sub_elasticsearch_containers.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))

    sub_elasticsearch_changelogs = subparsers.add_parser('elasticsearch-changelogs')
    sub_elasticsearch_changelogs.set_defaults(func=run_elasticsearch_changelogs)
    sub_elasticsearch_changelogs.add_argument('json_input',
        help="JSON-per-line of changelog entries",
        default=sys.stdin, type=argparse.FileType('r'))
    sub_elasticsearch_changelogs.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))

    sub_citeproc_releases = subparsers.add_parser('citeproc-releases')
    sub_citeproc_releases.set_defaults(func=run_citeproc_releases)
    sub_citeproc_releases.add_argument('json_input',
        help="JSON-per-line of release entities",
        default=sys.stdin, type=argparse.FileType('r'))
    sub_citeproc_releases.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))
    sub_citeproc_releases.add_argument('--style',
        help="citation style to output",
        default='csl-json')
    sub_citeproc_releases.add_argument('--html',
        action='store_true',
        help="output HTML, not plain text")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.host_url)
    args.func(args)
def test_lowercase_doi() -> None:
    api = public_api("http://localhost:9411/v0")
    rldc = ReleaseLowercaseDoiCleanup(api=api)
    rldc.testing_mode = True

    assert rldc.want("") is False
    assert rldc.want("aaaaaaaaaaaaarceaaaaaaaaai") is True
    assert rldc.want("aaaaaaaaaaaaarceaaaaaaaaai\t10.1234/ABCD") is True
    rldc.parse_record("aaaaaaaaaaaaarceaaaaaaaaai")

    dummy_re = api.get_release("aaaaaaaaaaaaarceaaaaaaaaai")
    assert rldc.try_update(dummy_re) is False
    assert rldc.counts["skip-existing-doi-fine"] == 1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug',
        action='store_true',
        help="enable debug logging")
    parser.add_argument('--api-host-url',
        default="http://localhost:9411/v0",
        help="fatcat API host/port to use")
    parser.add_argument('--kafka-hosts',
        default="localhost:9092",
        help="list of Kafka brokers (host/port) to use")
    parser.add_argument('--env',
        default="dev",
        help="Kafka topic namespace to use (eg, prod, qa, dev)")
    subparsers = parser.add_subparsers()

    sub_changelog = subparsers.add_parser('changelog')
    sub_changelog.set_defaults(func=run_changelog)
    sub_changelog.add_argument('--poll-interval',
        help="how long to wait between polling (seconds)",
        default=5.0, type=float)

    sub_entity_updates = subparsers.add_parser('entity-updates')
    sub_entity_updates.set_defaults(func=run_entity_updates)

    sub_elasticsearch_release = subparsers.add_parser('elasticsearch-release')
    sub_elasticsearch_release.set_defaults(func=run_elasticsearch_release)
    sub_elasticsearch_release.add_argument('--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_release.add_argument('--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_release_v03")

    sub_elasticsearch_container = subparsers.add_parser('elasticsearch-container')
    sub_elasticsearch_container.set_defaults(func=run_elasticsearch_container)
    sub_elasticsearch_container.add_argument('--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_container.add_argument('--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_container")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.api_host_url)
    args.func(args)
def main() -> None:
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--fatcat-api-url", default="http://localhost:9411/v0", help="connect to this host/port"
    )
    subparsers = parser.add_subparsers()

    sub_releases = subparsers.add_parser("releases")
    sub_releases.set_defaults(func=run_export_releases)
    sub_releases.add_argument(
        "ident_file",
        help="TSV list of fatcat release idents to dump",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_releases.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    sub_changelog = subparsers.add_parser("changelog")
    sub_changelog.set_defaults(func=run_export_changelog)
    sub_changelog.add_argument("--start", help="index to start dumping at", default=1, type=int)
    sub_changelog.add_argument(
        "--end",
        help="index to stop dumping at (else detect most recent)",
        default=None,
        type=int,
    )
    sub_changelog.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.fatcat_api_url)
    args.func(args)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug',
        action='store_true',
        help="enable debugging interface")
    parser.add_argument('--host-url',
        default="http://localhost:9411/v0",
        help="connect to this host/port")
    subparsers = parser.add_subparsers()

    sub_releases = subparsers.add_parser('releases')
    sub_releases.set_defaults(func=run_export_releases)
    sub_releases.add_argument('ident_file',
        help="TSV list of fatcat release idents to dump",
        default=sys.stdin, type=argparse.FileType('r'))
    sub_releases.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))

    sub_changelog = subparsers.add_parser('changelog')
    sub_changelog.set_defaults(func=run_export_changelog)
    sub_changelog.add_argument('--start',
        help="index to start dumping at",
        default=1, type=int)
    sub_changelog.add_argument('--end',
        help="index to stop dumping at (else detect most recent)",
        default=None, type=int)
    sub_changelog.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.host_url)
    args.func(args)
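# Usage sketch for the export CLI above (the fatcat_export.py file name is an
# assumption; the subcommands and arguments come from the argparse definitions):
#
#   ./fatcat_export.py releases release_idents.tsv release_dump.json
#   ./fatcat_export.py changelog --start 1 --end 1000 changelog_dump.json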
def main() -> None: """ Run this utility like: python -m fatcat_tools.references Examples: python -m fatcat_tools.references query release_pfrind3kh5hqhgqkueulk2tply """ parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) subparsers = parser.add_subparsers() parser.add_argument("--fatcat-api-base", default="https://api.fatcat.wiki/v0") parser.add_argument("--elasticsearch-base", default="https://search.fatcat.wiki") parser.add_argument("--elasticsearch-ref-index", default="fatcat_ref") sub = subparsers.add_parser( "query", help= "takes a fatcat ident argument, prints both inbound and outbound references", ) sub.set_defaults(func="run_ref_query") sub.add_argument("ident", type=str) sub.add_argument("--enrich", type=str) args = parser.parse_args() if not args.__dict__.get("func"): parser.print_help(file=sys.stderr) sys.exit(-1) args.es_client = elasticsearch.Elasticsearch(args.elasticsearch_base) args.fatcat_api_client = public_api(args.fatcat_api_base) if args.func == "run_ref_query": run_ref_query(args) else: raise NotImplementedError(args.func)
def test_file_release_bugfix() -> None:
    api = public_api("http://localhost:9411/v0")
    frbc = FileReleaseBugfix(api=api)
    frbc.testing_mode = True

    assert frbc.want({"this": "asdf"}) is False

    example_line: Dict[str, Any] = {
        "file_ident": "00000000-0000-0000-3333-000000000002",
        "wrong_release_ident": "00000000-0000-0000-4444-000000000002",
        "edit_extra": {
            "link_source": "unpaywall",
            "link_source_id": "10.1371/journal.pmed.0020124",
            "ingest_request_source": "unpaywall",
        },
    }

    fe1 = frbc.parse_record(example_line)
    print(frbc.counts)
    frbc.try_update(fe1)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--fatcat-api-url',
        default="http://localhost:9411/v0",
        help="connect to this host/port")
    parser.add_argument('--enqueue-kafka',
        action='store_true',
        help="send ingest requests directly to sandcrawler kafka topic for processing")
    parser.add_argument('--kafka-hosts',
        default="localhost:9092",
        help="list of Kafka brokers (host/port) to use")
    parser.add_argument('--elasticsearch-endpoint',
        default="https://search.fatcat.wiki",
        help="elasticsearch API. internal endpoint preferred, but public is default")
    parser.add_argument('--elasticsearch-index',
        default="fatcat_release",
        help="elasticsearch index to query")
    parser.add_argument('--env',
        default="dev",
        help="Kafka topic namespace to use (eg, prod, qa, dev)")
    parser.add_argument('--limit',
        default=None,
        type=int,
        help="max number of search hits to return")
    parser.add_argument('--dry-run',
        action='store_true',
        help="runs through creating all ingest requests, but doesn't actually output or enqueue")
    parser.add_argument('--before-year',
        type=str,
        help="filters results to only those with release_year before this (not inclusive)")
    parser.add_argument('--after-year',
        type=str,
        help="filters results to only those with release_year after this (inclusive)")
    parser.add_argument('--release-types',
        type=str,
        help="filters results to specified release-types, separated by commas. By default, 'stub' is filtered out.")
    parser.add_argument('--allow-non-oa',
        action='store_true',
        help="by default, we limit to OA releases. This removes that filter")
    parser.add_argument('--force-recrawl',
        action='store_true',
        help="tell ingest worker to skip GWB history lookup and do SPNv2 crawl")
    subparsers = parser.add_subparsers()

    sub_container = subparsers.add_parser('container',
        help="create ingest requests for releases from a specific container")
    sub_container.set_defaults(func=run_ingest_container)
    sub_container.add_argument('--container-id',
        help="fatcat container entity ident")
    sub_container.add_argument('--issnl',
        help="ISSN-L of container entity")
    sub_container.add_argument('--publisher',
        help="publisher name")
    sub_container.add_argument('--name',
        help="container name")

    sub_query = subparsers.add_parser('query',
        help="create ingest requests for releases from a specific query")
    sub_query.set_defaults(func=run_ingest_query)
    sub_query.add_argument('query',
        help="search query (same DSL as web interface search)")

    sub_extid = subparsers.add_parser('extid',
        help="create ingest requests for releases that have given extid defined")
    sub_extid.set_defaults(func=run_ingest_extid)
    sub_extid.add_argument('extid',
        help="extid short name (as included in ES release schema)")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.fatcat_api_url)
    args.func(args)
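# Usage sketch for the ingest-request CLI above (the fatcat_ingest.py file
# name is an assumption; global flags belong to the top-level parser, so they
# come before the subcommand):
#
#   ./fatcat_ingest.py --limit 25 --dry-run container --issnl 1234-5678
#   ./fatcat_ingest.py --allow-non-oa query "coffee"
#   ./fatcat_ingest.py --env prod --enqueue-kafka extid doi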
def run(self) -> None:
    ac = ApiClient()
    api = public_api(self.api_host)

    # only used by container indexing query_stats code path
    es_client = elasticsearch.Elasticsearch(self.elasticsearch_backend)

    def fail_fast(err: Any, partitions: List[Any]) -> None:
        if err is not None:
            print("Kafka consumer commit error: {}".format(err), file=sys.stderr)
            print("Bailing out...", file=sys.stderr)
            # TODO: should it be sys.exit(-1)?
            raise KafkaException(err)
        for p in partitions:
            # check for partition-specific commit errors
            if p.error:
                print("Kafka consumer commit error: {}".format(p.error), file=sys.stderr)
                print("Bailing out...", file=sys.stderr)
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(p.error)
        # print("Kafka consumer commit successful")
        pass

    def on_rebalance(consumer: Consumer, partitions: List[Any]) -> None:
        for p in partitions:
            if p.error:
                raise KafkaException(p.error)
        print(
            "Kafka partitions rebalanced: {} / {}".format(consumer, partitions),
            file=sys.stderr,
        )

    consumer_conf = self.kafka_config.copy()
    consumer_conf.update(
        {
            "group.id": self.consumer_group,
            "on_commit": fail_fast,
            # messages don't have offset marked as stored until pushed to
            # elastic, but we do auto-commit stored offsets to broker
            "enable.auto.commit": True,
            "enable.auto.offset.store": False,
            # user code timeout; if no poll after this long, assume user code
            # hung and rebalance (default: 5min)
            "max.poll.interval.ms": 60000,
            "default.topic.config": {
                "auto.offset.reset": "latest",
            },
        }
    )
    consumer = Consumer(consumer_conf)
    consumer.subscribe(
        [self.consume_topic],
        on_assign=on_rebalance,
        on_revoke=on_rebalance,
    )

    while True:
        batch = consumer.consume(num_messages=self.batch_size, timeout=self.poll_interval)
        if not batch:
            if not consumer.assignment():
                print("... no Kafka consumer partitions assigned yet", file=sys.stderr)
            print(
                "... nothing new from kafka, try again (interval: {})".format(
                    self.poll_interval
                ),
                file=sys.stderr,
            )
            continue
        print("... got {} kafka messages".format(len(batch)), file=sys.stderr)

        # first check errors on entire batch...
        for msg in batch:
            if msg.error():
                raise KafkaException(msg.error())

        # ... then process
        bulk_actions = []
        for msg in batch:
            json_str = msg.value().decode("utf-8")
            entity = entity_from_json(json_str, self.entity_type, api_client=ac)
            assert isinstance(entity, self.entity_type)
            if self.entity_type == ChangelogEntry:
                key = entity.index
                # might need to fetch from API
                if not (
                    entity.editgroup  # pylint: disable=no-member # (TODO)
                    and entity.editgroup.editor  # pylint: disable=no-member # (TODO)
                ):
                    entity = api.get_changelog_entry(entity.index)
            else:
                key = entity.ident  # pylint: disable=no-member # (TODO)
            if self.entity_type != ChangelogEntry and entity.state == "wip":
                print(
                    f"WARNING: skipping state=wip entity: {self.entity_type.__name__} {entity.ident}",
                    file=sys.stderr,
                )
                continue
            if self.entity_type == ContainerEntity and self.query_stats:
                stats = query_es_container_stats(
                    entity.ident,
                    es_client=es_client,
                    es_index=self.elasticsearch_release_index,
                    merge_shadows=True,
                )
                doc_dict = container_to_elasticsearch(entity, stats=stats)
            else:
                doc_dict = self.transform_func(entity)
            # TODO: handle deletions from index
            bulk_actions.append(
                json.dumps(
                    {
                        "index": {
                            "_id": key,
                        },
                    }
                )
            )
            bulk_actions.append(json.dumps(doc_dict))

        # if only WIP entities, then skip
        if not bulk_actions:
            for msg in batch:
                consumer.store_offsets(message=msg)
            continue

        print(
            "Upserting, eg, {} (of {} {} in elasticsearch)".format(
                key, len(batch), self.entity_type.__name__
            ),
            file=sys.stderr,
        )
        elasticsearch_endpoint = "{}/{}/_bulk".format(
            self.elasticsearch_backend, self.elasticsearch_index
        )
        resp = requests.post(
            elasticsearch_endpoint,
            headers={"Content-Type": "application/x-ndjson"},
            data="\n".join(bulk_actions) + "\n",
        )
        resp.raise_for_status()
        if resp.json()["errors"]:
            desc = "Elasticsearch errors from post to {}:".format(elasticsearch_endpoint)
            print(desc, file=sys.stderr)
            print(resp.content, file=sys.stderr)
            raise Exception(desc)
        for msg in batch:
            # offsets are *committed* (to brokers) automatically, but need
            # to be marked as processed here
            consumer.store_offsets(message=msg)
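# Illustration of the NDJSON body the bulk-indexing loop above constructs:
# the Elasticsearch _bulk API expects alternating action and document lines,
# terminated by a trailing newline. The ident and title values here are made
# up for demonstration.
import json

bulk_actions_example = [
    json.dumps({"index": {"_id": "aaaaaaaaaaaaarceaaaaaaaaai"}}),
    json.dumps({"ident": "aaaaaaaaaaaaarceaaaaaaaaai", "title": "Example Release"}),
]
ndjson_body = "\n".join(bulk_actions_example) + "\n"
# run() POSTs a body like this to {elasticsearch_backend}/{elasticsearch_index}/_bulk
# with Content-Type: application/x-ndjson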
def main() -> None:
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--fatcat-api-url", default="http://localhost:9411/v0", help="connect to this host/port"
    )
    parser.add_argument(
        "--fatcat-elasticsearch-url",
        default="http://localhost:9200",
        help="connect to this host/port",
    )
    subparsers = parser.add_subparsers()

    sub_elasticsearch_releases = subparsers.add_parser(
        "elasticsearch-releases",
        help="convert fatcat release JSON schema to elasticsearch release schema",
    )
    sub_elasticsearch_releases.set_defaults(func=run_elasticsearch_releases)
    sub_elasticsearch_releases.add_argument(
        "json_input",
        help="JSON-per-line of release entities",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_elasticsearch_releases.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    sub_elasticsearch_containers = subparsers.add_parser(
        "elasticsearch-containers",
        help="convert fatcat container JSON schema to elasticsearch container schema",
    )
    sub_elasticsearch_containers.set_defaults(func=run_elasticsearch_containers)
    sub_elasticsearch_containers.add_argument(
        "json_input",
        help="JSON-per-line of container entities",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_elasticsearch_containers.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )
    sub_elasticsearch_containers.add_argument(
        "--query-stats",
        action="store_true",
        help="whether to query release search index for container stats",
    )

    sub_elasticsearch_files = subparsers.add_parser(
        "elasticsearch-files",
        help="convert fatcat file JSON schema to elasticsearch file schema",
    )
    sub_elasticsearch_files.set_defaults(func=run_elasticsearch_files)
    sub_elasticsearch_files.add_argument(
        "json_input",
        help="JSON-per-line of file entities",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_elasticsearch_files.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    sub_elasticsearch_changelogs = subparsers.add_parser(
        "elasticsearch-changelogs",
        help="convert fatcat changelog JSON schema to elasticsearch changelog schema",
    )
    sub_elasticsearch_changelogs.set_defaults(func=run_elasticsearch_changelogs)
    sub_elasticsearch_changelogs.add_argument(
        "json_input",
        help="JSON-per-line of changelog entries",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_elasticsearch_changelogs.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    sub_citeproc_releases = subparsers.add_parser(
        "citeproc-releases",
        help="convert fatcat release schema to any standard citation format using citeproc/CSL",
    )
    sub_citeproc_releases.set_defaults(func=run_citeproc_releases)
    sub_citeproc_releases.add_argument(
        "json_input",
        help="JSON-per-line of release entities",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_citeproc_releases.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )
    sub_citeproc_releases.add_argument(
        "--style", help="citation style to output", default="csl-json"
    )
    sub_citeproc_releases.add_argument(
        "--html", action="store_true", help="output HTML, not plain text"
    )

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.fatcat_api_url)
    args.func(args)
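# Usage sketch for the transform CLI above (the fatcat_transform.py file name
# and the 'bibtex' style name are assumptions; subcommands and flags come from
# the argparse definitions, and '-' is argparse.FileType shorthand for stdout):
#
#   ./fatcat_transform.py elasticsearch-releases release_dump.json release_docs.json
#   ./fatcat_transform.py citeproc-releases release_dump.json - --style bibtex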
def test_public_api():
    api = public_api("http://localhost:9411/v0")
    api.get_changelog()
    with pytest.raises(ApiException):
        api.auth_check()
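# public_api() above returns an *unauthenticated* client, which is why the
# read-only get_changelog() call succeeds while auth_check() raises
# ApiException. A minimal connectivity-check sketch built on the same pattern
# (the helper name is illustrative, not part of the test suite):

def fatcat_reachable(host_url: str = "http://localhost:9411/v0") -> bool:
    try:
        public_api(host_url).get_changelog()
        return True
    except Exception:
        return False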
def main() -> None:
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--api-host-url", default="http://localhost:9411/v0", help="fatcat API host/port to use"
    )
    parser.add_argument(
        "--kafka-hosts",
        default="localhost:9092",
        help="list of Kafka brokers (host/port) to use",
    )
    parser.add_argument(
        "--env", default="dev", help="Kafka topic namespace to use (eg, prod, qa, dev)"
    )
    subparsers = parser.add_subparsers()

    sub_changelog = subparsers.add_parser(
        "changelog", help="poll fatcat API for changelog entries, push to kafka"
    )
    sub_changelog.set_defaults(func=run_changelog)
    sub_changelog.add_argument(
        "--poll-interval",
        help="how long to wait between polling (seconds)",
        default=5.0,
        type=float,
    )

    sub_entity_updates = subparsers.add_parser(
        "entity-updates",
        help="poll kafka for changelog entries; push entity changes to various kafka topics",
    )
    sub_entity_updates.set_defaults(func=run_entity_updates)

    sub_elasticsearch_release = subparsers.add_parser(
        "elasticsearch-release",
        help="consume kafka feed of new/updated releases, transform and push to search",
    )
    sub_elasticsearch_release.set_defaults(func=run_elasticsearch_release)
    sub_elasticsearch_release.add_argument(
        "--elasticsearch-backend",
        help="elasticsearch backend to connect to",
        default="http://localhost:9200",
    )
    sub_elasticsearch_release.add_argument(
        "--elasticsearch-index",
        help="elasticsearch index to push into",
        default="fatcat_release_v03",
    )

    sub_elasticsearch_container = subparsers.add_parser(
        "elasticsearch-container",
        help="consume kafka feed of new/updated containers, transform and push to search",
    )
    sub_elasticsearch_container.set_defaults(func=run_elasticsearch_container)
    sub_elasticsearch_container.add_argument(
        "--elasticsearch-backend",
        help="elasticsearch backend to connect to",
        default="http://localhost:9200",
    )
    sub_elasticsearch_container.add_argument(
        "--elasticsearch-index",
        help="elasticsearch index to push into",
        default="fatcat_container",
    )
    sub_elasticsearch_container.add_argument(
        "--query-stats",
        action="store_true",
        help="whether to query release search index for container stats",
    )

    sub_elasticsearch_file = subparsers.add_parser(
        "elasticsearch-file",
        help="consume kafka feed of new/updated files, transform and push to search",
    )
    sub_elasticsearch_file.set_defaults(func=run_elasticsearch_file)
    sub_elasticsearch_file.add_argument(
        "--elasticsearch-backend",
        help="elasticsearch backend to connect to",
        default="http://localhost:9200",
    )
    sub_elasticsearch_file.add_argument(
        "--elasticsearch-index",
        help="elasticsearch index to push into",
        default="fatcat_file",
    )

    sub_elasticsearch_changelog = subparsers.add_parser(
        "elasticsearch-changelog",
        help="consume changelog kafka feed, transform and push to search",
    )
    sub_elasticsearch_changelog.set_defaults(func=run_elasticsearch_changelog)
    sub_elasticsearch_changelog.add_argument(
        "--elasticsearch-backend",
        help="elasticsearch backend to connect to",
        default="http://localhost:9200",
    )
    sub_elasticsearch_changelog.add_argument(
        "--elasticsearch-index",
        help="elasticsearch index to push into",
        default="fatcat_changelog",
    )

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.api_host_url)
    sentry_sdk.init(environment=args.env)
    args.func(args)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--api-host-url',
        default="http://localhost:9411/v0",
        help="fatcat API host/port to use")
    parser.add_argument('--kafka-hosts',
        default="localhost:9092",
        help="list of Kafka brokers (host/port) to use")
    parser.add_argument('--env',
        default="dev",
        help="Kafka topic namespace to use (eg, prod, qa, dev)")
    subparsers = parser.add_subparsers()

    sub_changelog = subparsers.add_parser(
        'changelog',
        help="poll fatcat API for changelog entries, push to kafka")
    sub_changelog.set_defaults(func=run_changelog)
    sub_changelog.add_argument('--poll-interval',
        help="how long to wait between polling (seconds)",
        default=5.0,
        type=float)

    sub_entity_updates = subparsers.add_parser(
        'entity-updates',
        help="poll kafka for changelog entries; push entity changes to various kafka topics")
    sub_entity_updates.set_defaults(func=run_entity_updates)

    sub_elasticsearch_release = subparsers.add_parser(
        'elasticsearch-release',
        help="consume kafka feed of new/updated releases, transform and push to search")
    sub_elasticsearch_release.set_defaults(func=run_elasticsearch_release)
    sub_elasticsearch_release.add_argument(
        '--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_release.add_argument(
        '--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_release_v03")

    sub_elasticsearch_container = subparsers.add_parser(
        'elasticsearch-container',
        help="consume kafka feed of new/updated containers, transform and push to search")
    sub_elasticsearch_container.set_defaults(func=run_elasticsearch_container)
    sub_elasticsearch_container.add_argument(
        '--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_container.add_argument(
        '--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_container")

    sub_elasticsearch_changelog = subparsers.add_parser(
        'elasticsearch-changelog',
        help="consume changelog kafka feed, transform and push to search")
    sub_elasticsearch_changelog.set_defaults(func=run_elasticsearch_changelog)
    sub_elasticsearch_changelog.add_argument(
        '--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_changelog.add_argument(
        '--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_changelog")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.api_host_url)
    args.func(args)