Code Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug',
                        action='store_true',
                        help="enable debugging interface")
    parser.add_argument('--host-url',
                        default="http://localhost:9411/v0",
                        help="connect to this host/port")
    subparsers = parser.add_subparsers()

    sub_uuid2fcid = subparsers.add_parser('uuid2fcid')
    sub_uuid2fcid.set_defaults(func=run_uuid2fcid)
    sub_uuid2fcid.add_argument('uuid', help="UUID to transform")

    sub_fcid2uuid = subparsers.add_parser('fcid2uuid')
    sub_fcid2uuid.set_defaults(func=run_fcid2uuid)
    sub_fcid2uuid.add_argument('fcid', help="FCID to transform (into UUID)")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.host_url)
    args.func(args)
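
All of these entrypoints share the same argparse dispatch pattern: each subparser registers its handler with set_defaults(func=...), and after parsing, main() calls args.func(args) on whatever subcommand was selected. A minimal, self-contained sketch of that pattern (the subcommand and handler names here are illustrative, not from fatcat):

import argparse
import sys

def run_hello(args):
    print("hello, {}".format(args.name))

def main():
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers()

    # each subcommand binds its handler via set_defaults(func=...)
    sub_hello = subparsers.add_parser('hello')
    sub_hello.set_defaults(func=run_hello)
    sub_hello.add_argument('name', help="who to greet")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        # no subcommand given; same guard as the examples above
        parser.print_help(file=sys.stderr)
        sys.exit(-1)
    args.func(args)

if __name__ == '__main__':
    main()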
Code Example #2
File: fatcat_transform.py (Project: openAccess/fatcat)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug',
        action='store_true',
        help="enable debugging interface")
    parser.add_argument('--host-url',
        default="http://localhost:9411/v0",
        help="connect to this host/port")
    subparsers = parser.add_subparsers()

    sub_elasticsearch_releases = subparsers.add_parser('elasticsearch-releases')
    sub_elasticsearch_releases.set_defaults(func=run_elasticsearch_releases)
    sub_elasticsearch_releases.add_argument('json_input',
        help="JSON-per-line of release entities",
        default=sys.stdin, type=argparse.FileType('r'))
    sub_elasticsearch_releases.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))

    sub_elasticsearch_containers = subparsers.add_parser('elasticsearch-containers')
    sub_elasticsearch_containers.set_defaults(func=run_elasticsearch_containers)
    sub_elasticsearch_containers.add_argument('json_input',
        help="JSON-per-line of container entities",
        default=sys.stdin, type=argparse.FileType('r'))
    sub_elasticsearch_containers.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))

    sub_elasticsearch_changelogs = subparsers.add_parser('elasticsearch-changelogs')
    sub_elasticsearch_changelogs.set_defaults(func=run_elasticsearch_changelogs)
    sub_elasticsearch_changelogs.add_argument('json_input',
        help="JSON-per-line of changelog entries",
        default=sys.stdin, type=argparse.FileType('r'))
    sub_elasticsearch_changelogs.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))

    sub_citeproc_releases = subparsers.add_parser('citeproc-releases')
    sub_citeproc_releases.set_defaults(func=run_citeproc_releases)
    sub_citeproc_releases.add_argument('json_input',
        help="JSON-per-line of release entities",
        default=sys.stdin, type=argparse.FileType('r'))
    sub_citeproc_releases.add_argument('json_output',
        help="where to send output",
        default=sys.stdout, type=argparse.FileType('w'))
    sub_citeproc_releases.add_argument('--style',
        help="citation style to output",
        default='csl-json')
    sub_citeproc_releases.add_argument('--html',
        action='store_true',
        help="output HTML, not plain text")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.host_url)
    args.func(args)
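
One subtlety in the json_input/json_output arguments above: argparse only applies default= to a positional when nargs='?' is also given, so as written these positionals are required, and the sys.stdin/sys.stdout defaults never take effect on their own. Instead, argparse.FileType maps the conventional '-' argument to the standard streams. A small standalone sketch of that behavior (not fatcat code):

import argparse
import sys

parser = argparse.ArgumentParser()
# with nargs='?', the default actually applies when the argument is omitted
parser.add_argument('json_input', nargs='?', default=sys.stdin,
                    type=argparse.FileType('r'))

args = parser.parse_args(['-'])   # FileType('r') maps '-' to sys.stdin
assert args.json_input is sys.stdin

args = parser.parse_args([])      # argument omitted: the default kicks in
assert args.json_input is sys.stdin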
Code Example #3
def test_lowercase_doi() -> None:
    api = public_api("http://localhost:9411/v0")
    rldc = ReleaseLowercaseDoiCleanup(api=api)
    rldc.testing_mode = True

    assert rldc.want("") is False
    assert rldc.want("aaaaaaaaaaaaarceaaaaaaaaai") is True
    assert rldc.want("aaaaaaaaaaaaarceaaaaaaaaai\t10.1234/ABCD") is True
    rldc.parse_record("aaaaaaaaaaaaarceaaaaaaaaai")

    dummy_re = api.get_release("aaaaaaaaaaaaarceaaaaaaaaai")
    assert rldc.try_update(dummy_re) is False
    assert rldc.counts["skip-existing-doi-fine"] == 1
Code Example #4
File: fatcat_worker.py (Project: timholds/fatcat)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug',
        action='store_true',
        help="enable debug logging")
    parser.add_argument('--api-host-url',
        default="http://localhost:9411/v0",
        help="fatcat API host/port to use")
    parser.add_argument('--kafka-hosts',
        default="localhost:9092",
        help="list of Kafka brokers (host/port) to use")
    parser.add_argument('--env',
        default="dev",
        help="Kafka topic namespace to use (eg, prod, qa, dev)")
    subparsers = parser.add_subparsers()

    sub_changelog = subparsers.add_parser('changelog')
    sub_changelog.set_defaults(func=run_changelog)
    sub_changelog.add_argument('--poll-interval',
        help="how long to wait between polling (seconds)",
        default=5.0, type=float)

    sub_entity_updates = subparsers.add_parser('entity-updates')
    sub_entity_updates.set_defaults(func=run_entity_updates)

    sub_elasticsearch_release = subparsers.add_parser('elasticsearch-release')
    sub_elasticsearch_release.set_defaults(func=run_elasticsearch_release)
    sub_elasticsearch_release.add_argument('--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_release.add_argument('--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_release_v03")

    sub_elasticsearch_container = subparsers.add_parser('elasticsearch-container')
    sub_elasticsearch_container.set_defaults(func=run_elasticsearch_container)
    sub_elasticsearch_container.add_argument('--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_container.add_argument('--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_container")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.api_host_url)
    args.func(args)
Code Example #5
File: fatcat_export.py (Project: cclauss/fatcat)
def main() -> None:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--fatcat-api-url",
                        default="http://localhost:9411/v0",
                        help="connect to this host/port")
    subparsers = parser.add_subparsers()

    sub_releases = subparsers.add_parser("releases")
    sub_releases.set_defaults(func=run_export_releases)
    sub_releases.add_argument(
        "ident_file",
        help="TSV list of fatcat release idents to dump",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_releases.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    sub_changelog = subparsers.add_parser("changelog")
    sub_changelog.set_defaults(func=run_export_changelog)
    sub_changelog.add_argument("--start",
                               help="index to start dumping at",
                               default=1,
                               type=int)
    sub_changelog.add_argument(
        "--end",
        help="index to stop dumping at (else detect most recent)",
        default=None,
        type=int,
    )
    sub_changelog.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.fatcat_api_url)
    args.func(args)
Code Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug',
                        action='store_true',
                        help="enable debugging interface")
    parser.add_argument('--host-url',
                        default="http://localhost:9411/v0",
                        help="connect to this host/port")
    subparsers = parser.add_subparsers()

    sub_releases = subparsers.add_parser('releases')
    sub_releases.set_defaults(func=run_export_releases)
    sub_releases.add_argument('ident_file',
                              help="TSV list of fatcat release idents to dump",
                              default=sys.stdin,
                              type=argparse.FileType('r'))
    sub_releases.add_argument('json_output',
                              help="where to send output",
                              default=sys.stdout,
                              type=argparse.FileType('w'))

    sub_changelog = subparsers.add_parser('changelog')
    sub_changelog.set_defaults(func=run_export_changelog)
    sub_changelog.add_argument('--start',
                               help="index to start dumping at",
                               default=1,
                               type=int)
    sub_changelog.add_argument(
        '--end',
        help="index to stop dumping at (else detect most recent)",
        default=None,
        type=int)
    sub_changelog.add_argument('json_output',
                               help="where to send output",
                               default=sys.stdout,
                               type=argparse.FileType('w'))

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.host_url)
    args.func(args)
Code Example #7
def main() -> None:
    """
    Run this utility like:

        python -m fatcat_tools.references

    Examples:

        python -m fatcat_tools.references query release_pfrind3kh5hqhgqkueulk2tply
    """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    subparsers = parser.add_subparsers()

    parser.add_argument("--fatcat-api-base",
                        default="https://api.fatcat.wiki/v0")
    parser.add_argument("--elasticsearch-base",
                        default="https://search.fatcat.wiki")
    parser.add_argument("--elasticsearch-ref-index", default="fatcat_ref")

    sub = subparsers.add_parser(
        "query",
        help=
        "takes a fatcat ident argument, prints both inbound and outbound references",
    )
    sub.set_defaults(func="run_ref_query")
    sub.add_argument("ident", type=str)
    sub.add_argument("--enrich", type=str)

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        parser.print_help(file=sys.stderr)
        sys.exit(-1)

    args.es_client = elasticsearch.Elasticsearch(args.elasticsearch_base)
    args.fatcat_api_client = public_api(args.fatcat_api_base)

    if args.func == "run_ref_query":
        run_ref_query(args)
    else:
        raise NotImplementedError(args.func)
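
Note that this entrypoint, unlike the others in this listing, registers the handler as a string ("run_ref_query") in set_defaults() and dispatches explicitly at the end, rather than storing the function object itself and calling args.func(args) directly.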
Code Example #8
File: file_release_bugfix.py (Project: cclauss/fatcat)
def test_file_release_bugfix() -> None:
    api = public_api("http://localhost:9411/v0")
    frbc = FileReleaseBugfix(api=api)
    frbc.testing_mode = True

    assert frbc.want({"this": "asdf"}) is False

    example_line: Dict[str, Any] = {
        "file_ident": "00000000-0000-0000-3333-000000000002",
        "wrong_release_ident": "00000000-0000-0000-4444-000000000002",
        "edit_extra": {
            "link_source": "unpaywall",
            "link_source_id": "10.1371/journal.pmed.0020124",
            "ingest_request_source": "unpaywall",
        },
    }

    fe1 = frbc.parse_record(example_line)
    print(frbc.counts)
    frbc.try_update(fe1)
Code Example #9
File: fatcat_ingest.py (Project: eevelweezel/fatcat)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--fatcat-api-url',
        default="http://localhost:9411/v0",
        help="connect to this host/port")
    parser.add_argument('--enqueue-kafka',
        action='store_true',
        help="send ingest requests directly to sandcrawler kafka topic for processing")
    parser.add_argument('--kafka-hosts',
        default="localhost:9092",
        help="list of Kafka brokers (host/port) to use")
    parser.add_argument('--elasticsearch-endpoint',
        default="https://search.fatcat.wiki",
        help="elasticsearch API. internal endpoint preferred, but public is default")
    parser.add_argument('--elasticsearch-index',
        default="fatcat_release",
        help="elasticsearch index to query")
    parser.add_argument('--env',
        default="dev",
        help="Kafka topic namespace to use (eg, prod, qa, dev)")
    parser.add_argument('--limit',
        default=None,
        type=int,
        help="Max number of search hits to return")
    parser.add_argument('--dry-run',
        action='store_true',
        help="runs through creating all ingest requests, but doesn't actually output or enqueue")
    parser.add_argument('--before-year',
        type=str,
        help="filter results to releases with release_year before this (not inclusive)")
    parser.add_argument('--after-year',
        type=str,
        help="filter results to releases with release_year after this (inclusive)")
    parser.add_argument('--release-types',
        type=str,
        help="filters results to specified release-types, separated by commas. By default, 'stub' is filtered out.")
    parser.add_argument('--allow-non-oa',
        action='store_true',
        help="By default, we limit to OA releases. This removes that filter")
    parser.add_argument('--force-recrawl',
        action='store_true',
        help="Tell ingest worker to skip GWB history lookup and do SPNv2 crawl")
    subparsers = parser.add_subparsers()

    sub_container = subparsers.add_parser('container',
        help="Create ingest requests for releases from a specific container")
    sub_container.set_defaults(func=run_ingest_container)
    sub_container.add_argument('--container-id',
        help="fatcat container entity ident")
    sub_container.add_argument('--issnl',
        help="ISSN-L of container entity")
    sub_container.add_argument('--publisher',
        help="publisher name")
    sub_container.add_argument('--name',
        help="container name")

    sub_query = subparsers.add_parser('query',
        help="Create ingest requests for releases from a specific query")
    sub_query.set_defaults(func=run_ingest_query)
    sub_query.add_argument('query',
        help="search query (same DSL as web interface search)")

    sub_extid = subparsers.add_parser('extid',
        help="Create ingest requests for releases that have given extid defined")
    sub_extid.set_defaults(func=run_ingest_extid)
    sub_extid.add_argument('extid',
        help="extid short name (as included in ES release schema)")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.fatcat_api_url)
    args.func(args)
Code Example #10
    def run(self) -> None:
        ac = ApiClient()
        api = public_api(self.api_host)

        # only used by container indexing query_stats code path
        es_client = elasticsearch.Elasticsearch(self.elasticsearch_backend)

        def fail_fast(err: Any, partitions: List[Any]) -> None:
            if err is not None:
                print("Kafka consumer commit error: {}".format(err), file=sys.stderr)
                print("Bailing out...", file=sys.stderr)
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(err)
            for p in partitions:
                # check for partition-specific commit errors
                if p.error:
                    print("Kafka consumer commit error: {}".format(p.error), file=sys.stderr)
                    print("Bailing out...", file=sys.stderr)
                    # TODO: should it be sys.exit(-1)?
                    raise KafkaException(p.error)
            # print("Kafka consumer commit successful")
            pass

        def on_rebalance(consumer: Consumer, partitions: List[Any]) -> None:
            for p in partitions:
                if p.error:
                    raise KafkaException(p.error)
            print(
                "Kafka partitions rebalanced: {} / {}".format(consumer, partitions),
                file=sys.stderr,
            )

        consumer_conf = self.kafka_config.copy()
        consumer_conf.update(
            {
                "group.id": self.consumer_group,
                "on_commit": fail_fast,
                # messages don't have offset marked as stored until pushed to
                # elastic, but we do auto-commit stored offsets to broker
                "enable.auto.commit": True,
                "enable.auto.offset.store": False,
                # user code timeout; if no poll after this long, assume user code
                # hung and rebalance (default: 5min)
                "max.poll.interval.ms": 60000,
                "default.topic.config": {
                    "auto.offset.reset": "latest",
                },
            }
        )
        consumer = Consumer(consumer_conf)
        consumer.subscribe(
            [self.consume_topic],
            on_assign=on_rebalance,
            on_revoke=on_rebalance,
        )

        while True:
            batch = consumer.consume(num_messages=self.batch_size, timeout=self.poll_interval)
            if not batch:
                if not consumer.assignment():
                    print("... no Kafka consumer partitions assigned yet", file=sys.stderr)
                print(
                    "... nothing new from kafka, try again (interval: {}".format(
                        self.poll_interval
                    ),
                    file=sys.stderr,
                )
                continue
            print("... got {} kafka messages".format(len(batch)), file=sys.stderr)
            # first check errors on entire batch...
            for msg in batch:
                if msg.error():
                    raise KafkaException(msg.error())
            # ... then process
            bulk_actions = []
            for msg in batch:
                json_str = msg.value().decode("utf-8")
                entity = entity_from_json(json_str, self.entity_type, api_client=ac)
                assert isinstance(entity, self.entity_type)
                if self.entity_type == ChangelogEntry:
                    key = entity.index
                    # might need to fetch from API
                    if not (
                        entity.editgroup  # pylint: disable=no-member # (TODO)
                        and entity.editgroup.editor  # pylint: disable=no-member # (TODO)
                    ):
                        entity = api.get_changelog_entry(entity.index)
                else:
                    key = entity.ident  # pylint: disable=no-member # (TODO)

                if self.entity_type != ChangelogEntry and entity.state == "wip":
                    print(
                        f"WARNING: skipping state=wip entity: {self.entity_type.__name__} {entity.ident}",
                        file=sys.stderr,
                    )
                    continue

                if self.entity_type == ContainerEntity and self.query_stats:
                    stats = query_es_container_stats(
                        entity.ident,
                        es_client=es_client,
                        es_index=self.elasticsearch_release_index,
                        merge_shadows=True,
                    )
                    doc_dict = container_to_elasticsearch(entity, stats=stats)
                else:
                    doc_dict = self.transform_func(entity)

                # TODO: handle deletions from index
                bulk_actions.append(
                    json.dumps(
                        {
                            "index": {
                                "_id": key,
                            },
                        }
                    )
                )
                bulk_actions.append(json.dumps(doc_dict))

            # if only WIP entities, then skip
            if not bulk_actions:
                for msg in batch:
                    consumer.store_offsets(message=msg)
                continue

            print(
                "Upserting, eg, {} (of {} {} in elasticsearch)".format(
                    key, len(batch), self.entity_type.__name__
                ),
                file=sys.stderr,
            )
            elasticsearch_endpoint = "{}/{}/_bulk".format(
                self.elasticsearch_backend, self.elasticsearch_index
            )
            resp = requests.post(
                elasticsearch_endpoint,
                headers={"Content-Type": "application/x-ndjson"},
                data="\n".join(bulk_actions) + "\n",
            )
            resp.raise_for_status()
            if resp.json()["errors"]:
                desc = "Elasticsearch errors from post to {}:".format(elasticsearch_endpoint)
                print(desc, file=sys.stderr)
                print(resp.content, file=sys.stderr)
                raise Exception(desc)
            for msg in batch:
                # offsets are *committed* (to brokers) automatically, but need
                # to be marked as processed here
                consumer.store_offsets(message=msg)
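
The bulk_actions list above follows the Elasticsearch _bulk NDJSON format: an action line followed by a document line for each record, joined with newlines and newline-terminated. A minimal sketch of the body this worker posts (the key and document values here are made up):

import json

docs = {
    "aaaaaaaaaaaaarceaaaaaaaaai": {"title": "example release", "state": "active"},
}
bulk_actions = []
for key, doc_dict in docs.items():
    # action line naming the document _id, then the document itself
    bulk_actions.append(json.dumps({"index": {"_id": key}}))
    bulk_actions.append(json.dumps(doc_dict))
body = "\n".join(bulk_actions) + "\n"  # _bulk requires the trailing newline
# POSTed to {elasticsearch_backend}/{elasticsearch_index}/_bulk with
# Content-Type: application/x-ndjson, as in the worker above
print(body)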
Code Example #11
File: fatcat_transform.py (Project: cclauss/fatcat)
def main() -> None:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--fatcat-api-url",
                        default="http://localhost:9411/v0",
                        help="connect to this host/port")
    parser.add_argument(
        "--fatcat-elasticsearch-url",
        default="http://localhost:9200",
        help="connect to this host/port",
    )
    subparsers = parser.add_subparsers()

    sub_elasticsearch_releases = subparsers.add_parser(
        "elasticsearch-releases",
        help=
        "convert fatcat release JSON schema to elasticsearch release schema",
    )
    sub_elasticsearch_releases.set_defaults(func=run_elasticsearch_releases)
    sub_elasticsearch_releases.add_argument(
        "json_input",
        help="JSON-per-line of release entities",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_elasticsearch_releases.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    sub_elasticsearch_containers = subparsers.add_parser(
        "elasticsearch-containers",
        help=
        "convert fatcat container JSON schema to elasticsearch container schema",
    )
    sub_elasticsearch_containers.set_defaults(
        func=run_elasticsearch_containers)
    sub_elasticsearch_containers.add_argument(
        "json_input",
        help="JSON-per-line of container entities",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_elasticsearch_containers.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )
    sub_elasticsearch_containers.add_argument(
        "--query-stats",
        action="store_true",
        help="whether to query release search index for container stats",
    )

    sub_elasticsearch_files = subparsers.add_parser(
        "elasticsearch-files",
        help="convert fatcat file JSON schema to elasticsearch file schema",
    )
    sub_elasticsearch_files.set_defaults(func=run_elasticsearch_files)
    sub_elasticsearch_files.add_argument(
        "json_input",
        help="JSON-per-line of file entities",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_elasticsearch_files.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    sub_elasticsearch_changelogs = subparsers.add_parser(
        "elasticsearch-changelogs",
        help=
        "convert fatcat changelog JSON schema to elasticsearch changelog schema",
    )
    sub_elasticsearch_changelogs.set_defaults(
        func=run_elasticsearch_changelogs)
    sub_elasticsearch_changelogs.add_argument(
        "json_input",
        help="JSON-per-line of changelog entries",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_elasticsearch_changelogs.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )

    sub_citeproc_releases = subparsers.add_parser(
        "citeproc-releases",
        help=
        "convert fatcat release schema to any standard citation format using citeproc/CSL",
    )
    sub_citeproc_releases.set_defaults(func=run_citeproc_releases)
    sub_citeproc_releases.add_argument(
        "json_input",
        help="JSON-per-line of release entities",
        default=sys.stdin,
        type=argparse.FileType("r"),
    )
    sub_citeproc_releases.add_argument(
        "json_output",
        help="where to send output",
        default=sys.stdout,
        type=argparse.FileType("w"),
    )
    sub_citeproc_releases.add_argument("--style",
                                       help="citation style to output",
                                       default="csl-json")
    sub_citeproc_releases.add_argument("--html",
                                       action="store_true",
                                       help="output HTML, not plain text")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.fatcat_api_url)
    args.func(args)
Code Example #12
File: tools_api.py (Project: luisdomin5/fatcat)
def test_public_api():
    api = public_api("http://localhost:9411/v0")
    api.get_changelog()
    with pytest.raises(ApiException):
        api.auth_check()
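
Every example in this listing obtains its client from public_api(), which returns an unauthenticated fatcat API client for the given endpoint; the test above also shows that auth_check() raises ApiException when no credentials are configured. A hypothetical sketch of such a helper built on the generated fatcat_openapi_client package (an assumption: the real fatcat_tools implementation may differ in details):

import fatcat_openapi_client

def public_api(host_uri):
    # hypothetical: build an unauthenticated client against host_uri
    conf = fatcat_openapi_client.Configuration()
    conf.host = host_uri
    return fatcat_openapi_client.DefaultApi(fatcat_openapi_client.ApiClient(conf))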
Code Example #13
def main() -> None:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--api-host-url",
                        default="http://localhost:9411/v0",
                        help="fatcat API host/port to use")
    parser.add_argument(
        "--kafka-hosts",
        default="localhost:9092",
        help="list of Kafka brokers (host/port) to use",
    )
    parser.add_argument(
        "--env",
        default="dev",
        help="Kafka topic namespace to use (eg, prod, qa, dev)")
    subparsers = parser.add_subparsers()

    sub_changelog = subparsers.add_parser(
        "changelog",
        help="poll fatcat API for changelog entries, push to kafka")
    sub_changelog.set_defaults(func=run_changelog)
    sub_changelog.add_argument(
        "--poll-interval",
        help="how long to wait between polling (seconds)",
        default=5.0,
        type=float,
    )

    sub_entity_updates = subparsers.add_parser(
        "entity-updates",
        help=
        "poll kafka for changelog entries; push entity changes to various kafka topics",
    )
    sub_entity_updates.set_defaults(func=run_entity_updates)

    sub_elasticsearch_release = subparsers.add_parser(
        "elasticsearch-release",
        help=
        "consume kafka feed of new/updated releases, transform and push to search",
    )
    sub_elasticsearch_release.set_defaults(func=run_elasticsearch_release)
    sub_elasticsearch_release.add_argument(
        "--elasticsearch-backend",
        help="elasticsearch backend to connect to",
        default="http://localhost:9200",
    )
    sub_elasticsearch_release.add_argument(
        "--elasticsearch-index",
        help="elasticsearch index to push into",
        default="fatcat_release_v03",
    )

    sub_elasticsearch_container = subparsers.add_parser(
        "elasticsearch-container",
        help=
        "consume kafka feed of new/updated containers, transform and push to search",
    )
    sub_elasticsearch_container.set_defaults(func=run_elasticsearch_container)
    sub_elasticsearch_container.add_argument(
        "--elasticsearch-backend",
        help="elasticsearch backend to connect to",
        default="http://localhost:9200",
    )
    sub_elasticsearch_container.add_argument(
        "--elasticsearch-index",
        help="elasticsearch index to push into",
        default="fatcat_container",
    )
    sub_elasticsearch_container.add_argument(
        "--query-stats",
        action="store_true",
        help="whether to query release search index for container stats",
    )

    sub_elasticsearch_file = subparsers.add_parser(
        "elasticsearch-file",
        help=
        "consume kafka feed of new/updated files, transform and push to search",
    )
    sub_elasticsearch_file.set_defaults(func=run_elasticsearch_file)
    sub_elasticsearch_file.add_argument(
        "--elasticsearch-backend",
        help="elasticsearch backend to connect to",
        default="http://localhost:9200",
    )
    sub_elasticsearch_file.add_argument(
        "--elasticsearch-index",
        help="elasticsearch index to push into",
        default="fatcat_file",
    )

    sub_elasticsearch_changelog = subparsers.add_parser(
        "elasticsearch-changelog",
        help="consume changelog kafka feed, transform and push to search",
    )
    sub_elasticsearch_changelog.set_defaults(func=run_elasticsearch_changelog)
    sub_elasticsearch_changelog.add_argument(
        "--elasticsearch-backend",
        help="elasticsearch backend to connect to",
        default="http://localhost:9200",
    )
    sub_elasticsearch_changelog.add_argument(
        "--elasticsearch-index",
        help="elasticsearch index to push into",
        default="fatcat_changelog",
    )

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.api_host_url)
    sentry_sdk.init(environment=args.env)
    args.func(args)
Code Example #14
File: fatcat_worker.py (Project: eevelweezel/fatcat)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--api-host-url',
                        default="http://localhost:9411/v0",
                        help="fatcat API host/port to use")
    parser.add_argument('--kafka-hosts',
                        default="localhost:9092",
                        help="list of Kafka brokers (host/port) to use")
    parser.add_argument(
        '--env',
        default="dev",
        help="Kafka topic namespace to use (eg, prod, qa, dev)")
    subparsers = parser.add_subparsers()

    sub_changelog = subparsers.add_parser(
        'changelog',
        help="poll fatcat API for changelog entries, push to kafka")
    sub_changelog.set_defaults(func=run_changelog)
    sub_changelog.add_argument(
        '--poll-interval',
        help="how long to wait between polling (seconds)",
        default=5.0,
        type=float)

    sub_entity_updates = subparsers.add_parser(
        'entity-updates',
        help=
        "poll kafka for changelog entries; push entity changes to various kafka topics"
    )
    sub_entity_updates.set_defaults(func=run_entity_updates)

    sub_elasticsearch_release = subparsers.add_parser(
        'elasticsearch-release',
        help=
        "consume kafka feed of new/updated releases, transform and push to search"
    )
    sub_elasticsearch_release.set_defaults(func=run_elasticsearch_release)
    sub_elasticsearch_release.add_argument(
        '--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_release.add_argument(
        '--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_release_v03")

    sub_elasticsearch_container = subparsers.add_parser(
        'elasticsearch-container',
        help=
        "consume kafka feed of new/updated containers, transform and push to search"
    )
    sub_elasticsearch_container.set_defaults(func=run_elasticsearch_container)
    sub_elasticsearch_container.add_argument(
        '--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_container.add_argument(
        '--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_container")

    sub_elasticsearch_changelog = subparsers.add_parser(
        'elasticsearch-changelog',
        help="consume changelog kafka feed, transform and push to search")
    sub_elasticsearch_changelog.set_defaults(func=run_elasticsearch_changelog)
    sub_elasticsearch_changelog.add_argument(
        '--elasticsearch-backend',
        help="elasticsearch backend to connect to",
        default="http://localhost:9200")
    sub_elasticsearch_changelog.add_argument(
        '--elasticsearch-index',
        help="elasticsearch index to push into",
        default="fatcat_changelog")

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    args.api = public_api(args.api_host_url)
    args.func(args)