Code example #1
0
def query_and_export(options):
    """Run every query from the client's spreadsheet and export the results.

    The client supplied a CSV full of query links. Each link is converted to
    query parameters, executed to collect the matching docket PKs, and then
    every matched docket is queued for serialization to disk as the
    deliverable.

    :param options: The argparse options
    :return None
    """
    docket_pks = set()
    for row_number, row in enumerate(csv.DictReader(options["file"])):
        # Honor the query-level offset/limit window; a non-positive
        # query_limit disables the cap (chained comparison).
        if row_number < options["query_offset"]:
            continue
        if row_number >= options["query_limit"] > 0:
            break
        query_params = get_query_from_link(row["Link"])
        logger.info("Doing query: %s", query_params)
        docket_pks.update(query_dockets(query_params))

    queue_name = options["queue"]
    throttle = CeleryThrottle(queue_name=queue_name)
    for item_number, docket_pk in enumerate(docket_pks):
        # Honor the docket-level offset/limit window (same convention).
        if item_number < options["offset"]:
            continue
        if item_number >= options["limit"] > 0:
            break
        # Log progress once per thousand items.
        if item_number % 1000 == 0:
            logger.info("Doing item %s with pk %s", item_number, docket_pk)
        throttle.maybe_wait()
        save_ia_docket_to_disk.apply_async(
            args=(docket_pk, options["output_directory"]), queue=queue_name,
        )
Code example #2
0
def do_bulk_export(options):
    """Save selected dockets from 2016 to disk

    Queries federal district court dockets with RECAP sources filed during
    2016, then serializes each one to disk using celery tasks and the IA
    serializer.

    :param options: The argparse options (queue, offset, limit,
        output_directory)
    :return None
    """
    q = options["queue"]
    offset = options["offset"]
    throttle = CeleryThrottle(queue_name=q)
    if offset > 0:
        # Fixed: the message read "less than than"; also, pk__gt below
        # means the offset PK itself is skipped, hence "or equal to".
        logger.info("Skipping dockets with PK less than or equal to %s",
                    offset)
    d_pks = (Docket.objects.filter(
        court__jurisdiction=Court.FEDERAL_DISTRICT,
        pk__gt=offset,
        source__in=Docket.RECAP_SOURCES,
        date_filed__gte="2016-01-01",
        date_filed__lte="2016-12-31",
    ).order_by("pk").values_list("pk", flat=True))
    for i, d_pk in enumerate(d_pks):
        # A non-positive limit disables the cap (chained comparison).
        if i >= options["limit"] > 0:
            break
        logger.info("Doing item %s with pk %s", i, d_pk)
        throttle.maybe_wait()
        save_ia_docket_to_disk.apply_async(
            args=(d_pk, options["output_directory"]),
            queue=q,
        )
Code example #3
0
def do_bulk_export(options):
    """Bulk export an outrageous amount of bankruptcy data from our system.

    This is the final step of this project. Limit/offset work differently
    than in many other functions here: limit is a true hard limit on the
    number of items processed (a limit of 10 means ten items get done),
    while offset is the docket PK below which nothing is processed — it
    does *not* correspond to a count of completed items.
    """
    queue_name = options['queue']
    starting_pk = options['offset']
    throttle = CeleryThrottle(queue_name=queue_name)
    if starting_pk > 0:
        logger.info("Skipping to dockets with PK greater than %s",
                    starting_pk)
    docket_pks = (
        Docket.objects.filter(
            court__jurisdiction=Court.FEDERAL_BANKRUPTCY,
            pk__gt=starting_pk,
        )
        .order_by('pk')
        .values_list('pk', flat=True)
    )
    for count, docket_pk in enumerate(docket_pks):
        # A non-positive limit disables the cap (chained comparison).
        if count >= options['limit'] > 0:
            break
        logger.info("Doing item %s with pk %s", count, docket_pk)
        throttle.maybe_wait()
        save_ia_docket_to_disk.apply_async(
            args=(docket_pk, options['output_directory']),
            queue=queue_name,
        )
Code example #4
0
File: mfb.py  Project: samaramas/courtlistener
def do_bulk_export(options):
    """Save selected dockets to disk

    Serializes every docket matching QUERY to disk using celery tasks and
    the IA serializer, honoring the offset/limit window in ``options``.
    """
    queue_name = options["queue"]
    throttle = CeleryThrottle(queue_name=queue_name)
    for position, docket_pk in enumerate(docket_pks_for_query(QUERY)):
        # Skip until the offset; a non-positive limit disables the cap.
        if position < options["offset"]:
            continue
        if position >= options["limit"] > 0:
            break
        # Log progress once per thousand items.
        if position % 1000 == 0:
            logger.info("Doing item %s with pk %s", position, docket_pk)
        throttle.maybe_wait()
        save_ia_docket_to_disk.apply_async(
            args=(docket_pk, options["output_directory"]),
            queue=queue_name,
        )