Example #1
def process_recap_upload(pq):
    """Process an item uploaded from an extension or API user.

    Uploaded objects can take a variety of forms, and we'll need to
    process them accordingly.
    """
    if pq.upload_type == UPLOAD_TYPE.DOCKET:
        chain(
            process_recap_docket.s(pq.pk), add_or_update_recap_docket.s()
        ).apply_async()
    elif pq.upload_type == UPLOAD_TYPE.ATTACHMENT_PAGE:
        process_recap_attachment.delay(pq.pk)
    elif pq.upload_type == UPLOAD_TYPE.PDF:
        process_recap_pdf.delay(pq.pk)
    elif pq.upload_type == UPLOAD_TYPE.DOCKET_HISTORY_REPORT:
        chain(
            process_recap_docket_history_report.s(pq.pk),
            add_or_update_recap_docket.s(),
        ).apply_async()
    elif pq.upload_type == UPLOAD_TYPE.APPELLATE_DOCKET:
        chain(
            process_recap_appellate_docket.s(pq.pk),
            add_or_update_recap_docket.s(),
        ).apply_async()
    elif pq.upload_type == UPLOAD_TYPE.APPELLATE_ATTACHMENT_PAGE:
        process_recap_appellate_attachment.delay(pq.pk)
    elif pq.upload_type == UPLOAD_TYPE.CLAIMS_REGISTER:
        process_recap_claims_register.delay(pq.pk)
    elif pq.upload_type == UPLOAD_TYPE.DOCUMENT_ZIP:
        process_recap_zip.delay(pq.pk)
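All of these handlers lean on two standard Celery idioms: task.delay(args) enqueues a single task, while chain(a.s(x), b.s()).apply_async() links tasks so that each task's return value is passed as the first positional argument of the next. A minimal sketch of the pattern, using hypothetical task names that are not part of the RECAP code:

from celery import Celery, chain

app = Celery("sketch", broker="memory://")

@app.task
def parse_docket(pk):
    # Pretend to parse the item and hand an identifier to the next task.
    return pk * 10

@app.task
def index_docket(parsed_id):
    # Receives parse_docket's return value as its first positional argument.
    return f"indexed {parsed_id}"

# Same shape as the DOCKET branch above: run one task, feed its result to the next.
chain(parse_docket.s(42), index_docket.s()).apply_async()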
Example #2
def get_dockets(options, items, tags, sample_size=0, doc_num_end=""):
    """Download dockets from PACER.

    :param options: Options provided by argparse
    :param items: Items from our FJC IDB database
    :param tags: A list of tag names to associate with the purchased content.
    :param sample_size: The number of items to get. If 0, get them all. Else,
    get only this many and do it randomly.
    :param doc_num_end: Only get docket numbers up to this value to constrain
    costs. If set to an empty string, no constraints are applied. Note that
    applying this value means no unnumbered entries will be retrieved by PACER.
    """

    if sample_size > 0:
        items = items.order_by("?")[:sample_size]

    q = options["queue"]
    throttle = CeleryThrottle(queue_name=q)
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    session.login()
    for i, row in enumerate(items):
        if i < options["offset"]:
            continue
        if i >= options["limit"] > 0:
            break

        if i % 5000 == 0:
            # Re-authenticate just in case the auto-login mechanism isn't
            # working.
            session = PacerSession(username=PACER_USERNAME,
                                   password=PACER_PASSWORD)
            session.login()

        # All tests pass. Get the docket.
        logger.info("Doing row %s: %s", i, row)

        throttle.maybe_wait()
        params = make_fjc_idb_lookup_params(row)
        chain(
            get_pacer_case_id_and_title.s(
                pass_through=None,
                docket_number=row.docket_number,
                court_id=row.district_id,
                cookies=session.cookies,
                **params,
            ).set(queue=q),
            filter_docket_by_tags.s(tags, row.district_id).set(queue=q),
            get_docket_by_pacer_case_id.s(
                court_id=row.district_id,
                cookies=session.cookies,
                tag_names=tags,
                **{
                    "show_parties_and_counsel": True,
                    "show_terminated_parties": True,
                    "show_list_of_member_cases": False,
                    "doc_num_end": doc_num_end,
                },
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
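The offset/limit guards above use Python's chained comparisons: i >= options["limit"] > 0 evaluates as i >= limit and limit > 0, so a limit of 0 means "no limit at all". A small sketch of that behavior:

def past_limit(i, limit):
    # Chained comparison: true only once a positive limit has been reached.
    return i >= limit > 0

assert past_limit(10, 5) is True    # limit reached, stop
assert past_limit(3, 5) is False    # still under the limit
assert past_limit(10, 0) is False   # a limit of 0 disables the cutoff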
Example #3
def get_pacer_dockets(options, docket_pks, tags):
    """Get the pacer dockets identified by the FJC IDB rows"""
    q = options["queue"]
    throttle = CeleryThrottle(queue_name=q)
    pacer_session = None
    for i, docket_pk in enumerate(docket_pks):
        if i < options["offset"]:
            continue
        if i >= options["limit"] > 0:
            break
        throttle.maybe_wait()
        if i % 1000 == 0 or pacer_session is None:
            pacer_session = PacerSession(
                username=PACER_USERNAME, password=PACER_PASSWORD
            )
            pacer_session.login()
            logger.info(f"Sent {i} tasks to celery so far.")
        d = Docket.objects.get(pk=docket_pk)
        chain(
            get_docket_by_pacer_case_id.s(
                {"pacer_case_id": d.pacer_case_id, "docket_pk": d.pk},
                d.court_id,
                cookies=pacer_session.cookies,
                tag_names=tags,
                **{
                    "show_parties_and_counsel": True,
                    "show_terminated_parties": True,
                    "show_list_of_member_cases": False,
                },
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Example #4
def get_pacer_dockets(options, docket_pks, tag):
    """Get the pacer dockets identified by the FJC IDB rows"""
    q = options['queue']
    throttle = CeleryThrottle(queue_name=q)
    pacer_session = None
    for i, docket_pk in enumerate(docket_pks):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break
        throttle.maybe_wait()
        if i % 1000 == 0 or pacer_session is None:
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)
            pacer_session.login()
            logger.info("Sent %s tasks to celery so far." % i)
        d = Docket.objects.get(pk=docket_pk)
        chain(
            get_docket_by_pacer_case_id.s(
                {'pacer_case_id': d.pacer_case_id},
                d.court_id,
                cookies=pacer_session.cookies,
                **{
                    'tag_names': [tag],
                    'show_parties_and_counsel': True,
                    'show_terminated_parties': True,
                    'show_list_of_member_cases': True,
                },
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Example #5
def get_pacer_dockets(options, docket_pks, tags):
    """Get the pacer dockets identified by the FJC IDB rows"""
    q = options['queue']
    throttle = CeleryThrottle(queue_name=q)
    pacer_session = None
    for i, docket_pk in enumerate(docket_pks):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break
        throttle.maybe_wait()
        if i % 1000 == 0 or pacer_session is None:
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)
            pacer_session.login()
            logger.info("Sent %s tasks to celery so far." % i)
        d = Docket.objects.get(pk=docket_pk)
        chain(
            get_docket_by_pacer_case_id.s(
                {'pacer_case_id': d.pacer_case_id,
                 'docket_pk': d.pk},
                d.court_id,
                cookies=pacer_session.cookies,
                tag_names=tags,
                **{'show_parties_and_counsel': True,
                   'show_terminated_parties': True,
                   'show_list_of_member_cases': False}
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Example #6
def do_pacer_fetch(fq):
    """Process a request made by a user to get an item from PACER.

    :param fq: The PacerFetchQueue item to process
    :return: None
    """
    result = None
    if fq.request_type == REQUEST_TYPE.DOCKET:
        # Request by docket_id
        c = chain(
            fetch_docket.si(fq.pk),
            add_or_update_recap_docket.s(),
            mark_fq_successful.si(fq.pk),
        )
        result = c.apply_async()
    elif fq.request_type == REQUEST_TYPE.PDF:
        # Request by recap_document_id
        rd_pk = fq.recap_document_id
        result = chain(
            fetch_pacer_doc_by_rd.si(rd_pk, fq.pk),
            extract_recap_pdf.si(rd_pk),
            add_items_to_solr.si([rd_pk], "search.RECAPDocument"),
            mark_fq_successful.si(fq.pk),
        ).apply_async()
    elif fq.request_type == REQUEST_TYPE.ATTACHMENT_PAGE:
        result = fetch_attachment_page.apply_async(args=(fq.pk,))
    return result
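Note the mix of .si() and .s() in the chains above: .s() builds a signature that accepts the previous task's result as its first argument, while .si() builds an immutable signature that ignores it, which is why mark_fq_successful.si(fq.pk) can sit at the end of a chain without caring what the indexing task returned. A short sketch of the difference, again with hypothetical tasks:

from celery import Celery, chain

app = Celery("sketch", broker="memory://")

@app.task
def fetch(pk):
    return {"pk": pk, "status": "fetched"}

@app.task
def extract(result):
    # Mutable signature (.s): receives fetch()'s return value.
    return {**result, "text": "extracted"}

@app.task
def mark_done(pk):
    # Immutable signature (.si): only sees the arguments bound up front.
    return f"fetch queue item {pk} marked successful"

chain(fetch.si(1), extract.s(), mark_done.si(1)).apply_async()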
Example #7
def get_docket_and_claims(docket_number, court, case_name, cookies, tags, q):
    """Get the docket report, claims history report, and save it all to the DB
     and Solr
    """
    chain(
        get_pacer_case_id_and_title.s(
            pass_through=None,
            docket_number=docket_number,
            court_id=court,
            cookies=cookies,
            case_name=case_name,
            docket_number_letters="bk",
        ).set(queue=q),
        get_docket_by_pacer_case_id.s(court_id=court,
                                      cookies=cookies,
                                      tag_names=tags,
                                      **{
                                          "show_parties_and_counsel": True,
                                          "show_terminated_parties": True,
                                          "show_list_of_member_cases": False,
                                      }).set(queue=q),
        get_bankr_claims_registry.s(
            cookies=cookies,
            tag_names=tags,
        ).set(queue=q),
        add_or_update_recap_docket.s().set(queue=q),
    ).apply_async()
Example #8
def get_dockets(options, items, tags, sample_size=0, doc_num_end=''):
    """Download dockets from PACER.

    :param options: Options provided by argparse
    :param items: Items from our FJC IDB database
    :param tags: A list of tag names to associate with the purchased content.
    :param sample_size: The number of items to get. If 0, get them all. Else,
    get only this many and do it randomly.
    :param doc_num_end: Only get docket numbers up to this value to constrain
    costs. If set to an empty string, no constraints are applied. Note that
    applying this value means no unnumbered entries will be retrieved by PACER.
    """

    if sample_size > 0:
        items = items.order_by('?')[:sample_size]

    q = options['queue']
    throttle = CeleryThrottle(queue_name=q)
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    session.login()
    for i, row in enumerate(items):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break

        if i % 5000 == 0:
            # Re-authenticate just in case the auto-login mechanism isn't
            # working.
            session = PacerSession(username=PACER_USERNAME,
                                   password=PACER_PASSWORD)
            session.login()

        # All tests pass. Get the docket.
        logger.info("Doing row %s: %s", i, row)

        throttle.maybe_wait()
        params = make_fjc_idb_lookup_params(row)
        chain(
            get_pacer_case_id_and_title.s(
                pass_through=None,
                docket_number=row.docket_number,
                court_id=row.district_id,
                cookies=session.cookies,
                **params
            ).set(queue=q),
            filter_docket_by_tags.s(tags, row.district_id).set(queue=q),
            get_docket_by_pacer_case_id.s(
                court_id=row.district_id,
                cookies=session.cookies,
                tag_names=tags,
                **{
                    'show_parties_and_counsel': True,
                    'show_terminated_parties': True,
                    'show_list_of_member_cases': False,
                    'doc_num_end': doc_num_end,
                }
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Example #9
def get_pacer_dockets(options, row_pks, tag=None):
    """Get the pacer dockets identified by the FJC IDB rows"""
    q = options['queue']
    throttle = CeleryThrottle(queue_name=q)
    for i, row_pk in enumerate(row_pks):
        if i >= options['count'] > 0:
            break
        throttle.maybe_wait()
        if i % 1000 == 0:
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)
            pacer_session.login()
            logger.info("Sent %s tasks to celery so far." % i)
        row = FjcIntegratedDatabase.objects.get(pk=row_pk)
        chain(
            get_docket_by_pacer_case_id.s(
                row.pacer_case_id,
                row.district_id,
                pacer_session,
                **{'tag': tag, 'show_parties_and_counsel': True,
                   'show_terminated_parties': True,
                   'show_list_of_member_cases': True}
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Example #10
def get_dockets(options, items, tags, sample_size=0):
    """Download dockets from PACER.

    :param options: Options provided by argparse
    :param items: Items from our FJC IDB database
    :param tags: A list of tag names to associate with the purchased content.
    :param sample_size: The number of items to get. If 0, get them all. Else,
    get only this many and do it randomly.
    """

    if sample_size > 0:
        items = items.order_by('?')[:sample_size]

    q = options['queue']
    throttle = CeleryThrottle(queue_name=q)
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    session.login()
    for i, row in enumerate(items):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break

        if i % 5000 == 0:
            # Re-authenticate just in case the auto-login mechanism isn't
            # working.
            session = PacerSession(username=PACER_USERNAME,
                                   password=PACER_PASSWORD)
            session.login()

        # All tests pass. Get the docket.
        logger.info("Doing row %s: %s", i, row)

        throttle.maybe_wait()
        params = make_fjc_idb_lookup_params(row)
        chain(
            get_pacer_case_id_and_title.s(
                docket_number=row.docket_number,
                court_id=row.district_id,
                cookies=session.cookies,
                **params
            ).set(queue=q),
            filter_docket_by_tags.s(tags, row.district_id).set(queue=q),
            get_docket_by_pacer_case_id.s(
                court_id=row.district_id,
                cookies=session.cookies,
                tag_names=tags,
                **{
                    'show_parties_and_counsel': True,
                    'show_terminated_parties': True,
                    'show_list_of_member_cases': True
                }
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Example #11
def download_dockets(options):
    """Download dockets listed in the spreadsheet."""
    with open(options["input_file"], "r") as f:
        dialect = csv.Sniffer().sniff(f.read(1024))
        f.seek(0)
        reader = csv.DictReader(f, dialect=dialect)
        q = options["queue"]
        task = options["task"]
        throttle = CeleryThrottle(queue_name=q,
                                  min_items=options["queue_length"])
        session = PacerSession(username=PACER_USERNAME,
                               password=PACER_PASSWORD)
        session.login()
        for i, row in enumerate(reader):
            if i < options["offset"]:
                continue
            if i >= options["limit"] > 0:
                break
            throttle.maybe_wait()

            logger.info("Doing row %s: %s", i, row)

            if row["idb_docket_number"]:
                if task == "download_student_dockets":
                    continue
                # Zero-pad the docket number up to seven digits because Excel
                # ate the leading zeros that these would normally have.
                docket_number = row["idb_docket_number"].rjust(7, "0")
            elif row["student_docket_number"]:
                # Use the values collected by student
                # researchers, then cleaned up by mlr.
                docket_number = row["student_docket_number"]
            else:
                # No docket number; move on.
                continue
            court = Court.objects.get(
                fjc_court_id=row["AO ID"].rjust(2, "0"),
                jurisdiction=Court.FEDERAL_DISTRICT,
            )
            chain(
                get_pacer_case_id_and_title.s(
                    pass_through=None,
                    docket_number=docket_number,
                    court_id=court.pk,
                    cookies=session.cookies,
                    case_name=row["Case Name"],
                ).set(queue=q),
                get_docket_by_pacer_case_id.s(
                    court_id=court.pk,
                    cookies=session.cookies,
                    tag_names=[TAG_NAME],
                ).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()
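csv.Sniffer is used here to guess the spreadsheet's delimiter from a small sample before the DictReader is built, and f.seek(0) rewinds the file so no rows are lost. A self-contained sketch of that idiom with hypothetical in-memory data:

import csv
import io

# Hypothetical data standing in for options["input_file"].
f = io.StringIO("id;Case Name;AO ID\n1;Smith v. Jones;7\n2;Doe v. Roe;12\n")

dialect = csv.Sniffer().sniff(f.read(1024))   # detects ';' as the delimiter
f.seek(0)                                     # rewind so row 1 isn't skipped
reader = csv.DictReader(f, dialect=dialect)
for row in reader:
    # Zero-pad the court ID the same way the code above does.
    print(row["Case Name"], row["AO ID"].rjust(2, "0"))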
Example #12
def download_dockets(options):
    """Download dockets listed in the spreadsheet."""
    with open(options['input_file'], 'r') as f:
        dialect = csv.Sniffer().sniff(f.read(1024))
        f.seek(0)
        reader = csv.DictReader(f, dialect=dialect)
        q = options['queue']
        task = options['task']
        throttle = CeleryThrottle(queue_name=q,
                                  min_items=options['queue_length'])
        session = PacerSession(username=PACER_USERNAME,
                               password=PACER_PASSWORD)
        session.login()
        for i, row in enumerate(reader):
            if i < options['offset']:
                continue
            if i >= options['limit'] > 0:
                break
            throttle.maybe_wait()

            logger.info("Doing row %s: %s", i, row)

            if row['idb_docket_number']:
                if task == 'download_student_dockets':
                    continue
                # Zero-pad the docket number up to seven digits because Excel
                # ate the leading zeros that these would normally have.
                docket_number = row['idb_docket_number'].rjust(7, '0')
            elif row['student_docket_number']:
                # Use the values collected by student
                # researchers, then cleaned up by mlr.
                docket_number = row['student_docket_number']
            else:
                # No docket number; move on.
                continue
            court = Court.objects.get(fjc_court_id=row['AO ID'].rjust(2, '0'),
                                      jurisdiction=Court.FEDERAL_DISTRICT)
            chain(
                get_pacer_case_id_and_title.s(
                    pass_through=None,
                    docket_number=docket_number,
                    court_id=court.pk,
                    cookies=session.cookies,
                    case_name=row['Case Name'],
                ).set(queue=q),
                get_docket_by_pacer_case_id.s(
                    court_id=court.pk,
                    cookies=session.cookies,
                    tag_names=[TAG_NAME],
                ).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()
Example #13
def get_dockets(options):
    """Download a sample of dockets from PACER matching the 7xx series of NOS
    codes.
    """
    nos_codes = [
        LABOR_LITIGATION_OTHER, LABOR_MANAGEMENT_RELATIONS_ACT,
        LABOR_MANAGEMENT_REPORT_DISCLOSURE, FAIR_LABOR_STANDARDS_ACT_CV,
        RAILWAY_LABOR_ACT, FAMILY_AND_MEDICAL_LEAVE_ACT,
        EMPLOYEE_RETIREMENT_INCOME_SECURITY_ACT
    ]
    sample_size = 300
    items = FjcIntegratedDatabase.objects.filter(
        nature_of_suit__in=nos_codes,
        date_terminated__gt='2009-01-01',
        date_terminated__lt='2018-10-15',
        date_filed__gt='2009-01-01').order_by('?')[:sample_size]

    q = options['queue']
    throttle = CeleryThrottle(queue_name=q)
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    session.login()
    for i, row in enumerate(items):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break

        # All tests pass. Get the docket.
        logger.info("Doing row %s: %s", i, row)
        logger.info("This case is from year: %s", row.date_filed.year)

        throttle.maybe_wait()
        case_name = '%s v. %s' % (row.plaintiff, row.defendant)
        chain(
            get_pacer_case_id_and_title.s(
                docket_number=row.docket_number,
                court_id=row.district_id,
                cookies=session.cookies,
                case_name=case_name,
            ).set(queue=q),
            get_docket_by_pacer_case_id.s(court_id=row.district_id,
                                          cookies=session.cookies,
                                          tag_names=[TAG],
                                          **{
                                              'show_parties_and_counsel': True,
                                              'show_terminated_parties': True,
                                              'show_list_of_member_cases': True
                                          }).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
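The sample here comes from order_by('?'), which asks the database to shuffle the matching rows (ORDER BY RANDOM() on PostgreSQL) before the slice is applied. That is the simplest way to sample a queryset, though it can be slow on very large tables; a hedged alternative sketch (hypothetical helper, not part of this code) samples primary keys in Python instead:

import random

def sample_queryset(queryset, sample_size):
    # Fetch only the primary keys, sample in Python, then refetch those rows.
    pks = list(queryset.values_list("pk", flat=True))
    chosen = random.sample(pks, min(sample_size, len(pks)))
    return queryset.model.objects.filter(pk__in=chosen)

# e.g. items = sample_queryset(FjcIntegratedDatabase.objects.filter(...), 300)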
Example #14
def get_dockets(options):
    """Get the dockets by the particular judge now that we have run iquery
    for all of the cases in the jurisdiction, and now that we have
    """
    q = options["queue"]
    throttle = CeleryThrottle(queue_name=q)
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    session.login()

    buchwald_id = 450
    ds = (
        Docket.objects.filter(
            court_id="nysd", assigned_to_id=buchwald_id, tags__name=NYSD_TAG
        )
        .exclude(idb_data__nature_of_suit__in=NOS_EXCLUSIONS)
        .exclude(idb_data__isnull=True)
    )
    logger.info("Got %s dockets to download", ds.count())
    for i, d in enumerate(ds):
        if i < options["skip_until"]:
            continue
        if i >= options["limit"] > 0:
            break

        if i % 5000 == 0:
            # Re-authenticate just in case the auto-login mechanism isn't
            # working.
            session = PacerSession(
                username=PACER_USERNAME, password=PACER_PASSWORD
            )
            session.login()

        throttle.maybe_wait()
        logger.info("%s: Doing docket with pk: %s", i, d.pk)
        chain(
            get_docket_by_pacer_case_id.s(
                data={"pacer_case_id": d.pacer_case_id},
                court_id=d.court_id,
                cookies=session.cookies,
                docket_pk=d.pk,
                tag_names=[BUCKWALD_TAG],
                **{
                    "show_parties_and_counsel": True,
                    "show_terminated_parties": True,
                    "show_list_of_member_cases": False,
                }
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Example #15
def get_dockets(options):
    """Download the dockets described in the CSV
    """
    f = options["file"]
    reader = csv.DictReader(f)
    q = options["queue"]
    throttle = CeleryThrottle(queue_name=q)
    pacer_session = PacerSession(
        username=PACER_USERNAME, password=PACER_PASSWORD
    )
    pacer_session.login()
    for i, row in enumerate(reader):
        if i < options["offset"]:
            continue
        if i >= options["limit"] > 0:
            break

        if i % 1000 == 0:
            pacer_session = PacerSession(
                username=PACER_USERNAME, password=PACER_PASSWORD
            )
            pacer_session.login()
            logger.info("Sent %s tasks to celery so far." % i)
        logger.info("Doing row %s", i)
        throttle.maybe_wait()
        chain(
            get_pacer_case_id_and_title.s(
                pass_through=None,
                docket_number=make_docket_number(row["filecy"], row["docket"]),
                court_id="ilnb",
                cookies=pacer_session.cookies,
                office_number=row["office"],
                docket_number_letters="bk",
            ).set(queue=q),
            get_docket_by_pacer_case_id.s(
                court_id="ilnb",
                cookies=pacer_session.cookies,
                tag_names=[TAG],
                **{
                    "show_parties_and_counsel": True,
                    "show_terminated_parties": True,
                    "show_list_of_member_cases": True,
                }
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Example #16
def get_dockets(options):
    """Download the dockets described in the CSV
    """
    f = options['file']
    reader = csv.DictReader(f)
    q = options['queue']
    throttle = CeleryThrottle(queue_name=q)
    pacer_session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    pacer_session.login()
    for i, row in enumerate(reader):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break

        if i % 1000 == 0:
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)
            pacer_session.login()
            logger.info("Sent %s tasks to celery so far." % i)
        logger.info("Doing row %s", i)
        throttle.maybe_wait()
        chain(
            get_pacer_case_id_and_title.s(
                pass_through=None,
                docket_number=make_docket_number(row['filecy'], row['docket']),
                court_id='ilnb',
                cookies=pacer_session.cookies,
                office_number=row['office'],
                docket_number_letters='bk',
            ).set(queue=q),
            get_docket_by_pacer_case_id.s(
                court_id='ilnb',
                cookies=pacer_session.cookies,
                tag_names=[TAG],
                **{
                    'show_parties_and_counsel': True,
                    'show_terminated_parties': True,
                    'show_list_of_member_cases': True
                }
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Example #17
def download_dockets(options):
    """Download dockets listed in the spreadsheet."""
    f = open(options['input_file'], 'r')
    dialect = csv.Sniffer().sniff(f.read(2048))
    f.seek(0)
    reader = csv.DictReader(f, dialect=dialect)
    q = options['queue']
    throttle = CeleryThrottle(queue_name=q, min_items=options['queue_length'])
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    session.login()
    for i, row in enumerate(reader):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break

        throttle.maybe_wait()
        logger.info("Doing row %s: %s", i, row)

        row_tag = '%s-%s' % (PROJECT_TAG_NAME, row['id'])
        if not row['district_ct']:
            chain(
                get_appellate_docket_by_docket_number.s(
                    docket_number=row['docket_no1'],
                    court_id=row['cl_court'],
                    cookies=session.cookies,
                    tag_names=[PROJECT_TAG_NAME, row_tag],
                    # Do not get the docket entries for now. We're only
                    # interested in the date terminated. If it's an open case,
                    # we'll handle that later.
                    **{
                        'show_docket_entries': False,
                        'show_orig_docket': False,
                        'show_prior_cases': False,
                        'show_associated_cases': False,
                        'show_panel_info': True,
                        'show_party_atty_info': True,
                        'show_caption': True,
                    }).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()
        else:
            chain(
                get_pacer_case_id_and_title.s(
                    docket_number=row['docket_no1'],
                    court_id=row['cl_court'],
                    cookies=session.cookies,
                    case_name=row['name'],
                ).set(queue=q),
                do_case_query_by_pacer_case_id.s(
                    court_id=row['cl_court'],
                    cookies=session.cookies,
                    tag_names=[PROJECT_TAG_NAME, row_tag],
                ).set(queue=q),
                get_docket_by_pacer_case_id.s(
                    court_id=row['cl_court'],
                    cookies=session.cookies,
                    tag_names=[PROJECT_TAG_NAME, row_tag],
                    **{
                        # No docket entries
                        'doc_num_start': 10000,
                        'doc_num_end': 10000,
                        'show_parties_and_counsel': True,
                        'show_terminated_parties': True,
                        'show_list_of_member_cases': True,
                    }).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()

    f.close()
Example #18
def download_dockets(options):
    """Download dockets listed in the spreadsheet."""
    f = open(options["input_file"], "r")
    dialect = csv.Sniffer().sniff(f.read(2048))
    f.seek(0)
    reader = csv.DictReader(f, dialect=dialect)
    q = options["queue"]
    throttle = CeleryThrottle(queue_name=q)
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    session.login()
    for i, row in enumerate(reader):
        if i < options["offset"]:
            continue
        if i >= options["limit"] > 0:
            break

        throttle.maybe_wait()
        logger.info("Doing row %s: %s", i, row)

        row_tag = f"{PROJECT_TAG_NAME}-{row['id']}"
        if not row["district_ct"]:
            chain(
                get_appellate_docket_by_docket_number.s(
                    docket_number=row["docket_no1"],
                    court_id=row["cl_court"],
                    cookies=session.cookies,
                    tag_names=[PROJECT_TAG_NAME, row_tag],
                    # Do not get the docket entries for now. We're only
                    # interested in the date terminated. If it's an open case,
                    # we'll handle that later.
                    **{
                        "show_docket_entries": False,
                        "show_orig_docket": False,
                        "show_prior_cases": False,
                        "show_associated_cases": False,
                        "show_panel_info": True,
                        "show_party_atty_info": True,
                        "show_caption": True,
                    },
                ).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()
        else:
            chain(
                get_pacer_case_id_and_title.s(
                    pass_through=None,
                    docket_number=row["docket_no1"],
                    court_id=row["cl_court"],
                    cookies=session.cookies,
                    case_name=row["name"],
                ).set(queue=q),
                do_case_query_by_pacer_case_id.s(
                    court_id=row["cl_court"],
                    cookies=session.cookies,
                    tag_names=[PROJECT_TAG_NAME, row_tag],
                ).set(queue=q),
                get_docket_by_pacer_case_id.s(
                    court_id=row["cl_court"],
                    cookies=session.cookies,
                    tag_names=[PROJECT_TAG_NAME, row_tag],
                    **{
                        # No docket entries
                        "doc_num_start": 10000,
                        "doc_num_end": 10000,
                        "show_parties_and_counsel": True,
                        "show_terminated_parties": True,
                        "show_list_of_member_cases": True,
                    },
                ).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()

    f.close()
Example #19
def do_pacer_fetch(fq):
    """Process a request made by a user to get an item from PACER.

    :param fq: The PacerFetchQueue item to process
    :return: None
    """
    c = None
    if fq.request_type == REQUEST_TYPE.DOCKET:
        # Request by docket_id
        court_id = fq.court_id or getattr(fq.docket, "court_id", None)
        kwargs = {
            # Universal params
            "court_id": court_id,
            "user_pk": fq.user_id,
            "docket_pk": fq.docket_id,
            # Scraping params
            "doc_num_start": fq.de_number_start,
            "doc_num_end": fq.de_number_end,
            "date_start": fq.de_date_start,
            "date_end": fq.de_date_end,
            "show_parties_and_counsel": fq.show_parties_and_counsel,
            "show_terminated_parties": fq.show_terminated_parties,
            "show_list_of_member_cases": fq.show_list_of_member_cases,
        }
        if (fq.docket_id and not fq.docket.pacer_case_id) or fq.docket_number:
            # We lack the pacer_case_id either on the docket or from the
            # submission. Look it up.
            docket_number = fq.docket_number or getattr(
                fq.docket, "docket_number", None
            )
            c = chain(
                get_pacer_case_id_and_title.si(
                    pass_through=None,
                    docket_number=docket_number,
                    court_id=court_id,
                    user_pk=fq.user_id,
                ),
                get_docket_by_pacer_case_id.s(**kwargs),
            )
        else:
            if fq.docket_id is not None and fq.docket.pacer_case_id:
                # We have the docket and its pacer_case_id
                kwargs.update(
                    {
                        "data": {"pacer_case_id": fq.docket.pacer_case_id},
                        "court_id": fq.docket.court_id,
                    }
                )
            elif fq.pacer_case_id:
                # We lack the docket, but have a pacer_case_id
                kwargs.update(
                    {"data": {"pacer_case_id": fq.pacer_case_id},}
                )
            c = chain(get_docket_by_pacer_case_id.si(**kwargs))
        c |= add_or_update_recap_docket.s()
    elif fq.request_type == REQUEST_TYPE.PDF:
        # Request by recap_document_id
        rd_pk = fq.recap_document_id
        if fq.recap_document_id:
            c = chain(
                fetch_pacer_doc_by_rd.si(rd_pk, fq.pk, fq.user_id),
                extract_recap_pdf.si(rd_pk),
                add_items_to_solr.si([rd_pk], "search.RECAPDocument"),
            )
    if c is not None:
        c |= mark_fq_successful.si(fq.pk)
        c.apply_async()
    else:
        # Somehow failed to make a chain. Log an error.
        fq.status = PROCESSING_STATUS.INVALID_CONTENT
        fq.message = "Invalid submission, unable to make chain for processing."
        fq.save()
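This version builds the chain incrementally: in Celery the | operator composes signatures into a chain, so c |= add_or_update_recap_docket.s() appends a step to whichever branch-specific chain was built above, and the success-marking task is added once at the end. A minimal sketch of that composition, with hypothetical tasks:

from celery import Celery

app = Celery("sketch", broker="memory://")

@app.task
def fetch(pk):
    return pk

@app.task
def index(result):
    return result

@app.task
def mark_done(pk):
    return f"{pk} done"

c = fetch.si(1)        # start from one branch's signature
c |= index.s()         # same as c = c | index.s(): append a step
c |= mark_done.si(1)   # appended regardless of which branch built c
c.apply_async()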
Example #20
def get_dockets(options):
    """Download the dockets described in the CSV according to the `tasks`
    option.
    """
    f = options['file']
    reader = csv.DictReader(f)
    q = options['queue']
    task = options['task']
    throttle = CeleryThrottle(queue_name=q)
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    session.login()
    for i, row in enumerate(reader):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break
        if row['Too Old'] == 'Yes':
            continue
        if row['Appellate/District'].lower() != task:
            # Only do appellate when appellate, and district when district.
            continue

        # All tests pass. Get the docket.
        logger.info("Doing row %s: %s", i, row)
        throttle.maybe_wait()
        if task == 'appellate':
            chain(
                get_appellate_docket_by_docket_number.s(
                    docket_number=row['Cleaned case_No'],
                    court_id=row['fjc_court_id'],
                    cookies=session.cookies,
                    tag_names=[TAG],
                    **{
                        'show_docket_entries': True,
                        'show_orig_docket': True,
                        'show_prior_cases': True,
                        'show_associated_cases': True,
                        'show_panel_info': True,
                        'show_party_atty_info': True,
                        'show_caption': True,
                    }).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()
        elif task == 'district':
            chain(
                get_pacer_case_id_and_title.s(
                    pass_through=None,
                    docket_number=row['Cleaned case_No'],
                    court_id=row['fjc_court_id'],
                    cookies=session.cookies,
                    case_name=row['Title'],
                ).set(queue=q),
                get_docket_by_pacer_case_id.s(
                    court_id=row['fjc_court_id'],
                    cookies=session.cookies,
                    tag_names=[TAG],
                    **{
                        'show_parties_and_counsel': True,
                        'show_terminated_parties': True,
                        'show_list_of_member_cases': True,
                    },
                ).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()
Example #21
def do_pacer_fetch(fq):
    """Process a request made by a user to get an item from PACER.

    :param fq: The PacerFetchQueue item to process
    :return: None
    """
    result = None
    if fq.request_type == REQUEST_TYPE.DOCKET:
        # Request by docket_id
        court_id = fq.court_id or getattr(fq.docket, "court_id", None)
        kwargs = {
            # Universal params
            "court_id": court_id,
            "user_pk": fq.user_id,
            "docket_pk": fq.docket_id,
            # Scraping params
            "doc_num_start": fq.de_number_start,
            "doc_num_end": fq.de_number_end,
            "date_start": fq.de_date_start,
            "date_end": fq.de_date_end,
            "show_parties_and_counsel": fq.show_parties_and_counsel,
            "show_terminated_parties": fq.show_terminated_parties,
            "show_list_of_member_cases": fq.show_list_of_member_cases,
        }
        if (fq.docket_id and not fq.docket.pacer_case_id) or fq.docket_number:
            # We lack the pacer_case_id either on the docket or from the
            # submission. Look it up.
            docket_number = fq.docket_number or getattr(
                fq.docket, "docket_number", None)
            c = chain(
                get_pacer_case_id_and_title.si(
                    pass_through=None,
                    docket_number=docket_number,
                    court_id=court_id,
                    user_pk=fq.user_id,
                ),
                get_docket_by_pacer_case_id.s(**kwargs),
            )
        else:
            if fq.docket_id is not None and fq.docket.pacer_case_id:
                # We have the docket and its pacer_case_id
                kwargs.update({
                    "data": {
                        "pacer_case_id": fq.docket.pacer_case_id
                    },
                    "court_id": fq.docket.court_id,
                })
            elif fq.pacer_case_id:
                # We lack the docket, but have a pacer_case_id
                kwargs.update({
                    "data": {
                        "pacer_case_id": fq.pacer_case_id
                    },
                })
            c = chain(get_docket_by_pacer_case_id.si(**kwargs))
        c |= add_or_update_recap_docket.s()
        c |= mark_fq_successful.si(fq.pk)
        result = c.apply_async()
    elif fq.request_type == REQUEST_TYPE.PDF:
        # Request by recap_document_id
        rd_pk = fq.recap_document_id
        result = chain(
            fetch_pacer_doc_by_rd.si(rd_pk, fq.pk),
            extract_recap_pdf.si(rd_pk),
            add_items_to_solr.si([rd_pk], "search.RECAPDocument"),
            mark_fq_successful.si(fq.pk),
        ).apply_async()
    elif fq.request_type == REQUEST_TYPE.ATTACHMENT_PAGE:
        result = fetch_attachment_page.apply_async(args=(fq.pk, ))
    return result
Example #22
def get_dockets(options):
    """Download the dockets described in the CSV according to the `tasks`
    option.
    """
    f = options['file']
    reader = csv.DictReader(f)
    q = options['queue']
    task = options['task']
    throttle = CeleryThrottle(queue_name=q)
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    session.login()
    for i, row in enumerate(reader):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break
        if row['Too Old'] == 'Yes':
            continue
        if row['Appellate/District'].lower() != task:
            # Only do appellate when appellate, and district when district.
            continue

        # All tests pass. Get the docket.
        logger.info("Doing row %s: %s", i, row)
        throttle.maybe_wait()
        if task == 'appellate':
            chain(
                get_appellate_docket_by_docket_number.s(
                    docket_number=row['Cleaned case_No'],
                    court_id=row['fjc_court_id'],
                    cookies=session.cookies,
                    tag_names=[TAG],
                    **{
                        'show_docket_entries': True,
                        'show_orig_docket': True,
                        'show_prior_cases': True,
                        'show_associated_cases': True,
                        'show_panel_info': True,
                        'show_party_atty_info': True,
                        'show_caption': True,
                    }
                ).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()
        elif task == 'district':
            chain(
                get_pacer_case_id_and_title.s(
                    pass_through=None,
                    docket_number=row['Cleaned case_No'],
                    court_id=row['fjc_court_id'],
                    cookies=session.cookies,
                    case_name=row['Title'],
                ).set(queue=q),
                get_docket_by_pacer_case_id.s(
                    court_id=row['fjc_court_id'],
                    cookies=session.cookies,
                    tag_names=[TAG],
                    **{
                        'show_parties_and_counsel': True,
                        'show_terminated_parties': True,
                        'show_list_of_member_cases': True
                    }
                ).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()
Example #23
def download_dockets(options):
    """Download dockets listed in the spreadsheet."""
    f = open(options['input_file'], 'r')
    dialect = csv.Sniffer().sniff(f.read(2048))
    f.seek(0)
    reader = csv.DictReader(f, dialect=dialect)
    q = options['queue']
    throttle = CeleryThrottle(queue_name=q,
                              min_items=options['queue_length'])
    session = PacerSession(username=PACER_USERNAME,
                           password=PACER_PASSWORD)
    session.login()
    for i, row in enumerate(reader):
        if i < options['offset']:
            continue
        if i >= options['limit'] > 0:
            break

        throttle.maybe_wait()
        logger.info("Doing row %s: %s", i, row)

        row_tag = '%s-%s' % (PROJECT_TAG_NAME, row['id'])
        if not row['district_ct']:
            chain(
                get_appellate_docket_by_docket_number.s(
                    docket_number=row['docket_no1'],
                    court_id=row['cl_court'],
                    cookies=session.cookies,
                    tag_names=[PROJECT_TAG_NAME, row_tag],
                    # Do not get the docket entries for now. We're only
                    # interested in the date terminated. If it's an open case,
                    # we'll handle that later.
                    **{
                        'show_docket_entries': False,
                        'show_orig_docket': False,
                        'show_prior_cases': False,
                        'show_associated_cases': False,
                        'show_panel_info': True,
                        'show_party_atty_info': True,
                        'show_caption': True,
                    }
                ).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()
        else:
            chain(
                get_pacer_case_id_and_title.s(
                    pass_through=None,
                    docket_number=row['docket_no1'],
                    court_id=row['cl_court'],
                    cookies=session.cookies,
                    case_name=row['name'],
                ).set(queue=q),
                do_case_query_by_pacer_case_id.s(
                    court_id=row['cl_court'],
                    cookies=session.cookies,
                    tag_names=[PROJECT_TAG_NAME, row_tag],
                ).set(queue=q),
                get_docket_by_pacer_case_id.s(
                    court_id=row['cl_court'],
                    cookies=session.cookies,
                    tag_names=[PROJECT_TAG_NAME, row_tag],
                    **{
                        # No docket entries
                        'doc_num_start': 10000,
                        'doc_num_end': 10000,
                        'show_parties_and_counsel': True,
                        'show_terminated_parties': True,
                        'show_list_of_member_cases': True,
                    }
                ).set(queue=q),
                add_or_update_recap_docket.s().set(queue=q),
            ).apply_async()

    f.close()