Ejemplo n.º 1
0
 def test_normalize_atty_contact(self):
     """Run normalize_attorney_contact over a table of raw contact blobs.

     Every entry in ``pairs`` stores the raw text under ``'q'`` and, under
     ``'a'``, the expected result: a 2-tuple whose first dict holds the
     name/address fields and whose second dict holds phone, fax and email.
     """
     pairs = [
         {
             # Email and phone number
             'q': ("Landye Bennett Blumstein LLP\n"
                   "701 West Eighth Avenue, Suite 1200\n"
                   "Anchorage, AK 99501\n"
                   "907-276-5152\n"
                   "Email: [email protected]"),
             'a': ({
                 'name': u"Landye Bennett Blumstein LLP",
                 'address1': u'701 West Eighth Ave.',
                 'address2': u'Suite 1200',
                 'city': u'Anchorage',
                 'state': u'AK',
                 'zip_code': u'99501',
                 'lookup_key': u'701westeighthavesuite1200anchoragelandyebennettblumsteinak99501',
             }, {
                 'email': u'*****@*****.**',
                 'phone': u'(907) 276-5152',
                 'fax': u'',
             }),
         },
         {
             # PO Box
             'q': ("Sands Anderson PC\n"
                   "P.O. Box 2188\n"
                   "Richmond, VA 23218-2188\n"
                   "(804) 648-1636"),
             'a': ({
                 'name': u'Sands Anderson PC',
                 'address1': u'P.O. Box 2188',
                 'city': u'Richmond',
                 'state': u'VA',
                 'zip_code': u'23218-2188',
                 'lookup_key': u'pobox2188richmondsandsandersonva232182188',
             }, {
                 'phone': u'(804) 648-1636',
                 'fax': u'',
                 'email': u'',
             }),
         },
         {
             # Lowercase state (needs normalization)
             'q': ("Sands Anderson PC\n"
                   "P.O. Box 2188\n"
                   "Richmond, va 23218-2188\n"
                   "(804) 648-1636"),
             'a': ({
                 'name': u'Sands Anderson PC',
                 'address1': u'P.O. Box 2188',
                 'city': u'Richmond',
                 'state': u'VA',
                 'zip_code': u'23218-2188',
                 'lookup_key': u'pobox2188richmondsandsandersonva232182188',
             }, {
                 'phone': u"(804) 648-1636",
                 'fax': u'',
                 'email': u'',
             }),
         },
         {
             # Phone, fax, and email -- the whole package.
             'q': ("Susman Godfrey, LLP\n"
                   "1201 Third Avenue, Suite 3800\n"
                   "Seattle, WA 98101\n"
                   "206-373-7381\n"
                   "Fax: 206-516-3883\n"
                   "Email: [email protected]"),
             'a': ({
                 'name': u'Susman Godfrey, LLP',
                 'address1': u'1201 Third Ave.',
                 'address2': u'Suite 3800',
                 'city': u'Seattle',
                 'state': u'WA',
                 'zip_code': u'98101',
                 'lookup_key': u'1201thirdavesuite3800seattlesusmangodfreywa98101',
             }, {
                 'phone': u'(206) 373-7381',
                 'fax': u'(206) 516-3883',
                 'email': u'*****@*****.**',
             }),
         },
         {
             # No recipient name
             'q': ("211 E. Livingston Ave\n"
                   "Columbus, OH 43215\n"
                   "(614) 228-3727\n"
                   "Email:"),
             'a': ({
                 'address1': u'211 E. Livingston Ave',
                 'city': u'Columbus',
                 'state': u'OH',
                 'zip_code': u'43215',
                 'lookup_key': u'211elivingstonavecolumbusoh43215',
             }, {
                 'phone': u'(614) 228-3727',
                 'email': u'',
                 'fax': u'',
             }),
         },
         {
             # Weird ways of doing phone numbers
             'q': """1200 Boulevard Tower
                 1018 Kanawha Boulevard, E
                 Charleston, WV 25301
                 304/342-3174
                 Fax: 304/342-0448
                 Email: [email protected]
             """,
             'a': ({
                 'address1': u'1018 Kanawha Blvd., E',
                 'address2': u'1200 Blvd. Tower',
                 'city': u'Charleston',
                 'state': u'WV',
                 'zip_code': u'25301',
                 'lookup_key': u'1018kanawhablvde1200blvdtowercharlestonwv25301',
             }, {
                 'phone': '(304) 342-3174',
                 'fax': '(304) 342-0448',
                 'email': '*****@*****.**',
             }),
         },
         {
             # Empty fax numbers (b/c PACER).
             'q': """303 E 17th Ave
                 Suite 300
                 Denver, CO 80203
                 303-861-1764
                 Fax:
                 Email: [email protected]
         """,
             'a': ({
                 'address1': u'303 E 17th Ave',
                 'address2': u'Suite 300',
                 'city': u'Denver',
                 'state': u'CO',
                 'zip_code': u'80203',
                 'lookup_key': u'303e17thavesuite300denverco80203',
             }, {
                 'phone': u'(303) 861-1764',
                 'fax': u'',
                 'email': u'*****@*****.**',
             }),
         },
         {
             # Funky phone number
             'q': """Guerrini Law Firm
                 106 SOUTH MENTOR AVE. #150
                 Pasadena, CA 91106
                 626-229-9611-202
                 Fax: 626-229-9615
                 Email: [email protected]
             """,
             'a': ({
                 'name': u'Guerrini Law Firm',
                 'address1': u'106 South Mentor Ave.',
                 'address2': u'# 150',
                 'city': u'Pasadena',
                 'state': u'CA',
                 'zip_code': u'91106',
                 'lookup_key': u'106southmentorave150pasadenaguerrinilawfirmca91106',
             }, {
                 'phone': u'',
                 'fax': u'(626) 229-9615',
                 'email': u'*****@*****.**',
             }),
         },
         {
             # Address only: no phone, fax, or email lines in the input.
             'q': """Duncan & Sevin, LLC
                 400 Poydras St.
                 Suite 1200
                 New Orleans, LA 70130
             """,
             'a': ({
                 'name': u'Duncan & Sevin, LLC',
                 'address1': u'400 Poydras St.',
                 'address2': u'Suite 1200',
                 'city': u'New Orleans',
                 'state': u'LA',
                 'zip_code': u'70130',
                 'lookup_key': u'400poydrasstsuite1200neworleansduncansevinllcla70130',
             }, {
                 'phone': u'',
                 'fax': u'',
                 'email': u'',
             }),
         },
         {
             # Ambiguous address. Returns empty dict.
             'q': """Darden, Koretzky, Tessier, Finn, Blossman & Areaux
                 Energy Centre
                 1100 Poydras Street
                 Suite 3100
                 New Orleans, LA 70163
                 504-585-3800
                 Email: [email protected]
             """,
             'a': ({}, {
                 'phone': u'(504) 585-3800',
                 'email': u'*****@*****.**',
                 'fax': u'',
             }),
         },
         {
             # Ambiguous address with unicode that triggers
             # https://github.com/datamade/probableparsing/issues/2
             'q': u"""Darden, Koretzky, Tessier, Finn, Blossman & Areaux
                 Energy Centre
                 1100 Poydras Street
                 Suite 3100
                 New Orléans, LA 70163
                 504-585-3800
                 Email: [email protected]
             """,
             'a': ({}, {
                 'phone': u'(504) 585-3800',
                 'email': u'*****@*****.**',
                 'fax': u'',
             }),
         },
         {
             # Missing zip code, phone number ambiguously used instead.
             'q': """NSB - Department of Law
                 POB 69
                 Barrow, AK 907-852-0300
             """,
             'a': ({
                 'name': u'NSB Department of Law',
                 'address1': u'Pob 69',
                 'city': u'Barrow',
                 'state': u'AK',
                 'zip_code': u'',
                 'lookup_key': u'pob69barrownsbdepartmentoflawak',
             }, {
                 'phone': u'',
                 'fax': u'',
                 'email': u'',
             }),
         },
         {
             # Unknown/invalid state.
             'q': """Kessler Topaz Meltzer Check LLP
                 280 King of Prussia Road
                 Radnor, OA 19087
                 (610) 667-7706
                 Fax: (610) 667-7056
                 Email: [email protected]
             """,
             'a': ({
                 'name': u'Kessler Topaz Meltzer Check LLP',
                 'city': u'Radnor',
                 'address1': u'280 King of Prussia Road',
                 'lookup_key': u'280kingofprussiaroadradnorkesslertopazmeltzercheck19087',
                 'state': u'',
                 'zip_code': u'19087',
             }, {
                 'phone': u'(610) 667-7706',
                 'fax': u'(610) 667-7056',
                 'email': u'*****@*****.**',
             }),
         },
     ]
     # Never truncate the assertion diff; the expected dicts are long.
     self.maxDiff = None
     for idx, case in enumerate(pairs):
         print("Normalizing address %s..." % idx, end='')
         actual = normalize_attorney_contact(case['q'])
         self.assertEqual(actual, case['a'])
         print('✓')
Ejemplo n.º 2
0
def add_attorney(atty, p, d):
    """Add/update an attorney.

    Given an attorney node, and a party and a docket object, add the attorney
    to the database or link the attorney to the new docket. Also add/update the
    attorney organization, and the attorney's role in the case.

    An existing attorney is matched on exact name and, when any of them is
    non-empty, on at least one of: phone, fax, email, raw contact text, or
    organization lookup key. If nothing matches, a new attorney is created.
    Stored contact details are overwritten only when this docket's newest date
    is at least as recent as the attorney's ``date_sourced``.

    :param atty: A dict representing an attorney, as provided by Juriscraper.
        The keys read here are 'name', 'contact' and 'roles'.
    :param p: A Party object
    :param d: A Docket object
    :return: None if more than one existing attorney matches, otherwise the
        Attorney object.
    """
    # NOTE(review): max() raises ValueError when the docket has none of these
    # dates set -- confirm callers always provide at least one.
    docket_dates = [d.date_filed, d.date_terminated, d.date_last_filing]
    newest_docket_date = max([dt for dt in docket_dates if dt])
    atty_org_info, atty_info = normalize_attorney_contact(
        atty['contact'],
        fallback_name=atty['name'],
    )

    # OR together every identifier we actually have; empty values are skipped
    # so that blank phone/fax/email fields never match each other.
    q = Q()
    fields = (
        ('phone', atty_info['phone']),
        ('fax', atty_info['fax']),
        ('email', atty_info['email']),
        ('contact_raw', atty['contact']),
        ('organizations__lookup_key', atty_org_info.get('lookup_key')),
    )
    for field, lookup in fields:
        if lookup:
            q |= Q(**{field: lookup})

    try:
        # distinct() guards against duplicate rows produced by the join on
        # organizations.
        a, _ = Attorney.objects.filter(
            Q(name=atty['name']) & q,
        ).distinct().get_or_create(
            defaults={
                'name': atty['name'],
                'date_sourced': newest_docket_date,
                'contact_raw': atty['contact'],
            },
        )
    except Attorney.MultipleObjectsReturned:
        logger.info("Got too many results for attorney: '%s'. Punting.", atty)
        return None

    # Associate the attorney with an org and update their contact info.
    if atty['contact']:
        if atty_org_info:
            logger.info("Adding organization information to '%s': '%s'",
                        atty['name'], atty_org_info)
            try:
                org = AttorneyOrganization.objects.get(
                    lookup_key=atty_org_info['lookup_key'],
                )
            except AttorneyOrganization.DoesNotExist:
                try:
                    org = AttorneyOrganization.objects.create(**atty_org_info)
                except IntegrityError:
                    # Race condition. Item was created after get. Try again.
                    org = AttorneyOrganization.objects.get(
                        lookup_key=atty_org_info['lookup_key'],
                    )

            # Add the attorney to the organization
            AttorneyOrganizationAssociation.objects.get_or_create(
                attorney=a,
                attorney_organization=org,
                docket=d,
            )

        docket_info_is_newer = (a.date_sourced <= newest_docket_date)
        if atty_info and docket_info_is_newer:
            logger.info("Updating atty info because %s is more recent than %s.",
                        newest_docket_date, a.date_sourced)
            a.date_sourced = newest_docket_date
            a.contact_raw = atty['contact']
            a.email = atty_info['email']
            a.phone = atty_info['phone']
            a.fax = atty_info['fax']
            a.save()

    # Do roles
    atty_roles = [normalize_attorney_role(r) for r in atty['roles']]
    # Use a list comprehension rather than filter(): on Python 3 filter()
    # returns a one-shot iterator, which cannot be measured with len() or
    # iterated twice.
    atty_roles = [r for r in atty_roles if r['role'] is not None]
    atty_roles = remove_duplicate_dicts(atty_roles)
    if len(atty_roles) > 0:
        logger.info("Linking attorney '%s' to party '%s' via %s roles: %s",
                    atty['name'], p.name, len(atty_roles), atty_roles)
    else:
        logger.info("No role data parsed. Linking via 'UNKNOWN' role.")
        atty_roles = [{'role': Role.UNKNOWN, 'date_action': None}]

    # Delete the old roles, replace with new.
    Role.objects.filter(attorney=a, party=p, docket=d).delete()
    Role.objects.bulk_create([
        Role(attorney=a, party=p, docket=d, **atty_role) for
        atty_role in atty_roles
    ])
    return a
Ejemplo n.º 3
0
 def test_normalize_atty_contact(self):
     """Table-driven check of normalize_attorney_contact.

     ``pairs`` maps raw contact text (``"q"``) to the expected result
     (``"a"``): a 2-tuple of dicts, the first with name/address fields and
     the second with phone, fax and email.
     """
     pairs = [{
         # Email and phone number
         "q": (
             "Landye Bennett Blumstein LLP\n"
             "701 West Eighth Avenue, Suite 1200\n"
             "Anchorage, AK 99501\n"
             "907-276-5152\n"
             "Email: [email protected]"
         ),
         "a": ({
             "name": u"Landye Bennett Blumstein LLP",
             "address1": u"701 West Eighth Ave.",
             "address2": u"Suite 1200",
             "city": u"Anchorage",
             "state": u"AK",
             "zip_code": u"99501",
             "lookup_key": u"701westeighthavesuite1200anchoragelandyebennettblumsteinak99501",
         }, {
             "email": u"*****@*****.**",
             "phone": u"(907) 276-5152",
             "fax": u"",
         }),
     }, {
         # PO Box
         "q": (
             "Sands Anderson PC\n"
             "P.O. Box 2188\n"
             "Richmond, VA 23218-2188\n"
             "(804) 648-1636"
         ),
         "a": ({
             "name": u"Sands Anderson PC",
             "address1": u"P.O. Box 2188",
             "city": u"Richmond",
             "state": u"VA",
             "zip_code": u"23218-2188",
             "lookup_key": u"pobox2188richmondsandsandersonva232182188",
         }, {
             "phone": u"(804) 648-1636",
             "fax": u"",
             "email": u"",
         }),
     }, {
         # Lowercase state (needs normalization)
         "q": (
             "Sands Anderson PC\n"
             "P.O. Box 2188\n"
             "Richmond, va 23218-2188\n"
             "(804) 648-1636"
         ),
         "a": ({
             "name": u"Sands Anderson PC",
             "address1": u"P.O. Box 2188",
             "city": u"Richmond",
             "state": u"VA",
             "zip_code": u"23218-2188",
             "lookup_key": u"pobox2188richmondsandsandersonva232182188",
         }, {
             "phone": u"(804) 648-1636",
             "fax": u"",
             "email": u"",
         }),
     }, {
         # Phone, fax, and email -- the whole package.
         "q": (
             "Susman Godfrey, LLP\n"
             "1201 Third Avenue, Suite 3800\n"
             "Seattle, WA 98101\n"
             "206-373-7381\n"
             "Fax: 206-516-3883\n"
             "Email: [email protected]"
         ),
         "a": ({
             "name": u"Susman Godfrey, LLP",
             "address1": u"1201 Third Ave.",
             "address2": u"Suite 3800",
             "city": u"Seattle",
             "state": u"WA",
             "zip_code": u"98101",
             "lookup_key": u"1201thirdavesuite3800seattlesusmangodfreywa98101",
         }, {
             "phone": u"(206) 373-7381",
             "fax": u"(206) 516-3883",
             "email": u"*****@*****.**",
         }),
     }, {
         # No recipient name
         "q": (
             "211 E. Livingston Ave\n"
             "Columbus, OH 43215\n"
             "(614) 228-3727\n"
             "Email:"
         ),
         "a": ({
             "address1": u"211 E. Livingston Ave",
             "city": u"Columbus",
             "state": u"OH",
             "zip_code": u"43215",
             "lookup_key": u"211elivingstonavecolumbusoh43215",
         }, {
             "phone": u"(614) 228-3727",
             "email": u"",
             "fax": u"",
         }),
     }, {
         # Weird ways of doing phone numbers
         "q": """1200 Boulevard Tower
                 1018 Kanawha Boulevard, E
                 Charleston, WV 25301
                 304/342-3174
                 Fax: 304/342-0448
                 Email: [email protected]
             """,
         "a": ({
             "address1": u"1018 Kanawha Blvd., E",
             "address2": u"1200 Blvd. Tower",
             "city": u"Charleston",
             "state": u"WV",
             "zip_code": u"25301",
             "lookup_key": u"1018kanawhablvde1200blvdtowercharlestonwv25301",
         }, {
             "phone": "(304) 342-3174",
             "fax": "(304) 342-0448",
             "email": "*****@*****.**",
         }),
     }, {
         # Empty fax numbers (b/c PACER).
         "q": """303 E 17th Ave
                 Suite 300
                 Denver, CO 80203
                 303-861-1764
                 Fax:
                 Email: [email protected]
         """,
         "a": ({
             "address1": u"303 E 17th Ave",
             "address2": u"Suite 300",
             "city": u"Denver",
             "state": u"CO",
             "zip_code": u"80203",
             "lookup_key": u"303e17thavesuite300denverco80203",
         }, {
             "phone": u"(303) 861-1764",
             "fax": u"",
             "email": u"*****@*****.**",
         }),
     }, {
         # Funky phone number
         "q": """Guerrini Law Firm
                 106 SOUTH MENTOR AVE. #150
                 Pasadena, CA 91106
                 626-229-9611-202
                 Fax: 626-229-9615
                 Email: [email protected]
             """,
         "a": ({
             "name": u"Guerrini Law Firm",
             "address1": u"106 South Mentor Ave.",
             "address2": u"# 150",
             "city": u"Pasadena",
             "state": u"CA",
             "zip_code": u"91106",
             "lookup_key": u"106southmentorave150pasadenaguerrinilawfirmca91106",
         }, {
             "phone": u"",
             "fax": u"(626) 229-9615",
             "email": u"*****@*****.**",
         }),
     }, {
         # Address only: no phone, fax, or email lines in the input.
         "q": """Duncan & Sevin, LLC
                 400 Poydras St.
                 Suite 1200
                 New Orleans, LA 70130
             """,
         "a": ({
             "name": u"Duncan & Sevin, LLC",
             "address1": u"400 Poydras St.",
             "address2": u"Suite 1200",
             "city": u"New Orleans",
             "state": u"LA",
             "zip_code": u"70130",
             "lookup_key": u"400poydrasstsuite1200neworleansduncansevinllcla70130",
         }, {
             "phone": u"",
             "fax": u"",
             "email": u"",
         }),
     }, {
         # Ambiguous address. Returns empty dict.
         "q": """Darden, Koretzky, Tessier, Finn, Blossman & Areaux
                 Energy Centre
                 1100 Poydras Street
                 Suite 3100
                 New Orleans, LA 70163
                 504-585-3800
                 Email: [email protected]
             """,
         "a": ({}, {
             "phone": u"(504) 585-3800",
             "email": u"*****@*****.**",
             "fax": u"",
         }),
     }, {
         # Ambiguous address with unicode that triggers
         # https://github.com/datamade/probableparsing/issues/2
         "q": u"""Darden, Koretzky, Tessier, Finn, Blossman & Areaux
                 Energy Centre
                 1100 Poydras Street
                 Suite 3100
                 New Orléans, LA 70163
                 504-585-3800
                 Email: [email protected]
             """,
         "a": ({}, {
             "phone": u"(504) 585-3800",
             "email": u"*****@*****.**",
             "fax": u"",
         }),
     }, {
         # Missing zip code, phone number ambiguously used instead.
         "q": """NSB - Department of Law
                 POB 69
                 Barrow, AK 907-852-0300
             """,
         "a": ({
             "name": u"NSB Department of Law",
             "address1": u"Pob 69",
             "city": u"Barrow",
             "state": u"AK",
             "zip_code": u"",
             "lookup_key": u"pob69barrownsbdepartmentoflawak",
         }, {
             "phone": u"",
             "fax": u"",
             "email": u"",
         }),
     }, {
         # Unknown/invalid state.
         "q": """Kessler Topaz Meltzer Check LLP
                 280 King of Prussia Road
                 Radnor, OA 19087
                 (610) 667-7706
                 Fax: (610) 667-7056
                 Email: [email protected]
             """,
         "a": ({
             "name": u"Kessler Topaz Meltzer Check LLP",
             "city": u"Radnor",
             "address1": u"280 King of Prussia Road",
             "lookup_key": u"280kingofprussiaroadradnorkesslertopazmeltzercheck19087",
             "state": u"",
             "zip_code": u"19087",
         }, {
             "phone": u"(610) 667-7706",
             "fax": u"(610) 667-7056",
             "email": u"*****@*****.**",
         }),
     }]
     # Show the complete diff when an expectation fails.
     self.maxDiff = None
     for number, entry in enumerate(pairs):
         raw_contact = entry["q"]
         expected = entry["a"]
         print("Normalizing address %s..." % number, end="")
         self.assertEqual(normalize_attorney_contact(raw_contact), expected)
         print("✓")
Ejemplo n.º 4
0
def add_attorney(atty, p, d):
    """Create or update an attorney and attach them to a party on a docket.

    The attorney is looked up by name among attorneys that already have a role
    on the docket. If none is found a new one is created; if several are
    found, the one with the earliest ``date_created`` is used. When contact
    text is present, the parsed organization is linked to the attorney for
    this docket and the attorney's contact fields are overwritten. Finally the
    attorney's roles for this party and docket are replaced.

    :param atty: A dict representing an attorney, as provided by Juriscraper.
    :param p: A Party object
    :param d: A Docket object
    :return: The primary key of the Attorney that was found or created.
    """
    org_fields, contact_fields = normalize_attorney_contact(
        atty['contact'],
        fallback_name=atty['name'],
    )

    # Look the attorney up by name, restricted to this docket.
    candidates = Attorney.objects.filter(
        name=atty['name'],
        roles__docket=d,
    ).distinct()
    n_found = candidates.count()
    if n_found >= 2:
        # Several candidates: settle on the one created first.
        logger.info("Got too many results for atty: '%s'. Picking earliest." %
                    atty)
        a = candidates.earliest('date_created')
    elif n_found == 1:
        # Exactly one candidate: use it.
        a = candidates[0]
    else:
        # No candidate on this docket yet: create the attorney.
        a = Attorney.objects.create(
            name=atty['name'],
            contact_raw=atty['contact'],
        )

    # Organization link and contact refresh only happen when the source
    # actually carried contact text.
    if atty['contact'] and org_fields:
        try:
            org = AttorneyOrganization.objects.get(
                lookup_key=org_fields['lookup_key'],
            )
        except AttorneyOrganization.DoesNotExist:
            try:
                org = AttorneyOrganization.objects.create(**org_fields)
            except IntegrityError:
                # Lost a race: the row appeared between get() and create(),
                # so fetch the one that now exists.
                org = AttorneyOrganization.objects.get(
                    lookup_key=org_fields['lookup_key'],
                )

        # Record the attorney's membership in the organization for this docket.
        AttorneyOrganizationAssociation.objects.get_or_create(
            attorney=a,
            attorney_organization=org,
            docket=d,
        )

    if atty['contact'] and contact_fields:
        a.contact_raw = atty['contact']
        a.email = contact_fields['email']
        a.phone = contact_fields['phone']
        a.fax = contact_fields['fax']
        a.save()

    # With no role data, fall back to a single UNKNOWN role.
    role_kwargs = atty['roles']
    if len(role_kwargs) == 0:
        role_kwargs = [{'role': Role.UNKNOWN, 'date_action': None}]

    # Drop the existing roles for this attorney/party/docket, then insert the
    # replacement set.
    Role.objects.filter(attorney=a, party=p, docket=d).delete()
    replacement_roles = [
        Role(attorney=a, party=p, docket=d, **kwargs) for kwargs in role_kwargs
    ]
    Role.objects.bulk_create(replacement_roles)
    return a.pk
Ejemplo n.º 5
0
 def test_normalize_atty_contact(self):
     """Compare normalize_attorney_contact output with expected values.

     Each dict in ``pairs`` carries the raw contact text under ``'q'`` and the
     expected result under ``'a'``: a 2-tuple of dicts, name/address fields
     first, then phone, fax and email.
     """
     pairs = [
         {
             # Email and phone number
             'q': (
                 "Landye Bennett Blumstein LLP\n"
                 "701 West Eighth Avenue, Suite 1200\n"
                 "Anchorage, AK 99501\n"
                 "907-276-5152\n"
                 "Email: [email protected]"
             ),
             'a': (
                 {
                     'name': u"Landye Bennett Blumstein LLP",
                     'address1': u'701 West Eighth Ave.',
                     'address2': u'Suite 1200',
                     'city': u'Anchorage',
                     'state': u'AK',
                     'zip_code': u'99501',
                     'lookup_key': u'701westeighthavesuite1200anchoragelandyebennettblumsteinak99501',
                 },
                 {
                     'email': u'*****@*****.**',
                     'phone': u'907-276-5152',
                     'fax': u'',
                 },
             ),
         },
         {
             # PO Box
             'q': (
                 "Sands Anderson PC\n"
                 "P.O. Box 2188\n"
                 "Richmond, VA 23218-2188\n"
                 "(804) 648-1636"
             ),
             'a': (
                 {
                     'name': u'Sands Anderson PC',
                     'address1': u'P.O. Box 2188',
                     'city': u'Richmond',
                     'state': u'VA',
                     'zip_code': u'23218-2188',
                     'lookup_key': u'pobox2188richmondsandsandersonva232182188',
                 },
                 {
                     'phone': u'804648-1636',
                     'fax': u'',
                     'email': u'',
                 },
             ),
         },
         {
             # Lowercase state (needs normalization)
             'q': (
                 "Sands Anderson PC\n"
                 "P.O. Box 2188\n"
                 "Richmond, va 23218-2188\n"
                 "(804) 648-1636"
             ),
             'a': (
                 {
                     'name': u'Sands Anderson PC',
                     'address1': u'P.O. Box 2188',
                     'city': u'Richmond',
                     'state': u'VA',
                     'zip_code': u'23218-2188',
                     'lookup_key': u'pobox2188richmondsandsandersonva232182188',
                 },
                 {
                     'phone': u"804648-1636",
                     'fax': u'',
                     'email': u'',
                 },
             ),
         },
         {
             # Phone, fax, and email -- the whole package.
             'q': (
                 "Susman Godfrey, LLP\n"
                 "1201 Third Avenue, Suite 3800\n"
                 "Seattle, WA 98101\n"
                 "206-373-7381\n"
                 "Fax: 206-516-3883\n"
                 "Email: [email protected]"
             ),
             'a': (
                 {
                     'name': u'Susman Godfrey, LLP',
                     'address1': u'1201 Third Ave.',
                     'address2': u'Suite 3800',
                     'city': u'Seattle',
                     'state': u'WA',
                     'zip_code': u'98101',
                     'lookup_key': u'1201thirdavesuite3800seattlesusmangodfreywa98101',
                 },
                 {
                     'phone': u'206-373-7381',
                     'fax': u'206-516-3883',
                     'email': u'*****@*****.**',
                 },
             ),
         },
         {
             # No recipient name
             'q': (
                 "211 E. Livingston Ave\n"
                 "Columbus, OH 43215\n"
                 "(614) 228-3727\n"
                 "Email:"
             ),
             'a': (
                 {
                     'address1': u'211 E. Livingston Ave',
                     'city': u'Columbus',
                     'state': u'OH',
                     'zip_code': u'43215',
                     'lookup_key': u'211elivingstonavecolumbusoh43215',
                 },
                 {
                     'phone': u'614228-3727',
                     'email': u'',
                     'fax': u'',
                 },
             ),
         },
         {
             # Weird ways of doing phone numbers
             'q': """1200 Boulevard Tower
                 1018 Kanawha Boulevard, E
                 Charleston, WV 25301
                 304/342-3174
                 Fax: 304/342-0448
                 Email: [email protected]
             """,
             'a': (
                 {
                     'address1': u'1018 Kanawha Blvd., E',
                     'address2': u'1200 Blvd. Tower',
                     'city': u'Charleston',
                     'state': u'WV',
                     'zip_code': u'25301',
                     'lookup_key': u'1018kanawhablvde1200blvdtowercharlestonwv25301',
                 },
                 {
                     'phone': '304342-3174',
                     'fax': '304342-0448',
                     'email': '*****@*****.**',
                 },
             ),
         },
         {
             # Empty fax numbers (b/c PACER).
             'q': """303 E 17th Ave
                 Suite 300
                 Denver, CO 80203
                 303-861-1764
                 Fax:
                 Email: [email protected]
         """,
             'a': (
                 {
                     'address1': u'303 E 17th Ave',
                     'address2': u'Suite 300',
                     'city': u'Denver',
                     'state': u'CO',
                     'zip_code': u'80203',
                     'lookup_key': u'303e17thavesuite300denverco80203',
                 },
                 {
                     'phone': u'303-861-1764',
                     'fax': u'',
                     'email': u'*****@*****.**',
                 },
             ),
         },
         {
             # Funky phone number
             'q': """Guerrini Law Firm
                 106 SOUTH MENTOR AVE. #150
                 Pasadena, CA 91106
                 626-229-9611-202
                 Fax: 626-229-9615
                 Email: [email protected]
             """,
             'a': (
                 {
                     'name': u'Guerrini Law Firm',
                     'address1': u'106 South Mentor Ave.',
                     'address2': u'# 150',
                     'city': u'Pasadena',
                     'state': u'CA',
                     'zip_code': u'91106',
                     'lookup_key': u'106southmentorave150pasadenaguerrinilawfirmca91106',
                 },
                 {
                     'phone': u'',
                     'fax': u'626-229-9615',
                     'email': u'*****@*****.**',
                 },
             ),
         },
         {
             # Address only: no phone, fax, or email lines in the input.
             'q': """Duncan & Sevin, LLC
                 400 Poydras St.
                 Suite 1200
                 New Orleans, LA 70130
             """,
             'a': (
                 {
                     'name': u'Duncan & Sevin, LLC',
                     'address1': u'400 Poydras St.',
                     'address2': u'Suite 1200',
                     'city': u'New Orleans',
                     'state': u'LA',
                     'zip_code': u'70130',
                     'lookup_key': u'400poydrasstsuite1200neworleansduncansevinllcla70130',
                 },
                 {
                     'phone': u'',
                     'fax': u'',
                     'email': u'',
                 },
             ),
         },
         {
             # Ambiguous address. Returns empty dict.
             'q': """Darden, Koretzky, Tessier, Finn, Blossman & Areaux
                 Energy Centre
                 1100 Poydras Street
                 Suite 3100
                 New Orleans, LA 70163
                 504-585-3800
                 Email: [email protected]
             """,
             'a': (
                 {},
                 {
                     'phone': u'504-585-3800',
                     'email': u'*****@*****.**',
                     'fax': u'',
                 },
             ),
         },
         {
             # Missing zip code, phone number ambiguously used instead.
             'q': """NSB - Department of Law
                 POB 69
                 Barrow, AK 907-852-0300
             """,
             'a': (
                 {
                     'name': u'NSB Department of Law',
                     'address1': u'Pob 69',
                     'city': u'Barrow',
                     'state': u'AK',
                     'zip_code': u'',
                     'lookup_key': u'pob69barrownsbdepartmentoflawak',
                 },
                 {
                     'phone': u'',
                     'fax': u'',
                     'email': u'',
                 },
             ),
         },
         {
             # Unknown/invalid state.
             'q': """Kessler Topaz Meltzer Check LLP
                 280 King of Prussia Road
                 Radnor, OA 19087
                 (610) 667-7706
                 Fax: (610) 667-7056
                 Email: [email protected]
             """,
             'a': (
                 {
                     'name': u'Kessler Topaz Meltzer Check LLP',
                     'city': u'Radnor',
                     'address1': u'280 King of Prussia Road',
                     'lookup_key': u'280kingofprussiaroadradnorkesslertopazmeltzercheck19087',
                     'state': u'',
                     'zip_code': u'19087',
                 },
                 {
                     'phone': u'610667-7706',
                     'fax': u'610667-7056',
                     'email': u'*****@*****.**',
                 },
             ),
         },
     ]
     # Keep assertion diffs untruncated so mismatched fields are visible.
     self.maxDiff = None
     for position, sample in enumerate(pairs):
         print("Normalizing address %s..." % position, end='')
         normalized = normalize_attorney_contact(sample['q'])
         self.assertEqual(normalized, sample['a'])
         print('✓')
Ejemplo n.º 6
0
def add_attorney(atty, p, d):
    """Add/update an attorney.

    Given an attorney node, and a party and a docket object, add the attorney
    to the database or link the attorney to the new docket. Also add/update the
    attorney organization, and the attorney's role in the case.

    An existing attorney is matched on name AND at least one of: phone, fax,
    email, raw contact string, or organization lookup key (whichever of those
    are non-empty). If nothing matches, a new attorney is created.

    :param atty: A dict representing an attorney, as provided by Juriscraper.
    Its 'name', 'contact' and 'roles' keys are read here.
    :param p: A Party object
    :param d: A Docket object
    :return: None if more than one existing attorney matches, or the Attorney
    object otherwise.
    :raises ValueError: If none of the docket's date_filed, date_terminated or
    date_last_filing is set (max() of an empty list).
    """
    docket_dates = [d.date_filed, d.date_terminated, d.date_last_filing]
    newest_docket_date = max([dt for dt in docket_dates if dt])
    atty_org_info, atty_info = normalize_attorney_contact(
        atty['contact'],
        fallback_name=atty['name'],
    )

    # OR together every contact detail we actually have; empty values are
    # skipped so that blank fields never count as a match.
    lookups = (
        ('phone', atty_info['phone']),
        ('fax', atty_info['fax']),
        ('email', atty_info['email']),
        ('contact_raw', atty['contact']),
        ('organizations__lookup_key', atty_org_info.get('lookup_key')),
    )
    q = Q()
    for field, lookup in lookups:
        if lookup:
            q |= Q(**{field: lookup})

    # Only the lookup itself can raise MultipleObjectsReturned, so it is the
    # only statement inside the try.
    try:
        a, created = Attorney.objects.filter(
            Q(name=atty['name']) & q,
        ).get_or_create(
            defaults={
                'name': atty['name'],
                'date_sourced': newest_docket_date,
                'contact_raw': atty['contact'],
            },
        )
    except Attorney.MultipleObjectsReturned:
        logger.info("Got too many results for attorney: '%s'. Punting.", atty)
        return None

    # Associate the attorney with an org and update their contact info.
    if atty['contact']:
        if atty_org_info:
            logger.info("Adding organization information to '%s': '%s'",
                        atty['name'], atty_org_info)
            # Look the organization up by its key; create it from the full
            # normalized info if it does not exist yet.
            org, _ = AttorneyOrganization.objects.get_or_create(
                lookup_key=atty_org_info['lookup_key'],
                defaults=atty_org_info,
            )

            # Add the attorney to the organization
            AttorneyOrganizationAssociation.objects.get_or_create(
                attorney=a,
                attorney_organization=org,
                docket=d,
            )

        # "<=" means a docket with the same date as the stored info also
        # refreshes the contact details.
        docket_info_is_newer = (a.date_sourced <= newest_docket_date)
        if atty_info and docket_info_is_newer:
            logger.info("Updating atty info because %s is more recent than %s.",
                        newest_docket_date, a.date_sourced)
            a.date_sourced = newest_docket_date
            a.contact_raw = atty['contact']
            a.email = atty_info['email']
            a.phone = atty_info['phone']
            a.fax = atty_info['fax']
            a.save()

    # Do roles
    atty_roles = [normalize_attorney_role(r) for r in atty['roles']]
    # A real list (not a lazy filter object) so that it can be measured and
    # iterated more than once regardless of Python version.
    atty_roles = [r for r in atty_roles if r['role'] is not None]
    atty_roles = remove_duplicate_dicts(atty_roles)
    if atty_roles:
        logger.info("Linking attorney '%s' to party '%s' via %s roles: %s",
                    atty['name'], p.name, len(atty_roles), atty_roles)
    else:
        logger.info("No role data parsed. Linking via 'UNKNOWN' role.")
        atty_roles = [{'role': Role.UNKNOWN, 'date_action': None}]

    # Delete the old roles, replace with new.
    Role.objects.filter(attorney=a, party=p, docket=d).delete()
    Role.objects.bulk_create([
        Role(attorney=a, party=p, docket=d, **atty_role) for
        atty_role in atty_roles
    ])
    return a