def add_attorneys(self, docket, party_node, party, atty_obj_cache, debug):
    """Add or update every attorney listed under a party node.

    For each ``attorney_list/attorney`` element beneath ``party_node``: find
    or create the matching Attorney, associate it with its organization on
    this docket, refresh its contact details when the docket is at least as
    recent as the stored info, and replace its roles for this party/docket.

    :param docket: The Docket object; its ``date_filed``, ``date_terminated``
    and ``date_last_filing`` attributes are read.
    :param party_node: A node exposing ``xpath``, searched for
    ``.//attorney_list/attorney`` children.
    :param party: The Party object the attorneys are linked to.
    :param atty_obj_cache: A dict mapping attorney name to a tuple of
    ``(attorney, org_info, contact_info)``. It is read to resolve "see above"
    contact entries and updated in place for every attorney whose contact
    entry is not "see above".
    :param debug: When truthy, no database writes are made (lookups still
    run).
    :return: None
    """
    # Get the most recent date on the docket. We'll use this to have the
    # most updated attorney info.
    # NOTE(review): max() raises ValueError if all three dates are unset;
    # presumably at least one is always populated -- confirm with callers.
    newest_docket_date = max([
        d for d in [
            docket.date_filed,
            docket.date_terminated,
            docket.date_last_filing,
        ] if d
    ])
    atty_nodes = party_node.xpath('.//attorney_list/attorney')
    logger.info("Adding %s attorneys to the party." % len(atty_nodes))
    for atty_node in atty_nodes:
        atty_name = self.get_str_from_node(atty_node, 'attorney_name')
        logger.info("Adding attorney: '%s'" % atty_name)
        atty_contact_raw = self.get_str_from_node(atty_node, 'contact')
        if 'see above' in atty_contact_raw.lower():
            logger.info("Got 'see above' entry for atty_contact_raw.")
            # Blank the contact so the organization/contact update further
            # down is skipped for this entry.
            atty_contact_raw = ''
            try:
                atty, atty_org_info, atty_info = atty_obj_cache[atty_name]
            except KeyError:
                logger.warning("Unable to lookup 'see above' entry. "
                               "Creating/using atty with no contact info.")
                try:
                    atty = Attorney.objects.get(
                        name=atty_name, contact_raw=atty_contact_raw)
                except Attorney.DoesNotExist:
                    atty = Attorney(name=atty_name,
                                    date_sourced=newest_docket_date,
                                    contact_raw=atty_contact_raw)
                    if not debug:
                        atty.save()
                except Attorney.MultipleObjectsReturned:
                    # Several contact-less attorneys share this name; we
                    # cannot tell which one is meant, so skip the entry
                    # rather than abort the whole party.
                    logger.warning("Got too many results for attorney: '%s' "
                                   "Punting." % atty_name)
                    continue
        else:
            # New attorney for this docket. Look them up in DB or create new
            # attorney if necessary.
            atty_org_info, atty_info = normalize_attorney_contact(
                atty_contact_raw, fallback_name=atty_name)
            try:
                logger.info("Didn't find attorney in cache, attempting "
                            "lookup in the DB.")
                # Find an atty with the same name and one of another several
                # IDs. Important to add contact_raw here, b/c if it cannot
                # be parsed, all other values are blank.
                q = Q()
                fields = [
                    ('phone', atty_info['phone']),
                    ('fax', atty_info['fax']),
                    ('email', atty_info['email']),
                    ('contact_raw', atty_contact_raw),
                    ('organizations__lookup_key',
                     atty_org_info.get('lookup_key')),
                ]
                for field, lookup in fields:
                    if lookup:
                        q |= Q(**{field: lookup})
                atty = Attorney.objects.get(Q(name=atty_name) & q)
            except Attorney.DoesNotExist:
                logger.info("Unable to find matching attorney. Creating a "
                            "new one: %s" % atty_name)
                atty = Attorney(name=atty_name,
                                date_sourced=newest_docket_date,
                                contact_raw=atty_contact_raw)
                if not debug:
                    atty.save()
            except Attorney.MultipleObjectsReturned:
                logger.warning("Got too many results for attorney: '%s' "
                               "Punting." % atty_name)
                continue

            # Cache the atty object and info for "See above" entries.
            atty_obj_cache[atty_name] = (atty, atty_org_info, atty_info)

        if atty_contact_raw:
            if atty_org_info:
                logger.info("Adding organization information to "
                            "'%s': %s" % (atty_name, atty_org_info))
                try:
                    org = AttorneyOrganization.objects.get(
                        lookup_key=atty_org_info['lookup_key'],
                    )
                except AttorneyOrganization.DoesNotExist:
                    org = AttorneyOrganization(**atty_org_info)
                    if not debug:
                        org.save()

                # Add the attorney to the organization
                if not debug:
                    AttorneyOrganizationAssociation.objects.get_or_create(
                        attorney=atty,
                        attorney_organization=org,
                        docket=docket,
                    )

            # "<=" so that a docket with the same date as the stored info
            # still refreshes the contact details.
            atty_info_is_newer = (atty.date_sourced <= newest_docket_date)
            if atty_info and atty_info_is_newer:
                logger.info("Updating atty info because %s is more recent "
                            "than %s." % (newest_docket_date,
                                          atty.date_sourced))
                atty.date_sourced = newest_docket_date
                atty.contact_raw = atty_contact_raw
                atty.email = atty_info['email']
                atty.phone = atty_info['phone']
                atty.fax = atty_info['fax']
                if not debug:
                    atty.save()

        # Roles arrive as one newline-separated string; blank lines and
        # entries whose role could not be parsed are dropped.
        atty_role_str = self.get_str_from_node(atty_node, 'attorney_role')
        atty_roles = [
            normalize_attorney_role(r) for r in atty_role_str.split('\n')
            if r
        ]
        atty_roles = [r for r in atty_roles if r['role'] is not None]
        atty_roles = remove_duplicate_dicts(atty_roles)
        if len(atty_roles) > 0:
            logger.info(
                "Linking attorney '%s' to party '%s' via %s "
                "roles: %s" % (atty_name, party.name, len(atty_roles),
                               atty_roles))
        else:
            logger.info("No role data parsed. Linking via 'UNKNOWN' role.")
            atty_roles = [{'role': Role.UNKNOWN, 'date_action': None}]

        if not debug:
            # Delete the old roles, replace with new.
            Role.objects.filter(attorney=atty, party=party,
                                docket=docket).delete()
            Role.objects.bulk_create([
                Role(attorney=atty, party=party, docket=docket, **atty_role)
                for atty_role in atty_roles
            ])
def add_attorney(atty, p, d):
    """Add/update an attorney.

    Given an attorney dict, and a party and a docket object, add the attorney
    to the database or link the attorney to the new docket. Also add/update
    the attorney organization, and the attorney's role in the case.

    :param atty: A dict representing an attorney, as provided by Juriscraper.
    The 'name', 'contact' and 'roles' keys are read.
    :param p: A Party object
    :param d: A Docket object
    :return: None if more than one existing attorney matches the lookup, or
    the Attorney object otherwise.
    """
    # NOTE(review): max() raises ValueError if all three dates are unset;
    # presumably at least one is always populated -- confirm with callers.
    newest_docket_date = max([
        dt for dt in [d.date_filed, d.date_terminated, d.date_last_filing]
        if dt
    ])
    atty_org_info, atty_info = normalize_attorney_contact(
        atty['contact'],
        fallback_name=atty['name'],
    )

    # OR together every identifier that has a value. If none has one, q stays
    # empty and the lookup below matches on the name alone.
    q = Q()
    fields = [
        ('phone', atty_info['phone']),
        ('fax', atty_info['fax']),
        ('email', atty_info['email']),
        ('contact_raw', atty['contact']),
        ('organizations__lookup_key', atty_org_info.get('lookup_key')),
    ]
    for field, lookup in fields:
        if lookup:
            q |= Q(**{field: lookup})

    try:
        # distinct() because the lookup can follow the organizations
        # relation, which may yield the same attorney more than once.
        a, _created = Attorney.objects.filter(
            Q(name=atty['name']) & q,
        ).distinct().get_or_create(
            defaults={
                'name': atty['name'],
                'date_sourced': newest_docket_date,
                'contact_raw': atty['contact'],
            },
        )
    except Attorney.MultipleObjectsReturned:
        logger.info("Got too many results for attorney: '%s'. Punting." % atty)
        return None

    # Associate the attorney with an org and update their contact info.
    if atty['contact']:
        if atty_org_info:
            logger.info("Adding organization information to '%s': '%s'" %
                        (atty['name'], atty_org_info))
            try:
                org = AttorneyOrganization.objects.get(
                    lookup_key=atty_org_info['lookup_key'],
                )
            except AttorneyOrganization.DoesNotExist:
                try:
                    org = AttorneyOrganization.objects.create(**atty_org_info)
                except IntegrityError:
                    # Race condition. Item was created after get. Try again.
                    org = AttorneyOrganization.objects.get(
                        lookup_key=atty_org_info['lookup_key'],
                    )

            # Add the attorney to the organization
            AttorneyOrganizationAssociation.objects.get_or_create(
                attorney=a,
                attorney_organization=org,
                docket=d,
            )

        # "<=" so that a docket with the same date as the stored info still
        # refreshes the contact details.
        docket_info_is_newer = (a.date_sourced <= newest_docket_date)
        if atty_info and docket_info_is_newer:
            logger.info("Updating atty info because %s is more recent than %s."
                        % (newest_docket_date, a.date_sourced))
            a.date_sourced = newest_docket_date
            a.contact_raw = atty['contact']
            a.email = atty_info['email']
            a.phone = atty_info['phone']
            a.fax = atty_info['fax']
            a.save()

    # Do roles
    atty_roles = [normalize_attorney_role(r) for r in atty['roles']]
    # A real list (not a lazy filter object) so len() below works on both
    # Python 2 and Python 3.
    atty_roles = [r for r in atty_roles if r['role'] is not None]
    atty_roles = remove_duplicate_dicts(atty_roles)
    if len(atty_roles) > 0:
        logger.info("Linking attorney '%s' to party '%s' via %s roles: %s" %
                    (atty['name'], p.name, len(atty_roles), atty_roles))
    else:
        logger.info("No role data parsed. Linking via 'UNKNOWN' role.")
        atty_roles = [{'role': Role.UNKNOWN, 'date_action': None}]

    # Delete the old roles, replace with new.
    Role.objects.filter(attorney=a, party=p, docket=d).delete()
    Role.objects.bulk_create([
        Role(attorney=a, party=p, docket=d, **atty_role)
        for atty_role in atty_roles
    ])
    return a
def add_attorney(atty, p, d):
    """Add/update an attorney.

    Given an attorney dict, and a party and a docket object, add the attorney
    to the database or link the attorney to the new docket. Also add/update
    the attorney organization, and the attorney's role in the case.

    :param atty: A dict representing an attorney, as provided by Juriscraper.
    The 'name', 'contact' and 'roles' keys are read.
    :param p: A Party object
    :param d: A Docket object
    :return: The primary key of the Attorney that was found or created.
    """
    atty_org_info, atty_info = normalize_attorney_contact(
        atty['contact'],
        fallback_name=atty['name'],
    )

    # Try lookup by atty name in the docket.
    attys = Attorney.objects.filter(name=atty['name'],
                                    roles__docket=d).distinct()
    count = attys.count()
    if count == 0:
        # Couldn't find the attorney. Make one.
        a = Attorney.objects.create(
            name=atty['name'],
            contact_raw=atty['contact'],
        )
    elif count == 1:
        # Nailed it.
        a = attys[0]
    else:
        # Too many found; keep the one that was created first.
        logger.info("Got too many results for atty: '%s'. Picking earliest." %
                    atty)
        a = attys.earliest('date_created')

    # Associate the attorney with an org and update their contact info.
    if atty['contact']:
        if atty_org_info:
            try:
                org = AttorneyOrganization.objects.get(
                    lookup_key=atty_org_info['lookup_key'],
                )
            except AttorneyOrganization.DoesNotExist:
                try:
                    org = AttorneyOrganization.objects.create(**atty_org_info)
                except IntegrityError:
                    # Race condition. Item was created after get. Try again.
                    org = AttorneyOrganization.objects.get(
                        lookup_key=atty_org_info['lookup_key'],
                    )

            # Add the attorney to the organization
            AttorneyOrganizationAssociation.objects.get_or_create(
                attorney=a,
                attorney_organization=org,
                docket=d,
            )

        if atty_info:
            a.contact_raw = atty['contact']
            a.email = atty_info['email']
            a.phone = atty_info['phone']
            a.fax = atty_info['fax']
            a.save()

    # Do roles. The role dicts are used as provided; fall back to a single
    # UNKNOWN role when none were supplied.
    roles = atty['roles']
    if not roles:
        roles = [{'role': Role.UNKNOWN, 'date_action': None}]

    # Delete the old roles, replace with new.
    Role.objects.filter(attorney=a, party=p, docket=d).delete()
    Role.objects.bulk_create([
        Role(attorney=a, party=p, docket=d, **atty_role)
        for atty_role in roles
    ])
    return a.pk