Python PersonRole Examples

Programming Language: Python

Namespace/Package Name: person.models

Class/Type: PersonRole

Examples at hotexamples.com: 3

Python PersonRole - 3 examples found. These are the top-rated real-world Python examples of person.models.PersonRole extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

PersonRole(3)

Frequently Used Methods

PersonRole (3)

Example #1

Show file

def main(options):
    """
    Update Person and PersonRole models.
    
    Do safe update: touch only those records
    which have been changed.
    """
    # Attributes read from `options` in this function:
    #   options.force          -- proceed even if no source file changed, and
    #                             save records even if they compare unchanged.
    #   options.disable_events -- skip the create_events() pass for roles.
    # Returns None early when no source file changed and force is not set.

    BASE_PATH = CONGRESS_LEGISLATORS_PATH
    SRC_FILES = ['legislators-current', 'legislators-historical', 'legislators-social-media', 'executive'] # order matters

    # Look for the first source file that changed (or any file when forced).
    # The `else` clause of the loop runs only if no iteration hit `break`.
    for p in SRC_FILES:
        f = BASE_PATH + p + ".yaml"
        if not File.objects.is_changed(f) and not options.force:
            log.info('File %s was not changed' % f)
        else:
            # file modified...
            break
    else:
        # no 'break' ==> no files modified
        return

    # Start parsing.

    # Set when a record has no GovTrack ID or when processing a person raises.
    # NOTE(review): not read again in the visible part of this function.
    had_error = False

    # Get combined data.
    legislator_data = { } # govtrack_id -> merged YAML record
    leg_id_map = { } # (id key, id value) -> govtrack_id
    for p in SRC_FILES:
        log.info('Opening %s...' % p)
        f = BASE_PATH + p + ".yaml"
        y = yaml_load(f)
        for m in y:
            if p != 'legislators-social-media':
                govtrack_id = m["id"].get("govtrack")

                # For the benefit of the social media file, make a mapping of IDs.
                # List-valued IDs are skipped (a list cannot be part of a dict key).
                for k, v in m["id"].items():
                    if type(v) != list:
                        leg_id_map[(k,v)] = govtrack_id
            else:
                # GovTrack IDs are not always listed in this file.
                # Resolve it through the first ID already seen in an earlier file.
                govtrack_id = None
                for k, v in m["id"].items():
                    if type(v) != list and (k, v) in leg_id_map:
                        govtrack_id = leg_id_map[(k,v)]
                        break

            # Records that cannot be tied to a GovTrack ID are reported and skipped.
            if not govtrack_id:
                print "No GovTrack ID:"
                pprint.pprint(m)
                had_error = True
                continue

            # The first record seen for an ID becomes the base record. Later files
            # may only contribute "social" data or extra "terms"; any other
            # duplicate is treated as an error.
            if govtrack_id not in legislator_data:
                legislator_data[govtrack_id] = m
            elif p == "legislators-social-media":
                legislator_data[govtrack_id]["social"] = m["social"]
            elif p == "executive":
                legislator_data[govtrack_id]["terms"].extend( m["terms"] )
            else:
                raise ValueError("Duplication in an unexpected way (%d, %s)." % (govtrack_id, p))

    person_processor = PersonProcessor()
    role_processor = PersonRoleProcessor()

    # NOTE(review): existing_persons and created_persons are filled in but not
    # read again in the visible part of this function.
    existing_persons = set(Person.objects.values_list('pk', flat=True))
    processed_persons = set()
    created_persons = set()

    progress = Progress(total=len(legislator_data))
    log.info('Processing persons')

    for node in legislator_data.values():
        # Wrap each iteration in try/except
        # so that if some node breaks the parsing process
        # then other nodes could be parsed
        try:
            person = person_processor.process(Person(), node)

            # Create cached name strings. This is done again later
            # after the roles are updated.
            person.set_names()

            # Now try to load the person with such ID from
            # database. If found it then just update it
            # else create new Person object
            try:
                ex_person = Person.objects.get(pk=person.pk)
                if person_processor.changed(ex_person, person) or options.force:
                    # If the person has PK of existing record,
                    # coming in via the YAML-specified GovTrack ID,
                    # then Django ORM will update existing record
                    if not options.force:
                        log.warn("Updated %s" % person)
                    person.save()

            except Person.DoesNotExist:
                created_persons.add(person.pk)
                person.save()
                log.warn("Created %s" % person)

            processed_persons.add(person.pk)

            # Process roles of the person
            # `roles` is the pool of database roles not yet matched to a term;
            # `existing_roles` keeps all of their primary keys so that roles
            # missing from the data can be detected afterwards.
            roles = list(PersonRole.objects.filter(person=person))
            existing_roles = set(PersonRole.objects.filter(person=person).values_list('pk', flat=True))
            processed_roles = set()
            role_list = [] # roles saved during this run, in term order
            for role in node['terms']:
                role = role_processor.process(PersonRole(), role)
                role.person = person

                # A role is current when today falls within [startdate, enddate].
                role.current = role.startdate <= datetime.now().date() and role.enddate >= datetime.now().date() # \
                        #and CURRENT_CONGRESS in role.congress_numbers()

                # Scan for most recent leadership role within the time period of this term,
                # which isn't great for Senators because it's likely it changed a few times
                # within a term, especially if there was a party switch.
                # The last matching entry wins because nothing breaks out of the loop.
                # Dates are compared against isoformat() strings.
                # NOTE(review): presumably "start"/"end" are ISO date strings -- confirm.
                role.leadership_title = None
                for leadership_node in node.get("leadership_roles", []):
                    # must match on date and chamber
                    if leadership_node["start"] >= role.enddate.isoformat(): continue # might start on the same day but is for the next Congress
                    if "end" in leadership_node and leadership_node["end"] <= role.startdate.isoformat(): continue # might start on the same day but is for the previous Congress
                    if leadership_node["chamber"] != RoleType.by_value(role.role_type).congress_chamber.lower(): continue
                    role.leadership_title = leadership_node["title"]

                # Try to match this role with one already in the database.
                # First search for an exact match on type/start/end.
                ex_role = None
                for r in roles:
                    if role.role_type == r.role_type and r.startdate == role.startdate and r.enddate == role.enddate:
                        ex_role = r
                        break

                # Otherwise match on type/start only.
                if not ex_role:
                    for r in roles:
                        if role.role_type == r.role_type and r.startdate == role.startdate:
                            ex_role = r
                            break

                if ex_role:    
                    # These roles correspond.
                    # Reusing the existing id makes save() target that record.
                    processed_roles.add(ex_role.id)
                    role.id = ex_role.id
                    if role_processor.changed(ex_role, role) or options.force:
                        role.save()
                        role_list.append(role)
                        if not options.force:
                            log.warn("Updated %s" % role)
                    roles.remove(ex_role) # don't need to try matching this to any other node
                else:
                    # Didn't find a matching role.
                    # If an unmatched database role of the same type remains, the
                    # alignment is ambiguous: print the candidates and abort this person.
                    if len([r for r in roles if r.role_type == role.role_type]) > 0:
                        print role, "is one of these?"
                        for ex_role in roles:
                            print "\t", ex_role
                        raise Exception("There is an unmatched role.")
                    log.warn("Created %s" % role)
                    role.save()
                    role_list.append(role)

            # create the events for the roles after all have been loaded
            # because we don't create events for ends of terms and
            # starts of terms that are adjacent.
            # Each role receives its previous and next neighbour in role_list
            # (None at either end). role_list only holds roles saved above.
            if not options.disable_events:
                for i in xrange(len(role_list)):
                    role_list[i].create_events(
                        role_list[i-1] if i > 0 else None,
                        role_list[i+1] if i < len(role_list)-1 else None
                        )

            # Database roles that matched no term in the data.
            removed_roles = existing_roles - processed_roles
            for pk in removed_roles:
                pr = PersonRole.objects.get(pk=pk)
                print pr.person.id, pr
                # The raise aborts processing of this person; the two statements
                # after it are unreachable, so no role is actually deleted.
                raise ValueError("Deleted role??")
                log.warn("Deleted %s" % pr)
                pr.delete()

            # The name can't be determined until all of the roles are set. If
            # it changes, re-save. Unfortunately roles are cached so this actually
            # doesn't work yet. Re-run the parser to fix names.
            nn = (person.name, person.sortname)
            if hasattr(person, "role"): delattr(person, "role") # clear the cached info
            person.set_names()
            if nn != (person.name, person.sortname):
                log.warn("%s is now %s." % (nn[0], person.name))
                person.save()

        except Exception, ex:
            # Catch unexpected exceptions and log them
            # The offending record is dumped and the loop moves on to the next person.
            pprint.pprint(node)
            log.error('', exc_info=ex)
            had_error = True

        progress.tick()

Example #2

Show file

def main(options):
    """
    Update Person and PersonRole models.

    Do safe update: touch only those records
    which have been changed.

    ``options.force`` makes the run proceed even when no source file changed
    and saves records even when they compare unchanged;
    ``options.disable_events`` skips event creation for roles. Returns None
    early when no source file changed and ``options.force`` is not set.
    """

    BASE_PATH = CONGRESS_LEGISLATORS_PATH
    SRC_FILES = [
        'legislators-current', 'legislators-historical',
        'legislators-social-media', 'executive'
    ]  # order matters

    # Look for the first source file that changed (or any file when forced).
    # The `else` clause of the loop runs only if no iteration hit `break`.
    for p in SRC_FILES:
        f = BASE_PATH + p + ".yaml"
        if not File.objects.is_changed(f) and not options.force:
            log.info('File %s was not changed' % f)
        else:
            # file modified...
            break
    else:
        # no 'break' ==> no files modified
        return

    # Start parsing.

    # Set when a record has no GovTrack ID or when processing a person raises;
    # it gates the cleanup and the "mark files as processed" step at the end.
    had_error = False

    # Get combined data.
    legislator_data = {}  # govtrack_id -> merged YAML record
    leg_id_map = {}  # (id key, id value) -> govtrack_id
    for p in SRC_FILES:
        log.info('Opening %s...' % p)
        f = BASE_PATH + p + ".yaml"
        y = yaml_load(f)
        for m in y:
            # Tag legislator terms with an explicit "current" flag based on the
            # file they came from. Terms from the other files get no flag.
            if p == "legislators-current":
                # We know all terms but the last are non-current and the last is.
                for r in m["terms"]:
                    r["current"] = False
                m["terms"][-1]["current"] = True
            elif p == "legislators-historical":
                # We know all terms are non-current.
                for r in m["terms"]:
                    r["current"] = False

            if p != 'legislators-social-media':
                govtrack_id = m["id"].get("govtrack")

                # For the benefit of the social media file, make a mapping of IDs.
                # List-valued IDs are skipped (a list cannot be part of a dict key).
                for k, v in m["id"].items():
                    if type(v) != list:
                        leg_id_map[(k, v)] = govtrack_id
            else:
                # GovTrack IDs are not always listed in this file.
                # Resolve it through the first ID already seen in an earlier file.
                govtrack_id = None
                for k, v in m["id"].items():
                    if type(v) != list and (k, v) in leg_id_map:
                        govtrack_id = leg_id_map[(k, v)]
                        break

            # Records that cannot be tied to a GovTrack ID are reported and skipped.
            if not govtrack_id:
                print("No GovTrack ID:")
                pprint.pprint(m)
                had_error = True
                continue

            # The first record seen for an ID becomes the base record. Later files
            # may only contribute "social" data or extra "terms"; any other
            # duplicate is treated as an error.
            if govtrack_id not in legislator_data:
                legislator_data[govtrack_id] = m
            elif p == "legislators-social-media":
                legislator_data[govtrack_id]["social"] = m["social"]
            elif p == "executive":
                legislator_data[govtrack_id]["terms"].extend(m["terms"])
            else:
                raise ValueError("Duplication in an unexpected way (%d, %s)." %
                                 (govtrack_id, p))

    person_processor = PersonProcessor()
    role_processor = PersonRoleProcessor()

    # Primary keys present before the run; compared with processed_persons at
    # the end to find persons that no longer appear in the data.
    existing_persons = set(Person.objects.values_list('pk', flat=True))
    processed_persons = set()
    created_persons = set()

    progress = Progress(total=len(legislator_data))
    log.info('Processing persons')

    for node in legislator_data.values():
        # Wrap each iteration in try/except
        # so that if some node breaks the parsing process
        # then other nodes could be parsed
        try:
            person = person_processor.process(Person(), node)

            # Create cached name strings. This is done again later
            # after the roles are updated.
            person.set_names()

            # Now try to load the person with such ID from
            # database. If found it then just update it
            # else create new Person object
            try:
                ex_person = Person.objects.get(pk=person.pk)
                if person_processor.changed(ex_person,
                                            person) or options.force:
                    # If the person has PK of existing record,
                    # coming in via the YAML-specified GovTrack ID,
                    # then Django ORM will update existing record
                    if not options.force:
                        log.warn("Updated %s" % person)
                    person.save()

            except Person.DoesNotExist:
                created_persons.add(person.pk)
                person.save()
                log.warn("Created %s" % person)

            processed_persons.add(person.pk)

            # Parse all of the roles.
            # Nothing is saved in this loop; new_roles collects unsaved
            # PersonRole objects in term order.
            new_roles = []
            for termnode in node['terms']:
                role = role_processor.process(PersonRole(), termnode)
                role.person = person
                role.extra = filter_yaml_term_structure(
                    termnode)  # copy in the whole YAML structure

                # Is this role current? For legislators, same as whether it came from legislators-current, which eases Jan 3 transitions when we can't distinguish by date.
                if "current" in termnode:
                    role.current = termnode["current"]

                # But executives...
                else:
                    now = datetime.now().date()
                    role.current = role.startdate <= now and role.enddate >= now
                    # Because of date overlaps at noon transition dates, ensure that only the last term that covers
                    # today is current --- reset past roles to not current. Doesn't handle turning off retiring people tho.
                    # NOTE(review): the reset below runs for every term that
                    # lacks a "current" flag, whether or not this term is itself
                    # current, and it clears the flag on all earlier roles of the
                    # person (legislator terms included) -- confirm intended.
                    for r in new_roles:
                        r.current = False

                # Scan for most recent leadership role within the time period of this term,
                # which isn't great for Senators because it's likely it changed a few times
                # within a term, especially if there was a party switch.
                # The last matching entry wins because nothing breaks out of the loop.
                # Dates are compared against isoformat() strings.
                # NOTE(review): presumably "start"/"end" are ISO date strings -- confirm.
                role.leadership_title = None
                for leadership_node in node.get("leadership_roles", []):
                    # must match on date and chamber
                    if leadership_node["start"] >= role.enddate.isoformat():
                        continue  # might start on the same day but is for the next Congress
                    if "end" in leadership_node and leadership_node[
                            "end"] <= role.startdate.isoformat():
                        continue  # might start on the same day but is for the previous Congress
                    if leadership_node["chamber"] != RoleType.by_value(
                            role.role_type).congress_chamber.lower():
                        continue
                    role.leadership_title = leadership_node["title"]

                new_roles.append(role)

            # Try matching the new roles to existing db records. Since we don't have a primary key
            # in the source data, we have to match on the record values. But because of errors in data,
            # term start/end dates can change, so matching has to be a little fuzzy.
            existing_roles = list(PersonRole.objects.filter(person=person))
            matches = []  # (new_role, existing_role) pairs

            # Pair up still-unmatched new and existing roles that share role_type
            # and state and satisfy `rule(new_role, existing_role)`. Matched roles
            # are moved out of new_roles / existing_roles into `matches`, so that
            # later, looser rules only see the leftovers.
            def run_match_rule(rule):
                import itertools
                # The lists are modified inside the loop, so each pair is
                # re-checked for membership before use.
                # NOTE(review): this relies on itertools.product reading both
                # lists in full before it yields the first pair.
                for new_role, existing_role in itertools.product(
                        new_roles, existing_roles):
                    if new_role not in new_roles or existing_role not in existing_roles:
                        continue  # already matched on a previous iteration
                    if new_role.role_type != existing_role.role_type: continue
                    if new_role.state != existing_role.state: continue
                    if rule(new_role, existing_role):
                        matches.append((new_role, existing_role))
                        new_roles.remove(new_role)
                        existing_roles.remove(existing_role)

            # First match exactly, then exact on just one date, then on contractions and expansions.
            # Rule 1: both dates equal.
            run_match_rule(lambda new_role, existing_role: new_role.startdate
                           == existing_role.startdate and new_role.enddate ==
                           existing_role.enddate)
            # Rule 2: either date equal.
            run_match_rule(lambda new_role, existing_role: new_role.startdate
                           == existing_role.startdate or new_role.enddate ==
                           existing_role.enddate)
            # Rule 3: the new term lies within the existing one (contraction).
            run_match_rule(lambda new_role, existing_role: new_role.startdate
                           >= existing_role.startdate and new_role.enddate <=
                           existing_role.enddate)
            # Rule 4: the new term covers the existing one (expansion).
            run_match_rule(lambda new_role, existing_role: new_role.startdate
                           <= existing_role.startdate and new_role.enddate >=
                           existing_role.enddate)

            # Update the database entries that correspond with records in the data file.
            # Reusing the existing id makes save() target that record.
            did_update_any = False
            for new_role, existing_role in matches:
                new_role.id = existing_role.id
                if role_processor.changed(existing_role,
                                          new_role) or options.force:
                    new_role.save()
                    did_update_any = True
                    if not options.force:
                        log.warn("Updated %s" % new_role)

            # If we have multiple records on disk that didn't match and multiple records in the database
            # that didn't match, then we don't know how to align them.
            # (The test fires as soon as both lists are non-empty.)
            if len(new_roles) > 0 and len(existing_roles) > 0:
                print(new_roles)
                print(existing_roles)
                raise Exception("There is an unmatched role.")

            # Otherwise if there are any unmatched new roles, we can just add them.
            for role in new_roles:
                log.warn("Created %s" % role)
                role.save()
                did_update_any = True

            # And likewise for any existing roles that are left over.
            for pr in existing_roles:
                print(pr.person.id, pr)
                # The raise aborts processing of this person; the two statements
                # after it are unreachable, so no role is actually deleted.
                raise ValueError("Deleted role??")
                log.warn("Deleted %s" % pr)
                pr.delete()

            if did_update_any and not options.disable_events:
                # Create the events for the roles after all have been loaded
                # because we don't create events for ends of terms and
                # starts of terms that are adjacent. Refresh the list to get
                # the roles in order.
                role_list = list(
                    PersonRole.objects.filter(
                        person=person).order_by('startdate'))
                # Each role receives its previous and next neighbour (None at
                # either end of the list).
                for i in range(len(role_list)):
                    role_list[i].create_events(
                        role_list[i - 1] if i > 0 else None,
                        role_list[i + 1] if i < len(role_list) - 1 else None)

            # The name can't be determined until all of the roles are set. If
            # it changes, re-save. Unfortunately roles are cached so this actually
            # doesn't work yet. Re-run the parser to fix names.
            nn = (person.name, person.sortname)
            if hasattr(person, "role"):
                delattr(person, "role")  # clear the cached info
            person._most_recent_role = None  # clear cache here too
            person.set_names()
            if nn != (person.name, person.sortname):
                log.warn("%s is now %s." % (nn[0], person.name))
                person.save()

        except Exception as ex:
            # Catch unexpected exceptions and log them
            # The offending record is dumped and the loop moves on to the next person.
            pprint.pprint(node)
            log.error('', exc_info=ex)
            had_error = True

        progress.tick()

    log.info('Processed persons: %d' % len(processed_persons))
    log.info('Created persons: %d' % len(created_persons))

    # Cleanup and bookkeeping only happen after a completely clean run.
    if not had_error:
        # Report persons that were not found in the source files.
        removed_persons = existing_persons - processed_persons
        for pk in removed_persons:
            p = Person.objects.get(pk=pk)
            if p.roles.all().count() > 0:
                log.warn("Missing? Deleted? %d: %s" % (p.id, p))
            else:
                log.warn("Deleting... %d: %s (remember to prune_index!)" %
                         (p.id, p))
                # This raise is outside the per-person try/except, so it
                # propagates out of main(); the delete after it is unreachable.
                raise Exception("Won't delete!")
                p.delete()
        log.info('Missing/deleted persons: %d' % len(removed_persons))

        # Mark the files as processed.
        for p in SRC_FILES:
            f = BASE_PATH + p + ".yaml"
            File.objects.save_file(f)

    # Runs whether or not errors occurred above.
    update_twitter_list()

Example #3

Show file

    def main(self, movie_id, api_key):
        """Fetch one movie from Kinopoisk and store it with its related records.

        The data is collected through ``self._get_movie_info`` and then saved
        in this order: genres, the movie itself, persons, roles, posters.
        If a movie with the same formatted fields already exists, a warning
        is written and nothing else is created (genres looked up before that
        check may already have been created).

        Args:
            movie_id: Identifier of the movie, forwarded to
                ``self._get_movie_info``.
            api_key: API key used to build the ``KP`` client.
        """
        # The API key is a credential, so it is deliberately not echoed to
        # stdout (the original debug print leaked it into command output).
        kinopoisk = KP(api_key)
        self.stdout.write("Collect data")

        # Run the coroutine to completion on a dedicated event loop;
        # run_until_complete() returns the coroutine's result directly.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        full_movie_info: dict = loop.run_until_complete(
            self._get_movie_info(kinopoisk, movie_id))
        self.stdout.write(self.style.SUCCESS("Data received"))

        movie_info: dict = full_movie_info['movie']
        genres = [
            Genre.objects.get_or_create(title=genre)[0]
            for genre in movie_info['genres']
        ]
        formatted_movie_info = get_formatted_movie_fields(movie_info)
        if Movie.objects.filter(**formatted_movie_info).exists():
            self.stdout.write(
                self.style.WARNING(
                    f"Movie {movie_id} exists in this database"))
            return

        # The model is populated through the raw foreign-key column.
        formatted_movie_info['movie_type_id'] = formatted_movie_info.pop(
            'movie_type')
        movie: Movie = Movie(**formatted_movie_info)
        movie.save()
        self.stdout.write(f"Movie {movie} created")
        # add() accepts any number of objects, so one call attaches them all.
        movie.genres.add(*genres)
        self.stdout.write(self.style.SUCCESS("Movie saved"))

        # Index the photo payloads by the id extracted from each of them.
        photos = {
            self._get_kp_id_from_image_data(image_data): image_data
            for image_data in full_movie_info['photos']
        }

        persons_kp_id_map = {}
        raw_person_data: dict
        for raw_person_data in full_movie_info['persons']:
            kp_id, person = self.add_person(raw_person_data, photos)
            persons_kp_id_map[kp_id] = person

        self.stdout.write(self.style.SUCCESS("Persons saved"))

        for role in movie_info['roles']:
            PersonRole(**get_formatted_role_fields(
                role, movie, persons_kp_id_map[int(role['kp_id'])])).save()
        self.stdout.write(self.style.SUCCESS("Roles saved"))

        temp_dir = 'temp'
        for filename, image_bin in full_movie_info['posters'].items():
            if not image_bin:
                continue
            self.safe_mkdir(temp_dir)
            file_path = os.path.join(temp_dir, filename)
            with open(file_path, 'wb') as f:
                f.write(image_bin)

            # NOTE(review): both branches yield LARGE, so '_small' files are
            # stored as LARGE too. Probably a copy/paste slip -- confirm which
            # FormatType member was intended before changing it.
            poster_format = (Poster.FormatType.LARGE.name
                             if '_small' in filename
                             else Poster.FormatType.LARGE.name)
            try:
                # Close the read handle before the file is removed; the
                # original left it open (a leaked descriptor, and a failing
                # os.remove on platforms that lock open files).
                with open(file_path, 'rb') as image_file:
                    Poster(movie=movie,
                           image=ImageFile(image_file),
                           orientation=Poster.OrientationType.VERTICAL.name,
                           format=poster_format).save()
            finally:
                os.remove(file_path)

        # The directory only exists if at least one poster carried data;
        # an unconditional rmdir raised FileNotFoundError otherwise.
        if os.path.isdir(temp_dir):
            os.rmdir(temp_dir)
        self.stdout.write(self.style.SUCCESS("Posters saved"))