Ejemplo n.º 1
0
 def __init__(self, abbr, chamber, meta, term=None, session=None):
     self.meta = meta
     self.abbr = abbr
     self.session = session
     self.term = term
     self.chamber = chamber
     self.bills = IterBills(abbr, chamber, session=session, term=term)
     self.legislators = IterLegislators(abbr, chamber)
     self.leg_deets = {}
Ejemplo n.º 2
0
class ScoreCalculator(object):
    """Given a state, chamber, and term or session, calculate
    the cosponsorship pagerank, effectiveness, and ideal point
    scores for each legislator.
    """

    def __init__(self, abbr, chamber, meta, term=None, session=None):
        self.meta = meta
        self.abbr = abbr
        self.session = session
        self.term = term
        self.chamber = chamber
        self.bills = IterBills(abbr, chamber, session=session, term=term)
        self.legislators = IterLegislators(abbr, chamber)
        self.leg_deets = {}

    def get_pagerank(self):
        """Create a co-sponsorship digraph based on the information from
        the Open States API and calculate the pagerank of each legislator.
        """
        ids = set()
        G = networkx.DiGraph()
        number_of_bills = 0

        for bill in self.bills:
            sponsors = bill["sponsors"]
            # if len(sponsors) < 2:
            #     continue

            # Separate sponsors into primary, secondary.
            primary = []
            secondary = []
            for sponsor in sponsors:
                if sponsor["leg_id"] is None:
                    continue
                if sponsor["type"] == "primary":
                    primary.append(sponsor["leg_id"])
                else:
                    secondary.append(sponsor["leg_id"])
                ids.add(sponsor["leg_id"])

            # Add them to the network.
            if primary and secondary:
                for primary, secondary in product(primary, secondary):
                    try:
                        G[secondary][primary]["weight"] += 1
                    except KeyError:
                        G.add_edge(secondary, primary, weight=1)
            elif primary:
                for edge in combinations(primary, r=2):
                    for p1, p2 in [edge, edge[::-1]]:
                        try:
                            G[p1][p2]["weight"] += 1
                        except KeyError:
                            G.add_edge(p1, p2, weight=1)

        if not G.nodes():
            # Known offenders: CO, AR, CT, ID, and others.
            # Reuturn all ones.
            return dict.fromkeys(ids, 1)
            data = dict(abbr=self.abbr, chamber=self.chamber)
            msg = "Can't generate PageRank scores due to lack of secondary " "sponsorship data: %r."
            raise DataQualityError(msg % (data,))

        return networkx.pagerank_numpy(G)

    def get_effectiveness(self):
        """Create an effectiveness score for each legislator relative to
        all the others based on the extent to which bills by each leg'r
        are passed on the chamber of origin, the other chamber, or into law.
        """
        # Multipliers used below.
        multipliers = dict(passed_own=1, passed_other=2, signed=20)

        legislators = defaultdict(Counter)
        number_of_bills = 0
        chamber = self.chamber

        # Calculate the scores.
        for bill in self.bills:
            sponsors = bill["sponsors"]

            # Separate sponsors into primary, secondary.
            primary = []
            secondary = []
            for sponsor in sponsors:
                if sponsor["type"] == "primary":
                    primary.append(sponsor["leg_id"])
                else:
                    secondary.append(sponsor["leg_id"])

            for sponsor in primary:
                if chamber == "upper":
                    other_chamber = "lower"
                else:
                    other_chamber = "upper"
                if bill["action_dates"]["passed_%s" % self.chamber]:
                    legislators[sponsor]["passed_own"] += 1
                if bill["action_dates"]["passed_%s" % other_chamber]:
                    legislators[sponsor]["passed_other"] += 1
                if bill["action_dates"]["signed"]:
                    legislators[sponsor]["signed"] += 1

        # Compute the scores.
        vals = []
        detail = self.legislators.detail
        for leg_id, counter in legislators.items():
            if leg_id is None:
                continue
            with DictSetDefault(self.leg_deets, leg_id, detail(leg_id)) as deets:
                for key, multiplier in multipliers.items():
                    score = counter[key] * multiplier
                vals.append(score)
                deets["eff_stats"] = dict(counter, score=score)

        if not vals:
            raise DataQualityError("No effectiveness data available.")

        scoresdict = {}
        for key in multipliers:
            scoresdict[key] = [d[key] for d in legislators.values()]

        percentiles = defaultdict(dict)
        for key, scores in scoresdict.items():
            for n in range(1, 101):
                percentiles[key][n] = np.percentile(scores, n)

        # Normalize the scores.
        vals = np.array(map(float, vals))
        normed = vals / sum(vals) * 250
        normed = dict(zip(vals, normed))
        newvals = {}
        for leg_id in legislators:
            if leg_id is None:
                continue
            leg_deets = self.leg_deets[leg_id]
            with DictSetDefault(leg_deets, "eff_stats", {}) as eff_stats:
                for key, percentiledict in percentiles.items():
                    score = eff_stats.get(key, 0)
                    eff_stats[key] = score
                    percentile = 0
                    if set(percentiledict.values()) == set([0.0]):
                        percentile = 0
                    else:
                        for n, val in percentiledict.items():
                            if score < val:
                                break
                            else:
                                percentile = n
                    eff_stats[key + "_percentile"] = percentile
                newvals[leg_id] = normed.get(eff_stats.get("score", 0))
        return newvals

    def get_idealpoints(self):
        """Get ideal point for each legislator.
        """
        YES = float(1)
        NO = float(2)
        OTHER = float(3)

        votedata = defaultdict(dict)
        vote_vals = dict(yes_votes=YES, no_votes=NO, other_votes=OTHER)
        leg_ids = set()
        chamber_ids = [leg["id"] for leg in self.legislators.metadata]

        vote_keys = "yes_votes, no_votes, other_votes".split(", ")
        for vote in self.bills.itervotes():
            for k in vote_keys:
                for voter in vote[k]:
                    leg_id = voter["leg_id"]
                    if leg_id is None:
                        continue
                    if leg_id not in chamber_ids:
                        continue
                    leg_ids.add(leg_id)
                    votedata[vote["id"]][leg_id] = vote_vals[k]

        # Convert the dict into a pandas DataFrame.
        dataframe = DataFrame(votedata, index=leg_ids)
        dataframe.fillna(value=9)

        # Create a rollcall object similar to pscl's.
        rollcall = Rollcall.from_dataframe(
            dataframe, yea=[YES], nay=[NO], missing=[OTHER], not_in_legis=0.0, legis_names=tuple(leg_ids)
        )

        # Here they are.
        xbar = rollcall.ideal().xbar

        # Now guess the polarity.
        polarities = defaultdict(list)
        parties = {}
        polarity_parties = defaultdict(Counter)
        for legislator in self.legislators:
            leg_id = legislator["leg_id"]
            if leg_id not in xbar:
                continue
            parties[leg_id] = legislator.get("party", "o")
            sign = 0 < xbar[leg_id]
            polarities[sign].append(leg_id)

        for polarity, leg_ids in polarities.items():
            for leg_id in leg_ids:
                party = parties[leg_id]
                letter = party.lower()[0]
                if letter not in "rd":
                    letter = "o"
                polarity_parties[polarity][letter] += 1

        # If a the parties are clustered on distinct sides use that,
        # else on the side where most are clustered, assign that
        # side to the most frequently occuring party.
        polarity_results = {}
        for polarity, partydict in polarity_parties.items():
            most_frequent = max(partydict, key=partydict.get)
            polarity_results[polarity] = most_frequent

        # If the polarity appears to be backwards, reverse it.
        if polarity_results[True] != "r":
            xbar = {leg_id: -n for (leg_id, n) in xbar.items()}

        return xbar

    def get_scores(self):
        """Helper function for ScoreCalculator monster.
        """
        logging.info("Starting %r" % ([self.abbr, self.chamber, self.term],))

        logging.info("Starting pagerank calculation...")
        pageranks = self.get_pagerank()
        logging.info("...done")

        logging.info("Starting effectiveness calculation...")
        effectiveness = self.get_effectiveness()
        logging.info("...done")

        logging.info("Starting ideal point calculation...")
        idealpoints = self.get_idealpoints()
        logging.info("...done")

        return dict(effectiveness=effectiveness, pageranks=pageranks, idealpoints=idealpoints)

    def import_scores(self, meta):
        """Write the scores into mongo.
        """
        keep_keys = ("first_name", "last_name", "party", "eff_stats", "photo_url", "district", "full_name", "id")

        def party_letter(party):
            parties = "rd"
            letter = party.lower()[0]
            if letter in parties:
                return letter
            else:
                return "o"

        scores = self.get_scores()

        # Get a set of all ids.
        ids = set(scores["idealpoints"].keys())
        ids = filter(None, ids)

        points = []
        leg_deets = self.leg_deets
        for leg_id in ids:
            legislator = self.leg_deets.get(leg_id)
            if legislator is None:
                legislator = self.legislators.detail(leg_id)
            party = party_letter(legislator.get("party", "o"))
            logging.debug("Party is %r" % party)

            leg_keys = ("first_name", "last_name", "district", "photo_url", "full_name", "id", "eff_stats")
            for key in tuple(legislator):
                if key not in leg_keys:
                    legislator.pop(key)

            # Calculate the point data.
            point = dict(
                x=scores["idealpoints"][leg_id],
                # If no effectiveness score, s/he got no bills passed.
                y=scores["effectiveness"].get(leg_id, 0),
                # If no PR score, s/he had no consponsorships.
                size=scores["pageranks"].get(leg_id, 0),
                party=party,
                legislator=legislator,
            )
            points.append(point)

        report = dict(
            name=self.meta["name"],
            term=self.term,
            term_name="%s Term" % self.term,
            chamber_name=self.meta["chambers"][self.chamber]["name"],
            abbr=self.abbr,
            chamber=self.chamber,
            points=points,
        )
        mongo.reports.save(report)
        return report