Exemple #1
0
def getError(im):
    """Estimate how far a detected target is from the horizontal image center.

    The U channel of the YUV-converted image is thresholded to the band
    [lb, ub]. A per-column histogram counts the in-band pixels, and the
    weighted median column of that histogram is taken as the detection.

    Args:
        im: A BGR image that may show a robot (it is converted with
            ``cv2.COLOR_BGR2YUV``).

    Returns:
        Error value between -1 and 1. -1 means the target is to the left of
        the center of the image and 1 means it is to the right. Returns None
        when ``ws.weighted_median`` yields None (no other robot detected).
    """
    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV)
    yuv = yuv[:, :, 1]

    # Blank the top rows of the frame. The slice bound must be an integer:
    # a float bound raises TypeError on Python 3 / current numpy.
    # NOTE(review): the bound is derived from the image *width* (shape[1])
    # although it limits rows; confirm whether shape[0] was intended.
    top_rows = int(yuv.shape[1] / 2.5)
    yuv[0:top_rows, :] = 0

    # Binarize: pixels inside [lb, ub] become 0, everything else 255.
    lb = 150
    ub = 210
    yuv[yuv > ub] = 255
    yuv[yuv < lb] = 255
    yuv[yuv != 255] = 0

    # Number of in-band pixels per column, used as weights of the column index.
    histogram = np.sum(yuv == 0, axis=0)
    returns = ws.weighted_median(range(0, len(histogram)), histogram.tolist())

    if returns is None:
        return None
    # Debug visualization of the detected center column.
    cv2.imshow("yuv", visualizeParticles(yuv, np.array([[math.floor(returns), 50]], np.int32), [0.2]))
    cv2.waitKey(1)
    return (returns - yuv.shape[1] / 2) / (yuv.shape[1] / 2)
Exemple #2
0
def main():
    """Run the demo: weight a few example measurements and print the medians.

    Fills ``data.value_options`` with example entries, records each measured
    value and the ``weight.total`` read after ``weightValues`` is called for
    it, then prints the weights, the values, their plain median and their
    weighted median.
    """
    # Example data: measured value -> [method, condition, enzyme, organism].
    data.value_options = {
        2: [1, 2, 3, 1],
        10: [1, 3, 1, 2],
        3: [2, 3, 1, 2],
        5: [2, 3, 2, 1],
        11: [1, 2, 3, 2],
    }

    for measured, scores in data.value_options.items():
        data.measured_v.append(measured)

        method, condition = scores[0], scores[1]
        enzyme, organism = scores[2], scores[3]

        # presumably weightValues() updates weight.total as a side effect;
        # verify against its definition.
        weightValues(measured, method, condition, enzyme, organism)
        data.weights.append(weight.total)

    print(data.weights)
    print(data.measured_v)
    print(ws.median(data.measured_v))
    print(ws.weighted_median(data.measured_v, data.weights))
Exemple #3
0
def CalcModeCI_Factor(values, weights):
    """Return ``(mode, CI_factor)`` for equally long ``values`` and ``weights``.

    ``mode`` is the weighted median of the values. ``CI_factor`` is the mean
    of ``mode / min_value`` and ``max_value / mode``, where ``min_value`` and
    ``max_value`` are picked from the sorted values at positions derived from
    the lower and upper percentile of the total weight.

    If there is a single sample, all values are equal, or the two picked
    values coincide, a CI factor of 10 is used instead.

    Raises:
        IndexError: if ``values`` is empty.
        ZeroDivisionError: if the weights sum to zero, or if ``mode`` or
            ``min_value`` is zero in the regular branch.
    """
    if len(values) == 1 or len(set(values)) <= 1:
        print("values were the same")

        mode = values[0]
        CI_factor = 10

        print(mode)
        print(CI_factor)
        return (mode, CI_factor)

    print("ran normal")

    total_weight = sum(weights)
    array_len = len(weights)
    mode = ws.weighted_median(values, weights)

    # NOTE(review): 0.25 next to 0.975 looks asymmetric; a 95% interval
    # would use 0.025. Left unchanged pending confirmation.
    min_position_weighted = round(0.25*total_weight)
    max_position_weighted = round(0.975*total_weight)

    sort = sorted(values)
    last_index = len(sort) - 1

    def to_index(position_weighted):
        # Translate a weighted position into an index of the sorted values.
        # The result is clamped to the valid range: without the clamp a
        # rounded position of 0 became index -1 (silently the *largest*
        # value), and fractional weights could overshoot the end.
        raw = int(round(((position_weighted*100)/total_weight)*array_len/100)) - 1
        return min(max(raw, 0), last_index)

    min_value = sort[to_index(min_position_weighted)]
    max_value = sort[to_index(max_position_weighted)]

    if min_value == max_value:
        print("weighted values were the same")

        mode = min_value
        CI_factor = 10
    else:
        CI_min = mode/min_value
        CI_max = max_value/mode

        CI_factor = (CI_min + CI_max)/2

    return (mode, CI_factor)
Exemple #4
0
    def kth_pair_algorithm(self):
        """Return the value of rank ``floor(p * q / 2)`` among all ``H(i, j)``.

        Each of the ``p`` rows keeps a candidate column window ``[L[i], R[i]]``.
        Every iteration takes the weighted median ``WM`` of the row-window
        midpoints (weighted by window width) and uses ``greater_h(WM)`` /
        ``less_h(WM)`` to shrink the windows from one side. When at most ``p``
        candidates are left, they are gathered and the wanted rank is selected
        directly.

        NOTE(review): assumes ``self.H(i, j)`` returns a scalar and that
        ``greater_h`` / ``less_h`` return per-row column bounds with a
        ``.copy()`` method; confirm against their definitions.
        """
        L = [0] * self.p
        R = [self.q - 1] * self.p

        Ltotal = 0

        Rtotal = self.p * self.q

        medcouple_index = math.floor(Rtotal / 2)

        while Rtotal - Ltotal > self.p:

            # Rows whose window is still non-empty.
            middle_idx = [i for i in range(self.p) if L[i] <= R[i]]
            row_medians = [
                self.H(i, math.floor((L[i] + R[i]) / 2)) for i in middle_idx
            ]

            weight = [R[i] - L[i] + 1 for i in middle_idx]

            WM = ws.weighted_median(row_medians, weights=weight)

            P = self.greater_h(WM)

            Q = self.less_h(WM)

            Ptotal = np.sum(P) + len(P)
            Qtotal = np.sum(Q)

            if medcouple_index <= Ptotal - 1:
                R = P.copy()
                Rtotal = Ptotal
            else:
                if medcouple_index > Qtotal - 1:
                    L = Q.copy()
                    Ltotal = Qtotal
                else:
                    return WM

        # Gather the surviving candidates in one pass; growing an array with
        # np.append per element re-copies it every time (quadratic).
        remaining = np.array(
            [
                self.H(i, j)
                for i in range(self.p)
                for j in range(L[i], R[i] + 1)
            ],
            dtype=float,
        )

        find_index = medcouple_index - Ltotal

        k_minimum_element = remaining[np.argpartition(remaining, find_index)]

        return k_minimum_element[find_index]
Exemple #5
0
def get_cryptopia_price(market, hm_orders=12, book='Buy'):
    """
    Return the volume-weighted median price of the first ``hm_orders``
    entries of one side of a Cryptopia order book.

    :param market: LTC_USDT ex.
    :param hm_orders: how many orders depth
    :param book: Sell or Buy
    :return: weighted median of the order prices, weighted by order volume
    :raises RuntimeError: if the API call returned no order data
    """
    api = Cryptopia()
    ret, error = api.get_orders(market)
    if ret is None:
        # Previously the error value was ignored and this surfaced as an
        # opaque "NoneType is not subscriptable" TypeError on ret[book].
        raise RuntimeError(
            "Cryptopia get_orders(%r) failed: %s" % (market, error))
    required_orders = ret[book][:hm_orders]
    prices = [float(o['Price']) for o in required_orders]
    volumes = [float(o['Volume']) for o in required_orders]
    return ws.weighted_median(prices, volumes)
Exemple #6
0
    def _compute_cvr_split_stats(self, split_filter) -> pd.DataFrame:
        """Summarize the ballots selected by ``split_filter``.

        Rows of ``self._cvr_stat_table`` are selected with ``split_filter``.
        For each flag column the ballot weights of the flagged rows are summed.
        Mean and weighted-median rankings used are computed over the rows that
        are not undervotes. The result is a one-row DataFrame whose columns
        carry a ``split_`` prefix.
        """
        split_table = self._cvr_stat_table.loc[split_filter, :]
        voted = ~split_table['undervote']

        def weight_where(flag_column):
            # Total ballot weight of the rows whose flag column is set.
            return split_table.loc[split_table[flag_column], 'weight'].sum()

        # Weighted mean over non-undervote ballots.
        mean_rankings_used = (
            split_table.loc[voted, 'ranks_used_times_weight'].sum()
            / split_table.loc[voted, 'weight'].sum()
        )

        # Weighted median over non-undervote ballots.
        voted_ranks = split_table.loc[voted, 'valid_ranks_used'].tolist()
        voted_weights = [float(w) for w in split_table.loc[voted, 'weight'].tolist()]
        median_rankings_used = weightedstats.weighted_median(voted_ranks, weights=voted_weights)

        return pd.DataFrame({
            'split_first_round_overvote': [weight_where('first_round_overvote')],
            'split_ranked_single': [weight_where('ranked_single')],
            'split_ranked_multiple': [weight_where('ranked_multiple')],
            'split_ranked_3_or_more': [weight_where('ranked_3_or_more')],
            'split_mean_rankings_used': [mean_rankings_used],
            'split_median_rankings_used': [median_rankings_used],
            'split_total_fully_ranked': [weight_where('fully_ranked')],
            'split_includes_duplicate_ranking': [weight_where('contains_duplicate')],
            'split_includes_skipped_ranking': [weight_where('contains_skip')],
            'split_total_irregular': [weight_where('irregular')],
            'split_total_ballots': [split_table['weight'].sum()],
            'split_includes_overvote_ranking': [weight_where('contains_overvote')],
            'split_total_undervote': [weight_where('undervote')],
        })
Exemple #7
0
    def GetDecisionOutcomes(self, votes, ScaledIndex):
        """Determines the Outcomes of Decisions based on the provided
        reputation (weighted vote).

        Args:
            votes: 2-D (players x decisions) array of votes; masked entries
                are treated as "no judgement given".
            ScaledIndex: per-decision flags; truthy marks a scaled decision
                (weighted median), falsy a binary one (weighted average).

        Returns:
            Transposed array of the raw outcome of every decision.

        NOTE(review): ``self.reputation`` is assumed to be a column vector of
        shape (players, 1), as implied by ``Row[:, 0]``.
        """
        DecisionOutcomes_Raw = []

        # Iterate over decisions (columns)
        for i in range(votes.shape[1]):

            # Players who DID provide a judgement for this decision.
            # getmaskarray always yields a full boolean array (even when the
            # mask is the scalar `nomask`), and `~` is the supported boolean
            # negation -- unary minus on boolean arrays raises TypeError.
            present = ~np.ma.getmaskarray(votes[:, i])

            # Their reputation (boolean indexing copies, so self.reputation
            # is not modified below).
            Row = self.reputation[present]

            # Set missing values to 0
            Row[np.isnan(Row)] = 0

            # Normalize so the participating reputation sums to 1
            Row /= np.sum(Row)

            # The relevant Decision with NAs removed.
            # ("What these row-players had to say about the Decisions
            # they DID judge.")
            Col = votes[present, i]

            # Discriminate based on contract type.
            # Current best-guess for this Binary Decision (weighted average)
            if not ScaledIndex[i]:
                DecisionOutcomes_Raw.append(np.dot(Col, Row))

            # Current best-guess for this Scaled Decision (weighted median):
            # the votes are the data, the reputation provides the weights.
            else:
                wmed = weighted_median(Col, Row[:, 0])
                DecisionOutcomes_Raw.append(wmed)

        return np.array(DecisionOutcomes_Raw).T
Exemple #8
0
def get_median_vote(votes):
    """Return the median of the ``'amount'`` entries of ``votes``.

    ``weighted_median`` is called without weights.
    """
    amounts = []
    for vote in votes:
        amounts.append(vote['amount'])
    return weighted_median(amounts)
Exemple #9
0
def get_weighted_median_vote(votes):
    """Return the median of the votes' ``'amount'`` values, weighted by
    their ``'weight'`` values."""
    def column(key):
        # One field of every vote, in input order.
        return [vote[key] for vote in votes]

    vote_weights = column('weight')
    vote_amounts = column('amount')
    return weighted_median(vote_amounts, weights=vote_weights)
    def get_type_split(self, sample, shape, pz, pz_1p, pz_1m, pz_2p, pz_2m):
        """Build the selection masks for one galaxy sample.

        ``self.samples[sample]`` names the selection. It is either a galaxy
        type alone (``early``, ``rearly``, ``late``, ``rlate``, ``all``) or
        ``<bright|faint>_<galaxy type>``, in which case the sample is also
        split about the weighted median r-band magnitude.

        NOTE(review): an earlier comment described the name order as
        type-then-magnitude; the code has always read the magnitude subset
        first. Confirm against the configuration files.

        Args:
            sample: 1-based sample number as a string, or '' for the first.
            shape: catalogue providing ``flux_r`` (plus the ``_1p/_1m/_2p/_2m``
                variants), ``flags``, ``m1`` and ``m2``; only read when a
                magnitude split is requested.
            pz, pz_1p, pz_1m, pz_2p, pz_2m: catalogues providing ``t_bpz``.

        Returns:
            Tuple of five boolean masks, one per catalogue variant, in the
            order (unsheared, 1p, 1m, 2p, 2m).

        Raises:
            ValueError: if the magnitude subset or galaxy type is not
                recognised (previously an unbound-variable NameError).
        """
        if sample == '':
            sample = 0
        else:
            sample = int(sample) - 1
        gt = self.samples[sample].split("_")
        # Single-argument print calls produce the same output on Python 2
        # and Python 3.
        print("Cutting to galaxy type %s" % (gt,))

        pz_variants = (pz, pz_1p, pz_1m, pz_2p, pz_2m)

        # If we need to split into bright/faint subsets, then include this in the mask first
        if len(gt) > 1:
            subpop = gt[0]
            galaxy_type = gt[1]

            flux_columns = ("flux_r", "flux_r_1p", "flux_r_1m",
                            "flux_r_2p", "flux_r_2m")
            mags = [30 - 2.5 * np.log10(shape[col]) for col in flux_columns]
            r = mags[0]

            # Get the weighted median and split about this value
            import weightedstats as ws
            flags_select = shape["flags"] == 0
            R = (shape["m1"] + shape["m2"]) / 2
            median_r = ws.weighted_median(r[flags_select],
                                          weights=R[flags_select])

            if subpop.lower() == "bright":
                print("Selecting below the weighted median r-band mag :  %s" % (median_r,))
                mag_mask = tuple(mag < median_r for mag in mags)
                print("%d/%d" % (r[mag_mask[0]].size, r.size))
            elif subpop.lower() == "faint":
                print("Selecting above the weighted median r-band mag :  %s" % (median_r,))
                mag_mask = tuple(mag > median_r for mag in mags)
                print("%d/%d" % (r[mag_mask[0]].size, r.size))
            else:
                raise ValueError(
                    "Unrecognised magnitude subset %r in sample %r"
                    % (subpop, self.samples[sample]))
        else:
            galaxy_type = gt[0]
            mag_mask = [np.ones(pz["t_bpz"].size).astype(bool)] * 5

        # Now select either early or late type galaxies, as defined by BPZ
        # allow for four possible cuts: the Heymans defaults, and a more stringent version
        if galaxy_type == 'early':
            mask = [cat['t_bpz'] < 1.0 for cat in pz_variants]
        elif galaxy_type == 'rearly':
            mask = [cat['t_bpz'] < 0.5 for cat in pz_variants]
        elif galaxy_type == 'late':
            mask = [cat['t_bpz'] >= 1.0 for cat in pz_variants]
        elif galaxy_type == 'rlate':
            mask = [cat['t_bpz'] >= 1.5 for cat in pz_variants]
        elif galaxy_type == 'all':
            mask = [np.ones(pz["t_bpz"].size).astype(bool)] * 5
        else:
            raise ValueError(
                "Unrecognised galaxy type %r in sample %r"
                % (galaxy_type, self.samples[sample]))

        return tuple(m1 & m2 for (m1, m2) in zip(mask, mag_mask))
Exemple #11
0
    def consensus(self):
        """Compute event outcomes and reporter/author bonuses from self.reports.

        Missing reports are filled with ``self.interpolate``, reporters are
        scored with ``self.lie_detector``, and every event gets a raw,
        adjusted and final outcome. Participation (share of missing reports)
        is then blended with reputation / consensus reward into bonuses.

        Returns:
            dict with the keys 'original', 'filled', 'agents', 'events',
            'participation', 'avg_certainty', 'convergence' and 'components'.
        """
        # Handle missing values
        reports_filled = self.interpolate(self.reports)

        # Consensus - Row Players
        player_info = self.lie_detector(reports_filled)

        # Column Players (The Event Creators)
        # Reputation-weighted average of the filled reports, per event.
        outcomes_raw = np.dot(player_info['smooth_rep'], reports_filled)
        if outcomes_raw.shape != (1, ):
            outcomes_raw = outcomes_raw.squeeze()

        # Discriminate Based on Contract Type
        if self.event_bounds is not None:
            for i in range(reports_filled.shape[1]):

                # Our Current best-guess for this Scaled Event (weighted median)
                if self.event_bounds[i]["scaled"]:
                    outcomes_raw[i] = weighted_median(
                        reports_filled[:, i],
                        weights=player_info["smooth_rep"].ravel(),
                    )

        # The Outcome (Discriminate Based on Contract Type)
        # Scaled events keep the raw value; all others go through self.catch.
        outcomes_adj = []
        for i, raw in enumerate(outcomes_raw):
            if self.event_bounds is not None and self.event_bounds[i]["scaled"]:
                outcomes_adj.append(raw)
            else:
                outcomes_adj.append(self.catch(raw))

        # Map scaled outcomes back onto their [min, max] range.
        outcomes_final = []
        for i, raw in enumerate(outcomes_raw):
            outcomes_final.append(outcomes_adj[i])
            if self.event_bounds is not None and self.event_bounds[i]["scaled"]:
                outcomes_final[i] *= self.event_bounds[i][
                    "max"] - self.event_bounds[i]["min"]
                outcomes_final[i] += self.event_bounds[i]["min"]

        # Certainty of an event: summed smooth reputation of the reporters
        # whose filled report equals the adjusted outcome exactly.
        certainty = []
        for i, adj in enumerate(outcomes_adj):
            certainty.append(
                sum(player_info["smooth_rep"][reports_filled[:, i] == adj]))

        certainty = np.array(certainty)
        consensus_reward = self.normalize(certainty)
        avg_certainty = np.mean(certainty)

        # Participation: information about missing values
        na_mat = self.reports * 0
        na_mat[np.isnan(self.reports)] = 1  # indicator matrix for missing
        na_mat[self.reports == NA] = 1
        if self.verbose:
            print "NA Mat:"
            print na_mat
            print

        # Participation Within Events (Columns)
        # % of reputation that answered each Event
        participation_columns = 1 - np.dot(player_info['smooth_rep'], na_mat)

        # Participation Within Agents (Rows)
        # Democracy Option - all Events treated equally.
        if self.verbose:
            print "Sum:"
            print na_mat.sum(axis=1)
            print
        participation_rows = 1 - na_mat.sum(axis=1) / na_mat.shape[1]

        # General Participation
        percent_na = 1 - np.mean(participation_columns)
        if self.verbose:
            print percent_na

        # Combine Information
        # Row
        # Blend participation and reputation, weighted by the missing share.
        na_bonus_reporters = self.normalize(participation_rows)
        reporter_bonus = na_bonus_reporters * percent_na + player_info[
            'smooth_rep'] * (1 - percent_na)

        # Column
        na_bonus_events = self.normalize(participation_columns)
        author_bonus = na_bonus_events * percent_na + consensus_reward * (
            1 - percent_na)

        # NOTE(review): the `.data` accesses below presumably rely on the
        # operands being numpy masked arrays -- confirm.
        return {
            'original': self.reports.data,
            'filled': reports_filled.data,
            'agents': {
                'old_rep': player_info['old_rep'],
                'this_rep': player_info['this_rep'],
                'smooth_rep': player_info['smooth_rep'],
                'na_row': na_mat.sum(axis=1).data.tolist(),
                'participation_rows': participation_rows.data.tolist(),
                'relative_part': na_bonus_reporters.data.tolist(),
                'reporter_bonus': reporter_bonus.data.tolist(),
                'scores': player_info['scores'],
            },
            'events': {
                'adj_first_loadings':
                player_info['first_loading'].data.tolist(),
                'outcomes_raw': outcomes_raw.tolist(),
                'consensus_reward': consensus_reward,
                'certainty': certainty,
                'NAs Filled': na_mat.sum(axis=0).data.tolist(),
                'participation_columns': participation_columns.data.tolist(),
                'author_bonus': author_bonus.data.tolist(),
                'outcomes_adjusted': outcomes_adj,
                'outcomes_final': outcomes_final,
            },
            'participation': 1 - percent_na,
            'avg_certainty': avg_certainty,
            'convergence': self.convergence,
            'components': self.num_components,
        }
Exemple #12
0
    def interpolate(self, reports):
        """Uses existing data and reputations to fill missing observations.
        Weighted average/median using all available (non-nan) data.

        Scaled events are first rescaled to ``(x - min) / (max - min)``; this
        step writes into ``reports`` itself. A report counts as missing when
        it equals ``NA`` or is NaN. Missing reports of a scaled event are
        filled with the reputation-weighted median of the present reports;
        all other events use the reputation-weighted average passed through
        ``self.catch``.

        Args:
            reports: 2-D array of reports; assumed to have shape
                (self.num_reports, self.num_events).

        Returns:
            A new array with every missing report filled in.
        """
        # Rescale scaled events
        if self.event_bounds is not None:
            for i in range(self.num_events):
                if self.event_bounds[i]["scaled"]:
                    lower = self.event_bounds[i]["min"]
                    span = float(self.event_bounds[i]["max"] - lower)
                    reports[:, i] = (reports[:, i] - lower) / span

        # Interpolation to fill the missing observations
        reports = np.array(reports)
        missing = (reports == NA) | np.isnan(reports)
        reports_copy = np.copy(reports)

        for i in range(self.num_events):
            absent = missing[:, i]
            if not absent.any():
                continue

            present_rows = [j for j in range(self.num_reports) if not absent[j]]

            # Reputation and reports of the reporters who did answer.
            total_active_reputation = 0
            active_reputation = np.zeros(len(present_rows))
            active_reports = np.zeros(len(present_rows))
            for k, j in enumerate(present_rows):
                total_active_reputation += self.reputation[j]
                active_reputation[k] = self.reputation[j]
                active_reports[k] = reports_copy[j, i]

            # Normalize the active reputation so it sums to 1.
            active_reputation /= total_active_reputation

            if (self.event_bounds is not None
                    and self.event_bounds[i] is not None
                    and self.event_bounds[i]["scaled"]):
                guess = weighted_median(active_reports,
                                        weights=active_reputation)
            else:
                guess = self.catch(
                    sum(w * r for w, r in
                        zip(active_reputation, active_reports)))

            # Fill through the boolean mask. The previous float-valued index
            # array raised IndexError on current numpy.
            reports_copy[absent, i] = guess

        return reports_copy
Exemple #13
0
    def _compute_summary_cvr_stat_table(self) -> None:
        """Build the one-row summary table of ballot statistics.

        Every count is a sum of ballot weights taken from
        ``self._cvr_stat_table``. The result is stored in
        ``self._summary_cvr_stat_table`` as a single-row DataFrame.
        """
        cvr = self.get_cvr_dict()
        candidates = self.get_candidates()

        s = pd.Series(dtype=object)

        # Candidate count excludes write-ins.
        without_writeins = BallotMarks.remove_mark(
            BallotMarks.combine_writein_marks(candidates), [BallotMarks.WRITEIN])
        n_candidates = len(without_writeins.marks)
        rank_limit = len(cvr['ballot_marks'][0].marks)

        s['n_candidates'] = n_candidates
        s['rank_limit'] = rank_limit
        s['restrictive_rank_limit'] = bool(rank_limit < (n_candidates - 1))

        stat_table = self._cvr_stat_table

        def weight_where(flag_column):
            # Total ballot weight of the rows whose flag column is set.
            return stat_table.loc[stat_table[flag_column], 'weight'].sum()

        # (summary name, flag column) pairs, all weighted:
        # first_round_overvote: ballots with an overvote before any valid
        #   ranking. Not the same as "exhausted by overvote": some
        #   jurisdictions (Maine) discard any ballot beginning with two
        #   skipped rankings and call it exhausted by skipped rankings even if
        #   an overvote follows; others (Minneapolis) simply skip overvotes.
        # ranked_single: voters that validly used only a single ranking.
        # ranked_3_or_more: voters that validly used 3 or more rankings.
        # ranked_multiple: voters that validly used more than one ranking.
        # total_fully_ranked: voters that validly used all available rankings,
        #   or validly ranked all non-write-in candidates.
        # includes_duplicate_ranking: ballots ranking a candidate more than once.
        # includes_skipped_ranking: ballots with a skipped ranking followed by
        #   any other marked ranking.
        flagged_before_total = (
            ('first_round_overvote', 'first_round_overvote'),
            ('ranked_single', 'ranked_single'),
            ('ranked_3_or_more', 'ranked_3_or_more'),
            ('ranked_multiple', 'ranked_multiple'),
            ('total_fully_ranked', 'fully_ranked'),
            ('includes_duplicate_ranking', 'contains_duplicate'),
            ('includes_skipped_ranking', 'contains_skip'),
        )
        for stat_name, flag_column in flagged_before_total:
            s[stat_name] = weight_where(flag_column)

        # This includes ballots with no marks. (weighted)
        s['total_ballots'] = stat_table['weight'].sum()

        # total_irregular: ballots with a multiple ranking, overvote, or a
        #   skipped ranking (only those followed by a mark), even where the
        #   irregularity was not the cause of exhaustion.
        # includes_overvote_ranking: ballots with at least one overvote, not
        #   necessarily the cause of exhaustion.
        # total_undervote: ballots made up only of skipped rankings (no marks).
        flagged_after_total = (
            ('total_irregular', 'irregular'),
            ('includes_overvote_ranking', 'contains_overvote'),
            ('total_undervote', 'undervote'),
        )
        for stat_name, flag_column in flagged_after_total:
            s[stat_name] = weight_where(flag_column)

        voted = ~stat_table['undervote']

        # Mean number of validly used rankings across all non-undervote ballots. (weighted)
        s['mean_rankings_used'] = (
            stat_table.loc[voted, 'ranks_used_times_weight'].sum()
            / stat_table.loc[voted, 'weight'].sum()
        )

        # Median number of validly used rankings across all non-undervote ballots. (weighted)
        voted_ranks = stat_table.loc[voted, 'valid_ranks_used'].tolist()
        voted_weights = [float(w) for w in stat_table.loc[voted, 'weight'].tolist()]
        s['median_rankings_used'] = weightedstats.weighted_median(voted_ranks, weights=voted_weights)

        self._summary_cvr_stat_table = s.to_frame().transpose()
Exemple #14
0
def wmedian(df, column_name, weights_name='wt0'):
    """Return the weighted median of ``df[column_name]``.

    Rows with a missing value in either ``column_name`` or ``weights_name``
    are dropped first; ``df[weights_name]`` supplies the weights.
    """
    df = df.dropna(subset=[column_name, weights_name])

    # ws.weighted_median is written for plain lists: it calls
    # weights.index(...) and indexes the data by position. A pandas Series
    # has a non-callable .index attribute and label-based indexing, so
    # convert both columns to lists.
    return ws.weighted_median(df[column_name].tolist(),
                              weights=df[weights_name].tolist())
def wmedian2(df, column_name, weights_name='wt0'):
    """Return the weighted median of ``df[column_name]``.

    Rows with a missing value in either ``column_name`` or ``weights_name``
    are dropped first; ``df[weights_name]`` supplies the weights.
    ``weightedstats`` is imported inside the function.
    """
    import weightedstats as ws
    df = df.dropna(subset=[column_name, weights_name])

    # ws.weighted_median is written for plain lists: it calls
    # weights.index(...) and indexes the data by position. A pandas Series
    # has a non-callable .index attribute and label-based indexing, so
    # convert both columns to lists.
    return ws.weighted_median(df[column_name].tolist(),
                              weights=df[weights_name].tolist())
Exemple #16
0
# Parse the population counts ("12,345" -> 12345) and drop rows without a FIPS.
cdcdf['PopulationCount'] = [int(pop.replace(',', '')) for pop in cdcdf.PopulationCount]
cdcdf.dropna(subset=['FIPS'], inplace=True)

# One output row per FIPS code; one column per measure is added below.
coudf = pd.DataFrame({'FIPS': cdcdf.FIPS.unique()})

# Loop invariants, computed once.
measure_ids = cdcdf.MeasureId.unique()
is_2017 = cdcdf.Year == 2017

x = 0
for mid in measure_ids:
    # Combine both conditions into one mask. Chained indexing applied the
    # second mask to an already filtered frame, which forces pandas to
    # reindex the mask and emits a warning.
    ndf = cdcdf[is_2017 & (cdcdf.MeasureId == mid)]
    print('Progress: {:.2%}'.format(x/len(measure_ids)), end = '\r')
    if len(ndf) > 0:
        wtd_medians = []
        pops = []
        for i in coudf.FIPS:
            nndf = ndf[ndf.FIPS == i]
            if len(nndf) > 0:
                pops.append(nndf.PopulationCount.sum())
                # Population-weighted median of the measure within this FIPS.
                wtd_medians.append(
                    weighted_median(nndf.Data_Value.tolist(),
                                    weights=nndf.PopulationCount.tolist()))
            else:
                wtd_medians.append(np.nan)
                pops.append(np.nan)
        # NOTE: 'pop500' is overwritten for every measure; the last measure
        # with 2017 data wins.
        coudf['pop500'] = pops
        coudf[mid] = wtd_medians

    x+=1
##########################################################################################
##########################################################################################
"""
Defining a Metro Area by cdcdf[cdcdf.FIPS==i].CityName.unique(),
which returns every CityName (as defined by CDC 500 Cities) within the 5 digit FIPS grouping
"""
##########################################################################################
Exemple #17
0
import weightedstats as ws

# Sample values and one weight per value.
my_data = [1, 2, 3, 4, 5]
my_weights = [10, 1, 1, 1, 9]

# Unweighted statistics; only the mean is printed.
# ws.mean(x) is equivalent to ws.weighted_mean(x), and
# ws.median(x) is equivalent to ws.weighted_median(x).
print(ws.mean(my_data))
ws.median(my_data)

# Weighted statistics: first the list-based functions, then the special
# variants for use with numpy arrays. Results are not printed.
for weighted_stat in (
    ws.weighted_mean,
    ws.weighted_median,
    ws.numpy_weighted_mean,
    ws.numpy_weighted_median,
):
    weighted_stat(my_data, weights=my_weights)
Exemple #18
0
    def consensus(self):
        """PCA-based consensus algorithm.

        Rescales the votes (when decision bounds are given), fills missing
        votes, scores the voters, and derives per-decision outcomes,
        certainty and participation-based bonuses.

        Returns:
          dict: consensus results, with the keys 'Original', 'Filled',
          'Agents', 'Decisions', 'Participation' and 'Certainty'.

        """
        # Fill the default scales (binary) if none are provided.
        # In practice, this would also never be used.
        if self.decision_bounds is None:
            ScaledIndex = [False] * self.votes.shape[1]
            MScaled = self.votes
        else:
            ScaledIndex = [scale["scaled"] for scale in self.decision_bounds]
            MScaled = self.Rescale()

        # Handle Missing Values
        votes_filled = self.FillNa(MScaled, ScaledIndex)

        # Consensus - Row Players
        # New Consensus Reward
        PlayerInfo = self.GetRewardWeights(votes_filled)
        AdjLoadings = PlayerInfo['FirstL']

        # Column Players (The Decision Creators)
        # Calculation of Reward for Decision Authors
        # Consensus - "Who won?" Decision Outcome
        # Simple matrix multiplication ... highest information density at RowBonus,
        # but need DecisionOutcomes.Raw to get to that.
        # atleast_1d keeps a single-decision result iterable: squeeze() alone
        # collapses it to a 0-d array, which cannot be indexed or enumerated.
        DecisionOutcomes_Raw = np.atleast_1d(
            np.dot(PlayerInfo['SmoothRep'], votes_filled).squeeze())

        # Discriminate Based on Contract Type
        for i in range(votes_filled.shape[1]):
            # Our Current best-guess for this Scaled Decision (weighted median)
            if ScaledIndex[i]:
                DecisionOutcomes_Raw[i] = weighted_median(votes_filled[:,i],
                                                          PlayerInfo["SmoothRep"].flatten())

        # .5 is obviously undesireable, this function travels from 0 to 1
        # with a minimum at .5
        Certainty = abs(2 * (DecisionOutcomes_Raw - 0.5))

        # Grading Authors on a curve.
        ConReward = self.GetWeight(Certainty)

        # How well did beliefs converge?
        Avg_Certainty = np.mean(Certainty)

        # The Outcome Itself
        # Discriminate Based on Contract Type
        DecisionOutcome_Adj = []
        for i, raw in enumerate(DecisionOutcomes_Raw):
            DecisionOutcome_Adj.append(self.Catch(raw))
            if ScaledIndex[i]:
                DecisionOutcome_Adj[i] = raw

        # Map scaled outcomes back onto their original [min, max] range.
        # NOTE(review): the "+ min" term assumes Rescale() subtracts the
        # minimum before dividing by the span; confirm against Rescale().
        DecisionOutcome_Final = []
        for i, raw in enumerate(DecisionOutcomes_Raw):
            DecisionOutcome_Final.append(DecisionOutcome_Adj[i])
            if ScaledIndex[i]:
                DecisionOutcome_Final[i] *= (self.decision_bounds[i]["max"] - self.decision_bounds[i]["min"])
                DecisionOutcome_Final[i] += self.decision_bounds[i]["min"]

        # Participation
        # Information about missing values
        NAmat = self.votes * 0
        NAmat[NAmat.mask] = 1  # indicator matrix for missing

        # Participation Within Decisions (Columns)
        # % of reputation that answered each Decision
        ParticipationC = 1 - np.dot(PlayerInfo['SmoothRep'], NAmat)

        # Participation Within Agents (Rows)
        # Many options
        # 1- Democracy Option - all Decisions treated equally.
        ParticipationR = 1 - NAmat.sum(axis=1) / NAmat.shape[1]

        # General Participation
        PercentNA = 1 - np.mean(ParticipationC)

        # Possibly integrate two functions of participation? Chicken and egg problem...
        if self.verbose:
            print('*Participation Information*')
            print('Voter Turnout by question')
            print(ParticipationC)
            print('Voter Turnout across questions')
            print(ParticipationR)

        # Combine Information
        # Row
        NAbonusR = self.GetWeight(ParticipationR)
        RowBonus = NAbonusR * PercentNA + PlayerInfo['SmoothRep'] * (1 - PercentNA)

        # Column
        NAbonusC = self.GetWeight(ParticipationC)
        ColBonus = NAbonusC * PercentNA + ConReward * (1 - PercentNA)

        Output = {
            'Original': self.votes.base,
            'Filled': votes_filled.base,
            'Agents': {
                'OldRep': PlayerInfo['OldRep'][0],
                'ThisRep': PlayerInfo['ThisRep'][0],
                'SmoothRep': PlayerInfo['SmoothRep'][0],
                'NArow': NAmat.sum(axis=1).base,
                'ParticipationR': ParticipationR.base,
                'RelativePart': NAbonusR.base,
                'RowBonus': RowBonus.base,
                },
            'Decisions': {
                'First Loading': AdjLoadings,
                'DecisionOutcomes_Raw': DecisionOutcomes_Raw,
                'Consensus Reward': ConReward,
                'Certainty': Certainty,
                'NAs Filled': NAmat.sum(axis=0),
                'ParticipationC': ParticipationC,
                'Author Bonus': ColBonus,
                'DecisionOutcome_Final': DecisionOutcome_Final,
                },
            'Participation': 1 - PercentNA,
            'Certainty': Avg_Certainty,
        }
        return Output