Beispiel #1
0
def test_make_array():
    """Check the length, contents and dtype of arrays from ``ds.make_array``."""
    # A single argument produces a one-element array.
    assert len(ds.make_array(0)) == 1

    # Integer arguments keep their values and are stored as int64.
    ints = ds.make_array(2, 3, 4)
    assert sum(ints) == 9
    assert ints.dtype == "int64"

    # Three-character strings are stored as a fixed-width unicode array.
    words = ds.make_array("foo", "bar")
    assert words.dtype == "<U3"
Beispiel #2
0
def game_filter(csv_file):
    """
    Return the ids of games that were close late in the fourth period.

    Reads a play-by-play CSV (one of the EightThirtyFour data sets), keeps
    the rows of period 4 whose converted 'PCTIMESTRING' value is at most 6.5,
    and, for every game, looks at the first such row that carries a score.
    The game is kept when the two numbers of that 'A-B' score differ by 10
    or less.

    Parameters
    ----------
    csv_file : str
        Path of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        Ids of the close games, in the order produced by
        ``group('GAME_ID')``. Games without any scored row inside the time
        window are skipped instead of raising.
    """
    pbp = Table().read_table(csv_file)
    unique_games = pbp.group('GAME_ID').column(0)
    # NOTE(review): debug-style output kept so existing callers see the same
    # side effect; consider removing it.
    print(unique_games)

    last_quarter = pbp.where('PERIOD', predicates.are.equal_to(4))
    transformed_minutes = last_quarter.apply(time_string_to_number,
                                             'PCTIMESTRING')
    last_quarter_and_minutes = last_quarter.with_column(
        'TIME', transformed_minutes)
    # NOTE(review): the original description said "last 6 minutes" but the
    # threshold is 6.5; presumably TIME is expressed in minutes -- confirm.
    late_plays = last_quarter_and_minutes.where(
        'TIME', predicates.are.below_or_equal_to(6.5))
    # The score filter does not depend on the game, so apply it once here
    # rather than once per game inside the loop.
    scored_plays = late_plays.where(
        'SCORE', predicates.are.not_equal_to('nan'))

    close_ids = []
    for game in unique_games:
        game_scores_only = scored_plays.where(
            'GAME_ID', predicates.are.equal_to(game)).select('TIME', 'SCORE')
        if game_scores_only.num_rows == 0:
            # No scored play in the window: row(0) would raise, so skip.
            continue
        score = game_scores_only.row(0).item(1)
        t1, t2 = score.split('-')
        if abs(int(t1) - int(t2)) <= 10:
            close_ids.append(game)

    # A single append onto the empty array yields the same array as
    # appending game by game, without re-copying it on every iteration.
    return np.append(make_array(), close_ids)
Beispiel #3
0
def test_sample_proportions():
    """Check the shape and value range of ``ds.sample_proportions`` output."""
    # 1000 draws over 50 equally likely categories: one proportion per
    # category, and the proportions add up to 1 (up to rounding).
    uniform = ds.sample_proportions(1000, np.ones(50) / 50)
    assert len(uniform) == 50
    assert _round_eq(1, sum(uniform))

    # With a sample of size 2 each proportion is expected to be 0, 0.5 or 1.
    # The check must go through all(): asserting the bare list would always
    # pass, because a non-empty list is truthy whatever it contains.
    pair = ds.sample_proportions(2, ds.make_array(.2, .3, .5))
    assert all(x in (0, 0.5, 1) for x in pair)
Beispiel #4
0
def test_proportions_from_distribution():
    """Check that ``ds.proportions_from_distribution`` adds a sample column."""
    t = ds.Table().with_column('probs', np.ones(50) / 50)
    u = ds.proportions_from_distribution(t, 'probs', 1000)

    # The input table is left with its single column; the result has one
    # extra column and the same number of rows.
    assert t.num_columns == 1 and t.num_rows == 50
    assert u.num_columns == 2 and u.num_rows == 50

    # The second column of the result holds 50 values that add up to 1
    # (up to rounding).
    uniform = u.column(1)
    assert len(uniform) == 50
    assert _round_eq(1, sum(uniform))

    # With a sample of size 2 each proportion is expected to be 0, 0.5 or 1.
    # all() is required: asserting the bare list would always pass because a
    # non-empty list is truthy.
    # NOTE(review): this exercises ds.sample_proportions rather than
    # proportions_from_distribution -- confirm that is intended.
    pair = ds.sample_proportions(2, ds.make_array(.2, .3, .5))
    assert all(x in (0, 0.5, 1) for x in pair)
Beispiel #5
0
    def conditional_dist(self, label, given='', show_ev=False):
        """
        Tabulate the conditional distributions of one variable given each
        value of the other, together with that variable's marginal.

        Parameters
        ----------
        label : String
            Name of the variable whose distributions are shown. It must be
            equal to one of the two variable names of the joint distribution.
        given : String, optional
            Name of the conditioning variable. It is accepted so calls read
            naturally, but this method does not use its value.
        show_ev : bool, optional
            When True, an extra 'EV' entry is added for every distribution:
            the sum of its probabilities weighted by the values that
            ``evaluate`` returns for the variable's labels. It is a row when
            ``label`` is the Y variable and a column when it is the X
            variable.

        Returns
        -------
        JointDistribution Table

        Raises
        ------
        AssertionError
            If ``label`` matches neither variable name.

        Examples
        --------
        >>> coins = Table().values('Coin1', ['H', 'T'], 'Coin2', ['H','T']).probabilities(np.array([0.24, 0.36, 0.16,0.24])).to_joint()
        >>> coins.conditional_dist('Coin1', 'Coin2')
                                  Coin1=H  Coin1=T  Sum
        Dist. of Coin1 | Coin2=H      0.6      0.4  1.0
        Dist. of Coin1 | Coin2=T      0.6      0.4  1.0
        Marginal of Coin1             0.6      0.4  1.0
        >>> coins.conditional_dist('Coin2', 'Coin1')
                 Dist. of Coin2 | Coin1=H  Dist. of Coin2 | Coin1=T  Marginal of Coin2
        Coin2=H                       0.4                       0.4                0.4
        Coin2=T                       0.6                       0.6                0.6
        Sum                           1.0                       1.0                1.0
        """
        # TODO Refactor this function.
        if label == self._Y_column_label:
            marginals = self.both_marginals()
            # Same row labels as the marginals table, last one shown as 'Sum'.
            row_labels = np.append(marginals.index[0:-1], 'Sum')
            table = marginals.apply(conditional, axis=0).set_index(row_labels)

            # Weighted sum per column; the trailing 'Sum' row is left out.
            probs = table.to_numpy()[:-1, :]
            values = np.array([evaluate(name) for name in list(self.index)])
            expectations = [
                sum(probs[:, col] * values) for col in range(len(probs[0]))
            ]

            # Every column but the last is a conditional distribution; the
            # last one is the marginal.
            headers = table.columns
            prefix = 'Dist. of {0} | '.format(self._Y_column_label)
            titles = [prefix + header for header in headers[:-1]]
            titles.append('Marginal of {0}'.format(self._Y_column_label))
            table.columns = np.append(make_array(), titles)

            if show_ev:
                table.loc['EV'] = expectations
            return table

        if label == self._X_column_label:
            marginals = self.both_marginals()
            sum_header = 'Sum: Marginal of {0}'.format(self._Y_column_label)
            table = marginals.apply(conditional, axis=1).rename(
                columns={sum_header: 'Sum'})

            # Weighted sum per row; the trailing 'Sum' column is left out.
            probs = table.to_numpy()[:, :-1]
            values = np.array([evaluate(name) for name in list(self)])
            expectations = [sum(row * values) for row in probs]

            # Every row but the last is a conditional distribution; the last
            # one is the marginal.
            row_names = marginals.index
            prefix = 'Dist. of {0} | '.format(self._X_column_label)
            titles = [prefix + row_name for row_name in row_names[:-1]]
            titles.append('Marginal of {0}'.format(self._X_column_label))
            # set_index must receive an array here: a plain list would be
            # read as a list of column keys.
            relabelled = table.set_index(np.append(make_array(), titles))

            if show_ev:
                relabelled['EV'] = expectations
            return relabelled

        raise AssertionError(
            'Label does not correspond with existing variable name')