Ejemplo n.º 1
0
def intersection(df, pairwise=False, **subset_args):
    """
    Count the size of intersections of subsets of predicted examples.

    E.g. count the overlap between the top k of two different models.

    Args:
        df: the result of to_dataframe(). For each row, ``row.step`` is
            read and ``row.step.get_result()['y']`` is passed to
            ``model.y_subset()``.
        pairwise: when False, return the size of the mutual intersection
            between all subsets. Otherwise return an n_steps x n_steps
            DataFrame whose i,j entry (for j >= i) is the number of
            examples in the intersection between the i and j step
            subsets. The diagonal holds each subset's own size; entries
            below the diagonal are never written and stay missing.
        **subset_args: arguments to be passed to model.y_subset()
            for each predict step.

    Returns:
        An integer if pairwise is False; otherwise a DataFrame indexed by
        ``df.index`` with positional integer columns ``0..n_steps-1``.
    """
    # Build a real list: the sets are indexed by position below, which a
    # lazy ``map`` object (Python 3) does not support.
    indexes = [
        set(model.y_subset(row.step.get_result()['y'], **subset_args).index)
        for _, row in df.iterrows()
    ]

    if not pairwise:
        return len(util.intersect(indexes))

    n_steps = len(indexes)
    r = pd.DataFrame(index=df.index, columns=range(n_steps))

    for i in range(n_steps):
        # Write through .iat (positional scalar setter) rather than through
        # r.values, which is not guaranteed to be a writable view of the frame.
        r.iat[i, i] = len(indexes[i])
        for j in range(i + 1, n_steps):
            r.iat[i, j] = len(indexes[i] & indexes[j])
    return r
Ejemplo n.º 2
0
def test_subset_k():
    """Check that y_subset(y, k=2) yields exactly the index labels 1 and 3."""
    selected = y_subset(y, k=2)
    assert set(selected.index) == {1, 3}
Ejemplo n.º 3
0
def test_subset_dropna():
    """Check that y_subset(y, dropna=True) yields index labels 0, 1, 3 and 4."""
    remaining = y_subset(y, dropna=True)
    assert set(remaining.index) == {0, 1, 3, 4}
Ejemplo n.º 4
0
def test_subset_query():
    """Check that y_subset(y, query="attr") yields only the index label 1."""
    matched = y_subset(y, query="attr")
    assert set(matched.index) == {1}
Ejemplo n.º 5
0
def apply_y(df, fn, **kwargs):
    """
    Run ``fn`` on the y-subset of each step's result.

    Args:
        df: passed through unchanged as the first argument of ``apply``.
        fn: callable invoked with the output of ``model.y_subset()``.
        **kwargs: arguments forwarded to ``model.y_subset()``.

    Returns: whatever ``apply(df, callback)`` returns.
    """
    # NOTE(review): ``apply`` is resolved from the enclosing module scope and
    # is not visible here; presumably it calls the callback once per step.
    def _on_step(step):
        y = step.get_result()['y']
        return fn(model.y_subset(y, **kwargs))

    return apply(df, _on_step)
Ejemplo n.º 6
0
def test_subset_k():
    """Check that y_subset(y, k=2) yields exactly the index labels 1 and 3."""
    expected_labels = {1, 3}
    subset = y_subset(y, k=2)
    assert set(subset.index) == expected_labels
Ejemplo n.º 7
0
def test_subset_dropna():
    """Check that y_subset(y, dropna=True) yields index labels 0, 1, 3 and 4."""
    expected_labels = {0, 1, 3, 4}
    subset = y_subset(y, dropna=True)
    assert set(subset.index) == expected_labels
Ejemplo n.º 8
0
def test_subset_query():
    """Check that y_subset(y, query='attr') yields only the index label 1."""
    expected_labels = {1}
    subset = y_subset(y, query='attr')
    assert set(subset.index) == expected_labels