Exemple #1
0
def test_filters():
    """Test the ``Filter*`` classes.

    Builds a ten-row data frame with a string column (``id``), a column of
    string lists (``str_list``), a float column (``float_val``) and a
    timestamp column (``time``). Then checks which rows ``FilterContains``
    and ``FilterInterval`` select from it, and finally prints the filters
    to exercise their conversion to strings.
    """
    from clair.diagram import FilterInterval, FilterContains
    print("Start")

    #Create a test data frame
    #The string lists are passed directly to the constructor. Filling the
    #column afterwards with ``data["str_list"][i] = ...`` is chained
    #indexing, which Pandas does not guarantee to write into the frame.
    str_lists = [["foo"], ["bar"], ["foo", "bar"], ["baz"], None,
                 ["foo"], ["bar"], ["foo", "bar"], ["baz"], ["boum"]]
    data = pd.DataFrame({"id": ["foo-" + str(i) for i in range(10)],
                         "str_list": str_lists,
                         "float_val": np.linspace(2.1, 21, 10),
                         "time": pd.date_range(start="2000-1-1", periods=10,
                                               freq="D")})
    #Use the IDs as index, but keep them as a regular column too.
    data.set_index("id", drop=False, inplace=True)
    print(data)

    #The expected IDs are compared as plain lists, so that a wrong number
    #of selected rows is an ordinary assertion failure.
    print("\nSelect string lists that contain string 'foo'.")
    contains_foo = FilterContains("str_list", "foo")
    data_foo = contains_foo.filter(data)
    print(data_foo)
    assert list(data_foo["id"]) == ["foo-0", "foo-2", "foo-5", "foo-7"]

    print("\nSelect strings that contain the substring '2'.")
    contains_2 = FilterContains("id", "2")
    data_2 = contains_2.filter(data)
    print(data_2)
    assert list(data_2["id"]) == ["foo-2"]

    print("\nSelect values between 3 and 9.")
    between_3_9 = FilterInterval("float_val", 3, 9)
    data_3_9 = between_3_9.filter(data)
    print(data_3_9)
    #TODO: Bug report to Pandas because of inconsistent behavior.
    #      The direct comparison of the float values is disabled for now:
    #      assert all(data_3_9["float_val"] == [4.2, 6.3, 8.4])
    assert list(data_3_9["id"]) == ["foo-1", "foo-2", "foo-3"]

    print("\nExclude values between 3 and 9.")
    exclude_3_9 = FilterInterval("float_val", 3., 9., inside=False)
    data_no_3_9 = exclude_3_9.filter(data)
    print(data_no_3_9)
    #Parameter ``inside``:
    #    * True vs. False must partition data frame into disjoint sets.
    #    * Both options together must cover all elements of data frame.
    assert len(data_no_3_9) == 7
    assert len(data_3_9) == 3
    #Merging both selections must reproduce the original column.
    all_data = data_3_9.combine_first(data_no_3_9)
    assert all(all_data["float_val"] == data["float_val"])

    print("\nSelect select dates between 2000-1-3 and 2000-1-6 "
          "(excluding last date).")
    contains_3dates = FilterInterval("time",
                                     pd.Timestamp("2000-1-3"),
                                     pd.Timestamp("2000-1-6"))
    data_3dates = contains_3dates.filter(data)
    print(data_3dates)
    assert list(data_3dates["id"]) == ["foo-2", "foo-3", "foo-4"]

    print("\nTest converting filters to strings.")
    print(contains_foo)
    print(exclude_3_9)
    print(contains_3dates)