def output_html(lr):
    """Score the lines of ``OUT_FILE`` with ``lr`` and render the best ones.

    Lines starting with ``"RT "`` are skipped, as is any line whose first 30
    characters were already seen on an earlier line.  The surviving lines are
    reversed, turned into a feature matrix with ``make_feature_matrix`` and
    scored with ``lr.predict_proba``; lines scoring below 0.6 are dropped.
    Each kept line is split on tabs and handed to ``filter.render`` as a dict
    with ``url``, ``score`` and ``text`` keys.

    Args:
        lr: object exposing ``predict_proba(X)`` that returns a 2-D array;
            column 1 is used as the score (presumably the positive-class
            probability of a fitted classifier -- confirm with the caller).
    """
    lines = []
    seen_prefixes = set()
    # ``open`` inside a ``with`` block replaces the Python-2-only ``file``
    # builtin and guarantees the handle is closed before ``render`` is given
    # the same path below.
    with open(OUT_FILE) as fi:
        for line in fi:
            if line.startswith("RT "):
                continue
            # Lines read from a file carry their newline only at the very
            # end, so "starts with a previously stored 30-char prefix" is the
            # same test as "has the same 30-char prefix"; a set makes that
            # check O(1) instead of a scan over every earlier line.
            prefix = line[:30]
            if prefix in seen_prefixes:
                continue
            seen_prefixes.add(prefix)
            lines.append(line)
    lines.reverse()

    X = make_feature_matrix(lines)
    ps = lr.predict_proba(X)[:, 1]

    data = []
    for line, p in zip(lines, ps):
        if p < 0.6:
            continue
        items = line.split('\t')
        # Fields 1 and 2 fill the path segments before and after "/status/".
        # NOTE(review): ``line`` still carries its trailing newline, so if
        # field 2 is the last field the URL ends with "\n" -- confirm the
        # input always has further columns.
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        data.append(dict(url=url, score=p, text=items[0]))
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(len(data))
    from filter import render
    render(data, OUT_FILE)
Beispiel #2
0
    def test_bad_date(self):
        # Every (column, value) pair below must make render() hand back an
        # error string rather than a table: 'a' and 'b' are columns that
        # aren't dates, and 'gibberish' is a string that isn't a date.
        bad_inputs = (
            ('a', '2015-7-31'),
            ('b', '2015-7-31'),
            ('date', 'gibberish'),
        )
        for column, value in bad_inputs:
            params = {
                'column': column,
                'condition': menu.index('Date is'),
                'value': value,
            }
            out = render(self.table, params)
            self.assertTrue(isinstance(out, str))
Beispiel #3
0
    def test_not_empty(self):
        # Rows 0 and 3 of the fixture are the ones expected to survive a
        # 'cell_is_not_empty' filter on column 'c'.
        actual = render(
            self.table,
            simple_params('c', 'cell_is_not_empty', 'nonsense'),
        )
        keep_mask = [True, False, False, True, False]
        wanted = self.table[keep_mask].reset_index(drop=True)
        assert_frame_equal(actual, wanted)

        # The condition should not require a value: an empty one must give
        # the same output as the junk one above.
        actual = render(
            self.table,
            simple_params('c', 'cell_is_not_empty', ''),
        )
        assert_frame_equal(actual, wanted)
Beispiel #4
0
    def test_empty(self):
        # Rows 1, 2 and 4 of the fixture are the ones expected to survive a
        # "cell_is_empty" filter on column "c".
        actual = render(
            self.table,
            simple_params("c", "cell_is_empty", "nonsense"),
        )
        keep_mask = [False, True, True, False, True]
        wanted = self.table[keep_mask].reset_index(drop=True)
        assert_frame_equal(actual, wanted)

        # The condition should not require a value: an empty one must give
        # the same output as the junk one above.
        actual = render(
            self.table,
            simple_params("c", "cell_is_empty", ""),
        )
        assert_frame_equal(actual, wanted)
Beispiel #5
0
    def test_not_contains(self):
        # One entry per scenario:
        # (value, casesensitive, regex, keep-menu label, expected row mask)
        scenarios = (
            # Case-insensitive, no regex, keep
            ('fred', False, False, 'Keep',
             [False, False, True, True, False]),
            # Case-sensitive, no regex, keep
            ('fred', True, False, 'Keep',
             [False, False, True, True, True]),
            # Case-sensitive, regex, keep
            ('f[a-zA-Z]+d', True, True, 'Keep',
             [False, False, True, True, True]),
            # Case-sensitive, regex, drop
            ('f[a-zA-Z]+d', True, True, 'Drop',
             [True, True, False, False, False]),
        )
        for value, casesensitive, regex, keep_label, mask in scenarios:
            params = {
                'column': 'a',
                'condition': menu.index('Text does not contain'),
                'value': value,
                'casesensitive': casesensitive,
                'regex': regex,
                'keep': keepmenu.index(keep_label),
            }
            out = render(self.table, params)
            # No index reset here: the output is compared against the
            # masked fixture as-is.
            ref = self.table[mask]
            self.assertTrue(out.equals(ref))
Beispiel #6
0
 def test_not_exactly_case_insensitive(self):
     # With case_sensitive=False, "x" and "X" are both expected to be
     # filtered out by "text_is_not_exactly" "x", leaving only "y".
     filter_params = simple_params(
         "a", "text_is_not_exactly", "x", case_sensitive=False
     )
     source = pd.DataFrame({"a": ["x", "X", "y"]})
     actual = render(source, filter_params)
     wanted = pd.DataFrame({"a": ["y"]})
     assert_frame_equal(actual, wanted)
def _test_render(
    arrow_table: pa.Table,
    params: Dict[str, Any],
    expected_table: Optional[pa.Table],
    expected_errors: Optional[List[I18nMessage]] = None,
):
    """Run ``render`` into a temporary file and assert on what it produced.

    ``render(arrow_table, params, path)`` is called with the path of a fresh
    temporary file.  Its return value is compared with ``expected_errors``;
    the file it wrote is then compared with ``expected_table``.

    Args:
        arrow_table: input table passed straight through to ``render``.
        params: parameters passed straight through to ``render``.
        expected_table: table the output file must contain, or ``None`` when
            the output file is expected to stay empty (zero bytes).
        expected_errors: value ``render`` is expected to return.  ``None``
            (the default) means an empty list.

    Raises:
        AssertionError: if the errors, column names, column types, column
            values or (for dictionary-typed columns) per-chunk dictionaries
            differ from what was expected.
    """
    # A ``None`` sentinel instead of a literal ``[]`` default, so no list
    # object is shared between calls.
    if expected_errors is None:
        expected_errors = []
    with tempfile.NamedTemporaryFile() as tf:
        path = Path(tf.name)
        actual_errors = render(arrow_table, params, path)
        # A zero-byte output file is treated as "no table was written".
        if path.stat().st_size == 0:
            actual_table = None
        else:
            with pa.ipc.open_file(path) as f:
                actual_table = f.read_all()
        assert actual_errors == expected_errors
        if expected_table is None:
            assert actual_table is None
        else:
            assert actual_table is not None
            assert actual_table.column_names == expected_table.column_names
            for output_column, expected_column in zip(
                actual_table.itercolumns(), expected_table.itercolumns()
            ):
                assert output_column.type == expected_column.type
                assert output_column.to_pylist() == expected_column.to_pylist()
                # Equal values do not imply equal dictionaries, so for
                # dictionary-typed columns the dictionaries themselves are
                # compared chunk by chunk as well.
                if pa.types.is_dictionary(output_column.type):
                    for output_chunk, expected_chunk in zip(
                        output_column.iterchunks(), expected_column.iterchunks()
                    ):
                        assert (
                            output_chunk.dictionary.to_pylist()
                            == expected_chunk.dictionary.to_pylist()
                        )
Beispiel #8
0
 def test_two_subfilters_or(self):
     # A single filter group whose two subfilters are OR-ed together:
     # A < 2 matches the first row, B > 3 matches the last row.
     table = pd.DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]})
     a_below_two = {
         "colname": "A",
         "condition": "number_is_less_than",
         "value": 2,
         "case_sensitive": False,
     }
     b_above_three = {
         "colname": "B",
         "condition": "number_is_greater_than",
         "value": 3,
         "case_sensitive": False,
     }
     or_group = {
         "operator": "or",
         "subfilters": [a_below_two, b_above_three],
     }
     params = {
         "keep": True,
         "filters": {"operator": "and", "filters": [or_group]},
     }
     actual = render(table, params)
     wanted = pd.DataFrame({"A": [1, 3], "B": [2, 4]})
     assert_frame_equal(actual, wanted)
Beispiel #9
0
 def test_two_subfilters_or(self):
     # A single filter group whose two subfilters are OR-ed together:
     # A < 2 matches the first row, B > 3 matches the last row.
     table = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
     a_below_two = {
         'colname': 'A',
         'condition': 'number_is_less_than',
         'value': 2,
         'case_sensitive': False,
     }
     b_above_three = {
         'colname': 'B',
         'condition': 'number_is_greater_than',
         'value': 3,
         'case_sensitive': False,
     }
     or_group = {
         'operator': 'or',
         'subfilters': [a_below_two, b_above_three],
     }
     params = {
         'keep': True,
         'filters': {'operator': 'and', 'filters': [or_group]},
     }
     actual = render(table, params)
     wanted = pd.DataFrame({'A': [1, 3], 'B': [2, 4]})
     assert_frame_equal(actual, wanted)
Beispiel #10
0
 def test_equals(self):
     # Plain happy path: only the first fixture row is expected to match
     # 'number_equals' '3' on column 'c'.
     actual = render(self.table, simple_params('c', 'number_equals', '3'))
     first_row_only = [True, False, False, False, False]
     wanted = self.table[first_row_only].reset_index(drop=True)
     assert_frame_equal(actual, wanted)
Beispiel #11
0
 def test_greater_equals(self):
     # Boundary check: the first row has b=2, so it must be included by
     # "greater than or equals 2".
     filter_params = simple_params(
         'b', 'number_is_greater_than_or_equals', '2'
     )
     actual = render(self.table, filter_params)
     keep_mask = [True, True, False, True, True]
     wanted = self.table[keep_mask].reset_index(drop=True)
     assert_frame_equal(actual, wanted)
Beispiel #12
0
 def test_less_equals(self):
     # Boundary check: the second and last rows have b=5, so they must be
     # included by "less than or equals 5".
     filter_params = simple_params(
         'b', 'number_is_less_than_or_equals', '5'
     )
     actual = render(self.table, filter_params)
     keep_mask = [True, True, False, False, True]
     wanted = self.table[keep_mask].reset_index(drop=True)
     assert_frame_equal(actual, wanted)
 def test_datetime_before(self):
     # Two timestamps carrying a +0500 offset, one minute either side of
     # 05:00; only the earlier one is expected to count as before
     # "2019-01-01" (presumably because the comparison happens in UTC --
     # confirm against render()).
     early = "2019-01-01T04:59+0500"
     late = "2019-01-01T05:01+0500"
     table = pd.DataFrame({"date": [early, late]})
     filter_params = simple_params("date", "date_is_before", "2019-01-01")
     actual = render(table, filter_params)
     wanted = pd.DataFrame({"date": [early]})
     assert_frame_equal(actual, wanted)
Beispiel #14
0
 def test_contains_regex_nan(self):
     # A NaN cell must not match the regex 'a': only the 'a' row is
     # expected in the output.
     table = pd.DataFrame({'A': ['a', np.nan]})
     filter_params = simple_params(
         'A', 'text_contains_regex', 'a', case_sensitive=True
     )
     actual = render(table, filter_params)
     wanted = pd.DataFrame({'A': ['a']})
     assert_frame_equal(actual, wanted)
Beispiel #15
0
 def test_contains_regex_parse_error_case_insensitive(self):
     # An unbalanced '(' is not a valid regex; render() is expected to
     # return the parse-error message instead of a table.
     table = pd.DataFrame({'A': ['a']})
     filter_params = simple_params(
         'A', 'text_contains_regex', '(', case_sensitive=False
     )
     outcome = render(table, filter_params)
     self.assertEqual(outcome, 'Regex parse error: missing ): (')
Beispiel #16
0
 def test_contains_regex_parse_error_case_insensitive(self):
     # An unbalanced "(" is not a valid regex; render() is expected to
     # return the parse-error message instead of a table.
     table = pd.DataFrame({"A": ["a"]})
     filter_params = simple_params(
         "A", "text_contains_regex", "(", case_sensitive=False
     )
     outcome = render(table, filter_params)
     self.assertEqual(outcome, "Regex parse error: missing ): (")
Beispiel #17
0
    def test_bad_date(self):
        column_error = "Column is not dates. Please convert to dates."
        value_error = "Value is not a date. Please enter a date and time."
        cases = (
            # columns that aren't dates -> error
            ("a", "2015-7-31", column_error),
            ("b", "2015-7-31", column_error),
            # string that isn't a date -> error
            ("date", "gibberish", value_error),
        )
        for column, value, message in cases:
            params = simple_params(column, "date_is", value)
            result = render(self.table, params)
            self.assertEqual(result, message)
Beispiel #18
0
 def test_contains_regex_nan(self):
     # A NaN cell must not match the regex "a": only the "a" row is
     # expected in the output.
     table = pd.DataFrame({"A": ["a", np.nan]})
     filter_params = simple_params(
         "A", "text_contains_regex", "a", case_sensitive=True
     )
     actual = render(table, filter_params)
     wanted = pd.DataFrame({"A": ["a"]})
     assert_frame_equal(actual, wanted)
Beispiel #19
0
 def test_contains_regex_case_insensitive(self):
     # With case_sensitive=False the regex 'a' is expected to match both
     # 'a' and 'A', but not 'b'.
     table = pd.DataFrame({'A': ['a', 'A', 'b']})
     filter_params = simple_params(
         'A', 'text_contains_regex', 'a', case_sensitive=False
     )
     actual = render(table, filter_params)
     wanted = pd.DataFrame({'A': ['a', 'A']})
     assert_frame_equal(actual, wanted)
Beispiel #20
0
 def test_contains_regex_case_insensitive(self):
     # With case_sensitive=False the regex "a" is expected to match both
     # "a" and "A", but not "b".
     table = pd.DataFrame({"A": ["a", "A", "b"]})
     filter_params = simple_params(
         "A", "text_contains_regex", "a", case_sensitive=False
     )
     actual = render(table, filter_params)
     wanted = pd.DataFrame({"A": ["a", "A"]})
     assert_frame_equal(actual, wanted)
Beispiel #21
0
 def test_date_after(self):
     # Boundary check: the first row is 2018-1-12 08:15, which is after
     # the midnight implied by a date given without a time -- yet the
     # expected mask below excludes it.
     filter_params = simple_params('date', 'date_is_after', '2018-01-12')
     actual = render(self.table, filter_params)
     keep_mask = [False, True, False, False, True]
     wanted = self.table[keep_mask].reset_index(drop=True)
     assert_frame_equal(actual, wanted)
Beispiel #22
0
    def test_bad_date(self):
        column_error = 'Column is not dates. Please convert to dates.'
        value_error = 'Value is not a date. Please enter a date and time.'
        cases = (
            # columns that aren't dates -> error
            ('a', '2015-7-31', column_error),
            ('b', '2015-7-31', column_error),
            # string that isn't a date -> error
            ('date', 'gibberish', value_error),
        )
        for column, value, message in cases:
            params = simple_params(column, 'date_is', value)
            result = render(self.table, params)
            self.assertEqual(result, message)
Beispiel #23
0
 def test_not_contains_regex(self):
     # Case-sensitive regex exclusion on column 'a': the last three
     # fixture rows are the ones expected to remain.
     filter_params = simple_params(
         'a', 'text_does_not_contain_regex', 'f[a-zA-Z]+d',
         case_sensitive=True
     )
     actual = render(self.table, filter_params)
     keep_mask = [False, False, True, True, True]
     wanted = self.table[keep_mask].reset_index(drop=True)
     assert_frame_equal(actual, wanted)
Beispiel #24
0
    def test_equals_non_number_errors(self):
        column_error = "Column is not numbers. Please convert to numbers."
        value_error = "Value is not a number. Please enter a valid number."
        cases = (
            # non-numeric columns should return an error message
            ("a", "3", column_error),
            ("date", "3", column_error),
            # non-numeric value should return an error message
            ("c", "gibberish", value_error),
        )
        for column, value, message in cases:
            params = simple_params(column, "number_equals", value)
            result = render(self.table, params)
            self.assertEqual(result, message)
Beispiel #25
0
 def test_category_equals(self):
     # Exact match on a categorical column: only 'foo' is expected to
     # remain, and the output is compared as a categorical frame too.
     table = pd.DataFrame({'A': ['foo', np.nan, 'bar']}, dtype='category')
     filter_params = simple_params(
         'A', 'text_is_exactly', 'foo', case_sensitive=True
     )
     actual = render(table, filter_params)
     wanted = pd.DataFrame({'A': ['foo']}, dtype='category')
     assert_frame_equal(actual, wanted)
Beispiel #26
0
 def test_category_equals(self):
     # Exact match on a categorical column: only "foo" is expected to
     # remain, and the output is compared as a categorical frame too.
     table = pd.DataFrame({"A": ["foo", np.nan, "bar"]}, dtype="category")
     filter_params = simple_params(
         "A", "text_is_exactly", "foo", case_sensitive=True
     )
     actual = render(table, filter_params)
     wanted = pd.DataFrame({"A": ["foo"]}, dtype="category")
     assert_frame_equal(actual, wanted)
Beispiel #27
0
    def test_equals_non_number_errors(self):
        column_error = 'Column is not numbers. Please convert to numbers.'
        value_error = 'Value is not a number. Please enter a valid number.'
        cases = (
            # non-numeric columns should return an error message
            ('a', '3', column_error),
            ('date', '3', column_error),
            # non-numeric value should return an error message
            ('c', 'gibberish', value_error),
        )
        for column, value, message in cases:
            params = simple_params(column, 'number_equals', value)
            result = render(self.table, params)
            self.assertEqual(result, message)
Beispiel #28
0
 def test_contains_case_insensitive(self):
     # Case-insensitive 'fred' search on column 'a': rows 0, 1 and 4 of
     # the fixture are the ones expected to match.
     filter_params = simple_params(
         'a', 'text_contains', 'fred', case_sensitive=False
     )
     actual = render(self.table, filter_params)
     keep_mask = [True, True, False, False, True]
     wanted = self.table[keep_mask].reset_index(drop=True)
     assert_frame_equal(actual, wanted)
Beispiel #29
0
 def test_contains_regex(self):
     # Case-sensitive regex search on column "a": only the first two
     # fixture rows are expected to match.
     filter_params = simple_params(
         "a", "text_contains_regex", "f[a-zA-Z]+d", case_sensitive=True
     )
     actual = render(self.table, filter_params)
     keep_mask = [True, True, False, False, False]
     wanted = self.table[keep_mask].reset_index(drop=True)
     assert_frame_equal(actual, wanted)
Beispiel #30
0
 def test_exactly(self):
     # Case-sensitive exact match of 'fred' on column 'a': only the first
     # fixture row is expected to match.
     filter_params = simple_params(
         'a', 'text_is_exactly', 'fred', case_sensitive=True
     )
     actual = render(self.table, filter_params)
     first_row_only = [True, False, False, False, False]
     wanted = self.table[first_row_only].reset_index(drop=True)
     assert_frame_equal(actual, wanted)
Beispiel #31
0
 def test_not_contains_case_sensitive(self):
     # Case-sensitive "fred" exclusion on column "a": the last three
     # fixture rows are the ones expected to remain.
     filter_params = simple_params(
         "a", "text_does_not_contain", "fred", case_sensitive=True
     )
     actual = render(self.table, filter_params)
     keep_mask = [False, False, True, True, True]
     wanted = self.table[keep_mask].reset_index(drop=True)
     assert_frame_equal(actual, wanted)