Beispiel #1
0
    def setUp(self):
        """Prepare ``self.grid`` for the tests in this case.

        Loads ``self.input_data`` into a RowSet, keeps the rows selected by
        the ``isdata==1`` filter and stores the result of
        ``grid_by('month', 'hashtag')`` on the test instance.
        """
        stream = StringIO(self.input_data)
        loaded = rowset.RowSet()
        loaded.load_file(stream)

        self.grid = loaded.filter(['isdata==1']).grid_by('month', 'hashtag')
Beispiel #2
0
    def test_forecast_error2(self):
        input_data = """
#balance 0
100 1980-04-05 !forecast:monthly:rain
"""
        rows = rowset.RowSet()
        rows.load_file(StringIO(input_data))

        with self.assertRaises(ValueError):
            rows.autosplit()
Beispiel #3
0
    def test_forecast_simple(self):
        input_data = """
#balance 0
100 1980-04-05 !forecast
"""
        rows = rowset.RowSet()
        rows.load_file(StringIO(input_data))

        got = str(rows.autosplit())

        self.assertEqual(input_data, got)
Beispiel #4
0
    def test_load_file3(self):
        """Any balance pragma must match the running balance
        """

        f = StringIO("""
10 1972-02-03 comment7
#balance 100000 The wrong balance
""")

        set = rowset.RowSet()
        with self.assertRaises(ValueError):
            set.load_file(f)
Beispiel #5
0
    def setUp(self):
        f = StringIO("""
# Files can contain comments and empty lines

-10 1970-02-06 comment4
10 1970-01-05 comment1
-10 1970-01-10 comment2 #rent
-10 1970-01-01 comment3 #water
-10 1970-03-01 comment5 #rent
-15 1970-01-11 comment6 #water !months:3
#balance -45 A comment
""")
        self.rows = balance.RowSet()
        self.rows.load_file(f)
Beispiel #6
0
    def test_len(self):
        """After splitting, we should have the right number of new rows"""
        input_data = """
#balance 0
100  1980-01-01 incoming comment
-100 1980-01-02 outgoing comment
10   1980-01-03 a !test_bangtag
100  1980-01-04 a #test_hashtag
100  1984-02-29 !months:-1:5
100  1984-01-31 !months:4
100  1980-01-05 !months:3
"""
        rows = rowset.RowSet()
        rows.load_file(StringIO(input_data))

        self.assertEqual(len(rows), 9)
        self.assertEqual(len(rows.autosplit()), 18)
Beispiel #7
0
    def test_rounding(self):
        """When splitting, round down and add the remainder to the first"""
        input_data = """
#balance 0
100  1980-01-05 !months:3
"""
        rows = rowset.RowSet()
        rows.load_file(StringIO(input_data))

        got = str(rows.autosplit())

        expected = """
#balance 0
34 1980-01-05 !months:child
33 1980-02-05 !months:child
33 1980-03-05 !months:child
"""

        self.assertEqual(expected, got)
Beispiel #8
0
    def test_forecast_until(self):
        input_data = """
#balance 0
100  1980-05-05 !forecast:monthly:until:1980-10-01
"""
        rows = rowset.RowSet()
        rows.load_file(StringIO(input_data))

        got = str(rows.autosplit())

        expected = """
#balance 0
100 1980-05-05 !forecast:child:until:1980-10-01
100 1980-06-05 !forecast:child:until:1980-10-01
100 1980-07-05 !forecast:child:until:1980-10-01
100 1980-08-05 !forecast:child:until:1980-10-01
100 1980-09-05 !forecast:child:until:1980-10-01
"""

        self.assertEqual(expected, got)
Beispiel #9
0
    def test_endofmonth(self):
        """When splitting, We clamp to the correct end of month"""
        input_data = """
#balance 0
100  1984-01-31 !months:4
"""
        rows = rowset.RowSet()
        rows.load_file(StringIO(input_data))

        got = str(rows.autosplit())

        expected = """
#balance 0
25 1984-01-31 !months:child
25 1984-02-29 !months:child
25 1984-03-31 !months:child
25 1984-04-30 !months:child
"""

        self.assertEqual(expected, got)
Beispiel #10
0
    def test_leapday(self):
        """We can split a leap day, if it is the original row date"""
        input_data = """
#balance 0
100  1984-02-29 !months:-1:5
"""
        rows = rowset.RowSet()
        rows.load_file(StringIO(input_data))

        got = str(rows.autosplit())

        expected = """
#balance 0
20 1984-01-29 !months:child
20 1984-02-29 !months:child
20 1984-03-29 !months:child
20 1984-04-29 !months:child
20 1984-05-29 !months:child
"""

        self.assertEqual(expected, got)
Beispiel #11
0
    def test_forecast_endless(self):
        input_data = """
#balance 0
100  1981-01-05 !forecast:monthly
"""
        rows = rowset.RowSet()
        rows.load_file(StringIO(input_data))

        got = str(rows.autosplit())

        expected = """
#balance 0
100 1981-01-05 !forecast:child
100 1981-02-05 !forecast:child
100 1981-03-05 !forecast:child
100 1981-04-05 !forecast:child
100 1981-05-05 !forecast:child
100 1981-06-05 !forecast:child
100 1981-07-05 !forecast:child
100 1981-08-05 !forecast:child
"""

        self.assertEqual(expected, got)
Beispiel #12
0
            subset = list(rows)
            unused = []

        output_rows.extend(subset)
        unused_rows.extend(unused)

    num_requested = args.num_per_image * len(image_instances)
    num_unfulfilled = num_requested - len(output_rows)

    additional_rows = random.sample(unused_rows,
                                    min(len(unused_rows), num_unfulfilled))

    output_rows.extend(additional_rows)

    print(
        "{} images in original set, {} images requested => {} images in subset"
        .format(num_total_images, args.max_images, len(image_instances)))
    print("{} rows per image requested".format(args.num_per_image))
    print("{} total rows requested".format(num_requested))
    print("{} rows from rejected workers excluded".format(rejected_row_count))
    print("{} unfulfilled (not enough samples for some images)".format(
        num_unfulfilled))
    print("{} additional rows randomly sampled from unused".format(
        len(additional_rows)))
    print("= {} total rows in sample".format(len(output_rows)))

    output_row_set = rowset.RowSet(row_set.headers, output_rows)
    output_row_set.save(args.output_csv)
    print("{} total rows written to file {}".format(len(output_rows),
                                                    args.output_csv))
Beispiel #13
0
 def setUp(self):
     """Load ``self.input_data`` into a fresh RowSet kept as ``self.rows``."""
     stream = StringIO(self.input_data)
     # Publish the rowset on the instance before loading, as the tests
     # only ever reach it through ``self.rows``.
     self.rows = loaded = rowset.RowSet()
     loaded.load_file(stream)
Beispiel #14
0
    def setUp(self):
        """Store the ``isdata==1`` subset of ``self.input_data`` as ``self.rows``.

        The complete rowset is only an intermediate; tests see the
        filtered result.
        """
        stream = StringIO(self.input_data)
        everything = rowset.RowSet()
        everything.load_file(stream)

        self.rows = everything.filter(['isdata==1'])