def setUp(self):
    """Build ``self.grid`` from the class-level ``input_data`` text.

    The text is loaded into a RowSet, reduced with the ``isdata==1``
    filter, and the result is gridded by ``month`` and ``hashtag``.
    """
    stream = StringIO(self.input_data)
    loaded = rowset.RowSet()
    loaded.load_file(stream)
    self.grid = loaded.filter(['isdata==1']).grid_by('month', 'hashtag')
def test_forecast_error2(self):
    """autosplit() must raise ValueError for a ``!forecast:monthly:rain`` row."""
    # NOTE(review): this fixture literal looks whitespace-collapsed in this
    # view -- confirm its line breaks against the upstream file.
    fixture = """ #balance 0 100 1980-04-05 !forecast:monthly:rain """
    ledger = rowset.RowSet()
    ledger.load_file(StringIO(fixture))
    # Callable form of assertRaises: the call itself is the code under test.
    self.assertRaises(ValueError, ledger.autosplit)
def test_forecast_simple(self):
    """str(autosplit()) of a bare ``!forecast`` row must equal the input text."""
    # NOTE(review): this fixture literal looks whitespace-collapsed in this
    # view -- confirm its line breaks against the upstream file.
    fixture = """ #balance 0 100 1980-04-05 !forecast """
    ledger = rowset.RowSet()
    ledger.load_file(StringIO(fixture))
    rendered = str(ledger.autosplit())
    self.assertEqual(fixture, rendered)
def test_load_file3(self):
    """Any balance pragma must match the running balance.

    The fixture carries a ``#balance 100000`` pragma annotated as the wrong
    balance; ``load_file()`` is required to raise ``ValueError`` for it.
    """
    # NOTE(review): this fixture literal looks whitespace-collapsed in this
    # view -- confirm its line breaks against the upstream file.
    f = StringIO(""" 10 1972-02-03 comment7 #balance 100000 The wrong balance """)
    # Named ``rows`` (not ``set``) so the builtin ``set`` is not shadowed.
    rows = rowset.RowSet()
    with self.assertRaises(ValueError):
        rows.load_file(f)
def setUp(self):
    """Load the inline fixture text into a fresh RowSet stored on ``self.rows``."""
    # NOTE(review): this fixture literal looks whitespace-collapsed in this
    # view (its own text mentions comments and empty lines) -- confirm the
    # line breaks against the upstream file.
    stream = StringIO(""" # Files can contain comments and empty lines -10 1970-02-06 comment4 10 1970-01-05 comment1 -10 1970-01-10 comment2 #rent -10 1970-01-01 comment3 #water -10 1970-03-01 comment5 #rent -15 1970-01-11 comment6 #water !months:3 #balance -45 A comment """)
    self.rows = balance.RowSet()
    self.rows.load_file(stream)
def test_len(self):
    """Row counts: 9 rows once loaded, 18 rows after autosplit()."""
    # NOTE(review): this fixture literal looks whitespace-collapsed in this
    # view -- confirm its line breaks against the upstream file.
    fixture = """ #balance 0 100 1980-01-01 incoming comment -100 1980-01-02 outgoing comment 10 1980-01-03 a !test_bangtag 100 1980-01-04 a #test_hashtag 100 1984-02-29 !months:-1:5 100 1984-01-31 !months:4 100 1980-01-05 !months:3 """
    ledger = rowset.RowSet()
    ledger.load_file(StringIO(fixture))

    # Check the loaded size first, then the size of the split result.
    self.assertEqual(len(ledger), 9)
    split_rows = ledger.autosplit()
    self.assertEqual(len(split_rows), 18)
def test_rounding(self):
    """Splitting rounds down; the remainder lands on the first child row.

    100 over ``!months:3`` is expected to render as 34, 33, 33.
    """
    # NOTE(review): both literals look whitespace-collapsed in this view --
    # confirm their line breaks against the upstream file.
    expected = """ #balance 0 34 1980-01-05 !months:child 33 1980-02-05 !months:child 33 1980-03-05 !months:child """
    fixture = """ #balance 0 100 1980-01-05 !months:3 """

    ledger = rowset.RowSet()
    ledger.load_file(StringIO(fixture))
    rendered = str(ledger.autosplit())

    self.assertEqual(expected, rendered)
def test_forecast_until(self):
    """A monthly forecast with ``until:1980-10-01`` renders rows May..September 1980."""
    # NOTE(review): both literals look whitespace-collapsed in this view --
    # confirm their line breaks against the upstream file.
    expected = """ #balance 0 100 1980-05-05 !forecast:child:until:1980-10-01 100 1980-06-05 !forecast:child:until:1980-10-01 100 1980-07-05 !forecast:child:until:1980-10-01 100 1980-08-05 !forecast:child:until:1980-10-01 100 1980-09-05 !forecast:child:until:1980-10-01 """
    fixture = """ #balance 0 100 1980-05-05 !forecast:monthly:until:1980-10-01 """

    ledger = rowset.RowSet()
    ledger.load_file(StringIO(fixture))
    rendered = str(ledger.autosplit())

    self.assertEqual(expected, rendered)
def test_endofmonth(self):
    """Splitting a row dated the 31st clamps each child to its month's last day.

    1984-01-31 over ``!months:4`` is expected on Jan 31, Feb 29, Mar 31, Apr 30.
    """
    # NOTE(review): both literals look whitespace-collapsed in this view --
    # confirm their line breaks against the upstream file.
    expected = """ #balance 0 25 1984-01-31 !months:child 25 1984-02-29 !months:child 25 1984-03-31 !months:child 25 1984-04-30 !months:child """
    fixture = """ #balance 0 100 1984-01-31 !months:4 """

    ledger = rowset.RowSet()
    ledger.load_file(StringIO(fixture))
    rendered = str(ledger.autosplit())

    self.assertEqual(expected, rendered)
def test_leapday(self):
    """A row dated on a leap day can be split.

    1984-02-29 with ``!months:-1:5`` is expected as five rows of 20, dated
    the 29th of January through May 1984.
    """
    # NOTE(review): both literals look whitespace-collapsed in this view --
    # confirm their line breaks against the upstream file.
    expected = """ #balance 0 20 1984-01-29 !months:child 20 1984-02-29 !months:child 20 1984-03-29 !months:child 20 1984-04-29 !months:child 20 1984-05-29 !months:child """
    fixture = """ #balance 0 100 1984-02-29 !months:-1:5 """

    ledger = rowset.RowSet()
    ledger.load_file(StringIO(fixture))
    rendered = str(ledger.autosplit())

    self.assertEqual(expected, rendered)
def test_forecast_endless(self):
    """A ``!forecast:monthly`` row with no end date renders eight monthly rows.

    The expected output runs from 1981-01-05 through 1981-08-05.
    """
    # NOTE(review): both literals look whitespace-collapsed in this view --
    # confirm their line breaks against the upstream file.
    expected = """ #balance 0 100 1981-01-05 !forecast:child 100 1981-02-05 !forecast:child 100 1981-03-05 !forecast:child 100 1981-04-05 !forecast:child 100 1981-05-05 !forecast:child 100 1981-06-05 !forecast:child 100 1981-07-05 !forecast:child 100 1981-08-05 !forecast:child """
    fixture = """ #balance 0 100 1981-01-05 !forecast:monthly """

    ledger = rowset.RowSet()
    ledger.load_file(StringIO(fixture))
    rendered = str(ledger.autosplit())

    self.assertEqual(expected, rendered)
# NOTE(review): this is a fragment -- the enclosing function/loop header is
# not visible here, so the original nesting of the first statements (they may
# belong to a loop or branch body) must be confirmed before relying on this
# flat layout.
subset = list(rows)
unused = []
# Fold this batch into the running output pool and the leftover pool.
output_rows.extend(subset)
unused_rows.extend(unused)
# Quota is a fixed number of rows per selected image; whatever the output
# pool lacks relative to that quota is "unfulfilled".
num_requested = args.num_per_image * len(image_instances)
num_unfulfilled = num_requested - len(output_rows)
# Top up from the leftover pool; min() keeps the sample size within the pool.
# NOTE(review): random.sample raises ValueError for a negative sample size --
# confirm num_unfulfilled cannot go below zero here.
additional_rows = random.sample(unused_rows,
                                min(len(unused_rows), num_unfulfilled))
output_rows.extend(additional_rows)
# Summary report of how the sample was assembled.
print(
    "{} images in original set, {} images requested => {} images in subset"
    .format(num_total_images, args.max_images, len(image_instances)))
print("{} rows per image requested".format(args.num_per_image))
print("{} total rows requested".format(num_requested))
print("{} rows from rejected workers excluded".format(rejected_row_count))
print("{} unfulfilled (not enough samples for some images)".format(
    num_unfulfilled))
print("{} additional rows randomly sampled from unused".format(
    len(additional_rows)))
print("= {} total rows in sample".format(len(output_rows)))
# Write the sample out, reusing the headers of the source row set.
output_row_set = rowset.RowSet(row_set.headers, output_rows)
output_row_set.save(args.output_csv)
print("{} total rows written to file {}".format(len(output_rows),
                                                args.output_csv))
def setUp(self):
    """Load the class-level ``input_data`` text into ``self.rows``."""
    stream = StringIO(self.input_data)
    # Bind the attribute before loading, then load through a local alias.
    self.rows = ledger = rowset.RowSet()
    ledger.load_file(stream)
def setUp(self):
    """Load ``input_data`` and keep only the ``isdata==1`` rows on ``self.rows``."""
    stream = StringIO(self.input_data)
    loaded = rowset.RowSet()
    loaded.load_file(stream)
    # Only the filtered view is kept; the unfiltered set is not needed later.
    self.rows = loaded.filter(['isdata==1'])