def main(args):
    """Partition a labelled data file into training and testing files.

    Opens ``args.label_path`` for reading and ``args.training_path`` /
    ``args.testing_path`` for writing, wraps the three file objects in
    ``labelled_data`` reader/writer objects, and delegates the actual split
    to ``partitioner.split_labels``.

    Args:
        args: Object exposing ``label_path``, ``training_path``,
            ``testing_path``, ``training_fraction`` and ``max_labels``
            attributes (presumably the parsed command-line arguments).
    """
    # The outputs are opened in text mode: labelled_data.Writer is exercised
    # against io.StringIO in its tests, i.e. it writes str, which a
    # binary-mode ('wb') file rejects with TypeError on Python 3.
    # newline='' keeps the file object from translating whatever line
    # terminator the writer emits, matching what binary mode used to do.
    with open(args.label_path) as label_file, \
            open(args.training_path, 'w', newline='') as training_file, \
            open(args.testing_path, 'w', newline='') as testing_file:
        label_reader = labelled_data.Reader(label_file)
        training_writer = labelled_data.Writer(training_file)
        testing_writer = labelled_data.Writer(testing_file)
        partitioner.split_labels(label_reader, training_writer,
                                 testing_writer, args.training_fraction,
                                 args.max_labels)
def test_writes_with_utf8_encoding(self):
    """A row containing a non-ASCII character is written out unchanged."""
    mock_file = io.StringIO()
    writer = labelled_data.Writer(mock_file)
    # '\u00f1' is the single character n-with-tilde.  Spelling it as the
    # UTF-8 byte pair '\xc3\xb1' inside a text string would instead yield two
    # unrelated Latin-1 characters, so the test would never exercise the
    # character it is named for.
    writer.writerow({
        'input': '2 jalape\u00f1os',
        'name': 'jalape\u00f1os',
        'qty': 2.0,
        'unit': '',
        'range_end': 0.0,
        'comment': '',
    })
    self.assertMultiLineEqual(
        ('input,name,qty,range_end,unit,comment\n'
         '2 jalape\u00f1os,jalape\u00f1os,2.0,0.0,,\n'),
        mock_file.getvalue())
def test_writes_with_utf8_encoding(self):
    """A row containing a non-ASCII character is written out unchanged."""
    mock_file = io.StringIO()
    writer = labelled_data.Writer(mock_file)
    # u"\u00f1" is the single character n-with-tilde.  Writing it as the
    # UTF-8 byte pair "\xc3\xb1" inside a unicode literal would instead give
    # two unrelated Latin-1 characters, so the fixture would not contain the
    # character the test is about.
    writer.writerow({
        "input": u"2 jalape\u00f1os",
        "name": u"jalape\u00f1os",
        "qty": 2.0,
        "unit": u"",
        "range_end": 0.0,
        "comment": u"",
    })
    self.assertMultiLineEqual(
        (u"input,name,qty,range_end,unit,comment\n"
         u"2 jalape\u00f1os,jalape\u00f1os,2.0,0.0,,\n"),
        mock_file.getvalue(),
    )
def test_writes_valid_rows(self):
    """writerows() emits the header followed by one CSV line per row."""
    garlic_row = {
        "input": u"4 to 6 large cloves garlic",
        "qty": 4.0,
        "unit": u"clove",
        "name": u"garlic",
        "range_end": 6.0,
        "comment": u"",
    }
    banana_row = {
        "input": u"3 bananas",
        "qty": 3.0,
        "unit": u"",
        "name": u"bananas",
        "comment": u"",
        "range_end": 0.0,
    }
    pepper_row = {
        "input": (u"2 1/2 pounds bell peppers (about 6 peppers in "
                  u"assorted colors), cut into 2-inch chunks"),
        "qty": 2.5,
        "unit": u"pound",
        "name": u"bell peppers",
        "range_end": 0.0,
        "comment": (u"(about 6 peppers in assorted colors), cut into "
                    u"2-inch chunks"),
    }

    # Fields that contain commas are expected to come back quoted.
    expected_lines = [
        "input,name,qty,range_end,unit,comment",
        "4 to 6 large cloves garlic,garlic,4.0,6.0,clove,",
        "3 bananas,bananas,3.0,0.0,,",
        ('"2 1/2 pounds bell peppers (about 6 peppers in assorted colors), '
         'cut into 2-inch chunks",bell peppers,2.5,0.0,pound,'
         '"(about 6 peppers in assorted colors), cut into 2-inch chunks"'),
    ]

    output = io.StringIO()
    csv_writer = labelled_data.Writer(output)
    csv_writer.writerows([garlic_row, banana_row, pepper_row])

    self.assertMultiLineEqual(
        "\n".join(expected_lines),
        output.getvalue().strip(),
    )
def test_writes_valid_rows_one_by_one(self):
    """Repeated writerow() calls emit the header plus one line per call."""
    rows_in_order = (
        {
            'input': '4 to 6 large cloves garlic',
            'qty': 4.0,
            'unit': 'clove',
            'name': 'garlic',
            'range_end': 6.0,
            'comment': '',
        },
        {
            'input': '3 bananas',
            'qty': 3.0,
            'unit': '',
            'name': 'bananas',
            'comment': '',
            'range_end': 0.0,
        },
        {
            'input': ('2 1/2 pounds bell peppers (about 6 peppers in '
                      'assorted colors), cut into 2-inch chunks'),
            'qty': 2.5,
            'unit': 'pound',
            'name': 'bell peppers',
            'range_end': 0.0,
            'comment': ('(about 6 peppers in assorted colors), cut into '
                        '2-inch chunks'),
        },
    )
    expected_text = '\n'.join((
        'input,name,qty,range_end,unit,comment',
        '4 to 6 large cloves garlic,garlic,4.0,6.0,clove,',
        '3 bananas,bananas,3.0,0.0,,',
        '"2 1/2 pounds bell peppers (about 6 peppers in assorted colors), '
        'cut into 2-inch chunks",bell peppers,2.5,0.0,pound,'
        '"(about 6 peppers in assorted colors), cut into 2-inch chunks"',
    ))

    sink = io.StringIO()
    row_writer = labelled_data.Writer(sink)
    # Feed the rows through the single-row API, one call per row.
    for row in rows_in_order:
        row_writer.writerow(row)

    self.assertMultiLineEqual(expected_text, sink.getvalue().strip())
def setUp(self):
    """Create in-memory training and testing files, each with a Writer."""
    training_buffer = io.StringIO()
    testing_buffer = io.StringIO()

    self.mock_training_file = training_buffer
    self.mock_training_writer = labelled_data.Writer(training_buffer)

    self.mock_testing_file = testing_buffer
    self.mock_testing_writer = labelled_data.Writer(testing_buffer)