Example #1
0
def main(args):
    """Partition the labelled data file into a training and a testing file.

    Opens ``args.label_path`` for reading and ``args.training_path`` /
    ``args.testing_path`` for writing, wraps them in ``labelled_data``
    reader/writer objects and hands everything to
    ``partitioner.split_labels`` together with ``args.training_fraction``
    and ``args.max_labels``.

    Args:
        args: Object exposing the attributes ``label_path``,
            ``training_path``, ``testing_path``, ``training_fraction`` and
            ``max_labels`` (presumably an argparse namespace -- confirm
            against the caller).
    """
    # NOTE(review): both outputs are opened in binary mode ('wb') while the
    # label file uses the default mode; confirm this matches what
    # labelled_data.Writer expects to be given.
    with open(args.label_path) as labels_in:
        with open(args.training_path, 'wb') as training_out:
            with open(args.testing_path, 'wb') as testing_out:
                partitioner.split_labels(
                    labelled_data.Reader(labels_in),
                    labelled_data.Writer(training_out),
                    labelled_data.Writer(testing_out),
                    args.training_fraction,
                    args.max_labels)
 def test_writes_with_utf8_encoding(self):
     """Check that a row containing a non-ASCII character is written intact.

     A single row whose ``input`` and ``name`` fields contain U+00F1
     (n with tilde) is written through ``labelled_data.Writer`` into an
     in-memory text buffer; the buffer must then hold the header row
     followed by that row, with the character unchanged.
     """
     mock_file = io.StringIO()
     writer = labelled_data.Writer(mock_file)
     # '\xf1' is the one code point for n-with-tilde. The previous literal,
     # '\xc3\xb1', is that character's UTF-8 *byte* pair; inside a text
     # string it denotes two unrelated characters (A-tilde, plus-minus
     # sign), so the test never exercised the character it was named for.
     writer.writerow({
         'input': '2 jalape\xf1os',
         'name': 'jalape\xf1os',
         'qty': 2.0,
         'unit': '',
         'range_end': 0.0,
         'comment': '',
     })
     self.assertMultiLineEqual(
         ('input,name,qty,range_end,unit,comment\n'
          '2 jalape\xf1os,jalape\xf1os,2.0,0.0,,\n'),
         mock_file.getvalue())
 def test_writes_with_utf8_encoding(self):
     """Check that a row containing a non-ASCII character is written intact.

     A single row whose ``input`` and ``name`` fields contain U+00F1
     (n with tilde) is written through ``labelled_data.Writer`` into an
     in-memory text buffer; the buffer must then hold the header row
     followed by that row, with the character unchanged.
     """
     mock_file = io.StringIO()
     writer = labelled_data.Writer(mock_file)
     # u"\xf1" is the one code point for n-with-tilde. The previous literal,
     # u"\xc3\xb1", is that character's UTF-8 *byte* pair; inside a unicode
     # string it denotes two unrelated characters (A-tilde, plus-minus
     # sign), so the test never exercised the character it was named for.
     writer.writerow({
         "input": u"2 jalape\xf1os",
         "name": u"jalape\xf1os",
         "qty": 2.0,
         "unit": u"",
         "range_end": 0.0,
         "comment": u"",
     })
     self.assertMultiLineEqual(
         (u"input,name,qty,range_end,unit,comment\n"
          u"2 jalape\xf1os,jalape\xf1os,2.0,0.0,,\n"),
         mock_file.getvalue(),
     )
    def test_writes_valid_rows(self):
        """Check the CSV text produced by one ``writerows`` call with three rows.

        The rows are written through ``labelled_data.Writer`` into an
        in-memory text buffer. The buffer contents, with surrounding
        whitespace stripped, must equal the header line followed by one line
        per row; the fields that contain commas appear double-quoted in the
        expected text.
        """
        mock_file = io.StringIO()
        writer = labelled_data.Writer(mock_file)

        garlic_row = {
            "input": u"4 to 6 large cloves garlic",
            "qty": 4.0,
            "unit": u"clove",
            "name": u"garlic",
            "range_end": 6.0,
            "comment": u"",
        }
        banana_row = {
            "input": u"3 bananas",
            "qty": 3.0,
            "unit": u"",
            "name": u"bananas",
            "comment": u"",
            "range_end": 0.0,
        }
        pepper_row = {
            "input": (u"2 1/2 pounds bell peppers "
                      u"(about 6 peppers in assorted colors), "
                      u"cut into 2-inch chunks"),
            "qty": 2.5,
            "unit": u"pound",
            "name": u"bell peppers",
            "range_end": 0.0,
            "comment": (u"(about 6 peppers in assorted colors), "
                        u"cut into 2-inch chunks"),
        }
        writer.writerows([garlic_row, banana_row, pepper_row])

        # One entry per expected output line; joined without a trailing
        # newline because the actual value is stripped before comparison.
        expected_lines = [
            'input,name,qty,range_end,unit,comment',
            '4 to 6 large cloves garlic,garlic,4.0,6.0,clove,',
            '3 bananas,bananas,3.0,0.0,,',
            ('"2 1/2 pounds bell peppers (about 6 peppers in assorted colors), '
             'cut into 2-inch chunks",bell peppers,2.5,0.0,pound,'
             '"(about 6 peppers in assorted colors), cut into 2-inch chunks"'),
        ]
        actual = mock_file.getvalue().strip()
        self.assertMultiLineEqual("\n".join(expected_lines), actual)
    def test_writes_valid_rows_one_by_one(self):
        """Check the CSV text produced by three separate ``writerow`` calls.

        Each row is written individually through ``labelled_data.Writer``
        into an in-memory text buffer. The buffer contents, with surrounding
        whitespace stripped, must equal the header line followed by one line
        per row; the fields that contain commas appear double-quoted in the
        expected text.
        """
        mock_file = io.StringIO()
        writer = labelled_data.Writer(mock_file)

        rows = (
            {
                'input': '4 to 6 large cloves garlic',
                'qty': 4.0,
                'unit': 'clove',
                'name': 'garlic',
                'range_end': 6.0,
                'comment': '',
            },
            {
                'input': '3 bananas',
                'qty': 3.0,
                'unit': '',
                'name': 'bananas',
                'comment': '',
                'range_end': 0.0,
            },
            {
                'input': ('2 1/2 pounds bell peppers '
                          '(about 6 peppers in assorted colors), '
                          'cut into 2-inch chunks'),
                'qty': 2.5,
                'unit': 'pound',
                'name': 'bell peppers',
                'range_end': 0.0,
                'comment': ('(about 6 peppers in assorted colors), '
                            'cut into 2-inch chunks'),
            },
        )
        # Written strictly in order, one call per row.
        for row in rows:
            writer.writerow(row)

        expected = '\n'.join([
            'input,name,qty,range_end,unit,comment',
            '4 to 6 large cloves garlic,garlic,4.0,6.0,clove,',
            '3 bananas,bananas,3.0,0.0,,',
            ('"2 1/2 pounds bell peppers (about 6 peppers in assorted colors), '
             'cut into 2-inch chunks",bell peppers,2.5,0.0,pound,'
             '"(about 6 peppers in assorted colors), cut into 2-inch chunks"'),
        ])
        actual = mock_file.getvalue().strip()
        self.assertMultiLineEqual(expected, actual)
 def setUp(self):
     """Create in-memory training/testing buffers and a Writer over each.

     Sets ``mock_training_file`` / ``mock_testing_file`` to fresh
     ``io.StringIO`` objects and ``mock_training_writer`` /
     ``mock_testing_writer`` to ``labelled_data.Writer`` instances wrapping
     the corresponding buffer.
     """
     training_buffer = io.StringIO()
     self.mock_training_file = training_buffer
     self.mock_training_writer = labelled_data.Writer(training_buffer)

     testing_buffer = io.StringIO()
     self.mock_testing_file = testing_buffer
     self.mock_testing_writer = labelled_data.Writer(testing_buffer)