Example #1
0
 async def test_json_to_csv(self):
     """
     Merge the JSON source at ``self.temp_filename`` into a fresh CSV file,
     passing ``src_url`` as the destination key, then compare the complete
     text of the CSV file against the expected header and rows.
     """
     with non_existant_tempfile() as csv_tempfile:
         cli_args = [
             "dest=csv",
             "src=json",
             "-source-dest-filename",
             csv_tempfile,
             "-source-dest-key",
             "src_url",
             "-source-src-filename",
             self.temp_filename,
             "-source-src-allowempty",
             "-source-dest-allowempty",
             "-source-src-readwrite",
             "-source-dest-readwrite",
         ]
         await Merge.cli(*cli_args)
         written = Path(csv_tempfile).read_text()
         # Expected file: a header line, then one line per repo in
         # self.repos whose last two fields (prediction, confidence) are
         # empty, followed by a trailing newline.
         header = "src_url,label,prediction,confidence\n"
         rows = [f"{repo.src_url},unlabeled,," for repo in self.repos]
         expected = header + "\n".join(rows) + "\n"
         self.assertEqual(written, expected, "Incorrect data in csv file")
Example #2
0
 async def test_json_to_csv(self):
     """
     Merge the JSON source at ``self.temp_filename`` into a fresh CSV file,
     passing ``key`` as the destination key, then compare the complete text
     of the CSV file against the expected header and rows.
     """
     with non_existant_tempfile() as csv_tempfile:
         cli_args = [
             "dest=csv",
             "src=json",
             "-source-dest-filename",
             csv_tempfile,
             "-source-dest-key",
             "key",
             "-source-src-filename",
             self.temp_filename,
             "-source-src-allowempty",
             "-source-dest-allowempty",
             "-source-src-readwrite",
             "-source-dest-readwrite",
         ]
         await Merge.cli(*cli_args)
         written = Path(csv_tempfile).read_text()
         # Expected file: a header line, then one "<key>,untagged" line per
         # record in self.records, followed by a trailing newline.
         header = "key,tag\n"
         rows = [f"{record.key},untagged" for record in self.records]
         expected = header + "\n".join(rows) + "\n"
         self.assertEqual(written, expected, "Incorrect data in csv file")
Example #3
0
 async def test_csv_tag(self):
     """
     Exercise tag handling when merging into a CSV file.

     The JSON source is first merged into a new CSV file, then that CSV file
     is merged into itself with the destination tag ``sometag``. The file
     text must then mention both ``untagged`` and ``sometag``, and loading
     the file with no tag and with ``tag="sometag"`` must each yield as many
     repos as ``self.repos`` holds.
     """
     # Flags passed unchanged to both merge invocations
     shared_flags = (
         "-source-src-allowempty",
         "-source-dest-allowempty",
         "-source-src-readwrite",
         "-source-dest-readwrite",
     )
     with non_existant_tempfile() as csv_tempfile:
         # Step 1: copy the JSON data at self.temp_filename into the CSV file
         with self.subTest(json_to_csv=True):
             await Merge.cli(
                 "dest=csv",
                 "src=json",
                 "-source-dest-filename",
                 csv_tempfile,
                 "-source-src-filename",
                 self.temp_filename,
                 *shared_flags,
             )
         # Step 2: merge the CSV file into itself, writing under "sometag"
         with self.subTest(merge_same_file=True):
             await Merge.cli(
                 "dest=csv",
                 "src=csv",
                 "-source-dest-filename",
                 csv_tempfile,
                 "-source-dest-tag",
                 "sometag",
                 "-source-src-filename",
                 csv_tempfile,
                 *shared_flags,
             )
         file_text = Path(csv_tempfile).read_text()
         self.assertIn("untagged", file_text)
         self.assertIn("sometag", file_text)
         # Step 3: load the file without a tag, then with tag="sometag".
         # After each load, re-read the file and check both tag names are
         # still present in its text.
         for tagged, tag_kwargs in ((None, {}), ("sometag", {"tag": "sometag"})):
             with self.subTest(tagged=tagged):
                 config = CSVSourceConfig(filename=csv_tempfile, **tag_kwargs)
                 async with CSVSource(config) as source:
                     async with source() as sctx:
                         loaded = [repo async for repo in sctx.repos()]
                         self.assertEqual(len(loaded), len(self.repos))
             file_text = Path(csv_tempfile).read_text()
             self.assertIn("sometag", file_text)
             self.assertIn("untagged", file_text)