async def test_json_to_csv(self):
    # Runs Merge.cli with the JSON file at self.temp_filename as the source
    # and a CSV file as the destination (destination key "src_url"), then
    # compares the complete text of the CSV file against the expected rows.
    # NOTE(review): another test_json_to_csv is defined right after this one;
    # if both live in the same class the later definition replaces this one.
    with non_existant_tempfile() as csv_tempfile:
        cli_args = [
            "dest=csv",
            "src=json",
            "-source-dest-filename",
            csv_tempfile,
            "-source-dest-key",
            "src_url",
            "-source-src-filename",
            self.temp_filename,
            "-source-src-allowempty",
            "-source-dest-allowempty",
            "-source-src-readwrite",
            "-source-dest-readwrite",
        ]
        await Merge.cli(*cli_args)

        # Read the file while the temp path is still managed by the context.
        written = Path(csv_tempfile).read_text()

        # Header line, then one "<src_url>,unlabeled,," row per entry in
        # self.repos, followed by a final newline.
        rows = [f"{repo.src_url},unlabeled,," for repo in self.repos]
        expected = "".join(
            (
                "src_url,label,prediction,confidence\n",
                "\n".join(rows),
                "\n",
            )
        )
        self.assertEqual(written, expected, "Incorrect data in csv file")
async def test_json_to_csv(self):
    # Runs Merge.cli with the JSON file at self.temp_filename as the source
    # and a CSV file as the destination (destination key "key"), then compares
    # the complete text of the CSV file against the expected rows.
    # NOTE(review): an earlier test_json_to_csv is defined just before this
    # one; if both live in the same class this definition replaces it.
    with non_existant_tempfile() as csv_tempfile:
        cli_args = [
            "dest=csv",
            "src=json",
            "-source-dest-filename",
            csv_tempfile,
            "-source-dest-key",
            "key",
            "-source-src-filename",
            self.temp_filename,
            "-source-src-allowempty",
            "-source-dest-allowempty",
            "-source-src-readwrite",
            "-source-dest-readwrite",
        ]
        await Merge.cli(*cli_args)

        # Read the file while the temp path is still managed by the context.
        written = Path(csv_tempfile).read_text()

        # Header line, then one "<key>,untagged" row per entry in
        # self.records, followed by a final newline.
        rows = [f"{record.key},untagged" for record in self.records]
        expected = "".join(("key,tag\n", "\n".join(rows), "\n"))
        self.assertEqual(written, expected, "Incorrect data in csv file")
async def test_csv_tag(self):
    # Exercises tags in a single CSV file: the JSON data is merged into a CSV
    # file, that file is merged into itself under the tag "sometag", and the
    # file is then loaded through CSVSource both without a tag and with
    # tag="sometag". Each load must yield len(self.repos) entries and the raw
    # file text must mention both "untagged" and "sometag".
    with non_existant_tempfile() as csv_tempfile:
        # Trailing flags passed unchanged to both Merge.cli invocations.
        shared_flags = (
            "-source-src-allowempty",
            "-source-dest-allowempty",
            "-source-src-readwrite",
            "-source-dest-readwrite",
        )

        # Move the pre-populated json data to a csv source
        with self.subTest(json_to_csv=True):
            await Merge.cli(
                "dest=csv",
                "src=json",
                "-source-dest-filename",
                csv_tempfile,
                "-source-src-filename",
                self.temp_filename,
                *shared_flags,
            )

        # Merge one tag to another within the same file
        with self.subTest(merge_same_file=True):
            await Merge.cli(
                "dest=csv",
                "src=csv",
                "-source-dest-filename",
                csv_tempfile,
                "-source-dest-tag",
                "sometag",
                "-source-src-filename",
                csv_tempfile,
                *shared_flags,
            )
            file_text = Path(csv_tempfile).read_text()
            for expected_tag in ("untagged", "sometag"):
                self.assertIn(expected_tag, file_text)

        # Check the untagged source first, then the tagged one. The untagged
        # config is built with filename only (no tag argument at all).
        read_back_cases = (
            (None, {}),
            ("sometag", {"tag": "sometag"}),
        )
        for tagged, tag_kwargs in read_back_cases:
            with self.subTest(tagged=tagged):
                config = CSVSourceConfig(filename=csv_tempfile, **tag_kwargs)
                async with CSVSource(config) as source, source() as sctx:
                    loaded = []
                    async for repo in sctx.repos():
                        loaded.append(repo)
                    self.assertEqual(len(loaded), len(self.repos))
                # Re-read after the source has been closed: both tags must
                # still be present in the file.
                file_text = Path(csv_tempfile).read_text()
                for expected_tag in ("sometag", "untagged"):
                    self.assertIn(expected_tag, file_text)