def _default(self):
    """ Read a knowledge base from the source paths and write it back out

    The output goes to the destination core/sequence paths when either of
    them is set on the parsed arguments; otherwise the source paths are
    reused as the output paths.
    """
    pargs = self.app.pargs
    knowledge_base = io.Reader().run(
        pargs.source_core, pargs.source_seq, strict=pargs.strict)

    # pick the output location once, then issue a single write
    if pargs.dest_core or pargs.dest_seq:
        out_core, out_seq = pargs.dest_core, pargs.dest_seq
    else:
        out_core, out_seq = pargs.source_core, pargs.source_seq

    io.Writer().run(knowledge_base, out_core, out_seq,
                    set_repo_metadata_from_path=False)
def test_read_write_eukaryote(self):
    """ Round-trip the eukaryote fixture through the writer and reader

    The fixture is read with ``taxon='eukaryote'``, written to temporary
    files, read again, and both the knowledge base and the sequence file
    are compared against the originals.
    """
    fixture_dir = os.path.join(os.path.dirname(__file__), 'fixtures')
    src_core = os.path.join(fixture_dir, 'eukaryote_core.xlsx')
    src_seq = os.path.join(fixture_dir, 'eukaryote_seq.fna')
    out_core = os.path.join(self.dir, 'tmp_eukaryote_core.xlsx')
    out_seq = os.path.join(self.dir, 'tmp_eukaryote_seq.fna')

    reader = io.Reader()
    objects = reader.run(src_core, seq_path=src_seq,
                         taxon='eukaryote', rewrite_seq_path=False)
    original_kb = objects[core.KnowledgeBase][0]

    io.Writer().run(out_core, original_kb, seq_path=out_seq,
                    taxon='eukaryote', data_repo_metadata=False)

    reread_kb = reader.run(out_core, src_seq, taxon='eukaryote')[core.KnowledgeBase][0]

    self.assertTrue(original_kb.is_equal(reread_kb))
    self.assertTrue(filecmp.cmp(out_seq, src_seq, shallow=False))
def test_convert(self):
    """ Convert xlsx -> csv -> xlsx and check nothing is lost at either hop

    After each conversion the converted core file is read back (against the
    reference sequence file) and compared with ``self.kb``, and the sequence
    file produced by the hop is compared byte-wise with its input.
    """
    core_files = [os.path.join(self.dir, name)
                  for name in ('core_1.xlsx', 'core_2-*.csv', 'core_3.xlsx')]
    seq_files = [os.path.join(self.dir, name)
                 for name in ('seq_1.fna', 'seq_2.fna', 'seq_3.fna')]

    io.Writer().run(core_files[0], self.kb, seq_path=seq_files[0],
                    data_repo_metadata=False)
    self.assertTrue(filecmp.cmp(seq_files[0], self.seq_path, shallow=False))

    # each hop converts file i into file i + 1
    hops = zip(core_files[:-1], seq_files[:-1], core_files[1:], seq_files[1:])
    for src_core, src_seq, dst_core, dst_seq in hops:
        io.convert(src_core, src_seq, dst_core, dst_seq)
        converted = io.Reader().run(dst_core, seq_path=self.seq_path)[core.KnowledgeBase][0]
        self.assertTrue(converted.is_equal(self.kb))
        self.assertTrue(filecmp.cmp(src_seq, dst_seq, shallow=False))
def test_write_read_sloppy(self):
    """ Read a workbook whose '!!KB' rows have been shuffled

    Reading the altered file is expected to raise ``ValueError``; the same
    read is expected to succeed while the
    ``CONFIG__DOT__wc_kb__DOT__io__DOT__strict`` environment variable is
    set to ``'0'``.
    """
    shuffled_core = os.path.join(self.dir, 'core.xlsx')
    written_seq = os.path.join(self.dir, 'test_seq.fna')

    io.Writer().run(shuffled_core, self.kb, seq_path=written_seq,
                    data_repo_metadata=False)

    # move the row at index 4 of the '!!KB' sheet to index 5
    workbook = wc_utils.workbook.io.read(shuffled_core)
    kb_sheet = workbook['!!KB']
    kb_sheet.insert(5, kb_sheet.pop(4))
    wc_utils.workbook.io.write(shuffled_core, workbook)

    reader = io.Reader()
    with self.assertRaisesRegex(ValueError, "cannot be loaded because"):
        reader.run(shuffled_core, seq_path=self.seq_path)

    guard = EnvironmentVarGuard()
    guard.set('CONFIG__DOT__wc_kb__DOT__io__DOT__strict', '0')
    with guard:
        objects = reader.run(shuffled_core, self.seq_path)
        sloppy_kb = objects[core.KnowledgeBase][0]

    self.assertTrue(sloppy_kb.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(self.seq_path, written_seq, shallow=False))
def test_convert_sloppy(self):
    """ Convert a workbook whose '!!KB' rows have been shuffled

    Conversion of the altered file is expected to raise ``ValueError``; it
    is expected to succeed while the
    ``CONFIG__DOT__wc_kb__DOT__io__DOT__strict`` environment variable is
    set to ``'0'``. The converted output is then converted once more and
    both results are compared with ``self.kb``.
    """
    def in_tmp(name):
        return os.path.join(self.dir, name)

    core_a, core_b, core_c = in_tmp('core_1.xlsx'), in_tmp('core_2-*.csv'), in_tmp('core_3.xlsx')
    seq_a, seq_b, seq_c = in_tmp('seq_1.fna'), in_tmp('seq_2.fna'), in_tmp('seq_3.fna')

    io.Writer().run(core_a, self.kb, seq_path=seq_a, data_repo_metadata=False)
    self.assertTrue(filecmp.cmp(seq_a, self.seq_path, shallow=False))

    # move the row at index 4 of the '!!KB' sheet to index 5
    workbook = wc_utils.workbook.io.read(core_a)
    kb_sheet = workbook['!!KB']
    kb_sheet.insert(5, kb_sheet.pop(4))
    wc_utils.workbook.io.write(core_a, workbook)

    with self.assertRaisesRegex(ValueError, "cannot be loaded because"):
        io.convert(core_a, seq_a, core_b, seq_b)

    guard = EnvironmentVarGuard()
    guard.set('CONFIG__DOT__wc_kb__DOT__io__DOT__strict', '0')
    with guard:
        io.convert(core_a, seq_a, core_b, seq_b)

    first_pass = io.Reader().run(core_b, seq_path=self.seq_path)[core.KnowledgeBase][0]
    self.assertTrue(first_pass.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_a, seq_b, shallow=False))

    io.convert(core_b, seq_b, core_c, seq_c)
    second_pass = io.Reader().run(core_c, seq_path=self.seq_path)[core.KnowledgeBase][0]
    self.assertTrue(second_pass.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_b, seq_c, shallow=False))
def test_convert_sloppy(self):
    """ Convert a workbook whose 'Knowledge base' rows have been shuffled

    Conversion of the altered file is expected to raise ``ValueError``
    unless ``strict=False`` is passed. The converted output is then
    converted once more and both results are compared with ``self.kb``.
    """
    def in_tmp(name):
        return os.path.join(self.dir, name)

    core_a, core_b, core_c = in_tmp('core_1.xlsx'), in_tmp('core_2-*.csv'), in_tmp('core_3.xlsx')
    seq_a, seq_b, seq_c = in_tmp('seq_1.fna'), in_tmp('seq_2.fna'), in_tmp('seq_3.fna')

    io.Writer().run(self.kb, core_a, seq_a, set_repo_metadata_from_path=False)
    self.assertTrue(filecmp.cmp(seq_a, self.seq_path, shallow=False))

    # move the first row of the 'Knowledge base' sheet to index 1
    workbook = wc_utils.workbook.io.read(core_a)
    kb_sheet = workbook['Knowledge base']
    kb_sheet.insert(1, kb_sheet.pop(0))
    wc_utils.workbook.io.write(core_a, workbook)

    expected_error = "The columns of worksheet 'Knowledge base' must be defined in this order"
    with self.assertRaisesRegex(ValueError, expected_error):
        io.convert(core_a, seq_a, core_b, seq_b)

    io.convert(core_a, seq_a, core_b, seq_b, strict=False)
    first_pass = io.Reader().run(core_b, self.seq_path)
    self.assertTrue(first_pass.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_a, seq_b, shallow=False))

    io.convert(core_b, seq_b, core_c, seq_c)
    second_pass = io.Reader().run(core_c, self.seq_path)
    self.assertTrue(second_pass.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_b, seq_c, shallow=False))
def test_normalize(self):
    """ Run the ``normalize`` command in place and with explicit destinations

    In both cases the files the command wrote are read back and compared
    with the knowledge base that was originally written.
    """
    core_src = path.join(self.tempdir, 'model-1.xlsx')
    seq_src = path.join(self.tempdir, 'seq-1.fna')
    core_dst = path.join(self.tempdir, 'model-2.xlsx')
    seq_dst = path.join(self.tempdir, 'seq-2.fna')

    kb = wc_kb.KnowledgeBase(id='kb', name='KB', version='0.0.1a', wc_kb_version='0.0.0')
    io.Writer().run(kb, core_src, seq_src, set_repo_metadata_from_path=False)

    # with same dest
    in_place_argv = ['normalize', core_src, seq_src]
    with __main__.App(argv=in_place_argv) as app:
        app.run()

    reread = io.Reader().run(core_src, seq_src)
    self.assertTrue(reread.is_equal(kb))

    # with different dest
    redirected_argv = [
        'normalize', core_src, seq_src,
        '--dest-core', core_dst,
        '--dest-seq', seq_dst,
    ]
    with __main__.App(argv=redirected_argv) as app:
        app.run()

    reread = io.Reader().run(core_dst, seq_dst)
    self.assertTrue(reread.is_equal(kb))
def test_convert(self):
    """ Run the ``convert`` command from xlsx to a csv glob pattern

    Passes when the 'Knowledge base' csv sheet exists in the temporary
    directory after the command has run.
    """
    def in_tmp(name):
        return path.join(self.tempdir, name)

    kb = wc_kb.KnowledgeBase(id='kb', name='KB', version='0.0.1a', wc_kb_version='0.0.0')
    core_in, seq_in = in_tmp('in.core.xlsx'), in_tmp('in.seq.fna')
    core_out, seq_out = in_tmp('out.core-*.csv'), in_tmp('out.seq.fna')
    io.Writer().run(kb, core_in, seq_in, set_repo_metadata_from_path=False)

    cli_args = ['convert', core_in, seq_in, core_out, seq_out]
    with __main__.App(argv=cli_args) as app:
        app.run()

    expected_sheet = in_tmp('out.core-Knowledge base.csv')
    self.assertTrue(path.isfile(expected_sheet))
def test_write_with_repo_metadata(self):
    """ Write a knowledge base with data repo metadata and read the metadata back

    The files are written inside a temporary test Git repository. The
    metadata read back is expected to carry a GitHub URL, the 'main'
    branch and a 40-character revision.
    """
    import os  # local import: needed to close the descriptors returned by mkstemp

    with tempfile.TemporaryDirectory() as temp_dir:
        # create temp git repo & write file into it
        test_repo_name = 'test_wc_kb_test_io'
        test_github_repo = GitHubRepoForTests(test_repo_name)
        test_github_repo.make_test_repo(temp_dir)

        try:
            # mkstemp hands back an open OS-level descriptor; only the path is
            # needed here, so close it right away instead of leaking it
            core_fd, core_path = tempfile.mkstemp(dir=temp_dir, suffix='.xlsx')
            os.close(core_fd)
            seq_fd, seq_path = tempfile.mkstemp(dir=temp_dir, suffix='.fna')
            os.close(seq_fd)

            # write data repo metadata in data_file
            writer = io.Writer()
            writer.run(core_path, self.kb, seq_path=seq_path, data_repo_metadata=True)

            # deliberately read metadata
            reader = io.Reader()
            objs_read = reader.run(core_path, seq_path=seq_path, read_metadata=True)
            data_repo_metadata = objs_read[utils.DataRepoMetadata][0]
            self.assertTrue(
                data_repo_metadata.url.startswith('https://github.com/'))
            self.assertEqual(data_repo_metadata.branch, 'main')
            self.assertEqual(len(data_repo_metadata.revision), 40)
        finally:
            # delete test repo, even when one of the steps above failed
            test_github_repo.delete_test_repo()
def test_rewrite_seq_path_in_read_write(self):
    """ Check how ``rewrite_seq_path`` affects equality with ``self.kb``

    The knowledge base is written twice (once with the writer's default,
    once with ``rewrite_seq_path=False``) and each output is read back with
    every combination of sequence file and reader ``rewrite_seq_path``
    setting.
    """
    def load_kb(core_file, seq_file, **reader_options):
        objects = io.Reader().run(core_file, seq_path=seq_file, **reader_options)
        return objects[core.KnowledgeBase][0]

    first_core = os.path.join(self.dir, 'core_1.xlsx')
    first_seq = os.path.join(self.dir, 'seq_1.fna')
    second_core = os.path.join(self.dir, 'core_2.xlsx')
    second_seq = os.path.join(self.dir, 'seq_2.fna')

    # written with the writer's default rewrite_seq_path
    io.Writer().run(first_core, self.kb, seq_path=first_seq, data_repo_metadata=False)
    own_seq_default = load_kb(first_core, first_seq)
    own_seq_keep = load_kb(first_core, first_seq, rewrite_seq_path=False)
    ref_seq_default = load_kb(first_core, self.seq_path)
    ref_seq_keep = load_kb(first_core, self.seq_path, rewrite_seq_path=False)
    self.assertFalse(own_seq_default.is_equal(self.kb))
    self.assertFalse(own_seq_keep.is_equal(self.kb))
    self.assertTrue(ref_seq_default.is_equal(self.kb))
    self.assertFalse(ref_seq_keep.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(first_seq, self.seq_path, shallow=False))

    # written with rewrite_seq_path=False
    io.Writer().run(second_core, self.kb, seq_path=second_seq,
                    rewrite_seq_path=False, data_repo_metadata=False)
    own_seq_default = load_kb(second_core, second_seq)
    own_seq_keep = load_kb(second_core, second_seq, rewrite_seq_path=False)
    ref_seq_default = load_kb(second_core, self.seq_path)
    ref_seq_keep = load_kb(second_core, self.seq_path, rewrite_seq_path=False)
    self.assertFalse(own_seq_default.is_equal(self.kb))
    self.assertTrue(own_seq_keep.is_equal(self.kb))
    self.assertTrue(ref_seq_default.is_equal(self.kb))
    self.assertTrue(ref_seq_keep.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(second_seq, self.seq_path, shallow=False))
def _default(self):
    """ Stamp the knowledge base files with the current ``wc_kb`` version

    The knowledge base is read from the core/sequence paths, its
    ``wc_kb_version`` is set to ``wc_kb.__version__`` and it is written
    back to the same paths.
    """
    pargs = self.app.pargs
    core_path, seq_path = pargs.path_core, pargs.path_seq

    knowledge_base = io.Reader().run(core_path, seq_path, strict=pargs.strict)
    knowledge_base.wc_kb_version = wc_kb.__version__

    writer = io.Writer()
    writer.run(knowledge_base, core_path, seq_path,
               set_repo_metadata_from_path=pargs.set_repo_metadata_from_path)
def test_rewrite_seq_path_in_read_write(self):
    """ Check how ``rewrite_seq_path`` affects equality with ``self.kb``

    The knowledge base is written twice (once with the writer's default,
    once with ``rewrite_seq_path=True``) and each output is read back with
    and without ``rewrite_seq_path=False``.
    """
    first_core = os.path.join(self.dir, 'core_1.xlsx')
    first_seq = os.path.join(self.dir, 'seq_1.fna')
    second_core = os.path.join(self.dir, 'core_2.xlsx')
    second_seq = os.path.join(self.dir, 'seq_2.fna')

    # first output, read back against the sequence file that was just written
    io.Writer().run(self.kb, first_core, first_seq, set_repo_metadata_from_path=False)
    rewritten = io.Reader().run(first_core, first_seq)
    untouched = io.Reader().run(first_core, first_seq, rewrite_seq_path=False)
    self.assertFalse(rewritten.is_equal(self.kb))
    self.assertTrue(untouched.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(first_seq, self.seq_path, shallow=False))

    # second output, read back against the reference sequence file
    io.Writer().run(self.kb, second_core, second_seq,
                    rewrite_seq_path=True, set_repo_metadata_from_path=False)
    rewritten = io.Reader().run(second_core, self.seq_path)
    untouched = io.Reader().run(second_core, self.seq_path, rewrite_seq_path=False)
    self.assertFalse(rewritten.is_equal(self.kb))
    self.assertTrue(untouched.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(second_seq, self.seq_path, shallow=False))
def _default(self):
    """ Read a knowledge base from the source paths and write it back out

    The output goes to the destination core/sequence paths when either of
    them is set on the parsed arguments; otherwise the source paths are
    reused. Data repo metadata is never written, and the output is written
    with ``protected`` set to the negation of the ``unprotected`` argument.
    """
    pargs = self.app.pargs

    objects = io.Reader().run(pargs.source_core, seq_path=pargs.source_seq,
                              taxon=pargs.taxon)
    knowledge_base = objects[core.KnowledgeBase][0]

    # pick the output location once, then issue a single write
    if pargs.dest_core or pargs.dest_seq:
        out_core, out_seq = pargs.dest_core, pargs.dest_seq
    else:
        out_core, out_seq = pargs.source_core, pargs.source_seq

    io.Writer().run(out_core, knowledge_base,
                    seq_path=out_seq,
                    taxon=pargs.taxon,
                    data_repo_metadata=False,
                    protected=(not pargs.unprotected))
def _default(self):
    """ Stamp the knowledge base files with the current ``wc_kb`` version

    The knowledge base is read from the core/sequence paths, its
    ``wc_kb_version`` is set to ``wc_kb.__version__`` and it is written
    back to the same paths.
    """
    pargs = self.app.pargs
    core_path, seq_path = pargs.path_core, pargs.path_seq

    objects = io.Reader().run(core_path, seq_path=seq_path, taxon=pargs.taxon)
    knowledge_base = objects[core.KnowledgeBase][0]
    knowledge_base.wc_kb_version = wc_kb.__version__

    io.Writer().run(core_path, knowledge_base,
                    seq_path=seq_path,
                    taxon=pargs.taxon,
                    data_repo_metadata=pargs.data_repo_metadata,
                    protected=(not pargs.unprotected))
def test_write_read(self):
    """ Write ``self.kb``, read it back, and write the result again

    The knowledge base that was read back must equal ``self.kb`` and the
    sequence file from the second write must match the reference sequence
    file byte-for-byte.
    """
    writer = io.Writer()

    first_core = os.path.join(self.dir, 'core.xlsx')
    writer.run(self.kb, first_core, set_repo_metadata_from_path=False)
    reread_kb = io.Reader().run(first_core, self.seq_path)

    second_core = os.path.join(self.dir, 'core2.xlsx')
    second_seq = os.path.join(self.dir, 'seq2.fna')
    writer.run(reread_kb, second_core, second_seq, set_repo_metadata_from_path=False)

    self.assertTrue(self.kb.is_equal(reread_kb))
    self.assertTrue(filecmp.cmp(self.seq_path, second_seq, shallow=False))
def test_write_read(self):
    """ Write ``self.kb``, read it back, and write the result again

    The knowledge base that was read back must equal ``self.kb`` and the
    sequence file from the second write must match the reference sequence
    file byte-for-byte.
    """
    writer = io.Writer()

    first_core = os.path.join(self.dir, 'core.xlsx')
    writer.run(first_core, self.kb, data_repo_metadata=False)
    objects = io.Reader().run(first_core, seq_path=self.seq_path)
    reread_kb = objects[core.KnowledgeBase][0]

    second_core = os.path.join(self.dir, 'core2.xlsx')
    second_seq = os.path.join(self.dir, 'seq2.fna')
    writer.run(second_core, reread_kb, second_seq, data_repo_metadata=False)

    self.assertTrue(self.kb.is_equal(reread_kb))
    self.assertTrue(filecmp.cmp(self.seq_path, second_seq, shallow=False))
def test_write_with_repo_md(self):
    """ Writing with ``set_repo_metadata_from_path=True`` fills in ``kb.url``

    The temporary files are created in the current directory. After the
    write, ``self.kb.url`` is expected to be one of the HTTPS or SSH forms
    of the wc_kb GitHub remote.
    """
    created_paths = []
    try:
        for suffix in ('.xlsx', '.fna'):
            # mkstemp returns an open descriptor; only the path is needed, so
            # close it immediately rather than leaking it
            handle, tmp_path = tempfile.mkstemp(suffix=suffix, dir='.')
            os.close(handle)
            created_paths.append(tmp_path)
        core_path, seq_path = created_paths

        self.assertEqual(self.kb.url, '')

        writer = io.Writer()
        writer.run(self.kb, core_path, seq_path, set_repo_metadata_from_path=True)

        self.assertIn(self.kb.url, [
            'https://github.com/KarrLab/wc_kb.git',
            'ssh://git@github.com/KarrLab/wc_kb.git',
            'git@github.com:KarrLab/wc_kb.git',
        ])
    finally:
        # remove the scratch files even when an assertion above fails, so
        # nothing is left behind in the working directory
        for tmp_path in created_paths:
            os.remove(tmp_path)
def test_write_without_cell_relationships(self):
    """ Writing must fail when a locus has no ``cell``

    A DNA species type with one transcription unit locus is added to the
    knowledge base, the locus' ``cell`` is cleared, and the writer is
    expected to raise ``ValueError``.
    """
    core_path = os.path.join(self.dir, 'core.xlsx')
    seq_path = os.path.join(self.dir, 'test_seq.fna')
    with open(seq_path, 'w') as fasta:
        fasta.write('>chr_x\nACGT\n')

    chromosome = core.DnaSpeciesType(id='chr_x', sequence_path=seq_path)
    self.kb.cell.species_types.append(chromosome)

    locus = prokaryote_schema.TranscriptionUnitLocus(id='tu_x_0')
    chromosome.loci.append(locus)
    locus.cell = None

    expected_error = 'must be set to the instance of `Cell`'
    with self.assertRaisesRegex(ValueError, expected_error):
        io.Writer().run(self.kb, core_path, seq_path, set_repo_metadata_from_path=False)
def test_read_flat_list_of_objects(self):
    """ The reader returns a dict by default and a list when grouping is off

    With ``group_objects_by_model=False`` the knowledge base is located in
    the returned list and compared with ``self.kb``.
    """
    core_path = os.path.join(self.dir, 'core.xlsx')
    io.Writer().run(core_path, self.kb, data_repo_metadata=False)

    reader = io.Reader()
    grouped = reader.run(core_path, seq_path=self.seq_path)
    self.assertIsInstance(grouped, dict)

    flat = reader.run(core_path, seq_path=self.seq_path, group_objects_by_model=False)
    self.assertIsInstance(flat, list)

    # the first KnowledgeBase instance in the flat list
    knowledge_bases = (item for item in flat if isinstance(item, core.KnowledgeBase))
    found_kb = next(knowledge_bases)
    self.assertTrue(found_kb.is_equal(self.kb))
def test_validate(self):
    """ The ``validate`` command reports a valid knowledge base

    The knowledge base is first checked directly with ``Validator``, then
    written to disk and checked through the command line, whose captured
    output must be 'Knowledge base is valid'.
    """
    kb = wc_kb.KnowledgeBase(id='kb', name='KB', version='0.0.1a', wc_kb_version='0.0.1')
    validation_result = Validator().run(kb, get_related=True)
    self.assertEqual(validation_result, None)

    core_file = path.join(self.tempdir, 'core.xlsx')
    seq_file = path.join(self.tempdir, 'seq.fna')
    io.Writer().run(kb, core_file, seq_file, set_repo_metadata_from_path=False)

    cli_args = ['validate', core_file, seq_file]
    with CaptureOutput() as captured:
        with __main__.App(argv=cli_args) as app:
            app.run()
        self.assertEqual(captured.get_text(), 'Knowledge base is valid')
def test_read_write_eukaryote(self):
    """ Round-trip the eukaryote fixture through the writer and reader

    Every read and write passes ``schema=False``. The knowledge base read
    back from the temporary files and the written sequence file are
    compared against the originals.
    """
    fixture_dir = os.path.join(os.path.dirname(__file__), 'fixtures')
    src_core = os.path.join(fixture_dir, 'eukaryote_core.xlsx')
    src_seq = os.path.join(fixture_dir, 'eukaryote_seq.fna')
    out_core = os.path.join(self.dir, 'tmp_eukaryote_core.xlsx')
    out_seq = os.path.join(self.dir, 'tmp_eukaryote_seq.fna')

    reader = io.Reader()
    original_kb = reader.run(src_core, src_seq, schema=False)

    io.Writer().run(original_kb, out_core, out_seq,
                    schema=False, set_repo_metadata_from_path=False)

    reread_kb = reader.run(out_core, src_seq, schema=False)

    self.assertTrue(original_kb.is_equal(reread_kb))
    self.assertTrue(filecmp.cmp(out_seq, src_seq, shallow=False))
def test_write_read_sloppy(self):
    """ Read a workbook whose 'Knowledge base' rows have been shuffled

    Reading the altered file is expected to raise ``ValueError`` unless
    ``strict=False`` is passed, in which case the result must equal
    ``self.kb``.
    """
    shuffled_core = os.path.join(self.dir, 'core.xlsx')
    written_seq = os.path.join(self.dir, 'test_seq.fna')

    io.Writer().run(self.kb, shuffled_core, written_seq,
                    set_repo_metadata_from_path=False)

    # move the first row of the 'Knowledge base' sheet to index 1
    workbook = wc_utils.workbook.io.read(shuffled_core)
    kb_sheet = workbook['Knowledge base']
    kb_sheet.insert(1, kb_sheet.pop(0))
    wc_utils.workbook.io.write(shuffled_core, workbook)

    reader = io.Reader()
    expected_error = "The columns of worksheet 'Knowledge base' must be defined in this order"
    with self.assertRaisesRegex(ValueError, expected_error):
        reader.run(shuffled_core, self.seq_path)

    sloppy_kb = reader.run(shuffled_core, self.seq_path, strict=False)

    self.assertTrue(sloppy_kb.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(self.seq_path, written_seq, shallow=False))
def test_validate_exception(self):
    """ The ``validate`` command exits with an error for an invalid knowledge base

    Two compartments with the same id are created; ``Validator`` must
    return something other than ``None`` and the command must raise
    ``SystemExit`` with a message starting 'Knowledge base is invalid: '.
    """
    kb = wc_kb.KnowledgeBase(id='kb', name='KB', version='0.0.1a', wc_kb_version='0.0.1')
    kb.cell = wc_kb.Cell(id='cell')
    # two compartments sharing the id 'c'
    for _ in range(2):
        kb.cell.compartments.create(id='c')

    validation_result = Validator().run(kb, get_related=True)
    self.assertNotEqual(validation_result, None)

    core_file = path.join(self.tempdir, 'core.xlsx')
    seq_file = path.join(self.tempdir, 'seq.fna')
    io.Writer().run(kb, core_file, seq_file, set_repo_metadata_from_path=False)

    cli_args = ['validate', core_file, seq_file]
    with self.assertRaisesRegex(SystemExit, '^Knowledge base is invalid: '):
        with __main__.App(argv=cli_args) as app:
            app.run()
def test_update_version_metadata(self):
    """ The ``update-version-metadata`` command sets ``wc_kb_version``

    A knowledge base with a ``wc_kb_version`` that differs from
    ``wc_kb.__version__`` is written, the command is run on the files, and
    the version read back must equal ``wc_kb.__version__``.
    """
    core_file = path.join(self.tempdir, 'core.xlsx')
    seq_file = path.join(self.tempdir, 'seq.fna')

    stale_kb = wc_kb.KnowledgeBase(id='kb', name='KB', version='0.0.1a', wc_kb_version='0.0.0')
    self.assertNotEqual(stale_kb.wc_kb_version, wc_kb.__version__)
    io.Writer().run(stale_kb, core_file, seq_file, set_repo_metadata_from_path=False)

    cli_args = ['update-version-metadata', core_file, seq_file, '--ignore-repo-metadata']
    with __main__.App(argv=cli_args) as app:
        app.run()

    updated_kb = io.Reader().run(core_file, seq_file)
    self.assertEqual(updated_kb.wc_kb_version, wc_kb.__version__)
def test_difference(self):
    """ Run the ``difference`` command on identical and differing knowledge bases

    Three knowledge bases are written; the first two share the same
    attribute values and the third has a different ``wc_kb_version``. The
    command's captured output is checked with and without
    ``--compare-files`` for both pairings.
    """
    # (core file name, sequence file name, wc_kb_version) for each knowledge base
    specs = [
        ('core1.xlsx', 'seq1.fna', '0.0.0'),
        ('core2.xlsx', 'seq2.fna', '0.0.0'),
        ('core3.xlsx', 'seq3.fna', '0.0.1'),
    ]
    written = []
    for core_name, seq_name, kb_version in specs:
        kb = wc_kb.KnowledgeBase(id='kb', name='KB', version='0.0.1a', wc_kb_version=kb_version)
        core_file = path.join(self.tempdir, core_name)
        seq_file = path.join(self.tempdir, seq_name)
        io.Writer().run(kb, core_file, seq_file, set_repo_metadata_from_path=False)
        written.append((core_file, seq_file))
    first, second, third = written

    object_diff = (
        'Objects (KnowledgeBase: "kb", KnowledgeBase: "kb") have different attribute values:\n '
        '`wc_kb_version` are not equal:\n 0.0.0 != 0.0.1')
    file_diff = 'Sheet Knowledge base:\n Row 8:\n Cell B: 0.0.0 != 0.0.1'

    # (left files, right files, extra flags, expected captured output)
    scenarios = [
        (first, second, [], 'Knowledge bases are identical'),
        (first, second, ['--compare-files'], 'Knowledge bases are identical'),
        (first, third, [], object_diff),
        (first, third, ['--compare-files'], file_diff),
    ]
    for left, right, flags, expected_output in scenarios:
        cli_args = ['difference'] + list(left) + list(right) + flags
        with CaptureOutput() as captured:
            with __main__.App(argv=cli_args) as app:
                app.run()
            self.assertEqual(captured.get_text(), expected_output)