def test_record_to_json_and_back(self):
    """ Round-trips records through JSON serialisation and verifies that
        nothing is lost or altered in the process.

        Fix: the genbank input handle and the old/new output files were
        opened without ever being closed (leaked descriptors, and writes
        could remain unflushed inside the temporary directory); all file
        access now uses context managers.
    """
    filename = get_path_to_nisin_genbank()
    # use a context manager so the input handle is closed after parsing
    with open(filename) as handle:
        records = list(seqio.parse(handle, "genbank"))
    records = [Record.from_biopython(rec, taxon="bacteria") for rec in records]
    rec_results = [{}, {}, {}]
    results = serialiser.AntismashResults(filename, records, rec_results, "dummy")
    json_handle = StringIO()
    results.write_to_file(json_handle)
    json_handle.seek(0)
    new_results = serialiser.AntismashResults.from_file(json_handle, taxon="bacteria")
    assert results.to_json() == new_results.to_json()
    # check no records were lost
    assert len(new_results.records) == len(results.records)
    # check that the contents of the records is the same
    # by converting to biopython and writing to genbanks
    original = self.create_data_stream(results.records)
    new = self.create_data_stream(new_results.records)
    oldvalue = original.getvalue()
    newvalue = new.getvalue()
    with TemporaryDirectory(change=True):
        # close (and flush) the dump files before comparing, instead of
        # leaking the handles from bare open().write() calls
        with open("old.json", "w") as handle:
            handle.write(oldvalue)
        with open("new.json", "w") as handle:
            handle.write(newvalue)
        for oldline, newline in zip(oldvalue.split('\n'), newvalue.split('\n')):
            assert oldline == newline
def read_data(sequence_file, options) -> serialiser.AntismashResults:
    """ Builds the AntismashResults used for an analysis run, either by
        parsing a fresh sequence file or by loading prior results.

        Arguments:
            sequence_file: a fasta/genbank file to read, or None
            options: an antismash Config instance

        Returns:
            an AntismashResults instance, populated only if reusing results

        Raises:
            ValueError: if neither a sequence file nor prior results exist
    """
    if not sequence_file and not options.reuse_results:
        raise ValueError("No sequence file or prior results to read")

    if sequence_file:
        # fresh input: parse the sequences and start with empty results
        records = record_processing.parse_input_sequence(
            sequence_file, options.taxon, options.minlength,
            options.start, options.end)
        base_name = sequence_file.rsplit(os.sep, 1)[-1]
        empty_results = [{} for _ in records]
        return serialiser.AntismashResults(base_name, records,
                                           empty_results, __version__)

    # otherwise load the previous run's results from file
    logging.debug("Attempting to reuse previous results in: %s", options.reuse_results)
    with open(options.reuse_results) as handle:
        contents = handle.read()
    if not contents:
        raise ValueError("No results contained in file: %s" % options.reuse_results)
    return serialiser.AntismashResults.from_file(options.reuse_results, options.taxon)
def read_data(sequence_file: Optional[str], options: ConfigType) -> serialiser.AntismashResults:
    """ Reads in the data to be used in the analysis run. Can be provided
        as a sequence file (fasta/genbank) or as a file of prior results.

        Also updates the global config: "input_file" is set to the result's
        input file name stripped of its extension, and "taxon" is overridden
        when reusing results recorded with a different taxon.

        Arguments:
            sequence_file: A fasta/genbank file to read (or None)
            options: An antismash Config instance

        Returns:
            a AntismashResults instance, populated only if reusing results

        Raises:
            ValueError: if neither a sequence file nor prior results exist
    """
    if not sequence_file and not options.reuse_results:
        raise ValueError("No sequence file or prior results to read")

    if sequence_file:
        records = record_processing.parse_input_sequence(
            sequence_file, options.taxon, options.minlength,
            options.start, options.end, gff_file=options.genefinding_gff3)
        results = serialiser.AntismashResults(sequence_file.rsplit(os.sep, 1)[-1],
                                              records, [{} for _ in records],
                                              __version__, taxon=options.taxon)
        # bug fix: store the filename root (splitext[0]), not the extension
        # (splitext[1]), matching the reuse branch below
        update_config({"input_file": os.path.splitext(results.input_file)[0]})
    else:
        logging.debug("Attempting to reuse previous results in: %s", options.reuse_results)
        with open(options.reuse_results) as handle:
            contents = handle.read()
        if not contents:
            raise ValueError("No results contained in file: %s" % options.reuse_results)
        results = serialiser.AntismashResults.from_file(options.reuse_results)
        # previous annotations must be cleared so modules rerun cleanly
        for record in results.records:
            record.strip_antismash_annotations()
        if options.taxon != results.taxon:
            logging.info("Reusing taxon %s from prior results", results.taxon)
            update_config({"taxon": results.taxon})
        update_config({"input_file": os.path.splitext(results.input_file)[0]})
    return results