예제 #1
0
 def test_record_to_json_and_back(self):
     """ Checks that results survive a round trip through JSON.

         The nisin genbank file is parsed and converted to records, wrapped
         in an AntismashResults instance, written to an in-memory JSON
         handle and read back again. The reloaded results must serialise to
         the same JSON, hold the same number of records, and produce the
         same line-by-line output from create_data_stream().
     """
     filename = get_path_to_nisin_genbank()
     # build the full list of parsed records while the handle is still
     # open, so the file is closed deterministically afterwards
     with open(filename) as handle:
         records = list(seqio.parse(handle, "genbank"))
     records = [
         Record.from_biopython(rec, taxon="bacteria") for rec in records
     ]
     rec_results = [{}, {}, {}]
     results = serialiser.AntismashResults(filename, records, rec_results,
                                           "dummy")
     json_handle = StringIO()
     results.write_to_file(json_handle)
     # rewind so that from_file() reads from the start of the buffer
     json_handle.seek(0)
     new_results = serialiser.AntismashResults.from_file(json_handle,
                                                         taxon="bacteria")
     assert results.to_json() == new_results.to_json()
     # check no records were lost
     assert len(new_results.records) == len(results.records)
     # check that the contents of the records is the same
     #  by converting to biopython and writing to genbanks
     original = self.create_data_stream(results.records)
     new = self.create_data_stream(new_results.records)
     oldvalue = original.getvalue()
     newvalue = new.getvalue()
     with TemporaryDirectory(change=True):
         # write both versions out, closing each file as soon as it is written
         with open("old.json", "w") as old_handle:
             old_handle.write(oldvalue)
         with open("new.json", "w") as new_handle:
             new_handle.write(newvalue)
         # NOTE(review): zip() stops at the shorter input, so trailing extra
         # lines on one side are not compared here
         for oldline, newline in zip(oldvalue.split('\n'),
                                     newvalue.split('\n')):
             assert oldline == newline
예제 #2
0
def read_data(sequence_file, options) -> serialiser.AntismashResults:
    """ Reads in the data to be used in the analysis run. Can be provided
        as a sequence file (fasta/genbank) or as a file of prior results.

        Arguments:
            sequence_file: A fasta/genbank file to read (or None)
            options: An antismash Config instance

        Returns:
            an AntismashResults instance, populated only if reusing results

        Raises:
            ValueError: if neither a sequence file nor prior results were
                        provided, or if the prior results file is empty
    """
    if not sequence_file and not options.reuse_results:
        raise ValueError("No sequence file or prior results to read")

    if sequence_file:
        records = record_processing.parse_input_sequence(
            sequence_file, options.taxon, options.minlength, options.start,
            options.end)
        # only the final path component is kept as the input file name,
        # and every record starts with its own empty results dictionary
        return serialiser.AntismashResults(
            sequence_file.rsplit(os.sep, 1)[-1], records,
            [{} for _ in records], __version__)

    logging.debug("Attempting to reuse previous results in: %s",
                  options.reuse_results)
    with open(options.reuse_results) as handle:
        # a single character is enough to tell an empty file from one with
        # content, so avoid loading the whole results file just to check
        if not handle.read(1):
            raise ValueError("No results contained in file: %s" %
                             options.reuse_results)
    return serialiser.AntismashResults.from_file(options.reuse_results,
                                                 options.taxon)
예제 #3
0
def read_data(sequence_file: Optional[str],
              options: ConfigType) -> serialiser.AntismashResults:
    """ Reads in the data to be used in the analysis run. Can be provided
        as a sequence file (fasta/genbank) or as a file of prior results.

        In both cases the "input_file" config value is updated to the
        results' input file name with its extension removed. When reusing
        results, the taxon stored in those results replaces a differing
        taxon in the config.

        Arguments:
            sequence_file: A fasta/genbank file to read (or None)
            options: An antismash Config instance

        Returns:
            an AntismashResults instance, populated only if reusing results

        Raises:
            ValueError: if neither a sequence file nor prior results were
                        provided, or if the prior results file is empty
    """
    if not sequence_file and not options.reuse_results:
        raise ValueError("No sequence file or prior results to read")

    if sequence_file:
        records = record_processing.parse_input_sequence(
            sequence_file,
            options.taxon,
            options.minlength,
            options.start,
            options.end,
            gff_file=options.genefinding_gff3)
        # only the final path component is kept as the input file name,
        # and every record starts with its own empty results dictionary
        results = serialiser.AntismashResults(
            sequence_file.rsplit(os.sep, 1)[-1],
            records,
            [{} for _ in records],
            __version__,
            taxon=options.taxon)
    else:
        logging.debug("Attempting to reuse previous results in: %s",
                      options.reuse_results)
        with open(options.reuse_results) as handle:
            # a single character is enough to tell an empty file from one
            # with content, so avoid loading the whole file just to check
            if not handle.read(1):
                raise ValueError("No results contained in file: %s" %
                                 options.reuse_results)
        results = serialiser.AntismashResults.from_file(options.reuse_results)
        for record in results.records:
            record.strip_antismash_annotations()
        # the taxon stored with the prior results wins over the config's
        if options.taxon != results.taxon:
            logging.info("Reusing taxon %s from prior results", results.taxon)
            update_config({"taxon": results.taxon})

    # store the input file name without its extension, for both branches
    update_config({"input_file": os.path.splitext(results.input_file)[0]})
    return results