def test_limit_to_record_complete(self):
    records = self.read_double_nisin()
    config.update_config({"limit_to_record": "bad_id"})
    with self.assertRaisesRegex(AntismashInputError, "no sequences matched filter"):
        record_processing.pre_process_sequences(records, self.options, self.genefinding)
def test_nisin_fasta_only(self):
    config.update_config({"genefinding_tool": "none"})
    filepath = path.get_full_path(__file__, "data", "nisin.fasta")
    records = record_processing.parse_input_sequence(filepath)
    assert len(records) == 1
    assert not records[0].get_cds_features()
    # make sure genefinding wasn't run with default options
    with self.assertRaisesRegex(AntismashInputError, "all records skipped"):
        record_processing.pre_process_sequences(records, self.options, self.genefinding)
    assert not self.genefinding.was_run
    assert not records[0].get_cds_features()

    # make sure genefinding was run when not 'none'
    records[0].skip = False
    config.update_config({"genefinding_tool": "not-none"})
    # due to no genes actually being marked, it'll raise an error
    with self.assertRaisesRegex(AntismashInputError, "all records skipped"):
        record_processing.pre_process_sequences(records, self.options, self.genefinding)
    # but genefinding was still run
    assert self.genefinding.was_run
    # still no features because we used dummy genefinding
    for record in records:
        assert not record.get_cds_features()
        assert record.skip.lower() == "no genes found"
def test_shotgun(self):
    filepath = path.get_full_path(__file__, "data", "wgs.gbk")
    records = record_processing.parse_input_sequence(filepath)
    with self.assertRaisesRegex(AntismashInputError,
                                "incomplete whole genome shotgun records are not supported"):
        record_processing.pre_process_sequences(records, self.options, self.genefinding)
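
# For context: an "incomplete whole genome shotgun" record is a WGS master
# entry that only references its contigs rather than carrying real sequence.
# A rough, illustrative check follows; it's an assumption about how such
# records could be detected, not antiSMASH's own code. Biopython's GenBank
# parser is believed to expose the WGS/WGS_SCAFLD header lines via the
# record's annotations dictionary.
from Bio import SeqIO


def is_incomplete_wgs(record) -> bool:
    """ Returns True if the record looks like a WGS master record """
    return bool(record.annotations.get("wgs") or record.annotations.get("wgs_scafld"))

# usage sketch: fail fast instead of running genefinding on placeholder sequence
#   for wgs_record in SeqIO.parse("wgs.gbk", "genbank"):
#       if is_incomplete_wgs(wgs_record):
#           raise AntismashInputError("incomplete whole genome shotgun records are not supported")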
def test_limit_to_record_partial(self):
    records = self.read_double_nisin()
    assert all(rec.skip is None for rec in records)
    config.update_config({"limit_to_record": records[0].id})
    records[0].id += "_changed"
    record_processing.pre_process_sequences(records, self.options, self.genefinding)
    assert not records[1].skip
    assert records[0].skip.startswith("did not match filter")
def test_nisin_fasta_gff(self):
    fasta = path.get_full_path(__file__, "data", "nisin.fasta")
    gff = path.get_full_path(__file__, "data", "nisin.gff3")
    config.update_config({"genefinding_gff3": gff})
    records = record_processing.parse_input_sequence(fasta, gff_file=gff)
    record_processing.pre_process_sequences(records, self.options, self.genefinding)
    assert not self.genefinding.was_run
    assert len(records[0].get_cds_features()) == 11
def test_limit(self):
    records = self.read_double_nisin()
    assert all(rec.skip is None for rec in records)
    assert not self.options.triggered_limit
    config.update_config({"limit": 1})
    record_processing.pre_process_sequences(records, self.options, self.genefinding)
    assert records[0].skip is None
    assert records[1].skip.startswith("skipping all but first 1")
    assert self.options.triggered_limit
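
# The limiting behaviour tested above is simple to sketch: records beyond the
# limit are kept, not dropped, and only marked with a skip reason so later
# stages ignore them. This helper is illustrative only; its name and message
# are assumptions matched to the assertions above, not antiSMASH's code.
def apply_record_limit(records, limit: int) -> bool:
    """ Marks all records after the first `limit` as skipped,
        returning True if any record was marked """
    triggered = False
    for record in records[limit:]:
        record.skip = f"skipping all but first {limit} records"
        triggered = True
    return triggered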
def test_duplicate_record_ids(self):
    records = self.read_double_nisin()
    assert records[0].id == records[1].id
    records = record_processing.pre_process_sequences(records, self.options,
                                                      self.genefinding)
    assert len(records) == 2
    assert records[0].id != records[1].id
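
# A minimal sketch of the deduplication this test expects: shared ids are
# renamed with a numeric suffix so every id becomes unique. The exact renaming
# scheme is an assumption here; the test only asserts uniqueness.
from collections import Counter, defaultdict


def make_record_ids_unique(records) -> None:
    """ Renames records with duplicated ids by appending a counter """
    counts = Counter(record.id for record in records)
    seen = defaultdict(int)
    for record in records:
        if counts[record.id] < 2:
            continue  # already unique, leave untouched
        seen[record.id] += 1
        record.id = f"{record.id}_{seen[record.id]}"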
def test_nisin(self):
    record = parse_input_sequence(get_path_to_nisin_fasta())[0]
    assert record.get_feature_count() == 0
    record = pre_process_sequences([record], self.options, genefinding)[0]
    assert record.get_feature_count() == 12
    # and make sure they're all CDS features
    assert len(record.get_cds_features()) == 12
def test_fumigatus_cluster(self):
    record = parse_input_sequence(self.data_file('fumigatus.cluster1.fna'),
                                  taxon="fungi")[0]
    assert record.get_feature_count() == 0
    record = pre_process_sequences([record], self.options, genefinding)[0]
    assert record.get_feature_count() == 11
    # and make sure they're all CDS features
    assert len(record.get_cds_features()) == 11
def test_records_with_bad_names(self):
    # reuse fumigatus and change the id to bad ids
    for bad in [
        ".",  # changes due to glimmerhmm
        "-bad",  # could cause a fasta file to be created that is interpreted as an arg
    ]:
        record = parse_input_sequence(self.data_file('fumigatus.cluster1.fna'),
                                      taxon="fungi")[0]
        record.id = bad
        record = pre_process_sequences([record], self.options, genefinding)[0]
        assert record.get_cds_features()
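
# Ids like these are dangerous because they end up in filenames and FASTA
# headers handed to external binaries. A rough illustration of the kind of
# sanitising involved (a hypothetical helper, not antiSMASH's actual one):
import re


def sanitise_record_id(record_id: str) -> str:
    """ Replaces characters that external tools like glimmerhmm alter or
        that could be misread as a command line option """
    cleaned = re.sub(r"[^A-Za-z0-9_]", "_", record_id)
    return cleaned or "unknown"

# e.g. sanitise_record_id(".") == "_" and sanitise_record_id("-bad") == "_bad",
# so neither can be mistaken for an argument or mangled by the tool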
def _run_antismash(sequence_file: Optional[str], options: ConfigType) -> int:
    """ The real run_antismash, assumes logging is set up around it """
    logging.info("antiSMASH version: %s", options.version)
    _log_found_executables(options)

    detection_modules = get_detection_modules()
    analysis_modules = get_analysis_modules()
    output_modules = get_output_modules()
    modules = detection_modules + analysis_modules + output_modules

    if options.list_plugins:
        list_plugins(modules)
        return 0

    options.all_enabled_modules = list(filter(lambda x: x.is_enabled(options), modules))

    if options.check_prereqs_only:
        try:
            check_prerequisites(modules, options)
        except RuntimeError:
            print("Some module prerequisites not satisfied")
            return 1
        print("All prerequisites satisfied")
        return 0
    else:
        check_prerequisites(options.all_enabled_modules, options)

    # start up profiling if relevant
    if options.profile:
        profiler = cProfile.Profile()
        profiler.enable()

    # ensure the provided options are valid
    if not verify_options(options, options.all_enabled_modules):
        return 1

    # check that at least one module will run
    if not options.all_enabled_modules:
        raise ValueError("No detection or analysis modules enabled")

    start_time = datetime.now()

    results = read_data(sequence_file, options)

    # reset module timings
    results.timings_by_record.clear()

    prepare_output_directory(options.output_dir, sequence_file or options.reuse_results)

    results.records = record_processing.pre_process_sequences(results.records, options,
                                                              cast(AntismashModule, genefinding))
    for record, module_results in zip(results.records, results.results):
        # skip if we're not interested in it
        if record.skip:
            continue
        logging.info("Analysing record: %s", record.id)
        timings = run_detection(record, options, module_results)
        # and skip analysis if detection didn't find anything
        if not record.get_regions():
            continue
        analysis_timings = analyse_record(record, options, analysis_modules, module_results)
        timings.update(analysis_timings)
        results.timings_by_record[record.id] = timings

    # Write results
    json_filename = os.path.join(options.output_dir, results.input_file)
    json_filename = os.path.splitext(json_filename)[0] + ".json"
    logging.debug("Writing json results to '%s'", json_filename)
    results.write_to_file(json_filename)

    # now that the json is out of the way, annotate the record
    # otherwise we could double annotate some areas
    annotate_records(results)

    # create relevant output files
    write_outputs(results, options)

    # save profiling data
    if options.profile:
        profiler.disable()
        write_profiling_results(profiler,
                                os.path.join(options.output_dir, "profiling_results"))

    running_time = datetime.now() - start_time

    # display module runtimes before total time
    if options.debug:
        log_module_runtimes(results.timings_by_record)

    logging.debug("antiSMASH calculation finished at %s; runtime: %s",
                  datetime.now().strftime("%Y-%m-%d %H:%M:%S"), str(running_time))
    logging.info("antiSMASH status: SUCCESS")
    return 0
def run_antismash(sequence_file: Optional[str], options) -> int:
    """ The complete antismash pipeline. Reads in data, runs detection and
        analysis modules over any records found, then outputs the results to
        file.

        Arguments:
            sequence_file: the sequence file to read in records from, can be
                           None if reusing results
            options: command line options as an argparse.Namespace

        Returns:
            0 if requested operations completed successfully, otherwise 1
            Exceptions may also be raised
    """
    logfile = options.logfile
    setup_logging(logfile=logfile, verbose=options.verbose, debug=options.debug)

    detection_modules = get_detection_modules()
    analysis_modules = get_analysis_modules()
    modules = detection_modules + analysis_modules

    if options.list_plugins:
        list_plugins(modules)
        return 0

    options.all_enabled_modules = [module for module in modules
                                   if module.is_enabled(options)]
    # converts from a namespace to an antismash.config.Config instance so
    # modules can't fiddle with it
    options = update_config(options)

    if options.check_prereqs_only:
        try:
            check_prerequisites(modules)
        except RuntimeError:
            print("Some module prerequisites not satisfied")
            return 1
        print("All prerequisites satisfied")
        return 0
    else:
        check_prerequisites(options.all_enabled_modules)

    # start up profiling if relevant
    if options.profile:
        profiler = cProfile.Profile()
        profiler.enable()

    # ensure the provided options are valid
    if not verify_options(options, options.all_enabled_modules):
        return 1  # TODO: change to a raise?

    # check that at least one module will run
    if not options.all_enabled_modules:
        raise ValueError("No detection or analysis modules enabled")

    start_time = datetime.now()

    results = read_data(sequence_file, options)

    # reset module timings
    results.timings_by_record = {}

    prepare_output_directory(options.output_dir, sequence_file or options.reuse_results)

    results.records = record_processing.pre_process_sequences(results.records, options,
                                                              genefinding)
    for seq_record, previous_result in zip(results.records, results.results):
        # skip if we're not interested in it
        if seq_record.skip:
            continue
        timings = run_detection(seq_record, options, previous_result)
        # and skip analysis if detection didn't find anything
        if not seq_record.get_clusters():
            continue
        analysis_timings = analyse_record(seq_record, options, analysis_modules,
                                          previous_result)
        timings.update(analysis_timings)
        results.timings_by_record[seq_record.id] = timings

    # Write results
    json_filename = os.path.join(options.output_dir, results.input_file)
    json_filename = os.path.splitext(json_filename)[0] + ".json"
    logging.debug("Writing json results to '%s'", json_filename)
    results.write_to_file(json_filename)

    # now that the json is out of the way, annotate the record
    # otherwise we could double annotate some areas
    annotate_records(results)

    # create relevant output files
    write_outputs(results, options)

    # save profiling data
    if options.profile:
        profiler.disable()
        write_profiling_results(profiler,
                                os.path.join(options.output_dir, "profiling_results"))

    running_time = datetime.now() - start_time

    # display module runtimes before total time
    if options.debug:
        log_module_runtimes(results.timings_by_record)

    logging.debug("antiSMASH calculation finished at %s; runtime: %s",
                  str(datetime.now()), str(running_time))
    logging.info("antiSMASH status: SUCCESS")
    return 0
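
# Both pipeline variants above wrap the run in the same stdlib cProfile
# enable/disable pattern. write_profiling_results itself isn't shown in this
# excerpt; the following is a self-contained sketch of what such a helper
# might do, using only the standard library (sorting by cumulative time is
# an assumption, not necessarily antiSMASH's choice):
import cProfile
import io
import pstats


def write_profiling_results(profiler: cProfile.Profile, target: str) -> None:
    """ Dumps human-readable stats from the given profiler to the given path """
    stream = io.StringIO()
    stats = pstats.Stats(profiler, stream=stream)
    stats.sort_stats("cumulative")  # assumption: most expensive call chains first
    stats.print_stats()
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(stream.getvalue())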
def run_on_records(self, records):
    record_processing.pre_process_sequences(records, self.options, self.genefinding)