Ejemplo n.º 1
0
    def __init__(self, config, logger):
        """Prepare logging, load the query genome and build the components.

        Args:
            config: Configuration object. ``WORK_DIR`` and ``DEBUG`` are read
                here; the object is also handed to every component.
            logger: Logger that receives an additional file handler writing
                to ``application.log`` inside ``config.WORK_DIR``.
        """
        self.config = config
        self.logger = logger
        self.start_time = datetime.now()

        # Set/create the output directory first: the log file below is
        # opened inside config.WORK_DIR.
        create_output_directory(config)
        file_handler = FileHandler(
            os.path.join(config.WORK_DIR, "application.log"), mode="w")

        # Debug runs additionally record the level and the emitting location.
        if config.DEBUG:
            record_format = (
                "%(asctime)s %(levelname)8s %(message)s [%(module)s %(lineno)d]")
        else:
            record_format = "%(asctime)s %(message)s"
        file_handler.setFormatter(
            Formatter(record_format, datefmt='%Y/%m/%d %H:%M:%S'))
        logger.addHandler(file_handler)

        started_message = "DFAST pipeline started. (version {})".format(
            dfast_version)
        logger.info(started_message)
        destination_message = "Results will be generated into '{}'.".format(
            config.WORK_DIR)
        logger.info(destination_message)

        # Query genome; every component below operates on this one object.
        genome = Genome(config)
        self.genome = genome

        self.ltg = LocusTagGenerator(genome, config)
        self.fu = FeatureUtil(genome, config)
        self.sa = StructuralAnnotation(genome, config)
        self.fa = FunctionalAnnotation(genome, config)
        self.ddbj = DDBJsubmission(genome, config)
        self.genbank = GenBankSubmission(genome, config)
Ejemplo n.º 2
0
def fix_origin(input_file, output_file=None, offset=0, logger=None):
    """Run the annotation steps on a genome and write it back out as FASTA.

    A fresh ``Config`` is pointed at ``input_file`` and at a temporary work
    directory, structural and functional annotation are executed on the
    resulting ``Genome``, and its sequence records are written in FASTA
    format. According to the warning this function logs, the purpose is to
    locate a dnaA gene and fix the sequence origin of the chromosome; it
    must not be applied to a draft genome.
    NOTE(review): the origin shift itself presumably happens inside the
    annotation components - confirm there.

    Args:
        input_file: Path of the genome FASTA file (set as
            ``config.GENOME_FASTA``).
        output_file: Path of the FASTA file to write. If falsy, the records
            are written to ``sys.stdout``.
        offset: Value stored in the ``"offset"`` option of the first
            functional-annotation entry of the config.
        logger: Logger to use; defaults to ``getLogger(__name__)``.

    The temporary directory is removed and the logger level is restored even
    when annotation or writing fails, and the output file is always closed.
    """
    if logger is None:
        logger = getLogger(__name__)

    # When used as a library, only warnings are let through for the duration
    # of the call; the previous level is put back in the outer ``finally``.
    restore_level = __name__ != '__main__'
    if restore_level:
        loglevel_bkup = logger.level
        logger.setLevel(WARNING)

    try:
        logger.warning("[WARNING] Trying to locate a dnaA gene to fix the sequence origin of the chromosome. DO NOT APPLY THIS TO A DRAFT GENOME.")
        config = Config()
        config.GENOME_FASTA = input_file
        config.FUNCTIONAL_ANNOTATION[0]["options"]["offset"] = offset
        config.WORK_DIR = tempfile.mkdtemp()
        try:
            logger.info("A temporary working directory was created in {}".format(config.WORK_DIR))
            genome = Genome(config)

            sa = StructuralAnnotation(genome, config)
            fa = FunctionalAnnotation(genome, config)
            sa.execute()
            fa.execute()

            records = list(genome.seq_records.values())
            if output_file:
                # The context manager guarantees the FASTA is flushed and the
                # handle released; sys.stdout must stay open, so it is
                # handled separately.
                with open(output_file, "w") as f:
                    logger.info("The origin-fixed genome fasta file is written to {}.".format(output_file))
                    SeqIO.write(records, f, "fasta")
            else:
                SeqIO.write(records, sys.stdout, "fasta")
        finally:
            # Never leave the temporary directory behind, even on failure.
            shutil.rmtree(config.WORK_DIR)
            logger.info("The temporary working {} was cleaned up.".format(config.WORK_DIR))
    finally:
        if restore_level:
            logger.setLevel(loglevel_bkup)
Ejemplo n.º 3
0
class Pipeline():
    """Runs the annotation steps on one query genome and writes the results.

    Construction sets up file logging and instantiates every component;
    ``execute`` runs them in order, and ``cleanup`` removes intermediate data.
    """

    def __init__(self, config, logger):
        """Prepare logging, load the query genome and build the components.

        Args:
            config: Configuration object. ``WORK_DIR`` and ``DEBUG`` are read
                here; the object is also handed to every component.
            logger: Logger that receives an additional file handler writing
                to ``application.log`` inside ``config.WORK_DIR``.
        """
        self.config = config
        self.logger = logger
        self.start_time = datetime.now()

        # Set/create the output directory first: the log file below is
        # opened inside config.WORK_DIR.
        create_output_directory(config)
        log_file = os.path.join(config.WORK_DIR, "application.log")
        log_file_handler = FileHandler(log_file, mode="w")

        # Debug runs additionally record the level and the emitting location.
        if config.DEBUG:
            log_format = (
                "%(asctime)s %(levelname)8s %(message)s [%(module)s %(lineno)d]")
        else:
            log_format = "%(asctime)s %(message)s"
        log_file_handler.setFormatter(
            Formatter(log_format, datefmt='%Y/%m/%d %H:%M:%S'))
        self.logger.addHandler(log_file_handler)

        self.logger.info(
            "DFAST pipeline started. (version {})".format(dfast_version))
        self.logger.info("Results will be generated into '{}'.".format(
            config.WORK_DIR))

        # initializing query genome.
        self.genome = Genome(config)

        self.ltg = LocusTagGenerator(self.genome, config)
        self.fu = FeatureUtil(self.genome, config)
        self.sa = StructuralAnnotation(self.genome, config)
        self.fa = FunctionalAnnotation(self.genome, config)
        self.ddbj = DDBJsubmission(self.genome, config)
        self.genbank = GenBankSubmission(self.genome, config)

    @staticmethod
    def _format_running_time(total_seconds):
        """Return ``total_seconds`` rendered as ``<h>h<m>m<s>s``.

        The value is truncated to whole seconds before being split, so the
        seconds field can never be rounded up to ``60``.
        """
        hours, remainder = divmod(int(total_seconds), 3600)
        minutes, seconds = divmod(remainder, 60)
        return "{0}h{1}m{2}s".format(hours, minutes, seconds)

    def execute(self):
        """Run all annotation steps, write the result files and log the time.

        In debug mode the genome is additionally pickled to
        ``genome.pickle`` inside the work directory.
        """
        self.sa.execute()  # execute structural annotation
        self.fu.execute()  # feature adjustment: sort, remove_partial, (merge)
        self.fa.execute()  # functional annotation
        self.ltg.execute()  # assigning locus_tags
        self.genome.add_source_features()  # set source feature

        # writing result files.
        write_results(self.genome, self.config)
        GenomeStat.execute(self.genome)
        self.ddbj.create_submission_file()
        self.genbank.create_submission_file()

        if self.config.DEBUG:
            self.genome.to_pickle(
                os.path.join(self.config.WORK_DIR, "genome.pickle"))

        # Elapsed wall-clock time since this object was constructed.
        elapsed = datetime.now() - self.start_time
        self.logger.info("DFAST pipeline completed!")
        self.logger.info("Total running time: {}".format(
            self._format_running_time(elapsed.total_seconds())))

    def cleanup(self):
        """Clean up both annotation components and delete ``WORK_DIR/input``.

        ``shutil.rmtree`` raises if the ``input`` directory does not exist.
        """
        self.sa.cleanup()
        self.fa.cleanup()
        shutil.rmtree(os.path.join(self.config.WORK_DIR, "input"))