Example #1
0
 def fetch_stats(cls, fpath, stats_fpath):
     """Return the samtools stats for ``fpath``, reusing ``stats_fpath`` when possible.

     The stats are read with ``HandleSamtoolsStats._get_stats`` when the stats
     file exists, ``_is_stats_file_older_than_data`` does not flag it, and it is
     readable. In every other case they are produced from the data file with
     ``HandleSamtoolsStats._generate_stats``.

     Raises:
         ValueError: if ``fpath`` is empty or does not point to an existing file.
     """
     if not (fpath and os.path.isfile(fpath)):
         raise ValueError("You need to give a valid file path if you want the stats")

     # The three checks short-circuit in this order: existence, staleness, readability.
     cache_usable = (
         os.path.isfile(stats_fpath)
         and not cls._is_stats_file_older_than_data(fpath, stats_fpath)
         and utils.can_read_file(stats_fpath)
     )
     if cache_usable:
         stats = HandleSamtoolsStats._get_stats(stats_fpath)
         logging.info("Reading stats from file %s" % stats_fpath)
         return stats

     stats = HandleSamtoolsStats._generate_stats(fpath)
     logging.info("Generating stats for file %s" % fpath)
     # Only warn when the fallback was caused by an outdated stats file.
     cache_is_stale = os.path.isfile(stats_fpath) and cls._is_stats_file_older_than_data(fpath, stats_fpath)
     if cache_is_stale:
         logging.warning("The stats file is older than the actual file, you need to remove/update it. "
                         "Regenerating the stats, but without saving.")
     return stats
Example #2
0
 def fetch_stats(cls, fpath, stats_fpath):
     """Fetch the stats for the data file ``fpath``.

     An existing, readable stats file at ``stats_fpath`` that
     ``_is_stats_file_older_than_data`` does not flag is loaded through
     ``HandleSamtoolsStats._get_stats``. Otherwise the stats are produced
     through ``HandleSamtoolsStats._generate_stats``.

     Raises:
         ValueError: when ``fpath`` is empty or is not an existing file.
     """
     data_file_present = bool(fpath) and os.path.isfile(fpath)
     if not data_file_present:
         raise ValueError(
             "You need to give a valid file path if you want the stats")

     # Decide whether the stats file on disk can be trusted; the checks run
     # in the order existence -> staleness -> readability.
     use_cached = False
     if os.path.isfile(stats_fpath):
         if not cls._is_stats_file_older_than_data(fpath, stats_fpath):
             use_cached = utils.can_read_file(stats_fpath)

     if use_cached:
         stats = HandleSamtoolsStats._get_stats(stats_fpath)
         logging.info("Reading stats from file %s" % stats_fpath)
         return stats

     stats = HandleSamtoolsStats._generate_stats(fpath)
     logging.info("Generating stats for file %s" % fpath)
     # Tell the user when an outdated stats file is what forced the fallback.
     if os.path.isfile(stats_fpath):
         if cls._is_stats_file_older_than_data(fpath, stats_fpath):
             logging.warning(
                 "The stats file is older than the actual file, you need to remove/update it. "
                 "Regenerating the stats, but without saving.")
     return stats
Example #3
0
    def compare_bam_and_cram_by_statistics(cls, bam_path, cram_path):
        """Compare a BAM file and a CRAM file through samtools-derived statistics.

        The work is done in stages: path validation, readability of local
        (non-iRODS) paths, samtools quickcheck, flagstat comparison and stats
        comparison. A failure in any of the first three stages returns at once;
        the later stages keep accumulating messages. Stats obtained for
        non-iRODS paths are finally handed to ``HandleSamtoolsStats.persist_stats``.

        Returns:
            list: the error messages collected along the way.
        """
        errors = []

        # Stage 1: each path must be non-empty and either an iRODS path or an existing file.
        path_checks = (
            ("The BAM file path: %s is not valid", bam_path),
            ("The CRAM file path:%s is not valid", cram_path),
        )
        for template, path in path_checks:
            if not path or not (utils.is_irods_path(path) or os.path.isfile(path)):
                errors.append(template % path)
        if errors:
            logging.error("There are errors with the file paths you provided: %s" % errors)
            return errors

        # Stage 2: local files have to be readable by the current user.
        for path in (bam_path, cram_path):
            if not (utils.is_irods_path(path) or utils.can_read_file(path)):
                errors.append("Can't read file %s" % path)
        if errors:
            logging.error("There are problems reading the files: %s" % errors)
            return errors

        # NOTE: the samtools version check is currently disabled.

        # Stage 3: quickcheck both files before doing anything more costly.
        for path in (bam_path, cram_path):
            try:
                RunSamtoolsCommands.run_samtools_quickcheck(path)
            except RuntimeError as exc:
                errors.append(str(exc))
        if errors:
            logging.error("There are problems running quickcheck on the files you've given: %s" % errors)
            return errors

        # Stage 4: flagstat for both files, compared only when both runs succeeded.
        flagstat_outputs = []
        for path in (bam_path, cram_path):
            try:
                flagstat_outputs.append(RunSamtoolsCommands.get_samtools_flagstat_output(path))
            except RuntimeError as exc:
                errors.append(str(exc))
        if errors:
            logging.error("THere are problems running flagstat on the files you've given: %s" % errors)
        else:
            bam_flagstat, cram_flagstat = flagstat_outputs
            errors.extend(cls.compare_flagstats(bam_flagstat, cram_flagstat))

        # Stage 5: stats for both files; a failed fetch leaves None in its slot.
        bam_stats_path = bam_path + ".stats"
        cram_stats_path = cram_path + ".stats"
        fetched = []
        for data_path, stats_path in ((bam_path, bam_stats_path), (cram_path, cram_stats_path)):
            try:
                fetched.append(HandleSamtoolsStats.fetch_stats(data_path, stats_path))
            except (ValueError, RuntimeError) as exc:
                errors.append(str(exc))
                fetched.append(None)
        bam_stats, cram_stats = fetched

        # The comparison is skipped whenever any error has been recorded so far.
        if errors or not bam_stats or not cram_stats:
            errors.append("Can't compare samtools stats.")
            logging.error("For some reason I can't compare samtools stats for your files.")
        else:
            errors.extend(cls.compare_stats_by_sequence_checksum(bam_stats, cram_stats))

        # Stage 6: hand whatever stats we hold for non-iRODS paths to persist_stats.
        to_persist = (
            (bam_stats, bam_path, bam_stats_path),
            (cram_stats, cram_path, cram_stats_path),
        )
        for stats, data_path, stats_path in to_persist:
            try:
                if stats and not utils.is_irods_path(data_path):
                    HandleSamtoolsStats.persist_stats(stats, stats_path)
            except IOError:
                message = "Can't save stats to disk for %s file" % data_path
                errors.append(message)
                logging.error(message)
        return errors
Example #4
0
    def compare_bam_and_cram_by_statistics(cls, bam_path, cram_path):
        """Check a BAM/CRAM pair against each other using samtools statistics.

        Stages, in order: path validation, readability of local (non-iRODS)
        paths, samtools quickcheck, flagstat comparison, stats comparison.
        The first three stages return as soon as they record an error; the
        remaining ones accumulate messages and continue. At the end, stats
        held for non-iRODS paths are passed to
        ``HandleSamtoolsStats.persist_stats``.

        Returns:
            list: every error message recorded during the run.
        """
        errors = []
        both_paths = (bam_path, cram_path)

        # Path validation: non-empty, and either an iRODS path or a real file.
        invalid_templates = ("The BAM file path: %s is not valid",
                             "The CRAM file path:%s is not valid")
        for template, path in zip(invalid_templates, both_paths):
            is_valid = path and (utils.is_irods_path(path)
                                 or os.path.isfile(path))
            if not is_valid:
                errors.append(template % path)
        if errors:
            logging.error(
                "There are errors with the file paths you provided: %s" %
                errors)
            return errors

        # Readability only matters for paths that are not on iRODS.
        for path in both_paths:
            if utils.is_irods_path(path):
                continue
            if not utils.can_read_file(path):
                errors.append("Can't read file %s" % path)
        if errors:
            logging.error("There are problems reading the files: %s" % errors)
            return errors

        # NOTE: the samtools version check is currently disabled.

        # Quickcheck both files before running the heavier commands.
        for path in both_paths:
            try:
                RunSamtoolsCommands.run_samtools_quickcheck(path)
            except RuntimeError as exc:
                errors.append(str(exc))
        if errors:
            logging.error(
                "There are problems running quickcheck on the files you've given: %s"
                % errors)
            return errors

        # Flagstat: run for both, compare only if neither run failed.
        flagstat_outputs = []
        for path in both_paths:
            try:
                output = RunSamtoolsCommands.get_samtools_flagstat_output(
                    path)
            except RuntimeError as exc:
                errors.append(str(exc))
            else:
                flagstat_outputs.append(output)
        if errors:
            logging.error(
                "THere are problems running flagstat on the files you've given: %s"
                % errors)
        else:
            bam_flagstat, cram_flagstat = flagstat_outputs
            errors.extend(cls.compare_flagstats(bam_flagstat, cram_flagstat))

        # Stats: a fetch that fails leaves None for that file.
        bam_stats_path = bam_path + ".stats"
        cram_stats_path = cram_path + ".stats"
        stats_targets = ((bam_path, bam_stats_path),
                         (cram_path, cram_stats_path))
        fetched = []
        for data_path, stats_path in stats_targets:
            result = None
            try:
                result = HandleSamtoolsStats.fetch_stats(data_path, stats_path)
            except (ValueError, RuntimeError) as exc:
                errors.append(str(exc))
            fetched.append(result)
        bam_stats, cram_stats = fetched

        # Any error recorded so far (or missing stats) blocks the comparison.
        if errors or not bam_stats or not cram_stats:
            errors.append("Can't compare samtools stats.")
            logging.error(
                "For some reason I can't compare samtools stats for your files."
            )
        else:
            errors.extend(
                cls.compare_stats_by_sequence_checksum(bam_stats, cram_stats))

        # Persist the stats we hold for paths that are not on iRODS.
        for stats, (data_path, stats_path) in zip(fetched, stats_targets):
            try:
                if stats and not utils.is_irods_path(data_path):
                    HandleSamtoolsStats.persist_stats(stats, stats_path)
            except IOError:
                message = "Can't save stats to disk for %s file" % data_path
                errors.append(message)
                logging.error(message)
        return errors