def main():
    """Compare a BAM file and a CRAM file from the command line.

    Sets up file logging, checks that the paths given by ``args.b`` (BAM) and
    ``args.c`` (CRAM) are either iRODS paths or existing local files, runs
    ``CompareStatsForFiles.compare_bam_and_cram_by_statistics`` on them and
    reports whatever it returns: into the file named by ``args.e`` when that
    option is set, otherwise on stdout.

    Nothing is compared unless both ``args.b`` and ``args.c`` are set.

    Raises:
        ValueError: if the BAM or CRAM path is neither an iRODS path nor an
            existing local file.
        SystemExit: with status 1 when the comparison returns any errors.
    """
    args = parse_args()

    # Each unit of args.v lowers the threshold by 10, starting from CRITICAL;
    # when args.v is falsy the level is INFO.
    log_level = (logging.CRITICAL - 10 * args.v) if args.v else logging.INFO
    log_file = args.log if args.log else 'compare_b2c.log'
    logging.basicConfig(level=log_level,
                        format='%(levelname)s - %(asctime)s %(message)s',
                        filename=log_file)

    if not (args.b and args.c):
        return

    bam_path = args.b
    cram_path = args.c

    # Both inputs must be either an iRODS path or an existing local file.
    for path in (bam_path, cram_path):
        if not utils.is_irods_path(path) and not os.path.isfile(path):
            message = "This is not a file path: %s" % path
            logging.error(message)
            # The exception carries the offending path, same as the log line.
            raise ValueError(message)

    errors = CompareStatsForFiles.compare_bam_and_cram_by_statistics(
        bam_path, cram_path)
    if not errors:
        logging.info("There were no errors and no differences between the stats for the 2 files.")
        return

    if args.e:
        # One error per line; the context manager closes the file even if a
        # write fails.
        with open(args.e, 'w') as err_f:
            err_f.writelines(err + '\n' for err in errors)
    else:
        print(errors)
    # Signal failure to the caller regardless of where errors were reported.
    sys.exit(1)
def main():
    """Compare a BAM file and a CRAM file from the command line.

    Sets up file logging, validates the paths given by ``args.b`` (BAM),
    ``args.c`` (CRAM) and, when present, ``args.r`` (reference), runs
    ``CompareStatsForFiles.compare_bam_and_cram_by_statistics`` and reports
    whatever it returns: into the file named by ``args.e`` when that option
    is set, otherwise on stdout.

    Nothing is compared unless both ``args.b`` and ``args.c`` are set.

    Raises:
        ValueError: if the reference path is not an existing local file, or
            if the BAM or CRAM path is neither an iRODS path nor an existing
            local file.
        SystemExit: with status 1 when the comparison returns any errors.
    """
    args = parse_args()

    # Each unit of args.v lowers the threshold by 10, starting from CRITICAL;
    # when args.v is falsy the level is INFO.
    log_level = (logging.CRITICAL - 10 * args.v) if args.v else logging.INFO
    log_file = args.log if args.log else 'compare_b2c.log'
    logging.basicConfig(level=log_level,
                        format='%(levelname)s - %(asctime)s %(message)s',
                        filename=log_file)

    if not (args.b and args.c):
        return

    bam_path = args.b
    cram_path = args.c

    # Optional reference file: unlike the BAM/CRAM inputs it must be a local
    # file (no iRODS check is applied to it).
    ref_path = ''
    if args.r:
        ref_path = args.r
        if not os.path.isfile(ref_path):
            message = "This is not a file path: %s" % ref_path
            logging.error(message)
            raise ValueError(message)

    # Passed through to the comparison as a plain string; presumably used as
    # a command prefix for the samtools calls -- confirm in the callee.
    srun = "srun" if args.s else ''

    # Both inputs must be either an iRODS path or an existing local file.
    for path in (bam_path, cram_path):
        if not utils.is_irods_path(path) and not os.path.isfile(path):
            message = "This is not a file path: %s" % path
            logging.error(message)
            # The exception carries the offending path, same as the log line.
            raise ValueError(message)

    # NOTE(review): this call passes srun and ref_path, so the callee must
    # accept four arguments -- confirm against its current signature.
    errors = CompareStatsForFiles.compare_bam_and_cram_by_statistics(
        bam_path, cram_path, srun, ref_path)
    if not errors:
        logging.info(
            "There were no errors and no differences between the stats for the 2 files."
        )
        return

    if args.e:
        # One error per line; the context manager closes the file even if a
        # write fails.
        with open(args.e, 'w') as err_f:
            err_f.writelines(err + '\n' for err in errors)
    else:
        print(errors)
    # Signal failure to the caller regardless of where errors were reported.
    sys.exit(1)
def compare_bam_and_cram_by_statistics(cls, bam_path, cram_path):
    """Compare a BAM file and a CRAM file via samtools quickcheck, flagstat and stats.

    The steps run in order: path validation, readability check, quickcheck,
    flagstat comparison, stats comparison, and finally saving the stats next
    to each local file as ``<path>.stats``. Failures in the first three steps
    end the run immediately; later steps accumulate their errors.

    Args:
        cls: the owning class, used to reach ``compare_flagstats`` and
            ``compare_stats_by_sequence_checksum``.
        bam_path: path of the BAM file (local or iRODS).
        cram_path: path of the CRAM file (local or iRODS).

    Returns:
        A list with every error collected along the way; empty when no
        problem and no difference was found.
    """
    errors = []

    # Check that both are valid file paths
    if not bam_path or (not utils.is_irods_path(bam_path) and not os.path.isfile(bam_path)):
        errors.append("The BAM file path: %s is not valid" % bam_path)
    if not cram_path or (not utils.is_irods_path(cram_path) and not os.path.isfile(cram_path)):
        errors.append("The CRAM file path:%s is not valid" % cram_path)
    if errors:
        logging.error("There are errors with the file paths you provided: %s", errors)
        return errors

    # Check that the local files are readable (iRODS paths are not checked)
    for path in (bam_path, cram_path):
        if not utils.is_irods_path(path) and not utils.can_read_file(path):
            errors.append("Can't read file %s" % path)
    if errors:
        logging.error("There are problems reading the files: %s", errors)
        return errors

    # NOTE: the samtools version check that used to run here is disabled.

    # Quickcheck the files before anything else
    for path in (bam_path, cram_path):
        try:
            RunSamtoolsCommands.run_samtools_quickcheck(path)
        except RuntimeError as e:
            errors.append(str(e))
    if errors:
        logging.error("There are problems running quickcheck on the files you've given: %s", errors)
        return errors

    # Calculate and compare flagstat. `errors` is empty on entry to this
    # step, so it being still empty means both flagstat runs succeeded.
    try:
        flagstat_b = RunSamtoolsCommands.get_samtools_flagstat_output(bam_path)
    except RuntimeError as e:
        errors.append(str(e))
    try:
        flagstat_c = RunSamtoolsCommands.get_samtools_flagstat_output(cram_path)
    except RuntimeError as e:
        errors.append(str(e))
    if not errors:
        errors.extend(cls.compare_flagstats(flagstat_b, flagstat_c))
    else:
        logging.error("THere are problems running flagstat on the files you've given: %s", errors)

    # Calculate and compare stats
    stats_fpath_b = bam_path + ".stats"
    stats_fpath_c = cram_path + ".stats"
    stats_b, stats_c = None, None
    try:
        stats_b = HandleSamtoolsStats.fetch_stats(bam_path, stats_fpath_b)
    except (ValueError, RuntimeError) as e:
        errors.append(str(e))
    try:
        stats_c = HandleSamtoolsStats.fetch_stats(cram_path, stats_fpath_c)
    except (ValueError, RuntimeError) as e:
        errors.append(str(e))

    # Whether the stats can be compared depends only on both having been
    # fetched. Errors left over from the flagstat step (including genuine
    # flagstat differences) must not suppress this comparison, nor produce a
    # misleading "Can't compare" entry.
    if stats_b and stats_c:
        errors.extend(cls.compare_stats_by_sequence_checksum(stats_b, stats_c))
    else:
        errors.append("Can't compare samtools stats.")
        logging.error("For some reason I can't compare samtools stats for your files.")

    # Persist stats for local files only; a failure to save is reported but
    # does not interrupt the run.
    for path, stats, stats_fpath in ((bam_path, stats_b, stats_fpath_b),
                                     (cram_path, stats_c, stats_fpath_c)):
        try:
            if stats and not utils.is_irods_path(path):
                HandleSamtoolsStats.persist_stats(stats, stats_fpath)
        except IOError:
            message = "Can't save stats to disk for %s file" % path
            errors.append(message)
            logging.error(message)
    return errors
def compare_bam_and_cram_by_statistics(cls, bam_path, cram_path):
    """Compare a BAM file and a CRAM file via samtools quickcheck, flagstat and stats.

    The steps run in order: path validation, readability check, quickcheck,
    flagstat comparison, stats comparison, and finally saving the stats next
    to each local file as ``<path>.stats``. Failures in the first three steps
    end the run immediately; later steps accumulate their errors.

    Args:
        cls: the owning class, used to reach ``compare_flagstats`` and
            ``compare_stats_by_sequence_checksum``.
        bam_path: path of the BAM file (local or iRODS).
        cram_path: path of the CRAM file (local or iRODS).

    Returns:
        A list with every error collected along the way; empty when no
        problem and no difference was found.
    """
    errors = []

    # Check that both are valid file paths
    if not bam_path or (not utils.is_irods_path(bam_path) and not os.path.isfile(bam_path)):
        errors.append("The BAM file path: %s is not valid" % bam_path)
    if not cram_path or (not utils.is_irods_path(cram_path) and not os.path.isfile(cram_path)):
        errors.append("The CRAM file path:%s is not valid" % cram_path)
    if errors:
        logging.error("There are errors with the file paths you provided: %s", errors)
        return errors

    # Check that the local files are readable (iRODS paths are not checked)
    for path in (bam_path, cram_path):
        if not utils.is_irods_path(path) and not utils.can_read_file(path):
            errors.append("Can't read file %s" % path)
    if errors:
        logging.error("There are problems reading the files: %s", errors)
        return errors

    # NOTE: the samtools version check that used to run here is disabled.

    # Quickcheck the files before anything else
    for path in (bam_path, cram_path):
        try:
            RunSamtoolsCommands.run_samtools_quickcheck(path)
        except RuntimeError as e:
            errors.append(str(e))
    if errors:
        logging.error("There are problems running quickcheck on the files you've given: %s", errors)
        return errors

    # Calculate and compare flagstat. `errors` is empty on entry to this
    # step, so it being still empty means both flagstat runs succeeded.
    try:
        flagstat_b = RunSamtoolsCommands.get_samtools_flagstat_output(bam_path)
    except RuntimeError as e:
        errors.append(str(e))
    try:
        flagstat_c = RunSamtoolsCommands.get_samtools_flagstat_output(cram_path)
    except RuntimeError as e:
        errors.append(str(e))
    if not errors:
        errors.extend(cls.compare_flagstats(flagstat_b, flagstat_c))
    else:
        logging.error("THere are problems running flagstat on the files you've given: %s", errors)

    # Calculate and compare stats
    stats_fpath_b = bam_path + ".stats"
    stats_fpath_c = cram_path + ".stats"
    stats_b, stats_c = None, None
    try:
        stats_b = HandleSamtoolsStats.fetch_stats(bam_path, stats_fpath_b)
    except (ValueError, RuntimeError) as e:
        errors.append(str(e))
    try:
        stats_c = HandleSamtoolsStats.fetch_stats(cram_path, stats_fpath_c)
    except (ValueError, RuntimeError) as e:
        errors.append(str(e))

    # Whether the stats can be compared depends only on both having been
    # fetched. Errors left over from the flagstat step (including genuine
    # flagstat differences) must not suppress this comparison, nor produce a
    # misleading "Can't compare" entry.
    if stats_b and stats_c:
        errors.extend(cls.compare_stats_by_sequence_checksum(stats_b, stats_c))
    else:
        errors.append("Can't compare samtools stats.")
        logging.error("For some reason I can't compare samtools stats for your files.")

    # Persist stats for local files only; a failure to save is reported but
    # does not interrupt the run.
    for path, stats, stats_fpath in ((bam_path, stats_b, stats_fpath_b),
                                     (cram_path, stats_c, stats_fpath_c)):
        try:
            if stats and not utils.is_irods_path(path):
                HandleSamtoolsStats.persist_stats(stats, stats_fpath)
        except IOError:
            message = "Can't save stats to disk for %s file" % path
            errors.append(message)
            logging.error(message)
    return errors