# ---- Broad co-cleaning (Indel Realignment and BQSR) pipeline ----
import argparse
import logging
import os

import sqlalchemy

import BaseRecalibrator
import IndelRealigner
import PrintReads
import RealignerTargetCreator
import bam_validate
import pipe_util
import verify_util
# is_dir and is_nat are argparse type validators shared by these scripts;
# their definitions are not shown in this section (a sketch follows this script).


def main():
    parser = argparse.ArgumentParser('Broad co-cleaning (Indel Realignment and BQSR) pipeline')

    # Logging flags.
    parser.add_argument('-d', '--debug',
                        action='store_const',
                        const=logging.DEBUG,
                        dest='level',
                        help='Enable debug logging.',
    )
    parser.set_defaults(level=logging.INFO)

    # Required flags.
    parser.add_argument('-r', '--reference_fasta_name',
                        required=True,
                        help='Reference fasta path.',
    )
    parser.add_argument('-indel', '--known_1k_genome_indel_sites',
                        required=True,
                        help='Reference INDEL path.',
    )
    parser.add_argument('-snp', '--dbsnp_known_snp_sites',
                        required=True,
                        help='Reference SNP path.',
    )
    parser.add_argument('-b', '--harmonized_bam_path',
                        required=False,
                        action='append',
                        help='Source bam path.',
    )
    parser.add_argument('-list', '--harmonized_bam_list_path',
                        required=False,
                        help='Source bam list path.',
    )
    parser.add_argument('-s', '--scratch_dir',
                        required=False,
                        type=is_dir,
                        help='Scratch file directory.',
    )
    parser.add_argument('-l', '--log_dir',
                        required=False,
                        type=is_dir,
                        help='Log file directory.',
    )
    parser.add_argument('-j', '--thread_count',
                        required=True,
                        type=is_nat,
                        help='Maximum number of threads for execution.',
    )
    parser.add_argument('-u', '--uuid',
                        required=True,
                        help='analysis_id string',
    )
    parser.add_argument('-m', '--md5',
                        required=False,
                        action='store_true',
                        help='Calculate final size/MD5.',
    )
    parser.add_argument('-e', '--eliminate_intermediate_files',
                        required=False,
                        action='store_true',
                        help='Keep intermediate files (do not reduce disk usage); set this flag if you can afford the extra disk space.',
    )
    args = parser.parse_args()

    reference_fasta_name = args.reference_fasta_name
    known_1k_genome_indel_sites = args.known_1k_genome_indel_sites
    dbsnp_known_snp_sites = args.dbsnp_known_snp_sites
    uuid = args.uuid
    harmonized_bam_path = args.harmonized_bam_path

    # If no list file was given, write one next to the first bam.
    if not args.harmonized_bam_list_path:
        list_dir = os.path.dirname(harmonized_bam_path[0])
        harmonized_bam_list_path = os.path.join(list_dir, uuid + '_harmonized_bam_list.list')
        with open(harmonized_bam_list_path, 'w') as handle:
            for bam in harmonized_bam_path:
                handle.write(bam + '\n')
    else:
        harmonized_bam_list_path = args.harmonized_bam_list_path

    if not args.scratch_dir:
        scratch_dir = os.path.dirname(harmonized_bam_list_path)
    else:
        scratch_dir = args.scratch_dir
    if not args.log_dir:
        log_dir = os.path.dirname(harmonized_bam_list_path)
    else:
        log_dir = args.log_dir

    thread_count = str(args.thread_count)
    # Intermediate files are removed by default; the -e flag keeps them.
    eliminate_intermediate_files = not args.eliminate_intermediate_files
    md5 = args.md5

    ## Logging.
    logging.basicConfig(
        filename=os.path.join(log_dir, 'Broad_cocleaning_' + uuid + '.log'),  # /host for docker
        level=args.level,
        filemode='a',
        format='%(asctime)s %(levelname)s %(message)s',
        datefmt='%Y-%m-%d_%H:%M:%S_%Z',
    )
    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
    logger = logging.getLogger(__name__)
    hostname = os.uname()[1]
    logger.info('hostname=%s' % hostname)
    logger.info('harmonized_bam_list_path=%s' % harmonized_bam_list_path)

    # If bams were not given on the command line, read them from the list file.
    if not args.harmonized_bam_path:
        with open(harmonized_bam_list_path) as f:
            harmonized_bam_path = f.read().splitlines()
    for path in harmonized_bam_path:
        logger.info('harmonized_bam_path=%s' % path)

    engine_path = 'sqlite:///' + os.path.join(log_dir, uuid + '_Broad_cocleaning.db')
    engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')

    ## Pipeline.
    # Check for .bai files; call samtools index where missing.
    RealignerTargetCreator.index(uuid, harmonized_bam_list_path, engine, logger)

    # Call RealignerTargetCreator on the harmonized bam list.
    harmonized_bam_intervals_path = RealignerTargetCreator.RTC(
        uuid, harmonized_bam_list_path, thread_count, reference_fasta_name,
        known_1k_genome_indel_sites, engine, logger)

    # Call IndelRealigner on all bams together, but write each read to the
    # output corresponding to the input it came from.
    harmonized_IR_bam_list_path = IndelRealigner.IR(
        uuid, harmonized_bam_list_path, reference_fasta_name,
        known_1k_genome_indel_sites, harmonized_bam_intervals_path, engine, logger)

    # Build the BQSR table for each bam individually and apply it.
    Analysis_ready_bam_list_path = []
    for bam in harmonized_IR_bam_list_path:
        harmonized_IR_bam_BQSR_table_path = BaseRecalibrator.BQSR(
            uuid, bam, thread_count, reference_fasta_name, dbsnp_known_snp_sites,
            engine, logger)
        Analysis_ready_bam_path = PrintReads.PR(
            uuid, bam, thread_count, reference_fasta_name,
            harmonized_IR_bam_BQSR_table_path, engine, logger)
        bam_validate.bam_validate(uuid, Analysis_ready_bam_path, engine, logger)
        Analysis_ready_bam_list_path.append(Analysis_ready_bam_path)

    if md5:
        for bam in Analysis_ready_bam_list_path:
            bam_name = os.path.basename(bam)
            bam_dir = os.path.dirname(bam)
            bam_basename, bam_ext = os.path.splitext(bam_name)
            bai_name = bam_basename + '.bai'
            bai_path = os.path.join(bam_dir, bai_name)
            verify_util.store_md5_size(uuid, bam, engine, logger)
            verify_util.store_md5_size(uuid, bai_path, engine, logger)

    if eliminate_intermediate_files:
        pipe_util.remove_file_list(uuid, harmonized_IR_bam_list_path, engine, logger)

    for bam in Analysis_ready_bam_list_path:
        validate_file = bam_validate.bam_validate(uuid, bam, engine, logger)
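# --------------------------------------------------------------------------
# All four scripts in this section pass `is_dir` and `is_nat` as argparse
# `type=` callables, but their definitions are not shown here. A minimal
# sketch, assuming they validate an existing directory and a natural
# (positive) integer respectively, raising argparse.ArgumentTypeError on bad
# input so argparse reports a clean usage error:
import argparse
import os


def is_dir(path):
    """argparse type: return `path` unchanged if it is an existing directory."""
    if os.path.isdir(path):
        return path
    raise argparse.ArgumentTypeError('%s is not a directory' % path)


def is_nat(value):
    """argparse type: return `value` as an int if it is a natural number (>= 1)."""
    number = int(value)
    if number >= 1:
        return number
    raise argparse.ArgumentTypeError('%s must be a positive integer' % value)
# --------------------------------------------------------------------------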
# ---- MuSE variant calling pipeline ----
import argparse
import logging
import os
import sys

import sqlalchemy

import index_util
import muse_call
import muse_sump
import pipe_util
import verify_util
# is_dir and is_nat are the shared argparse validators (see the sketch above).


def main():
    parser = argparse.ArgumentParser('MuSE variant calling pipeline')

    # Logging flags.
    parser.add_argument('-d', '--debug',
                        action='store_const',
                        const=logging.DEBUG,
                        dest='level',
                        help='Enable debug logging.',
    )
    parser.set_defaults(level=logging.INFO)

    # Required flags.
    parser.add_argument('-r', '--reference_fasta_name',
                        required=True,
                        help='Reference fasta path.',
    )
    parser.add_argument('-snp', '--dbsnp_known_snp_sites',
                        required=True,
                        help='Reference SNP path; should be bgzip compressed and tabix indexed.',
    )
    parser.add_argument('-tb', '--analysis_ready_tumor_bam_path',
                        required=True,
                        nargs='?',
                        default=[sys.stdin],
                        help='Source patient tumor bam path.',
    )
    parser.add_argument('-nb', '--analysis_ready_normal_bam_path',
                        required=True,
                        nargs='?',
                        default=[sys.stdin],
                        help='Source patient normal bam path.',
    )
    parser.add_argument('-g', '--Whole_genome_sequencing_data',
                        required=False,
                        action='store_true',
                        help='Input is whole genome sequencing data.',
    )
    parser.add_argument('-bs', '--Parallel_Block_Size',
                        type=is_nat,
                        default=50000000,
                        help='Parallel block size.',
    )
    parser.add_argument('-s', '--scratch_dir',
                        required=False,
                        type=is_dir,
                        help='Scratch file directory.',
    )
    parser.add_argument('-l', '--log_dir',
                        required=False,
                        type=is_dir,
                        help='Log file directory.',
    )
    parser.add_argument('-j', '--thread_count',
                        required=True,
                        type=is_nat,
                        help='Maximum number of threads for execution.',
    )
    parser.add_argument('-u', '--uuid',
                        required=True,
                        help='analysis_id string',
    )
    parser.add_argument('-m', '--md5',
                        required=False,
                        action='store_true',
                        help='Calculate final size/MD5.',
    )
    parser.add_argument('-e', '--eliminate_intermediate_files',
                        required=False,
                        action='store_true',
                        help='Keep intermediate files (do not reduce disk usage); set this flag if you can afford the extra disk space.',
    )
    args = parser.parse_args()

    reference_fasta_name = args.reference_fasta_name
    dbsnp_known_snp_sites = args.dbsnp_known_snp_sites
    uuid = args.uuid
    analysis_ready_tumor_bam_path = args.analysis_ready_tumor_bam_path
    analysis_ready_normal_bam_path = args.analysis_ready_normal_bam_path
    blocksize = args.Parallel_Block_Size

    if not args.scratch_dir:
        scratch_dir = os.path.dirname(analysis_ready_tumor_bam_path)
    else:
        scratch_dir = args.scratch_dir
    if not args.log_dir:
        log_dir = os.path.dirname(analysis_ready_tumor_bam_path)
    else:
        log_dir = args.log_dir

    thread_count = str(args.thread_count)
    # Intermediate files are removed by default; the -e flag keeps them.
    eliminate_intermediate_files = not args.eliminate_intermediate_files
    md5 = args.md5

    ## Logging.
    logging.basicConfig(
        filename=os.path.join(log_dir, 'MuSE_variant_calling_' + uuid + '.log'),  # /host for docker
        level=args.level,
        filemode='a',
        format='%(asctime)s %(levelname)s %(message)s',
        datefmt='%Y-%m-%d_%H:%M:%S_%Z',
    )
    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
    logger = logging.getLogger(__name__)
    hostname = os.uname()[1]
    logger.info('hostname=%s' % hostname)
    logger.info('analysis_ready_tumor_bam_path=%s' % analysis_ready_tumor_bam_path)
    logger.info('analysis_ready_normal_bam_path=%s' % analysis_ready_normal_bam_path)

    engine_path = 'sqlite:///' + os.path.join(log_dir, uuid + '_MuSE_variant_calling.db')
    engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')

    ## Pipeline.
    # faidx the reference fasta if needed.
    fai_path = reference_fasta_name + '.fai'
    if os.path.isfile(fai_path):
        logger.info('reference_fai_path=%s' % fai_path)
    else:
        fai_path = index_util.samtools_faidx(uuid, reference_fasta_name, engine, logger)
        logger.info('reference_fai_path=%s' % fai_path)

    # Index the input bam files if needed.
    bam_path = [analysis_ready_tumor_bam_path, analysis_ready_normal_bam_path]
    for path in bam_path:
        bai_path = path + '.bai'
        if os.path.isfile(bai_path):
            logger.info('analysis_ready_bam_bai_path=%s' % bai_path)
        else:
            bai_path = index_util.samtools_bam_index(uuid, path, engine, logger)
            logger.info('analysis_ready_bam_bai_path=%s' % bai_path)

    # bgzip compress and tabix index the dbsnp file if needed.
    dbsnp_name, dbsnp_ext = os.path.splitext(dbsnp_known_snp_sites)
    dbsnp_tabix_path = dbsnp_known_snp_sites + '.tbi'
    if dbsnp_ext == '.bgz':
        logger.info('dbsnp file is already bgzip compressed=%s' % dbsnp_known_snp_sites)
        if os.path.isfile(dbsnp_tabix_path):
            logger.info('tabix index of dbsnp_bgz file=%s' % dbsnp_tabix_path)
        else:
            dbsnp_tabix_path = index_util.tabix_index(uuid, dbsnp_known_snp_sites, engine, logger)
            logger.info('tabix index of dbsnp_bgz file=%s' % dbsnp_tabix_path)
    else:
        dbsnp_known_snp_sites = index_util.bgzip_compress(uuid, dbsnp_known_snp_sites, engine, logger)
        logger.info('bgzip compressed dbsnp file=%s' % dbsnp_known_snp_sites)
        dbsnp_tabix_path = index_util.tabix_index(uuid, dbsnp_known_snp_sites, engine, logger)
        logger.info('tabix index of dbsnp_bgz file=%s' % dbsnp_tabix_path)
        # sys.exit('!!!Reference dbSNP file should be bgzip compressed!!!')

    # MuSE call.
    muse_call_output_path = muse_call.call_region(
        uuid, thread_count, analysis_ready_tumor_bam_path, analysis_ready_normal_bam_path,
        reference_fasta_name, fai_path, blocksize, engine, logger)

    # MuSE sump.
    if not args.Whole_genome_sequencing_data:
        muse_vcf = muse_sump.sump_wxs(uuid, muse_call_output_path, dbsnp_known_snp_sites, engine, logger)
    else:
        muse_vcf = muse_sump.sump_wgs(uuid, muse_call_output_path, dbsnp_known_snp_sites, engine, logger)

    # Picard SortVcf.
    muse_srt_vcf = index_util.picard_sortvcf(uuid, muse_vcf, reference_fasta_name, engine, logger)

    if eliminate_intermediate_files:
        pipe_util.remove_file_list(uuid, [muse_call_output_path], engine, logger)
        pipe_util.remove_file_list(uuid, [muse_vcf], engine, logger)

    if md5:
        verify_util.store_md5_size(uuid, muse_srt_vcf, engine, logger)
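# --------------------------------------------------------------------------
# The MuSE script relies on index_util.bgzip_compress() and
# index_util.tabix_index() to prepare the dbSNP VCF; those helpers are not
# shown in this section. A minimal sketch of what such wrappers could look
# like, assuming htslib's `bgzip` and `tabix` binaries are on PATH and that
# the repo's convention is a .bgz suffix (which the extension check above
# expects); the real helpers also record timing in the sqlite engine, which
# this sketch omits:
import subprocess


def bgzip_compress(vcf_path):
    """Compress a VCF with bgzip and return the new .bgz path."""
    bgz_path = vcf_path + '.bgz'
    with open(bgz_path, 'wb') as handle:
        subprocess.check_call(['bgzip', '-c', vcf_path], stdout=handle)
    return bgz_path


def tabix_index(bgzipped_vcf_path):
    """Build a tabix index for a bgzip-compressed VCF; return the .tbi path."""
    subprocess.check_call(['tabix', '-f', '-p', 'vcf', bgzipped_vcf_path])
    return bgzipped_vcf_path + '.tbi'
# --------------------------------------------------------------------------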
# ---- Broad HaplotypeCaller (GVCF) pipeline ----
import argparse
import logging
import os

import sqlalchemy

import HaplotypeCaller
import RealignerTargetCreator
import verify_util
# is_dir and is_nat are the shared argparse validators (see the sketch above).


def main():
    parser = argparse.ArgumentParser('Broad HaplotypeCaller (GVCF) pipeline')

    # Logging flags.
    parser.add_argument('-d', '--debug',
                        action='store_const',
                        const=logging.DEBUG,
                        dest='level',
                        help='Enable debug logging.',
    )
    parser.set_defaults(level=logging.INFO)

    # Required flags.
    parser.add_argument('-r', '--reference_fasta_name',
                        required=True,
                        help='Reference fasta path.',
    )
    parser.add_argument('-indel', '--known_1k_genome_indel_sites',
                        required=True,
                        help='Reference INDEL path.',
    )
    parser.add_argument('-snp', '--dbsnp_known_snp_sites',
                        required=True,
                        help='Reference SNP path.',
    )
    parser.add_argument('-b', '--analysis_ready_bam_path',
                        required=False,
                        action='append',
                        help='Source bam path.',
    )
    parser.add_argument('-list', '--analysis_ready_bam_list_path',
                        required=False,
                        help='Source bam list path.',
    )
    parser.add_argument('-intervals', '--source_bam_files_intervals',
                        required=False,
                        help='Source bam files intervals.',
    )
    parser.add_argument('-s', '--scratch_dir',
                        required=False,
                        type=is_dir,
                        help='Scratch file directory.',
    )
    parser.add_argument('-l', '--log_dir',
                        required=False,
                        type=is_dir,
                        help='Log file directory.',
    )
    parser.add_argument('-j', '--thread_count',
                        required=True,
                        type=is_nat,
                        help='Maximum number of threads for execution.',
    )
    parser.add_argument('-u', '--uuid',
                        required=True,
                        help='analysis_id string',
    )
    parser.add_argument('-m', '--md5',
                        required=False,
                        action='store_true',
                        help='Calculate final size/MD5.',
    )
    parser.add_argument('-e', '--eliminate_intermediate_files',
                        required=False,
                        action='store_true',
                        help='Keep intermediate files (do not reduce disk usage); set this flag if you can afford the extra disk space.',
    )
    args = parser.parse_args()

    reference_fasta_name = args.reference_fasta_name
    known_1k_genome_indel_sites = args.known_1k_genome_indel_sites
    dbsnp_known_snp_sites = args.dbsnp_known_snp_sites
    uuid = args.uuid
    analysis_ready_bam_path = args.analysis_ready_bam_path

    # If no list file was given, write one next to the first bam.
    if not args.analysis_ready_bam_list_path:
        list_dir = os.path.dirname(analysis_ready_bam_path[0])
        analysis_ready_bam_list_path = os.path.join(list_dir, uuid + '_analysis_ready_bam_list.list')
        with open(analysis_ready_bam_list_path, 'w') as handle:
            for bam in analysis_ready_bam_path:
                handle.write(bam + '\n')
    else:
        analysis_ready_bam_list_path = args.analysis_ready_bam_list_path

    if not args.scratch_dir:
        scratch_dir = os.path.dirname(analysis_ready_bam_list_path)
    else:
        scratch_dir = args.scratch_dir
    if not args.log_dir:
        log_dir = os.path.dirname(analysis_ready_bam_list_path)
    else:
        log_dir = args.log_dir

    thread_count = str(args.thread_count)
    # Intermediate files are removed by default; the -e flag keeps them.
    eliminate_intermediate_files = not args.eliminate_intermediate_files
    md5 = args.md5

    ## Logging.
    logging.basicConfig(
        filename=os.path.join(log_dir, 'Broad_HaplotypeCaller_' + uuid + '.log'),  # /host for docker
        level=args.level,
        filemode='a',
        format='%(asctime)s %(levelname)s %(message)s',
        datefmt='%Y-%m-%d_%H:%M:%S_%Z',
    )
    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
    logger = logging.getLogger(__name__)
    hostname = os.uname()[1]
    logger.info('hostname=%s' % hostname)
    logger.info('analysis_ready_bam_list_path=%s' % analysis_ready_bam_list_path)

    # If bams were not given on the command line, read them from the list file.
    if not args.analysis_ready_bam_path:
        with open(analysis_ready_bam_list_path) as f:
            analysis_ready_bam_path = f.read().splitlines()
    for path in analysis_ready_bam_path:
        logger.info('analysis_ready_bam_path=%s' % path)

    engine_path = 'sqlite:///' + os.path.join(log_dir, uuid + '_Broad_HaplotypeCaller.db')
    engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')

    ## Pipeline.
    # Check for .bai files; call samtools index where missing.
    RealignerTargetCreator.index(uuid, analysis_ready_bam_list_path, engine, logger)

    # Call RealignerTargetCreator on the bam list if intervals were not provided.
    if not args.source_bam_files_intervals:
        intervals = RealignerTargetCreator.RTC(
            uuid, analysis_ready_bam_list_path, thread_count, reference_fasta_name,
            known_1k_genome_indel_sites, engine, logger)
    else:
        intervals = args.source_bam_files_intervals

    # Call HaplotypeCaller.
    hc_output_gvcfs = HaplotypeCaller.HC(
        uuid, analysis_ready_bam_list_path, intervals, thread_count,
        reference_fasta_name, dbsnp_known_snp_sites, engine, logger)

    if md5:
        for gvcf in hc_output_gvcfs:
            verify_util.store_md5_size(uuid, gvcf, engine, logger)
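# --------------------------------------------------------------------------
# HaplotypeCaller.HC() is defined elsewhere in the repo. As a rough,
# hypothetical sketch of the per-bam GATK 3.x invocation such a helper wraps
# (the real helper also records timing in the sqlite engine and handles
# resume logic, which this sketch omits; the gatk_jar path is an assumption):
import os
import subprocess


def haplotype_caller_gvcf(bam_path, intervals, thread_count, reference_fasta,
                          dbsnp_vcf, gatk_jar='GenomeAnalysisTK.jar'):
    """Emit a per-sample GVCF with GATK 3.x HaplotypeCaller; return its path."""
    gvcf_path = os.path.splitext(bam_path)[0] + '.g.vcf'
    cmd = ['java', '-jar', gatk_jar,
           '-T', 'HaplotypeCaller',
           '-R', reference_fasta,
           '-I', bam_path,
           '-L', intervals,
           '--dbsnp', dbsnp_vcf,
           '--emitRefConfidence', 'GVCF',
           '-nct', str(thread_count),
           '-o', gvcf_path]
    subprocess.check_call(cmd)
    return gvcf_path
# --------------------------------------------------------------------------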
# ---- Harmonization (realignment) pipeline ----
import argparse
import logging
import os
import sys
from ast import literal_eval

import sqlalchemy

import bam_mark_duplicates
import bam_merge
import bam_sort
import bam_stats
import bam_util
import bam_validate
import bwa_mem
import fastq_validate
import pipe_util
import verify_util
# is_dir and is_nat are the shared argparse validators (see the sketch above);
# get_s3_objects() is defined elsewhere in this script (a sketch follows below).


def main():
    parser = argparse.ArgumentParser('harmonization pipeline')

    # Logging flags.
    parser.add_argument('-d', '--debug',
                        action='store_const',
                        const=logging.DEBUG,
                        dest='level',
                        help='Enable debug logging.',
    )
    parser.set_defaults(level=logging.INFO)

    # Required flags.
    parser.add_argument('-g', '--s3_reference_bucket',
                        required=True,
                        help='S3 bucket name containing reference fasta.',
    )
    parser.add_argument('-b', '--s3_bam_bucket',
                        required=True,
                        help='S3 bucket name containing source bam file.',
    )
    parser.add_argument('-r', '--reference_fasta_name',
                        required=True,
                        help='Reference fasta S3 key name.',
    )
    parser.add_argument('-a', '--bam_analysis_id',
                        required=True,
                        help='Source bam file S3 key name.',
    )
    parser.add_argument('-s', '--scratch_dir',
                        required=True,
                        type=is_dir,
                        help='Scratch file directory.',
    )
    parser.add_argument('-l', '--log_dir',
                        required=True,
                        type=is_dir,
                        help='Log file directory.',
    )
    parser.add_argument('-f', '--s3cfg_dir',
                        required=True,
                        type=is_dir,
                        help='.s3cfg file directory.',
    )
    parser.add_argument('-t', '--thread_count',
                        required=True,
                        type=is_nat,
                        help='Maximum number of threads for execution.',
    )
    parser.add_argument('-c', '--csv_stats',
                        required=True,
                        help='Write stats to csv rather than postgres (True/False).',
    )
    parser.add_argument('-u', '--postgres_user',
                        required=False,
                        help='postgres username',
    )
    parser.add_argument('-p', '--postgres_password',
                        required=False,
                        help='postgres password',
    )
    parser.add_argument('-n', '--postgres_hostname',
                        required=False,
                        help='postgres hostname',
    )
    args = parser.parse_args()

    s3_reference_bucket = args.s3_reference_bucket
    s3_bam_bucket = args.s3_bam_bucket
    reference_fasta_name = args.reference_fasta_name.strip('/')
    bam_analysis_id = args.bam_analysis_id.strip('/')
    scratch_dir = args.scratch_dir
    log_dir = args.log_dir
    s3cfg_dir = args.s3cfg_dir
    thread_count = str(args.thread_count)
    csv_stats = literal_eval(args.csv_stats)
    postgres_user = args.postgres_user
    postgres_password = args.postgres_password
    postgres_hostname = args.postgres_hostname

    ## Logging.
    uuid = pipe_util.get_uuid_from_path(bam_analysis_id)
    logging.basicConfig(
        filename=os.path.join(log_dir, 'aln_' + uuid + '.log'),  # /host for docker
        level=args.level,
        filemode='a',
        format='%(asctime)s %(levelname)s %(message)s',
        datefmt='%Y-%m-%d_%H:%M:%S_%Z',
    )
    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
    logger = logging.getLogger(__name__)
    hostname = os.uname()[1]
    logger.info('hostname=%s' % hostname)

    ## Open the stats and timing db.
    engine = None  # assumption: downstream helpers fall back to csv when no engine is given
    if not csv_stats:
        if (postgres_user is None) or (postgres_password is None) or (postgres_hostname is None):
            logger.error('must enter postgres user, password and hostname if not writing to csv (or use -c True)')
            sys.exit(1)
        engine_path = ('postgresql://' + postgres_user + ':' + postgres_password +
                       '@' + postgres_hostname + '/gdc_harmonize')
        engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')

    ## Get the reference genome.
    get_s3_objects(uuid, s3_reference_bucket, reference_fasta_name, scratch_dir, s3cfg_dir, engine, logger)
    reference_fasta_path = os.path.join(scratch_dir, reference_fasta_name)
    logger.info('scratch_dir=%s' % scratch_dir)
    logger.info('reference_fasta_name=%s' % reference_fasta_name)
    logger.info('reference_fasta_path=%s' % reference_fasta_path)

    ## Get the bam to be harmonized.
    get_s3_objects(uuid, s3_bam_bucket, bam_analysis_id, scratch_dir, s3cfg_dir, engine, logger)
    bam_path = bam_util.get_bam_path(bam_analysis_id, scratch_dir, logger)

    # Get the original reference genome for stats.
    original_fasta_name = bam_util.get_bam_reference(bam_path, logger)
    if not (original_fasta_name.endswith('.fa') or original_fasta_name.endswith('.fasta')):
        original_fasta_name += '.fa'
    original_fasta_path = os.path.join(scratch_dir, original_fasta_name)
    get_s3_objects(uuid, s3_reference_bucket, original_fasta_name, scratch_dir, s3cfg_dir, engine, logger)

    ## Pipeline.
    # verify_util.verify_cgquery_md5(reference_fasta_name, logger)
    verify_util.verify_cgquery(uuid, bam_path, engine, logger)
    bam_validate.bam_validate(uuid, bam_path, engine, logger)
    bam_stats.bam_stats(uuid, bam_path, original_fasta_path, engine, logger)
    bam_util.bam_to_fastq(uuid, bam_path, engine, logger)
    fastq_length = fastq_validate.fastqc_validate(uuid, bam_path, thread_count, engine, logger)
    readgroup_path_dict = bam_util.write_readgroups(uuid, bam_path, engine, logger)  # to file and db

    # MEM_ALN_CUTOFF = 70
    # if fastq_length < MEM_ALN_CUTOFF:
    #     # do bwa aln...
    #     bam_path_list = bwa.bwa(uuid, bam_path, reference_fasta_path, readgroup_path_dict, thread_count, engine, logger)
    #     bam_sort_path_list = bam_sort.bam_sort(uuid, bam_path, bam_path_list, reference_fasta_path, thread_count, engine, logger)
    #     bam_merge_path = bam_merge.bam_merge(uuid, bam_path, bam_sort_path_list, engine, logger)
    #     bam_md_path = bam_mark_duplicates.bam_mark_duplicates(uuid, bam_merge_path, thread_count, engine, logger)
    #     bam_validate.bam_validate(uuid, bam_md_path, engine, logger)
    #     bam_stats.bam_stats(uuid, bam_md_path, reference_fasta_path, engine, logger)
    #     verify_util.store_md5_size(uuid, bam_md_path, engine, logger)
    #     bai_md_path = bam_md_path + '.bai'
    #     verify_util.store_md5_size(uuid, bai_md_path, engine, logger)
    # ...and do bwa mem

    bam_path_list = bwa_mem.bwa_mem(uuid, bam_path, reference_fasta_path, readgroup_path_dict, thread_count, engine, logger)
    bam_sort_path_list = bam_sort.bam_sort(uuid, bam_path, bam_path_list, reference_fasta_path, thread_count, engine, logger)
    bam_merge_path = bam_merge.bam_merge(uuid, bam_path, bam_sort_path_list, engine, logger)
    bam_md_path = bam_mark_duplicates.bam_mark_duplicates(uuid, bam_merge_path, thread_count, engine, logger)
    bam_validate.bam_validate(uuid, bam_md_path, engine, logger)
    # bam_stats.bam_stats(uuid, bam_md_path, reference_fasta_path, engine, logger)
    verify_util.store_md5_size(uuid, bam_md_path, engine, logger)
    bai_md_path = bam_md_path + '.bai'
    verify_util.store_md5_size(uuid, bai_md_path, engine, logger)
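# --------------------------------------------------------------------------
# get_s3_objects() is called above but not shown in this section; its
# signature is taken from the call sites. A minimal sketch, assuming it
# shells out to s3cmd with the config file found in s3cfg_dir (the real
# helper presumably also records timing via `engine`, which is omitted here):
import os
import subprocess


def get_s3_objects(uuid, bucket, key, scratch_dir, s3cfg_dir, engine, logger):
    """Recursively download s3://bucket/key into scratch_dir with s3cmd."""
    s3cfg_path = os.path.join(s3cfg_dir, '.s3cfg')
    source = 's3://' + bucket + '/' + key
    cmd = ['s3cmd', '-c', s3cfg_path, 'get', '--recursive', '--skip-existing',
           source, scratch_dir + '/']
    logger.info('get_s3_objects cmd=%s' % ' '.join(cmd))
    subprocess.check_call(cmd)
# --------------------------------------------------------------------------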