# Example #1
# 0
def main():
    """Run the TCGA formatting perl script and store its timing record.

    Steps, in order:

    1. Parse the command line (path/identifier flags are required, the two
       database flags are optional).
    2. Configure logging through ``pipe_util.setup_logging``.
    3. Build a SQLAlchemy engine: from the connection dictionary returned by
       ``pipe_util.get_connect_dict`` when ``--db_cred_s3url`` is supplied,
       otherwise a local SQLite file named ``mir_profiler_tcga<uuid>.db``.
    4. Run ``tcga.pl`` and save the data frame returned by
       ``time_util.store_time`` to the ``time_mem_mir_tcga`` table.
    """
    parser = argparse.ArgumentParser("TCGA", description="TCGA formatted results generation")

    # Logging flag: the level is INFO unless -d/--debug is given.
    parser.add_argument(
        "-d",
        "--debug",
        action="store_const",
        const=logging.DEBUG,
        default=logging.INFO,
        dest="level",
        help="Enable debug logging.",
    )

    # Required flags, registered in this order so the help output is stable.
    required_flags = (
        (("-w", "--db_connect"), {"help": "Name of desired miRbase."}),
        (("-g", "--genome_version"), {"choices": ["hg38"], "help": "Genome Version of Annotation."}),
        (("-e", "--species_code"), {"choices": ["hsa"], "help": "Organism species code."}),
        (("-s", "--sam_path"), {"help": "Path to directory containing bams."}),
        (("-p", "--mirna_species"), {"help": "Path to mirna_species.txt"}),
        (("-x", "--crossmapped"), {"help": "Path to crossmapped.txt"}),
        (("-i", "--isoforms"), {"help": "Path to isoforms.txt"}),
        (("-u", "--uuid"), {"help": "UUID/GDC_ID for the harmonized BAM."}),
        (("-r", "--barcode"), {"help": "BAM barcode"}),
    )
    for names, options in required_flags:
        parser.add_argument(*names, required=True, **options)

    # Optional DB flags
    optional_flags = (
        (("-y", "--db_cred_s3url"), "String s3url of the postgres db_cred file"),
        (("-z", "--s3cfg_path"), "Path to the s3cfg file."),
    )
    for names, help_text in optional_flags:
        parser.add_argument(*names, required=False, help=help_text)

    args = parser.parse_args()
    run_uuid = args.uuid
    bam_barcode = args.barcode

    logger = pipe_util.setup_logging("mir_profiler_tcga", args, run_uuid)

    if args.db_cred_s3url:
        # Remote database described by the credentials file.
        conn_dict = pipe_util.get_connect_dict(args.db_cred_s3url, args.s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:
        # Local SQLite fallback, one database file per UUID.
        engine = sqlalchemy.create_engine(
            "sqlite:///mir_profiler_tcga{}.db".format(run_uuid), isolation_level="SERIALIZABLE"
        )

    # Generate TCGA formatted results
    logger.info("Beginning: TCGA formatted results generation")
    tcga_cmd = ["perl", "/home/ubuntu/bin/mirna-profiler/v0.2.7/code/custom_output/tcga/tcga.pl"]
    script_options = (
        ("-d", args.db_connect),
        ("-o", args.species_code),
        ("-g", args.genome_version),
        ("-s", args.sam_path),
        ("-r", args.mirna_species),
        ("-c", args.crossmapped),
        ("-i", args.isoforms),
    )
    for flag, value in script_options:
        tcga_cmd += [flag, value]

    output = pipe_util.do_command(tcga_cmd, logger)
    timing_df = time_util.store_time(run_uuid, tcga_cmd, output, logger)
    timing_df["bam_name"] = bam_barcode
    df_util.save_df_to_sqlalchemy(
        timing_df,
        {"uuid": run_uuid, "bam_name": bam_barcode},
        "time_mem_mir_tcga",
        engine,
        logger,
    )
    logger.info("Completed: TCGA formatted results generation")
def main():
    """Run the mature-miRNA expression matrix perl script and store its timing record.

    Parses the command line, configures logging via
    ``pipe_util.setup_logging`` and builds a SQLAlchemy engine (from the
    connection dictionary returned by ``pipe_util.get_connect_dict`` when
    ``--db_cred_s3url`` is given, otherwise a local SQLite file named
    ``mir_profiler_mimat<uuid>.db``).  It then runs
    ``expression_matrix_mimat.pl`` and saves the data frame returned by
    ``time_util.store_time`` to the ``time_mem_mir_expn_mimat`` table.

    NOTE: the spelling "genreation" in the description and log messages is
    runtime text and is kept unchanged here.
    """
    parser = argparse.ArgumentParser(
        "miRNA matrix mimat development",
        description="Mature miRNA gene expression matrix genreation",
    )

    # Logging flag: INFO by default, DEBUG with -d/--debug.
    parser.add_argument(
        "-d", "--debug", action="store_const", const=logging.DEBUG,
        default=logging.INFO, dest="level", help="Enable debug logging.",
    )

    # Required flags
    parser.add_argument("-w", "--db_connect", required=True, help="Name of desired miRbase.")
    parser.add_argument("-e", "--species_code", required=True, choices=["hsa"], help="Organism species code.")
    parser.add_argument("-s", "--sam_path", required=True, help="Path to SAM file")
    parser.add_argument("-m", "--mirna_path", required=True, help="Path to miRNA.txt file")
    parser.add_argument("-x", "--crossmapped_path", required=True, help="Path to crossmapped.txt file")
    parser.add_argument("-u", "--uuid", required=True, help="UUID/GDC_ID for the harmonized BAM.")
    parser.add_argument("-r", "--barcode", required=True, help="BAM barcode")

    # Optional DB flags
    parser.add_argument("-y", "--db_cred_s3url", required=False, help="String s3url of the postgres db_cred file")
    parser.add_argument("-z", "--s3cfg_path", required=False, help="Path to the s3cfg file.")

    args = parser.parse_args()
    run_uuid, bam_barcode = args.uuid, args.barcode

    logger = pipe_util.setup_logging("mir_profiler_mimat", args, run_uuid)

    if args.db_cred_s3url:
        # Remote database described by the credentials file.
        conn_dict = pipe_util.get_connect_dict(args.db_cred_s3url, args.s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:
        # Local SQLite fallback, one database file per UUID.
        local_db = "".join(["mir_profiler_mimat", run_uuid, ".db"])
        engine = sqlalchemy.create_engine("sqlite:///" + local_db, isolation_level="SERIALIZABLE")

    # Build the mature miRNA expression matrix
    logger.info("Beginning: Mature miRNA gene expression matrix genreation")
    mimat_cmd = [
        "perl",
        "/home/ubuntu/bin/mirna-profiler/v0.2.7/code/library_stats/expression_matrix_mimat.pl",
        "-d", args.db_connect,
        "-o", args.species_code,
        "-s", args.sam_path,
        "-r", args.mirna_path,
        "-c", args.crossmapped_path,
    ]
    output = pipe_util.do_command(mimat_cmd, logger)

    timing_df = time_util.store_time(run_uuid, mimat_cmd, output, logger)
    timing_df["bam_name"] = bam_barcode
    row_key = {"uuid": run_uuid, "bam_name": bam_barcode}
    df_util.save_df_to_sqlalchemy(timing_df, row_key, "time_mem_mir_expn_mimat", engine, logger)
    logger.info("Completed: Mature miRNA gene expression matrix genreation")
def main():
    """Run the alignment statistics perl script and store its timing record.

    Parses the command line, configures logging via
    ``pipe_util.setup_logging`` and builds a SQLAlchemy engine (from the
    connection dictionary returned by ``pipe_util.get_connect_dict`` when
    ``--db_cred_s3url`` is given, otherwise a local SQLite file named
    ``mir_profiler_stats<uuid>.db``).  It then runs ``alignment_stats.pl`` on
    the SAM file and adapter report, and saves the data frame returned by
    ``time_util.store_time`` to the ``time_mem_mir_alignment_stats`` table.
    """
    parser = argparse.ArgumentParser(
        "SAM alignment stats",
        description="Generate alignment stats for the miRNA in the annotated SAM file",
    )

    # Logging flag: INFO by default, DEBUG with -d/--debug.
    parser.add_argument(
        "-d", "--debug", action="store_const", const=logging.DEBUG,
        default=logging.INFO, dest="level", help="Enable debug logging.",
    )

    # (short, long, required, help) -- registered in this order.
    flag_table = (
        ("-s", "--sam_path", True, "Path to SAM file"),
        ("-a", "--adapter_path", True, "Path to adapter report"),
        ("-u", "--uuid", True, "UUID/GDC_ID for the harmonized BAM."),
        ("-r", "--barcode", True, "BAM barcode"),
        # Optional DB flags
        ("-y", "--db_cred_s3url", False, "String s3url of the postgres db_cred file"),
        ("-z", "--s3cfg_path", False, "Path to the s3cfg file."),
    )
    for short_name, long_name, is_required, help_text in flag_table:
        parser.add_argument(short_name, long_name, required=is_required, help=help_text)

    args = parser.parse_args()
    run_uuid = args.uuid
    bam_barcode = args.barcode

    logger = pipe_util.setup_logging("mir_profiler_stats", args, run_uuid)

    if not args.db_cred_s3url:
        # Local SQLite fallback, one database file per UUID.
        engine = sqlalchemy.create_engine(
            "sqlite:///mir_profiler_stats{}.db".format(run_uuid),
            isolation_level="SERIALIZABLE",
        )
    else:
        # Remote database described by the credentials file.
        conn_dict = pipe_util.get_connect_dict(args.db_cred_s3url, args.s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))

    # Get stats from the alignment annotations
    logger.info("Beginning: Alignment stats generation")
    stats_cmd = [
        "perl",
        "/home/ubuntu/bin/mirna-profiler/v0.2.7/code/library_stats/alignment_stats.pl",
        "-s", args.sam_path,
        "-a", args.adapter_path,
    ]
    output = pipe_util.do_command(stats_cmd, logger)

    timing_df = time_util.store_time(run_uuid, stats_cmd, output, logger)
    timing_df["bam_name"] = bam_barcode
    df_util.save_df_to_sqlalchemy(
        timing_df,
        {"uuid": run_uuid, "bam_name": bam_barcode},
        "time_mem_mir_alignment_stats",
        engine,
        logger,
    )
    logger.info("Completed: Alignment stats generation")
def main():
    """Run the SAM annotation perl script and store its timing record.

    Parses the command line, configures logging via
    ``pipe_util.setup_logging`` and builds a SQLAlchemy engine (from the
    connection dictionary returned by ``pipe_util.get_connect_dict`` when
    ``--db_cred_s3url`` is given, otherwise a local SQLite file named
    ``mir_profiler_annotator<uuid>.db``).  It then runs ``annotate.pl`` and
    saves the data frame returned by ``time_util.store_time`` to the
    ``time_mem_mir_sam_annotator`` table.
    """
    parser = argparse.ArgumentParser("SAM Annotator", description="Annotates the SAM files with miRNA hits")

    # Logging flag: INFO by default, DEBUG with -d/--debug.
    parser.add_argument(
        "-d", "--debug", action="store_const", const=logging.DEBUG,
        default=logging.INFO, dest="level", help="Enable debug logging.",
    )

    # Required flags
    parser.add_argument("-e", "--species_code", required=True, choices=["hsa"], help="Organism species code.")
    parser.add_argument("-s", "--sam_path", required=True, help="Path to directory containing bams.")
    parser.add_argument("-w", "--db_connect", required=True, help="Path to db_connection file")
    parser.add_argument("-u", "--uuid", required=True, help="UUID/GDC_ID for the harmonized BAM.")
    parser.add_argument("-r", "--barcode", required=True, help="BAM barcode")

    # Optional DB flags
    parser.add_argument("-y", "--db_cred_s3url", required=False, help="String s3url of the postgres db_cred file")
    parser.add_argument("-z", "--s3cfg_path", required=False, help="Path to the s3cfg file.")

    args = parser.parse_args()
    run_uuid, bam_barcode = args.uuid, args.barcode

    logger = pipe_util.setup_logging("mir_profiler_annotator", args, run_uuid)

    if args.db_cred_s3url:
        # Remote database described by the credentials file.
        conn_dict = pipe_util.get_connect_dict(args.db_cred_s3url, args.s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:
        # Local SQLite fallback, one database file per UUID.
        local_db = "".join(["mir_profiler_annotator", run_uuid, ".db"])
        engine = sqlalchemy.create_engine("sqlite:///" + local_db, isolation_level="SERIALIZABLE")

    # Annotate the SAM files
    logger.info("Beginning: SAM file annotation")
    annotate_cmd = ["perl", "/home/ubuntu/bin/mirna-profiler/v0.2.7/code/annotation/annotate.pl"]
    annotate_cmd.extend(["-d", args.db_connect])
    annotate_cmd.extend(["-o", args.species_code])
    annotate_cmd.extend(["-s", args.sam_path])
    output = pipe_util.do_command(annotate_cmd, logger)

    timing_df = time_util.store_time(run_uuid, annotate_cmd, output, logger)
    timing_df["bam_name"] = bam_barcode
    row_key = {"uuid": run_uuid, "bam_name": bam_barcode}
    df_util.save_df_to_sqlalchemy(timing_df, row_key, "time_mem_mir_sam_annotator", engine, logger)
    logger.info("Completed: SAM file annotation")
def main():
    """Convert a BAM file to SAM with ``samtools view`` and store the timing record.

    Parses the command line, configures logging via
    ``pipe_util.setup_logging`` and builds a SQLAlchemy engine (from the
    connection dictionary returned by ``pipe_util.get_connect_dict`` when
    ``--db_cred_s3url`` is given, otherwise a local SQLite file named
    ``mir_profiler_samtools<uuid>.db``).  It then runs
    ``samtools view -h <bam> -o <output>`` through the shell helper and saves
    the data frame returned by ``time_util.store_time`` to the
    ``time_mem_mir_samtools_view`` table.

    Both paths are shell-quoted before the command string is assembled, so
    file names containing spaces or shell metacharacters are passed to
    samtools verbatim.  For ordinary paths the command string is unchanged.
    """
    # Function-scope import keeps this entry point self-contained.
    import shlex

    parser = argparse.ArgumentParser(
        'BAM to SAM conversion',
        # The command below reads a BAM and writes a SAM, so describe it that way.
        description='Use samtools to convert a BAM to SAM.',
    )

    # Logging flag
    parser.add_argument('-d', '--debug',
                        action='store_const',
                        const=logging.DEBUG,
                        dest='level',
                        help='Enable debug logging.',
    )
    parser.set_defaults(level=logging.INFO)

    # Required flags
    parser.add_argument('-b', '--bam_path',
                        required=True,
                        help='Path to BAM file.',
    )
    parser.add_argument('-o', '--output_name',
                        required=True,
                        help='Desired name for output SAM.',
    )
    parser.add_argument('-u', '--uuid',
                        required=True,
                        help='UUID/GDC_ID for the harmonized BAM.',
    )
    parser.add_argument('-r', '--barcode',
                        required=True,
                        help='BAM barcode',
    )

    # Optional DB Flags
    parser.add_argument('-y', '--db_cred_s3url',
                        required=False,
                        help='String s3url of the postgres db_cred file',
    )
    parser.add_argument('-z', '--s3cfg_path',
                        required=False,
                        help='Path to the s3cfg file.',
    )

    args = parser.parse_args()

    bam_path = args.bam_path
    output_name = args.output_name
    uuid = args.uuid
    barcode = args.barcode

    logger = pipe_util.setup_logging('mir_profiler_samtools', args, uuid)

    if args.db_cred_s3url:
        # Remote database described by the credentials file.
        conn_dict = pipe_util.get_connect_dict(args.db_cred_s3url, args.s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:  # local sqlite case
        sqlite_name = 'mir_profiler_samtools' + uuid + '.db'
        engine_path = 'sqlite:///' + sqlite_name
        engine = sqlalchemy.create_engine(engine_path, isolation_level='SERIALIZABLE')

    # Convert the BAM to SAM (no check is made for an existing output file).
    logger.info('Beginning: BAM to SAM conversion')
    BAMtoSAM_CMD = ['samtools', 'view', '-h', shlex.quote(bam_path), '-o', shlex.quote(output_name)]
    shell_BtS_CMD = ' '.join(BAMtoSAM_CMD)
    output = pipe_util.do_shell_command(shell_BtS_CMD, logger)
    df = time_util.store_time(uuid, shell_BtS_CMD, output, logger)
    df['bam_name'] = barcode
    unique_key_dict = {'uuid': uuid, 'bam_name': barcode}
    table_name = 'time_mem_mir_samtools_view'
    df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger)
    logger.info('Completed: BAM to SAM conversion')
# Example #6
# 0
def main():
    """Run the graph generation perl script and store its timing record.

    Parses the command line, configures logging via
    ``pipe_util.setup_logging`` and builds a SQLAlchemy engine (from the
    connection dictionary returned by ``pipe_util.get_connect_dict`` when
    ``--db_cred_s3url`` is given, otherwise a local SQLite file named
    ``mir_profiler_graph<uuid>.db``).  It then runs ``graph_libs.pl`` with the
    supplied input files and saves the data frame returned by
    ``time_util.store_time`` to the ``time_mem_mir_graph`` table.
    """
    parser = argparse.ArgumentParser("Graph generation", description="Generate graphs for different miRNA stats")

    # Logging flag: the level is INFO unless -d/--debug is given.
    parser.add_argument(
        "-d",
        "--debug",
        action="store_const",
        const=logging.DEBUG,
        default=logging.INFO,
        dest="level",
        help="Enable debug logging.",
    )

    # (short, long, required, help) -- registered in this order.
    flag_table = (
        ("-s", "--sam_path", True, "Path to SAM file"),
        ("-f", "--filtered_taglen", True, "Path to filtered_taglength.csv"),
        ("-v", "--softclip_taglen", True, "Path to softclip_taglength.csv"),
        ("-a", "--adapter_report", True, "Path to adapter report"),
        ("-c", "--chastity_taglen", True, "Path to chastity_taglength.csv"),
        ("-l", "--alignment_stats", True, "Path to alignment_stats.csv"),
        ("-u", "--uuid", True, "UUID/GDC_ID for the harmonized BAM."),
        ("-r", "--barcode", True, "BAM barcode"),
        # Optional DB flags
        ("-y", "--db_cred_s3url", False, "String s3url of the postgres db_cred file"),
        ("-z", "--s3cfg_path", False, "Path to the s3cfg file."),
    )
    for short_name, long_name, is_required, help_text in flag_table:
        parser.add_argument(short_name, long_name, required=is_required, help=help_text)

    args = parser.parse_args()
    run_uuid = args.uuid
    bam_barcode = args.barcode

    logger = pipe_util.setup_logging("mir_profiler_graph", args, run_uuid)

    if args.db_cred_s3url:
        # Remote database described by the credentials file.
        conn_dict = pipe_util.get_connect_dict(args.db_cred_s3url, args.s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:
        # Local SQLite fallback, one database file per UUID.
        engine = sqlalchemy.create_engine(
            "sqlite:///mir_profiler_graph{}.db".format(run_uuid), isolation_level="SERIALIZABLE"
        )

    # Generate the graphs for the annotation data
    logger.info("Beginning: Annotation graph generation")
    graph_cmd = ["perl", "/home/ubuntu/bin/mirna-profiler/v0.2.7/code/library_stats/graph_libs.pl"]
    # Note the script's own option letters differ from this wrapper's flags.
    script_options = (
        ("-s", args.sam_path),
        ("-f", args.filtered_taglen),
        ("-o", args.softclip_taglen),
        ("-a", args.adapter_report),
        ("-c", args.chastity_taglen),
        ("-t", args.alignment_stats),
    )
    for flag, value in script_options:
        graph_cmd += [flag, value]

    output = pipe_util.do_command(graph_cmd, logger)
    timing_df = time_util.store_time(run_uuid, graph_cmd, output, logger)
    timing_df["bam_name"] = bam_barcode
    df_util.save_df_to_sqlalchemy(
        timing_df,
        {"uuid": run_uuid, "bam_name": bam_barcode},
        "time_mem_mir_graph",
        engine,
        logger,
    )
    logger.info("Completed: Annotation graph generation")
def main():
    """Write a field-length report for a SAM file and store the timing record.

    Parses the command line, configures logging via
    ``pipe_util.setup_logging`` and builds a SQLAlchemy engine (from the
    connection dictionary returned by ``pipe_util.get_connect_dict`` when
    ``--db_cred_s3url`` is given, otherwise a local SQLite file named
    ``mir_profiler_adapter_report<uuid>.db``).

    The report is produced by a shell pipeline: ``awk`` counts how many lines
    have each length of the tenth field, ``sort`` orders the
    ``<length> <count>`` pairs numerically by length, and the result is
    redirected to ``<sam basename>_adapter.report`` in the current working
    directory.  No lines are filtered out before counting.

    The SAM path and the report name are shell-quoted before the pipeline
    string is assembled, so paths containing spaces or shell metacharacters
    cannot break or alter the command.  For ordinary paths the command string
    is unchanged.

    The data frame returned by ``time_util.store_time`` is saved to the
    ``time_mem_mir_adapter_report`` table.
    """
    # Function-scope import keeps this entry point self-contained.
    import shlex

    parser = argparse.ArgumentParser(
        "miRNA adapter report",
        description="Generate adapter report for alignments that did not have adapter trimming done",
    )

    # Logging flag
    parser.add_argument(
        "-d", "--debug", action="store_const", const=logging.DEBUG, dest="level", help="Enable debug logging."
    )
    parser.set_defaults(level=logging.INFO)

    # Required flags
    parser.add_argument("-s", "--sam_path", required=True, help="Path to sam file.")
    parser.add_argument("-u", "--uuid", required=True, help="UUID/GDC_ID for the harmonized BAM.")
    parser.add_argument("-r", "--barcode", required=True, help="BAM barcode")

    # Optional DB Flags
    parser.add_argument("-y", "--db_cred_s3url", required=False, help="String s3url of the postgres db_cred file")
    parser.add_argument("-z", "--s3cfg_path", required=False, help="Path to the s3cfg file.")

    args = parser.parse_args()

    sam_path = args.sam_path
    uuid = args.uuid
    barcode = args.barcode

    logger = pipe_util.setup_logging("mir_profiler_adapter_report", args, uuid)

    if args.db_cred_s3url:
        # Remote database described by the credentials file.
        conn_dict = pipe_util.get_connect_dict(args.db_cred_s3url, args.s3cfg_path, logger)
        engine = sqlalchemy.create_engine(sqlalchemy.engine.url.URL(**conn_dict))
    else:  # local sqlite case
        sqlite_name = "mir_profiler_adapter_report" + uuid + ".db"
        engine_path = "sqlite:///" + sqlite_name
        engine = sqlalchemy.create_engine(engine_path, isolation_level="SERIALIZABLE")

    logger.info("Beginning: Adapter report generation")
    sam_base, _sam_ext = os.path.splitext(os.path.basename(sam_path))
    adapter_name = sam_base + "_adapter.report"
    adapter_CMD = [
        "cat",
        shlex.quote(sam_path),  # quoted: the string is interpreted by a shell
        "|",
        "awk '{arr[length($10)]+=1} END {for (i in arr) {print i\" \"arr[i]}}'",
        "|",
        'sort -t " " -k1n >',
        shlex.quote(adapter_name),  # derived from the SAM name, so quote it too
    ]
    shell_adapter_CMD = " ".join(adapter_CMD)
    output = pipe_util.do_shell_command(shell_adapter_CMD, logger)
    df = time_util.store_time(uuid, shell_adapter_CMD, output, logger)
    df["bam_name"] = barcode
    unique_key_dict = {"uuid": uuid, "bam_name": barcode}
    table_name = "time_mem_mir_adapter_report"
    df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger)
    logger.info("Completed: Adapter report generation")