# Example No. 1
# 0
def mergeAlignStatsResults(input_prefix_list, output_prefix):
    """Merge per-input alignment statistics into combined output files.

    For every prefix in ``input_prefix_list`` the files
    ``<prefix>alignment.summary`` and ``<prefix>alignTable.txt`` are read
    (the prefix is concatenated verbatim, no path separator is inserted).
    Missing input files are reported through ``printtime`` and skipped.

    Outputs:

    * ``<output_prefix>alignment.summary`` -- always written. Counter keys
      are summed over all inputs, "Longest Alignment" keys take the maximum,
      and the genome description keys keep the value of the last input read
      ('unknown' when no input was found). Mean alignment length and mean
      coverage depth are recomputed from the merged counters; coverage
      percentage is written as 'N/A' because it cannot be merged here.
    * ``<output_prefix>alignTable.txt`` -- written only when at least one
      input table exists. Data rows are summed element-wise; the header row
      and the first column are copied from the first table found.

    Args:
        input_prefix_list: iterable of string prefixes of the input files.
        output_prefix: string prefix of the two output files.

    Raises:
        ConfigParser.NoOptionError / ValueError: if an existing
            alignment.summary lacks one of the expected keys or holds a
            non-integer value for a numeric key.
    """

    ############################################
    # Merge individual alignment.summary files #
    ############################################
    printtime("Merging individual alignment.summary files")

    config_out = ConfigParser.RawConfigParser()
    config_out.optionxform = str  # don't convert to lowercase
    config_out.add_section('global')

    quallist = ['Q7', 'Q10', 'Q17', 'Q20', 'Q30', 'Q47']
    bplist = [50, 100, 150, 200, 250, 300, 350, 400]

    # Keys copied verbatim from the inputs.
    fixedkeys = ['Genome', 'Genome Version', 'Index Version', 'Genomesize']

    # Keys whose values are summed over all inputs. The order matters: it is
    # the order in which the options are inserted into (and written by)
    # config_out.
    numberkeys = [
        'Total number of Reads',
        'Total Mapped Reads',
        'Total Mapped Target Bases',
    ]
    numberkeys += ['Filtered Mapped Bases in %s Alignments' % q
                   for q in quallist]
    numberkeys += ['Filtered %s Alignments' % q for q in quallist]
    numberkeys += ['Filtered %s%s Reads' % (bp, q)
                   for q in quallist for bp in bplist]

    # Keys for which the maximum over all inputs is kept.
    maxkeys = ['Filtered %s Longest Alignment' % q for q in quallist]

    # init
    for key in fixedkeys:
        config_out.set('global', key, 'unknown')
    for key in numberkeys + maxkeys:
        config_out.set('global', key, 0)

    for input_prefix in input_prefix_list:
        alignmentfile = input_prefix + 'alignment.summary'
        if not os.path.exists(alignmentfile):
            printtime("ERROR: skipped %s" % alignmentfile)
            continue

        # Use a fresh parser for every file: a shared instance keeps the
        # options of previously read files, so a key missing from this file
        # would silently be counted again with the previous file's value.
        config_in = MyConfigParser()
        config_in.optionxform = str  # don't convert to lowercase
        config_in.read(alignmentfile)

        for key in numberkeys:
            value_in = config_in.get('global', key)
            value_out = config_out.get('global', key)
            config_out.set('global', key, int(value_in) + int(value_out))
        for key in maxkeys:
            value_in = config_in.get('global', key)
            value_out = config_out.get('global', key)
            config_out.set('global', key, max(int(value_in), int(value_out)))
        for key in fixedkeys:
            # todo: the inputs are not checked for agreement; the last file
            # read wins.
            config_out.set('global', key, config_in.get('global', key))

    # Regenerate trickier alignment.summary metrics

    for qual in quallist:
        try:
            q_bases = int(config_out.get(
                'global', 'Filtered Mapped Bases in %s Alignments' % qual))
            q_reads = int(config_out.get(
                'global', 'Filtered %s Alignments' % qual))

            q_readlen = 0
            if q_reads > 0:
                q_readlen = q_bases / q_reads
            config_out.set('global',
                           'Filtered %s Mean Alignment Length' % qual,
                           q_readlen)

            # Raises ValueError while Genomesize is still 'unknown' (no input
            # was merged); the remaining metrics for this quality level are
            # then left out of the output.
            genomesize = float(config_out.get('global', 'Genomesize'))
            q_coverage = 0.0
            if genomesize > 0:
                q_coverage = q_bases / genomesize
            config_out.set('global', 'Filtered %s Mean Coverage Depth' % qual,
                           '%1.1f' % q_coverage)

            # Not mergeable at this point
            config_out.set('global', 'Filtered %s Coverage Percentage' % qual,
                           'N/A')

        except Exception as err:
            # Best effort: keep merging the other quality levels, but report
            # the problem instead of hiding it (and let KeyboardInterrupt /
            # SystemExit propagate).
            printtime("WARNING: could not regenerate %s alignment metrics: %s"
                      % (qual, err))

    with open(output_prefix + 'alignment.summary', 'wb') as configfile:
        config_out.write(configfile)

    #########################################
    # Merge individual alignTable.txt files #
    #########################################
    printtime("Merging individual alignTable.txt files")

    table = 0
    header = None
    for input_prefix in input_prefix_list:
        alignTableFile = input_prefix + 'alignTable.txt'
        if not os.path.exists(alignTableFile):
            printtime("ERROR: skipped %s" % alignTableFile)
            continue

        if header is None:
            # Load the first table once as text to keep its header row and
            # its first column.
            header = numpy.loadtxt(alignTableFile,
                                   dtype='string',
                                   comments='#')
        # Element-wise sum of the data rows (the first row is skipped).
        table += numpy.loadtxt(alignTableFile,
                               dtype='int',
                               comments='#',
                               skiprows=1)

    # fix first column: the summation above also added up the first column,
    # so restore it from the first table that was read.
    if header is not None:
        table[:, 0] = header[1:, 0]
        with open(output_prefix + 'alignTable.txt', 'w') as f_handle:
            numpy.savetxt(f_handle, header[0][None], fmt='%s', delimiter='\t')
            numpy.savetxt(f_handle, table, fmt='%i', delimiter='\t')