Beispiel #1
0
def output_gm(prefix, gm_res):
    header = '\t'.join(
        ['# Scaffold', 'cDNA IDs', 'Status', 'Linkage group(s)'])
    full_out = '-'.join([prefix, 'full-genetic-map-results-table.tsv'])
    with open(full_out, 'w') as outfile:
        print >> outfile, util.report_cmd()
        print >> outfile, header
        for status, result in gm_res.items(
        ):  ### what do I do with status here? Or is it just discarded because there should only be complete, single cDNAs in gm_res
            for res in result:
                for t in util.LG_table_formatter(res):
                    print >> outfile, t
Beispiel #2
0
def output_cDNA(prefix, cDNA_results, gm=''):
    # write out cDNA:scaffold mappings
    full_out = '-'.join([prefix, 'full-cDNA-results-table.tsv'])
    with open(full_out, 'w') as outfile:
        if len(gm) > 0:
            header = '\t'.join(
                ['# cDNA ID', 'Status', 'Scaffold', 'Linkage group'])
            print >> outfile, util.report_cmd()
            print >> outfile, header
            for status, result in cDNA_results.items():
                for res in result:
                    for t in util.table_formatter_wGM(res, gm):
                        print >> outfile, t
        else:
            header = '\t'.join(['# cDNA ID', 'Status', 'Scaffold'])
            print >> outfile, util.report_cmd()
            print >> outfile, header
            for status, result in cDNA_results.items():
                for res in result:
                    for t in util.table_formatter(res):
                        print >> outfile, t
Beispiel #3
0
def report_cDNA(prefix, cDNA_results, TOT):
    """calc percentages and report cDNA results"""
    num_complete = len(cDNA_results['Complete'])
    num_duplicated = len(cDNA_results['Duplicated'])
    num_partial = len(cDNA_results['Partial'])
    num_fragmented = len(cDNA_results['Fragmented'])
    num_poor = len(cDNA_results['Poorly mapped'])
    num_missing = len(cDNA_results['Missing'])
    rate_complete = float(num_complete) / float(TOT)
    rate_duplicated = float(num_duplicated) / float(TOT)
    rate_partial = float(num_partial) / float(TOT)
    rate_fragmented = float(num_fragmented) / float(TOT)
    rate_poor = float(num_poor) / float(TOT)
    rate_missing = float(num_missing) / float(TOT)
    pct_complete = round(100.0 * rate_complete, 2)
    pct_duplicated = round(100.0 * rate_duplicated, 2)
    pct_partial = round(100.0 * rate_partial, 2)
    pct_fragmented = round(100 * rate_fragmented, 2)
    pct_poor = round(100 * rate_poor, 2)
    pct_missing = round(100 * rate_missing, 2)

    sum_complete = num_complete + num_duplicated
    pct_sum_complete = pct_complete + pct_duplicated

    # report if the right number of sequences have a result
    num_counted = sum([
        num_complete, num_duplicated, num_fragmented, num_partial, num_poor,
        num_missing
    ])
    rate_counted = float(num_counted) / float(TOT)
    pct_counted = round(100 * rate_counted, 2)

    # write to tsv
    tsvout = '-'.join([prefix, 'results.tsv'])
    header_txt = [
        '', 'Complete', 'Complete, single copy', 'Complete, multiple copies',
        'Fragmented', 'Partial', 'Poorly Mapped', 'Missing',
        'Total cDNAs searched'
    ]
    with open(tsvout, 'w') as outfile:
        header = '\t'.join(header_txt)
        nums = '\t'.join([
            str(x) for x in [
                'Number', sum_complete, num_complete, num_duplicated,
                num_fragmented, num_partial, num_poor, num_missing, num_counted
            ]
        ])
        pcts = '\t'.join([
            str(x) for x in [
                'Percent', pct_sum_complete, pct_complete, pct_duplicated,
                pct_fragmented, pct_partial, pct_poor, pct_missing, pct_counted
            ]
        ])
        print >> outfile, util.report_cmd()
        print >> outfile, header
        print >> outfile, nums
        print >> outfile, pcts
    jiraout = '-'.join([prefix, 'results.jira'])
    with open(jiraout, 'w') as outfile:
        header_txt.extend(
            [''])  # add final empty string for jira table separator
        header = '||'.join(header_txt)
        nums = [
            sum_complete, num_complete, num_duplicated, num_fragmented,
            num_partial, num_poor, num_missing, num_counted
        ]
        pcts = [
            pct_sum_complete, pct_complete, pct_duplicated, pct_fragmented,
            pct_partial, pct_poor, pct_missing, pct_counted
        ]
        res = '|' + '|'.join([util.jira_formatter(x)
                              for x in zip(nums, pcts)]) + '|'

        print >> outfile, util.report_cmd()
        print >> outfile, header
        print >> outfile, res

    # print to STDOUT
    print '\n=== GNAVIGATOR cDNA RESULTS ==='
    print '%s (%s%%) complete sequences' % (sum_complete, pct_sum_complete)
    print '  %s (%s%%) complete, single copy sequences' % (num_complete,
                                                           pct_complete)
    print '  %s (%s%%) complete, multiple copy sequences' % (num_duplicated,
                                                             pct_duplicated)
    print '%s (%s%%) fragmented sequences' % (num_fragmented, pct_fragmented)
    print '%s (%s%%) partial sequences' % (num_partial, pct_partial)
    print '%s (%s%%) poorly mapped sequences' % (num_poor, pct_poor)
    print '%s (%s%%) missing sequences' % (num_missing, pct_missing)
    print '%s (%s%%) sequences were evaluated' % (num_counted, pct_counted)
    print util.report_time()
Beispiel #4
0
def report_gm_cDNA(gm_results, uniqMapDat_select, prefix):
    """Report genetic map results as a proportion of eligible cDNAs"""

    # input cDNA_results is dict with keys == cDNA status
    # see src.classify.check_aln
    gm_cdna_res = {}
    statuses = {
        'Solo': 0,
        'Same LG, expected order': 0,
        'Same LG, unexpected order': 0,
        'Different LG': 0,
        'Same LG, order undetermined': 0
    }
    tot = 0

    for res in gm_results.items():
        for pres in res[1]:
            scaf, cdnaS, stat, lg = pres
            cdnaL = cdnaS.split(';')
            for cdna in cdnaL:
                gm_cdna_res[cdna] = [scaf, stat]
                statuses[stat] += 1
                tot += 1

    miss = 'Solo'
    for rec in uniqMapDat_select.iterrows():
        cdna = rec[1][9]
        scaf = rec[1][13]

        if cdna not in gm_cdna_res:
            gm_cdna_res[cdna] = [scaf, miss]
            statuses[miss] += 1
            tot += 1

    num_solo = statuses['Solo']
    num_good = statuses['Same LG, expected order']
    num_wo = statuses['Same LG, unexpected order']
    num_diff = statuses['Different LG']
    num_undet = statuses['Same LG, order undetermined']

    rate_solo = float(statuses['Solo']) / float(tot)
    rate_good = float(statuses['Same LG, expected order']) / float(tot)
    rate_wo = float(statuses['Same LG, unexpected order']) / float(tot)
    rate_diff = float(statuses['Different LG']) / float(tot)
    rate_undet = float(statuses['Same LG, order undetermined']) / float(tot)

    pct_solo = round(100.0 * rate_solo, 2)
    pct_good = round(100.0 * rate_good, 2)
    pct_wo = round(100.0 * rate_wo, 2)
    pct_diff = round(100.0 * rate_diff, 2)
    pct_undet = round(100.0 * rate_undet, 2)

    # write to tsv
    tsvout = '-'.join([prefix, 'genetic-map-cDNA-results.tsv'])
    with open(tsvout, 'w') as outfile:
        header_txt = [
            '', 'Solo (unevaluated)', 'Same LG, expected order',
            'Same LG, unexpected order', 'Different LG',
            'Same LG, order undetermined'
        ]
        header = '\t'.join(header_txt)
        nums = '\t'.join([
            str(x) for x in
            ['Number', num_solo, num_good, num_wo, num_diff, num_undet]
        ])
        pcts = '\t'.join([
            str(x) for x in
            ['Percent', pct_solo, pct_good, pct_wo, pct_diff, pct_undet]
        ])
        print >> outfile, util.report_cmd()
        print >> outfile, header
        print >> outfile, nums
        print >> outfile, pcts
    jiraout = '-'.join([prefix, 'genetic-map-cDNA-results.jira'])
    with open(jiraout, 'w') as outfile:
        header_txt.extend(
            [''])  # add final empty string for jira table separator
        header = '||'.join(header_txt)
        nums = [num_solo, num_good, num_wo, num_diff, num_undet]
        pcts = [pct_solo, pct_good, pct_wo, pct_diff, pct_undet]
        res = '|' + '|'.join([util.jira_formatter(x)
                              for x in zip(nums, pcts)]) + '|'
        print >> outfile, util.report_cmd()
        print >> outfile, header
        print >> outfile, res

    print '\n=== GNAVIGATOR GENETIC MAP RESULTS per COMPLETE cDNA ==='
    print ' '.join([
        '%s (%s%%) were found alone on a scaffold and were not evaluated',
        'further.'
    ]) % (num_solo, pct_solo)
    print ' '.join([
        '%s (%s%%) were from the same linkage group and were found on scaffolds in',
        'the expected order'
    ]) % (num_good, pct_good)
    print ' '.join([
        '%s (%s%%) were from the same linkage group but were found on scaffolds in an',
        'unexpected order.'
    ]) % (num_wo, pct_wo)
    print ' '.join([
        '%s (%s%%) were from different linkage groups but were found on the same',
        'scaffold.'
    ]) % (num_diff, pct_diff)
    print ' '.join([
        '%s (%s%%) were from the same linkage group but their order could not be',
        'determined.'
    ]) % (num_undet, pct_undet)
    print util.report_time()

    # return statuses so certain stats can be used elsewhere
    return statuses
Beispiel #5
0
def report_gm(uniqDatMap_select, gm_results, gm_cdna_statuses, prefix):
    """report summary of genetic map results"""
    num_scaff_toCheck = len(uniqDatMap_select.tname.unique())
    num_solo = gm_cdna_statuses['Solo']
    #tot = num_scaff_toCheck + gm_cdna_statuses['Solo']
    num_goodLG = len(gm_results['goodLG'])
    num_WO_LG = len(gm_results['WO_LG'])
    num_diffLG = len(gm_results['diffLG'])
    num_undet = len(gm_results['undet'])
    num_scaff_checked = num_goodLG + num_WO_LG + num_diffLG + num_undet + num_solo
    num_2plus_scaff = num_goodLG + num_WO_LG + num_diffLG + num_undet

    tot = num_scaff_checked  # for legacy reasons
    if num_scaff_toCheck == num_scaff_checked:
        rate_solo = float(num_solo) / float(tot)
        rate_LGscaff = float(num_2plus_scaff) / float(tot)
        rate_goodLG = float(num_goodLG) / float(tot)
        rate_WO_LG = float(num_WO_LG) / float(tot)
        rate_diffLG = float(num_diffLG) / float(tot)
        rate_undet = float(num_undet) / float(tot)
        pct_solo = round(100.0 * rate_solo, 2)
        pct_LGscaff = round(100.0 * rate_LGscaff, 2)
        pct_goodLG = round(100.0 * rate_goodLG, 2)
        pct_WO_LG = round(100.0 * rate_WO_LG, 2)
        pct_diffLG = round(100.0 * rate_diffLG, 2)
        pct_undet = round(100.0 * rate_undet, 2)
    else:
        print ''.join('Not all scaffolds to be checked against genetic map'
                      ' were successfully checked.')
        print 'Maybe something is wrong with the input data?'
        print util.report_time()
        sys.exit(2)

    # write to tsv
    tsvout = '-'.join([prefix, 'genetic-map-scaffold-results.tsv'])
    with open(tsvout, 'w') as outfile:
        header_txt = [
            '', '1 complete cDNA', '2+ complete cDNAs',
            'Same LG, expected order', 'Same LG, unexpected order',
            'Different LG', 'Same LG, undetermined order'
        ]
        header = '\t'.join(header_txt)
        nums = '\t'.join([
            str(x) for x in [
                'Number', num_solo, num_2plus_scaff, num_goodLG, num_WO_LG,
                num_diffLG, num_undet
            ]
        ])
        pcts = '\t'.join([
            str(x) for x in [
                'Percent', pct_solo, pct_LGscaff, pct_goodLG, pct_WO_LG,
                pct_diffLG, pct_undet
            ]
        ])
        print >> outfile, util.report_cmd()
        print >> outfile, header
        print >> outfile, nums
        print >> outfile, pcts
    jiraout = '-'.join([prefix, 'genetic-map-scaffold-results.jira'])
    with open(jiraout, 'w') as outfile:
        header_txt.extend(
            [''])  # add final empty string for jira table separator
        header = '||'.join(header_txt)
        nums = [
            num_solo, num_2plus_scaff, num_goodLG, num_WO_LG, num_diffLG,
            num_undet
        ]
        pcts = [
            pct_solo, pct_LGscaff, pct_goodLG, pct_WO_LG, pct_diffLG, pct_undet
        ]
        res = '|' + '|'.join([util.jira_formatter(x)
                              for x in zip(nums, pcts)]) + '|'
        print >> outfile, util.report_cmd()
        print >> outfile, header
        print >> outfile, res

    # print to STDOUT
    print '\n=== GNAVIGATOR GENETIC MAP RESULTS per SCAFFOLD with 1+ COMPLETE cDNA ==='
    print ' '.join([
        '%s (%s%%) had exactly 1 complete cDNA from the genetic map',
        'aligned to them.'
    ]) % (num_solo, pct_solo)
    print ' '.join([
        '%s (%s%%) had 2+ complete cDNAs from the genetic map',
        'aligned to them.'
    ]) % (num_2plus_scaff, pct_LGscaff)
    print ' '.join([
        '  %s (%s%%) were from the same linkage group and in the',
        'expected order.'
    ]) % (num_goodLG, pct_goodLG)
    print ' '.join([
        '  %s (%s%%) were from the same linkage group, but NOT in',
        'the expected order.'
    ]) % (num_WO_LG, pct_WO_LG)
    print '  %s (%s%%) were from different linkage groups.' % (num_diffLG,
                                                               pct_diffLG)
    print ' '.join([
        '  %s (%s%%) were from the same linkage group but their order',
        'could not be determined.'
    ]) % (num_undet, pct_undet)
    print util.report_time()