Beispiel #1
0
def sample_clonesize_stat(sample, samdir, freqs=None, numtop=50, args=None):
    """Summarise how a sample's clones are distributed over frequency bins.

    Each value in ``freqs`` is the inclusive lower bound of a bin; the bin
    extends up to (but excludes) the next value, and the last bin has no
    upper limit.  A clone whose frequency is below every lower bound is not
    counted in any bin.

    Args:
        sample: sample object handed to ``stat.set_sample_info``.
        samdir: passed to ``libsample.sample_all_clones`` to load the clones.
        freqs: iterable of bin lower bounds; it is sorted before use.
            ``None`` (the default) is treated as "no bins".
        numtop: number of highest-frequency clones whose frequencies are
            recorded in ``stat.topfreqs``.
        args: optional sequence; when truthy, ``args[0]`` overrides
            ``numtop``.

    Returns:
        The populated ``CloneSizeStat`` object, with per-bin values passed
        through ``libcommon.get_pc`` and cumulative series computed by
        ``libcommon.get_cumulative``.
    """
    # A None default avoids sharing one mutable list object between calls.
    if freqs is None:
        freqs = []
    stat = CloneSizeStat(sorted(freqs))
    stat.set_sample_info(sample)
    if args:
        numtop = args[0]
    clones = libsample.sample_all_clones(samdir)
    sorted_clones = sorted(clones, reverse=True, key=lambda c: c.freq)

    # The bin bounds do not depend on the clone, so pair every lower bound
    # with its upper bound once instead of recomputing it per clone.
    lower_bounds = list(stat.freqs)
    upper_bounds = lower_bounds[1:] + [float('inf')]
    bins = list(enumerate(zip(lower_bounds, upper_bounds)))

    for index, clone in enumerate(sorted_clones):
        # Clones are visited from highest to lowest frequency, so the first
        # <numtop> iterations are the top clones.
        if index < numtop:
            stat.topfreqs.append(clone.freq)

        for i, (minfreq, maxfreq) in bins:
            if minfreq <= clone.freq < maxfreq:
                stat.numclones[i] += 1
                stat.counts[i] += clone.count

    # Rescale the raw per-bin tallies against the sample totals.
    # NOTE(review): get_pc presumably returns a percentage - confirm in
    # libcommon.
    stat.numclones = [libcommon.get_pc(c, stat.numclone)
                      for c in stat.numclones]
    stat.counts = [libcommon.get_pc(c, stat.size) for c in stat.counts]
    # get cumulative stats:
    stat.numclones_cumul = libcommon.get_cumulative(stat.numclones)
    stat.counts_cumul = libcommon.get_cumulative(stat.counts)
    stat.topfreqs_cumul = libcommon.get_cumulative(stat.topfreqs, True)
    return stat
Beispiel #2
0
def sample_lendist_stat(sample, samdir, args=None):
    """Build the amino-acid length distribution of a sample's clones.

    Two distributions keyed by ``len(clone.aa)`` are computed over the
    clones that have a truthy ``aa``:

    * clones: the fraction of counted clones having each length, where
      only clones whose ``vdel`` is not ``None`` are counted;
    * reads: the sum of ``clone.freq`` for each length, over all clones
      with an ``aa`` value.

    Args:
        sample: sample object handed to ``stat.set_sample_info``.
        samdir: passed to ``libsample.sample_all_clones`` to load the clones.
        args: unused by this function.

    Returns:
        A ``LenDistStat`` populated through ``set_stats``.
    """
    len2clones = {}
    len2reads = {}
    totalclone = 0
    for clone in libsample.sample_all_clones(samdir):
        if not clone.aa:
            continue
        aalen = len(clone.aa)
        # NOTE(review): clones without a vdel value are left out of the
        # clone tally but still contribute their frequency below - confirm
        # this asymmetry is intended.
        if clone.vdel is not None:
            totalclone += 1
            len2clones[aalen] = len2clones.get(aalen, 0) + 1
        len2reads[aalen] = len2reads.get(aalen, 0) + clone.freq

    # Convert the per-length clone tallies into fractions of the counted
    # clones.  totalclone is non-zero whenever len2clones has entries.
    # items() is used instead of iteritems() so this also runs on Python 3.
    len2clones = dict((aalen, float(numclone) / totalclone)
                      for aalen, numclone in len2clones.items())

    stat = LenDistStat()
    stat.set_sample_info(sample)
    stat.set_stats(len2clones, len2reads)
    return stat
Beispiel #3
0
def sample_geneusage_stat(sample, samdir, args=None):
    """Compute V/D/J gene and VJ/DJ gene-pair usage for a sample.

    For each of the types ``'v'``, ``'d'``, ``'j'``, ``'vj'`` and ``'dj'``
    two mappings are built:

    * clones: key -> number of clones using it, divided by
      ``sample.numclone``;
    * reads: key -> sum of ``clone.freq`` over the clones using it.

    Single-gene keys are ``clone[<type>]``; pair keys join the two genes
    with ``"|"``.

    Args:
        sample: sample object; supplies ``numclone`` and is handed to
            ``stat.set_sample_info``.
        samdir: passed to ``libsample.sample_all_clones`` to load the clones.
        args: unused by this function.

    Returns:
        A ``GeneUsageStat`` populated through ``set_stats``.
    """
    single_types = ['v', 'd', 'j']
    pair_types = ['vj', 'dj']

    # initialize usage
    type2gene2clones = {}
    type2gene2reads = {}
    for genetype in single_types + pair_types:
        type2gene2clones[genetype] = {}
        type2gene2reads[genetype] = {}

    def add_usage(genetype, key, freq):
        # Tally one clone (and its frequency) under <key> for <genetype>.
        gene2clones = type2gene2clones[genetype]
        gene2reads = type2gene2reads[genetype]
        gene2clones[key] = gene2clones.get(key, 0.0) + 1.0
        gene2reads[key] = gene2reads.get(key, 0) + freq

    # get usage
    for clone in libsample.sample_all_clones(samdir):
        freq = clone.freq
        # update each genetype usage
        for genetype in single_types:
            add_usage(genetype, clone[genetype], freq)
        # update gene combination usage; each character of the pair type
        # names one of the genes to combine
        for genetype in pair_types:
            combi = "|".join([clone[genetype[0]], clone[genetype[1]]])
            add_usage(genetype, combi, freq)

    # Convert the clone tallies into fractions of the sample's clones.
    # Only values of existing keys are replaced, so iterating the dict
    # while assigning is safe on both Python 2 and 3.
    for gene2clones in type2gene2clones.values():
        for gene in gene2clones:
            gene2clones[gene] = float(gene2clones[gene]) / sample.numclone

    # get the stat obj
    stat = GeneUsageStat()
    stat.set_sample_info(sample)
    stat.set_stats(type2gene2clones, type2gene2reads)
    return stat