Exemple #1
0
                    for g in gsets[ii]:
                        counts[g] = counts.get(g, 0) + 1
                mx = max(counts.values())
                genesets[ii] = set(
                    [x for x, y in counts.iteritems() if y == mx])
                print 'empty common genes:', ii, 'clone_size:', clone_size, 'mx-genecount', mx, 'newgeneset:', genesets[
                    ii], em

        for genes, segtype in zip(genesets, segtypes_lowercase):
            assert genes
            tag = segtype + '_genes'
            assert tag in outl  # should already be there, now over-writing
            outl[tag] = ';'.join(sorted(genes))

            ## update reps
            reps = sorted(set((util.get_rep(x, organism) for x in genes)))
            tag = segtype + '_reps'
            assert tag in outl  # should already be there, now over-writing
            outl[tag] = ';'.join(reps)

            ## update countreps
            countreps = sorted(
                set((util.get_mm1_rep_gene_for_counting(x, organism)
                     for x in genes)))
            tag = segtype + '_countreps'
            assert tag in outl  # should already be there, now over-writing
            outl[tag] = ';'.join(countreps)

        total_clones += 1

        out.write(make_tsv_line(outl, outfields, '-') + '\n')
Exemple #2
0
def _warn_once(message):
    """Print `message` the first time it is seen, then remember it.

    Relies on the module-level `warnings` set to track which messages have
    already been shown.
    """
    if message not in warnings:
        print(message)
        warnings.add(message)


def reconstruct_field_from_data(field, l, organism):
    """Try to derive the value of `field` from the other entries of `l`.

    Args:
        field: Name of the field to reconstruct.
        l: Mapping of field name -> value for one input line.
        organism: Passed through unchanged to the `util` rep helpers.

    Returns:
        The value for `field`, or None when it cannot be reconstructed.

    Rules, in order:
      * a field already present in `l` is returned as-is;
      * 'subject' falls back to l['mouse'];
      * '*_prob' defaults to 1.0 and 'clone_size' to 1 (one-time warning);
      * 'va_'/'ja_'/'vb_'/'jb_' + gene/genes/rep/reps/countreps are derived
        from the sibling '<prefix>gene', '<prefix>genes' or
        '<prefix>blast_hits' entries;
      * '*_quals' becomes one '60' per character of the matching
        '<first five chars of field>_nucseq' entry, joined with '.'.

    Any exception raised while applying these rules (including a missing
    '<prefix>gene' key in the 'reps'/'countreps' cases) is reported on stdout
    and treated as a failure rather than propagated. Branches that fall
    through without returning print a "Failed to reconstruct" message; the
    explicit `return None` branches do not.
    """
    try:
        if field in l:
            return l[field]

        if field == 'subject':
            if 'mouse' in l:
                return l['mouse']
        elif field.endswith('_prob'):
            _warn_once(prob_warning)
            return 1.0
        elif field == 'clone_size':
            _warn_once(clone_size_warning)
            return 1
        elif field[:3] in ('va_', 'ja_', 'vb_', 'jb_'):
            ## these are all pretty similar
            prefix, tag = field[:3], field[3:]
            gene_field = prefix + 'gene'
            genes_field = prefix + 'genes'

            if tag == 'gene':
                # only one place to get this
                if genes_field not in l:
                    return None
                return sorted(l[genes_field].split(listsep))[0]
            elif tag == 'genes':
                hits_field = prefix + 'blast_hits'
                if hits_field in l:
                    hits = l[hits_field]
                    if hits == '-':  # failed to find any hits
                        return None
                    return listsep.join(sorted(util.get_top_genes(hits)))
                elif gene_field in l:
                    ## just take the one gene we know
                    return l[gene_field]
            elif tag == 'rep':
                if gene_field in l:
                    if '*' in l[gene_field]:
                        return util.get_rep(l[gene_field], organism)
                    # no '*' in the gene name: warn and pass it through
                    _warn_once(rep_warning)
                    return l[gene_field]
            elif tag in ('reps', 'countreps'):
                ## we should already have hit 'genes' in the list of fields
                ## we are trying to fill, so '<prefix>genes' is expected here
                if '*' not in l[gene_field]:
                    _warn_once(rep_warning)
                    return l[gene_field]
                genes = l[genes_field].split(listsep)
                if tag == 'reps':
                    return listsep.join(
                        sorted(util.get_rep(x, organism) for x in genes))
                return listsep.join(
                    sorted(util.countreps_from_genes(genes, organism)))
        elif field.endswith('_quals'):
            seqfield = field[:5] + '_nucseq'
            if seqfield not in l:
                return None
            return '.'.join(['60'] * len(l[seqfield]))

    except Exception as inst:  ## this is not the best way to handle it...
        # Deliberately best-effort: report the problem and fall through to
        # the generic failure message below.
        print('Hit an exception trying to get field {} from line {}'.format(
            field, inst))

    print('Failed to reconstruct {} from the input fields: {}'.format(
        field, ' '.join(sorted(l.keys()))))
    return None
Exemple #3
0
 def __init__(self, organism, va_genes, vb_genes, cdr3a, cdr3b):
     """Store the unique reps of the given genes plus slices of both CDR3s.

     `va_genes` and `vb_genes` are iterables of gene names; each name is
     mapped through util.get_rep(name, organism) and the results are kept
     as frozensets. `cdr3a` and `cdr3b` are stored as full slices.
     """

     def rep_set(gene_names):
         # Duplicates collapse because the reps land in a frozenset.
         return frozenset(
             util.get_rep(name, organism) for name in gene_names)

     self.va_reps = rep_set(va_genes)
     self.vb_reps = rep_set(vb_genes)
     self.cdr3a = cdr3a[:]
     self.cdr3b = cdr3b[:]