def get_sex_info():
    '''collects per-sample sex annotations from the 'DB_library_data' table

    returns a 2-tuple (hyb_parent_sex, sex):

    sex             maps sampleid -> value of the row's 'sex' field, for
                    every row that has one
    hyb_parent_sex  maps sampleid -> 'M' when the row's sire id starts with
                    'F1' and its dam id starts with 'BW', or 'F' for the
                    reverse pairing; rows lacking 'sire' or 'dam', or
                    matching neither pairing, get no entry
    '''
    from short_read_analysis import preprocess_radtag_lane
    db = preprocess_radtag_lane.get_table_as_dict('DB_library_data')

    # `in` rather than dict.has_key(): has_key() no longer exists in
    # Python 3, while `in` behaves the same on Python 2 and 3
    sex = dict((l['sampleid'], l['sex']) for l in db if 'sex' in l)

    hyb_parent_sex = {}
    for l in db:
        if 'sire' not in l or 'dam' not in l:
            continue
        sire, dam = l['sire'], l['dam']
        # NOTE(review): presumably 'F1' marks the hybrid parent and 'BW' the
        # pure-line parent, so the value is the sex of the hybrid parent --
        # confirm against the database naming scheme
        if sire.startswith('F1') and dam.startswith('BW'):
            hyb_parent_sex[l['sampleid']] = 'M'
        elif dam.startswith('F1') and sire.startswith('BW'):
            hyb_parent_sex[l['sampleid']] = 'F'

    return hyb_parent_sex, sex
Exemple #2
0
def write_spagedi_genotypes(vcf_data,
                            outfile,
                            keys_to_write=None,
                            indiv_to_write=None):
    '''generates output intended for SPAGeDi

    vcf_data        dict keyed by (chrom, pos) pairs; each value holds an
                    'indiv_gt' dict of individual -> {'GT': 'a/b', ...}
    outfile         path of the file to (over)write
    keys_to_write   loci to output; defaults to every key of vcf_data.
                    Loci are written ordered by (chrom, int(pos)); the
                    caller's sequence is left unmodified
    indiv_to_write  individuals to output, in the order given; defaults to
                    the sorted union of individuals seen at the chosen loci

    each individual's "Pop" column is its 'population' value from
    'DB_library_data', or 'pop1' when the table has none for it. The header
    line still hard-codes its second field to 1; this will need to be
    elaborated upon if several populations are to be analyzed as such.

    allele indices are written shifted up by one, and a locus with no
    genotype record for an individual is written as 0/0
    '''

    from short_read_analysis import preprocess_radtag_lane
    # `in` instead of has_key(), which was removed in Python 3
    lookup = dict(
        (l['sampleid'], l['population'])
        for l in preprocess_radtag_lane.get_table_as_dict('DB_library_data')
        if 'population' in l)

    if keys_to_write is None:
        keys_to_write = vcf_data.keys()

    # sorted() makes a private copy: an in-place .sort() would reorder the
    # caller's list and fails outright on a Python 3 dict keys view
    keys_to_write = sorted(keys_to_write, key=lambda x: (x[0], int(x[1])))

    if indiv_to_write is None:
        indiv_to_write = set()
        for k in keys_to_write:
            indiv_to_write.update(vcf_data[k]['indiv_gt'])
        indiv_to_write = sorted(indiv_to_write)

    # the context manager closes the file even if a write below raises
    with open(outfile, 'w') as ofh:
        #write header
        ofh.write('%s\t1\t0\t%s\t1\t2\n0\nInd\tPop\t%s\n' % (
            len(indiv_to_write),
            len(keys_to_write),
            '\t'.join(['%s.%s' % (c, p) for c, p in keys_to_write])))

        #write genotypes
        for ind in indiv_to_write:
            ofh.write('%s\t%s' % (ind, lookup.get(ind, 'pop1')))
            for k in keys_to_write:
                try:
                    gt_field = vcf_data[k]['indiv_gt'][ind]['GT']
                except KeyError:
                    # no genotype record for this individual at this locus
                    ofh.write('\t0/0')
                    continue
                # NOTE(review): a GT holding '.' or a '|' separator raises
                # ValueError here -- presumably filtered upstream; confirm
                gt = '/'.join([str(int(i) + 1) for i in gt_field.split('/')])
                ofh.write('\t' + gt)
            ofh.write('\n')

        ofh.write('END\n')
Exemple #3
0
def write_spagedi_genotypes(vcf_data, outfile, keys_to_write = None, indiv_to_write = None):
    '''generates output intended for SPAGeDi

    vcf_data is keyed by (chrom, pos) pairs, each value carrying an
    'indiv_gt' dict of individual -> {'GT': 'a/b', ...}; outfile is the path
    that gets (over)written.

    keys_to_write selects the loci (default: all keys of vcf_data); they are
    emitted ordered by (chrom, int(pos)) without touching the caller's
    sequence. indiv_to_write selects the individuals, in the order given
    (default: sorted union of individuals present at the selected loci).

    the "Pop" column holds the individual's 'population' from
    'DB_library_data', falling back to 'pop1'; the header line nevertheless
    hard-codes its second field to 1, which will need to be elaborated upon.

    allele indices are written incremented by one; an individual without a
    genotype record at a locus gets 0/0 there.
    '''

    from short_read_analysis import preprocess_radtag_lane
    # membership test via `in`: dict.has_key() is gone in Python 3
    db_rows = preprocess_radtag_lane.get_table_as_dict('DB_library_data')
    lookup = dict((l['sampleid'],l['population']) for l in db_rows if 'population' in l)

    if keys_to_write is None:
        keys_to_write = vcf_data.keys()

    # work on a sorted copy so the caller's list is not reordered as a side
    # effect (and so that dict key views, which lack .sort(), are accepted)
    keys_to_write = sorted(keys_to_write, key = lambda x: (x[0],int(x[1])))

    if indiv_to_write is None:
        indiv_to_write = set()
        for k in keys_to_write:
            indiv_to_write.update(vcf_data[k]['indiv_gt'])
        indiv_to_write = sorted(indiv_to_write)

    # `with` guarantees the handle is closed if anything below raises
    with open(outfile,'w') as ofh:
        #write header
        locus_names = '\t'.join(['%s.%s' % (c,p) for c,p in keys_to_write])
        ofh.write('%s\t1\t0\t%s\t1\t2\n0\nInd\tPop\t%s\n' % \
                  (len(indiv_to_write),len(keys_to_write),locus_names))

        #write genotypes
        for ind in indiv_to_write:
            ofh.write('%s\t%s' % (ind,lookup.get(ind,'pop1')))
            for k in keys_to_write:
                try:
                    gt_field = vcf_data[k]['indiv_gt'][ind]['GT']
                except KeyError:
                    # individual has no genotype record at this locus
                    ofh.write('\t0/0')
                    continue
                # NOTE(review): '.' alleles or '|' separators would raise
                # ValueError here -- presumably filtered upstream; confirm
                gt = '/'.join([str(int(i)+1) for i in gt_field.split('/')])
                ofh.write('\t'+gt)
            ofh.write('\n')

        ofh.write('END\n')
            else:
                print >> sys.stderr, 'no matching genotypes for pheno line %s' % pd['id']
        else:
            print >> sys.stderr, 'no id in %s' % pd
            
    return phenomaploci,phenomap

if __name__ == '__main__':
    # command line: DB[,DB...] MAPFILE[,MIDFILE] OUTFILE
    db,mapfile,outfile = sys.argv[1:4]

    # the map argument may carry a second, comma-separated path that is
    # handed to load_cross_radtag_genotypes as its second argument
    # (presumably a marker-ID file -- confirm against that loader)
    if ',' in mapfile:
        # split the argument itself; the previous code split an undefined
        # name `m`, raising NameError whenever a comma was supplied
        mapf,mIDf = mapfile.split(',')
    else:
        mapf = mapfile
        mIDf = False

    # several comma-separated tables are concatenated into one phenotype list
    if ',' in db:
        phenotypes = []
        for db_i in db.split(','):
            phenotypes.extend(preprocess_radtag_lane.get_table_as_dict(db_i,suppress_fc_check=True))
    else:
        phenotypes = preprocess_radtag_lane.get_table_as_dict(db,suppress_fc_check=True)
    
    maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,mIDf)
    
    phenomaploci,phenomap = add_pheno_to_map(phenotypes,maploci,genotypes)
    print >> sys.stderr, '%s pheno+map loci, %s lines' % (len(phenomaploci),len(phenomap))
    og,mID = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(phenomaploci,phenomap,outfile)