def get_sex_info(db=None):
    '''Collect per-sample sex information from library metadata rows.

    db -- optional iterable of dict rows. Each row that is used must carry a
          'sampleid' key; 'sex', 'sire' and 'dam' are optional. When omitted,
          the rows are loaded from the 'DB_library_data' table via
          preprocess_radtag_lane.get_table_as_dict.

    Returns a tuple (hyb_parent_sex, sex):
      hyb_parent_sex -- {sampleid: 'M' or 'F'} for rows that have both 'sire'
                        and 'dam'. 'M' when the sire id starts with 'F1' and
                        the dam id with 'BW'; 'F' for the reverse pairing.
                        Rows with any other pairing are left out.
                        (presumably 'F1' marks the hybrid parent -- confirm)
      sex            -- {sampleid: row['sex']} for every row with a 'sex' key.
    '''
    if db is None:
        # imported lazily so the project database layer is only required when
        # the caller did not supply rows
        from short_read_analysis import preprocess_radtag_lane
        db = preprocess_radtag_lane.get_table_as_dict('DB_library_data')

    sex = {}
    hyb_parent_sex = {}
    for row in db:
        # `in` instead of dict.has_key(), which no longer exists in Python 3
        if 'sex' in row:
            sex[row['sampleid']] = row['sex']
        if 'sire' in row and 'dam' in row:
            sire, dam = row['sire'], row['dam']
            if sire.startswith('F1') and dam.startswith('BW'):
                hyb_parent_sex[row['sampleid']] = 'M'
            elif dam.startswith('F1') and sire.startswith('BW'):
                hyb_parent_sex[row['sampleid']] = 'F'
    return hyb_parent_sex, sex
def write_spagedi_genotypes(vcf_data, outfile, keys_to_write=None, indiv_to_write=None, population_lookup=None):
    '''Write genotypes from vcf_data to outfile in a layout intended for SPAGeDi.

    vcf_data          -- dict keyed by (chrom, pos) 2-tuples; each value holds
                         an 'indiv_gt' dict mapping individual -> record with a
                         'GT' string of '/'-separated integer allele indices.
    outfile           -- path of the file to create (overwritten if present).
    keys_to_write     -- loci to write, in the order given. Defaults to every
                         key of vcf_data sorted by (chrom, int(pos)).
    indiv_to_write    -- individuals to write, in the order given. Defaults to
                         the sorted union of individuals seen at the loci
                         being written.
    population_lookup -- optional {individual: population} dict. When omitted
                         it is built from the rows of 'DB_library_data' that
                         have a 'population' key.

    Individuals missing from the population lookup are written as 'pop1'.
    Each allele index is written incremented by one, and a locus with no
    call for an individual is written as '0/0' (presumably SPAGeDi's
    missing-data code -- confirm against the SPAGeDi manual).

    NOTE(review): a later definition of this same function name exists in this
    file and overrides this one at import time.
    '''
    if population_lookup is None:
        # imported lazily so the database layer is only needed for the default
        from short_read_analysis import preprocess_radtag_lane
        population_lookup = {}
        for row in preprocess_radtag_lane.get_table_as_dict('DB_library_data'):
            # `in` instead of dict.has_key(), which is gone in Python 3
            if 'population' in row:
                population_lookup[row['sampleid']] = row['population']

    if keys_to_write is None:
        # sorted() rather than keys().sort(): keys() is a view in Python 3
        keys_to_write = sorted(vcf_data.keys(), key=lambda x: (x[0], int(x[1])))

    if indiv_to_write is None:
        seen = set()
        for k in keys_to_write:
            seen.update(vcf_data[k]['indiv_gt'].keys())
        indiv_to_write = sorted(seen)

    # `with` guarantees the handle is closed even if a write below raises
    with open(outfile, 'w') as ofh:
        # write header
        locus_names = '\t'.join(['%s.%s' % (c, p) for c, p in keys_to_write])
        ofh.write('%s\t1\t0\t%s\t1\t2\n0\nInd\tPop\t%s\n' %
                  (len(indiv_to_write), len(keys_to_write), locus_names))

        # write genotypes
        for ind in indiv_to_write:
            ofh.write('%s\t%s' % (ind, population_lookup.get(ind, 'pop1')))
            for k in keys_to_write:
                try:
                    call = vcf_data[k]['indiv_gt'][ind]['GT']
                except KeyError:
                    # locus, individual or GT field absent: no call
                    ofh.write('\t0/0')
                    continue
                gt = '/'.join([str(int(a) + 1) for a in call.split('/')])
                ofh.write('\t' + gt)
            ofh.write('\n')
        ofh.write('END\n')
# NOTE(review): write_spagedi_genotypes is defined more than once in this file;
# this later definition is the one in effect after import.
def write_spagedi_genotypes(vcf_data, outfile, keys_to_write = None, indiv_to_write = None, population_lookup = None):
    '''Write genotypes from vcf_data to outfile in a layout intended for SPAGeDi.

    vcf_data          -- dict keyed by (chrom, pos) 2-tuples; each value holds
                         an 'indiv_gt' dict mapping individual -> record with a
                         'GT' string of '/'-separated integer allele indices.
    outfile           -- path of the file to create (overwritten if present).
    keys_to_write     -- loci to write, in the order given. Defaults to every
                         key of vcf_data sorted by (chrom, int(pos)).
    indiv_to_write    -- individuals to write, in the order given. Defaults to
                         the sorted union of individuals seen at the loci
                         being written.
    population_lookup -- optional {individual: population} dict. When omitted
                         it is built from the rows of 'DB_library_data' that
                         have a 'population' key.

    Individuals missing from the population lookup are written as 'pop1'.
    Each allele index is written incremented by one, and a locus with no
    call for an individual is written as '0/0' (presumably SPAGeDi's
    missing-data code -- confirm against the SPAGeDi manual).
    '''
    if population_lookup is None:
        # imported lazily so the database layer is only needed for the default
        from short_read_analysis import preprocess_radtag_lane
        table = preprocess_radtag_lane.get_table_as_dict('DB_library_data')
        # `in` instead of dict.has_key(), which is gone in Python 3
        population_lookup = dict([(l['sampleid'], l['population']) for l in table if 'population' in l])

    if keys_to_write is None:
        # sorted() rather than keys().sort(): keys() is a view in Python 3
        keys_to_write = sorted(vcf_data.keys(), key = lambda x: (x[0], int(x[1])))

    if indiv_to_write is None:
        indiv_to_write = set()
        for k in keys_to_write:
            indiv_to_write.update(vcf_data[k]['indiv_gt'].keys())
        indiv_to_write = sorted(indiv_to_write)

    # `with` guarantees the handle is closed even if a write below raises
    with open(outfile, 'w') as ofh:
        # write header
        ofh.write('%s\t1\t0\t%s\t1\t2\n0\nInd\tPop\t%s\n' %
                  (len(indiv_to_write), len(keys_to_write),
                   '\t'.join(['%s.%s' % (c, p) for c, p in keys_to_write])))

        # write genotypes
        for ind in indiv_to_write:
            ofh.write('%s\t%s' % (ind, population_lookup.get(ind, 'pop1')))
            for k in keys_to_write:
                try:
                    raw_gt = vcf_data[k]['indiv_gt'][ind]['GT']
                except KeyError:
                    # locus, individual or GT field absent: no call
                    ofh.write('\t0/0')
                else:
                    ofh.write('\t' + '/'.join([str(int(i) + 1) for i in raw_gt.split('/')]))
            ofh.write('\n')
        ofh.write('END\n')
else: print >> sys.stderr, 'no matching genotypes for pheno line %s' % pd['id'] else: print >> sys.stderr, 'no id in %s' % pd return phenomaploci,phenomap if __name__ == '__main__': db,mapfile,outfile = sys.argv[1:4] if ',' in mapfile: mapf,mIDf = m.split(',') else: mapf = mapfile mIDf = False if ',' in db: phenotypes = [] for db_i in db.split(','): phenotypes.extend(preprocess_radtag_lane.get_table_as_dict(db_i,suppress_fc_check=True)) else: phenotypes = preprocess_radtag_lane.get_table_as_dict(db,suppress_fc_check=True) maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,mIDf) phenomaploci,phenomap = add_pheno_to_map(phenotypes,maploci,genotypes) print >> sys.stderr, '%s pheno+map loci, %s lines' % (len(phenomaploci),len(phenomap)) og,mID = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(phenomaploci,phenomap,outfile)