def add_pop_to_smartpca(smartpca_inbase,smartpca_outbase,db_name='FULL',prefix='RB'):
    '''Write a copy of a smartpca input set whose .ind file carries population labels.

    Reads <smartpca_inbase>.ind and writes <smartpca_outbase>.ind, keeping the
    first two whitespace-separated columns of each line and setting the third
    column to the 'location' value of the record in table db_name whose
    prefix + 'id' equals the first column.  Blank lines are skipped.

    Also writes <smartpca_outbase>.par, in which every file name is built from
    smartpca_outbase, and copies the .ancestrymapgeno and .snp files from the
    input basename to the output basename.

    Raises KeyError if an individual in the .ind file has no matching record,
    and OSError if either file copy fails.
    '''
    # function-scope import so the block does not depend on file-level imports
    import shutil

    td = get_table_as_dict(db_name,suppress_fc_check=True)
    pop_lookup = dict((prefix + d['id'], d['location']) for d in td)

    # context managers close both handles even if a lookup fails mid-file
    with open(smartpca_inbase + '.ind') as infh, open(smartpca_outbase + '.ind', 'w') as outfh:
        for line in infh:
            fields = line.split()
            if not fields:
                # a blank line has no individual id to look up
                continue
            outfh.write('\t'.join(fields[:2] + [pop_lookup[fields[0]] + '\n']))

    par_template = ('genotypename:\t%s.ancestrymapgeno\n'
                    'snpname:\t%s.snp\n'
                    'indivname:\t%s.ind\n'
                    'evecoutname:\t%s.evec\n'
                    'evaloutname:\t%s.eval\n'
                    'snpweightoutname:\t%s.snpweightout\n'
                    'phylipoutname:\t%s.fst\n')
    with open(smartpca_outbase + '.par', 'w') as parfh:
        parfh.write(par_template % ((smartpca_outbase,) * 7))

    # copy without a shell, so paths containing spaces or metacharacters work
    for ext in ('.ancestrymapgeno', '.snp'):
        src = smartpca_inbase + ext
        dst = smartpca_outbase + ext
        try:
            shutil.copy(src, dst)
        except EnvironmentError as err:
            # callers of the original saw OSError on a failed copy; keep that type
            raise OSError('could not copy %s to %s: %s' % (src, dst, err))
def wigs_phenotypes_from_DB(db_name,indiv_lists,survive_field='recap1',survive_value='R'):
    '''Return 0/1 phenotypes for each individual, grouped like indiv_lists.

    indiv_lists is tuple of lists, one list per pop, of individual ids

    Each id is matched against the 'id' field of the records in table db_name.
    An individual scores 1 when its record's survive_field equals
    survive_value, otherwise 0.  If several records share an id, the first one
    in the table is used.

    Returns a list with one list of ints per population, in the same order as
    indiv_lists.  Raises IndexError if an individual has no record in the table.
    '''
    td = get_table_as_dict(db_name,suppress_fc_check=True)

    # Index the table once instead of rescanning it for every individual;
    # setdefault keeps the first record seen for a duplicated id.
    first_by_id = {}
    for d in td:
        first_by_id.setdefault(d['id'], d)

    phenos = []
    for indivs in indiv_lists:
        pop_phenos = []
        for indiv in indivs:
            try:
                rec = first_by_id[indiv]
            except KeyError:
                # same exception type as the original lookup, now with context
                raise IndexError('individual %r not found in table %s' % (indiv, db_name))
            pop_phenos.append(int(rec[survive_field] == survive_value))
        phenos.append(pop_phenos)
    return phenos
def indiv_lists_by_enc(vcf_obj,db_name,query_d,prefix='RB',sort_key=None):
    '''Select individual ids from table db_name that are present in a vcf and match query_d.

    see write_wigs_all_simple for example of query_d

    query_d maps a record field name to a container of accepted values; a
    record matches when, for every field, record[field] is in that container.
    A record is only considered if prefix + record['id'] is among the
    individuals reported by indivs_in_vcf(vcf_obj).  The ids returned are the
    un-prefixed record 'id' values.

    If query_d has a 'site' key, one list is produced per entry of
    query_d['site'] (in that order), holding the records whose 'site' equals
    that entry; otherwise a single list is produced.

    Returns a tuple of lists.  When sort_key is given, each list is sorted with
    that key and the result is a list of lists instead of a tuple.
    '''
    records = get_table_as_dict(db_name,suppress_fc_check=True)
    present = indivs_in_vcf(vcf_obj)

    def _selected(rec, constraints):
        # in the vcf, and every constrained field holds an accepted value
        return prefix + rec['id'] in present and all([rec[field] in accepted for field, accepted in constraints])

    if 'site' in query_d:
        # 'site' picks the grouping; the remaining fields act as filters
        constraints = [(field, accepted) for field, accepted in query_d.items() if field != 'site']
        grouped = []
        for this_site in query_d['site']:
            grouped.append([rec['id'] for rec in records
                            if rec['site'] == this_site and _selected(rec, constraints)])
    else:
        constraints = list(query_d.items())
        grouped = [[rec['id'] for rec in records if _selected(rec, constraints)]]

    if sort_key is None:
        return tuple(grouped)
    return [sorted(ids, key=sort_key) for ids in grouped]
#!/usr/bin/env python
import os
import sys

import run_safe
from rtd import preprocess_radtag_lane,overlap_preprocess

# Adapter table read once at start-up and reused by get_adaptseq().
_DEFAULT_ADAPT_TABLE = 'DB_adapt_trim_seqs'

recs = preprocess_radtag_lane.get_table_as_dict(_DEFAULT_ADAPT_TABLE,suppress_fc_check=True)
# sys.stderr.write emits the same text as "print >> sys.stderr" and also runs on Python 3
sys.stderr.write('%s\n' % (recs,))


def get_adaptseq(table=_DEFAULT_ADAPT_TABLE):
    '''Return {adapterstype: {'r1': ..., 'r2': ...}} built from the rows of table.

    For the default table the rows already loaded into recs are reused; any
    other table name is fetched with get_table_as_dict, so the table argument
    is honoured rather than silently ignored.
    '''
    if table == _DEFAULT_ADAPT_TABLE:
        rows = recs
    else:
        rows = preprocess_radtag_lane.get_table_as_dict(table,suppress_fc_check=True)
    return dict((d['adapterstype'], {'r1': d['r1'], 'r2': d['r2']}) for d in rows)


adaptseq = get_adaptseq()
sys.stderr.write('%s\n' % (adaptseq,))