def updateAll():
    """Rebuild the filtered working record and refresh the dependent panels.

    Re-reads the file behind ``rec2``, writes a copy without the loci in
    ``remLoci`` and then without every population named in ``remPops``,
    stores the result in the global ``selRec2`` and finally asks
    ``runDatacal`` to run, passing a callback that updates the chart and
    re-enables change tracking on the empirical panel.
    """
    global rec2, selRec2, popNames, remPops, remLoci, locusFst
    global summPanel, empiricalPanel, frame
    global myFst, maxRun, minRun  # private search state, reused by report

    empiricalPanel.ignoreChanges = True
    myFst = maxRun = minRun = -1

    scratchPath = lpath + os.sep + "sr.tmp"
    workPath = lpath + os.sep + "sr"

    # Step 1: drop the removed loci (written to scratch, copied, re-read).
    selRec2 = FileParser.read(rec2.fname)
    selRec2.remove_loci_by_name(remLoci, scratchPath)
    shutil.copyfile(scratchPath, workPath)
    selRec2 = FileParser.read(workPath)

    # Step 2: drop the removed populations, visiting positions from last to
    # first (presumably so earlier positions stay valid after a removal).
    for pos in reversed(range(len(popNames))):
        if popNames[pos] not in remPops:
            continue
        selRec2.remove_population(pos, scratchPath)
        shutil.copyfile(scratchPath, workPath)
        selRec2 = FileParser.read(workPath)

    summPanel.update(rec2, popNames, remPops, remLoci)
    enableAllMenus()
    empiricalPanel.setTotalPops(len(popNames))

    def after():
        # Runs once runDatacal decides to invoke its callback.
        chartPanel.setMarkers(locusFst)
        chartPanel.updateChartDataset()
        empiricalPanel.knownPops = countPops(selRec2)
        empiricalPanel.ignoreChanges = False

    runDatacal(after)
def checkAlleles(file):
    """Report loci of a GenePop file that show more than two alleles.

    Parameters:
     - file - name of the GenePop file, handed to ``FileParser.read``.

    Every individual is scanned and the distinct truthy alleles seen at
    each locus are collected.  If any locus ends up with more than two
    alleles, ``error`` is called on ``frame``: with the list of locus names
    when there are at most five offenders, with just their count otherwise.
    Nothing is returned.
    """
    df = FileParser.read(file)
    try:
        countAlleles = [set() for _ in df.loci_list]
        indiv = df.get_individual()
        while indiv:
            # Only tuples carry (name, loci); other truthy values returned
            # by get_individual() are skipped.
            if isinstance(indiv, tuple):
                _name, loci = indiv
                for pos, alleles in enumerate(loci):
                    countAlleles[pos].update(a for a in alleles if a)
            indiv = df.get_individual()
    finally:
        # Release the parser's file handle even if parsing fails.
        df._handle.close()

    probLoci = [df.loci_list[pos]
                for pos, seen in enumerate(countAlleles)
                if len(seen) > 2]
    if probLoci:
        if len(probLoci) > 5:
            error(frame, "Many (%d) loci have more than 2 alleles" % (len(probLoci)))
        else:
            error(frame, "Some loci have more than 2 alleles: %s" % (str(probLoci)))
def test_remove_features(self):
    """Check that each loci-removal method writes a file without locus 0.

    For every input file, the four removal entry points (by position and by
    name, single and plural) are run in turn against the same temporary
    output file, which is then parsed back with ``GenePop.read`` and
    compared against the source record's loci list minus its first entry.
    """
    for fname in self.files:
        scratch = tempfile.NamedTemporaryFile(mode="w+", delete=False)
        scratch.close()
        out_path = scratch.name
        rec = FileParser.read(fname)

        # Each entry performs one removal, always targeting the first locus.
        removals = (
            lambda: rec.remove_loci_by_position([0], out_path),
            lambda: rec.remove_locus_by_position(0, out_path),
            lambda: rec.remove_locus_by_name(rec.loci_list[0], out_path),
            lambda: rec.remove_loci_by_name([rec.loci_list[0]], out_path),
        )
        for remove in removals:
            remove()
            with open(out_path, 'r') as ft:
                ft.seek(0)
                written = GenePop.read(iter(ft))
            self.assertEqual(rec.loci_list[1:], written.loci_list)

        os.remove(out_path)
        rec._handle.close()
def test_remove_features(self):
    """Testing the ability to remove population/loci via class methods.

    For each file in ``self.files`` the first locus is removed through
    every removal method, the output is parsed back with ``GenePop.read``
    and its loci list is compared with the source record's loci minus the
    first one.  The temporary file and the parser handle are released even
    when an assertion fails.
    """

    def written_loci(path):
        """Parse the GenePop file at *path* and return its loci list."""
        # A freshly opened file is already positioned at offset 0.
        with open(path, "r") as handle:
            return GenePop.read(iter(handle)).loci_list

    for fname in self.files:
        ftemp = tempfile.NamedTemporaryFile(mode="w+", delete=False)
        ftemp.close()
        try:
            rec = FileParser.read(fname)
            try:
                rec.remove_loci_by_position([0], ftemp.name)
                self.assertEqual(rec.loci_list[1:], written_loci(ftemp.name))

                rec.remove_locus_by_position(0, ftemp.name)
                self.assertEqual(rec.loci_list[1:], written_loci(ftemp.name))

                rec.remove_locus_by_name(rec.loci_list[0], ftemp.name)
                self.assertEqual(rec.loci_list[1:], written_loci(ftemp.name))

                rec.remove_loci_by_name([rec.loci_list[0]], ftemp.name)
                self.assertEqual(rec.loci_list[1:], written_loci(ftemp.name))
            finally:
                rec._handle.close()
        finally:
            # delete=False means the file must be removed by hand.
            os.remove(ftemp.name)
def _convert_genepop_to_fdist_big(gp_rec, report_pops=None):
    """Convert a big GenePop record to a FDist one.

    Parameters:
     - gp_rec - Genepop Record (Big); its ``fname`` is re-read from disk
       and its ``loci_list`` gives the number of loci.
     - report_pops - optional callable, invoked with the running population
       count (first with 1, then after each population boundary).

    Returns: FDist record.
    """
    fd_rec = Bio.PopGen.FDist.Record()
    fd_rec.data_org = 1
    num_loci = len(gp_rec.loci_list)
    fd_rec.num_loci = num_loci

    # loci[i] collects every allele observed at locus i across all pops.
    loci = [set() for _ in range(num_loci)]
    pops = []

    def init_pop():
        """Return one empty allele->count dict per locus."""
        return [{} for _ in range(num_loci)]

    work_rec = FileParser.read(gp_rec.fname)
    try:
        lParser = work_rec.get_individual()
        curr_pop = init_pop()
        num_pops = 1
        if report_pops:
            report_pops(num_pops)
        while lParser:
            if lParser is not True:
                # An individual: lParser[1] holds the alleles per locus.
                genotype = lParser[1]
                for loci_pos in range(num_loci):
                    counts = curr_pop[loci_pos]
                    for al in genotype[loci_pos]:
                        if al is not None:
                            loci[loci_pos].add(al)
                            counts[al] = counts.get(al, 0) + 1
            else:
                # True marks a population boundary.
                pops.append(curr_pop)
                num_pops += 1
                if report_pops:
                    report_pops(num_pops)
                curr_pop = init_pop()
            lParser = work_rec.get_individual()
    finally:
        # Close the parser's file even when parsing or the callback fails.
        work_rec._handle.close()

    pops.append(curr_pop)
    fd_rec.num_pops = num_pops
    for loci_pos in range(num_loci):
        alleles = sorted(loci[loci_pos])
        # One row per population, columns in sorted allele order.
        pop_counts = [[pop[loci_pos].get(allele, 0) for allele in alleles]
                      for pop in pops]
        fd_rec.loci_data.append((len(alleles), pop_counts))
    return fd_rec
def test_file_record_parser(self):
    """Exercise FileParser.read on each test file.

    Checks the record type, the start of its string form, the number of
    loci, and that ``skip_population`` can be called once per expected
    population before it reports that nothing is left.
    """
    expected_start = (
        "Generated by createGenePop.py - (C) Tiago Antao\n"
        "136255903\n"
        "136257048\n"
        "136257636\n"
        "Pop\n"
    )
    for index, fname in enumerate(self.files):
        rec = FileParser.read(fname)
        self.assertIsInstance(rec, FileParser.FileRecord)
        self.assertTrue(
            str(rec).startswith(expected_start),
            "Did not expect this:\n%s" % rec,
        )
        self.assertEqual(len(rec.loci_list), self.num_loci[index])

        expected_pops = self.pops_indivs[index][0]
        for _ in range(expected_pops):
            outcome = rec.skip_population()
            self.assertIn(outcome, (True, None), msg="Not enough populations")
        leftover = rec.skip_population()
        self.assertFalse(leftover, msg="Too much populations")
        rec._handle.close()  # TODO - Needs a proper fix
def _convert_genepop_to_fdist_big(gp_rec, report_pops=None):
    """Build a FDist record from a big (file-backed) GenePop record.

    Parameters:
     - gp_rec - Genepop Record (Big); read again from ``gp_rec.fname``.
     - report_pops - optional callable, called with the population count
       each time that count is set or incremented.

    Returns: FDist record.
    """
    n_loci = len(gp_rec.loci_list)
    fd_rec = Bio.PopGen.FDist.Record()
    fd_rec.data_org = 1
    fd_rec.num_loci = n_loci

    def blank_pop():
        # One allele->count mapping per locus.
        return [{} for _ in range(n_loci)]

    seen_alleles = [set() for _ in range(n_loci)]
    finished_pops = []
    reader = FileParser.read(gp_rec.fname)
    entry = reader.get_individual()

    tally = blank_pop()
    num_pops = 1
    if report_pops:
        report_pops(num_pops)

    while entry:
        if entry is True:
            # Population boundary: bank the tally and start a new one.
            finished_pops.append(tally)
            num_pops += 1
            if report_pops:
                report_pops(num_pops)
            tally = blank_pop()
        else:
            for pos in range(n_loci):
                for al in entry[1][pos]:
                    if al is None:
                        continue
                    seen_alleles[pos].add(al)
                    tally[pos][al] = tally[pos].get(al, 0) + 1
        entry = reader.get_individual()
    reader._handle.close()  # TODO - Needs a proper fix

    finished_pops.append(tally)
    fd_rec.num_pops = num_pops

    for pos in range(n_loci):
        ordered = sorted(seen_alleles[pos])
        per_pop = []
        for pop in finished_pops:
            per_pop.append([pop[pos].get(al, 0) for al in ordered])
        fd_rec.loci_data.append((len(ordered), per_pop))
    return fd_rec
def test_convert_big(self):
    """Big interface conversion test.

    Every file in ``self.names`` must convert to an FDist record with
    3 loci and 3 populations.
    """
    for name in self.names:
        gp_rec = FileParser.read(name)
        try:
            fd_rec = convert_genepop_to_fdist(gp_rec)
            # unittest assertions are not stripped by ``python -O`` and
            # report both values on failure, unlike a bare ``assert``.
            self.assertEqual(fd_rec.num_loci, 3)
            self.assertEqual(fd_rec.num_pops, 3)
        finally:
            # The file-backed record keeps its handle open; release it.
            gp_rec._handle.close()
def test_convert_big(self):
    """Convert each file-backed GenePop record and check its dimensions.

    Each converted FDist record is expected to have 3 loci and 3 pops.
    """
    for name in self.names:
        converted = convert_genepop_to_fdist(FileParser.read(name))
        assert converted.num_loci == 3
        assert converted.num_pops == 3
def loadGenePop(file):
    """Load a GenePop file and reset the population/locus selection.

    Parameters:
     - file - name of the GenePop file, handed to ``FileParser.read``.

    On success the globals are replaced together: ``rec2`` becomes the new
    record, ``remPops`` and ``remLoci`` are emptied and ``popNames`` becomes
    ``['pop1', ..., 'popN']`` for the N populations counted by
    ``countPops``.  When ``isDominant`` is set, ``checkAlleles`` is run on
    the file as well.  If anything fails, ``error`` is shown on ``frame``
    and the previous globals are left untouched.
    """
    global rec2, popNames, remPops, remLoci
    try:
        newRec = FileParser.read(file)
        if isDominant:
            checkAlleles(file)
        newNames = ['pop' + str(i + 1) for i in range(countPops(newRec))]
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not reported as a bad file.
        error(frame, "Not a genepop file!")
        return
    # Commit only after everything succeeded, so a failed load cannot leave
    # rec2 pointing at the new file while popNames describes the old one.
    rec2 = newRec
    remPops = []
    remLoci = []
    popNames = newNames
def test_file_record_parser(self):
    """Basic operation of the File Record Parser.

    For each test file: the parsed object must be a ``FileRecord`` with the
    expected number of loci, ``skip_population`` must not return False
    during the expected number of calls, and must not return True on the
    call after that.
    """
    for index, fname in enumerate(self.files):
        rec = FileParser.read(fname)
        try:
            self.assertIsInstance(rec, FileParser.FileRecord)
            self.assertEqual(len(rec.loci_list), self.num_loci[index])
            for _ in range(self.pops_indivs[index][0]):
                # Was ``raise Error(...)``; ``Error`` is not defined in the
                # visible code, so a proper test failure is reported instead.
                self.assertIsNot(rec.skip_population(), False,
                                 "Not enough populations")
            self.assertIsNot(rec.skip_population(), True,
                             "Too much populations")
            # The per-population size check that used to follow sat behind
            # an unconditional ``continue`` and never ran; it was dropped.
        finally:
            rec._handle.close()
def test_file_record_parser(self):
    """Basic operation of the File Record Parser.

    For each test file: checks the record type, the start of its string
    form, the number of loci, that ``skip_population`` never returns False
    during the expected number of calls, and that it does not return True
    on the call after that.
    """
    expected_start = (
        "Generated by createGenePop.py - (C) Tiago Antao\n"
        "136255903\n"
        "136257048\n"
        "136257636\n"
        "Pop\n"
    )
    for index, fname in enumerate(self.files):
        rec = FileParser.read(fname)
        try:
            self.assertIsInstance(rec, FileParser.FileRecord)
            self.assertTrue(str(rec).startswith(expected_start),
                            "Did not expect this:\n%s" % rec)
            self.assertEqual(len(rec.loci_list), self.num_loci[index])
            for _ in range(self.pops_indivs[index][0]):
                # Assertion methods make these show up as test failures
                # rather than as errors from a generic Exception.
                self.assertIsNot(rec.skip_population(), False,
                                 "Not enough populations")
            self.assertIsNot(rec.skip_population(), True,
                             "Too much populations")
        finally:
            # Close the handle even when one of the checks above fails.
            rec._handle.close()
def checkAlleles(file):
    """Flag loci in a GenePop file that show more than two alleles.

    Reads *file* with ``FileParser.read``, gathers the distinct truthy
    alleles seen per locus over all individuals and, if some locus has more
    than two, calls ``error`` on ``frame`` (listing the loci when there are
    five or fewer, giving only their number otherwise).
    """
    df = FileParser.read(file)
    seenPerLocus = [set() for _ in df.loci_list]

    while True:
        indiv = df.get_individual()
        if not indiv:
            break
        if type(indiv) is not tuple:
            # Truthy non-tuple results carry no genotype; skip them.
            continue
        name, genotype = indiv
        for pos, pair in enumerate(genotype):
            for allele in pair:
                if allele:
                    seenPerLocus[pos].add(allele)

    probLoci = []
    for pos, seen in enumerate(seenPerLocus):
        if len(seen) > 2:
            probLoci.append(df.loci_list[pos])

    if not probLoci:
        return
    if len(probLoci) > 5:
        error(frame, "Many (%d) loci have more than 2 alleles" % (len(probLoci)))
    else:
        error(frame, "Some loci have more than 2 alleles: %s" % (str(probLoci)))
def countPops(rec):
    """Count the populations in the GenePop file behind *rec*.

    Parameters:
     - rec - object whose ``fname`` names the file; it is opened afresh
       with ``FileParser.read`` so *rec* itself is not advanced.

    Returns 1 plus the number of times ``skip_population`` returned a
    truthy value.
    """
    f2 = FileParser.read(rec.fname)
    try:
        popCnt = 1
        while f2.skip_population():
            popCnt += 1
    finally:
        # The private reader is never handed out, so close its file here.
        f2._handle.close()
    return popCnt
def _convert_genepop_to_fdist_big_old(gp_rec, report_loci=None):
    """Convert a big GenePop record to a FDist one.

    The file is re-read twice per locus: once to collect the alleles
    present, once to count them per population.

    Parameters:
     - gp_rec - Genepop Record (Big); only ``fname`` and ``loci_list``
       are used.
     - report_loci - optional callable, invoked as
       ``report_loci(locus_index, num_loci)`` before each locus.

    Returns: FDist record.
    """
    fd_rec = Bio.PopGen.FDist.Record()

    def countPops(rec):
        """Return 1 plus the number of successful skip_population calls."""
        f2 = FileParser.read(rec.fname)
        try:
            popCnt = 1
            while f2.skip_population():
                popCnt += 1
        finally:
            f2._handle.close()
        return popCnt

    def fresh_counts(alleles):
        """Return a zeroed counter for *alleles* plus the None allele."""
        counts = dict.fromkeys(alleles, 0)
        counts[None] = 0
        return counts

    def process_pop(pop_data, alleles, allele_counts):
        """Append one population's counts, in the same order as *alleles*."""
        pop_data.append([allele_counts.get(allele, 0) for allele in alleles])

    fd_rec.data_org = 0
    num_loci = len(gp_rec.loci_list)
    fd_rec.num_loci = num_loci
    # countPops only needs ``fname``, so no extra record has to be opened.
    fd_rec.num_pops = countPops(gp_rec)

    for lc_i in range(num_loci):
        if report_loci:
            report_loci(lc_i, num_loci)

        # Pass 1: which alleles occur at this locus?
        seen = set()
        work_rec = FileParser.read(gp_rec.fname)
        try:
            lParser = work_rec.get_individual()
            while lParser:
                if lParser is not True:
                    seen.update(al for al in lParser[1][lc_i]
                                if al is not None)
                lParser = work_rec.get_individual()
        finally:
            work_rec._handle.close()
        alleles = sorted(seen)

        # Pass 2: count those alleles per population (True = boundary).
        pop_data = []
        allele_counts = fresh_counts(alleles)
        work_rec2 = FileParser.read(gp_rec.fname)
        try:
            lParser = work_rec2.get_individual()
            while lParser:
                if lParser is True:
                    process_pop(pop_data, alleles, allele_counts)
                    allele_counts = fresh_counts(alleles)
                else:
                    for al in lParser[1][lc_i]:
                        allele_counts[al] += 1
                lParser = work_rec2.get_individual()
        finally:
            work_rec2._handle.close()
        process_pop(pop_data, alleles, allele_counts)
        fd_rec.loci_data.append((len(alleles), pop_data))
    return fd_rec
def report(fst):
    """Record a finished simulation chunk and, on the last one, advance the run.

    Takes the ``fdt`` lock (temporal mode) or the ``fda`` lock, bumps
    ``simsDonePanel`` by ``splitSize / 1000.0`` and, only when the panel's
    value has reached its range, releases the lock and dispatches on the
    global ``runState``:

    * ``'ForceBeforeNeutral'`` / ``'Force'`` - asks ``approximate_fst`` for
      the next Fst to try and launches another FDist run; after 20 attempts
      or when the Fst stops changing, moves on to ``'Final'`` (from
      ``'Force'``) or ``'Neutral'``.
    * ``'Neutral'`` - computes p-values, writes a copy of the data without
      the loci returned by ``getSelLoci``, recomputes Fst (or Ne) on it and
      launches the next run with ``runState`` set to ``'Final'`` or
      ``'Force'`` depending on ``fdRequest``.
    * ``'Final'`` - computes p-values, shows the selection panel and
      re-enables the UI.

    Otherwise it only releases the lock.

    Parameters:
        fst: Fst value reported for the finished simulation; passed to
            ``approximate_fst`` and shown in the final info dialog.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed copy of this function; confirm block boundaries
    (in particular the placement of ``myFst = nextFst``) against the
    original file.
    """
    global numAttempts
    global fda, fdc, fdRequest, runState, selRec2, splitSize
    global chartPanel, simsDonePanel, systemPanel, empiricalPanel
    global empiricalPanel, menuHandles, statusPanel, frame
    global myFst, maxRun, minRun # This is really a private var to be reused
    global isTemporal, tempSamples, fdt
    # Serialise concurrent reports; one lock per simulation mode.
    if isTemporal:
        fdt.acquire()
    else:
        fda.acquire()
    # Negative values are the "unset" sentinel (reset to -1 further down).
    if myFst < 0:
        myFst = empiricalPanel.getFst()
    if maxRun < 0:
        maxRun = 1.0
        minRun = 0.0
    ext = FDistExtra.getExt()
    fdc = FDistController('.', ext)
    ci = systemPanel.getCI()  # not read again in this function
    chartPanel.drawCI = True
    confLines = changeChartCI(False)
    simsDonePanel.increment(splitSize / 1000.0)
    # NOTE(review): in temporal mode desiredFst is never assigned here, yet
    # the Force branch below reads it; desiredNe is assigned but never read.
    if isTemporal:
        desiredNe = empiricalPanel.getNe()
    else:
        desiredFst = empiricalPanel.getFst()
    if simsDonePanel.getRange() == simsDonePanel.getValue():
        #print runState
        if isTemporal:
            fdt.release() # We are the last one, this is safe
        else:
            fda.release() # We are the last one, this is safe
        if runState == 'ForceBeforeNeutral' or runState == 'Force':
            os.remove(lpath + os.sep + 'out.dat') #careful, not for 5000 case
            #print "max", maxRun, "min", minRun
            nextFst, maxRun, minRun = approximate_fst(desiredFst, fst, myFst, maxRun, minRun)
            #print "obtained", fst, "desired", desiredFst, "next", nextFst, "max", maxRun, "min", minRun
            numAttempts += 1
            # Stop forcing once the Fst no longer changes or after 20 tries.
            if nextFst == myFst or numAttempts == 20:
                numSims = systemPanel.getNumSims()
                if runState == 'Force':
                    statusPanel.setStatus('Running final simulation', Color.YELLOW)
                    runState = 'Final'
                else:
                    runState = 'Neutral'
                    statusPanel.setStatus('Simulation pass to determine initial neutral set', Color.CYAN)
            else:
                statusPanel.setStatus('Forcing correct mean Fst, current error is ' + str(round(abs(fst - desiredFst), 3)), Color.RED)
                numSims = 50000
            myFst = nextFst
            npops = empiricalPanel.getTotalPops()
            nsamples = countPops(selRec2)
            numCores = systemPanel.getNumCores()
            sampSize = empiricalPanel.getSampleSize()
            # Dominant mode passes theta/beta/crit and no mutation model;
            # otherwise only the mutation model is passed.
            if isDominant:
                theta = empiricalPanel.getTheta()
                beta = empiricalPanel.getBeta()
                crit = empiricalPanel.getCrit()
                mut = None
            else:
                theta = beta = crit = None
                mutStr = empiricalPanel.mut.getSelectedItem()
                mut = getMut(mutStr)
            if isTemporal:
                pass #XXX no temporal run is launched from this branch
            else:
                runFDistPart(False, selRec2, mut, numSims, npops, nsamples, myFst, sampSize, theta, beta, crit, numCores)
        elif runState == 'Neutral':
            # Reset the search state to the "unset" sentinel.
            maxRun = -1
            minRun = -1
            myFst = -1
            if isDominant:
                pv = fdc.run_pv(data_dir = lpath, version=2)
            else:
                pv = fdc.run_pv(data_dir = lpath, version=1)
            #pv = get_pv(data_dir = lpath)
            selLoci = getSelLoci(pv)
            if fdRequest == 'Neutral':
                runState = 'Final'
                numSims = systemPanel.getNumSims()
                statusPanel.setStatus('Running final simulation', Color.YELLOW)
            else:
                runState = 'Force'
                numAttempts = 0
                statusPanel.setStatus('Forcing correct mean Fst for final pass', Color.RED)
                numSims = 50000
            # Write a copy of the data without the loci in selLoci ("nr")
            # and build the infile from that copy.
            neutralRec = FileParser.read(selRec2.fname)
            #for locus in selLoci:
            neutralRec.remove_loci_by_name(selLoci, lpath+os.sep+"nr.tmp")
            shutil.copyfile(lpath + os.sep + "nr.tmp", lpath + os.sep + "nr")
            neutralRec = FileParser.read(lpath + os.sep + "nr")
            createInfile(convert_genepop_to_fdist(neutralRec))
            if isTemporal:
                dc = Datacal()
                dc.compute(lpath + os.sep + "infile", lpath + os.sep + "data_fst_outfile")
                dc.computeNe(tempSamples[-1] - tempSamples[0])
                myNe = dc.getNe()
            elif isDominant:
                crit = empiricalPanel.getCrit()
                beta = empiricalPanel.getBeta()
                myFst, _sampSize, _loci, _pops, _F, _obs = \
                    fdc.run_datacal(data_dir = lpath, version=2, crit_freq=crit, p=0.5, beta=beta)
            else:
                myFst, _sampSize = fdc.run_datacal(data_dir = lpath)
            #if myFst < 0.005:
            #    myFst = 0.005
            if isTemporal:
                empiricalPanel.setNe(myNe) #actually it is Ne
            else:
                empiricalPanel.setFst(myFst)
            if not isDominant:
                mutStr = empiricalPanel.mut.getSelectedItem()
                mut = getMut(mutStr)
            else:
                mut = None
            npops = empiricalPanel.getTotalPops()
            nsamples = countPops(selRec2)
            numCores = systemPanel.getNumCores()
            sampSize = empiricalPanel.getSampleSize()
            if isDominant:
                theta = empiricalPanel.getTheta()
                beta = empiricalPanel.getBeta()
                crit = empiricalPanel.getCrit()
            else:
                theta = beta = crit = None
            os.remove(lpath + os.sep + 'out.dat') #careful, not for 5000 case
            # Rebuild the infile from the full selected record before the
            # next run; the datacal results below are discarded except ne.
            createInfile(convert_genepop_to_fdist(selRec2))
            if isTemporal:
                dc = Datacal()
                dc.compute(lpath + os.sep + "infile", lpath + os.sep + "data_fst_outfile")
                dc.computeNe(tempSamples[-1] - tempSamples[0])
                ne = dc.getNe()
            elif isDominant:
                _fst, _sampSize, _loci, _pops, _F, _obs = \
                    fdc.run_datacal(data_dir = lpath, version=2, crit_freq = crit, p=0.5, beta=beta)
            else:
                _fst, _sampSize = fdc.run_datacal(data_dir = lpath)
            if isTemporal:
                runFtempPart(False, selRec2, numSims, npops, nsamples, ne, sampSize, numCores)
            else:
                runFDistPart(False, selRec2, mut, numSims, npops, nsamples, myFst, sampSize, theta, beta, crit, numCores)
        elif runState == 'Final':
            # Reset the search state to the "unset" sentinel.
            maxRun = -1
            minRun = -1
            myFst = -1
            statusPanel.setStatus('Done (preparing selection table, please wait...)', Color.GRAY)
            if isDominant:
                pv = fdc.run_pv(data_dir = lpath, version=2)
            else:
                pv = fdc.run_pv(data_dir = lpath, version=1)
            selLoci = getSelLoci(pv)
            chartPanel.setSelLoci(pv, selRec2.loci_list, selLoci)
            sp = SelPanel(frame, chartPanel, selRec2.loci_list, pv, systemPanel.getCI(), confLines, locusFst, isDominant, systemPanel.getFDR())
            if isTemporal:
                info(frame, "Done")
            else:
                info(frame, "Simulated Fst: %f" % (fst,))
            statusPanel.setStatus('Done')
            sp.show()
            # Hand control back to the user.
            enablePanel(empiricalPanel)
            enablePanel(systemPanel)
            enableAllMenus(True)
            systemPanel.enableChartFun = True
    else:
        # Not the last chunk yet: just release the lock taken above.
        if isTemporal:
            fdt.release()
        else:
            fda.release()
def countPops(rec2):
    """Return the number of populations in the file named by ``rec2.fname``.

    A separate reader is opened on the file; the result is 1 plus the
    number of truthy answers from its ``skip_population``.
    """
    reader = FileParser.read(rec2.fname)
    total = 1
    while True:
        if not reader.skip_population():
            return total
        total += 1
def _convert_genepop_to_fdist_big_old(gp_rec, report_loci=None):
    """Build a FDist record from a big GenePop record, one locus at a time.

    Parameters:
     - gp_rec - Genepop Record (Big); re-read from ``gp_rec.fname``.
     - report_loci - optional callable, called as
       ``report_loci(locus_index, num_loci)`` before each locus.

    Returns: FDist record.
    """

    def countPops(rec):
        reader = FileParser.read(rec.fname)
        total = 1
        while reader.skip_population():
            total += 1
        return total

    def zeroed(names):
        # Counter with every known allele, plus None, starting at zero.
        table = {}
        for name in names:
            table[name] = 0
        table[None] = 0
        return table

    def process_pop(pop_data, alleles, allele_counts):
        # Emit counts in exactly the order of ``alleles``.
        pop_data.append([allele_counts.get(a, 0) for a in alleles])

    fd_rec = Bio.PopGen.FDist.Record()
    fd_rec.data_org = 0
    fd_rec.num_loci = len(gp_rec.loci_list)
    work_rec0 = FileParser.read(gp_rec.fname)
    fd_rec.num_pops = countPops(work_rec0)
    num_loci = len(gp_rec.loci_list)

    for lc_i in range(num_loci):
        if report_loci:
            report_loci(lc_i, num_loci)
        scan_rec = FileParser.read(gp_rec.fname)
        count_rec = FileParser.read(gp_rec.fname)
        alleles = []
        pop_data = []

        # First read: discover the alleles present at this locus.
        item = scan_rec.get_individual()
        while item:
            if item is not True:
                for al in item[1][lc_i]:
                    if al is None or al in alleles:
                        continue
                    alleles.append(al)
            item = scan_rec.get_individual()
        alleles.sort()

        # Second read: count alleles per population (True = boundary).
        allele_counts = zeroed(alleles)
        item = count_rec.get_individual()
        while item:
            if item is True:
                process_pop(pop_data, alleles, allele_counts)
                allele_counts = zeroed(alleles)
            else:
                for al in item[1][lc_i]:
                    allele_counts[al] += 1
            item = count_rec.get_individual()
        process_pop(pop_data, alleles, allele_counts)
        fd_rec.loci_data.append((len(alleles), pop_data))
    return fd_rec