def load_file_to_dict(file_loc, sites):
    """Append one ``site_info`` entry per line of ``file_loc`` to ``sites``.

    Every line of the file is parsed with ``mpileLine``.  The value returned
    by ``site_info`` for that parsed line is appended to the list stored at
    ``sites[chrom][pos]``; the inner dictionary and the list are created the
    first time a chromosome / position is seen.

    Resulting layout:
        sites: {CHROM: {pos: [info, ...], pos: [...]}, CHROM: {...}}

    Calling this repeatedly with the same ``sites`` accumulates entries, so a
    position present in several files ends up with several list items.

    ``sites`` is modified in place and nothing is returned.
    """
    with open(file_loc) as handle:
        for raw_line in handle:
            parsed = mpileLine(raw_line)
            # Look up (or create) the per-chromosome dict and the
            # per-position list before summarising the line.
            per_chrom = sites.setdefault(parsed.chrom, {})
            per_pos = per_chrom.setdefault(parsed.pos, [])
            per_pos.append(site_info(parsed))
return freq if __name__ == "__main__": mpile_in,mpile_out = sys.argv[1],sys.argv[2] outFile = open(mpile_out,"w") # min_alt = int(sys.argv[3]) min_freq = 0.3 with open(mpile_in) as mp_file: for line in mp_file: try: mpLine = mpileLine(line) if mpLine.chrom not in "pseudo0mitochondrionchloroplast": mutant = mpLine.getMutant() if mutant != None: alt_freq = freq_all_samples(mutant.majorAltBase, mpLine.samples) if alt_freq >= min_freq: outFile.write(line) else: print alt_freq except: print "ERROR" print sys.exc_info()[0]
GP_smpls = old_GP_smpls

# Assign idList to one of the CC or GP sample lists, chosen by which tag
# appears in the input file name.
if "CC" in mpile_in:
    idList = CC_smpls
elif "GP" in mpile_in:
    idList = GP_smpls
else:
    print("Error incorrect mpile_in name")
    # Exit with a non-zero status so calling shells/pipelines see the failure.
    sys.exit(1)

# Loop through the mpile file, counting how many lines map to each mutant ID.
with open(mpile_in) as mp_file:
    for line in mp_file:
        try:
            mpLine = mpileLine(line)
            # NOTE(review): the result is unused here; the call is kept in
            # case getMutantID() relies on it having run -- confirm.
            mutant = mpLine.getMutant()
            mutantID = mpLine.getMutantID(idList)
            counts[mutantID] = counts.get(mutantID, 0) + 1
        except Exception:
            # Best effort: dump the failure and the offending line, then
            # carry on.  ``Exception`` (not a bare ``except``) keeps Ctrl-C
            # and sys.exit() working inside the loop.
            print(sys.exc_info()[0])
            print(traceback.format_exc())
            print(line)

# Write out the counts dict to outFile, one "<id>:<count>" line per mutant ID.
for key in counts:
    outFile.write(str(key) + ":" + str(counts[key]) + "\n")
from vcfDict import vcfDict
from vcfLine import vcfLine
from vcfSample import vcfSample
from mpileLine import mpileLine


if __name__ == "__main__":
    # Usage: <script> <mpile_in> <vcf_in> <csv_out>
    mpile_in, vcf_in, csv_out = sys.argv[1], sys.argv[2], sys.argv[3]

    # Load dictionary of sites in vcf_in file
    vcfSites = vcfDict(vcf_in)
    vcfSites.loadDict()

    # Create the output file for csv_out and walk the pileup.  Both files are
    # managed by ``with`` so they are closed even when opening or reading the
    # input fails; no explicit close() calls are needed afterwards.
    # NOTE(review): nothing is written to outFile in this block yet.
    with open(csv_out, "w") as outFile, open(mpile_in) as pileFile:
        for line in pileFile:
            try:
                pile_line = mpileLine(line)
                print(pile_line.repr())
                if vcfSites.siteExists(pile_line.siteID):
                    # NOTE(review): vcf_line is built but not used further.
                    vcf_line = vcfLine(vcfSites.getLine(pile_line.siteID))
            except Exception:
                # Best effort: report the failure and the offending line,
                # then continue with the next one.
                print(sys.exc_info()[0])
                print(traceback.format_exc())
                print(line)