default="sync") parser.add_option_group(group) (options, args) = parser.parse_args() ########################################################### code ################################################### ################################################## read parameters ################################################# pops = map(int, options.pops.split(",")) out = open(options.out + "_table.txt", "w") ############################################## test for format of input file ################################### if options.type == "sync": filehandle = SyncReader(options.input) else: filehandle = CMHReader(options.input) ############################### read divergence file ######################################################################### divhash = collections.defaultdict(lambda: "") for l in open(options.div, "r"): if "Chr" in l: continue a = l.split() chr, pos, s1a, s2a = a[:4] ID = chr + "_" + pos ## store simulans specific allele divhash[ID] = s2a
parser.add_option("--input", dest="input", help="A synchronized input file")
parser.add_option(
    "--population",
    dest="populations",
    help="Determine whether the given position is a SNP based on these populations")
parser.add_option("--test",
                  action="store_true",
                  dest="test",
                  help="run the doctest")
parser.add_option_group(group)
(options, args) = parser.parse_args()

# Without --population the split below would fail with an AttributeError on
# None; report it as a usage error instead.
if options.populations is None:
    parser.error("--population is required (comma-separated list of population indices)")

# Population indices, given on the command line as a comma-separated list.
popstotest = list(map(int, options.populations.split(",")))

# One tab-separated row per input position:
#   data  <chr>_<pos>  then, for every entry of alcount, the sum of its first
#   two values followed by its second value.
for sync in SyncReader(options.input):
    # extract the correct populations
    subpops = sync.subpopulations(popstotest)
    # obtain counts for the two major alleles (the allele identities that are
    # returned alongside the counts are not needed here)
    alcount, _majora, _minora = modules.RCMH.Utility.getMajorAlleleCount(subpops)

    toprint = ["data", "{0}_{1}".format(sync.chr, sync.pos)]
    for counts in alcount:
        cov = counts[0] + counts[1]
        toprint.append(str(cov))
        toprint.append(str(counts[1]))
    print("\t".join(toprint))
secondchar = "N" return secondchar if (options.test): import doctest doctest.testmod(verbose=1) sys.exit() #2L 4910 A 6:0:0:0:0:0 - 5:0:0:0:0:0 25:0:0:0:0:0 33:0:0:0:0:0 #2L 4911 G 0:0:0:7:0:0 - 0:0:0:5:0:0 0:0:0:27:0:0 0:0:0:33:0:0 #2L 4912 A 7:0:0:0:0:0 - 5:0:0:0:0:0 27:0:0:0:0:0 32:0:0:0:0:0 #2L 4913 G 0:0:0:7:0:0 - 0:0:0:5:0:0 0:0:0:27:0:0 0:0:0:34:0:0 #2L 4914 A 7:0:0:0:0:0 - 5:0:0:0:0:0 26:0:0:0:0:0 33:0:0:0:0:0 #2L 4915 G 0:0:0:8:0:0 - 0:0:0:5:0:0 0:0:0:22:0:0 0:0:0:33:0:0 #2L 4916 A 8:0:0:0:0:0 - 6:0:0:0:0:0 25:0:0:0:0:0 33:0:0:0:0:0 #2L 4917 G 0:0:0:8:0:0 - 0:0:0:6:0:0 0:0:0:23:0:0 0:0:0:32:0:0 #2L 4918 C 0:0:8:0:0:0 - 0:0:6:0:0:0 0:0:22:0:0:0 0:0:30:0:0:0 print "#chr\tpos\trc\tallele_states\t+" for p in SyncReader(options.sync): b = str(p.chr) + "_" + str(p.pos) bh = {"A": 0, "C": 0, "G": 0, "T": 0} for pop in p.populations: bh["A"] += pop.A bh["T"] += pop.T bh["C"] += pop.C bh["G"] += pop.G maxnonrefc = maxkeyfornotrefc(bh, p.refc) print p.chr + "\t" + str(p.pos) + "\t" + p.refc + "\t" + maxnonrefc + "\t+"