import simplejson import vcftoRmatrix ''' vcftoRmatrix.flatfilevcf(`'../1000GenomesData/CEU.low_coverage.2010_09.genotypes.vcf', '../genotypes/19CEUlowcov') print "1" vcftoRmatrix.flatfilevcf('../1000GenomesData/YRI.low_coverage.2010_09.genotypes.vcf', '../genotypes/19YRIlowcov') print "2" vcftoRmatrix.flatfilevcf('../1000GenomesData/CHBJPT.low_coverage.2010_09.genotypes.vcf', '../genotypes/19CHBJPTlowcov') vcftoRmatrix.flatfilevcf('../1000GenomesData/YRI.trio.2010_09.genotypes.vcf', '../genotypes/19YRItrio') vcftoRmatrix.flatfilevcf('../1000GenomesData/CEU.trio.2010_09.genotypes.vcf', '../genotypes/19CEUtrio') ''' ''' file = open('./omni19snps') snps = simplejson.load(file) file.close() ''' arrayworkflow.flatfilevcf('../1000GenomesData/CEU.low_coverage.2010_09.genotypes.vcf', '../genotypes/CEUlowcov') file = open('./omniexpresssnps') lines = file.readlines() file.close() snps = [] for l in lines: snps.append(l.strip('\n')) names = ['../genotypes/19CEUlowcov','../genotypes/19CHBJPTlowcov', '../genotypes/19YRIlowcov', '../genotypes/19CEUtrio', '../genotypes/19YRItrio', '../genotypes/hapmap'] vcftoRmatrix.combineflatgenos(names, snps) def getlines(names, genotypefile, chosenlines): lines = [] for n in names:
vcftoRmatrix.getrefalt('../1000GenomesData/YRI.low_coverage.2010_09.genotypes.vcf', '../genotypes/19YRIlowcov') vcftoRmatrix.getrefalt('../1000GenomesData/CHBJPT.low_coverage.2010_09.genotypes.vcf', '../genotypes/19CHBJPTlowcov') vcftoRmatrix.getrefalt('../1000GenomesData/YRI.trio.2010_09.genotypes.vcf', '../genotypes/19YRItrio') vcftoRmatrix.getrefalt('../1000GenomesData/CEU.trio.2010_09.genotypes.vcf', '../genotypes/19CEUtrio') ''' names = ['../genotypes/19CEUlowcov','../genotypes/19CHBJPTlowcov', '../genotypes/19YRIlowcov', '../genotypes/19CEUtrio', '../genotypes/19YRItrio'] inputs = ['../1000GenomesData/CEU.low_coverage.2010_09.genotypes.vcf', '../1000GenomesData/YRI.low_coverage.2010_09.genotypes.vcf', '../1000GenomesData/CHBJPT.low_coverage.2010_09.genotypes.vcf', '../1000GenomesData/YRI.trio.2010_09.genotypes.vcf', '../1000GenomesData/CEU.trio.2010_09.genotypes.vcf'] for i in range(0,len(names)): name = names[i] input = inputs[i] vcftoRmatrix.flatfilevcf(input,name) omniIDs = simplejson.load(open('./omni19IDs')) omniIDsrefalt = {} for name in names: snppos = simplejson.load(open(name+'RefAlt')) for id in omniIDs.keys(): omniIDsrefalt[id] = snppos[omniIDs[id]] simplejson.dump(omniIDsrefalt, open(name+'RefAltIDs','w'))