def get_align_parts(data):
    """Write the FASTA inputs used to align both contig ends to the genome.

    The contig sequence is read from ``data['contigSeqFileName']`` and the
    left and right aligned fragments are sliced out of it.  The
    ``contig*FragAlignBegin`` / ``contig*FragAlignEnd`` values are applied as
    1-based, end-inclusive coordinates (``seq[begin - 1:end]``).  Each
    fragment is written as a single-record FASTA file, and the genome
    interval ``chromName:genomeFragAlignBegin-genomeFragAlignEnd`` is
    extracted from ``data['refGenomeFasta']`` with ``samtools faidx``.

    Keys set in ``data``: ``leftSeqFileName``, ``rightSeqFileName`` and
    ``genomeFragAlignFileName`` (all located under ``data['alignOutDir']``).
    """
    out_dir = data['alignOutDir']
    data['leftSeqFileName'] = out_dir + '/' + 'Contig.genomedir.fa.left'
    data['rightSeqFileName'] = out_dir + '/' + 'Contig.genomedir.fa.right'
    data['genomeFragAlignFileName'] = out_dir + '/' + 'genomeFrag.align.fa'

    contig_seq = genutils.read_fasta_to_string(data['contigSeqFileName'])

    # Slice both fragments before writing anything, so a bad coordinate
    # fails before any output file has been created.
    left_seq = contig_seq[data['contigLeftFragAlignBegin'] - 1:data['contigLeftFragAlignEnd']]
    right_seq = contig_seq[data['contigRightFragAlignBegin'] - 1:data['contigRightFragAlignEnd']]

    fragments = (
        ('left', data['leftSeqFileName'], left_seq),
        ('right', data['rightSeqFileName'], right_seq),
    )
    for label, file_name, seq in fragments:
        wrapped = genutils.add_breaks_to_line(seq)
        # ``with`` guarantees the handle is closed even if the write fails.
        with open(file_name, 'w') as out_file:
            out_file.write('>%s\n%s\n' % (label, wrapped))

    # the genome part
    region = (data['chromName'] + ':' + str(data['genomeFragAlignBegin'])
              + '-' + str(data['genomeFragAlignEnd']))
    cmd = ('samtools faidx ' + data['refGenomeFasta'] + ' ' + region
           + ' > ' + data['genomeFragAlignFileName'])
    genutils.runCMD(cmd)
def run_align(data):
    """Run ``stretcher`` for the left and the right contig fragment.

    Each fragment file (``data['leftSeqFileName']`` /
    ``data['rightSeqFileName']``) is aligned against
    ``data['genomeFragAlignFileName']``.  The output file names are the
    fragment file names with ``.align`` appended and are stored in
    ``data['leftAlignFileName']`` and ``data['rightAlignFileName']``.
    Every command is echoed before it is executed.
    """
    data['leftAlignFileName'] = data['leftSeqFileName'] + '.align'
    data['rightAlignFileName'] = data['rightSeqFileName'] + '.align'

    # left first, then right
    for side in ('left', 'right'):
        pieces = [
            'stretcher',
            data[side + 'SeqFileName'],
            data['genomeFragAlignFileName'],
            data[side + 'AlignFileName'],
        ]
        command = ' '.join(pieces)
        print(command)
        genutils.runCMD(command)
def get_genome_frag(data):
    """Extract the genome fragment for this site and load its sequence.

    The interval ``chromName:chromFragStart-chromFragEnd`` is pulled out of
    ``data['refGenomeFasta']`` with ``samtools faidx`` into
    ``<alignOutDir>/genomeFrag.fa``.  The path is stored in
    ``data['genomeFragFileName']`` and the upper-cased sequence in
    ``data['genomeFragSeq']``.
    """
    interval = data['chromName'] + ':%s-%s' % (data['chromFragStart'], data['chromFragEnd'])
    frag_fasta = data['alignOutDir'] + '/' + 'genomeFrag.fa'
    data['genomeFragFileName'] = frag_fasta

    command = ' '.join(['samtools faidx', data['refGenomeFasta'], interval, '>', frag_fasta])
    print(command)
    genutils.runCMD(command)

    data['genomeFragSeq'] = genutils.read_fasta_to_string(frag_fasta).upper()
def run_rm(data, run=True):
    """Run RepeatMasker on the genome fragment and on the contig.

    For each input the expected RepeatMasker report name (input + ``.out``)
    is recorded in ``data['genomeFragFileNameRM']`` and
    ``data['contigSeqFileNameRM']``.  The program is only executed when
    ``run`` is exactly ``True``; any other value just records the names.
    """
    targets = (
        ('genomeFragFileName', 'genomeFragFileNameRM'),
        ('contigSeqFileName', 'contigSeqFileNameRM'),
    )
    for fasta_key, report_key in targets:
        # change species here
        command = 'RepeatMasker ' + data[fasta_key]
        if run is True:
            genutils.runCMD(command)
        data[report_key] = data[fasta_key] + '.out'
def novelCoordinates(coord):
    """Shift novel-contig probe coordinates onto chrNovel and merge probe BEDs.

    Steps:

    1. Read ``chromNovel.merge.info``; for every row whose first field looks
       like ``>NAME`` store an entry for NAME in the module-level
       ``coordTable``.
    2. Read the novel-contig probe BED file; for every probe whose contig is
       in ``coordTable`` (except ``zoey-scaffold-686``, which is skipped)
       add the contig offset to start and end, record the result in the
       module-level ``novelTable`` and write it to ``<bed>.corrected``.
    3. Concatenate the probe BED files below ``options.directory`` into
       ``TOTAL_Probes_FINALCoordinates_all.bed`` with ``cat``.

    The corrected file is closed before ``cat`` runs.  The ``cat`` command
    reads a ``novelContigs.probesel.pass.bed.corrected`` path that appears to
    be the file written in step 2 (confirm against ``options.directory``);
    leaving the handle open could hide buffered rows from ``cat``.

    ``coord`` is not used.  The function reads and updates the module-level
    names ``data``, ``coordTable``, ``novelTable`` and ``options``.
    """
    # Stores coordinates of novel contigs within chrNovel.fa
    infofile = '/home/jmkidd/kidd-lab-scratch/feichens-projects/kmer/canFam31/unique_kmers/canFam3.1-withnovel/chromNovel.merge.info'
    with open(infofile, 'r') as infoFile:
        for line in infoFile:
            fields = line.rstrip().split()
            match = re.match(r">(\S+)", fields[0])
            if match is None:
                continue
            data['contigID'] = match.group(1)
            data['offset'] = int(fields[3]) - 1
            # chrom start[0] end[1] chrom[2] offset[3]
            coordTable[data['contigID']] = [fields[3], fields[4], data['contigID'], data['offset']]

    novelfile = '/home/ampend/kidd-lab/ampend-projects/CGH_Array_Design_Dog/CGH_Array_Analysis/inputData/ProbeBEDFiles/NocanFam3Coords/novelContigs.probesel.pass.bed'
    correctednovel = novelfile + '.corrected'
    with open(novelfile, 'r') as novelFile:
        with open(correctednovel, 'w') as correctedNovel:
            for line in novelFile:
                fields = line.rstrip().split()
                data['novelChromID'] = fields[0]
                data['probeID'] = fields[3]
                if data['novelChromID'] not in coordTable:
                    continue
                if data['novelChromID'] == 'zoey-scaffold-686':
                    continue
                contigOffset = int(coordTable[data['novelChromID']][3])
                data['novelChrom'] = 'chrNovel'
                data['novelStart'] = contigOffset + int(fields[1])
                data['novelEnd'] = contigOffset + int(fields[2])
                data['novelProbeID'] = data['probeID']
                record = (data['novelChrom'], data['novelStart'], data['novelEnd'], data['novelProbeID'])
                novelTable[data['novelProbeID']] = record
                correctedNovel.write('%s\t%s\t%s\t%s\n' % record)

    # Both handles are closed here, so the corrected file is complete on disk.
    bedDir = '%s../inputData/ProbeBEDFiles/' % (options.directory,)
    sources = [
        bedDir + 'Mappable/*',
        bedDir + 'NocanFam3Coords/ChrY.probesel.pass.bed',
        bedDir + 'NocanFam3Coords/LINEs_RefInsertions.probesel.bed.pass.bed.sorted',
        bedDir + 'NocanFam3Coords/novelContigs.probesel.pass.bed.corrected',
        bedDir + 'NocanFam3Coords/SINEs_ReferenceInsertions.probesel.pass.bed.sorted',
    ]
    cmd = 'cat %s > %s' % (' '.join(sources), bedDir + 'TOTAL_Probes_FINALCoordinates_all.bed')
    print(cmd)
    genutils.runCMD(cmd)
def run_pear(myData):
    """Merge overlapping read pairs with PEAR and gzip its four outputs.

    The output prefix is ``outDir + sampleName + '.pear'`` and is stored in
    ``myData['pearBase']``.  The keys ``assembledFQ``, ``discardedFQ``,
    ``notAssemF`` and ``notAssemR`` end up pointing at the gzipped outputs.

    If the gzipped assembled file already exists nothing is executed; the
    four keys are simply set to the ``.gz`` names.
    """
    # PEAR aligns/merges overlapping read pairs, which is the case that we have here
    prefix = myData['outDir'] + myData['sampleName'] + '.pear'
    myData['pearBase'] = prefix
    pear_cmd = 'pear --nbase -f %s -r %s -o %s' % (myData['r1fq'], myData['r2fq'], prefix)

    # order matters: the files are gzipped in exactly this sequence
    outputs = (
        ('assembledFQ', '.assembled.fastq'),
        ('discardedFQ', '.discarded.fastq'),
        ('notAssemF', '.unassembled.forward.fastq'),
        ('notAssemR', '.unassembled.reverse.fastq'),
    )
    for key, suffix in outputs:
        myData[key] = prefix + suffix

    # check to see if should run
    if os.path.isfile(myData['assembledFQ'] + '.gz'):
        print('found gzip output already, will not rerun')
        for key, _suffix in outputs:
            myData[key] += '.gz'
        return

    print(pear_cmd)
    genutils.runCMD(pear_cmd)
    for key, _suffix in outputs:
        gzip_cmd = 'gzip ' + myData[key]
        print(gzip_cmd)
        genutils.runCMD(gzip_cmd)
        myData[key] += '.gz'
line = line.split() siteID = line[0] if siteID == 'siteID': continue if line[1] == 'NO_CANDIDATE': continue print line data = {} data['tmpDir'] = options.tmpDir if os.path.isdir(data['tmpDir']) is False: cmd = 'mkdir ' + data['tmpDir'] print 'making tmp dir' print cmd genutils.runCMD(cmd) data['refGenomeFasta'] = refGenomeFasta data['siteID'] = siteID chrom = siteID.split('_') chrom = chrom[0:-1] chrom = '_'.join(chrom) p = int(siteID.split('_')[-1]) startBp = p - regDelta endBp = p + regDelta data['chromName'] = chrom data['chromFragStart'] =startBp data['chromFragEnd'] = endBp
def print_pretty_alignment(data):
    """Write a colour-annotated 3-way alignment as text and convert it to PS/PDF.

    Output names are stored in ``data['3wayAlignFilePrettyName']`` (text,
    next to the genome fragment file), ``data['3wayAlignFilePrettyNamePS']``
    and ``data['3wayAlignFilePrettyNamePDF']`` (both under
    ``data['alignOutDir']``, named after ``data['siteID']``).

    The text file starts with the site ID, contig name/direction, the genome
    interval and the two breakpoints.  The alignment columns that hold the
    breakpoints are then wrapped in ``~color{r g b}...~color{default}``
    markers, and the alignment is written in chunks of ``width`` columns,
    four rows per chunk (left, chrom, right, parse row).

    Side effects: the lists ``data['left3way']``, ``data['right3way']``,
    ``data['genome3way']`` and ``data['3wayParse']`` are modified in place
    (marker text is added to individual elements).  Old PS/PDF files are
    removed with ``genutils.runCMDNoFail``; ``enscript`` and ``ps2pdf`` are
    run with ``genutils.runCMD``.
    """
    #left end is blue, right start is red
    data['3wayAlignFilePrettyName'] = data['genomeFragFileName'] + '.3wayalign.pretty'
    data['3wayAlignFilePrettyNamePS'] = data['alignOutDir'] + '/' + data['siteID'] + '.3wayalign.pretty.ps'
    data['3wayAlignFilePrettyNamePDF'] = data['alignOutDir'] + '/' + data['siteID'] + '.3wayalign.pretty.pdf'
    outFile = open(data['3wayAlignFilePrettyName'],'w')
    outFile.write('Site ID: %s\n' % (data['siteID']))
    outFile.write('%s\t%s\n' % (data['contigName'],data['contigDir']))
    outFile.write('%s:%i-%i\n' % (data['chromName'],data['genomeFragAlignBegin'],data['genomeFragAlignEnd']))
    # left BP in chromFrag and Contig
    outFile.write('~color{0 0 1}end left match~color{default} chromFrag %i Contig %i\n' % (data['leftBpGenomeFragCoords'],data['leftBpContigCoord']))
    # right BP in chromFrag and Contig
    outFile.write('~color{1 0 0}start right match~color{default} chromFrag %i Contig %i\n' % (data['rightBpGenomeFragCoords'],data['rightBpContigCoord']))
    #go through and add in the colors
    # do the colors individually
    print 'ready to start'
    print data['leftBpContigCoord'],data['leftBpGenomeFragCoords']
    print data['rightBpContigCoord'],data['rightBpGenomeFragCoords']
    for i in range(0,len(data['genome3way'])):
        # Left breakpoint, contig side: the contig coordinate is converted to
        # a position relative to the start of the left aligned fragment.
        if data['left3wayPos'][i] == (data['leftBpContigCoord'] - data['contigLeftFragAlignBegin'] + 1):
            if data['left3way'][i] == '-':
                # gap column: nothing to colour
                print 'left is -'
            else:
                data['left3way'][i] = '~color{0 0 1}' + data['left3way'][i] + '~color{default}'
                data['3wayParse'][i] = '~color{0 0 1}' + data['3wayParse'][i] + '~color{default}'
                print 'LEFT contig',i,data['left3wayPos'][i]
        # Left breakpoint, genome side (gap columns are never coloured).
        if data['genome3wayPos'][i] == data['leftBpGenomeFragCoords'] and data['genome3way'][i] != '-':
            data['genome3way'][i] = '~color{0 0 1}' + data['genome3way'][i] + '~color{default}'
            print 'LEFT GENOME',i,data['genome3wayPos'][i]
        # Right breakpoint, contig side: same conversion, relative to the
        # start of the right aligned fragment.
        if data['right3wayPos'][i] == (data['rightBpContigCoord'] - data['contigRightFragAlignBegin'] +1):
            if data['right3way'][i] == '-':
                print i,'right is -'
            else:
                data['right3way'][i] = '~color{1 0 0}' + data['right3way'][i] + '~color{default}'
                data['3wayParse'][i] = '~color{1 0 0}' + data['3wayParse'][i] + '~color{default}'
                print i,'right contig',i,data['right3wayPos'][i]
        # Right breakpoint, genome side.
        if data['genome3wayPos'][i] == data['rightBpGenomeFragCoords'] and data['genome3way'][i] != '-' :
            data['genome3way'][i] = '~color{1 0 0}' + data['genome3way'][i] + '~color{default}'
            print i,'RIGHT GENOME',i,data['genome3wayPos'][i]
    # Row labels prepended to every chunk.
    # NOTE(review): the four labels have different lengths (5, 6, 6 and 1
    # characters), so the rows do not start in the same column — confirm
    # this is intended.
    leftName = 'left '
    rightName = 'right '
    chromName = 'chrom '
    passeName = ' '
    outFile.write('\n\n')
    # write the alignment in chunks of `width` columns
    width = 70
    sliceS = 0
    sliceE = sliceS + width
    while True:
        if sliceS >= len(data['genome3way']):
            break
        # clamp the final chunk to the end of the alignment
        if sliceE > len(data['genome3way']):
            sliceE = len(data['genome3way'])
        l = data['left3way'][sliceS:sliceE]
        g = data['genome3way'][sliceS:sliceE]
        r = data['right3way'][sliceS:sliceE]
        p = data['3wayParse'][sliceS:sliceE]
        l = leftName + ''.join(l)
        g = chromName + ''.join(g)
        r = rightName + ''.join(r)
        p = passeName + ''.join(p)
        outFile.write('%s\n%s\n%s\n%s\n\n' % (l,g,r,p))
        sliceS = sliceE
        sliceE = sliceS + width
    outFile.close()
    # Remove stale PS/PDF output; runCMDNoFail is used for the removal,
    # presumably so that a missing file is not treated as an error — confirm.
    print 'Clean up PS and PDF'
    cmd = 'rm ' + data['3wayAlignFilePrettyNamePS']
    print cmd
    genutils.runCMDNoFail(cmd)
    cmd = 'rm ' + data['3wayAlignFilePrettyNamePDF']
    print cmd
    genutils.runCMDNoFail(cmd)
    # Text -> PostScript. NOTE(review): `-e~` presumably tells enscript that
    # `~` introduces the colour escapes written above — confirm.
    cmd = 'enscript %s -o %s -e~ -B -2r' % (data['3wayAlignFilePrettyName'],data['3wayAlignFilePrettyNamePS'])
    print cmd
    genutils.runCMD(cmd)
    # PostScript -> PDF
    cmd = 'ps2pdf ' + data['3wayAlignFilePrettyNamePS'] + ' ' + data['3wayAlignFilePrettyNamePDF']
    print cmd
    genutils.runCMD(cmd)
def run_miropeats(data):
    """Run ``miropeats`` on the genome fragment versus the contig.

    The ``-s`` value is ``data['miropeatSValue']`` when present, otherwise
    40; the value used is written back to ``data['miropeatSValue']``.
    Inputs are copied into a scratch directory (``data['tmpDir']`` when
    present, otherwise a fixed default), miropeats is run there, and the
    results are copied to ``data['miroOutPS']`` and ``data['miroOutInfo']``
    under ``data['alignOutDir']``.  When miropeats produces no PostScript
    file, ``data['miroOutPS']`` is set to the string ``'FAILURE'``.

    Scratch file names are formed by plain concatenation, so the scratch
    directory string has to end with a path separator.
    """
    # default of 40 is "for the dogs..." (it replaced an earlier 80)
    threshold = data.get('miropeatSValue', 40)
    data['miropeatSValue'] = threshold

    out_stem = data['alignOutDir'] + '/' + 'miropeats.' + str(threshold)
    data['miroOutPS'] = out_stem + '.ps'
    data['miroOutInfo'] = out_stem + '.out'

    scratch = data.get('tmpDir', '/home/jmkidd/kidd-lab-scratch/jmkidd-projects/tmp/')
    scratch_genome = scratch + 'genome.fa'
    scratch_contig = scratch + 'contig.fa'
    scratch_ps = scratch + 'tmp.MRPS'
    scratch_out = scratch + 'tmp.MROUT'

    def echo_and_run(command):
        # every shell command is shown before it is executed
        print(command)
        genutils.runCMD(command)

    echo_and_run('cp %s %s' % (data['genomeFragFileName'], scratch_genome))
    echo_and_run('cp %s %s' % (data['contigSeqFileName'], scratch_contig))
    echo_and_run('miropeats -s %i -onlyinter -o %s -seq %s -seq %s > %s'
                 % (threshold, scratch_ps, scratch_genome, scratch_contig, scratch_out))

    # copy results back; the PostScript file only exists on success
    if os.path.isfile(scratch_ps):
        echo_and_run('cp %s %s' % (scratch_ps, data['miroOutPS']))
    else:
        data['miroOutPS'] = 'FAILURE'
    echo_and_run('cp %s %s' % (scratch_out, data['miroOutInfo']))

    # clean up
    echo_and_run('rm %s %s' % (scratch_genome, scratch_contig))
    for leftover in (scratch_ps, scratch_out):
        if os.path.isfile(leftover):
            echo_and_run('rm ' + leftover)
def get_genome_gaps(data, run=True):
    """Record, and optionally create, the gaps file of the genome fragment.

    ``data['genomeFragGapsFileName']`` is set to the genome fragment file
    name plus ``.gaps``.  ``get_gaps.pl`` is executed (output redirected
    into that file) only when ``run`` is exactly ``True``.
    """
    fragment_fasta = data['genomeFragFileName']
    gaps_file = fragment_fasta + '.gaps'
    data['genomeFragGapsFileName'] = gaps_file
    if run is not True:
        return
    genutils.runCMD('get_gaps.pl ' + fragment_fasta + ' > ' + gaps_file)
def get_contig_gaps(data, run=True):
    """Record, and optionally create, the gaps files of contig and genome.

    ``data['contigSeqGapsFileName']`` is set to the contig file name plus
    ``.gaps``; ``get_gaps.pl`` is executed for the contig only when ``run``
    is exactly ``True``.  ``get_genome_gaps`` is then called with the same
    ``run`` flag.
    """
    contig_fasta = data['contigSeqFileName']
    gaps_file = contig_fasta + '.gaps'
    data['contigSeqGapsFileName'] = gaps_file
    if run is True:
        genutils.runCMD('get_gaps.pl ' + contig_fasta + ' > ' + gaps_file)
    # the genome fragment is handled the same way, honouring the same flag
    get_genome_gaps(data, run)
def bwa_index_alleles(data):
    """Run ``bwa-0.5.9 index`` on the allele FASTA in ``data['alleleFa']``."""
    allele_fasta = data['alleleFa']
    index_cmd = 'bwa-0.5.9 index %s' % allele_fasta
    genutils.runCMD(index_cmd)
def _fetch_reference_region(data, start, end, fastaFileName):
    """Extract ``chromName:start-end`` from the reference into *fastaFileName*.

    Runs ``samtools faidx`` on ``data['refGenomeFasta']`` through
    ``genutils.runCMD`` and returns the extracted sequence upper-cased.
    """
    region = data['chromName'] + ':' + str(start) + '-' + str(end)
    cmd = 'samtools faidx ' + data['refGenomeFasta'] + ' ' + region + ' > ' + fastaFileName
    genutils.runCMD(cmd)
    return genutils.read_fasta_to_string(fastaFileName).upper()


def make_alternative_seqs(data, bpOutTable, allelesBaseDir, fragmentExtension):
    """Build the genome and insertion allele sequences for one site.

    A directory ``allelesBaseDir + siteID`` is created when missing.  The
    reference flanks around the breakpoints are extracted, and a two-record
    FASTA (``<siteID>_genome`` and ``<siteID>_insertion``) is written to
    ``alleles.fa`` in that directory and indexed with ``bwa_index_alleles``.

    ``gTSDl = leftBpChromCoords - rightBpChromCoords + 1`` selects the case:

    * ``gTSDl >= 1`` (has TSD): the breakpoints cross, so the left flank ends
      at ``rightBpChromCoords`` and the right flank starts at
      ``leftBpChromCoords``; the contig slice is widened by the TSD length.
    * ``gTSDl == 0`` (no TSD) and ``gTSDl <= -1`` (deletion in chromosome):
      flanks and contig slice use the breakpoints as given; the deletion
      case additionally prints a message.
    * any other value prints a message and exits via ``sys.exit()``.

    Parameters:
        data: site dictionary; sets ``alleleFa``, ``alleleDir``, ``gTSDl``,
            ``insSite``, ``mapFragStart`` and ``mapFragEnd``.
        bpOutTable: writable file-like object; one tab-separated line
            (siteID, chromName, insSite, gTSDl, mapFragStart, mapFragEnd)
            is appended.
        allelesBaseDir: string prefix for the per-site directory (joined by
            plain concatenation).
        fragmentExtension: length of each reference flank, breakpoint
            included.
    """
    alleleDir = allelesBaseDir + data['siteID']
    if not os.path.isdir(alleleDir):
        cmd = 'mkdir ' + alleleDir
        print(cmd)
        genutils.runCMD(cmd)
    alleleDir += '/'
    genomeLeftFa = alleleDir + 'genomeLeft.fa'
    genomeRightFa = alleleDir + 'genomeRight.fa'
    genomeWholeFa = alleleDir + 'genomeWhole.fa'
    alleleFa = alleleDir + 'alleles.fa'
    data['alleleFa'] = alleleFa
    data['alleleDir'] = alleleDir

    gTSDs = data['rightBpChromCoords']
    gTSDe = data['leftBpChromCoords']
    gTSDl = gTSDe - gTSDs + 1
    data['gTSDl'] = gTSDl

    if gTSDl >= 1:
        # has TSD -- note that the breakpoints cross
        leftChromBp = data['rightBpChromCoords']
        rightChromBp = data['leftBpChromCoords']
        # to get over to the right size, include the TSD
        contigStart = data['leftBpContigCoord'] - gTSDl + 1
        contigEnd = data['rightBpContigCoord'] + gTSDl - 1
    elif gTSDl <= -1 or gTSDl == 0:
        if gTSDl <= -1:
            # deletion in chromosome
            print('deletion of %i in genome' % gTSDl)
        leftChromBp = data['leftBpChromCoords']
        rightChromBp = data['rightBpChromCoords']
        # since that BP is in contig
        contigStart = data['leftBpContigCoord']
        contigEnd = data['rightBpContigCoord']
    else:
        # only reachable for values that are not ordinary integers
        print('What TSD size?')
        print(gTSDl)
        sys.exit()

    leftChromStart = leftChromBp - fragmentExtension + 1
    rightChromEnd = rightChromBp + fragmentExtension - 1
    data['insSite'] = leftChromBp

    genomeLeftSeq = _fetch_reference_region(data, leftChromStart, leftChromBp, genomeLeftFa)
    genomeRightSeq = _fetch_reference_region(data, rightChromBp, rightChromEnd, genomeRightFa)
    # get the chrom sequence
    genomeWholeSeq = _fetch_reference_region(data, leftChromStart, rightChromEnd, genomeWholeFa)
    data['mapFragStart'] = leftChromStart
    data['mapFragEnd'] = rightChromEnd

    contigSeq = data['contigSeqGenomeDir'][contigStart:contigEnd - 1]

    # print out genome and insertion alleles; ``with`` closes the file even
    # if formatting or writing fails, and before bwa indexes it below
    with open(alleleFa, 'w') as outFile:
        outFile.write('>%s\n' % (data['siteID'] + '_genome'))
        outFile.write('%s\n' % genutils.add_breaks_to_line(genomeWholeSeq))
        outFile.write('>%s\n' % (data['siteID'] + '_insertion'))
        iSeq = genomeLeftSeq + contigSeq + genomeRightSeq
        outFile.write('%s\n' % genutils.add_breaks_to_line(iSeq))

    # make out file
    nl = [data['siteID'], data['chromName'], data['insSite'], gTSDl,
          data['mapFragStart'], data['mapFragEnd']]
    nl = '\t'.join([str(i) for i in nl]) + '\n'
    bpOutTable.write(nl)
    bwa_index_alleles(data)
######## NOVEL CONTIGS ########## ################################# print 'Now annotating novel contigs in fosmid...\n' #novelContigFasta = '~/kidd-lab/ampend-projects/Novel_Sequence_Analysis/NovelSequence/novel.v2.fa.masked' #New non-reundant Fasta novelContigFasta = '~/kidd-lab/ampend-projects/Novel_Sequence_Analysis/RedundantNovelContigs/Final_chrNovel_Fasta/novelContigs_NonRedundant.fa.masked' bottomRM = options.bottomRM masked_bottomRM = bottomRM.replace(".out",".masked") contigfile = 'BLAT_novelContigs_vs_fosmid.blat' #cmd = 'blat -fine -minMatch=1 -minScore=10 -out=blast9 %s %s %s' % (novelContigFasta,masked_bottomRM,contigfile) cmd = 'blat -noHead %s %s %s' % (novelContigFasta,masked_bottomRM,contigfile) print cmd genutils.runCMD(cmd) n = 0 #ypos = bottomLine - 0.25 exon_pos = gap_pos - 0.03 ypos = exon_pos contigFile = open('BLAT_novelContigs_vs_fosmid.blat','r') contigList = [] print 'NOVEL CONTIG start ypos', ypos for b in contigFile: b = b.rstrip() b = b.split() if b[0].isdigit() is False:
def run_repeatmasker(fastaFile):
    """Run RepeatMasker (``--species human``) on *fastaFile* if it exists.

    Nothing is executed when *fastaFile* is not an existing regular file.
    """
    # change this to use other species or libraries,
    # e.g. 'RepeatMasker --species dog %s '
    masker_cmd = 'RepeatMasker --species human %s ' % (fastaFile)
    if not os.path.isfile(fastaFile):
        return
    genutils.runCMD(masker_cmd)