def parse_refgpd(tdir, geneprednames, simplenames):
    """Get the reference genepreds ready to use in work.

    For the k-th path in ``geneprednames`` (counting from 1) a file
    ``<tdir>/reference.<k>.bed`` is written with one tab-separated line per
    exon:

        chrom, exonStart, exonEnd, entry number, gene_name, name,
        exon count, strand, exon number (1-based within the entry)

    In addition a single ``<tdir>/entries.txt`` receives one tab-separated
    line per entry:

        column number, simplenames[k-1], entry number, gene_name, name,
        summed exon length

    Entry numbers run continuously across all input files; lines starting
    with ``#`` are skipped and do not consume an entry number.

    Args:
        tdir: Directory the output files are created in.
        geneprednames: Iterable of reference genepred paths, opened with
            ``FileBasics.GenericFileReader``.
        simplenames: Sequence of labels, indexed in parallel with
            ``geneprednames``.

    All handles are released even if parsing or writing raises.
    """
    entry_number = 0
    with open(tdir + "/entries.txt", 'w') as of_entries:
        for column_number, genepred_path in enumerate(geneprednames, 1):
            bed_path = tdir + "/reference." + str(column_number) + ".bed"
            with open(bed_path, 'w') as of_ref:
                gfr = FileBasics.GenericFileReader(genepred_path)
                try:
                    while True:
                        line = gfr.readline()
                        if not line:
                            break
                        if line.startswith('#'):  # skip comment lines
                            continue
                        entry_number += 1
                        entry = GenePredBasics.line_to_entry(
                            line.rstrip("\n"))
                        # NOTE(review): assumes exonStarts and exonEnds are
                        # parallel lists of equal length.
                        exons = list(zip(entry['exonStarts'],
                                         entry['exonEnds']))
                        exon_count = len(entry['exonStarts'])
                        entry_length = sum(end - start
                                           for start, end in exons)
                        of_entries.write("\t".join([
                            str(column_number),
                            simplenames[column_number - 1],
                            str(entry_number),
                            entry['gene_name'],
                            entry['name'],
                            str(entry_length),
                        ]) + "\n")
                        for exon_number, (start, end) in enumerate(exons, 1):
                            of_ref.write("\t".join([
                                entry['chrom'],
                                str(start),
                                str(end),
                                str(entry_number),
                                entry['gene_name'],
                                entry['name'],
                                str(exon_count),
                                entry['strand'],
                                str(exon_number),
                            ]) + "\n")
                finally:
                    # The reader is a project type; close it explicitly so it
                    # is released on error paths too.
                    gfr.close()
def parse_gpdfile(tdir, gpdfile, smoothing_factor):
    """Go through the long reads and make a genepred.

    Each non-comment input line is parsed with
    ``GenePredBasics.line_to_entry``, passed through
    ``GenePredBasics.smooth_gaps`` with ``smoothing_factor``, renamed to its
    1-based ordinal among the non-comment lines, and written to
    ``<tdir>/longreads.gpd``.

    Args:
        tdir: Directory in which ``longreads.gpd`` is created.
        gpdfile: Path of the long-read genepred file, or ``'-'`` to read
            from standard input.
        smoothing_factor: Forwarded unchanged to
            ``GenePredBasics.smooth_gaps``.

    A warning is written to stderr whenever an original read name has
    already been seen earlier in the input.
    """
    reading_stdin = gpdfile == '-'
    if reading_stdin:
        fr = sys.stdin
    else:
        fr = FileBasics.GenericFileReader(gpdfile)
    seennames = set()
    longreadnumber = 0
    try:
        with open(tdir + '/longreads.gpd', 'w') as of_gpd:
            while True:
                line = fr.readline()
                if not line:
                    break
                if line.startswith('#'):  # skip comments
                    continue
                longreadnumber += 1
                entry = GenePredBasics.smooth_gaps(
                    GenePredBasics.line_to_entry(line.rstrip()),
                    smoothing_factor)
                readname = entry['name']
                if readname in seennames:
                    sys.stderr.write(
                        "Warning: repeat name '" + readname + "'\n")
                # Record the name; without this the repeat check above could
                # never trigger.
                seennames.add(readname)
                # Replace the read name with its running number.
                entry['name'] = str(longreadnumber)
                of_gpd.write(GenePredBasics.entry_to_line(entry) + "\n")
    finally:
        # Only close the reader opened here; sys.stdin belongs to the
        # process and is left open for the caller.
        if not reading_stdin:
            fr.close()
def read_fastq_file(self, filename):
    """Feed the fourth line of each four-line record to record_observation.

    The file is opened with ``FileBasics.GenericFileReader`` and consumed in
    groups of four lines, each right-stripped. The fourth line of every
    complete group (the quality line in the standard FASTQ layout) is passed
    to ``self.record_observation``.

    Reading stops when ``self.max_read_count`` records have been processed,
    or at the first line that is empty after stripping; this covers end of
    file, but a blank line in the middle of the file stops reading as well.
    A partial trailing record is discarded.

    Args:
        filename: Path handed to ``FileBasics.GenericFileReader``.
    """
    lines_per_record = 4
    gfr = FileBasics.GenericFileReader(filename)
    try:
        records_read = 0
        while records_read < self.max_read_count:
            record = []
            for _ in range(lines_per_record):
                text = gfr.readline().rstrip()
                if not text:
                    # Stop at the first empty line without reading further.
                    break
                record.append(text)
            if len(record) < lines_per_record:
                break
            self.record_observation(record[3])
            records_read += 1
    finally:
        # Release the reader even if record_observation raises.
        gfr.close()