def build(self, clobber=False, separator='!'):
    """Build index file.

    Creates the ``Offsets`` table (id, accession, offset) together with
    its two lookup indexes when the file is not yet indexed, writes one
    ``id<sep>accession<sep>offset`` row per item yielded by
    ``self._byteOffsetGenerator()`` to a temporary file, and bulk-loads
    that file with the ``sqlite3`` command-line tool.

    @keyword clobber: Drop any existing Offsets table first so the index
        is rebuilt from scratch (default=False)
    @keyword separator: Column separator used in the temporary import
        file; it should not occur inside an accession (default='!')
    @raise subprocess.CalledProcessError: if the sqlite3 import command
        exits with a non-zero status
    """
    # Local import: the visible file-level imports do not include it.
    import subprocess

    if clobber:
        # IF EXISTS turns "no such table" into a no-op without hiding
        # every other database error behind a bare except.
        self.cursor.execute('DROP TABLE IF EXISTS Offsets;')

    if not self.isIndexed():
        schema = """
        CREATE TABLE Offsets (
            id INTEGER,
            accession TEXT,
            offset INTEGER
        );
        create index idx_offsets_id on Offsets (id);
        create index idx_offsets_accession on Offsets (accession);
        """
        self.connection.executescript(schema)

        # Text mode so plain str rows can be written on Python 2 and 3.
        tmpFile = tempfile.NamedTemporaryFile(mode="w")
        try:
            # Tracked separately from the loop variable so an empty
            # generator reports 0 instead of raising NameError.
            count = 0
            for i, (accession, offset) in enumerate(self._byteOffsetGenerator()):
                tmpFile.write("%i%s%s%s%i\n"
                              % (i, separator, accession, separator, offset))
                count = i + 1
                if i % 1000 == 0:
                    progressMessage("Sequences: %s", count)
            progressMessage("Sequences: %s\n", count)

            # The external sqlite3 process reads the file by name, so
            # everything must be on disk before it starts.
            tmpFile.flush()

            # Argument list (no shell): file names containing spaces or
            # quotes cannot break or inject into the command line.
            subprocess.check_call([
                "sqlite3",
                "-separator", separator,
                self.idxFilename,
                '.import "%s" Offsets' % tmpFile.name,
            ])
        finally:
            # Closing also deletes the temporary file, even on error.
            tmpFile.close()
def build(self, clobber=False, separator='!'):
    """Build index file.

    Creates the ``Offsets`` table (id, accession, offset) together with
    its two lookup indexes when the file is not yet indexed, writes one
    ``id<sep>accession<sep>offset`` row per item yielded by
    ``self._byteOffsetGenerator()`` to a temporary file, and bulk-loads
    that file with the ``sqlite3`` command-line tool.

    @keyword clobber: Drop any existing Offsets table first so the index
        is rebuilt from scratch (default=False)
    @keyword separator: Column separator used in the temporary import
        file; it should not occur inside an accession (default='!')
    @raise subprocess.CalledProcessError: if the sqlite3 import command
        exits with a non-zero status
    """
    # Local import: the visible file-level imports do not include it.
    import subprocess

    if clobber:
        # IF EXISTS turns "no such table" into a no-op without hiding
        # every other database error behind a bare except.
        self.cursor.execute('DROP TABLE IF EXISTS Offsets;')

    if not self.isIndexed():
        schema = """
        CREATE TABLE Offsets (
            id INTEGER,
            accession TEXT,
            offset INTEGER
        );
        create index idx_offsets_id on Offsets (id);
        create index idx_offsets_accession on Offsets (accession);
        """
        self.connection.executescript(schema)

        # Text mode so plain str rows can be written on Python 2 and 3.
        tmpFile = tempfile.NamedTemporaryFile(mode="w")
        try:
            # Tracked separately from the loop variable so an empty
            # generator reports 0 instead of raising NameError.
            count = 0
            for i, (accession, offset) in enumerate(self._byteOffsetGenerator()):
                tmpFile.write("%i%s%s%s%i\n"
                              % (i, separator, accession, separator, offset))
                count = i + 1
                if i % 1000 == 0:
                    progressMessage("Sequences: %s", count)
            progressMessage("Sequences: %s\n", count)

            # The external sqlite3 process reads the file by name, so
            # everything must be on disk before it starts.
            tmpFile.flush()

            # Argument list (no shell): file names containing spaces or
            # quotes cannot break or inject into the command line.
            subprocess.check_call([
                "sqlite3",
                "-separator", separator,
                self.idxFilename,
                '.import "%s" Offsets' % tmpFile.name,
            ])
        finally:
            # Closing also deletes the temporary file, even on error.
            tmpFile.close()
-Sanger format can encode a Phred quality score from 0 to 93 using ASCII 33 to 126 (although in raw read data the Phred quality score rarely exceeds 60, higher scores are possible in assemblies or read maps). -Solexa/Illumina 1.0 format can encode a Solexa/Illumina quality score from -5 to 62 using ASCII 59 to 126 (although in raw read data Solexa scores from -5 to 40 only are expected) -Illumina 1.3+ format can encode a Phred quality score from 0 to 62 using ASCII 64 to 126 (although in raw read data Phred scores from 0 to 40 only are expected). """ import os, sys from srt.fastq import FastqFile from srt.useful import progressMessage iFilename = sys.argv[1] oFilename = sys.argv[2] if oFilename=="-": oFilename = sys.stdout iFile = FastqFile(iFilename) oFile = FastqFile(oFilename, "w") i = 0 for header,seq,qual in iFile: i += 1 if (i % 1000)==0: progressMessage("# lines %s", i) qual2 = "".join([chr(ord(q)-64+33) for q in qual]) oFile.write(header, seq, qual2) progressMessage("# lines %s\n", i) oFile.close()
Author: Tony Papenfuss Date: Fri Nov 6 15:44:08 EST 2009 -Sanger format can encode a Phred quality score from 0 to 93 using ASCII 33 to 126 (although in raw read data the Phred quality score rarely exceeds 60, higher scores are possible in assemblies or read maps). -Solexa/Illumina 1.0 format can encode a Solexa/Illumina quality score from -5 to 62 using ASCII 59 to 126 (although in raw read data Solexa scores from -5 to 40 only are expected) -Illumina 1.3+ format can encode a Phred quality score from 0 to 62 using ASCII 64 to 126 (although in raw read data Phred scores from 0 to 40 only are expected). """ import os, sys from srt.fastq import FastqFile from srt.useful import progressMessage iFilename = sys.argv[1] oFilename = sys.argv[2] if oFilename == "-": oFilename = sys.stdout iFile = FastqFile(iFilename) oFile = FastqFile(oFilename, "w") i = 0 for header, seq, qual in iFile: i += 1 if (i % 1000) == 0: progressMessage("# lines %s", i) qual2 = "".join([chr(ord(q) - 64 + 33) for q in qual]) oFile.write(header, seq, qual2) progressMessage("# lines %s\n", i) oFile.close()