Python UtilityFunctions Examples

Programming Language: Python

Namespace/Package Name: SAP

Class/Type: UtilityFunctions

Examples at hotexamples.com: 4

Python UtilityFunctions - 4 examples found. These are the top rated real world Python examples of SAP.UtilityFunctions extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

randomString(1)

safeReadFastaCache(1)

safeReadTaxonomyCache(1)

systemCall(1)

writeFile(1)

Example #1

Show file

    def _netblast_search(self, fastaRecord, excludelist=[], usecache=True):
        """
        Blast against genbank over web
        """

        ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ##########################################
        if self.options.ONEMISSING:
            orig_limit_query = self.options.limitquery
            genus, species = re.search(r'_([^_]+)_([^_]+)$',
                                       fastaRecord.title).groups()
            self.options.limitquery = "barcode[keyword] NOT %s %s[ORGN]" % (
                genus, species)
            print "REFORMATTING LIMIT QUERY TO", self.options.limitquery
        ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ##########################################

        # Make a query to filter the returned results:
        if excludelist:
            entrezQuery = '(' + self.options.limitquery + ') NOT (uncultured[WORD] OR ' + '[ORGN] OR '.join(
                excludelist) + '[ORGN])'
        else:
            entrezQuery = '(' + self.options.limitquery + ') NOT uncultured[WORD]'

        ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ##########################################
        if self.options.ONEMISSING:
            self.options.limitquery = orig_limit_query
        ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ##########################################

        fileSuffix = ''
        for name in excludelist:
            l = re.split(r'\s+', name)
            for n in l:
                fileSuffix += n[0]
        if fileSuffix:
            fileSuffix = '_' + fileSuffix

        # File name used for blast cache
        blastFileName = os.path.join(
            self.options.blastcache,
            "%s.%d_%s%s.xml" % (fastaRecord.title, self.options.maxblasthits,
                                self.options.minsignificance, fileSuffix))

        if usecache and os.path.exists(
                blastFileName) and os.path.getsize(blastFileName) > 0:
            # Use cached blast result
            if excludelist:
                print "\n\t\tUsing cached Blast results (excluding %s)..." % ', '.join(
                    excludelist),
            else:
                print "\n\t\tUsing cached Blast results...",
            sys.stdout.flush()
        else:
            # Make a query to filter the returned results:
            if excludelist:
                print "\n\t\tSearching database (excluding %s)..." % ', '.join(
                    excludelist),
            else:
                print "\n\t\tSearching database...",
            sys.stdout.flush()

            fastaRecordFileName = os.path.join(self.options.project,
                                               utils.randomString(8))
            fastaRecordFile = open(fastaRecordFileName, 'w')
            fastaRecordFile.write(str(fastaRecord))
            fastaRecordFile.close()
            resultHandle = None
            if self.options.nolowcomplexfilter:
                filterOption = '-dust no'
            else:
                filterOption = '-dust yes'  # FIXME: Check that this is an ok default... It is not the defalut in blastn

            if self.options.blastwordsize:
                wordSize = '-word_size %s' % self.options.blastwordsize
            else:
                wordSize = ''

            blastCmd = 'blastn -remote -outfmt 5 -db nt %s %s -evalue %s -max_target_seqs %s -entrez_query "%s" -query %s -out %s' \
                       % (wordSize, filterOption, self.options.minsignificance, self.options.maxblasthits, \
                          entrezQuery, fastaRecordFileName, blastFileName)

            for i in range(20):
                time.sleep(2 * i)
                error = utils.systemCall(blastCmd,
                                         stdout='IGNORE',
                                         stderr='IGNORE')
                try:

                    #                     retval = os.system(blastCmd)
                    #                     if retval != 0:
                    #                        print "Netblast failed with return value %d. Trying again..." % retval
                    error = utils.systemCall(blastCmd)
                    if error or not os.path.exists(
                            blastFileName) or os.path.getsize(
                                blastFileName) == 0:
                        print "Netblast failed. Trying again..."
                        continue

                    break
                except KeyboardInterrupt:
                    sys.exit()
                except:
                    print "Netblast failed. Trying again..."
                    pass
            os.remove(fastaRecordFileName)

        # Read file from cache
        blastHandle = open(blastFileName, 'r')

        # Parse the result:
        try:
            blastRecord = NCBIXML.read(blastHandle)
            print "done.\n\t\t\t",
            sys.stdout.flush()
        except:
            blastRecord = None
        blastHandle.close()

        return blastRecord

Example #2

Show file

File: GenBank.py Project: kaspermunch/sap

    def _netblast_search(self, fastaRecord, excludelist=[], usecache=True):
        """
        Blast against genbank over web
        """
        
        ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ##########################################
        if self.options.ONEMISSING:
           orig_limit_query = self.options.limitquery
           genus, species = re.search(r'_([^_]+)_([^_]+)$', fastaRecord.title).groups()
           self.options.limitquery = "barcode[keyword] NOT %s %s[ORGN]" % (genus, species)
           print "REFORMATTING LIMIT QUERY TO", self.options.limitquery
        ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ##########################################


        # Make a query to filter the returned results:
        if excludelist:
            entrezQuery = '(' + self.options.limitquery + ') NOT (uncultured[WORD] OR ' + '[ORGN] OR '.join(excludelist) + '[ORGN])'
        else:
            entrezQuery = '(' + self.options.limitquery + ') NOT uncultured[WORD]'

        ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ##########################################
        if self.options.ONEMISSING:
           self.options.limitquery = orig_limit_query
        ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ##########################################
            
        fileSuffix = ''
        for name in excludelist:
            l = re.split(r'\s+', name)
            for n in l:
                fileSuffix += n[0]
        if fileSuffix:
           fileSuffix = '_' + fileSuffix

        # File name used for blast cache
        blastFileName = os.path.join(self.options.blastcache, "%s.%d_%s%s.xml" % (fastaRecord.title,
                                               self.options.maxblasthits, self.options.minsignificance, fileSuffix))
        
        if usecache and os.path.exists(blastFileName) and os.path.getsize(blastFileName)>0:
            # Use cached blast result
            if excludelist:
               print "\n\t\tUsing cached Blast results (excluding %s)..." % ', '.join(excludelist),
            else:
                print "\n\t\tUsing cached Blast results...", 
            sys.stdout.flush()
        else:
            # Make a query to filter the returned results:
            if excludelist:
                print "\n\t\tSearching database (excluding %s)..." % ', '.join(excludelist), 
            else:
                print "\n\t\tSearching database...", 
            sys.stdout.flush()

            fastaRecordFileName = os.path.join(self.options.project, utils.randomString(8))
            fastaRecordFile = open(fastaRecordFileName, 'w')
            fastaRecordFile.write(str(fastaRecord))
            fastaRecordFile.close()
            resultHandle = None
            if self.options.nolowcomplexfilter:
                filterOption = '-dust no'
            else:
                filterOption = '-dust yes' # FIXME: Check that this is an ok default... It is not the defalut in blastn

            if self.options.blastwordsize:
                wordSize = '-word_size %s' % self.options.blastwordsize
            else:
                wordSize = ''

            blastCmd = 'blastn -remote -outfmt 5 -db nt %s %s -evalue %s -max_target_seqs %s -entrez_query "%s" -query %s -out %s' \
                       % (wordSize, filterOption, self.options.minsignificance, self.options.maxblasthits, \
                          entrezQuery, fastaRecordFileName, blastFileName)

            for i in range(20):
                time.sleep(2 * i)
                error = utils.systemCall(blastCmd, stdout='IGNORE', stderr='IGNORE')
                try:

#                     retval = os.system(blastCmd)
#                     if retval != 0:
#                        print "Netblast failed with return value %d. Trying again..." % retval                   
                    error = utils.systemCall(blastCmd)
                    if error or not os.path.exists(blastFileName) or os.path.getsize(blastFileName) == 0:
                       print "Netblast failed. Trying again..."
                       continue

                    break
                except KeyboardInterrupt:
                    sys.exit()
                except:
                    print "Netblast failed. Trying again..."
                    pass
            os.remove(fastaRecordFileName)

        # Read file from cache
        blastHandle = open(blastFileName, 'r')

        # Parse the result:
        try:
           blastRecord = NCBIXML.read(blastHandle)
           print "done.\n\t\t\t",
           sys.stdout.flush()
        except:
            blastRecord = None
        blastHandle.close()

        return blastRecord

Example #3

Show file

    def get(self, gi):
        """
        Look up genbank records by their GI
        """

        taxonomyFileName = os.path.join(self.options.dbcache, gi + ".tax")
        fastaFileName = os.path.join(self.options.dbcache, gi + ".fasta")

        if (os.path.exists(taxonomyFileName)
                and os.path.getsize(taxonomyFileName) != 0
                and os.path.exists(fastaFileName)
                and os.path.getsize(fastaFileName) != 0):
            retrievalStatus = "(c)"
            taxonomy = utils.safeReadTaxonomyCache(taxonomyFileName)
            sequence = utils.safeReadFastaCache(fastaFileName)
        else:
            retrievalStatus = "(d)"

            taxonXref = None
            seqLength = None

            successful = False
            for tries in range(10):
                try:
                    Entrez.email = self.options.email
                    Entrez.tool = 'sapwebserver'
                    fp = Entrez.efetch(db="nucleotide", id=gi, retmode="xml")

                    # Get the cross ref to the taxonomy database:
                    taxonXrefRE = re.compile(
                        "<GBQualifier_value>taxon:(\d+)</GBQualifier_value>")
                    seqLengthRE = re.compile(
                        "<GBSeq_length>(\d+)</GBSeq_length>")
                    sequenceRE = re.compile(
                        "<GBSeq_sequence>([a-zA-Z]+)</GBSeq_sequence>")

                    taxonXref = None
                    seqLength = None
                    sequence = None

                    while taxonXref is None or sequence is None:
                        line = fp.readline()
                        if not line:
                            break
                        taxonMatch = taxonXrefRE.search(line)
                        lengthMatch = seqLengthRE.search(line)
                        sequenceMatch = sequenceRE.search(line)
                        if taxonMatch:
                            if taxonXref is None:
                                taxonXref = taxonMatch.group(1)
#                             else:
#                                print "There was more than one taxon xref for %s. Picking the first one (%s)." % (gi, taxonXref)
                        if lengthMatch:
                            seqLength = lengthMatch.group(1)
                        if sequenceMatch:
                            sequence = sequenceMatch.group(1)

                    if not (taxonXref and sequence):
                        # Give it another try:
                        continue

                except KeyboardInterrupt:
                    sys.exit()
                except MemoryError:
                    # Write an empty file to cache to keep the script from
                    # trying to download the sequence next time.
                    utils.writeFile(fastaFileName, '')
                    return None, retrievalStatus.replace(")", "!M)")
                except:
                    ## print ' retrieving failed - retrying'
                    time.sleep(tries * 5)
                    continue
                else:
                    successful = True
                    fp.close()
                    break
                if not successful:
                    return None, retrievalStatus.replace(")", "!D2)")

            if not (taxonXref and gi and sequence):
                # The entry did not have a cross ref to the taxonomy database:
                return None, retrievalStatus.replace(")", "!T2)")

            # Make an object to hold the taxonomy:
            taxonomy = Taxonomy.Taxonomy()
            try:
                taxonomy.populateFromNCBI(
                    dbid=taxonXref,
                    #                                          allow_unclassified=self.options.unclassified,
                    minimaltaxonomy=self.options.minimaltaxonomy)
            except Taxonomy.NCBIPopulationError, X:
                return None, retrievalStatus.replace(")", " !%s)" % X.status)

            # Dump the taxonomy object to a file:
            fp = open(taxonomyFileName, 'w')
            pickle.dump(taxonomy, fp)
            fp.close()

            # Upcase the sequence:
            sequence = sequence.upper()

            # Cache the sequence:
            fastaEntry = ">%s\n%s\n" % (gi, sequence)
            utils.writeFile(fastaFileName, fastaEntry)

Example #4

Show file

File: GenBank.py Project: kaspermunch/sap

    def get(self, gi):
        """
        Look up genbank records by their GI
        """

        taxonomyFileName = os.path.join(self.options.dbcache, gi + ".tax")
        fastaFileName = os.path.join(self.options.dbcache, gi + ".fasta")

        if (os.path.exists(taxonomyFileName) and os.path.getsize(taxonomyFileName) != 0 and
            os.path.exists(fastaFileName) and os.path.getsize(fastaFileName) != 0):
            retrievalStatus = "(c)"
            taxonomy = utils.safeReadTaxonomyCache(taxonomyFileName)
            sequence = utils.safeReadFastaCache(fastaFileName)
        else:
            retrievalStatus = "(d)"

            taxonXref = None
            seqLength = None

            successful = False
            for tries in range(10):
                try:
                    Entrez.email = self.options.email
                    Entrez.tool = 'sapwebserver'
                    fp = Entrez.efetch(db="nucleotide", id=gi, retmode="xml")

                    # Get the cross ref to the taxonomy database:
                    taxonXrefRE = re.compile("<GBQualifier_value>taxon:(\d+)</GBQualifier_value>")
                    seqLengthRE = re.compile("<GBSeq_length>(\d+)</GBSeq_length>")
                    sequenceRE = re.compile("<GBSeq_sequence>([a-zA-Z]+)</GBSeq_sequence>")

                    taxonXref = None
                    seqLength = None
                    sequence = None

                    while taxonXref is None or sequence is None:
                        line = fp.readline()
                        if not line:
                            break
                        taxonMatch = taxonXrefRE.search(line)
                        lengthMatch = seqLengthRE.search(line)
                        sequenceMatch = sequenceRE.search(line)
                        if taxonMatch:
                            if taxonXref is None:
                               taxonXref = taxonMatch.group(1)
#                             else:
#                                print "There was more than one taxon xref for %s. Picking the first one (%s)." % (gi, taxonXref)
                        if lengthMatch:
                            seqLength = lengthMatch.group(1)
                        if sequenceMatch:
                            sequence = sequenceMatch.group(1)

                    if not (taxonXref and sequence):
                       # Give it another try:
                       continue

                except KeyboardInterrupt:
                   sys.exit()
                except MemoryError:
                    # Write an empty file to cache to keep the script from
                    # trying to download the sequence next time.
                    utils.writeFile(fastaFileName, '')
                    return None, retrievalStatus.replace(")", "!M)")
                except:
                   ## print ' retrieving failed - retrying'
                   time.sleep(tries * 5)
                   continue
                else:
                   successful = True
                   fp.close()
                   break
                if not successful:
                   return None, retrievalStatus.replace(")", "!D2)")

            if not (taxonXref and gi and sequence):
                # The entry did not have a cross ref to the taxonomy database:
                return None, retrievalStatus.replace(")", "!T2)")

            # Make an object to hold the taxonomy:
            taxonomy = Taxonomy.Taxonomy()
            try:
               taxonomy.populateFromNCBI(dbid=taxonXref,
#                                          allow_unclassified=self.options.unclassified,
                                         minimaltaxonomy=self.options.minimaltaxonomy)
            except Taxonomy.NCBIPopulationError, X:
               return None, retrievalStatus.replace(")", " !%s)" % X.status)
               
            # Dump the taxonomy object to a file:
            fp = open(taxonomyFileName, 'w')
            pickle.dump(taxonomy, fp)
            fp.close()

            # Upcase the sequence:
            sequence = sequence.upper()

            # Cache the sequence:
            fastaEntry = ">%s\n%s\n" % (gi, sequence)
            utils.writeFile(fastaFileName, fastaEntry)