Example 1
    def next(self):
        """next(self) -> object

        Return the next Prosite record from the file.  If no more records,
        return None.

        """
        # Skip the copyright info, if it's the first record.
        line = self._uhandle.peekline()
        if line[:2] == 'CC':
            while 1:
                line = self._uhandle.readline()
                if not line:
                    break
                if line[:2] == '//':
                    break
                if line[:2] != 'CC':
                    raise ValueError("Oops, where's the copyright?")
        
        lines = []
        while 1:
            line = self._uhandle.readline()
            if not line:
                break
            lines.append(line)
            if line[:2] == '//':
                break
            
        if not lines:
            return None
            
        data = "".join(lines)
        if self._parser is not None:
            return self._parser.parse(File.StringHandle(data))
        return data
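An iterator with a next() method like the one above is normally driven in a loop that keeps reading until None comes back. A minimal sketch, assuming the Iterator and RecordParser classes of the old Bio.Prosite module and an illustrative local copy of the data file:

# Minimal sketch, assuming the old Bio.Prosite module; 'prosite.dat' is an
# illustrative local copy of the Prosite data file.
from Bio import Prosite

handle = open('prosite.dat')
records = Prosite.Iterator(handle, Prosite.RecordParser())
while 1:
    record = records.next()
    if record is None:
        break
    # attribute names as in the old Prosite.Record class
    print record.accession, record.name
handle.close()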
Example 2
 def __getitem__(self, key):
     start, len = self._index[key]
     self._handle.seek(start)
     data = self._handle.read(len)
     if self._parser is not None:
         return self._parser.parse(File.StringHandle(data))
     return data
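The (start, length) pairs in self._index are typically collected in one pass over the flat file before any lookups are made. A minimal sketch of how such an index could be built, assuming each entry starts with an ID line and ends with a '//' line as in the other examples here (the helper name and key extraction are hypothetical):

def build_index(handle):
    # Hypothetical helper: record each entry's byte offset and length so a
    # later __getitem__ can seek() and read() the raw text as shown above.
    index = {}
    while 1:
        start = handle.tell()
        line = handle.readline()
        if not line:
            break
        key = line.split()[1].rstrip(';')  # assumes an "ID   NAME; ..." first line
        while line and line[:2] != '//':
            line = handle.readline()
        index[key] = (start, handle.tell() - start)
    return index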
Example 3
    def __getitem__(self, id):
        """__getitem__(self, id) -> object

        Return a Prodoc entry.  id is either the id or accession
        for the entry.  Raises a KeyError if there's an error.
        
        """
        import time
        from Bio import ExPASy
        # First, check to see if enough time has passed since my
        # last query.
        if self.last_query_time is not None:
            delay = self.last_query_time + self.delay - time.time()
            if delay > 0.0:
                time.sleep(delay)
        self.last_query_time = time.time()

        try:
            handle = ExPASy.get_prodoc_entry(id)
        except IOError:
            raise KeyError(id)
        try:
            handle = File.StringHandle(_extract_record(handle))
        except ValueError:
            raise KeyError(id)

        if self.parser is not None:
            return self.parser.parse(handle)
        return handle.read()
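Because failures are converted to KeyError, an object built around this __getitem__ can be used with ordinary mapping syntax. A brief usage sketch, assuming 'prodoc_dict' is an instance of the class defining the method above (the accession is only illustrative):

# Brief sketch: 'prodoc_dict' is assumed to be an instance of the class whose
# __getitem__ is shown above; 'PDOC00001' is an illustrative accession.
try:
    entry = prodoc_dict['PDOC00001']
except KeyError:
    print "entry not found (bad id, or the ExPASy query failed)"
else:
    # with a parser this is a parsed record, otherwise the raw entry text
    print entry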
Example 4
    def next(self):
        """next(self) -> object

        Return the next contig record from the file.  If no more records,
        return None.
        """

        lines = []
        while 1:
            # if at beginning, skip the AS and look for first CO command
            line = self._uhandle.readline()
            if not line:  # empty or corrupt file
                return None
            if line[:2] == 'CO':
                lines.append(line)
                break
        while 1:
            line = self._uhandle.readline()
            if not line:
                break
            # If a new record, then put the line back and stop.
            if lines and line[:2] == 'CO':
                self._uhandle.saveline(line)
                break
            lines.append(line)

        if not lines:
            return None

        data = ''.join(lines)
        if self._parser is not None:
            return self._parser.parse(File.StringHandle(data))
        return data
Example 5
    def next(self):
        """Return the next IntelliGenetics record from the handle.

        Will return None if we ran out of records.
        """
        data = self._reader.next()

        if self._parser is not None:
            if data:
                return self._parser.parse(File.StringHandle(data))

        return data
Example 6
    def next(self):
        """Return the next NBRF record from the handle.

        Will return None if we ran out of records.
        """
        data = self._reader.next()

        if self._parser is not None:
            if data:
                dumpfile = open('dump', 'w')
                dumpfile.write(data)
                dumpfile.close()
                return self._parser.parse(File.StringHandle(data))

        return data
Example 7
 def next(self):
     self._parser = RecordParser()
     lines = []
     while 1:
         line = self._uhandle.readline()
         if not line: break
         if line[:2] == '//':
             break
         lines.append(line)
     if not lines:
         return None
     lines.append('//')
     data = ''.join(lines)
     if self._parser is not None:
         return self._parser.parse(File.StringHandle(data))
     return data
Example 8
    def next(self):
        lines = []
        while 1:
            line = self._uhandle.readline()
            if not line:
                break
            if line[0:4] == "Ali1" and lines:
                self._uhandle.saveline(line)
                break

            lines.append(line)

        if not lines:
            return None

        data = ''.join(lines)
        return self._parser.parse(File.StringHandle(data))
Example 9
    def blast(self, sequence):
        # generate temporary file to contain sequence
        tmpfile = tempfile.NamedTemporaryFile()
        print >> tmpfile, "> tmpfile"
        print >> tmpfile, sequence
        tmpfile.flush()

        # run blast
        data = self.blastfile(tmpfile.name)
        tmpfile.close()

        # parse the output if a parser was supplied, otherwise return raw text
        if self.parser:
            return self.parser.parse(File.StringHandle(data))
        return data
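From the caller's side the temporary FASTA file is invisible: the method takes a raw sequence string and hands back whatever the configured parser produces. A brief usage sketch, assuming 'searcher' is an instance of the class defining blast() above and was constructed with a BLAST record parser (the sequence is illustrative):

# Brief sketch: 'searcher' is assumed to be an instance of the class defining
# blast() above, configured with a parser that returns a standard Bio.Blast
# record; the sequence is made up for illustration.
b_record = searcher.blast("MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ")
for alignment in b_record.alignments:
    print alignment.title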
Example 10
    def next(self):
        lines = []
        while 1:
            line = self._uhandle.readline()
            if not line:
                break
            # If a new record, then put the line back and stop.
            if lines and line[:14] == 'BEGIN_SEQUENCE':
                self._uhandle.saveline(line)
                break
            lines.append(line)

        if not lines:
            return None

        data = ''.join(lines)
        if self._parser is not None:
            return self._parser.parse(File.StringHandle(data))
        return data
Example 11
    def next(self):
        """next(self) -> object

        Return the next Prodoc record from the file.  If no more records,
        return None.

        """
        lines = []
        while 1:
            line = self._uhandle.readline()
            if not line:
                break
            lines.append(line)
            if line[:5] == '{END}':
                break

        if not lines:
            return None

        data = "".join(lines)
        if self._parser is not None:
            return self._parser.parse(File.StringHandle(data))
        return data
Example 12
    def __getitem__(self, id):
        """__getitem__(self, id) -> object

        Return a Prosite entry.  id is either the id or accession
        for the entry.  Raises a KeyError if there's an error.
        
        """
        from Bio import ExPASy
        # First, check to see if enough time has passed since my
        # last query.
        self.limiter.wait()

        try:
            handle = ExPASy.get_prosite_entry(id)
        except IOError:
            raise KeyError(id)
        try:
            handle = File.StringHandle(_extract_record(handle))
        except ValueError:
            raise KeyError(id)

        if self.parser is not None:
            return self.parser.parse(handle)
        return handle.read()
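Here the inline timing bookkeeping of Example 3 has been factored out into self.limiter, whose wait() call sleeps just long enough to keep successive ExPASy queries a fixed interval apart. A minimal sketch of such a limiter, reusing the delay arithmetic from Example 3 (the class name is hypothetical):

import time

class RequestLimiter:
    # Hypothetical sketch mirroring the last_query_time/delay arithmetic of
    # Example 3: wait() blocks until 'delay' seconds have passed since the
    # previous call, then records the current time.
    def __init__(self, delay=5.0):
        self.delay = delay
        self.last_time = None

    def wait(self):
        if self.last_time is not None:
            remaining = self.last_time + self.delay - time.time()
            if remaining > 0.0:
                time.sleep(remaining)
        self.last_time = time.time()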
Example 13
def download_many(ids,
                  callback_fn,
                  broken_fn=None,
                  batchsize=500,
                  parser=None):
    """Download multiple PubMed records, no return value (DEPRECATED).

    Please use Bio.Entrez instead as described in the Biopython Tutorial.

    Download many records from PubMed.  ids is a list of either the
    Medline Unique ID or the PubMed ID's of the articles.  Each time a
    record is downloaded, callback_fn is called with the text of the
    record.  broken_fn is an optional function that is called with the
    id of records that were not able to be downloaded.  batchsize is the
    number of records to request each time.

    """
    # parser is an undocumented parameter that allows people to
    # specify an optional parser to handle each record.  This is
    # dangerous because the results may be malformed, and exceptions
    # in the parser may disrupt the whole download process.
    if batchsize > 500 or batchsize < 1:
        raise ValueError("batchsize must be between 1 and 500")
    current_batchsize = batchsize

    # Loop until all the ids are processed.  We want to process as
    # many as possible with each request.  Unfortunately, errors can
    # occur.  Some id may be incorrect, or the server may be
    # unresponsive.  In addition, one broken id out of a list of id's
    # can cause a non-specific error.  Thus, the strategy I'm going to
    # take, is to start by downloading as many as I can.  If the
    # request fails, I'm going to half the number of records I try to
    # get.  If there's only one more record, then I'll report it as
    # broken and move on.  If the request succeeds, I'll double the
    # number of records until I get back up to the batchsize.
    nsuccesses = 0
    while ids:
        if current_batchsize > len(ids):
            current_batchsize = len(ids)

        id_str = ','.join(ids[:current_batchsize])

        try:
            # Query PubMed.  If one or more of the id's are broken,
            # this will raise an IOError.
            handle = Entrez.efetch(db="pubmed",
                                   id=id_str,
                                   retmode='text',
                                   rettype='medlars')

            # I'm going to check to make sure PubMed returned the same
            # number of id's as I requested.  If it didn't then I'm going
            # to raise an exception.  This could take a lot of memory if
            # the batchsize is large.
            results = handle.read()
            num_ids = 0
            for x in Medline.Iterator(File.StringHandle(results)):
                num_ids = num_ids + 1
            if num_ids != current_batchsize:
                raise IOError
            handle = File.StringHandle(results)
        except IOError:  # Query did not work.
            if current_batchsize == 1:
                # There was only 1 id in the query.  Report it as
                # broken and move on.
                id = ids.pop(0)
                if broken_fn is not None:
                    broken_fn(id)
            else:
                # I don't know which one is broken.  Try again with
                # fewer id's.
                current_batchsize = current_batchsize / 2
            nsuccesses = 0
            continue
        nsuccesses = nsuccesses + 1

        # Iterate through the results and pass the records to the
        # callback.
        idnum = 0
        for rec in Medline.Iterator(handle, parser):
            callback_fn(ids[idnum], rec)
            idnum = idnum + 1

        ids = ids[current_batchsize:]

        # If I'm not downloading the maximum number of articles,
        # double the number for next time.
        if nsuccesses >= 2 and current_batchsize < batchsize:
            current_batchsize = current_batchsize * 2
            if current_batchsize > batchsize:
                current_batchsize = batchsize
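Since download_many returns nothing, all results flow through the callbacks: callback_fn receives each downloaded record and broken_fn each id that could not be fetched. A brief usage sketch with illustrative PubMed ids and hypothetical caller-supplied callbacks:

# Brief sketch: the PubMed ids are illustrative; save_record and report_broken
# are hypothetical callbacks supplied by the caller.
def save_record(id, record):
    print "downloaded", id

def report_broken(id):
    print "could not download", id

download_many(['11245432', '11700088'], save_record,
              broken_fn=report_broken, batchsize=100)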
Example 14
    ninput, nfound, nmissed = 0, 0, 0
    
    for line in sys.stdin:
        if line[0] == "#": continue
        
        id = line[:-1].split("\t")[0]
        ninput += 1

        try:
            result = ExPASy.get_sprot_raw(id).read()
        except IOError:
            options.stdlog.write("# warning: sequence for id %s not found.\n" % id)
            nmissed += 1
            continue
        
        s_iterator = SProt.Iterator(File.StringHandle(result), s_parser)
        try:
            cur_record = s_iterator.next()        
        except SyntaxError:
            print "# Sequence not found: %s" % id
            continue

        columns = [ id, ]
        for f in options.fields:
            if f == "description":
                columns.append( cur_record.description )
                
            elif f == "references":
                for ref in cur_record.references:
                    columns.extend([ref.authors, ref.title])
            elif f == "organism_classification":
Example 15
print is_blank_line('', allow_spaces=1)  # 1
print is_blank_line('', allow_spaces=0)  # 1
print is_blank_line(string.whitespace, allow_spaces=1)  # 1
print is_blank_line('hello')  # 0
print is_blank_line('hello', allow_spaces=1)  # 0
print is_blank_line('hello', allow_spaces=0)  # 0
print is_blank_line(string.whitespace, allow_spaces=0)  # 0

### safe_readline

print "Running tests on safe_readline"

data = """This
file"""

h = File.UndoHandle(File.StringHandle(data))

safe_readline = ParserSupport.safe_readline
print safe_readline(h)  # "This"
print safe_readline(h)  # "file"
try:
    safe_readline(h)
except ValueError:
    print "correctly failed"
else:
    print "ERROR, should have failed"

### safe_peekline

print "Running tests on safe_peekline"
safe_peekline = ParserSupport.safe_peekline
Example 16
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

import os
import string
from Bio import File

data = """This
is
a multi-line
file"""

### StringHandle

h = File.StringHandle(data)
print repr(h.readline())  # 'This'
print len(h.readlines())  # 3
print repr(h.readline())  # ''
h.close()

### UndoHandle

h = File.UndoHandle(File.StringHandle(data))

print h.readline()  # 'This'
print h.peekline()  # 'is'
print h.readline()  # 'is'
h.saveline("saved")
print h.peekline()  # 'saved'
h.saveline("another")