def generic_run(commandline):
    """Execute a pre-built command line and capture its output (OBSOLETE).

    ``commandline`` is expected to be a wrapper deriving from
    AbstractCommandline; it is converted to text with ``str()`` before
    being launched.

    Returns a 3-tuple: an ApplicationResult built from the command line
    and the process return code, followed by UndoHandle objects holding
    the captured standard output and standard error.

    WARNING - The complete output of the program is read into memory,
    which may be a problem for tools that write a lot to standard output.

    NOTE - This function is considered obsolete and is intended to be
    deprecated and later removed.  Invoking subprocess directly with
    ``str(commandline)`` is recommended instead, as it gives full control
    over the tool's input and output.
    """
    command_text = str(commandline)
    # Everywhere except Windows the command string is run through a shell.
    use_shell = sys.platform != "win32"
    # No input is piped in, but a stdin pipe is still created as a
    # workaround for a Python bug triggered when called from a Windows GUI
    # program; see http://bugs.python.org/issue1124861
    process = subprocess.Popen(
        command_text,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=use_shell,
    )
    # .communicate() is used because .wait() might deadlock
    # (see Bug 2804/2806).
    captured_out, captured_err = process.communicate()
    result = ApplicationResult(commandline, process.returncode)
    stdout_handle = File.UndoHandle(StringIO.StringIO(captured_out))
    stderr_handle = File.UndoHandle(StringIO.StringIO(captured_err))
    return result, stdout_handle, stderr_handle
def next(self):
    """next(self) -> object

    Read one Prosite record from the wrapped handle and return it, or
    return None when nothing is left.  With a parser configured the
    parsed record is returned, otherwise the raw text.
    """
    reader = self._uhandle
    # A leading run of 'CC' lines is the copyright notice; consume it up
    # to and including its '//' terminator before reading a record.
    if reader.peekline()[:2] == 'CC':
        while True:
            line = reader.readline()
            if not line or line[:2] == '//':
                break
            if line[:2] != 'CC':
                raise ValueError("Oops, where's the copyright?")
    # Collect lines up to and including the '//' that ends the record.
    record_lines = []
    while True:
        line = reader.readline()
        if not line:
            break
        record_lines.append(line)
        if line[:2] == '//':
            break
    if not record_lines:
        return None
    data = "".join(record_lines)
    if self._parser is None:
        return data
    return self._parser.parse(File.StringHandle(data))
def getRemotePDBHandle(self, id):
    """Open the remote entry for a PDB code and return it as an UndoHandle.

    :param id: value substituted into the ``rcsb_url`` template
    :return: ``File.UndoHandle`` wrapping the opened response
    :raise IOError: if the response contains no data
    """
    url = rcsb_url % (id)
    handle = urllib.request.urlopen(url)
    uhandle = File.UndoHandle(handle)
    if not uhandle.peekline():
        # Release the connection before reporting the failure, and name
        # the URL that was actually requested rather than the template.
        handle.close()
        # IOError instead of bare BaseException, so that ordinary
        # ``except Exception`` handlers can deal with the failure.
        raise IOError("Couldn't retrieve " + url)
    return uhandle
def next(self):
    """next(self) -> object

    Read one contig record from the wrapped handle and return it, or
    return None when nothing is left.  With a parser configured the
    parsed record is returned, otherwise the raw text.
    """
    source = self._uhandle
    collected = []
    # Skip everything (e.g. the AS line at the start of the file) until
    # the first line beginning with 'CO'.
    while True:
        line = source.readline()
        if not line:
            # Empty or corrupt file: no CO line was found.
            return None
        if line[:2] == 'CO':
            collected.append(line)
            break
    # Gather the remainder of the record: everything up to the next 'CO'
    # line (which is pushed back for the following call) or end of input.
    while True:
        line = source.readline()
        if not line:
            break
        if collected and line[:2] == 'CO':
            source.saveline(line)
            break
        collected.append(line)
    if not collected:
        return None
    data = ''.join(collected)
    if self._parser is None:
        return data
    return self._parser.parse(File.StringHandle(data))
def __getitem__(self, id):
    """__getitem__(self, id) -> object

    Fetch a Prodoc entry through ExPASy.  id is the id or accession of
    the entry.  A KeyError is raised when fetching or extracting the
    record fails.
    """
    import time
    from Bio import ExPASy
    # Throttle: wait until at least self.delay seconds have passed since
    # the previous query.
    previous = self.last_query_time
    if previous is not None:
        wait = previous + self.delay - time.time()
        if wait > 0.0:
            time.sleep(wait)
    self.last_query_time = time.time()

    try:
        raw = ExPASy.get_prodoc_entry(id)
    except IOError:
        raise KeyError(id)
    try:
        record = File.StringHandle(_extract_record(raw))
    except ValueError:
        raise KeyError(id)

    if self.parser is None:
        return record.read()
    return self.parser.parse(record)
def __getitem__(self, key):
    """Return the record stored under key.

    The index maps each key to a (start offset, length) pair; that slice
    is read from the underlying handle and returned raw, or passed
    through the parser when one is configured.
    """
    offset, size = self._index[key]
    stream = self._handle
    stream.seek(offset)
    raw = stream.read(size)
    if self._parser is None:
        return raw
    return self._parser.parse(File.StringHandle(raw))
def parse(self, handle):
    """Scan handle and return the enzyme record built by the consumer.

    The handle is wrapped in a File.UndoHandle unless it already is one.
    """
    already_wrapped = isinstance(handle, File.UndoHandle)
    uhandle = handle if already_wrapped else File.UndoHandle(handle)
    self._scanner.feed(uhandle, self._consumer)
    return self._consumer.enzyme_record
def getRemotePDBHandle(self, id, rcsb_url=settings.rcsb_url):
    """
    Get the coordinate file remotely from the RCSB.

    @param id: pdb code, 4 characters
    @type  id: str
    @param rcsb_url: template url for pdb download; the pdb code is
                     substituted into it twice
                     (default: L{settings.rcsb_url})
    @type  rcsb_url: str

    @return: the requested pdb file as a file handle
    @rtype: open file handle

    @raise PDBParserError: if Biopython cannot be imported or the
                           PDB file couldn't be retrieved
    """
    try:
        from Bio import File
    except ImportError:
        # Only a missing Biopython is translated; a bare ``except`` here
        # would also swallow KeyboardInterrupt and unrelated errors.
        raise PDBParserError('Could not find Biopython - '
                             'remote fetching of PDBs is not supported.')

    url = rcsb_url % (id, id)
    handle = urllib.urlopen(url)

    uhandle = File.UndoHandle(handle)

    if not uhandle.peekline():
        # Report the URL that was requested, not the unformatted template.
        raise PDBParserError("Couldn't retrieve ", url)

    return uhandle
def getRemotePDBHandle(self, pdb_id, rcsb_url=settings.rcsb_url):
    """
    Get the coordinate file remotely from the RCSB.

    As a side effect, ``self.encoding`` is set to the charset announced
    in the response headers.

    :param pdb_id: pdb code, 4 characters
    :type  pdb_id: str
    :param rcsb_url: template url for pdb download
                     (default: :class:`settings.rcsb_url`)
    :type  rcsb_url: str

    :return: the requested pdb file as a file handle
    :rtype: open file handle

    :raise PDBParserError: if Biopython cannot be imported or the
                           PDB file couldn't be retrieved
    """
    try:
        from Bio import File
    except ImportError:
        # Only a missing Biopython is translated; a bare ``except`` here
        # would also swallow KeyboardInterrupt and unrelated errors.
        raise PDBParserError('Could not find Biopython - '
                             'remote fetching of PDBs is not supported.')

    url = rcsb_url % pdb_id
    resource = urllib.request.urlopen(url)
    self.encoding = resource.headers.get_content_charset()

    uhandle = File.UndoHandle(resource)

    if not uhandle.peekline():
        # Report the URL that was requested, not the unformatted template.
        raise PDBParserError("Couldn't retrieve ", url)

    return uhandle
def _open(cgi, params={}, get=1):
    """_open(cgi, params={}, get=1) -> UndoHandle

    Open a handle to SCOP.  cgi is the URL of the cgi script, params is a
    dictionary of options that gets URL-encoded, and get selects between
    a GET request (options appended to the URL as a query string) and a
    POST request (options sent as the request data).

    No checking of the response (404, timeout, ...) is done here.
    """
    import urllib
    from Bio import File
    query = urllib.urlencode(params)
    if not get:
        # POST: the encoded options travel as the request body.
        raw = urllib.urlopen(cgi, query)
    elif query:
        # GET with options: append them as a query string.
        raw = urllib.urlopen("%s?%s" % (cgi, query))
    else:
        # GET without options: use the bare URL.
        raw = urllib.urlopen(cgi)
    return File.UndoHandle(raw)
def feed(self, handle, consumer):
    """feed(self, handle, consumer)

    Scan Prosite data.  handle is a file-like object with the data and
    consumer receives the events produced while scanning.  Scanning stops
    at end of input or as soon as consumer.finished becomes true.
    """
    already_wrapped = isinstance(handle, File.UndoHandle)
    uhandle = handle if already_wrapped else File.UndoHandle(handle)

    consumer.finished = False
    while not consumer.finished:
        line = uhandle.peekline()
        if not line:
            break
        if is_blank_line(line):
            # Blank lines between records are consumed and ignored.
            uhandle.readline()
            continue
        prefix = line[:2]
        if prefix == 'ID':
            self._scan_record(uhandle, consumer)
        elif prefix == 'CC':
            self._scan_copyrights(uhandle, consumer)
        else:
            raise ValueError("There doesn't appear to be a record")
def treeappend(trees, file, format, **kwargs):
    """Appending version of Phylo._io.write().

    ``trees`` may be a single tree/clade or an iterable of trees;
    ``file`` is passed to ``File.as_handle`` and opened in 'a+' mode;
    ``format`` selects the entry of ``supported_formats`` whose ``write``
    function does the work, and ``kwargs`` are forwarded to it.

    Returns whatever that ``write`` function returns.  An unsupported
    ``format`` raises KeyError before the file is touched.
    """
    if isinstance(trees, (BaseTree.Tree, BaseTree.Clade)):
        # Passed a single tree instead of an iterable -- that's OK
        trees = [trees]
    # Look the writer up first: doing it inside the ``with`` would create
    # the output file even when the format is not supported.
    writer = supported_formats[format].write
    with File.as_handle(file, 'a+') as fp:
        n = writer(trees, fp, **kwargs)
    return n
def __init__(self, handle):
    """Wrap handle for iteration; emits a deprecation warning."""
    import warnings
    message = ("Bio.Compass.Iterator is deprecated; please use the parse() "
               "function in this module instead")
    warnings.warn(message, Bio.BiopythonDeprecationWarning)
    self._uhandle = File.UndoHandle(handle)
    self._parser = RecordParser()
def write(trees, file, format, **kwargs):
    """Write a sequence of trees to file in the given format.

    ``trees`` may be a single tree/clade or an iterable of trees;
    ``file`` is passed to ``File.as_handle`` and opened in 'w+' mode;
    ``format`` selects the entry of ``supported_formats`` whose ``write``
    function does the work, and ``kwargs`` are forwarded to it.

    Returns whatever that ``write`` function returns.  An unsupported
    ``format`` raises KeyError before the file is touched.
    """
    if isinstance(trees, (BaseTree.Tree, BaseTree.Clade)):
        # Passed a single tree instead of an iterable -- that's OK
        trees = [trees]
    # Look the writer up first: doing it inside the ``with`` would
    # truncate an existing file even when the format is not supported.
    writer = supported_formats[format].write
    with File.as_handle(file, 'w+') as fp:
        n = writer(trees, fp, **kwargs)
    return n
def write(trees, file, format, **kwargs):
    """Write a sequence of trees to file in the given format.

    ``trees`` may be a single tree/clade or an iterable of trees;
    ``file`` is passed to ``File.as_handle`` and opened in "w+" mode;
    ``format`` selects the entry of ``supported_formats`` whose ``write``
    function does the work, and ``kwargs`` are forwarded to it.

    Returns whatever that ``write`` function returns.  An unsupported
    ``format`` raises KeyError before the file is touched.
    """
    if isinstance(trees, (BaseTree.Tree, BaseTree.Clade)):
        # Passed a single tree instead of an iterable -- that's OK
        trees = [trees]
    # Look the writer up first: doing it inside the ``with`` would
    # truncate an existing file even when the format is not supported.
    writer = supported_formats[format].write
    with File.as_handle(file, "w+") as fp:
        n = writer(trees, fp, **kwargs)
    return n
def test_path(self):
    """Test as_handle with a path argument."""
    target = self._path('test_file.fasta')
    requested_mode = 'wb'
    with File.as_handle(target, mode=requested_mode) as handle:
        # Inside the block the handle is open on the requested path/mode.
        self.assertEqual(target, handle.name)
        self.assertEqual(requested_mode, handle.mode)
        self.assertFalse(handle.closed)
    # A handle that as_handle opened itself is closed on exit.
    self.assertTrue(handle.closed)
def test_safe_readline(self):
    """safe_readline returns each line, then raises ValueError at EOF."""
    text = "This\nfile"
    handle = File.UndoHandle(StringIO(text))
    read_line = ParserSupport.safe_readline
    for expected in ("This\n", "file"):
        self.assertEqual(read_line(handle), expected)
    # Nothing is left to read, so the next call must fail.
    self.assertRaises(ValueError, read_line, handle)
def test_string_path(self):
    """Test as_handle with a string path argument."""
    target = self._path('test_file.fasta')
    requested_mode = 'wb'
    with File.as_handle(target, mode=requested_mode) as handle:
        # Inside the block the handle is open on the requested path/mode.
        self.assertEqual(target, handle.name)
        self.assertEqual(requested_mode, handle.mode)
        self.assertFalse(handle.closed)
    # A handle that as_handle opened itself is closed on exit.
    self.assertTrue(handle.closed)
def feed(self, handle, consumer):
    """Scan handle, sending events to consumer.

    The handle is wrapped in a File.UndoHandle unless it already is one;
    the header, the matches and the annotated matches are then scanned in
    that order.
    """
    already_wrapped = isinstance(handle, File.UndoHandle)
    uhandle = handle if already_wrapped else File.UndoHandle(handle)

    self._scan_header(uhandle, consumer)
    self._scan_matches(uhandle, consumer)
    self._scan_annotated_matches(uhandle, consumer)
def __init__(self, consumer):
    """Wrap consumer; emits a deprecation warning.

    Raises ValueError unless consumer is of type InstanceType.
    """
    import Bio
    message = ("SGMLStrippingConsumer is deprecated, and is likely to be removed "
               "in a future version of Biopython")
    warnings.warn(message, Bio.BiopythonDeprecationWarning)
    consumer_type = type(consumer)
    if consumer_type is not InstanceType:
        raise ValueError("consumer should be an instance")
    self._consumer = consumer
    self._prev_attr = None
    self._stripper = File.SGMLStripper()
def test_path_object(self):
    """Test as_handle with a pathlib.Path object."""
    from pathlib import Path
    target = Path(self._path('test_file.fasta'))
    requested_mode = 'wb'
    with File.as_handle(target, mode=requested_mode) as handle:
        # The handle's name is compared with the absolute path as a string.
        self.assertEqual(str(target.absolute()), handle.name)
        self.assertEqual(requested_mode, handle.mode)
        self.assertFalse(handle.closed)
    # A handle that as_handle opened itself is closed on exit.
    self.assertTrue(handle.closed)
def __init__(self, handle, parser=None):
    """Create a new iterator (deprecated).

    handle is a file-like object.  parser is an optional Parser object
    used to change the results into another form; when it is None the
    raw contents of the file are returned.

    A DeprecationWarning is issued on construction.
    """
    import warnings
    message = ("Bio.Sequencing.Phd.Iterator is deprecated. Please use "
               "Bio.Sequencing.Phd.parse(handle) instead of "
               "Bio.Sequencing.Phd.Iterator(handle, RecordParser())")
    warnings.warn(message, DeprecationWarning)
    self._uhandle = File.UndoHandle(handle)
    self._parser = parser
def __init__(self, handle, parser=None):
    """Set up iteration over IntelliGenetics entries.

    Arguments:
    o handle - A handle with IntelliGenetics entries to iterate through.
    o parser - An optional parser to pass the entries through before
    returning them. If None, then the raw entry will be returned.
    """
    wrapped = File.UndoHandle(handle)
    self.handle = wrapped
    # The reader works on the same wrapped handle that is kept on self.
    self._reader = IntelliGeneticsReader(wrapped)
    self._parser = parser
def __init__(self, handle, parser=None):
    """Set up iteration over NBRF entries.

    Arguments:
    o handle - A handle with NBRF entries to iterate through.
    o parser - An optional parser to pass the entries through before
    returning them. If None, then the raw entry will be returned.
    """
    wrapped = File.UndoHandle(handle)
    self.handle = wrapped
    # The reader is a RecordReader.StartsWith over the wrapped handle,
    # configured with ">".
    self._reader = RecordReader.StartsWith(wrapped, ">")
    self._parser = parser
def next(self):
    """Return the next IntelliGenetics record from the handle.

    The raw value from the reader is returned when no parser is set or
    when the reader produced nothing (e.g. None once the records have
    run out); otherwise the parsed record is returned.
    """
    data = self._reader.next()
    should_parse = self._parser is not None and data
    if should_parse:
        return self._parser.parse(File.StringHandle(data))
    return data
def test_handle(self):
    """Test as_handle with a file-like object argument."""
    target = self._path('test_file.fasta')
    with open(target, 'wb') as fp:
        with File.as_handle(fp) as handle:
            # An already-open file object must be handed back unchanged.
            self.assertEqual(
                fp, handle,
                "as_handle should "
                "return argument when given a file-like object")
            self.assertFalse(handle.closed)

        # Leaving as_handle must not close a file it did not open.
        self.assertFalse(
            handle.closed,
            "Exiting as_handle given a file-like object should not "
            "close the file")
def __init__(self, handle, parser=None):
    """__init__(self, handle, parser=None)

    Set up a new iterator.  handle is a file-like object.  parser is an
    optional Parser object used to change the results into another form;
    when it is None the raw contents of the file are returned.

    Raises ValueError unless the type of handle is FileType or
    InstanceType.
    """
    handle_type = type(handle)
    if not (handle_type is FileType or handle_type is InstanceType):
        raise ValueError("I expected a file handle or file-like object")
    self._uhandle = File.UndoHandle(handle)
    self._parser = parser
def feed(self, handle, consumer):
    """feed(self, handle, consumer)

    Scan SwissProt data.  handle is a file-like object containing the
    data and consumer receives the events produced while scanning.  A
    single call to _scan_record is made per feed.
    """
    already_wrapped = isinstance(handle, File.UndoHandle)
    uhandle = handle if already_wrapped else File.UndoHandle(handle)
    self._scan_record(uhandle, consumer)
def feed(self, handle, consumer):
    """Scan MEME output.

    handle should implement the readline method; consumer is a Consumer
    object that can receive the salient events.  The header is scanned
    first, then the motifs.
    """
    already_wrapped = isinstance(handle, File.UndoHandle)
    uhandle = handle if already_wrapped else File.UndoHandle(handle)

    self._scan_header(uhandle, consumer)
    self._scan_motifs(uhandle, consumer)
def feed(self, handle):
    """feed(self, handle)

    Scan the data in handle, a file-like object containing html.  The
    whole content is read at once and passed to the SGMLParser base
    class.
    """
    already_wrapped = isinstance(handle, File.UndoHandle)
    uhandle = handle if already_wrapped else File.UndoHandle(handle)
    sgmllib.SGMLParser.feed(self, uhandle.read())
def test_undohandle_read_block(self):
    """Reading in fixed-size blocks after a peekline yields all the data."""
    for block in [1, 2, 10]:
        s = StringIO(data)
        h = File.UndoHandle(s)
        # Peek first, so the subsequent reads start from a peeked line.
        h.peekline()
        pieces = []
        while True:
            chunk = h.read(block)
            if not chunk:
                break
            pieces.append(chunk)
        self.assertEqual(data, "".join(pieces))
        h.close()
def feed(self, handle, consumer):
    """Scan COMPASS output from handle, sending events to consumer.

    Nothing is scanned when the handle has no line to peek at.
    """
    if not isinstance(handle, File.UndoHandle):
        handle = File.UndoHandle(handle)

    assert isinstance(handle, File.UndoHandle), \
        "handle must be an UndoHandle"
    if not handle.peekline():
        return
    self._scan_record(handle, consumer)
def test_custom_path_like_object(self):
    """Test as_handle with a custom path-like object."""

    class CustomPathLike:
        # Minimal path-like object: only __fspath__ is provided.
        def __init__(self, path):
            self.path = path

        def __fspath__(self):
            return self.path

    target = CustomPathLike(self._path('test_file.fasta'))
    requested_mode = 'wb'
    with File.as_handle(target, mode=requested_mode) as handle:
        self.assertEqual(target.path, handle.name)
        self.assertEqual(requested_mode, handle.mode)
        self.assertFalse(handle.closed)
    # A handle that as_handle opened itself is closed on exit.
    self.assertTrue(handle.closed)
def parse_pdb_header(infile):
    """Return the header lines of a pdb file as a dictionary.

    Lines are collected from ``infile`` (anything ``File.as_handle``
    accepts) until the first ATOM, HETATM or MODEL record, and are then
    handed to ``_parse_pdb_header_list``.

    Dictionary keys are: head, deposition_date, release_date,
    structure_method, resolution, structure_reference, journal_reference,
    author and compound.
    """
    # The record name is compared as a six-character slice, so each name
    # must be padded to six characters: "ATOM" therefore needs two
    # trailing spaces ("ATOM " with one space can never match).
    coordinate_records = ("ATOM  ", "HETATM", "MODEL ")
    header = []
    with File.as_handle(infile, 'r') as f:
        for line in f:
            if line[0:6] in coordinate_records:
                # Coordinates start here; the header is complete.
                break
            header.append(line)
    return _parse_pdb_header_list(header)
def parse(file, format, **kwargs):
    """Lazily parse a file, yielding each tree it contains.

    A file holding a single tree still produces an iterable, with one
    element.  The file stays open while trees are being yielded.

    Example
    -------

    >>> trees = parse('../../Tests/PhyloXML/apaf.xml', 'phyloxml')
    >>> for tree in trees:
    ...     print(tree.rooted)
    True

    """
    with File.as_handle(file, 'r') as fp:
        # Pick the format's own parse function and relay its results.
        tree_parser = supported_formats[format].parse
        for tree in tree_parser(fp, **kwargs):
            yield tree
def test_stringio(self):
    """as_handle hands a StringIO back as the very same object."""
    buffer = StringIO()
    with File.as_handle(buffer) as handle:
        self.assertIs(buffer, handle)
def test_bgzf(self):
    """A BGZF file opened for random access yields a BgzfReader."""
    bgzf_path = "Quality/example.fastq.bgz"
    with File._open_for_random_access(bgzf_path) as handle:
        self.assertIsInstance(handle, bgzf.BgzfReader)
def test_stringio(self):
    """as_handle returns a StringIO argument that compares equal to it."""
    buffer = StringIO()
    with File.as_handle(buffer) as handle:
        self.assertEqual(buffer, handle)
def test_plain(self):
    """A plain file opened for random access is in binary read mode."""
    plain_path = "Quality/example.fastq"
    with File._open_for_random_access(plain_path) as handle:
        for flag in ("r", "b"):
            self.assertTrue(flag in handle.mode)