def parseHiC(filename, **kwargs):
    """Returns an :class:`.HiC` from a Hi-C data file.

    This function extends :func:`.parseHiCStream`. URLs, and local files
    whose first three bytes are the ``HIC`` magic number, are handed to
    :func:`.parseHiCBinary`; any other local file is opened in text mode and
    passed to :func:`.parseHiCStream`.

    :arg filename: the filename to the Hi-C data file.
    :type filename: str

    :arg title: title forwarded to the parser; when missing or **None**, the
        base name of *filename* is used.
    :type title: str

    All remaining keyword arguments are forwarded unchanged to the parser.
    """

    import os

    # Always take 'title' out of kwargs: leaving an explicit ``title=None``
    # behind would make the parser calls below receive 'title' twice and
    # fail with a TypeError.
    title = kwargs.pop('title', None)
    if title is None:
        title = os.path.basename(filename)

    if isURL(filename):
        return parseHiCBinary(filename, title=title, **kwargs)

    # Compare the raw bytes directly; a file shorter than three bytes is
    # simply "not binary" instead of an unpacking error.
    with open(filename, 'rb') as req:
        is_binary = req.read(3) == b"HIC"

    if is_binary:
        return parseHiCBinary(filename, title=title, **kwargs)

    with open(filename, 'r') as filestream:
        return parseHiCStream(filestream, title=title, **kwargs)
def update(self, source=None):
    """Update data and files from CATH.

    :arg source: a URL, the name of a local XML file, a file-like object
        (anything with a ``read`` attribute), or a string of XML text.
        A source already stored on the instance takes precedence over this
        argument. When neither is set, the instance is only reset.

    :raises TypeError: if *source* is neither a string nor a file-like object
    """

    # A previously stored source wins over the argument (existing behavior).
    self._source = source = self._source or source
    self.reset()
    if source is None:
        return

    LOGGER.timeit('_cath_update')

    # Classify the source; existing files are checked before URLs.
    if isinstance(source, str):
        if isfile(source):
            kind = 'file'
        elif isURL(source):
            kind = 'url'
        else:
            kind = 'text'
    elif hasattr(source, 'read'):
        kind = 'file'
    else:
        raise TypeError('source must be either an url, file name, file handle, '
                        'or text in xml format')

    root = None
    if kind == 'url':
        LOGGER.info('Fetching data from CATH...')
        self._fetch()

        LOGGER.info('Parsing CATH files...')
        self._parse()
    elif kind == 'file':
        LOGGER.info('Reading data from the local xml file...')
        root = ET.parse(source).getroot()
    else:
        LOGGER.info('Parsing input string...')
        # fromstring() already returns the root element (there is no
        # tree object to call getroot() on).
        root = ET.fromstring(source)

    # post-processing of locally supplied XML
    if root is not None:
        prefix = 'id.'

        # Remove the literal prefix from node tags. str.lstrip(prefix) would
        # strip any leading run of the characters 'i', 'd' and '.', which
        # mangles tags that merely start with one of those letters.
        for node in root.iter():
            if node.tag.startswith(prefix):
                node.tag = node.tag[len(prefix):]

        # convert the 'length' attribute from str to int
        for node in root.findall('.//*[@length]'):
            node.attrib['length'] = int(node.attrib['length'])

        copy2(root, self.root)
        self._update_map()

    LOGGER.report('CATH local database built in %.2fs.', '_cath_update')
def update(self, source=None):
    """Update data and files from CATH.

    :arg source: a URL, the name of a local XML file, a file-like object
        (anything with a ``read`` attribute), or a string of XML text.
        A source already stored on the instance takes precedence over this
        argument. When neither is set, the instance is only reset.

    :raises TypeError: if *source* is neither a string nor a file-like object
    """

    # A previously stored source wins over the argument (existing behavior).
    self._source = source = self._source or source
    self.reset()
    if source is None:
        return

    LOGGER.timeit('_cath_update')

    # Classify the source; existing files are checked before URLs.
    if isinstance(source, str):
        if isfile(source):
            kind = 'file'
        elif isURL(source):
            kind = 'url'
        else:
            kind = 'text'
    elif hasattr(source, 'read'):
        kind = 'file'
    else:
        raise TypeError('source must be either an url, file name, file handle, '
                        'or text in xml format')

    root = None
    if kind == 'url':
        LOGGER.info('Fetching data from CATH...')
        self._fetch()

        LOGGER.info('Parsing CATH files...')
        self._parse()
    elif kind == 'file':
        LOGGER.info('Reading data from the local xml file...')
        root = ET.parse(source).getroot()
    else:
        LOGGER.info('Parsing input string...')
        # fromstring() already returns the root element (there is no
        # tree object to call getroot() on).
        root = ET.fromstring(source)

    # post-processing of locally supplied XML
    if root is not None:
        prefix = 'id.'

        # Remove the literal prefix from node tags. str.lstrip(prefix) would
        # strip any leading run of the characters 'i', 'd' and '.', which
        # mangles tags that merely start with one of those letters.
        for node in root.iter():
            if node.tag.startswith(prefix):
                node.tag = node.tag[len(prefix):]

        # convert the 'length' attribute from str to int
        for node in root.findall('.//*[@length]'):
            node.attrib['length'] = int(node.attrib['length'])

        copy2(root, self.root)
        self._update_map()

    LOGGER.report('CATH local database built in %.2fs.', '_cath_update')