def _process_boolean_group_args(values, postfix):

    if postfix:

        values = {
            '%s%s' % (val, postfix)
            for val in common.to_list(values)
        }

    return ' or '.join(common.to_set(values))
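# Illustrative sketch (not part of the original module), assuming
# `common.to_list` and `common.to_set` wrap single values into a list/set:
# the helper above appends the postfix to each value and joins the results
# into a boolean expression, e.g.
#
#     _process_boolean_group_args(['kinase', 'phosphatase'], '_group')
#     # -> 'kinase_group or phosphatase_group' (order depends on the set)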
def setup(self):

    self.name = self._get_param('name')
    self.id_type_enzyme = self._get_param('id_type_enzyme', 'genesymbol')
    self.id_type_substrate = self._get_param(
        'id_type_substrate',
        'genesymbol',
    )
    self.id_type_substrate = common.to_list(self.id_type_substrate)
    self.ncbi_tax_id = self._get_param('ncbi_tax_id', 9606)
    self.organisms_supported = self._get_param(
        'organisms_supported',
        False,
    )
    self.allow_mixed_organisms = self._get_param(
        'allow_mixed_organisms',
        False,
    )
    self.input_method = self._get_param('input_method')
    self.set_method()
def biomart_query(
        attrs,
        filters=None,
        transcript=False,
        gene=False,
        dataset='hsapiens_gene_ensembl',
    ):
    """
    Use https://www.ensembl.org/biomart/martview/ to check for attribute
    and dataset names.
    """

    _attrs = []

    if gene:

        _attrs.append('ensembl_gene_id')

    if transcript:

        _attrs.append('ensembl_transcript_id')

    _attrs.extend(common.to_list(attrs))
    filters = common.to_list(filters)

    record = collections.namedtuple('EnsemblRecord', _attrs)

    _logger._log(
        'Downloading data from Ensembl Biomart: '
        'dataset=`%s`, '
        '%s'
        'attributes=`%s`.' % (
            dataset,
            ('filters=`%s`, ' % ', '.join(filters) if filters else ''),
            ', '.join(_attrs),
        )
    )

    rewsp = re.compile(r'\n\s+')

    # the XML query is built from a template shipped with the module
    xml_template_path = os.path.join(common.DATA, 'ensembl_biomart_query.xml')

    with open(xml_template_path, 'r') as fp:

        xml_template = fp.read()

    filter_part = ''.join(
        _filter_xml_template % _filter
        for _filter in filters
    )
    attr_part = ''.join(
        _attr_xml_template % _attr
        for _attr in _attrs
    )

    xml_query = xml_template % (
        dataset,
        filter_part,
        attr_part,
    )
    xml_query = rewsp.sub('', xml_query)

    biomart_url = urls.urls['ensembl']['biomart_url'] % xml_query

    c = curl.Curl(biomart_url, large=True, silent=False)

    success = False

    for line in c.result:

        line = line.strip('\n\r').split('\t')

        # a complete transfer ends with a `[success]` tag
        success = success or line[0] == '[success]'

        if len(line) == len(record._fields):

            yield record(*line)

    if not success:

        _logger._log(
            'Error: Interrupted transfer while downloading data '
            'from Ensembl Biomart (missing `success` tag).'
        )
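# Usage sketch (illustrative, not part of the original module). The attribute
# names `external_gene_name` and `uniprotswissprot` are assumptions based on
# the Ensembl Biomart attribute listing; verify them at
# https://www.ensembl.org/biomart/martview/ as the docstring above suggests.

def _example_biomart_gene_symbols():
    """
    Illustrative sketch: consumes the generator returned by `biomart_query`
    and collects Ensembl gene ID -> gene symbol pairs for the default
    human dataset.
    """

    symbols = {}

    for rec in biomart_query(
        attrs=['external_gene_name', 'uniprotswissprot'],
        gene=True,
    ):
        # each `rec` is an `EnsemblRecord` namedtuple whose fields follow
        # the order of the requested attributes
        symbols[rec.ensembl_gene_id] = rec.external_gene_name

    return symbols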
def pathwaycommons_interactions(
        resources=None,
        types=None,
        by_interaction=False,
        version=12,
    ):

    interactions = collections.defaultdict(set) if by_interaction else []

    types = common.to_set(types)

    # restrict to the requested resources, or use all known ones by default
    resources = {
        res.lower()
        for res in (
            common.to_list(resources) or
            (pc_res.name for pc_res in pathwaycommons_resources)
        )
    }

    prg = progress.Progress(
        len(resources),
        'Processing PathwayCommons',
        1,
        percent=False,
    )

    url = urls.urls['pwcommons']['url']

    for resource in pathwaycommons_resources:

        if not resources & {resource.pc_label, resource.name.lower()}:

            continue

        prg.step()

        _version = min(resource.version, version)
        resource_url = url % (_version, _version, resource.pc_label)
        c = curl.Curl(resource_url, silent=False, large=True)

        for l in c.result:

            if hasattr(l, 'decode'):

                l = l.decode('ascii')

            l = l.strip('\n\r').split('\t')

            if not types or l[1] in types:

                if by_interaction:

                    a_b = (l[0], l[1], l[2])
                    b_a = (l[2], l[1], l[0])

                    directed = l[1] in pathwaycommons_directed_types

                    # for undirected interaction types, collapse A-B and B-A
                    # onto whichever orientation has been recorded already
                    key = (
                        b_a
                            if (
                                a_b not in interactions and
                                not directed and
                                b_a in interactions
                            ) else
                        a_b
                    )

                    interactions[key].add(
                        PathwayCommonsInteraction(*key, resource=resource.name)
                    )

                else:

                    l.append(resource.name)
                    interactions.append(PathwayCommonsInteraction(*l))

    return interactions
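# Usage sketch (illustrative, not part of the original module). The resource
# name 'Reactome' and the SIF relation 'interacts-with' are assumptions based
# on the Pathway Commons SIF format; adjust them to whatever
# `pathwaycommons_resources` and the downloaded files actually contain.

def _example_pathwaycommons_reactome():
    """
    Illustrative sketch: restricts the download to a single resource and a
    single interaction type, grouping records by interacting pair.
    """

    by_pair = pathwaycommons_interactions(
        resources='Reactome',
        types={'interacts-with'},
        by_interaction=True,
    )

    # keys are (partner_a, type, partner_b) tuples; values are sets of
    # `PathwayCommonsInteraction` records carrying the resource name
    return by_pair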