コード例 #1
0
ファイル: intercell.py プロジェクト: rfour92/pypath
    def _process_boolean_group_args(values, postfix):
        """
        Combine one or more values into a single ``' or '`` separated string.

        If ``postfix`` is truthy, it is appended to each value before
        joining. In every case the values are passed through
        ``common.to_set`` right before they are joined.
        """

        if not postfix:

            return ' or '.join(common.to_set(values))

        # a set, so identical suffixed labels collapse into one
        suffixed = set()

        for value in common.to_list(values):

            suffixed.add('%s%s' % (value, postfix))

        return ' or '.join(common.to_set(suffixed))
コード例 #2
0
    def setup(self):
        """
        Read the parameters of this object and select the input method.

        Each parameter is looked up with ``self._get_param`` (with a
        fallback value where one is given below) and stored as an
        instance attribute of the same name. ``id_type_substrate`` is
        additionally converted by ``common.to_list``. Finally
        ``self.set_method()`` is called.
        """

        # positional arguments for `_get_param`: the key and, optionally,
        # the fallback value; the key is also the attribute name
        param_specs = (
            ('name',),
            ('id_type_enzyme', 'genesymbol'),
            ('id_type_substrate', 'genesymbol'),
            ('ncbi_tax_id', 9606),
            ('organisms_supported', False),
            ('allow_mixed_organisms', False),
            ('input_method',),
        )

        for spec in param_specs:

            attr = spec[0]
            value = self._get_param(*spec)

            if attr == 'id_type_substrate':

                value = common.to_list(value)

            setattr(self, attr, value)

        self.set_method()
コード例 #3
0
def biomart_query(
    attrs,
    filters=None,
    transcript=False,
    gene=False,
    dataset='hsapiens_gene_ensembl',
):
    """
    Query Ensembl Biomart and yield the resulting rows as named tuples.

    Use https://www.ensembl.org/biomart/martview/ to check for attribute
    and dataset names.

    :param attrs:
        One or more Biomart attribute names to retrieve.
    :param filters:
        Optional, one or more values, each of them inserted into the
        query by ``_filter_xml_template``.
    :param bool transcript:
        Add ``ensembl_transcript_id`` in front of the attributes.
    :param bool gene:
        Add ``ensembl_gene_id`` in front of the attributes (ahead of the
        transcript ID if both are requested).
    :param str dataset:
        Name of the Biomart dataset.

    :return:
        Generator of ``EnsemblRecord`` named tuples; the fields are the
        requested attributes in the order described above. Lines with
        a number of tab separated fields different from the number of
        attributes are skipped. The ``[success]`` marker line is never
        yielded; if it is missing, an error is written to the log after
        the last line has been processed.
    """

    _attrs = []

    if gene:

        _attrs.append('ensembl_gene_id')

    if transcript:

        _attrs.append('ensembl_transcript_id')

    _attrs.extend(common.to_list(attrs))
    filters = common.to_list(filters)

    record = collections.namedtuple('EnsemblRecord', _attrs)

    _logger._log(
        'Downloading data from Ensembl Biomart: '
        'dataset=`%s`, '
        '%s'
        'attributes=`%s`.' % (
            dataset,
            ('filters=`%s`, ' % ', '.join(filters) if filters else ''),
            ', '.join(_attrs),
        ))

    # the query is embedded in a URL, hence newlines and the indentation
    # following them are removed from the XML
    rewsp = re.compile(r'\n\s+')

    xml_template_path = os.path.join(common.DATA, 'ensembl_biomart_query.xml')

    with open(xml_template_path, 'r') as fp:

        xml_template = fp.read()

    filter_part = ''.join(_filter_xml_template % _filter
                          for _filter in filters)
    attr_part = ''.join(_attr_xml_template % _attr for _attr in _attrs)

    xml_query = xml_template % (
        dataset,
        filter_part,
        attr_part,
    )
    xml_query = rewsp.sub('', xml_query)

    biomart_url = urls.urls['ensembl']['biomart_url'] % xml_query

    c = curl.Curl(biomart_url, large=True, silent=False)

    success = False
    n_fields = len(record._fields)

    for line in c.result:

        fields = line.strip('\n\r').split('\t')

        if fields[0] == '[success]':

            # The marker is a single field line: with exactly one
            # attribute requested it would pass the field count check
            # below and end up among the data records.
            success = True
            continue

        if len(fields) == n_fields:

            yield record(*fields)

    if not success:

        _logger._log('Error: Interrupted transfer while downloading data '
                     'from Ensembl Biomart (missing `success` tag).')
コード例 #4
0
ファイル: pathwaycommons.py プロジェクト: rfour92/pypath
def pathwaycommons_interactions(
    resources=None,
    types=None,
    by_interaction=False,
    version=12,
):
    """
    Download and collect interactions from PathwayCommons.

    :param resources:
        One or more resource names; these are lowercased and matched
        against the ``pc_label`` and the lowercased ``name`` of the
        entries in ``pathwaycommons_resources``. If empty, all entries
        of ``pathwaycommons_resources`` are processed.
    :param types:
        One or more interaction types (second column of the input); if
        empty, all types are accepted.
    :param bool by_interaction:
        If ``True``, return a dict with ``(a, type, b)`` tuples as keys
        and sets of ``PathwayCommonsInteraction`` records as values.
        An interaction with type not in ``pathwaycommons_directed_types``
        is stored under the reversed key if that already exists and the
        forward key does not. If ``False``, return a list of
        ``PathwayCommonsInteraction`` records.
    :param int version:
        Upper limit of the version: each resource is retrieved at
        ``min(resource.version, version)``.

    :return:
        Dict of sets or list, see ``by_interaction``.
    """

    interactions = collections.defaultdict(set) if by_interaction else []

    types = common.to_set(types)

    resources = {
        res.lower()
        for res in (common.to_list(resources) or (
            pc_res.name for pc_res in pathwaycommons_resources))
    }

    prg = progress.Progress(
        len(resources),
        'Processing PathwayCommons',
        1,
        percent=False,
    )

    url = urls.urls['pwcommons']['url']

    for resource in pathwaycommons_resources:

        if not resources & {resource.pc_label, resource.name.lower()}:

            continue

        prg.step()
        _version = min(resource.version, version)
        resource_url = url % (_version, _version, resource.pc_label)
        c = curl.Curl(resource_url, silent=False, large=True)

        for line in c.result:

            if hasattr(line, 'decode'):

                line = line.decode('ascii')

            fields = line.strip('\n\r').split('\t')

            if len(fields) < 3:

                # blank or truncated line: it has no partner A, type and
                # partner B, indexing it below would raise IndexError
                continue

            if types and fields[1] not in types:

                continue

            if not by_interaction:

                fields.append(resource.name)
                interactions.append(PathwayCommonsInteraction(*fields))
                continue

            a_b = (fields[0], fields[1], fields[2])
            b_a = (fields[2], fields[1], fields[0])

            directed = fields[1] in pathwaycommons_directed_types

            # undirected interactions are merged into the record of the
            # opposite orientation if only that one exists so far
            key = (b_a if (a_b not in interactions and not directed
                           and b_a in interactions) else a_b)

            interactions[key].add(
                PathwayCommonsInteraction(*key,
                                          resource=resource.name))

    return interactions