Esempio n. 1
    def all_remote_versions(self):
        # NOTE(review): the docstring delimiters appear to have been lost in
        # this excerpt; the two prose lines below are the docstring text.
        Get all Versions from catalogue archive plus latest version.
        :return: list of variables of the type DataSourceVersion

        # get from release catalogue archive
        archive_path = posixpath.join(self.BASEURL, self.BASEPATH, 'archive')

        file_list = downloader.list_ftp_dir(archive_path)
        # get names of release catalogue files
        # NOTE(review): the comprehension below is incomplete as shown -- both
        # the element expression and the right-hand operand of `in` are
        # missing. The later `x.split('-')` implies the kept items are strings.
        release_catalog_list = set(
            [ for x in file_list if 'RefSeq-release' in])

        # get release numbers from names
        # RefSeq-release20.catalog.gz => 20
        # (take the part after the first '-', keep what precedes the first
        # '.', then strip the 'release' prefix and convert to int)
        # NOTE(review): the closing bracket of this list is missing as shown.
        version_numbers = [
            int(x.split('-')[1].split('.')[0].replace('release', ''))
            for x in release_catalog_list

        # put in DataSourceVersion, add latest
        # NOTE(review): no step that adds the latest version is visible here;
        # only the archived release numbers are wrapped.
        ds_versions = [DataSourceVersion(x) for x in version_numbers]

        return ds_versions
Esempio n. 2
    def latest_remote_version(self):
        # NOTE(review): the docstring delimiters appear to have been lost in
        # this excerpt; the prose lines below are the docstring text.
        Versions are named with 'year_month', e.g. '2016_01'.
        Data is parsed from 'RELEASE.metalink' file in 'current_release' directory of FTP server.

        :return: Latest remote version.
        :rtype: DataSourceVersion
        # NOTE(review): this call is truncated as shown; the remaining path
        # components and the closing parenthesis are missing.
        current_release_path = posixpath.join(self.UNIPROT_BASEURL,

        release_file = posixpath.join(current_release_path, 'RELEASE.metalink')

        # read XML file from server into string
        # NOTE(review): this call is truncated as shown; its arguments are
        # missing (presumably `release_file` -- confirm against the original).
        release_metalink_xml = downloader.get_single_file_ftp(

        # replace xml namespace thing to avoid dealing with namespaces
        # (renaming the attribute means the parser no longer sees a default
        # namespace, so plain tag names can be used in find())
        release_metalink_xml = release_metalink_xml.replace(
            ' xmlns="', ' xmlnamespace="')

        # parse XML
        tree = ElementTree.fromstring(release_metalink_xml)
        # NOTE(review): assuming ElementTree is xml.etree.ElementTree, find()
        # returns None when no matching child exists, so `.text` would raise
        # AttributeError in that case.
        version = tree.find('version').text

        # the version string is converted by self._date_from_name before
        # being wrapped in a DataSourceVersion
        return DataSourceVersion(self._date_from_name(version))
Esempio n. 3
 def latest_remote_version(self):
     # NOTE(review): the docstring delimiters appear to have been lost in this
     # excerpt, and the first docstring sentence is cut off after "from".
     Get number of latest release from
     :return: DataSourceVersion of latest remote version
     # returns a BytesIO object
     # NOTE(review): this call is truncated as shown; its arguments are missing.
     release_file = downloader.get_single_file_ftp(
     # to get the text simply 'read' and 'decode'
     # NOTE(review): this expression is truncated as shown; per the comment
     # above it presumably reads and decodes `release_file` -- confirm.
     release_number = int(
     return DataSourceVersion(release_number)
Esempio n. 4
    def all_remote_versions(self):
        """
        Get all versions available on miRBase FTP server.

        All versions are a directory in /pub/mirbase.

        No remote lookup happens in this method: one DataSourceVersion is
        built for every item obtained by iterating ``VERSION_2_URL``.

        :return: List of DataSourceVersions available on server.
        :rtype: list(DataSourceVersion)
        """
        return [DataSourceVersion(x) for x in VERSION_2_URL]
Esempio n. 5
    def all_remote_versions(self):
        # NOTE(review): the docstring delimiters appear to have been lost in
        # this excerpt; the prose lines below are the docstring text.
        Get all versions available on miRBase FTP server.

        All versions are a directory in /pub/mirbase.

        :return: List of DataSourceVersions available on server.
        :rtype: list(DataSourceVersion)
        # NOTE(review): this call is truncated as shown; the arguments after
        # self.mirbase_baseurl and the closing parenthesis are missing.
        ftp_list = downloader.list_ftp_dir(self.mirbase_baseurl,
        # NOTE(review): the DataSourceVersion(...) argument is missing as
        # shown; which attribute of each listing entry is used cannot be
        # determined from this excerpt.
        versions = [DataSourceVersion( for x in ftp_list]

        return versions
Esempio n. 6
    def get_catalog_file_path(instance):
        """
        Return the path to the Catalog file for a given instance.

        The unfiltered file ``RefSeq-release<version>.catalog.gz`` is returned
        when it exists in the instance directory. Otherwise the path of the
        filtered variant (``...catalog.filtered.gz``) is returned; that path
        is not checked for existence.

        :param instance: The DataSource instance
        :return: The Catalog file path
        """
        version = DataSourceVersion.version_from_string(instance.version)

        # prefer the unfiltered catalog if it is present
        file_name = 'RefSeq-release{0}.catalog.gz'.format(version)
        file_path = os.path.join(instance.instance_dir, file_name)
        if os.path.exists(file_path):
            return file_path

        # fall back to the filtered catalog
        file_name = 'RefSeq-release{0}.catalog.filtered.gz'.format(version)
        return os.path.join(instance.instance_dir, file_name)
Esempio n. 7
    def get_accession2geneid_file_path(instance):
        """
        Return the path to the accession2geneid file for a given instance.

        The unfiltered file ``release<version>.accession2geneid.gz`` is
        returned when it exists in the instance directory. Otherwise the path
        of the filtered variant (``...accession2geneid.filtered.gz``) is
        returned; that path is not checked for existence.

        :param instance: The DataSource instance
        :return: The accession2geneid file path
        """
        version = DataSourceVersion.version_from_string(instance.version)

        # try to return unfiltered
        file_name = 'release{0}.accession2geneid.gz'.format(version)
        file_path = os.path.join(instance.instance_dir, file_name)
        if os.path.exists(file_path):
            return file_path

        # fall back to the filtered file
        file_name = 'release{0}.accession2geneid.filtered.gz'.format(version)
        return os.path.join(instance.instance_dir, file_name)
 def latest_remote_version(self):
     """
     Only the latest version is accessible.

     :return: DataSourceVersion built from the fixed string '03-2018'
     """
     return DataSourceVersion('03-2018')
Esempio n. 9
    def parse_xml(self):
        # NOTE(review): the docstring delimiters appear to have been lost in
        # this excerpt; the prose line below is the docstring text.
        Parse descriptor XML file.
        mesh_instance = self.get_instance_by_name('Mesh')

        # NOTE(review): this call is truncated as shown; its argument and the
        # closing parenthesis are missing.
        version = DataSourceVersion.version_from_string(

        descriptor_xml = mesh_instance.get_file('desc{}.xml'.format(str(version)))
        log.debug("XML file {}".format(descriptor_xml))

        tree = ET.parse(descriptor_xml)
        root = tree.getroot()

        # Sets intended to remember which qualifier / concept / term ids were
        # already turned into nodes.
        # NOTE(review): no visible line ever adds to these sets, so as shown
        # every `not in` test below is always true -- the `.add(...)` calls
        # may have been lost in this excerpt.
        check_qualifier = set()
        check_concepts = set()
        check_terms = set()

        # NOTE(review): assuming ET is xml.etree.ElementTree,
        # Element.getchildren() was removed in Python 3.9; iterating the
        # element directly is the documented replacement.
        for descriptor_record in root.getchildren():
            descriptor_ui = descriptor_record.find('DescriptorUI').text

            # <DescriptorName>
            #  <String>Calcimycin</String>
            # </DescriptorName>
            descriptor_name = descriptor_record.find('.DescriptorName/String').text

            self.descriptor.add_node({'sid': descriptor_ui, 'name': descriptor_name})

            #   <AllowableQualifiersList>
            #   <AllowableQualifier>
            #    <QualifierReferredTo>
            #     <QualifierUI>Q000302</QualifierUI>
            #      <QualifierName>
            #      <String>isolation &amp; purification</String>
            #      </QualifierName>
            #    </QualifierReferredTo>
            #    <Abbreviation>IP</Abbreviation>
            #   </AllowableQualifier>
            #   </AllowableQualifiersList>

            # NOTE(review): this findall(...) call is truncated as shown; the
            # path expression and the closing parenthesis are missing.
            allowed_qualifiers = descriptor_record.findall(
            for qualifier in allowed_qualifiers:
                qualifier_ui = qualifier.find('.QualifierUI').text

                # add qualifier node if not exists
                if qualifier_ui not in check_qualifier:
                    qualifier_name = qualifier.find('.QualifierName/String').text
                    self.qualifier.add_node({'sid': qualifier_ui, 'name': qualifier_name})

                # add descriptor -> qualifier relationship
                # NOTE(review): only the argument line of this call survives;
                # the callee line and the closing parenthesis are missing.
                    {'sid': descriptor_ui}, {'sid': qualifier_ui}, {'source': 'mesh'}

            #  <ConceptList>
            #    <Concept PreferredConceptYN="Y">
            #     <ConceptUI>M0000001</ConceptUI>
            #     <ConceptName>
            #      <String>Calcimycin</String>
            #     </ConceptName>
            #     <CASN1Name>4-Benzoxazolecarboxylic acid, 5-(methylamino)-2-((3,9,11-trimethyl-8-(1-methyl-2-oxo-2-(1H-pyrrol-2-yl)ethyl)-1,7-dioxaspiro(5.5)undec-2-yl)methyl)-, (6S-(6alpha(2S*,3S*),8beta(R*),9beta,11alpha))-</CASN1Name>
            #     <RegistryNumber>37H9VM9WZL</RegistryNumber>
            #     <ScopeNote>An ionophorous, polyether antibiotic from Streptomyces chartreusensis. It binds and transports CALCIUM and other divalent cations across membranes and uncouples oxidative phosphorylation while inhibiting ATPase of rat liver mitochondria. The substance is used mostly as a biochemical tool to study the role of divalent cations in various biological systems.
            #     </ScopeNote>
            #     <RelatedRegistryNumberList>
            #      <RelatedRegistryNumber>52665-69-7 (Calcimycin)</RelatedRegistryNumber>
            #     </RelatedRegistryNumberList>
            #     <ConceptRelationList>
            #      <ConceptRelation RelationName="NRW">
            #      <Concept1UI>M0000001</Concept1UI>
            #      <Concept2UI>M0353609</Concept2UI>
            #      </ConceptRelation>
            #     </ConceptRelationList>
            #     <TermList>
            #      <Term  ConceptPreferredTermYN="Y"  IsPermutedTermYN="N"  LexicalTag="NON"  RecordPreferredTermYN="Y">
            #       <TermUI>T000002</TermUI>
            #       <String>Calcimycin</String>
            #       <DateCreated>
            #        <Year>1999</Year>
            #        <Month>01</Month>
            #        <Day>01</Day>
            #       </DateCreated>
            #       <ThesaurusIDlist>
            #        <ThesaurusID>FDA SRS (2014)</ThesaurusID>
            #        <ThesaurusID>NLM (1975)</ThesaurusID>
            #       </ThesaurusIDlist>
            #      </Term>
            #     </TermList>
            #    </Concept>

            concepts = descriptor_record.findall('.ConceptList/Concept')

            for concept in concepts:
                # attribute lookup raises KeyError if PreferredConceptYN is absent
                preferred_concept = concept.attrib['PreferredConceptYN']

                concept_ui = concept.find('.ConceptUI').text

                # concept node if not exists
                if concept_ui not in check_concepts:
                    concept_properties = {}
                    concept_properties['sid'] = concept_ui
                    concept_properties['name'] = concept.find('.ConceptName/String').text

                    # NOTE(review): the `try:` line, the body of the except
                    # clause and whatever followed it (left blank below) are
                    # missing as shown. The except clause suggests a concept
                    # without a ScopeNote is tolerated; no visible line stores
                    # concept_properties anywhere.
                        concept_properties['scope_note'] = concept.find('.ScopeNote').text
                    except AttributeError as e:



                # (Descriptor)--(Concept) relation
                self.descriptor_has_concept.add_relationship({'sid': descriptor_ui}, {'sid': concept_ui},
                                                             {'preferred': preferred_concept})

                # concept relations
                for concept_relation in concept.findall('.ConceptRelationList/ConceptRelation'):
                    left = concept_relation.find('.Concept1UI').text
                    right = concept_relation.find('.Concept2UI').text
                    name = concept_relation.attrib['RelationName']

                    self.concept_related_concept.add_relationship({'sid': left}, {'sid': right}, {'name': name})

                # iterate Terms for concept
                for term in concept.findall('.TermList/Term'):
                    term_ui = term.find('TermUI').text
                    concept_preferred_term = term.attrib['ConceptPreferredTermYN']

                    # Term node if not exists
                    if term_ui not in check_terms:
                        term_name = term.find('.String').text
                        self.term.add_node({'sid': term_ui, 'name': term_name})


                    # (Concept)--(Term)
                    self.concept_has_term.add_relationship({'sid': concept_ui}, {'sid': term_ui},
                                                           {'preferred': concept_preferred_term})