Exemple #1
0
    def get_catalog_file_path(instance):
        """
        Return the path to the RefSeq catalog file of a given instance.

        The unfiltered catalog is preferred. If it does not exist in the
        instance directory, the path of the filtered catalog is returned
        instead (its existence is not checked).

        :param instance: The DataSource instance
        :return: The catalog file path
        """
        release = DataSourceVersion.version_from_string(instance.version)

        catalog_name = 'RefSeq-release{0}.catalog.gz'.format(release)
        catalog_path = os.path.join(instance.instance_dir, catalog_name)

        if not os.path.exists(catalog_path):
            # no unfiltered catalog on disk, fall back to the filtered one
            catalog_name = 'RefSeq-release{0}.catalog.filtered.gz'.format(release)
            catalog_path = os.path.join(instance.instance_dir, catalog_name)

        return catalog_path
Exemple #2
0
    def get_accession2geneid_file_path(instance):
        """
        Return the path to the accession2geneid file of a given instance.

        The unfiltered file is preferred. If it does not exist in the
        instance directory, the path of the filtered file is returned
        instead (its existence is not checked).

        :param instance: The DataSource instance
        :return: The accession2geneid file path
        """
        release = DataSourceVersion.version_from_string(instance.version)

        mapping_name = 'release{0}.accession2geneid.gz'.format(release)
        mapping_path = os.path.join(instance.instance_dir, mapping_name)

        if not os.path.exists(mapping_path):
            # unfiltered file is missing, point to the filtered one
            mapping_name = 'release{0}.accession2geneid.filtered.gz'.format(
                release)
            mapping_path = os.path.join(instance.instance_dir, mapping_name)

        return mapping_path
Exemple #3
0
    def parse_xml(self):
        """
        Parse the MeSH descriptor XML file into node and relationship sets.

        The file ``desc<version>.xml`` of the 'Mesh' data source instance is
        loaded completely and every child of the root element is handled as
        one descriptor record. For each record this adds:

        - a descriptor node (``self.descriptor``)
        - qualifier nodes (``self.qualifier``) and descriptor -> qualifier
          relationships (``self.descriptor_allowed_qualifier``)
        - concept nodes (``self.concept``), (Descriptor)--(Concept)
          relationships (``self.descriptor_has_concept``) and concept
          relations (``self.concept_related_concept``)
        - term nodes (``self.term``) and (Concept)--(Term) relationships
          (``self.concept_has_term``)

        Qualifier, concept and term nodes are added only the first time their
        UI is seen; relationships are added for every occurrence.

        :return: None
        """
        mesh_instance = self.get_instance_by_name('Mesh')

        version = DataSourceVersion.version_from_string(
            mesh_instance.version
        )

        descriptor_xml = mesh_instance.get_file('desc{}.xml'.format(str(version)))
        log.debug("XML file {}".format(descriptor_xml))

        tree = ET.parse(descriptor_xml)
        root = tree.getroot()

        # UIs of nodes that were already added; the same qualifier, concept or
        # term can show up in many records but its node is added only once
        check_qualifier = set()
        check_concepts = set()
        check_terms = set()

        # Iterate the element itself: Element.getchildren() was removed from
        # xml.etree.ElementTree in Python 3.9.
        for descriptor_record in root:
            descriptor_ui = descriptor_record.find('DescriptorUI').text

            # <DescriptorName>
            #  <String>Calcimycin</String>
            # </DescriptorName>
            descriptor_name = descriptor_record.find('.DescriptorName/String').text

            self.descriptor.add_node({'sid': descriptor_ui, 'name': descriptor_name})

            #   <AllowableQualifiersList>
            #   <AllowableQualifier>
            #    <QualifierReferredTo>
            #     <QualifierUI>Q000302</QualifierUI>
            #      <QualifierName>
            #      <String>isolation &amp; purification</String>
            #      </QualifierName>
            #    </QualifierReferredTo>
            #    <Abbreviation>IP</Abbreviation>
            #   </AllowableQualifier>
            #   </AllowableQualifiersList>

            allowed_qualifiers = descriptor_record.findall(
                '.AllowableQualifiersList/AllowableQualifier/QualifierReferredTo')
            for qualifier in allowed_qualifiers:
                qualifier_ui = qualifier.find('.QualifierUI').text

                # add qualifier node if it does not exist yet
                if qualifier_ui not in check_qualifier:
                    qualifier_name = qualifier.find('.QualifierName/String').text
                    self.qualifier.add_node({'sid': qualifier_ui, 'name': qualifier_name})
                    check_qualifier.add(qualifier_ui)

                # add descriptor -> qualifier relationship
                self.descriptor_allowed_qualifier.add_relationship(
                    {'sid': descriptor_ui}, {'sid': qualifier_ui}, {'source': 'mesh'}
                )

            #  <ConceptList>
            #    <Concept PreferredConceptYN="Y">
            #     <ConceptUI>M0000001</ConceptUI>
            #     <ConceptName>
            #      <String>Calcimycin</String>
            #     </ConceptName>
            #     <CASN1Name>4-Benzoxazolecarboxylic acid, ...</CASN1Name>
            #     <RegistryNumber>37H9VM9WZL</RegistryNumber>
            #     <ScopeNote>An ionophorous, polyether antibiotic from ...
            #     </ScopeNote>
            #     <RelatedRegistryNumberList>
            #      <RelatedRegistryNumber>52665-69-7 (Calcimycin)</RelatedRegistryNumber>
            #     </RelatedRegistryNumberList>
            #     <ConceptRelationList>
            #      <ConceptRelation RelationName="NRW">
            #      <Concept1UI>M0000001</Concept1UI>
            #      <Concept2UI>M0353609</Concept2UI>
            #      </ConceptRelation>
            #     </ConceptRelationList>
            #     <TermList>
            #      <Term  ConceptPreferredTermYN="Y"  IsPermutedTermYN="N"  LexicalTag="NON"  RecordPreferredTermYN="Y">
            #       <TermUI>T000002</TermUI>
            #       <String>Calcimycin</String>
            #       <DateCreated>
            #        <Year>1999</Year>
            #        <Month>01</Month>
            #        <Day>01</Day>
            #       </DateCreated>
            #       <ThesaurusIDlist>
            #        <ThesaurusID>FDA SRS (2014)</ThesaurusID>
            #        <ThesaurusID>NLM (1975)</ThesaurusID>
            #       </ThesaurusIDlist>
            #      </Term>
            #     </TermList>
            #    </Concept>

            concepts = descriptor_record.findall('.ConceptList/Concept')

            for concept in concepts:
                preferred_concept = concept.attrib['PreferredConceptYN']

                concept_ui = concept.find('.ConceptUI').text

                # add concept node if it does not exist yet
                if concept_ui not in check_concepts:
                    concept_properties = {
                        'sid': concept_ui,
                        'name': concept.find('.ConceptName/String').text,
                    }

                    # <ScopeNote> is optional; the property is only set when
                    # the element is present
                    scope_note = concept.find('.ScopeNote')
                    if scope_note is not None:
                        concept_properties['scope_note'] = scope_note.text

                    self.concept.add_node(concept_properties)

                    check_concepts.add(concept_ui)

                # (Descriptor)--(Concept) relation
                self.descriptor_has_concept.add_relationship({'sid': descriptor_ui}, {'sid': concept_ui},
                                                             {'preferred': preferred_concept})

                # concept relations
                for concept_relation in concept.findall('.ConceptRelationList/ConceptRelation'):
                    left = concept_relation.find('.Concept1UI').text
                    right = concept_relation.find('.Concept2UI').text
                    name = concept_relation.attrib['RelationName']

                    self.concept_related_concept.add_relationship({'sid': left}, {'sid': right}, {'name': name})

                # iterate Terms for concept
                for term in concept.findall('.TermList/Term'):
                    term_ui = term.find('TermUI').text
                    concept_preferred_term = term.attrib['ConceptPreferredTermYN']

                    # add term node if it does not exist yet
                    if term_ui not in check_terms:
                        term_name = term.find('.String').text
                        self.term.add_node({'sid': term_ui, 'name': term_name})

                        check_terms.add(term_ui)

                    # (Concept)--(Term)
                    self.concept_has_term.add_relationship({'sid': concept_ui}, {'sid': term_ui},
                                                           {'preferred': concept_preferred_term})