Beispiel #1
0
 def _siphon_family_interactions(self, triples):
     """Converts the homodomain and heterodomain interaction tables.

     Both tables are read in full (tab separated, `num_skip=1`) before any
     triple is emitted. For every listed pair of families a blank node is
     appended to `triples`, linking the two Pfam families and the
     interaction type.
     """
     # (file name, column layout, key of 1st family, key of 2nd family);
     # a homodomain interaction pairs a family with itself.
     tables = (
         ("homodomain_interaction.csv",
          ("FAM", "ID", "INT_TYPE"), "FAM", "FAM"),
         ("heterodomain_interaction.csv",
          ("FAM1", "ID1", "FAM2", "ID2", "INT_TYPE"), "FAM1", "FAM2"),
     )
     pairs = []
     for filename, columns, first_key, second_key in tables:
         path = self._get_path(filename, is_db=False)
         for row in iterate_csv(path,
                                num_skip=1,
                                delimiter="\t",
                                fieldnames=columns):
             pairs.append((row[first_key], row[second_key],
                           row["INT_TYPE"]))

     seen_types = set()
     for first, second, kind in pairs:
         first_uri = O.uri(O.PFAM_ID, first)
         second_uri = O.uri(O.PFAM_ID, second)
         kind_uri = O.uri(O.IPFAM_INT_TYPE, kind)
         interaction = B()
         triples.extend([
             (interaction, O.RDF.type, O.IPFAM_INT),
             (interaction, O.IPFAM_INT_HAS_INT_TYPE, kind_uri),
             (interaction, O.IPFAM_INT_HAS_PFAM, first_uri),
             (interaction, O.IPFAM_INT_HAS_PFAM, second_uri),
         ])
         self._add_if_new(triples, seen_types, kind_uri, O.RDF.type,
                          O.IPFAM_INT_TYPE)
Beispiel #2
0
 def _siphon_regions(self, triples):
     """Converts the `pdb_protein_region.txt` file.

     For each row, appends to `triples` the region's type, the Pfam family
     it is an instance of, the PDB chain it occurs in and its start/end
     positions (as integer literals).
     """
     # Column layout of the tab-separated dump, with the SQL column types.
     FIELDS = (
         "REGION",  # int(10)
         "PROT_FAM",  # int(11)
         "PROT_FAM_ACC",  # varchar(45)
         "PDB_ID",  # varchar(4)
         "CHAIN",  # varchar(1)
         "START",  # int(11)
         "START_ICODE",  # varchar(1)
         "END",  # int(11)
         "END_ICODE",  # varchar(1)
         "REGION_SOURCE_DB",  # varchar(12)
     )
     rows = iterate_csv(self._get_path("pdb_protein_region.txt"),
                        delimiter="\t",
                        fieldnames=FIELDS)
     for row in rows:
         region_uri = O.uri(O.IPFAM_REGION, row["REGION"])
         family_uri = O.uri(O.PFAM_ID, row["PROT_FAM_ACC"])
         # The chain resource is "<lower-cased PDB ID>_<chain>".
         pdb_code = row["PDB_ID"].lower()
         chain_uri = U(O.PDBR + pdb_code + "_" + row["CHAIN"])
         first_position = L(int(row["START"]))
         last_position = L(int(row["END"]))
         triples.extend([
             (region_uri, O.RDF.type, O.IPFAM_REGION),
             (region_uri, O.IPFAM_REGION_INSTANCE_OF, family_uri),
             (region_uri, O.IPFAM_REGION_OCCURS_IN, chain_uri),
             (region_uri, O.IPFAM_REGION_STARTS_AT, first_position),
             (region_uri, O.IPFAM_REGION_STOPS_AT, last_position),
         ])
Beispiel #3
0
 def _siphon_cog(self, triples):
     """Converts the `COG.mappings.v<version>.txt` file.

     Rows whose taxon (the part of `TAXON.STRING_ID` before the first dot)
     differs from `self._taxon` are skipped. For the remaining rows, a
     membership triple and a type triple for the cluster are appended to
     `triples` when the cluster ID starts with "COG", "KOG" or "NOG";
     other clusters are ignored.
     """
     FIELDS = ("TAXON.STRING_ID", "START", "STOP", "CLUSTER_ID",
               "ANNOTATION")
     # cluster ID prefix -> (membership predicate, cluster class). Each
     # cluster is typed with the class its URI is minted from; the KOG and
     # NOG clusters used to be typed as COG clusters.
     GROUPS = (
         ("COG", O.STRING_ID_IN_COG, O.COG_CLUSTER_ID),
         ("KOG", O.STRING_ID_IN_KOG, O.KOG_CLUSTER_ID),
         ("NOG", O.STRING_ID_IN_NOG, O.NOG_CLUSTER_ID),
     )
     path = self._get_path("COG.mappings.v{}.txt").format(self._version)
     # Normalise to text so that the comparison below also works when the
     # taxon is stored as a number.
     wanted_taxon = "{}".format(self._taxon)
     for row in iterate_csv(path,
                            delimiter="\t",
                            fieldnames=FIELDS,
                            num_skip=1):
         taxon, _, string_id = row["TAXON.STRING_ID"].partition(".")
         if taxon != wanted_taxon:
             continue
         string_id = O.uri(O.STRING_ID, string_id)
         cluster_id = row["CLUSTER_ID"]
         for prefix, member_of, cluster_class in GROUPS:
             if cluster_id.startswith(prefix):
                 cluster_uri = O.uri(cluster_class, cluster_id)
                 triples.extend([
                     (string_id, member_of, cluster_uri),
                     (cluster_uri, O.RDF.type, cluster_class),
                 ])
                 break
Beispiel #4
0
    def _siphon_domains(self, triples):
        """Converts the `domains.tab` file.

        The data comes from an InterPro scan over the SGD entries. Each row
        becomes a blank-node hit attached to the feature it was found in.
        """
        FIELDS = (
            "FEAT_NAME",  # S. cerevisiae systematic name (ID of the input sequence)
            "CRC64",  # CRC of the protein sequence
            "LENGTH",  # Length of the sequence in AA
            "METHOD",  # Analysis method
            "DB_MEMBERS",  # DB members entry for this match
            "DB_DESCRIPTION",  # DB member description for the entry
            "START",  # start of the domain match
            "STOP",  # end of the domain match
            "EVALUE",  # E-value of the match (defined by DB)
            "STATUS",  # Status of the match: T=true, ?=unknown
            "DATE",  # Date of the run
            "IPR_ID",  # InterPro ID
            "IPR_DESCRIPTION",  # InterPro description
            "IPR_GO",  # GO description of the InterPro entry
        )
        # Any status other than these two raises a KeyError.
        STATUS_TO_BOOL = {"T": True, "?": False}
        rows = iterate_csv(self._get_path("domains.tab"),
                           delimiter="\t",
                           fieldnames=FIELDS)
        for row in rows:
            feature = O.uri(O.SGD_FEATURE, self._sanitize(row["FEAT_NAME"]))
            confirmed = L(STATUS_TO_BOOL[row["STATUS"]])

            # Only confirmed Pfam matches carry a database ID; every other
            # hit gets a literal built from None.
            if row["METHOD"] == "Pfam" and row["STATUS"] == "T":
                db_ref = O.uri(O.PFAM_ID, row["DB_MEMBERS"])
            else:
                db_ref = L(None)

            # Unparsable E-values are replaced by the -1.0 placeholder.
            try:
                score = L(float(row["EVALUE"]))
            except ValueError:
                score = L(-1.0)

            hit = B()
            triples.extend([
                (hit, O.RDF.type, O.SGD_IPR_HIT),
                (hit, O.SGD_IPR_HIT_HAS_ID, L(row["IPR_ID"])),
                (hit, O.SGD_IPR_HIT_HAS_METHOD, L(row["METHOD"])),
                (hit, O.SGD_IPR_HIT_HAS_DB_ID, db_ref),
                (hit, O.SGD_IPR_HIT_STARTS_AT, L(int(row["START"]))),
                (hit, O.SGD_IPR_HIT_STOPS_AT, L(int(row["STOP"]))),
                (hit, O.SGD_IPR_HIT_HAS_EVALUE, score),
                (hit, O.SGD_IPR_HIT_IS_TRUE, confirmed),
                (feature, O.SGD_FEATURE_HAS_IPR_HIT, hit),
            ])
Beispiel #5
0
    def _siphon_pdb_homologues(self, triples):
        """Converts the `pdb_homologs.tab` file.

        Each row becomes a blank-node homology match linking the query
        feature to the target PDB resource, together with the aligned
        fraction (`PERCENT_ALIGNED` rescaled from a percentage to [0, 1]).
        The coordinates, score and taxon columns are not converted.
        """
        FIELDS = (
            "FEAT_NAME",  # S. cerevisiae systematic name
            "START_COORD_QUERY",  # start coord (aa position) in yeast
            "STOP_COORD_QUERY",  # stop coord (aa position) in yeast
            "START_COORD_TARGET",  # start coord (aa position) in target
            "STOP_COORD_TARGET",  # stop coord (aa position) in target
            "PERCENT_ALIGNED",  # percent of yeast contained in target
            "SCORE",  # log of the expectation value
            "TARGET_PDB_ID",  # PDB identifier
            "TARGET_TAXON_ID",  # target taxon ID
            "TARGET_TAXON_NAME",  # target taxon species name
        )
        for row in iterate_csv(self._get_path("pdb_homologs.tab"),
                               delimiter="\t",
                               fieldnames=FIELDS):
            # Only the first element of the returned pair is used here.
            pdb_id_chain, _ = self._pdb_to_uri(row["TARGET_PDB_ID"])

            # Sanitize the feature name like every other converter that
            # mints SGD_FEATURE URIs, so that the query refers to the same
            # resource as the feature, domain and interaction triples.
            query = O.uri(O.SGD_FEATURE, self._sanitize(row["FEAT_NAME"]))
            aligned_fraction = L(float(row["PERCENT_ALIGNED"]) / 100.0)

            match = B()
            triples.extend([
                (match, O.RDF.type, O.SGD_PDB_HOMOLOGY),
                (match, O.SGD_PDB_HAS_QUERY, query),
                (match, O.SGD_PDB_ALIGNMENT, aligned_fraction),
                (match, O.SGD_PDB_HAS_TARGET, pdb_id_chain),
            ])
Beispiel #6
0
 def _siphon_aliases(self, triples):
     """Links YIP proteins to UniProt accessions using UniProt's yeast.txt.

     The document is downloaded and scanned line by line. Lines containing
     "____" or "----" are separators; only the lines found after exactly
     five separators are parsed as fixed-width records. For each record an
     `owl:sameAs` triple between the protein (named by its ordered locus
     name) and its Swiss-Prot accession is appended to `triples`.

     This is best-effort: any error (network failure, malformed record) is
     printed and stops the conversion without propagating to the caller.
     Triples appended before the error are kept.
     """
     from urllib2 import urlopen
     # (first column, one-past-last column, field name) of each box.
     BOXES = (
         #(  0,  75, "GD"),           # Gene designation
         ( 75,  95, "OLN"),          # Ordered locus name
         ( 95, 106, "SP_ACC"),       # Swiss-Prot accession
         #(106, 118, "SP_NAME"),      # Swiss-Prot entry name
         #(118, 128, "SGD_ACCESSION"),# SGD accession
         # XXX there are three more fields we do not care about
     )
     try:
         url = urlopen("http://www.uniprot.org/docs/yeast.txt")
         # Close the connection even when a record fails to parse.
         try:
             state = 0
             for line in url.read().split("\n"):
                 if "____" in line or "----" in line:
                     state += 1
                 elif state == 5:
                     fields = {}
                     for begin, end, name in BOXES:
                         chunk = line[begin:end].strip()
                         fields[name] = [item.strip()
                                         for item in chunk.split(";")]
                     # Explicit checks rather than asserts: they survive
                     # `python -O` and yield a readable diagnostic.
                     for name in ("OLN", "SP_ACC"):
                         if len(fields[name]) != 1:
                             raise ValueError(
                                 "expected exactly one {} in line {!r}"
                                 .format(name, line))
                     yip_acc = O.uri(O.YIP_PROTEIN, fields["OLN"][0])
                     sp_acc = U(O.UNIPROT_ID + fields["SP_ACC"][0])
                     triples.append((yip_acc, O.OWL.sameAs, sp_acc))
         finally:
             url.close()
     except Exception as e:
         print(e)
Beispiel #7
0
 def _siphon_interactions(self, triples):
     """Converts the `<taxon>.protein.actions.detailed.v<version>.txt` file.

     For every row whose two items both belong to `self._taxon`, appends a
     triple `(protein A, action mode, protein B)` to `triples`, where the
     proteins are named by the part of their ID after the taxon prefix.
     Rows involving any other taxon are skipped.
     """
     FIELDS = ("ITEM_ID_A", "ITEM_ID_B", "MODE", "ACTION", "A_IS_ACTING",
               "SCORE", "SOURCES", "TRANSFERRED_SOURCES")
     path = self._get_path("{}.protein.actions.detailed.v{}.txt") \
         .format(self._taxon, self._version)
     # Match the taxon *including* the separating dot. A bare prefix test
     # would also accept taxa that merely share the leading digits, and
     # would let IDs without a dot through to the split below.
     prefix = "{}.".format(self._taxon)
     for row in iterate_csv(path,
                            delimiter="\t",
                            fieldnames=FIELDS,
                            num_skip=1):
         id_a, id_b = row["ITEM_ID_A"], row["ITEM_ID_B"]
         if not (id_a.startswith(prefix) and id_b.startswith(prefix)):
             continue
         id_a = O.uri(O.STRING_ID, id_a.split(".", 1)[1])
         id_b = O.uri(O.STRING_ID, id_b.split(".", 1)[1])
         mode = O.uri(O.STRING_ACTION_MODE, row["MODE"])
         triples.append((id_a, mode, id_b))
Beispiel #8
0
    def _siphon_aliases(self, triples):
        """Converts the `<taxon>.protein.aliases.v<version>.txt` file.

        Every row yields a type triple for its STRING ID. In addition, an
        `owl:sameAs` triple towards the SGD ID is appended for each "SGD"
        alias type whose alias starts with "S0" or "L0".
        """
        FIELDS = ("TAXON", "STRING_ID", "ALIAS_ID", "ALIAS_TYPE")
        SGD_PREFIXES = ("S0", "L0")
        filename = "{}.protein.aliases.v{}.txt"
        path = self._get_path(filename).format(self._taxon, self._version)
        rows = iterate_csv(path,
                           delimiter="\t",
                           fieldnames=FIELDS,
                           num_skip=1)
        for row in rows:
            protein = O.uri(O.STRING_ID, row["STRING_ID"])
            triples.append((protein, O.RDF.type, O.OCELOT.STRING_ID))

            alias = row["ALIAS_ID"]
            # ALIAS_TYPE may list several whitespace-separated sources.
            for source in row["ALIAS_TYPE"].split():
                # XXX handle UniProt ACs here
                if source != "SGD" or not alias.startswith(SGD_PREFIXES):
                    continue
                sgd_alias = O.uri(O.SGD_ID, alias)
                triples.append((protein, O.OWL.sameAs, sgd_alias))
Beispiel #9
0
    def _siphon_sequences(self, triples):
        """Converts the `orf_trans_all.fasta` file.

        Each FASTA header must contain exactly one whitespace-delimited
        word starting with "SGDID:"; the ID following the colon (with any
        surrounding commas removed) is linked to the sequence.
        """
        fasta_path = self._get_path("orf_trans_all.fasta")
        for header, sequence in read_fasta(fasta_path):
            assert "SGDID:" in header

            tagged = [word for word in header.split()
                      if word.startswith("SGDID:")]
            assert len(tagged) == 1

            raw_id = tagged[0].split(":")[1].strip(",")
            sgd_id = O.uri(O.SGD_ID, self._sanitize(raw_id))
            triples.append((sgd_id, O.SGD_ID_HAS_SEQUENCE, L(sequence)))
Beispiel #10
0
 def _siphon_region_interactions(self, triples):
     """Converts the `pdb_protein_region_int.txt` file.

     For each row, appends to `triples` the region interaction's type, the
     PDB entry it occurs in and the two regions taking part in it.
     """
     # Column layout of the tab-separated dump, with the SQL definitions.
     FIELDS = (
         "REGION_INT",  # `auto_reg_int` bigint(20) NOT NULL AUTO_INCREMENT,
         "PDB_ID",  # `pdb_id` varchar(4) NOT NULL,
         "REGION_A",  # `region_id_A` int(10) unsigned NOT NULL,
         "REGION_B",  # `region_id_B` int(10) unsigned NOT NULL,
         "IS_INTRACHAIN",  # `intrachain` tinyint(1) NOT NULL,
         "QUALITY_CONTROL",  # `quality_control` int(10) unsigned NOT NULL,
     )
     rows = iterate_csv(self._get_path("pdb_protein_region_int.txt"),
                        delimiter="\t",
                        fieldnames=FIELDS)
     for row in rows:
         interaction = O.uri(O.IPFAM_REGION_INT, row["REGION_INT"])
         first_region = O.uri(O.IPFAM_REGION, row["REGION_A"])
         second_region = O.uri(O.IPFAM_REGION, row["REGION_B"])
         # PDB entries are referred to by their lower-cased ID.
         pdb_entry = U(O.PDBR + row["PDB_ID"].lower())
         triples.extend([
             (interaction, O.RDF.type, O.IPFAM_REGION_INT),
             (interaction, O.IPFAM_REGION_INT_OCCURS_IN, pdb_entry),
             (interaction, O.IPFAM_REGION_INT_HAS_REGION, first_region),
             (interaction, O.IPFAM_REGION_INT_HAS_REGION, second_region),
         ])
Beispiel #11
0
    def _siphon_families(self, triples):
        """Converts the `protein_family.txt` file.

        Rows whose source database is not "pfama" are reported and skipped.
        Every other row yields a type triple for the family and, when a
        family type is given, a link to that type. Each family type is
        itself typed once, the first time it is encountered.
        """
        FIELDS = (
            "FAMILY_INT",  # `auto_prot_fam` int(11) NOT NULL AUTO_INCREMENT,
            "FAMILY_ACC",  # `accession` varchar(45) DEFAULT NULL,
            "FAMILY_ID",  # `identifier` varchar(45) DEFAULT NULL,
            "DESCRIPTION",  # `description` text,
            "COMMENT",  # `comment` longtext,
            "FAMILY_TYPE",  # `type` enum('family','domain','motif','repeat') DEFAULT NULL,
            "SOURCE_DB",  # `source_db` enum('pfama') DEFAULT NULL,
            "COLOUR",  # `colour` varchar(7) DEFAULT NULL,
            "NUMBER_FAM_INT",  # `number_fam_int` int(5) DEFAULT '0',
            "NUMBER_LIG_INT",  # `number_lig_int` int(5) DEFAULT '0',
            "NUMBER_PDBS",  # `number_pdbs` int(5) DEFAULT '0',
        )

        known_types = set()
        rows = iterate_csv(self._get_path("protein_family.txt"),
                           num_skip=1,
                           delimiter="\t",
                           fieldnames=FIELDS)
        for row in rows:
            assert len(row["FAMILY_ACC"]) > 0

            if row["SOURCE_DB"] != "pfama":
                print("Warning: invalid row '{}'".format(row.items()))
                continue

            family = O.uri(O.PFAM_ID, row["FAMILY_ACC"])
            triples.append((family, O.RDF.type, O.PFAM_ID))

            # A missing (None) or empty family type adds nothing more.
            family_type = row["FAMILY_TYPE"]
            if not family_type:
                continue

            type_uri = O.uri(O.PFAM_TYPE, family_type)
            triples.append((family, O.PFAM_ID_HAS_TYPE, type_uri))
            if type_uri not in known_types:
                known_types.add(type_uri)
                triples.append((type_uri, O.RDF.type, O.PFAM_TYPE))
Beispiel #12
0
    def _siphon_interactions(self, triples):
        """Converts the `interaction_data.tab` file.

        Each row becomes a blank-node interaction linking the bait and hit
        features with the interaction, experiment and curation types and
        the source database. Every distinct type and source is additionally
        declared through `self._add_if_new`, using one bookkeeping set per
        kind.
        """
        FIELDS = (
            "BAIT_FEAT_NAME",  # Bait: feature (ORF) name and gene name (optional)
            "BAIT_STD_NAME",  #
            "HIT_FEAT_NAME",  # Hit: feature (ORF) name and gene name (optional)
            "HIT_STD_NAME",  #
            "EXPERIMENT_TYPE",  # Description of the experiment
            "INTERACTION_TYPE",  # 'Genetic' or 'Physical'
            "SOURCE_DATABASE",  # Source database
            "CURATION_TYPE",  # Manual or high-throughput
            "NOTES",  # Free text (useless)
            "PHENOTYPE",  # Phenotype of the interaction (optional)
            "REFERENCE",  # List of references as 'SGD_REF:" (SGDID) or 'PMID:' (PubMed)
            "CITATION",  # List of citations
        )
        int_types, exp_types, cur_types, sources = set(), set(), set(), set()
        for row in iterate_csv(self._get_path("interaction_data.tab"),
                               delimiter="\t",
                               fieldnames=FIELDS):
            bait = O.uri(O.SGD_FEATURE, self._sanitize(row["BAIT_FEAT_NAME"]))
            hit = O.uri(O.SGD_FEATURE, self._sanitize(row["HIT_FEAT_NAME"]))
            int_type = O.uri(O.SGD_INT_TYPE,
                             self._sanitize(row["INTERACTION_TYPE"]))
            exp_type = O.uri(O.SGD_INT_EXP_TYPE,
                             self._sanitize(row["EXPERIMENT_TYPE"]))
            cur_type = O.uri(O.SGD_INT_CUR_TYPE,
                             self._sanitize(row["CURATION_TYPE"]))
            source = O.uri(O.SGD_INT_SOURCE,
                           self._sanitize(row["SOURCE_DATABASE"]))

            self._add_if_new(triples, int_types, int_type, O.RDF.type,
                             O.SGD_INT_TYPE)
            self._add_if_new(triples, exp_types, exp_type, O.RDF.type,
                             O.SGD_INT_EXP_TYPE)
            self._add_if_new(triples, cur_types, cur_type, O.RDF.type,
                             O.SGD_INT_CUR_TYPE)
            # The bookkeeping container is the `sources` set, not the
            # `source` URI of the current row (which was passed by mistake).
            self._add_if_new(triples, sources, source, O.RDF.type,
                             O.SGD_INT_SOURCE)

            interaction = B()
            triples.extend([
                (interaction, O.RDF.type, O.SGD_INT),
                (interaction, O.SGD_INT_HAS_BAIT, bait),
                (interaction, O.SGD_INT_HAS_HIT, hit),
                (interaction, O.SGD_INT_HAS_TYPE, int_type),
                (interaction, O.SGD_INT_HAS_EXP_TYPE, exp_type),
                (interaction, O.SGD_INT_HAS_CUR_TYPE, cur_type),
                (interaction, O.SGD_INT_HAS_SOURCE, source),
            ])
Beispiel #13
0
 def _siphon_goslims(self, triples):
     """Converts the `go_slim_mapping.tab` file.

     Appends one `(SGD ID, has GO slim, GO term)` triple per row, except
     for rows whose term is "other" or "not_yet_annotated".
     """
     # XXX there are a few goslim annotations with commas
     FIELDS = (
         "GENE_ORF",  # Systematic gene name
         "GENE_STD",  # Gene name (optional)
         "SGD_ID",  # Gene SGD ID
         "GO_ASPECT",  # P = Process, F = Function, C = Component
         "GO_TERM",  # GO SLIM term
         "GO_ID",  # GO term ID
         "FEATURE_TYPE",  # Such as 'ORF' or 'tRNA'
     )
     IGNORED_TERMS = ("other", "not_yet_annotated")
     rows = iterate_csv(self._get_path("go_slim_mapping.tab"),
                        delimiter="\t",
                        fieldnames=FIELDS)
     for row in rows:
         if row["GO_TERM"] not in IGNORED_TERMS:
             assert len(row["GO_ID"]), row
             gene = O.uri(O.SGD_ID, self._sanitize(row["SGD_ID"]))
             term = O.go_to_uri(self._sanitize(row["GO_ID"]))
             triples.append((gene, O.SGD_ID_HAS_GOSLIM, term))
Beispiel #14
0
 def _siphon_xrefs(self, triples):
     """Converts the `dbxref.tab` file.

     Only the cross-references whose source is "EBI" and whose type is
     either "UniProt/TrEMBL ID" or "UniProt/Swiss-Prot ID" are converted;
     each one yields a `(SGD ID, has xref, UniProt resource)` triple. All
     other rows are skipped.
     """
     FIELDS = (
         "XREF_ID",  # Cross-reference ID
         "XREF_ID_SOURCE",  # Cross-reference database ID
         "XREF_ID_TYPE",  # Cross-reference type (like 'PDB chain' or 'PDB best hit')
         "FEAT_NAME",  # ORF name
         "SGD_ID",  # SGD ID
         "UNDOCUMENTED",  # Undocumented
     )
     # The row values are sanitized before the comparison, so the accepted
     # type names must be sanitized too; do it once rather than per row.
     uniprot_types = (
         self._sanitize("UniProt/TrEMBL ID"),
         self._sanitize("UniProt/Swiss-Prot ID"),
     )
     for row in iterate_csv(self._get_path("dbxref.tab"),
                            delimiter="\t",
                            fieldnames=FIELDS):
         sgd_id = O.uri(O.SGD_ID, self._sanitize(row["SGD_ID"]))
         xref_source = self._sanitize(row["XREF_ID_SOURCE"])
         xref_type = self._sanitize(row["XREF_ID_TYPE"])
         if xref_source != "EBI" or xref_type not in uniprot_types:
             continue
         # Wrap the concatenation in a URI reference, as done wherever else
         # a UniProt resource is built, so that the object of the triple is
         # a resource rather than a bare string.
         xref_id = U(O.UNIPROT_ID + row["XREF_ID"])
         triples.append((sgd_id, O.SGD_ID_HAS_XREF, xref_id))
Beispiel #15
0
    def _siphon_gos(self, triples):
        """Converts the `gene_association.sgd` file.

        Each row becomes a blank-node annotation carrying the SGD ID, the
        GO term and the evidence code. The remaining columns (qualifiers
        included) are not converted.
        """
        FIELDS = (
            "DB",  # always "SGD"
            "SGD_ID",  # SGD ID
            "DONTCARE_ID",  # yeah
            "QUALIFIERS",  # Stuff like NOT, contributes_to, colocalizes; pipe-separated
            "GO_ID",  # GO term ID
            "DB_REFS",  # DB references; pipe-separated
            "ECODE",  # Evidence code for the annotation
            "WITH_FROM",  # With or From optional qualifiers
            "GO_ASPECT",  # P = Process, F = Function, C = Component
            "DB_NAMES",  # DB object names; pipe-separated
            "DB_SYNONYMS",  # DB synonyms; pipe-separated
            "DB_TYPES",  # DB object types; pipe-separated
            "TAXIDS",  # Taxon IDs; pipe-separated
            "DATE",  # Date the GO annotation was defined in YYYYMMDD
            "ASSIGNED_BY",  # always "SGD"
            "EXTENSION",  # yeah
            "VARIANTS",  # yeah
        )
        # NOTE(review): the 26 skipped lines are presumably the comment
        # header of the association file -- confirm against the data.
        rows = iterate_csv(self._get_path("gene_association.sgd"),
                           num_skip=26,
                           delimiter="\t",
                           fieldnames=FIELDS)
        for row in rows:
            assert len(row["SGD_ID"]), row
            assert len(row["GO_ID"]), row
            gene = O.uri(O.SGD_ID, self._sanitize(row["SGD_ID"]))
            term = O.go_to_uri(self._sanitize(row["GO_ID"]))

            node = B()
            evidence = L(row["ECODE"])
            triples.extend([
                (node, O.RDF.type, O.SGD_GO_ANNOTATION),
                (node, O.SGD_GO_ANNOTATION_HAS_SGD_ID, gene),
                (node, O.SGD_GO_ANNOTATION_HAS_TERM, term),
                (node, O.SGD_GO_ANNOTATION_HAS_ECODE, evidence),
            ])
Beispiel #16
0
    def _siphon_features(self, triples):
        """Converts the `SGD_features.tab` file.

        Every row declares an SGD ID and its feature type. The optional
        columns (feature name, qualifiers, secondary SGD IDs, parent
        feature, chromosome, strand, start and stop coordinates) only
        contribute triples when they are not empty.
        """
        # XXX there may be chromosomes not marked as features
        FIELDS = (
            "SGD_ID",  # Primary SGD ID
            "FEAT_TYPE",  # Feature type
            "FEAT_QUALIFIER",  # Feature qualifier (optional)
            "FEAT_NAME",  # Feature name (optional)
            "STD_GENE_NAME",  # Standard gene name (optional)
            "ALIASES",  # Aliases (optional, |-separated)
            "PARENT_FEAT_NAME",  # Parent feature name (optional)
            "OTHER_SGD_IDS",  # Secondary SGD IDs (optional, |-separated)
            "CHROMOSOME",  # Chromosome (optional)
            "START_COORD",  # Start coordinate (optional)
            "STOP_COORD",  # Stop coordinate (optional)
            "STRAND",  # Strand (optional)
            "GENETIC_POS",  # Genetic position (optional)
            "COORD_VERSION",  # Coordinate version (optional)
            "SEQ_VERSION",  # Sequence version (optional)
            "DESCRIPTION",  # Description (optional)
        )

        feat_types, qualifiers, chromosomes = set(), set(), set()
        for row in iterate_csv(self._get_path("SGD_features.tab"),
                               delimiter="\t",
                               fieldnames=FIELDS):
            sgd_id = O.uri(O.SGD_ID, self._sanitize(row["SGD_ID"]))
            feat_type = O.uri(O.SGD_FEATURE_TYPE,
                              self._sanitize(row["FEAT_TYPE"]))
            triples.append((sgd_id, O.RDF.type, O.SGD_ID))
            # The feature name is optional. Without this guard every
            # unnamed feature would be declared `owl:sameAs` one shared
            # empty-name resource, and hence equivalent to all the others.
            if row["FEAT_NAME"]:
                feat_id = O.uri(O.SGD_FEATURE,
                                self._sanitize(row["FEAT_NAME"]))
                triples.extend([
                    (feat_id, O.RDF.type, O.SGD_FEATURE),
                    (sgd_id, O.OWL.sameAs, feat_id),
                ])
            triples.append((sgd_id, O.SGD_ID_HAS_TYPE, feat_type))
            self._add_if_new(triples, feat_types, feat_type, O.RDF.type,
                             O.SGD_FEATURE_TYPE)

            for q in self._sanitize_all(row["FEAT_QUALIFIER"]):
                if not q:
                    continue
                q = O.uri(O.SGD_ID_QUALIFIER, q)
                self._add_if_new(triples, qualifiers, q, O.RDF.type,
                                 O.SGD_ID_QUALIFIER)
                triples.append((sgd_id, O.SGD_ID_HAS_QUALIFIER, q))

            for other_id in self._sanitize_all(row["OTHER_SGD_IDS"]):
                if not other_id:
                    continue
                triples.append(
                    (sgd_id, O.OWL.sameAs, O.uri(O.SGD_ID, other_id)))

            parent = row["PARENT_FEAT_NAME"]
            if parent:
                parent = O.uri(O.SGD_FEATURE, self._sanitize(parent))
                triples.extend([
                    (parent, O.RDF.type, O.SGD_FEATURE),
                    (sgd_id, O.DCTERMS.isPartOf, parent),
                ])

            chromosome = row["CHROMOSOME"]
            if chromosome:
                # NOTE(review): "chrosomosome_" looks like a misspelling of
                # "chromosome_". It is left untouched because it is part of
                # the emitted URIs, which other code may rely on.
                chromosome = O.uri(O.SGD_FEATURE, "chrosomosome_" + chromosome)
                self._add_if_new(triples, chromosomes, chromosome, O.RDF.type,
                                 O.SGD_CHROMOSOME)
                triples.append((sgd_id, O.SGD_ID_IN_CHROMOSOME, chromosome))

            if row["STRAND"]:
                triples.append((sgd_id, O.SGD_ID_IN_STRAND, L(row["STRAND"])))
            if row["START_COORD"]:
                triples.append(
                    (sgd_id, O.SGD_ID_STARTS_AT, L(int(row["START_COORD"]))))
            if row["STOP_COORD"]:
                triples.append(
                    (sgd_id, O.SGD_ID_STOPS_AT, L(int(row["STOP_COORD"]))))
Beispiel #17
0
 def _siphon_ids(self, triples):
     """Declares one PDB ID for every path matched under `divided/mmCIF`.

     The ID is the file name up to its first dot; a type triple for it is
     appended to `triples`.
     """
     import os

     # NOTE(review): the pattern contains no wildcard, so it can only match
     # the `divided/mmCIF` entry itself; a pattern covering the files below
     # it is presumably intended -- confirm the directory layout.
     # NOTE(review): the URI is minted from `O.PDB_ID` but typed as
     # `O.PDBID`; confirm that both names exist and are meant to differ.
     for path in glob(self._get_path("divided/mmCIF")):
         # The loop variable is `path`; the body used to read an undefined
         # `f`. Drop the directories so that they cannot leak into the ID.
         filename = os.path.basename(path)
         pdb_id = O.uri(O.PDB_ID, filename.split(".")[0])
         triples.append((pdb_id, O.RDF.type, O.PDBID))
Beispiel #18
0
 def _r_uri(p, d, r):
     """Returns the YIP residue URI named `<p>_<d>_<r>`."""
     name = p + "_" + d + "_" + r
     return O.uri(O.YIP_RESIDUE, name)
Beispiel #19
0
 def _pfam_uri(d):
     """Returns the Pfam ID URI named `d`."""
     pfam_uri = O.uri(O.PFAM_ID, d)
     return pfam_uri
Beispiel #20
0
 def _d_uri(p, d):
     """Returns the YIP domain URI named `<p>_<d>`."""
     name = p + "_" + d
     return O.uri(O.YIP_DOMAIN, name)
Beispiel #21
0
 def _sgd_uri(p):
     """Returns the SGD feature URI named `p`."""
     feature_uri = O.uri(O.SGD_FEATURE, p)
     return feature_uri
Beispiel #22
0
 def _p_uri(p):
     """Returns the YIP protein URI named `p`."""
     protein_uri = O.uri(O.YIP_PROTEIN, p)
     return protein_uri