def get_gene(name):
    """
    Look up the aggregated metadata of every gene carrying the given name.

    input - name: gene name, matched exactly as an xsd:string literal
    output - whatever Endpoint.query returns for the SELECT; rows are
             grouped by ?Gene and multi-valued fields are concatenated
             with a comma followed by a newline
    """
    # The sub-category predicate is :has_sub_category (the spelling used by
    # get_all_genes); the previous ":has_has_sub_category" could never match,
    # leaving ?Sub_Category permanently empty.
    # NOTE(review): ?_Accession is aggregated below but never bound in the
    # WHERE clause, so ?Accession is always empty -- the intended predicate
    # is not visible from this file; confirm and add it.
    # NOTE(review): `name` is interpolated into the literal unescaped; a
    # double quote or backslash in it yields a malformed query -- confirm
    # that callers sanitise it.
    query = prefixes + """
    SELECT ?Gene
        (GROUP_CONCAT (DISTINCT ?_Gene_Name ; separator=',\\n') AS ?Gene_Name)
        (GROUP_CONCAT (DISTINCT ?_Accession ; separator=',\\n') AS ?Accession)
        (GROUP_CONCAT (DISTINCT ?_Sub_Category ; separator=',\\n') AS ?Sub_Category)
        (GROUP_CONCAT (DISTINCT ?_Category_Id ; separator=',\\n') AS ?Category_Id)
        (GROUP_CONCAT (DISTINCT ?_ARO_Accession ; separator=',\\n') AS ?ARO_Accession)
        (GROUP_CONCAT (DISTINCT ?_ARO_Id ; separator=',\\n') AS ?ARO_Id)
        (GROUP_CONCAT (DISTINCT ?_VFO_Id ; separator=',\\n') AS ?VFO_Id)
    WHERE {
        {
            ?Gene rdf:type gfvo:gene .
            ?Gene :has_name "%s"^^xsd:string .
            ?Gene :has_name ?_Gene_Name
        }
        OPTIONAL { ?Gene :has_category ?_Category_Id }
        OPTIONAL { ?Gene :has_sub_category ?_Sub_Category }
        OPTIONAL { ?Gene :has_vfo_id ?_VFO_Id }
        OPTIONAL { ?Gene :has_aro_accession ?_ARO_Accession }
        OPTIONAL { ?Gene :has_aro_id ?_ARO_Id }
    }
    GROUP BY ?Gene
    ORDER BY (?Gene)
    """ % name
    return Endpoint.query(query)
def get_all_genes(type_="all"):
    """
    List every gene with its names, categories and sub-categories.

    input - type_: "vf" restricts the result to :virulence_factor genes,
            "amr" to :antimicrobial_resistance genes; any other value
            (including the default "all") adds no restriction
    output - whatever Endpoint.query returns for the SELECT, grouped by ?Gene
    """
    # Extra triple pattern spliced into the mandatory block of the query;
    # it stays empty when no type restriction applies.
    type_pattern = (
        "?Gene rdf:type :virulence_factor ." if type_ == "vf"
        else "?Gene rdf:type :antimicrobial_resistance ." if type_ == "amr"
        else ""
    )
    sparql = prefixes + """
    SELECT ?Gene
        (GROUP_CONCAT (DISTINCT ?_Gene_Name ; separator=',\\n') AS ?Gene_Name)
        (GROUP_CONCAT (DISTINCT ?_Category ; separator=',\\n') AS ?Category)
        (GROUP_CONCAT (DISTINCT ?_Sub_Category ; separator=',\\n') AS ?Sub_Category)
    WHERE {
        {
            ?Gene rdf:type gfvo:gene .
            ?Gene :has_name ?_Gene_Name .
            %s
        }
        OPTIONAL { ?Gene :has_category ?_Category }
        OPTIONAL { ?Gene :has_sub_category ?_Sub_Category }
    }
    GROUP BY ?Gene
    ORDER BY (?Gene)
    """ % type_pattern
    return Endpoint.query(sparql)
def get_categories(type_):
    """
    Fetch the category / sub-category label pairs for a gene type.

    input - type_: "vf" or "amr"
    output - whatever Endpoint.query returns for the SELECT
             (?Category, ?Subcategory rows)
    raises - ValueError for any other type_
    """
    # Reject unsupported types before building the query. The old check
    # compared the builtin `type` (not `type_`) against "amr", so "amr"
    # always fell through to the ValueError.
    if type_ not in ("vf", "amr"):
        raise ValueError("Non-valid gene type inserted.")

    ## "amr" temporarily loads vf data until amr categories are figured out,
    ## so both supported types share this one query.
    query = prefixes + """
    SELECT ?Category ?Subcategory
    WHERE {
        ?cat rdfs:subClassOf vfo:category .
        ?subcat rdfs:subClassOf ?cat .
        ?cat rdfs:label ?Category .
        ?subcat rdfs:label ?Subcategory .
        FILTER (?cat != ?subcat && ?cat != vfo:category) .
        MINUS { ?cat owl:equivalentClass ?equivclass . }
    }
    """
    return Endpoint.query(query)
def get_regions(genome_list, gene_list):
    """
    Find the regions (gene copies) of the given genes in the given genomes.

    input - genome_list: iterable of genome local names; each is emitted as
            a ":<name>" prefixed name in the VALUES ?Genome clause
          - gene_list: iterable of gene names; each is emitted as a quoted
            literal in the VALUES ?Gene_Names clause
    output - whatever Endpoint.query returns for the SELECT
             (?Region, ?Gene_Name, ?Genome rows)
    """
    # Build both VALUES lists in one pass each; every entry keeps the same
    # surrounding spaces the incremental concatenation used to produce.
    genes = "".join(' "%s" ' % gene for gene in gene_list)
    genomes = "".join(' :%s ' % genome for genome in genome_list)
    query = prefixes + """
    SELECT ?Region ?Gene_Name ?Genome
    WHERE {
        VALUES ?Genome { %s }
        VALUES ?Gene_Names { %s }
        ?Region a faldo:Region .
        ?Gene :has_copy ?Region ;
              :has_name ?Gene_Name .
        ?Contig :has_gene ?Region ;
                :is_contig_of ?Genome .
        FILTER (?Gene_Name = ?Gene_Names)
    }
    """ % (genomes, genes)
    # Single pre-formatted argument: prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("query is: %s" % query)
    return Endpoint.query(query)
def last_user():
    """
    Fetch the subject typed as a User that sorts last.

    input - None
    output - whatever Endpoint.query returns for the SELECT; the query
             orders the matching ?s descending and keeps one row
    """
    # This query declares its own prefixes instead of using the shared
    # `prefixes` header.
    sparql = """
    PREFIX user: <https://github.com/superphy#User>
    PREFIX RDF_type: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>

    SELECT ?s
    WHERE {
        ?s RDF_type: user:
    }
    ORDER BY DESC(?s)
    LIMIT 1
    """
    return Endpoint.query(sparql)
def get_genome_metadata(accession):
    """
    Fetch the aggregated metadata of the genome(s) with the given accession.

    input - accession: accession, matched exactly as an xsd:string literal
    output - whatever Endpoint.query returns for the SELECT; rows are
             grouped by ?Genome_Uri and multi-valued fields are
             concatenated with a comma followed by a newline
    """
    # ?_Accession has to be bound in the WHERE clause for the ?Accession
    # aggregate to carry a value; previously only the constant literal was
    # matched, so ?Accession always came back empty.
    # NOTE(review): `accession` is interpolated into the literal unescaped;
    # a double quote or backslash in it yields a malformed query -- confirm
    # that callers sanitise it.
    string = prefixes + """
    PREFIX : <https://github.com/superphy#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX gfvo: <http://www.biointerchange.org/gfvo#>

    SELECT ?Genome_Uri
        (GROUP_CONCAT (DISTINCT ?_Syndrome ; separator=',\\n') AS ?Syndromes)
        (GROUP_CONCAT (DISTINCT ?_Accession ; separator=',\\n') AS ?Accession)
        (GROUP_CONCAT (DISTINCT ?_Biosample_Id ; separator=',\\n') AS ?Biosample_Id)
        (GROUP_CONCAT (DISTINCT ?_Bioproject_Id ; separator=',\\n') AS ?Bioproject_Id)
        (GROUP_CONCAT (DISTINCT ?_Strain ; separator=',\\n') AS ?Strain)
        (GROUP_CONCAT (DISTINCT ?_Serotype_O ; separator=',\\n') AS ?Serotype_O)
        (GROUP_CONCAT (DISTINCT ?_Serotype_H ; separator=',\\n') AS ?Serotype_H)
        (GROUP_CONCAT (DISTINCT ?_Scientific_Name ; separator=',\\n') AS ?Scientific_Name)
        (GROUP_CONCAT (DISTINCT ?_Common_Name ; separator=',\\n') AS ?Common_Name)
        (GROUP_CONCAT (DISTINCT ?_Isolation_Date ; separator=',\\n') AS ?Isolation_Date)
        (GROUP_CONCAT (DISTINCT ?_Geographic_Location ; separator=',\\n') AS ?Geographic_Location)
    WHERE {
        {
            ?Genome_Uri a gfvo:Genome .
            ?Genome_Uri :has_accession "%s"^^xsd:string .
            ?Genome_Uri :has_accession ?_Accession
        }
        OPTIONAL { ?Genome_Uri :has_bioproject ?_Bioproject_Id }
        OPTIONAL { ?Genome_Uri :has_biosample ?_Biosample_Id }
        OPTIONAL { ?Genome_Uri :has_Htype ?_Serotype_H_Uri .
                   ?_Serotype_H_Uri rdfs:label ?_Serotype_H }
        OPTIONAL { ?Genome_Uri :has_Otype ?_Serotype_O_Uri .
                   ?_Serotype_O_Uri rdfs:label ?_Serotype_O }
        OPTIONAL { ?Genome_Uri :has_geographic_location ?_Geographic_Location }
        OPTIONAL { ?Genome_Uri :has_strain ?_Strain }
        OPTIONAL { ?Genome_Uri :has_attribute ?_From_Host_Uri .
                   ?_From_Host_Uri rdf:type :isolation_from_host .
                   ?_From_Host_Uri :has_attribute ?_Host_Uri .
                   ?_Host_Uri :scientific_name ?_Scientific_Name .
                   ?_Host_Uri :common_name ?_Common_Name }
        OPTIONAL { ?Genome_Uri :has_isolation_date ?_Isolation_Date }
        OPTIONAL { ?Genome_Uri :has_isolation_attribute ?_Syndrome_Uri .
                   ?_Syndrome_Uri rdf:type :isolation_syndrome .
                   ?_Syndrome_Uri rdfs:label ?_Syndrome }
    }
    GROUP BY ?Genome_Uri
    ORDER BY (?Genome_Uri)
    """ % accession
    return Endpoint.query(string)
def find_regions(gene, genome):
    """
    Find the regions that are copies of the named gene.

    input - gene: gene name, matched exactly as an xsd:string literal
          - genome: accession; it is only substituted into a line that is
            commented out inside the SPARQL text, so it does not currently
            restrict the result
    output - whatever Endpoint.query returns for the SELECT (?Region rows)
    """
    # The '#' line is a SPARQL comment and must stay on a line of its own;
    # it still consumes the second format argument.
    template = prefixes + """
    SELECT ?Region
    WHERE {
        {
            ?Region rdf:type faldo:Region .
            ?Gene :has_copy ?Region .
            ?Contig :has_gene ?Region .
            ?Contig :is_contig_of ?Genome .
            ?Gene :has_name "%s"^^xsd:string .
            #?Genome :has_accession "%s"^^xsd:string .
        }
    }
    """
    sparql = template % (gene, genome)
    return Endpoint.query(sparql)
def get_all_literals():
    """
    Fetch every triple in which any position is a literal.

    input - None
    output - whatever Endpoint.query returns for the SELECT (?s ?p ?o rows);
             the query is the union of three branches testing ISLITERAL on
             the object, the predicate and the subject respectively
    """
    sparql = """
    SELECT ?s ?p ?o
    WHERE {
        {?s ?p ?o FILTER ISLITERAL(?o) }
        UNION
        {?s ?p ?o FILTER ISLITERAL(?p) }
        UNION
        {?s ?p ?o FILTER ISLITERAL(?s) }
    }"""
    return Endpoint.query(sparql)
def get_all_uri_triples():
    """
    Fetch the triples left after subtracting those with a literal object.

    input - None
    output - whatever Endpoint.query returns for the SELECT (?s ?p ?o rows)
    """
    # The query text is kept as written, including the two MINUS groups
    # that contain only a FILTER.
    sparql = """
    SELECT ?s ?p ?o
    WHERE {
        {?s ?p ?o}
        MINUS {?s ?p ?o FILTER ISLITERAL(?o) }
        MINUS { FILTER ISLITERAL(?p) }
        MINUS { FILTER ISLITERAL(?s) }
    }"""
    return Endpoint.query(sparql)
def get_all_syndromes():
    """
    input - None
    output - list of all the unique syndrome labels
    """
    sparql = prefixes + """
    SELECT ?syndromes
    WHERE {
        ?_Syndrome_Uri rdf:type :isolation_syndrome .
        ?_Syndrome_Uri rdfs:label ?syndromes
    }
    group by ?syndromes"""
    response = Endpoint.query(sparql)
    # Each binding is read through the first variable named in the result
    # head, and only its 'value' entry is kept.
    return [
        row[response['head']['vars'][0]]['value']
        for row in response['results']['bindings']
    ]
def get_all_genome_metadata():
    """
    Fetch the aggregated metadata of every genome.

    input - None
    output - whatever Endpoint.query returns for the SELECT; rows are
             grouped by ?Genome_Uri and multi-valued fields are
             concatenated with a comma followed by a newline
    """
    # Only the gfvo:Genome typing is mandatory; every metadata field is
    # matched through its own OPTIONAL group.
    sparql = prefixes + """
    SELECT ?Genome_Uri
        (GROUP_CONCAT (DISTINCT ?_Syndrome ; separator=',\\n') AS ?Syndromes)
        (GROUP_CONCAT (DISTINCT ?_Accession ; separator=',\\n') AS ?Accession)
        (GROUP_CONCAT (DISTINCT ?_Biosample_Id ; separator=',\\n') AS ?Biosample_Id)
        (GROUP_CONCAT (DISTINCT ?_Bioproject_Id ; separator=',\\n') AS ?Bioproject_Id)
        (GROUP_CONCAT (DISTINCT ?_Strain ; separator=',\\n') AS ?Strain)
        (GROUP_CONCAT (DISTINCT ?_Serotype_O ; separator=',\\n') AS ?Serotype_O)
        (GROUP_CONCAT (DISTINCT ?_Serotype_H ; separator=',\\n') AS ?Serotype_H)
        (GROUP_CONCAT (DISTINCT ?_Scientific_Name ; separator=',\\n') AS ?Scientific_Name)
        (GROUP_CONCAT (DISTINCT ?_Common_Name ; separator=',\\n') AS ?Common_Name)
        (GROUP_CONCAT (DISTINCT ?_Isolation_Date ; separator=',\\n') AS ?Isolation_Date)
        (GROUP_CONCAT (DISTINCT ?_Geographic_Location ; separator=',\\n') AS ?Geographic_Location)
    WHERE {
        { ?Genome_Uri a gfvo:Genome }
        OPTIONAL { ?Genome_Uri :has_bioproject ?_Bioproject_Id}
        OPTIONAL { ?Genome_Uri :has_biosample ?_Biosample_Id}
        OPTIONAL { ?Genome_Uri :has_Htype ?_Serotype_H_Uri .
                   ?_Serotype_H_Uri rdfs:label ?_Serotype_H }
        OPTIONAL { ?Genome_Uri :has_Otype ?_Serotype_O_Uri .
                   ?_Serotype_O_Uri rdfs:label ?_Serotype_O }
        OPTIONAL { ?Genome_Uri :has_geographic_location ?_Geographic_Location}
        OPTIONAL { ?Genome_Uri :has_accession ?_Accession}
        OPTIONAL { ?Genome_Uri :has_strain ?_Strain}
        OPTIONAL { ?Genome_Uri :has_attribute ?_From_Host_Uri .
                   ?_From_Host_Uri rdf:type :isolation_from_host .
                   ?_From_Host_Uri :has_attribute ?_Host_Uri .
                   ?_Host_Uri :scientific_name ?_Scientific_Name .
                   ?_Host_Uri :common_name ?_Common_Name }
        OPTIONAL { ?Genome_Uri :has_isolation_date ?_Isolation_Date}
        OPTIONAL { ?Genome_Uri :has_isolation_attribute ?_Syndrome_Uri .
                   ?_Syndrome_Uri rdf:type :isolation_syndrome .
                   ?_Syndrome_Uri rdfs:label ?_Syndrome }
    }
    GROUP BY ?Genome_Uri
    ORDER BY (?Genome_Uri)
    """
    return Endpoint.query(sparql)
def get_x_triples(x):
    """
    Fetch at most x triples from the store.

    input - x: value substituted after LIMIT in the query
    output - whatever Endpoint.query returns for the SELECT
    """
    # NOTE(review): x is interpolated as-is; nothing here checks that it is
    # a non-negative integer -- confirm that callers guarantee it.
    limited_query = "SELECT * {?s ?p ?o} LIMIT %s" % x
    return Endpoint.query(limited_query)
def get_object_literals():  # Verified
    """
    Fetch every triple whose object is a literal.

    input - None
    output - whatever Endpoint.query returns for the SELECT (?s ?p ?o rows)
    """
    # A later definition with this same name in this file rebinds the name
    # at import time; its query is identical to this one.
    sparql = "SELECT ?s ?p ?o WHERE {?s ?p ?o FILTER ISLITERAL(?o)}"
    return Endpoint.query(sparql)
def get_all_triples():  # Verified
    """
    Fetch every triple in the store.

    input - None
    output - whatever Endpoint.query returns for the unrestricted SELECT
    """
    sparql = "SELECT * {?s ?p ?o}"
    return Endpoint.query(sparql)
def get_object_literals():  # Verified
    """
    Fetch every triple whose object is a literal.

    input - None
    output - whatever Endpoint.query returns for the SELECT (?s ?p ?o rows)
    """
    # This redefines a function of the same name that appears earlier in
    # this file; the two queries are identical, and this later definition
    # is the one the module ends up exposing.
    literal_query = "SELECT ?s ?p ?o WHERE {?s ?p ?o FILTER ISLITERAL(?o)}"
    return Endpoint.query(literal_query)