def fetchGene(GeneName):
    """Fetch the genomic DNA record for a yeast gene from YeastMine.

    Runs the ``Gene_GenomicDNA`` template with a LOOKUP constraint on
    *GeneName* (extra value "S. cerevisiae") and builds a ``SeqRecord`` from
    the first row that comes back.

    Args:
        GeneName: Gene identifier used as the LOOKUP value.

    Returns:
        A ``SeqRecord`` whose sequence is the row's ``sequence.residues``,
        whose ``id`` is the row's ``secondaryIdentifier``, whose ``name`` is
        *GeneName* and whose ``description`` is the row's ``description``.

    Raises:
        ValueError: If the query returns no rows at all.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')
    rows = template.rows(
        E={"op": "LOOKUP", "value": GeneName, "extra_value": "S. cerevisiae"}
    )

    # The service seems to return multiple similar genes, but only the first
    # one is wanted, so take it directly instead of counting through them all.
    first_row = next(iter(rows), None)
    if first_row is None:
        # Previously this case surfaced as an UnboundLocalError further down.
        raise ValueError(
            "YeastMine returned no gene matching %r" % (GeneName,))

    gene_sys_name = first_row["secondaryIdentifier"]

    # Create the record, then add some more information to make it useful.
    gene_record = SeqRecord(Seq(first_row["sequence.residues"]),
                            id=gene_sys_name)
    gene_record.name = GeneName
    # NOTE(review): Biopython documents SeqRecord.features as a list of
    # SeqFeature objects; the systematic name string is stored here only
    # because existing callers may read it back from this attribute.
    gene_record.features = gene_sys_name
    gene_record.description = first_row["description"]
    return gene_record
def templates(request):
    """Render the list of templates offered by the registered InterMines.

    The optional ``mines`` GET parameter is split on ``'+'`` into the names
    of the mines whose templates should be listed; when it is absent every
    ``InterMine`` object is queried. Templates that share a name across
    mines are merged into one entry that lists all of those mines.

    Args:
        request: The incoming Django request.

    Returns:
        The rendered ``intermine_mgr/templates.html`` response. Its context
        holds ``existing_mines`` (every mine name, in query order),
        ``existing_templates`` (a list of dicts with ``name``, ``title``,
        ``description`` and ``mines``, sorted case-insensitively by title)
        and ``user_mines`` (the selected names, or ``None``).
    """
    # Determine available InterMines and associated templates
    selected_mines = request.GET.get('mines')
    if selected_mines is not None:
        selected_mines = selected_mines.split('+')

    existing_mines = []
    existing_templates = {}
    for im in InterMine.objects.all():
        existing_mines.append(im.name)
        if selected_mines is not None and im.name not in selected_mines:
            continue
        try:
            service = Service(im.url.rstrip('/'))
        except Exception:
            # Service is inaccessible, or some other error: skip this mine.
            # (Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit still propagate.)
            continue
        for t_name in service.templates:
            entry = existing_templates.get(t_name)
            if entry is not None:
                entry['mines'].append(im.name)
                continue
            # Only load the template the first time its name is seen; the
            # details of later duplicates were never used.
            t = service.get_template(t_name)
            existing_templates[t_name] = {
                'name': t.name,
                'title': t.title,
                'description': t.description,
                'mines': [im.name],
            }

    # Sort existing_templates properly, and convert it to a list
    for entry in existing_templates.values():
        entry['mines'].sort(key=lambda m: m.lower())
    existing_templates = sorted(existing_templates.values(),
                                key=lambda t: t['title'].lower())

    context = {
        'existing_mines': existing_mines,
        'existing_templates': existing_templates,
        'user_mines': selected_mines,
    }
    return render(request, 'intermine_mgr/templates.html', context)
def fetchGene(GeneName):
    """Look up a yeast gene on YeastMine and return it as a ``SeqRecord``.

    The ``Gene_GenomicDNA`` template is queried with a LOOKUP constraint on
    *GeneName* (extra value "S. cerevisiae"); the first returned row supplies
    the sequence, systematic name and description.

    Args:
        GeneName: Gene identifier used as the LOOKUP value.

    Returns:
        A ``SeqRecord`` built from the row's ``sequence.residues`` with
        ``id`` set to the row's ``secondaryIdentifier``, ``name`` set to
        *GeneName* and ``description`` set to the row's ``description``.

    Raises:
        ValueError: If the query yields no rows.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')
    lookup = {
        "op": "LOOKUP",
        "value": GeneName,
        "extra_value": "S. cerevisiae",
    }

    # This service seems to return multiple similar genes; only the first one
    # is wanted, so stop after a single row.
    row = next(iter(template.rows(E=lookup)), None)
    if row is None:
        # An empty result used to fall through to an UnboundLocalError.
        raise ValueError(
            "YeastMine returned no gene matching %r" % (GeneName,))

    systematic_name = row["secondaryIdentifier"]

    # Create the record, then attach the extra information that makes it
    # useful downstream.
    record = SeqRecord(Seq(row["sequence.residues"]), id=systematic_name)
    record.name = GeneName
    # NOTE(review): SeqRecord.features is documented as a list of SeqFeature
    # objects; the plain string is retained for compatibility with callers
    # that read the systematic name from it.
    record.features = systematic_name
    record.description = row["description"]
    return record
def fetchGene(GeneName):
    """Fetch a yeast gene from YeastMine, echo what was found, return it.

    Queries the ``Gene_GenomicDNA`` template with a LOOKUP constraint on
    *GeneName* (extra value "S. cerevisiae"), prints the first hit's
    systematic name, description and sequence to stdout, and wraps that hit
    in a ``SeqRecord``.

    Args:
        GeneName: Gene identifier used as the LOOKUP value.

    Returns:
        A ``SeqRecord`` built from the row's ``sequence.residues`` with
        ``id`` set to the row's ``secondaryIdentifier`` and ``name`` set to
        *GeneName*. The record's description is not set by this function.

    Raises:
        ValueError: If the query returns no rows.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')
    rows = template.rows(
        E={"op": "LOOKUP", "value": GeneName, "extra_value": "S. cerevisiae"}
    )

    # The service seems to return multiple similar genes, but only the first
    # one is wanted, so take a single row rather than counting through all.
    row = next(iter(rows), None)
    if row is None:
        # An empty result used to end in an UnboundLocalError below.
        raise ValueError(
            "YeastMine returned no gene matching %r" % (GeneName,))

    gene_seq = Seq(row["sequence.residues"])
    gene_sys_name = row["secondaryIdentifier"]

    # Chatty progress output for interactive use.
    print(" ")
    print("I think you want...... "+row["secondaryIdentifier"])
    print(row["description"])
    print(" ")
    print(row["sequence.residues"])
    print(" ")
    print("Good choice! I have a feeling you're going to get lucky with this one.")
    print(" ")
    print("Give me a second to put some of my ducks in a circle...")

    # Create the record, then add some more information to make it useful.
    gene_record = SeqRecord(gene_seq, id=gene_sys_name)
    gene_record.name = GeneName
    # NOTE(review): SeqRecord.features is documented as a list of SeqFeature
    # objects; the string is kept because callers may read it from here.
    gene_record.features = gene_sys_name
    return gene_record
def get_gene_genomic_seq_as_FASTA(gene_id,
                                  extension_for_saving=extension_for_saving,
                                  return_text=False):
    '''
    Main function of script.
    Takes a gene's systematic name, standard name, or alias as defined at gene
    page at yeastgenome.org, retrieves the associated information from
    YeastMine, and saves or returns the genomic sequence of the gene in FASTA
    format.

    Parameters:
    `gene_id`: identifier used as the LOOKUP value of the YeastMine query.
    `extension_for_saving`: passed to `generate_output_file_name` together
    with the gene's nomenclature when the record is saved to a file.
    `return_text`: use if calling from IPython or a Jupyter notebook and you
    want the FASTA record returned as text instead of written to a file.

    Returns the FASTA-formatted record as a string when `return_text` is
    true; otherwise writes the file and returns None.

    Raises `IndexError` when YeastMine returns no rows for `gene_id` (the
    same exception type an empty result produced before, now with a message
    that names the gene).

    Progress messages are written to stderr.
    '''
    # Announce the lookup before it is performed, so the message is already
    # visible if the request is slow or fails.
    sys.stderr.write("looking up the gene associated with "
                     "{}...".format(gene_id))

    # Get gene information from YeastMine
    #---------------------------------------------------------------------------
    # Based on the template Gene_Genomic DNA available under
    # 'Gene --> Genomic DNA' when under 'Templates' on navigation bar
    # in middle of page at YeastMine. Direct link:
    # https://yeastmine.yeastgenome.org/yeastmine/template.do?name=Gene_GenomicDNA&scope=all
    service = Service(
        "https://yeastmine.yeastgenome.org:443/yeastmine/service")

    # Retrieve genomic DNA (DNA sequence with introns) for the specified gene.
    template = service.get_template('Gene_GenomicDNA')

    # You can edit the constraint values below
    # E    Gene
    rows = template.rows(E={
        "op": "LOOKUP",
        "value": gene_id,
        "extra_value": "S. cerevisiae"
    })

    # Only the first row is used, so there is no need to collect them all.
    first_hit = next(iter(rows), None)
    if first_hit is None:
        raise IndexError(
            "no gene found at YeastMine for '{}'".format(gene_id))

    # store corresponding gene genomic sequence
    genomic_seq = first_hit["sequence.residues"]

    # format gene_nom_info for making output file name or anything else needing
    # that information
    gene_nom_info = {
        'sys_nom': first_hit["secondaryIdentifier"],
        'std_nom': first_hit["symbol"],
        'aliases': first_hit["sgdAlias"],
    }

    # Make output FASTA record
    #---------------------------------------------------------------------------
    # based on handling worked out in
    # `delete_seq_following_pattern_within_multiFASTA.py`; see also
    # https://www.biostars.org/p/48797/ and https://biopython.org/wiki/SeqIO
    record_description = '{}'.format(gene_nom_info['sys_nom'])
    # NOTE(review): `generic_dna` comes from Bio.Alphabet, which was removed
    # in Biopython 1.78 -- confirm the Biopython version this script targets.
    record = SeqRecord(Seq(genomic_seq, generic_dna),
                       id=gene_nom_info['std_nom'],
                       description=record_description)
    sys.stderr.write("getting genomic sequence for the gene...")

    # Return text if called with `return_text = True`. Otherwise, consider
    # called from command line & save file.
    #---------------------------------------------------------------------------
    if return_text:
        # based on section 4.6 at
        # http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec:SeqRecord-format
        # Feedback
        sys.stderr.write("\nReturning genomic sequence in FASTA format.")
        return record.format("fasta")

    output_file_name = generate_output_file_name(gene_nom_info,
                                                 extension_for_saving)
    SeqIO.write(record, output_file_name, "fasta")
    # Feedback
    sys.stderr.write("\n\nFile of genomic sequence "
                     "saved as '{}'.".format(output_file_name))
    sys.stderr.write("\nFinished.\n")
import pandas as pd service = Service("http://yeastmine.yeastgenome.org/yeastmine/service") #-------------------------------------------------------------------# # Gene Info #-------------------------------------------------------------------# gene = service.model.Gene.where(symbol = 'HFA1').first() print gene.symbol + "\n" + gene.description print gene #-------------------------------------------------------------------# # Model templates #-------------------------------------------------------------------# template = service.get_template("Gene_Pathways") for row in template.results(A={"symbol":"HFA1"}): print row #-------------------------------------------------------------------# # Query #-------------------------------------------------------------------# query = service.new_query("Gene") query.add_view("primaryIdentifier","name","symbol","pathways.name") query.add_constraint("Gene", "LOOKUP", "HFA1") for row in query.rows(): print row # The view specifies the output columns query.add_view( "primaryIdentifier", "secondaryIdentifier", "symbol", "name", "sgdAlias",
def template_constraints(request):
    """Render one template's constraint form and, once submitted, its results.

    GET parameters read by this view:

    * ``q`` -- ``<template name>__<mine name>``; selects the template and the
      mine whose copy of it supplies the constraint definitions.
    * ``op<X>``, ``value<X>``, ``value2<X>`` -- operator, value and extra
      value submitted for the constraint with code ``<X>`` (``A``, ``B``, ...
      assigned by position in the template).
    * ``gene_op<X>``, ``gene_value<X>`` -- gene-list based alternative to
      ``op<X>``/``value<X>``.
    * ``page``, ``rows`` -- paging; without ``rows`` all results are shown.
    * ``Mine`` -- restricts the query to the mine with that name.

    If any constraint has neither ``gene_op<X>`` nor ``op<X>`` submitted, the
    form is rendered with the template's default values and no query is run.
    Otherwise the template is run on every ``InterMine`` object (or only the
    one named by ``Mine``); the rows are merged, sorted and paged.

    Both outcomes render ``intermine_mgr/template_constraints.html``.

    NOTE(review): the nesting of a few conditionals below was reconstructed
    from flattened source text; the affected spots are marked.
    """
    # Index the registered mines by name.
    mines_dict = {}
    intermines = InterMine.objects.all()
    for im in intermines:
        mines_dict[im.name] = im

    # NOTE(review): a missing `q` makes `q.split` raise AttributeError, and a
    # `q` without '__' makes `qq[1]` raise IndexError; neither is handled.
    q = request.GET.get('q')
    qq = q.split('__')
    q_template = qq[0]
    q_mine_name = qq[1]
    q_mine = mines_dict[q_mine_name]
    base_url = q_mine.url.rstrip('/')
    # Note that q_service must exist if this template exists, so there is no need for a try/except here
    q_service = Service(base_url)
    selected_template = q_service.get_template(q_template)
    nc = len(selected_template.constraints)  # number of constraints in selected_template

    constraints = []  # per-constraint dicts handed to the HTML template
    kw_constraints = {}  # keyword arguments for the template query, by code

    # Keep only the lists whose list_type is 'Gene'.
    gene_lists = q_service.get_all_list_names()
    gene_lists = [
        l for l in gene_lists if q_service.get_list(l).list_type == 'Gene'
    ]

    # Query string that carries the submitted constraints from page to page.
    base_filters_str = '?q=%s' % (q)
    use_default_constraints = False
    for i in range(nc):
        stc_i = selected_template.constraints[i]
        stc_i_dict = stc_i.to_dict()
        is_gene_related = pathIsGeneRelated(stc_i.path)
        ch = chr(ord('A') + i)  # constraint code: 'A' for the first, 'B' next, ...
        # constraints - return to template
        constraint = {
            'code': ch,
            'path': stc_i.path,
            'edit': stc_i.editable,
            'gene_related': is_gene_related
        }
        operator = request.GET.get('op' + ch)
        value = request.GET.get('value' + ch)
        value2 = request.GET.get('value2' + ch)
        value_list = getValueList(q_service, stc_i.path)
        if value_list is not None:
            constraint['value_list'] = value_list
        # organism_list is for the value2 (extraValue, extra_value) field in a ternary constraint
        organism_list = getValueList(q_service, 'Gene.organism.shortName')
        gene_operator = request.GET.get('gene_op' + ch)
        gene_value = request.GET.get('gene_value' + ch)
        if gene_operator is not None:
            # gene list-based constraint
            # (set operator and value for use in kw_constraints below)
            constraint['gene_op'] = operator = gene_operator
            constraint['gene_value'] = value = gene_value
            # The form still shows the template's own op/value here.
            constraint['op'] = stc_i.op
            constraint['value'] = stc_i.value
            if 'extraValue' in stc_i_dict:
                constraint['value2'] = stc_i_dict['extraValue']
            constraints.append(constraint)
            if stc_i.editable:
                base_filters_str += '&op%s=%s&value%s=%s&gene_op%s=%s&gene_value%s=%s' % (
                    ch, stc_i.op, ch, stc_i.value, ch, gene_operator, ch,
                    gene_value)
        elif operator is not None:
            # regular (binary) constraint
            constraint['op'] = operator
            constraint['value'] = value
            if value2 is not None:
                # ternary constraint
                constraint['value2'] = value2
                # NOTE(review): nesting under `value2 is not None` is
                # reconstructed -- confirm against the original layout.
                if organism_list is not None:
                    constraint['value_list'] = organism_list
            constraints.append(constraint)
            if stc_i.editable:
                base_filters_str += '&op%s=%s&value%s=%s' % (ch, operator, ch,
                                                             value)
                if value2 is not None:
                    base_filters_str += '&value2%s=%s' % (ch, value2)
        else:
            # user submitted no constraints (yet), so use default values from selected template
            use_default_constraints = True
            try:
                constraint['op'] = stc_i.op
                constraint['value'] = stc_i.value
                if 'extraValue' in stc_i_dict:
                    constraint['value2'] = stc_i_dict['extraValue']
                    # NOTE(review): nesting under the extraValue check is
                    # reconstructed -- confirm against the original layout.
                    if organism_list is not None:
                        constraint['value_list'] = organism_list
                constraints.append(constraint)
            except:
                # ignore if any fields are missing
                # NOTE(review): bare except also hides unrelated errors.
                pass
            continue  # so as not to submit default values to template query
        if stc_i.editable:
            # kw_constraints - submit to template query
            kw_constraints[ch] = {'op': operator, 'value': value}
            # 'Any' is treated like a missing extra value.
            if value2 not in [None, 'Any']:
                kw_constraints[ch]['extra_value'] = value2

    if use_default_constraints:
        # At least one constraint was not submitted: show the form only.
        context = {
            'user_q': q,
            'user_mine': q_mine_name,
            'user_template': selected_template,
            'user_constraints': constraints,
            'gene_lists': gene_lists,
        }
        return render(request, 'intermine_mgr/template_constraints.html',
                      context)

    # Paging
    page = request.GET.get('page')
    if page is None:
        page = 1
    else:
        page = int(page)
    results_per_page = request.GET.get('rows')
    # Note that start indices are 0-based in the view, 1-based in the template
    if results_per_page is None:
        start = 0
    else:
        results_per_page = int(results_per_page)
        start = (page - 1) * results_per_page

    # Extract facet filters (only mines, for now)
    facet_filters = {}
    facet_filters_str = ''
    mine = request.GET.get('Mine')
    if mine:
        facet_filters['Mine'] = mine
        facet_filters_str += '&Mine=' + mine
    facets = {'Mine': {}}  # hit count per mine name

    total_hits = 0
    results = []
    for im in intermines:
        # Skip every mine except the requested one when `Mine` was given.
        if not (mine is None or mine == im.name):
            continue
        base_url = im.url.rstrip('/')
        try:
            service = Service(base_url)
        except:
            # service is inaccessible, or some other error
            continue
        try:
            template = service.get_template(q_template)
            # Execute the (possibly modified) query
            rr = template.rows(**kw_constraints)
            for row in rr:
                rd = row.to_d()
                rd.update({'mine': im.name})  # tag each row with its source mine
                results.append(rd)
            im_total_hits = len(rr)
            total_hits += im_total_hits
            # Aggregating facets is more complicated:
            facets['Mine'][im.name] = im_total_hits
        except:
            # For example, if the constraints do not apply to the template for this mine
            # NOTE(review): rows appended before a failure stay in `results`
            # although this mine's hits are not added to `total_hits`.
            continue

    # This yields the total hits and facet information.
    # Remove any mines with no hits.
    mm = list(facets['Mine'].keys())  # copy the keys: entries are deleted below
    for m in mm:
        if facets['Mine'][m] == 0:
            del facets['Mine'][m]

    # Sort by the path of the template's first sort order only.
    sort_tag = selected_template.get_sort_order().sort_orders[0].path  # TODO: sort by multiple columns?
    results = sorted(results, key=lambda result: safeSort(result, sort_tag))

    if results_per_page is None:
        # No paging requested: everything goes on a single page.
        end = total_hits
        last_page = 1
    else:
        end = min(start + results_per_page, total_hits)
        results = results[start:end]
        last_page = (total_hits - 1) // results_per_page + 1
        base_filters_str += '&rows=%d' % (results_per_page)

    context = {
        'user_q': q,
        'user_mine': q_mine_name,
        'user_template': selected_template,
        'user_constraints': constraints,
        'gene_lists': gene_lists,
        'base_filters_str': base_filters_str,
        'facet_filters': facet_filters,
        'facet_filters_str': facet_filters_str,
        'page': page,
        'results_per_page': results_per_page,
        'last_page': last_page,
        'start_row': start + 1,  # 1-based for display
        'end_row': end,
        'num_rows': total_hits,
        'results': results,
        'facets': facets,
    }
    return render(request, 'intermine_mgr/template_constraints.html', context)
def get_protein_seq_as_FASTA(gene_id,
                             extension_for_saving=extension_for_saving,
                             return_text=False):
    '''
    Main function of script.
    Takes a gene's systematic name, standard name, or alias as defined at gene
    page at yeastgenome.org, retrieves the associated information from
    YeastMine, and saves or returns the protein sequence in FASTA format.

    Parameters:
    `gene_id`: identifier used as the LOOKUP value of the YeastMine query.
    `extension_for_saving`: passed to `generate_output_file_name` together
    with the gene's nomenclature when the record is saved to a file.
    `return_text`: use if calling from IPython or a Jupyter notebook and you
    want the FASTA record returned as text instead of written to a file.

    Returns the FASTA-formatted record as a string when `return_text` is
    true; otherwise writes the file and returns None.

    Raises `IndexError` when YeastMine returns no rows for `gene_id` (the
    same exception type an empty result produced before, now with a message
    that names the gene).

    Progress messages are written to stderr.
    '''
    # Announce the lookup before it is performed, so the message is already
    # visible if the request is slow or fails.
    sys.stderr.write("looking up the gene associated with "
                     "{}...".format(gene_id))

    # Get gene information from YeastMine
    #---------------------------------------------------------------------------
    # Based on the template Gene_ProteinSequence available under
    # 'Gene --> Protein Sequence' when clicking on 'Proteins' on navigation bar
    # in middle of page at YeastMine. Direct link:
    # https://yeastmine.yeastgenome.org/yeastmine/template.do?name=Gene_ProteinSequence&scope=global
    service = Service(
        "https://yeastmine.yeastgenome.org:443/yeastmine/service")

    # Retrieve protein sequence for a specified gene.
    template = service.get_template('Gene_ProteinSequence')

    # You can edit the constraint values below
    # B    Gene
    rows = template.rows(
        B={"op": "LOOKUP", "value": gene_id, "extra_value": "S. cerevisiae"}
    )

    # Only the first row is used, so there is no need to collect them all.
    first_hit = next(iter(rows), None)
    if first_hit is None:
        raise IndexError(
            "no gene found at YeastMine for '{}'".format(gene_id))

    # store corresponding protein sequence
    prot_seq = first_hit["proteins.sequence.residues"]

    # format gene_nom_info for making output file name or anything else needing
    # that information
    gene_nom_info = {
        'sys_nom': first_hit["secondaryIdentifier"],
        'std_nom': first_hit["symbol"],
        'aliases': first_hit["sgdAlias"],
    }

    # Make output FASTA record
    #---------------------------------------------------------------------------
    # based on handling worked out in
    # `delete_seq_following_pattern_within_multiFASTA.py`; see also
    # https://www.biostars.org/p/48797/ and https://biopython.org/wiki/SeqIO
    record_description = '{}'.format(gene_nom_info['sys_nom'])
    # NOTE(review): `generic_protein` comes from Bio.Alphabet, which was
    # removed in Biopython 1.78 -- confirm the Biopython version targeted.
    record = SeqRecord(Seq(prot_seq, generic_protein),
                       id=gene_nom_info['std_nom'],
                       description=record_description)
    sys.stderr.write("getting protein sequence...")

    # Return text if called with `return_text = True`. Otherwise, consider
    # called from command line & save file.
    #---------------------------------------------------------------------------
    if return_text:
        # based on section 4.6 at
        # http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec:SeqRecord-format
        # Feedback
        sys.stderr.write("\nReturning protein sequence in FASTA format.")
        return record.format("fasta")

    output_file_name = generate_output_file_name(gene_nom_info,
                                                 extension_for_saving)
    SeqIO.write(record, output_file_name, "fasta")
    # Feedback
    sys.stderr.write("\n\nFile of protein sequence "
                     "saved as '{}'.".format(output_file_name))
    sys.stderr.write("\nFinished.\n")
# To install the client, run the following command from a terminal:
#
# sudo easy_install intermine
#
# For further documentation you can visit:
#     http://intermine.readthedocs.org/en/latest/web-services/

# The following two lines will be needed in every python script:
from intermine.webservice import Service
service = Service("https://phytozome.jgi.doe.gov/phytomine/service")

# Look up the GO terms associated with a gene, or use a saved list of genes and
# look up the GO terms associated with all genes on the list.
template = service.get_template('GO-Annotations')

# Row fields written for every hit, in output order.
_COLUMNS = (
    "primaryIdentifier",
    "secondaryIdentifier",
    "length",
    "chromosomeLocation.start",
    "chromosomeLocation.end",
    "goAnnotation.ontologyTerm.identifier",
    "goAnnotation.ontologyTerm.name",
    "goAnnotation.ontologyTerm.description",
    "briefDescription",
)

# Output layout. It reproduces what the former Python 2 statement
# `print a, "\t", b, c, "\t", ...` emitted: a space precedes each tab, and
# nothing follows it.
# NOTE(review): secondaryIdentifier and length are separated by a single
# space rather than a tab -- presumably unintended, but kept so that existing
# consumers of the output see the same layout.
_ROW_LAYOUT = "%s \t%s %s \t%s \t%s \t%s \t%s \t%s \t%s"

# You can edit the constraint values below
# A    Gene.primaryIdentifier
with open("genes_polya_frompac", "r") as gene_file:
    # Iterate the file lazily instead of loading every line up front.
    for line in gene_file:
        gene_id = line.strip()
        if not gene_id:
            # A blank line would only trigger a query with an empty value.
            continue
        rows = template.rows(A={"op": "=", "value": gene_id})
        for row in rows:
            # A parenthesized single argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print(_ROW_LAYOUT % tuple(row[column] for column in _COLUMNS))
#
# Saccharomyces Genome Database (SGD)
# http://yeastmine.yeastgenome.org/yeastmine/api.do?subtab=python
#
# YeastMine example script
#
from intermine.webservice import Service
service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

# List all GO annotations for a specified gene. Searches for the
# primaryIdentifier (SGDID), secondaryIdentifier (Systematic Name), symbol
# (Standard Gene Name) and wild card queries (such as *YAL*) are supported.
# Manually curated, high-throughput, and computational GO annotations are
# included. Genes include Uncharacterized and Verified ORFs, pseudogenes,
# transposable element genes, RNAs, and genes Not in Systematic Sequence of
# S228C.
template = service.get_template('Gene_GO')

# You can edit the constraint values below
# A    Gene    Show GO annotations for gene:
rows = template.rows(
    A={"op": "LOOKUP", "value": "YAL018C", "extra_value": "S. cerevisiae"}
)

# Print every returned row. The parenthesized single-argument form behaves
# the same under the Python 2 print statement and the Python 3 print
# function, so the script runs on either interpreter.
for row in rows:
    print(row)