Exemple #1
0
def get_gene_id(gene_name):
    '''Retrieve systematic yeast gene name from the common name.

    Runs a YeastMine ``Gene`` query restricted to S. cerevisiae with a
    LOOKUP constraint on ``gene_name``. When the query yields several rows,
    the identifier of the last row is returned.

    :param gene_name: Common name for yeast gene (e.g. ADE2).
    :type gene_name: str
    :returns: Systematic name for yeast gene (e.g. YOR128C).
    :rtype: str
    :raises ValueError: If the query yields no rows for ``gene_name``.

    '''
    from intermine.webservice import Service

    service = Service('http://yeastmine.yeastgenome.org/yeastmine/service')

    # Query the Gene class (table).
    query = service.new_query('Gene')

    # The view specifies the output columns.
    query.add_view('primaryIdentifier', 'secondaryIdentifier', 'symbol',
                   'name', 'sgdAlias', 'crossReferences.identifier',
                   'crossReferences.source.name')

    query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    query.add_constraint('Gene', 'LOOKUP', gene_name, code='A')

    # Track whether any row was seen: without this, an unknown gene name
    # left ``gid`` unbound and surfaced as an UnboundLocalError.
    found = False
    gid = None
    for row in query.rows():
        gid = row['secondaryIdentifier']
        found = True
    if not found:
        raise ValueError(
            'No S. cerevisiae gene found for %r' % (gene_name,))
    return gid
Exemple #2
0
def get_gene_id(gene_name):
    """Retrieve systematic yeast gene name from the common name.

    Runs a YeastMine ``Gene`` query restricted to S. cerevisiae with a
    LOOKUP constraint on ``gene_name``. When the query yields several rows,
    the identifier of the last row is returned.

    :param gene_name: Common name for yeast gene (e.g. ADE2).
    :type gene_name: str
    :returns: Systematic name for yeast gene (e.g. YOR128C).
    :rtype: str
    :raises ValueError: If the query yields no rows for ``gene_name``.

    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

    # Query the Gene class (table).
    query = service.new_query("Gene")

    # The view specifies the output columns.
    query.add_view("primaryIdentifier", "secondaryIdentifier", "symbol",
                   "name", "sgdAlias", "crossReferences.identifier",
                   "crossReferences.source.name")

    query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="B")
    query.add_constraint("Gene", "LOOKUP", gene_name, code="A")

    # Track whether any row was seen: without this, an unknown gene name
    # left ``gid`` unbound and surfaced as an UnboundLocalError.
    found = False
    gid = None
    for row in query.rows():
        gid = row["secondaryIdentifier"]
        found = True
    if not found:
        raise ValueError(
            "No S. cerevisiae gene found for %r" % (gene_name,))
    return gid
 def search_SGD(self, gene_code=None):
     """Look up a gene by symbol in YeastMine and return its location.

     Only the first row the query yields is used; it is printed (with the
     full chromosome identifier) and then returned.

     :param gene_code: Gene symbol, matched with ``=`` against ``symbol``.
     :returns: ``[secondaryIdentifier, chromosome, start, end, strand]``
         where ``chromosome`` is the chromosome identifier with its first
         three characters removed and ``strand`` is ``"+"`` or ``"-"``;
         ``None`` when the query yields no rows.
     """
     service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
     query = service.new_query("Gene")
     query.add_view(
         "chromosome.primaryIdentifier",
         "chromosomeLocation.end",
         "chromosomeLocation.start",
         "chromosomeLocation.strand",
         "secondaryIdentifier",
     )
     query.add_constraint("symbol", "=", gene_code, code="A")
     for row in query.rows():
         strand = row["chromosomeLocation.strand"]
         # A plain truth test cannot separate the strands: a reverse-strand
         # value such as -1 or "-1" is just as truthy as 1 or "1", so every
         # located gene used to be reported as "+". Check the sign instead;
         # missing/zero values still map to "-" as before.
         # NOTE(review): assumes strand is a signed number or its string
         # form - confirm against the service output.
         if not strand or str(strand).strip().startswith("-"):
             strand_sign = "-"
         else:
             strand_sign = "+"
         chromosome = row["chromosome.primaryIdentifier"]
         print(
             [
                 row["secondaryIdentifier"],
                 chromosome,
                 row["chromosomeLocation.start"],
                 row["chromosomeLocation.end"],
                 strand_sign,
             ]
         )
         return [
             row["secondaryIdentifier"],
             chromosome[3:],
             row["chromosomeLocation.start"],
             row["chromosomeLocation.end"],
             strand_sign,
         ]
     return None
Exemple #4
0
def index_genes(organism, mod):
    """Fetch (or restore from today's backup) gene records and bulk-index them.

    If a pickle backup named ``<organism>mine_genes_<MM_DD_YYYY>.bkp`` exists
    in the working directory it is loaded; otherwise the genes are fetched
    from the InterMine service described by ``mod`` and the backup is
    written. The records are then sent to ``es.bulk`` in batches.

    :param organism: Organism label used in messages, the backup file name,
        document ids and the ``organism`` field of every record.
    :param mod: Mapping with the keys ``mine_service_url``,
        ``mine_organism_name``, ``url_prefix``, ``url_suffix`` and
        ``gene_fields`` (itself a mapping with ``id``, ``gene_name``,
        ``gene_symbol``, ``gene_synonym``, ``go_id`` and ``go_name`` giving
        the query column for each field).
    """
    backup_filename = organism + "mine_genes_" + time.strftime("%m_%d_%Y") + ".bkp"
    if os.path.isfile(backup_filename):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Restoring fetched data from today from " + organism + "mine")

        # NOTE(review): pickle.load runs arbitrary code from the file; this
        # is only acceptable while the backup is a file this function wrote.
        # The context manager closes the handle, which was previously leaked.
        with open(backup_filename, 'rb') as backup:
            genes = pickle.load(backup)
    else:
        print("Fetching data from " + organism + "mine")
        service = Service(mod["mine_service_url"])
        fields = mod["gene_fields"]

        query = service.new_query("Gene")
        # Materialize the values so a real list is passed on Python 3 too,
        # where dict.values() is a view rather than a list.
        query.add_view(list(fields.values()))

        query.add_constraint("organism.name", "=", mod["mine_organism_name"], code="B")

        genes = {}

        # One row per (gene, GO term): the first row creates the record,
        # later rows for the same gene only extend its GO lists.
        for row in query.rows():
            gene_id = row[fields["id"]]

            if gene_id in genes:
                genes[gene_id]["go_ids"].append(row[fields["go_id"]])
                genes[gene_id]["go_names"].append(row[fields["go_name"]])
            else:
                genes[gene_id] = {
                    "name": row[fields["gene_name"]],
                    "symbol": row[fields["gene_symbol"]],
                    "synonym": row[fields["gene_synonym"]],
                    "go_ids": [row[fields["go_id"]]],
                    "go_names": [row[fields["go_name"]]],
                    "href": mod["url_prefix"] + row["primaryIdentifier"] + mod["url_suffix"],
                    "organism": organism,
                    "category": "gene"
                }

        with open(backup_filename, 'wb') as backup:
            pickle.dump(genes, backup)

    print("Indexing " + str(len(genes)) + " " + organism + " genes")

    bulk_data = []
    for gene_id, document in genes.items():
        bulk_data.append({
            'index': {
                '_index': INDEX_NAME,
                '_type': DOC_TYPE,
                '_id': organism + "_" + gene_id
            }
        })
        bulk_data.append(document)

        # Two entries are appended per gene, so this flushes every 250 genes.
        if len(bulk_data) % 500 == 0:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if bulk_data:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def fetchGene(GeneName):
    """Build a ``SeqRecord`` for a yeast gene from YeastMine.

    Runs the ``Gene_GenomicDNA`` template with a LOOKUP on ``GeneName``
    (extra value ``S. cerevisiae``). The service may return several similar
    genes; only the first row is used.

    :param GeneName: Gene name to look up; also stored as the record name.
    :returns: ``SeqRecord`` whose sequence comes from ``sequence.residues``,
        whose id is the row's ``secondaryIdentifier`` and whose description
        is the row's ``description``.
    :raises ValueError: If the template returns no rows (previously this
        surfaced as a NameError on an unbound local).
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')

    rows = template.rows(
        E={"op": "LOOKUP", "value": GeneName, "extra_value": "S. cerevisiae"}
    )

    # Take the first row only instead of counting through the whole result.
    first_row = next(iter(rows), None)
    if first_row is None:
        raise ValueError("No gene found for %r" % (GeneName,))

    descr = first_row["description"]
    GeneSeq = Seq(first_row["sequence.residues"])
    GeneSysName = first_row["secondaryIdentifier"]

    # Create the record, then attach the extra information.
    GeneRecord = SeqRecord(GeneSeq, id=GeneSysName)
    GeneRecord.name = GeneName
    # NOTE(review): ``features`` receives the systematic-name string, exactly
    # as before; confirm whether callers rely on that or expect a list.
    GeneRecord.features = GeneSysName
    GeneRecord.description = descr

    return GeneRecord
Exemple #6
0
def intermine_query(ids, organism, *args):
    """Build, without running, a Gene query for ``ids`` in ``organism``.

    :param ids: Value for the LOOKUP constraint on ``Gene`` (code ``A``).
    :param organism: Organism name; selects the service URL from
        ``service_urls`` and constrains ``organism.name`` (code ``B``).
    :param args: Output columns, forwarded to ``query.select``.
    :returns: The configured query object.
    """
    mine = Service(service_urls[organism])
    gene_query = mine.new_query("Gene", case_sensitive=True)
    gene_query.add_constraint("Gene", "LOOKUP", ids, code="A")
    gene_query.add_constraint("organism.name", "=", organism, code="B")
    gene_query.select(*args)
    return gene_query
Exemple #7
0
def wmquery():
    """Print a fixed set of Gene columns for every row WormMine returns.

    The query has no constraints. Each row is printed with the columns in
    the order they are listed in ``columns``.
    """
    columns = (
        "biotype", "length", "symbol", "primaryIdentifier",
        "downstreamIntergenicRegion.primaryIdentifier",
        "downstreamIntergenicRegion.organism.name",
        "downstreamIntergenicRegion.locations.feature.primaryIdentifier",
        "downstreamIntergenicRegion.locations.start",
        "downstreamIntergenicRegion.locations.end",
        "downstreamIntergenicRegion.locations.strand",
        "homologues.dataSets.name",
        "upstreamIntergenicRegion.primaryIdentifier",
        "upstreamIntergenicRegion.organism.name",
        "upstreamIntergenicRegion.locations.feature.primaryIdentifier",
        "upstreamIntergenicRegion.locations.start",
        "upstreamIntergenicRegion.locations.end",
        "upstreamIntergenicRegion.locations.strand",
        "transcripts.primaryIdentifier", "transcripts.symbol",
    )
    wormmine = Service("http://intermine.wormbase.org/tools/wormmine/service")
    gene_query = wormmine.new_query("Gene")
    gene_query.add_view(*columns)

    for record in gene_query.rows():
        # The argument list stays explicit so the output is unchanged
        # whichever print semantics the surrounding module uses.
        print (
            record["biotype"],
            record["length"],
            record["symbol"],
            record["primaryIdentifier"],
            record["downstreamIntergenicRegion.primaryIdentifier"],
            record["downstreamIntergenicRegion.organism.name"],
            record["downstreamIntergenicRegion.locations.feature.primaryIdentifier"],
            record["downstreamIntergenicRegion.locations.start"],
            record["downstreamIntergenicRegion.locations.end"],
            record["downstreamIntergenicRegion.locations.strand"],
            record["homologues.dataSets.name"],
            record["upstreamIntergenicRegion.primaryIdentifier"],
            record["upstreamIntergenicRegion.organism.name"],
            record["upstreamIntergenicRegion.locations.feature.primaryIdentifier"],
            record["upstreamIntergenicRegion.locations.start"],
            record["upstreamIntergenicRegion.locations.end"],
            record["upstreamIntergenicRegion.locations.strand"],
            record["transcripts.primaryIdentifier"],
            record["transcripts.symbol"])
Exemple #8
0
def get_all_gene_annotations():
    """Download gene annotations from YeastMine into a DataFrame.

    One row is kept per ``secondaryIdentifier`` (the first one seen).
    Several columns are renamed to short names (``chromosome``, ``ORF``,
    ``Gene``, ``start``, ``end``, ``orf_strand``) and a ``Gene_ORF`` column
    is added by applying ``gene_orfer`` to every row.

    :returns: ``pandas.DataFrame`` of annotations.
    """
    col_names = [
        "briefDescription", "description", "functionSummary",
        "chromosome.primaryIdentifier", "secondaryIdentifier", "symbol",
        "phenotypeSummary", "locations.strand", "locations.end",
        "locations.start"
    ]
    yeastmine = Service(
        "https://yeastmine.yeastgenome.org:443/yeastmine/service")
    gene_query = yeastmine.new_query("Gene")
    gene_query.add_view(col_names)

    columns = {}
    for name in col_names:
        columns[name] = []

    seen_orfs = set()
    for record in gene_query.rows():
        orf = record['secondaryIdentifier']
        # YeastMine repeats rows, so anything after the first per ORF is skipped.
        if orf in seen_orfs:
            continue
        for name in col_names:
            columns[name].append(record[name])
        seen_orfs.add(orf)

    short_names = {
        'chromosome.primaryIdentifier': 'chromosome',
        'secondaryIdentifier': 'ORF',
        'symbol': 'Gene',
        'locations.start': 'start',
        'locations.end': 'end',
        'locations.strand': 'orf_strand'
    }
    table = pd.DataFrame(columns).rename(columns=short_names)
    table['Gene_ORF'] = table.apply(gene_orfer, axis=1)
    return table
Exemple #9
0
def main():
    """Connect to YeastMine and build a dictionary of ORF annotation data.

    Queries ``SequenceFeature`` rows for Saccharomyces cerevisiae ORFs. Each
    row becomes a dict keyed by the last component of every view path, and
    is stored under its ``secondaryIdentifier`` both in the returned dict
    and in a shelve database under ``settings.PROJECT_ROOT``.

    :returns: Mapping of ``secondaryIdentifier`` to the row's attribute dict.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine")

    query = service.new_query()

    query.add_view(
        "SequenceFeature.primaryIdentifier", "SequenceFeature.featureType",
        "SequenceFeature.secondaryIdentifier", "SequenceFeature.description",
        "SequenceFeature.sgdAlias", "SequenceFeature.name", "SequenceFeature.symbol",
        "SequenceFeature.chromosome.name", "SequenceFeature.chromosome.featAttribute",
        "SequenceFeature.locations.start", "SequenceFeature.locations.end", "SequenceFeature.locations.strand"
        )
    query.add_constraint("SequenceFeature.organism.name", "=", "Saccharomyces cerevisiae", "A")
    query.add_constraint("SequenceFeature.featureType", "=", "ORF", "B")
    query.set_logic("(A and B)")

    annotation = {}
    db = shelve.open(os.path.join(settings.PROJECT_ROOT, 'apps', 'annotations', 'SGD', 'yeastmine'), 'c')
    # try/finally so the shelve is flushed and closed even if the query or a
    # row fails part-way (a ``with`` block is avoided because Python 2
    # shelves are not context managers).
    try:
        for row in query.rows():
            data = {}
            # Walk the view paths and cells in step instead of indexing by
            # position; this also drops the Python 2-only xrange.
            for view, cell in zip(row.views, row.data):
                attribute = view.split('.')[-1]
                value = cell['value']
                # NOTE(review): "SequenceFeature.name" and
                # "SequenceFeature.chromosome.name" both reduce to "name",
                # so a non-empty chromosome name overwrites the feature
                # name - confirm this is intended.
                if attribute == 'name' and not value:
                    continue
                data[attribute] = value
            if 'name' not in data:
                data['name'] = None
            annotation[data['secondaryIdentifier']] = data
            db[str(data['secondaryIdentifier'])] = data
    finally:
        db.close()
    return annotation
def getInteractions():
    """Collect interactions for ribosomal-protein genes from YeastMine.

    The query selects genes whose GO annotation term is named "cytoplasmic
    translation" (qualifier null or not "NOT"), whose name is one of the two
    ribosomal-protein names, and whose interaction annotation type is
    "manually curated".

    :returns: Dict mapping each gene symbol to a list of dicts with the
        keys ``expt``, ``gene2`` and ``desc``, one per returned row.
    """
    mine = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    gene_query = mine.new_query("Gene")

    # Type constraints go first, before any path that depends on them.
    gene_query.add_constraint("goAnnotation.ontologyTerm", "GOTerm")

    # Output columns.
    gene_query.add_view(
        "symbol", "interactions.details.experimentType",
        "interactions.gene2.symbol", "interactions.gene2.briefDescription"
    )

    # Value constraints, combined by the logic expression below.
    gene_query.add_constraint("goAnnotation.qualifier", "IS NULL", code="C")
    gene_query.add_constraint("goAnnotation.qualifier", "!=", "NOT", code="B")
    gene_query.add_constraint("goAnnotation.ontologyTerm.name", "=", "cytoplasmic translation", code="A")
    gene_query.add_constraint(
        "name", "ONE OF",
        ["Ribosomal Protein of the Large subunit", "Ribosomal Protein of the Small subunit"],
        code="D")
    gene_query.add_constraint("interactions.details.annotationType", "=", "manually curated", code="E")
    gene_query.set_logic("A and (B or C) and E and D")

    by_symbol = {}
    for record in gene_query.rows():
        symbol = record["symbol"]
        partner = {
            "expt": record["interactions.details.experimentType"],
            "gene2": record["interactions.gene2.symbol"],
            "desc": record["interactions.gene2.briefDescription"],
        }
        by_symbol.setdefault(symbol, []).append(partner)
    return by_symbol
Exemple #11
0
def get_gene_id(gene_name):
    '''Retrieve systematic yeast gene name from the common name.

    Runs a YeastMine ``Gene`` query restricted to S. cerevisiae with a
    LOOKUP constraint on ``gene_name``. When the query yields several rows,
    the identifier of the last row is returned.

    :param gene_name: Common name for yeast gene (e.g. ADE2).
    :type gene_name: str
    :returns: Systematic name for yeast gene (e.g. YOR128C).
    :rtype: str
    :raises ValueError: If the query yields no rows for ``gene_name``.

    '''
    from intermine.webservice import Service

    service = Service('http://yeastmine.yeastgenome.org/yeastmine/service')

    # Query the Gene class (table).
    query = service.new_query('Gene')

    # The view specifies the output columns.
    query.add_view('primaryIdentifier', 'secondaryIdentifier', 'symbol',
                   'name', 'sgdAlias', 'crossReferences.identifier',
                   'crossReferences.source.name')

    query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    query.add_constraint('Gene', 'LOOKUP', gene_name, code='A')

    # Track whether any row was seen: without this, an unknown gene name
    # left ``gid`` unbound and surfaced as an UnboundLocalError.
    found = False
    gid = None
    for row in query.rows():
        gid = row['secondaryIdentifier']
        found = True
    if not found:
        raise ValueError(
            'No S. cerevisiae gene found for %r' % (gene_name,))
    return gid
def plot_go_vs_p(list_name):
    """
    Plot p-value per GO term for a saved list, labelling bars with gene counts.

    The mine is resolved through the InterMine registry using the
    module-level ``mine`` name. GO enrichment is computed for the list
    ``list_name`` and at most the first five results are drawn as a bar
    chart, each bar annotated with its match count.
    ================================================
    example:

        >>> plot_go_vs_p("PL_obesityMonogen_ORahilly09")

    """
    registry_url = "http://registry.intermine.org/service/instances/" + mine
    response = requests.get(registry_url)
    registry_entry = json.loads(response.text)
    service = Service(registry_entry["instance"]["url"])

    saved_list = service.list_manager().get_list(name=list_name)
    enrichment = saved_list.calculate_enrichment(
        widget="go_enrichment_for_gene")

    gene_count, identifier, p_value = [], [], []
    for position, term in enumerate(enrichment):
        # Only the first five enrichment results are plotted.
        if position >= 5:
            break
        gene_count.append(term.matches)
        identifier.append(term.identifier)
        p_value.append(term.p_value)

    # Draw the bars.
    ax = pd.Series(p_value).plot(kind='bar')
    ax.set_title('GO Term vs p-value (Label: Gene count)')
    ax.set_xlabel('GO Term')
    ax.set_ylabel('p_value')
    ax.set_xticklabels(identifier, rotation='horizontal')

    # Write each bar's gene count just above its top edge.
    for index, bar in enumerate(ax.patches):
        centre = bar.get_x() + bar.get_width()/2.
        top = bar.get_height()
        ax.annotate(gene_count[index], (centre, top), xytext=(0, 5),
                    textcoords="offset points",
                    ha='center', va='bottom')

    ax.margins(y=0.1)
    plt.show()
Exemple #13
0
def humanmine():
    """Fetch HumanMine attributes for the module-level ``gene``.

    Takes the first HumanMine primary identifier found on
    ``gene.identifiers``, looks the gene up in HumanMine and extracts
    selected ``name=value`` pairs from the string form of each result row.

    :returns: Dict with any of the keys ``description``,
        ``cytogenetic_location``, ``id``, ``length``, ``primary_id``,
        ``score``, ``score_type``, ``secondary_id`` and ``symbol``; ``None``
        when ``gene`` has no matching identifier.
    """
    # Row attribute name -> key used in the returned dict.
    field_names = {
        "description": "description",
        "cytoLocation": "cytogenetic_location",
        "id": "id",
        "length": "length",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "scoreType": "score_type",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }
    accepted_types = [
        "humanmine primary id",
        "humanmine primary identifier",
        "humanmine primary gene id",
        "humanmine primary gene identifier",
    ]
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(gene.identifiers, accepted_types):

        s = Service("www.humanmine.org/humanmine")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP", iden["identifier"])
        gene_object = {}
        for row in q.rows():
            pairs = re.findall(r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})", row.__str__())
            for key, raw in pairs:
                # Drop the surrounding quotes of quoted values.
                if raw[0] == "'" and raw[-1] == "'":
                    raw = raw[1:-1]

                target = field_names.get(key)
                if target is not None:
                    gene_object[target] = raw.strip()

        # Returns inside the loop: only the first identifier is processed.
        return gene_object
Exemple #14
0
def fetch_from_sgd() -> dict:
    """Query SGD's intermine service for S. cerevisiae ORF features.

    The result maps each SGD id to a dict of feature data with the keys
    sgd_id, feature_qualifier, feature_type, orf, name, aliases, chromosome,
    chromosomal_location, start_coordinate, stop_coordinate and description.

    ``chromosome`` and ``chromosomal_location`` are derived from the ORF
    name: the chromosome from its second character (0 for names starting
    with ``Q``), the location from the first run of digits, negated when the
    third character is ``L``.

    :rtype: dict
    """
    digits = re.compile(r'(\d+)')

    yeastmine = Service("https://yeastmine.yeastgenome.org/yeastmine/service")

    gene_query = yeastmine.new_query("Gene")
    gene_query.add_view("primaryIdentifier", "featureType", "qualifier",
                        "secondaryIdentifier", "symbol", "chromosomeLocation.start",
                        "chromosomeLocation.end", "description", "synonyms.value")

    gene_query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="A")
    gene_query.add_constraint("featureType", "=", "ORF", code="C")

    genes = {}

    logger.debug("Executing query on yeastmine")
    for record in gene_query.rows():
        sgd_id = record["primaryIdentifier"]
        orf = record["secondaryIdentifier"]

        # First run of digits in the ORF name, or 0 when there is none.
        number_match = digits.search(orf)
        position = int(number_match.group(1)) if number_match else 0

        if orf.startswith('Q'):
            chromosome = 0
        else:
            # Second character is the chromosome letter: 'A' -> 1, 'B' -> 2, ...
            chromosome = ord(orf[1]) - 64
            if orf[2] == 'L':
                position = -position

        # The first row for a gene creates its entry; every row may add an alias.
        if sgd_id not in genes:
            logger.debug(f"Parsing new ORF: {orf}")
            genes[sgd_id] = {
                'sgd_id': record["primaryIdentifier"],
                'feature_qualifier': record["qualifier"],
                'feature_type': record['featureType'],
                'orf': orf,
                'name': record["symbol"],
                'aliases': [],
                'chromosome': chromosome,
                'chromosomal_location': position,
                'start_coordinate': str(record["chromosomeLocation.start"]),
                'stop_coordinate': str(record["chromosomeLocation.end"]),
                'description': record["description"],
            }

        synonym = record["synonyms.value"]
        if synonym not in (orf, record["symbol"]):
            genes[sgd_id]['aliases'].append(synonym)

    return genes
Exemple #15
0
def get_gene_id(gene_name):
    """Retrieve systematic yeast gene name from the common name.

    Runs a YeastMine ``Gene`` query restricted to S. cerevisiae with a
    LOOKUP constraint on ``gene_name``. When the query yields several rows,
    the identifier of the last row is returned.

    :param gene_name: Common name for yeast gene (e.g. ADE2).
    :type gene_name: str
    :returns: Systematic name for yeast gene (e.g. YOR128C).
    :rtype: str
    :raises ValueError: If the query yields no rows for ``gene_name``.

    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

    # Query the Gene class (table).
    query = service.new_query("Gene")

    # The view specifies the output columns.
    query.add_view("primaryIdentifier", "secondaryIdentifier", "symbol",
                   "name", "sgdAlias", "crossReferences.identifier",
                   "crossReferences.source.name")

    query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="B")
    query.add_constraint("Gene", "LOOKUP", gene_name, code="A")

    # Track whether any row was seen: without this, an unknown gene name
    # left ``gid`` unbound and surfaced as an UnboundLocalError.
    found = False
    gid = None
    for row in query.rows():
        gid = row["secondaryIdentifier"]
        found = True
    if not found:
        raise ValueError(
            "No S. cerevisiae gene found for %r" % (gene_name,))
    return gid
Exemple #16
0
def query_fishmine(intermine_url: str, protein_id: str, query: str="Gene") -> IntermineResult:
    """Find ZFIN identifiers cross-referenced to ``protein_id`` in a mine.

    :param intermine_url: Service URL of the InterMine instance.
    :param protein_id: Identifier matched against ``crossReferences.identifier``.
    :param query: Name of the class to query.
    :returns: Result of ``intermine_response_factory`` for the list of
        ``ZFIN:<primaryIdentifier>`` strings and ``protein_id``.
    """
    mine = Service(intermine_url)
    gene_query = mine.new_query(query)
    gene_query.add_view("primaryIdentifier")
    gene_query.add_constraint("primaryIdentifier", "CONTAINS", "ZDB*", code="A")
    gene_query.add_constraint("crossReferences.identifier", "=", "{}".format(protein_id), code="B")

    zfin_ids = []
    for record in gene_query.rows():
        zfin_ids.append("ZFIN:{}".format(record['primaryIdentifier']))
    return intermine_response_factory(zfin_ids, protein_id)
Exemple #17
0
    def parse(self, limit=None):
        """Load MGI gene-to-phenotype annotations from MouseMine into the graph.

        Queries MouseMine once per two-digit MGI id prefix (``MGI:10`` to
        ``MGI:99``). Every returned row becomes a ``G2PAssoc`` between the
        subject and the MP term; when the row has a PubMed id, a journal
        article reference is added as its source.

        :param limit: Optional maximum number of id prefixes to process; a
            falsy value means all of them.
        :returns: None
        """
        # The connection and the logger levels do not change between
        # prefixes, so set them up once instead of on every iteration.
        service = Service("http://www.mousemine.org/mousemine/service")
        logging.getLogger('Model').setLevel(logging.CRITICAL)
        logging.getLogger('JSONIterator').setLevel(logging.CRITICAL)

        count = 0
        for num in range(10, 100):
            fuzzy_gene = "MGI:{0}*".format(num)
            gene = "MGI:{0}".format(num)
            query = service.new_query("OntologyAnnotation")
            # Type constraints come before the paths that depend on them.
            query.add_constraint("subject", "SequenceFeature")
            query.add_constraint("ontologyTerm", "MPTerm")
            query.add_view("subject.primaryIdentifier", "subject.symbol",
                           "subject.sequenceOntologyTerm.name",
                           "ontologyTerm.identifier", "ontologyTerm.name",
                           "evidence.publications.pubMedId",
                           "evidence.comments.type",
                           "evidence.comments.description")
            query.add_sort_order("OntologyAnnotation.ontologyTerm.name", "ASC")
            query.add_constraint("subject.organism.taxonId",
                                 "=",
                                 "10090",
                                 code="A")
            query.add_constraint("subject", "LOOKUP", fuzzy_gene, code="B")
            query.add_constraint("subject.primaryIdentifier",
                                 "CONTAINS",
                                 gene,
                                 code="C")
            query.outerjoin("evidence.comments")

            for row in query.rows():
                mgi_curie = row["subject.primaryIdentifier"]
                mp_curie = row["ontologyTerm.identifier"]
                pubmed_id = row["evidence.publications.pubMedId"]
                assoc = G2PAssoc(self.graph, self.name, mgi_curie, mp_curie)
                # Only build the publication CURIE when there is a PubMed id.
                if pubmed_id:
                    pub_curie = "PMID:{0}".format(pubmed_id)
                    reference = Reference(
                        self.graph, pub_curie,
                        Reference.ref_types['journal_article'])
                    reference.addRefToGraph()
                    assoc.add_source(pub_curie)

                assoc.add_evidence('ECO:0000059')
                assoc.add_association_to_graph()

            # Progress message every ten prefixes.
            if not count % 10 and count != 0:
                count_from = count - 10
                logger.info(
                    "{0} processed ids from MGI:{1}* to MGI:{2}*".format(
                        datetime.datetime.now(), count_from, count))

            count += 1
            if limit and count >= limit:
                break

        return
Exemple #18
0
def sgd_connection(gene, p_dir, l_dir):
    """Download phenotype and literature data for a yeast gene from YeastMine.

    Three queries are run for ``gene``:

    1. phenotype rows, written as a tab-separated table (with a header line)
       to ``<p_dir>/<gene>.txt``;
    2. phenotype summaries, concatenated into one string;
    3. PubMed ids, each passed to ``pubmed_connection``; whatever the
       returned handle reads is appended to ``<l_dir>/<gene>.txt``.

    :param gene: Gene name used for the LOOKUP constraint and file names.
    :param p_dir: Directory for the phenotype table.
    :param l_dir: Directory for the literature file.
    :returns: ``[gene, summary]``.
    """
    service = Service(
        'https://yeastmine.yeastgenome.org:443/yeastmine/service')

    # 1. Phenotype table.
    columns = [
        'primaryIdentifier', 'symbol', 'secondaryIdentifier', 'sgdAlias',
        'qualifier', 'phenotypes.experimentType', 'phenotypes.mutantType',
        'phenotypes.observable', 'phenotypes.qualifier', 'phenotypes.allele',
        'phenotypes.alleleComment', 'phenotypes.strainBackground',
        'phenotypes.chemical', 'phenotypes.condition', 'phenotypes.details',
        'phenotypes.reporter', 'phenotypes.publications.pubMedId',
        'phenotypes.publications.citation'
    ]
    phenotype_query = service.new_query('Gene')
    for column in columns:
        phenotype_query.add_view(column)
    phenotype_query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    phenotype_query.add_constraint('Gene', 'LOOKUP', gene, code='A')
    header = (
        'Gene Primary DBID\tGene Standard Name\tGene Systematic Name\t'
        'Gene Sgd Alias\tGene Qualifier\tPhenotypes Experiment Type\t'
        'Phenotypes Mutant Type\tPhenotypes Observable\tPhenotypes Qualifier\t'
        'Phenotypes Allele\tPhenotypes Allele Comment\tPhenotypes Strain Background\t'
        'Phenotypes Chemical\tPhenotypes Condition\tPhenotypes Details\t'
        'Phenotypes Reporter\tPublications PubMed ID\tPublications Citation\n'
    )
    phenotype_path = os.path.join(p_dir, '{0}.txt'.format(gene))
    with open(phenotype_path, 'w', encoding='utf-8') as table_file:
        # Everything is gathered first and written in a single call.
        lines = [header]
        for record in phenotype_query.rows():
            cells = '\t'.join(str(record[column]) for column in columns)
            lines.append(cells.strip() + '\n')
        table_file.write(''.join(lines))

    # 2. Phenotype summary.
    summary_query = service.new_query('Gene')
    summary_query.add_view('phenotypes.genes.phenotypeSummary')
    summary_query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    summary_query.add_constraint('Gene', 'LOOKUP', gene, code='A')
    fragments = []
    for record in summary_query.rows():
        text = record['phenotypes.genes.phenotypeSummary']
        if text:
            fragments.append(text)
    summary = ''.join(fragments)
    result_list = [gene, summary]

    # 3. Literature, one PubMed id at a time.
    pubmed_query = service.new_query('Gene')
    pubmed_query.add_view('publicationAnnotations.publication.pubMedId')
    pubmed_query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    pubmed_query.add_constraint('Gene', 'LOOKUP', gene, code='A')
    literature_path = os.path.join(l_dir, '{0}.txt'.format(gene))
    with open(literature_path, 'w', encoding='utf-8') as literature_file:
        for record in pubmed_query.rows():
            pubmed_id = record['publicationAnnotations.publication.pubMedId']
            if not pubmed_id:
                continue
            handle = pubmed_connection(pubmed_id, gene)
            if handle:
                literature_file.write(handle.read())
    return result_list
Exemple #19
0
def get_yeast_gene_location(gene_name):
    '''Acquire the location of a gene from SGD http://www.yeastgenome.org

    Only the first row returned by YeastMine is used.

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, int:biostart, int:bioend, int:strand]
    :rtype location: list
    :raises ValueError: If the lookup returns no rows for ``gene_name``.
    :raises KeyError: If the gene's chromosome identifier is not one of
        ``chrI`` to ``chrXVI``.

    '''
    from intermine.webservice import Service
    service = Service('http://yeastmine.yeastgenome.org/yeastmine/service')

    # Query the Gene class (table).
    query = service.new_query('Gene')

    # The view specifies the output columns.
    query.add_view('primaryIdentifier', 'secondaryIdentifier', 'symbol',
                   'name', 'organism.shortName',
                   'chromosome.primaryIdentifier', 'chromosomeLocation.start',
                   'chromosomeLocation.end', 'chromosomeLocation.strand')

    query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    query.add_constraint('Gene', 'LOOKUP', gene_name, code='A')

    # Chromosome identifiers (roman numerals) to chromosome numbers.
    chromosomes = {
        'chrI': 1,
        'chrII': 2,
        'chrIII': 3,
        'chrIV': 4,
        'chrV': 5,
        'chrVI': 6,
        'chrVII': 7,
        'chrVIII': 8,
        'chrIX': 9,
        'chrX': 10,
        'chrXI': 11,
        'chrXII': 12,
        'chrXIII': 13,
        'chrXIV': 14,
        'chrXV': 15,
        'chrXVI': 16
    }
    # The builtin next() works on Python 2 and 3 (the old ``.next()`` call
    # is the Python 2 iterator protocol), and the default avoids leaking a
    # bare StopIteration when nothing matches.
    first_result = next(iter(query.rows()), None)
    if first_result is None:
        raise ValueError(
            'No S. cerevisiae gene found for %r' % (gene_name,))

    return [
        chromosomes[first_result['chromosome.primaryIdentifier']],
        first_result['chromosomeLocation.start'],
        first_result['chromosomeLocation.end'],
        int(first_result['chromosomeLocation.strand'])
    ]
Exemple #20
0
    def download(self, genes, fields, scope=None, species=None):
        '''
        Retrieve the data selected by ``self.constraints`` and ``self.views``.

        The genes are looked up in batches of at most 1000. For every
        returned row a tuple ``(symbol, <one value per view>)`` is collected,
        and the full list is stored in ``self.dataset``.

        :param genes: Sequence of gene identifiers (strings) to look up.
        :param fields: Unused; kept for interface compatibility.
        :param scope: Unused; kept for interface compatibility.
        :param species: Unused; kept for interface compatibility.
        :returns: None; the result is stored in ``self.dataset``.
        '''
        constraints = self.constraints
        views = self.views
        glist = np.array(genes)
        if len(glist) > 1000:
            # Ceiling division. Plain ``/`` gives a float on Python 3, which
            # np.array_split rejects, and floors on Python 2, which let a
            # batch exceed 1000 genes (e.g. 1500 genes -> one batch).
            n_chunks = -(-len(glist) // 1000)
            segs = np.array_split(glist, n_chunks)
        else:
            segs = [glist]

        # Collected result tuples.
        z = []

        # The API uses letters to distinguish between constraints; "A" is
        # taken by the gene lookup, so extra constraints start at "B".
        alpha = list(string.ascii_uppercase)

        for seg in segs:
            # Connect to the API.
            service = SS(self.datasource)
            query = service.new_query("Gene")
            query.add_view(",".join(views))
            # Some databases require a host name.
            if self.hostid != "":
                query.add_constraint("Gene",
                                     "LOOKUP",
                                     ",".join(seg),
                                     self.hostid,
                                     code="A")
            else:
                query.add_constraint("Gene", "LOOKUP", ",".join(seg), code="A")

            # Apply the extra constraints. A letter is consumed for every
            # constraint, including ones in neither supported form.
            for i, constraint in enumerate(constraints, start=1):
                letter = alpha[i]
                parts = constraint.split("=")
                if len(parts) == 2:
                    query.add_constraint(parts[0], "=", parts[1], code=letter)
                elif re.search("IS NOT NULL", constraint):
                    p1 = constraint.replace(" IS NOT NULL", "")
                    query.add_constraint(p1, "IS NOT NULL", code=letter)

            # Parse the output into a list of tuples.
            for row in query.rows():
                z.append(tuple([row['symbol']] + [row[v] for v in views]))
        self.dataset = z
Exemple #21
0
def find_max_data_items(new_list, intermine, intermine_url):
    """Return the largest row count among the given InterMine classes.

    For every class name in ``new_list`` a query selecting all of its
    attributes (``<class>.*``) is counted against the service at
    ``intermine_url + "/service"``.

    :param new_list: Iterable of class (table) names to count.
    :param intermine: Not used by this function; kept so existing callers
        keep working.
    :param intermine_url: Base URL of the mine, without the ``/service``
        suffix.
    :returns: The highest count found, or 0 when ``new_list`` is empty.
    """
    service = Service(intermine_url + "/service")
    largest = 0
    for class_name in new_list:
        query = service.new_query(class_name)
        query.add_view(class_name + ".*")
        # Evaluate count() once per class and reuse the value.
        item_count = query.count()
        if item_count > largest:
            largest = item_count
    return largest
Exemple #22
0
def query(ids):
    """Build a TargetMine protein query for ``ids`` and return its rows.

    :param ids: Sequence of identifier strings; they are comma-joined into
        the value of an ``IN`` constraint on ``Protein``.
    :returns: Whatever ``rows()`` of the constructed query returns.
    """
    columns = (
        "primaryIdentifier",
        "primaryAccession",
        "name",
        "length",
        "compounds.compound.casRegistryNumber",
        "compounds.compound.name",
        "compounds.compound.compoundGroup.name",
    )
    targetmine = Service("http://targetmine.nibio.go.jp/targetmine")
    protein_query = targetmine.new_query("Protein")
    protein_query.add_view(*columns)

    # The value is not used afterwards, but the lookup is kept at this
    # point so that an empty or un-indexable ``ids`` still fails here.
    first_id = ids[0]

    protein_query.add_constraint("Protein", "IN", ",".join(ids))
    return protein_query.rows()
Exemple #23
0
def get_yeast_gene_location(gene_name):
    """Look up where a yeast gene sits in the genome via YeastMine (SGD).

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, int:biostart, int:bioend, int:strand]
    :rtype location: list

    """
    yeastmine = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    gene_query = yeastmine.new_query("Gene")

    # Output columns of the query.
    columns = (
        "primaryIdentifier",
        "secondaryIdentifier",
        "symbol",
        "name",
        "organism.shortName",
        "chromosome.primaryIdentifier",
        "chromosomeLocation.start",
        "chromosomeLocation.end",
        "chromosomeLocation.strand",
    )
    gene_query.add_view(*columns)

    # Restrict to S. cerevisiae and look the gene up by name.
    gene_query.add_constraint("organism.shortName", "=", "S. cerevisiae",
                              code="B")
    gene_query.add_constraint("Gene", "LOOKUP", gene_name, code="A")

    # Chromosome identifiers carry Roman numerals; map them to integers.
    chromosome_number = {
        "chrI": 1, "chrII": 2, "chrIII": 3, "chrIV": 4,
        "chrV": 5, "chrVI": 6, "chrVII": 7, "chrVIII": 8,
        "chrIX": 9, "chrX": 10, "chrXI": 11, "chrXII": 12,
        "chrXIII": 13, "chrXIV": 14, "chrXV": 15, "chrXVI": 16,
    }

    # Only the first row of the result is used.
    hit = gene_query.rows().next()

    chromosome = chromosome_number[hit["chromosome.primaryIdentifier"]]
    start = hit["chromosomeLocation.start"]
    end = hit["chromosomeLocation.end"]
    strand = int(hit["chromosomeLocation.strand"])
    return [chromosome, start, end, strand]
Exemple #24
0
def main():
    """Write an expression matrix for the genes listed in ``sys.argv[1]``.

    Each line of the input file (path taken relative to the current
    working directory) is used as a gene primary identifier and queried
    against ThaleMine for its RNA-seq expression levels.

    The result goes to ``results/all_genes.csv`` as a tab-separated table:
    a header row with one ``<SRA accession>-<tissue>`` column per
    experiment seen (in first-seen order), then one row per input gene
    holding its expression level for each column, or ``0`` where the gene
    has no value for that experiment. The output file is opened only after
    all queries have completed.
    """
    if not os.path.exists("results"):
        os.makedirs("results")

    service = Service("https://apps.araport.org/thalemine/service")

    columns = []          # experiment labels, in first-seen order
    seen_columns = set()  # same labels, for O(1) membership tests
    first_value = {}      # (gene, label) -> first expression level seen
    gene_names = []

    with open(os.getcwd() + "/" + sys.argv[1]) as gene_file:
        for line in gene_file:
            gene = line.strip()
            query = service.new_query("Gene")
            query.add_view("primaryIdentifier",
                           "RNASeqExpressions.expressionLevel",
                           "RNASeqExpressions.experiment.SRAaccession",
                           "RNASeqExpressions.experiment.tissue",
                           "RNASeqExpressions.unit")
            query.add_sort_order(
                "Gene.RNASeqExpressions.experiment.SRAaccession", "DESC")
            query.add_constraint("primaryIdentifier", "=", gene, code="A")

            for row in query.rows():
                label = str(
                    row["RNASeqExpressions.experiment.SRAaccession"]
                ) + "-" + str(row["RNASeqExpressions.experiment.tissue"])
                level = str(row["RNASeqExpressions.expressionLevel"])
                if label not in seen_columns:
                    seen_columns.add(label)
                    columns.append(label)
                # When a gene/experiment pair occurs more than once, the
                # first value wins.
                first_value.setdefault((gene, label), level)

            gene_names.append(gene)

    with open("results/all_genes.csv", "w") as out_file:
        out_file.write("".join("\t" + label for label in columns))
        out_file.write("\n")
        for gene in gene_names:
            cells = [first_value.get((gene, label), "0") for label in columns]
            out_file.write(gene + "".join("\t" + cell for cell in cells))
            out_file.write("\n")
Exemple #25
0
def query(ids):
    """Build a TargetMine protein query for ``ids`` and return its rows.

    :param ids: Sequence of identifier strings; they are comma-joined into
        the value of an ``IN`` constraint on ``Protein``.
    :returns: Whatever ``rows()`` of the constructed query returns.
    """
    columns = (
        "primaryIdentifier",
        "primaryAccession",
        "name",
        "length",
        "compounds.compound.casRegistryNumber",
        "compounds.compound.name",
        "compounds.compound.compoundGroup.name",
    )
    targetmine = Service("http://targetmine.nibio.go.jp/targetmine")
    protein_query = targetmine.new_query("Protein")
    protein_query.add_view(*columns)

    # The value is not used afterwards, but the lookup is kept at this
    # point so that an empty or un-indexable ``ids`` still fails here.
    first_id = ids[0]

    protein_query.add_constraint("Protein", "IN", ",".join(ids))
    return protein_query.rows()
Exemple #26
0
def get_yeast_gene_location(gene_name):
    '''Look up where a yeast gene sits in the genome via YeastMine (SGD).

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, int:biostart, int:bioend, int:strand]
    :rtype location: list

    '''
    from intermine.webservice import Service

    yeastmine = Service('http://yeastmine.yeastgenome.org/yeastmine/service')
    gene_query = yeastmine.new_query('Gene')

    # Output columns of the query.
    columns = (
        'primaryIdentifier',
        'secondaryIdentifier',
        'symbol',
        'name',
        'organism.shortName',
        'chromosome.primaryIdentifier',
        'chromosomeLocation.start',
        'chromosomeLocation.end',
        'chromosomeLocation.strand',
    )
    gene_query.add_view(*columns)

    # Restrict to S. cerevisiae and look the gene up by name.
    gene_query.add_constraint('organism.shortName', '=', 'S. cerevisiae',
                              code='B')
    gene_query.add_constraint('Gene', 'LOOKUP', gene_name, code='A')

    # Chromosome identifiers carry Roman numerals; map them to integers.
    chromosome_number = {
        'chrI': 1, 'chrII': 2, 'chrIII': 3, 'chrIV': 4,
        'chrV': 5, 'chrVI': 6, 'chrVII': 7, 'chrVIII': 8,
        'chrIX': 9, 'chrX': 10, 'chrXI': 11, 'chrXII': 12,
        'chrXIII': 13, 'chrXIV': 14, 'chrXV': 15, 'chrXVI': 16,
    }

    # Only the first row of the result is used.
    hit = gene_query.rows().next()

    chromosome = chromosome_number[hit['chromosome.primaryIdentifier']]
    start = hit['chromosomeLocation.start']
    end = hit['chromosomeLocation.end']
    strand = int(hit['chromosomeLocation.strand'])
    return [chromosome, start, end, strand]
Exemple #27
0
def get_yeast_gene_location(gene_name):
    """Look up where a yeast gene sits in the genome via YeastMine (SGD).

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, int:biostart, int:bioend, int:strand]
    :rtype location: list

    """
    yeastmine = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    gene_query = yeastmine.new_query("Gene")

    # Output columns of the query.
    columns = (
        "primaryIdentifier",
        "secondaryIdentifier",
        "symbol",
        "name",
        "organism.shortName",
        "chromosome.primaryIdentifier",
        "chromosomeLocation.start",
        "chromosomeLocation.end",
        "chromosomeLocation.strand",
    )
    gene_query.add_view(*columns)

    # Restrict to S. cerevisiae and look the gene up by name.
    gene_query.add_constraint("organism.shortName", "=", "S. cerevisiae",
                              code="B")
    gene_query.add_constraint("Gene", "LOOKUP", gene_name, code="A")

    # Chromosome identifiers carry Roman numerals; map them to integers.
    chromosome_number = {
        "chrI": 1, "chrII": 2, "chrIII": 3, "chrIV": 4,
        "chrV": 5, "chrVI": 6, "chrVII": 7, "chrVIII": 8,
        "chrIX": 9, "chrX": 10, "chrXI": 11, "chrXII": 12,
        "chrXIII": 13, "chrXIV": 14, "chrXV": 15, "chrXVI": 16,
    }

    # Only the first row of the result is used.
    hit = gene_query.rows().next()

    chromosome = chromosome_number[hit["chromosome.primaryIdentifier"]]
    start = hit["chromosomeLocation.start"]
    end = hit["chromosomeLocation.end"]
    strand = int(hit["chromosomeLocation.strand"])
    return [chromosome, start, end, strand]
Exemple #28
0
def ratmine(gene):
    """Fetch RatMine attributes for a RatMine identifier of ``gene``.

    The identifiers of ``gene`` are filtered down to the RatMine primary
    identifier types. For a matching identifier, a ``LOOKUP`` query
    selecting all Gene attributes is run and ``name=value`` pairs are
    scraped from the string form of every returned row.

    :param gene: Object exposing an ``identifiers`` attribute.
    :returns: Dict of the recognised attributes. The ``return`` sits inside
        the identifier loop, so only the first identifier yielded by the
        filter is processed; if the filter yields nothing the function
        falls through and returns ``None``.
    """
    wanted_types = [
        "ratmine primary id", "ratmine primary identifier",
        "ratmine primary gene id", "ratmine primary gene identifier"
    ]
    pair_pattern = r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})"

    # Attribute name found in the row text -> key in the returned dict.
    # "briefDescription" is handled separately below.
    # NOTE(review): "ncbi_gene_number" -> "ncbiGeneNumber" runs opposite to
    # the other entries (camelCase attribute -> snake_case key); confirm it
    # is intended.
    key_for = {
        "description": "description",
        "geneType": "gene_type",
        "id": "id",
        "length": "length",
        "name": "name",
        "ncbi_gene_number": "ncbiGeneNumber",
        "pharmGKBidentifier": "pharmGKB_id",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "scoreType": "score_type",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }

    matching = gnomics.objects.auxiliary_files.identifier.filter_identifiers(
        gene.identifiers, wanted_types)
    for iden in matching:

        mine = Service("http://ratmine.mcw.edu/ratmine")
        gene_class = mine.model.Gene
        lookup = mine.query(gene_class).select("*").where(
            "Gene", "LOOKUP", iden["identifier"])

        gene_object = {}
        for row in lookup.rows():
            for attr, raw in re.findall(pair_pattern, row.__str__()):
                # Drop the surrounding single quotes of quoted values.
                value = raw
                if value[0] == "'" and value[-1] == "'":
                    value = value[1:-1]
                value = value.strip()

                if attr == "briefDescription":
                    # The literal text None is stored as a real None.
                    if value == "None":
                        gene_object["brief_description"] = None
                    else:
                        gene_object["brief_description"] = value
                elif attr in key_for:
                    gene_object[key_for[attr]] = value

        return gene_object
    def download(self, genes, fields, scope=None, species=None):
        '''
        Retrieve the data selected by self.constraints and self.views and
        store it in self.dataset.

        The gene list is sent in chunks of at most 1000 identifiers. Each
        chunk becomes one "Gene" LOOKUP query (code "A") against
        self.datasource, with self.hostid passed as an extra LOOKUP
        argument when it is not empty. Entries of self.constraints are
        applied with codes "B", "C", ...; two textual forms are
        recognised: "<path>=<value>" and "<path> IS NOT NULL". Entries in
        any other form are skipped but still consume a code letter.

        :param genes: Sequence of gene identifier strings.
        :param fields: Not used by this method.
        :param scope: Not used by this method.
        :param species: Not used by this method.
        :returns: None. self.dataset is set to a list of tuples, each
            holding the row's 'symbol' followed by the value of every
            view in self.views.
        '''
        constraints = self.constraints
        views = self.views
        glist = np.array(genes)
        if len(glist) > 1000:
            # Round the chunk count UP. A plain "/" yields a fractional
            # count that array_split truncates, which left chunks larger
            # than 1000 (e.g. 1999 genes ended up in a single chunk).
            n_chunks = -(-len(glist) // 1000)
            segs = np.array_split(glist, n_chunks)
        else:
            segs = [glist]

        # store the data in here
        z = []

        # API uses letters to distinguish between constraints
        alpha = list(string.ascii_uppercase)

        for seg in segs:
            # Connect to the API
            service = SS(self.datasource)
            query = service.new_query("Gene")
            query.add_view(",".join(views))
            # Some databases require a host name
            if self.hostid != "":
                query.add_constraint("Gene", "LOOKUP", ",".join(seg),
                                     self.hostid, code="A")
            else:
                query.add_constraint("Gene", "LOOKUP", ",".join(seg), code="A")

            # Apply the constraints; "A" is taken by the LOOKUP above, so
            # the extra constraints start at index 1 ("B").
            for i, constraint in enumerate(constraints, start=1):
                letter = alpha[i]
                parts = constraint.split("=")
                if len(parts) == 2:
                    query.add_constraint(parts[0], "=", parts[1], code=letter)
                elif re.search("IS NOT NULL", constraint):
                    p1 = constraint.replace(" IS NOT NULL", "")
                    query.add_constraint(p1, "IS NOT NULL", code=letter)

            # Parse the output into a list of tuples
            for row in query.rows():
                t = [row['symbol']]
                for v in views:
                    t.append(row[v])
                z.append(tuple(t))
        self.dataset = z
Exemple #30
0
def query_mousemine(intermine_url: str, gene_id: str) -> IntermineResult:
    """
    Look ``gene_id`` up among the M. musculus sequence features of a mine.

    :param intermine_url: intermine server, eg
                          http://www.mousemine.org/mousemine/service
    :param gene_id: gene ID, eg ENSMUSG00000063180
    :return: Intermine_Result object
    """
    mine = Service(intermine_url)
    feature_query = mine.new_query("SequenceFeature")
    feature_query.add_view("primaryIdentifier")

    # Constraint A: the lookup itself; constraint B: organism filter.
    lookup_value = "{}".format(gene_id)
    feature_query.add_constraint(
        "SequenceFeature", "LOOKUP", lookup_value, code="A")
    feature_query.add_constraint(
        "organism.shortName", "=", "M. musculus", code="B")

    # Collect the primary identifier of every returned row as a string.
    identifiers = []
    for record in feature_query.rows():
        identifiers.append("{}".format(record['primaryIdentifier']))
    return intermine_response_factory(identifiers, gene_id)
Exemple #31
0
    def parse(self, limit=None):
        """Add MouseMine gene-to-phenotype associations to ``self.graph``.

        Runs one OntologyAnnotation query per two-digit MGI id prefix
        (``MGI:10*`` .. ``MGI:99*``) for taxon ``self.txid``. Every row
        becomes a G2PAssoc between the subject and the MPTerm ontology
        term, with the PubMed reference added when the row has one.

        :param limit: Optional cap on the number of id prefixes to
            process; a falsy value (``None``, 0) processes all of them.
        :returns: None
        """
        # The service handle and the logger levels do not depend on the id
        # prefix, so they are set up once instead of on every iteration.
        service = Service("http://www.mousemine.org/mousemine/service")
        # Only let ERROR and above through from these two loggers.
        logging.getLogger('Model').setLevel(logging.ERROR)
        logging.getLogger('JSONIterator').setLevel(logging.ERROR)

        count = 0
        for num in range(10, 100):
            fuzzy_gene = "MGI:{0}*".format(num)
            gene = "MGI:{0}".format(num)
            query = service.new_query("OntologyAnnotation")
            query.add_constraint("subject", "SequenceFeature")
            query.add_constraint("ontologyTerm", "MPTerm")
            query.add_view(
                "subject.primaryIdentifier", "subject.symbol",
                "subject.sequenceOntologyTerm.name", "ontologyTerm.identifier",
                "ontologyTerm.name", "evidence.publications.pubMedId",
                "evidence.comments.type", "evidence.comments.description"
            )
            query.add_sort_order("OntologyAnnotation.ontologyTerm.name", "ASC")
            query.add_constraint("subject.organism.taxonId", "=", self.txid, code="A")
            query.add_constraint("subject", "LOOKUP", fuzzy_gene, code="B")
            query.add_constraint(
                "subject.primaryIdentifier", "CONTAINS", gene, code="C")
            query.outerjoin("evidence.comments")

            for row in query.rows():
                mgi_curie = row["subject.primaryIdentifier"]
                mp_curie = row["ontologyTerm.identifier"]
                assoc = G2PAssoc(self.graph, self.name, mgi_curie, mp_curie)
                pubmed_id = row["evidence.publications.pubMedId"]
                if pubmed_id:
                    # Build the CURIE only when there is an id to put in it.
                    pub_curie = "PMID:{0}".format(pubmed_id)
                    reference = Reference(
                        self.graph, pub_curie, self.globaltt['journal article'])
                    reference.addRefToGraph()
                    assoc.add_source(pub_curie)

                assoc.add_evidence(self.globaltt['experimental phenotypic evidence'])
                assoc.add_association_to_graph()

            # Progress message after every tenth processed prefix.
            # NOTE(review): the logged numbers are the loop counter, not
            # the MGI prefix itself (which starts at 10) - confirm intent.
            if not count % 10 and count != 0:
                count_from = count - 10
                LOG.info(
                    "%s processed ids from MGI:%i* to MGI:%i*",
                    datetime.datetime.now(), count_from, count)

            count += 1
            if limit and count >= limit:
                break
def intermine_query(type):
    """Create a YeastMine query over the class ``type``.

    The query is returned without being run; its output columns are
    ``primaryIdentifier`` and ``sequence.residues``.

    :param type: Name of the class (table) to query.
    :returns: The configured query object.
    """
    from intermine.webservice import Service

    output_columns = ("primaryIdentifier", "sequence.residues")

    yeastmine = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    sequence_query = yeastmine.new_query(type)
    sequence_query.add_view(*output_columns)

    # No sort order is set here; callers may add one, e.g.
    # add_sort_order("Chromosome.primaryIdentifier", "ASC").
    return sequence_query
 def test(self):
     '''
     Tests the HumanMine API
     Look up symbol for APOBEC3G, should return APOBEC3G.

     Returns 1 when the symbol of the last returned row is APOBEC3G and
     0 otherwise, including when the query returns no rows at all.
     '''
     service = SS('http://www.humanmine.org/humanmine/service')
     query = service.new_query("Gene")
     query.add_view("symbol")
     query.add_constraint("Gene", "LOOKUP", "APOBEC3G", code="A")
     # Start from None so that an empty result gives 0 instead of an
     # UnboundLocalError at the comparison below.
     symbol = None
     for row in query.rows():
         symbol = row['symbol']
     return 1 if symbol == "APOBEC3G" else 0
Exemple #34
0
 def test(self):
     '''
     Tests the HumanMine API
     Look up symbol for APOBEC3G, should return APOBEC3G.

     Returns 1 when the symbol of the last returned row is APOBEC3G and
     0 otherwise, including when the query returns no rows at all.
     '''
     service = SS('http://www.humanmine.org/humanmine/service')
     query = service.new_query("Gene")
     query.add_view("symbol")
     query.add_constraint("Gene", "LOOKUP", "APOBEC3G", code="A")
     # Start from None so that an empty result gives 0 instead of an
     # UnboundLocalError at the comparison below.
     symbol = None
     for row in query.rows():
         symbol = row['symbol']
     return 1 if symbol == "APOBEC3G" else 0
Exemple #35
0
def query_fishmine(intermine_url: str,
                   protein_id: str,
                   query: str = "Gene") -> IntermineResult:
    """
    Find ZDB-prefixed records cross-referenced to ``protein_id``.

    :param intermine_url: URL of the InterMine service to query.
    :param protein_id: Identifier matched against
                       ``crossReferences.identifier``.
    :param query: Name of the class to query; "Gene" by default.
    :return: Result of ``intermine_response_factory`` for the matching
             primary identifiers, each prefixed with ``ZFIN:``.
    """
    mine = Service(intermine_url)
    # A separate local is used so the ``query`` argument is not rebound.
    fish_query = mine.new_query(query)
    fish_query.add_view("primaryIdentifier")

    fish_query.add_constraint(
        "primaryIdentifier", "CONTAINS", "ZDB*", code="A")
    xref_value = "{}".format(protein_id)
    fish_query.add_constraint(
        "crossReferences.identifier", "=", xref_value, code="B")

    zfin_ids = []
    for record in fish_query.rows():
        zfin_ids.append("ZFIN:{}".format(record['primaryIdentifier']))
    return intermine_response_factory(zfin_ids, protein_id)
Exemple #36
0
class Yeast(Genome):
    """Yeast genome services"""

    # Name of the genome; also the last component of the default path.
    genome_name = "Saccharomyces_cerevisiae"
    default_genome_path = os.path.join(genome_dir, genome_name)

    # YeastMine endpoint. The Service object is created once, when the
    # class body is executed, and shared through the class attribute.
    service_url = "https://yeastmine.yeastgenome.org:443/yeastmine/service"
    service = Service(service_url)
class LiveSummaryTest(unittest.TestCase):
    """Column-summary checks run against a live test-model service.

    The service root is read from the TESTMODEL_URL environment variable
    and defaults to a local intermine-demo instance. The service and the
    shared query are created when the class body is executed.
    """

    TEST_ROOT = os.getenv("TESTMODEL_URL", "http://localhost:8080/intermine-demo/service")
    SERVICE = Service(TEST_ROOT)

    QUERY = SERVICE.select("Employee.*", "department.name")

    def testNumericSummary(self):
        # A numeric column summarises to min / max / average / stdev.
        age_summary = self.QUERY.summarise("age")
        expected = (
            ("min", 10),
            ("max", 74),
            ("average", 44.878787878787875),
            ("stdev", 12.075481627447155),
        )
        for statistic, value in expected:
            self.assertEqual(value, age_summary[statistic])

    def testNonNumericSummary(self):
        # A non-numeric column summarises to a count per distinct value.
        full_time = self.QUERY.summarise("fullTime")
        self.assertEqual(56, full_time[True])
        self.assertEqual(76, full_time[False])

        by_department = self.QUERY.summarise("department.name")
        self.assertEqual(18, by_department["Sales"])

    def testSummaryAsIterator(self):
        # The same summary, consumed row by row through results().
        summary_column = "department.name"
        shared_query = self.QUERY
        summary_rows = shared_query.results(summary_path=summary_column)
        first_row = summary_rows.next()
        self.assertEqual("Accounting", first_row["item"])
        self.assertEqual(18, first_row["count"])

        self.assertEqual(first_row, shared_query.first(summary_path=summary_column))

    def testAliasing(self):
        # summarize() must give the same result as summarise().
        shared_query = self.QUERY
        british = shared_query.summarise("age")
        american = shared_query.summarize("age")
        self.assertEqual(british, american)
    def attack(self):
        """Register a throw-away user, build a random list, then deregister.

        Steps, each followed by an increment of ``self.counter``:

        1. Best effort: log in as the user and deregister it; any failure
           is ignored.
        2. Register the user and read the model classes and class keys.
        3. Pick random classes until one that has an id, has class keys
           and has a non-zero row count is found.
        4. Create a list from a random sample of up to 100 of its rows.
        5. Best effort: deregister the user again.

        :returns: None
        """
        username = "******".format(self.ident)
        password = "******"

        # Errors here are deliberately ignored; only Exception subclasses
        # are caught so KeyboardInterrupt/SystemExit still propagate.
        try:
            s = Service(self.service.root, username, password)
            s.deregister(s.get_deregistration_token())
            self.counter.add(3)
        except Exception:
            pass

        s = self.service.register(username, password)
        self.LOG.debug("Registered user " + username)
        self.counter.add(1)

        c = 0
        # random.choice needs an indexable sequence; a dict view is not
        # one on Python 3.
        classes = list(s.model.classes.values())
        self.counter.add(1)

        classkeys = s._get_json('/classkeys')['classes']
        self.counter.add(1)

        # NOTE(review): loops forever if no class ever qualifies.
        while c == 0:
            table = random.choice(classes)
            if not (table.has_id and table.name in classkeys):
                continue
            query = s.query(table.name).select(classkeys[table.name][0])

            c = query.count()
            self.counter.add(1)

        n = random.randint(1, min(100, c))
        # random.sample needs a sequence; map() returns a one-shot
        # iterator on Python 3.
        members = random.sample([row[0] for row in query.rows()], n)
        self.counter.add(1)

        self.LOG.debug("Will construct list of %s with: %r", table.name, members)

        with s.list_manager() as lm:
            created = lm.create_list(members, table.name)
            self.LOG.debug('Created list %s, size: %d', created.name, created.size)
            self.counter.add(1)

        # Best-effort clean-up, same policy as at the top.
        try:
            s.deregister(s.get_deregistration_token())
            self.counter.add(2)
        except Exception:
            pass
Exemple #39
0
def flymine(gene):
    """Collect FlyMine gene records for the Ensembl identifiers of ``gene``.

    For every identifier of ``gene`` whose ``identifier_type`` is one of
    the Ensembl spellings, a ``LOOKUP`` query selecting all Gene
    attributes is run and each returned row is turned into a dict.

    :param gene: Object exposing ``identifiers``, an iterable of dicts with
        ``identifier_type`` and ``identifier`` keys.
    :returns: List of dicts with the keys ``id``, ``primary_id``,
        ``secondary_id``, ``symbol``, ``name``, ``cyto_location``,
        ``brief_description``, ``description``, ``length``, ``score`` and
        ``score_type``. Rows read before an error are kept.
    """
    ensembl_types = [
        "ensembl", "ensembl id", "ensembl identifier", "ensembl gene id"
    ]
    obj_array = []

    for ident in gene.identifiers:
        if ident["identifier_type"].lower() not in ensembl_types:
            continue

        s = Service("www.flymine.org/query")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP",
                                            ident["identifier"])
        try:
            for row in q.rows():
                gene_object = {
                    'id': row["id"],
                    'primary_id': row["primaryIdentifier"],
                    'secondary_id': row["secondaryIdentifier"],
                    'symbol': row["symbol"],
                    'name': row["name"],
                    'cyto_location': row["cytoLocation"],
                    'brief_description': row["briefDescription"],
                    'description': row["description"],
                    'length': row["length"],
                    'score': row["score"],
                    'score_type': row["scoreType"]
                }
                obj_array.append(gene_object)
        except intermine.errors.WebserviceError:
            print(
                "A webservice error occurred. Please contact Intermine support."
            )
        except Exception:
            # This used to be an ``else`` clause, which runs when NO
            # exception occurred, so the message appeared after every
            # successful query. It now reports any other failure.
            print("Something else went wrong.")
    return obj_array
Exemple #40
0
def query_intermine(genes):
    """Build a MouseMine phenotype-annotation query for several genes.

    :param genes: Iterable of gene identifier strings; they are joined
        with ", " into the value of a single ``LOOKUP`` constraint.
    :returns: The configured OntologyAnnotation query. It is not run here.
    """
    lookup_value = ', '.join(genes)
    from intermine.webservice import Service

    columns = (
        "subject.primaryIdentifier",
        "subject.symbol",
        "subject.sequenceOntologyTerm.name",
        "ontologyTerm.identifier",
        "ontologyTerm.name",
        "evidence.publications.pubMedId",
        "evidence.comments.type",
        "evidence.comments.description",
    )

    mousemine = Service("http://www.mousemine.org/mousemine/service")
    annotation_query = mousemine.new_query("OntologyAnnotation")

    # Type constraints on the two ends of the annotation.
    annotation_query.add_constraint("ontologyTerm", "MPTerm")
    annotation_query.add_constraint("subject", "SequenceFeature")

    annotation_query.add_view(*columns)
    annotation_query.add_sort_order(
        "OntologyAnnotation.ontologyTerm.name", "ASC")

    # A: taxon id 10090; B: the gene lookup.
    annotation_query.add_constraint(
        "subject.organism.taxonId", "=", "10090", code="A")
    annotation_query.add_constraint(
        "subject", "LOOKUP", lookup_value, code="B")

    annotation_query.outerjoin("evidence.comments")
    return annotation_query
Exemple #41
0
def ratmine(gene):
    """Fetch RatMine attributes for a RatMine identifier of ``gene``.

    The identifiers of ``gene`` are filtered down to the RatMine primary
    identifier types. For a matching identifier, a ``LOOKUP`` query
    selecting all Gene attributes is run and ``name=value`` pairs are
    scraped from the string form of every returned row.

    :param gene: Object exposing an ``identifiers`` attribute.
    :returns: Dict of the recognised attributes. The ``return`` sits inside
        the identifier loop, so only the first identifier yielded by the
        filter is processed; if the filter yields nothing the function
        falls through and returns ``None``.
    """
    wanted_types = [
        "ratmine primary id", "ratmine primary identifier",
        "ratmine primary gene id", "ratmine primary gene identifier"
    ]
    pair_pattern = r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})"

    # Attribute name found in the row text -> key in the returned dict.
    # "briefDescription" is handled separately below.
    # NOTE(review): "ncbi_gene_number" -> "ncbiGeneNumber" runs opposite to
    # the other entries (camelCase attribute -> snake_case key); confirm it
    # is intended.
    key_for = {
        "description": "description",
        "geneType": "gene_type",
        "id": "id",
        "length": "length",
        "name": "name",
        "ncbi_gene_number": "ncbiGeneNumber",
        "pharmGKBidentifier": "pharmGKB_id",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "scoreType": "score_type",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }

    matching = gnomics.objects.auxiliary_files.identifier.filter_identifiers(
        gene.identifiers, wanted_types)
    for iden in matching:

        mine = Service("http://ratmine.mcw.edu/ratmine")
        gene_class = mine.model.Gene
        lookup = mine.query(gene_class).select("*").where(
            "Gene", "LOOKUP", iden["identifier"])

        gene_object = {}
        for row in lookup.rows():
            for attr, raw in re.findall(pair_pattern, row.__str__()):
                # Drop the surrounding single quotes of quoted values.
                value = raw
                if value[0] == "'" and value[-1] == "'":
                    value = value[1:-1]
                value = value.strip()

                if attr == "briefDescription":
                    # The literal text None is stored as a real None.
                    if value == "None":
                        gene_object["brief_description"] = None
                    else:
                        gene_object["brief_description"] = value
                elif attr in key_for:
                    gene_object[key_for[attr]] = value

        return gene_object
def run_queries():
    """Yield ``(name, result)`` for each runnable callable in ``queries``.

    Every attribute of the ``queries`` module that is callable and whose
    ``__name__`` is not one of the excluded helper names is called with no
    arguments, after a one-second pause.
    """
    # The returned object is not used below; the call itself is kept.
    service = Service('http://intermine.wormbase.org/tools/wormmine/service')

    excluded = ['assert_result', 'Service', 'assert_greater', 'save_txt_file']
    for attr_name in dir(queries):
        candidate = getattr(queries, attr_name)
        if not callable(candidate):
            continue
        if candidate.__name__ in excluded:
            continue
        time.sleep(1)
        yield attr_name, candidate()
Exemple #43
0
def wormmine(gene):
    """Fetch WormMine attributes for a WormMine identifier of ``gene``.

    The identifiers of ``gene`` are filtered down to the WormMine primary
    identifier types. For a matching identifier, a ``LOOKUP`` query
    selecting all Gene attributes is run and ``name=value`` pairs are
    scraped from the string form of every returned row.

    :param gene: Object exposing an ``identifiers`` attribute.
    :returns: Dict of the recognised attributes. The ``return`` sits inside
        the identifier loop, so only the first identifier yielded by the
        filter is processed; if the filter yields nothing the function
        falls through and returns ``None``.
    """
    wanted_types = [
        "wormmine primary id", "wormmine primary identifier",
        "wormmine primary gene id", "wormmine primary gene identifier"
    ]
    pair_pattern = r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})"

    # Attribute name found in the row text -> key in the returned dict.
    key_for = {
        "id": "id",
        "lastUpdated": "last_updated",
        "length": "length",
        "name": "name",
        "operon": "operon",
        "primary_id": "primary_id",
        "score": "score",
        "score_type": "score_type",
        "secondary_id": "secondary_id",
        "symbol": "symbol",
    }

    matching = gnomics.objects.auxiliary_files.identifier.filter_identifiers(
        gene.identifiers, wanted_types)
    for iden in matching:

        mine = Service("http://intermine.wormbase.org/tools/wormmine")
        gene_class = mine.model.Gene
        lookup = mine.query(gene_class).select("*").where(
            "Gene", "LOOKUP", iden["identifier"])

        gene_object = {}
        for row in lookup.rows():
            for attr, raw in re.findall(pair_pattern, row.__str__()):
                # Drop the surrounding single quotes of quoted values.
                value = raw
                if value[0] == "'" and value[-1] == "'":
                    value = value[1:-1]

                if attr in key_for:
                    gene_object[key_for[attr]] = value.strip()

        return gene_object
    def attack(self):
        """Exercise list-related endpoints against a randomly chosen list.

        Picks a random non-empty list with status 'CURRENT', reads the
        class keys, fetches one random member of the list, runs a LOOKUP
        for that member when the list type has class keys, and finally
        calculates one random applicable enrichment widget, if there is
        any. ``self.counter`` is incremented after each step.

        :returns: None
        """
        service = Service(self.service.root)
        self.counter.add(2)

        usable_lists = [
            candidate for candidate in service.get_all_lists()
            if candidate.size and candidate.status == 'CURRENT'
        ]
        Lists.LOG.debug("%d lists", len(usable_lists))
        self.counter.add(1)

        target = random.choice(usable_lists)

        # Class keys are read straight from the /classkeys endpoint.
        with closing(service.opener.open(service.root + "/classkeys")) as sock:
            classkeys = json.loads(sock.read())['classes']
            Lists.LOG.debug("Classkeys for %s are %r", target.name, classkeys[target.list_type])
            self.counter.add(1)

        q = service.query(target.list_type).where(target.list_type, 'IN', target.name)

        # Fetch a single row at a random offset inside the list.
        offset = random.randint(0, target.size - 1)
        rand_member = next(q.rows(size=1, start=offset))
        self.counter.add(1)
        Lists.LOG.debug(rand_member)

        target_keys = classkeys[target.list_type]
        if target_keys:
            lookup_query = q.where(target.list_type, 'LOOKUP', rand_member[target_keys[0]])
            Lists.LOG.debug("lookup q: %s", lookup_query)
            Lists.LOG.debug("%s should be one", lookup_query.count())
            self.counter.add(1)

        # Enrichment widgets that accept this list's type.
        suitable_widgets = [
            w for w in service.widgets.values()
            if w['widgetType'] == 'enrichment' and target.list_type in w['targets']
        ]
        self.counter.add(1)

        if suitable_widgets:
            widget = random.choice(suitable_widgets)

            Lists.LOG.debug("Calculating %s of %s", widget['name'], target.name)
            enriched = list(target.calculate_enrichment(widget['name']))
            self.counter.add(1)

            if enriched:
                Lists.LOG.debug(enriched[0])
Exemple #45
0
def query_mousemine(intermine_url: str, gene_id: str) -> IntermineResult:
    """
    Look ``gene_id`` up among the M. musculus sequence features of a mine.

    :param intermine_url: intermine server, eg
                          http://www.mousemine.org/mousemine/service
    :param gene_id: gene ID, eg ENSMUSG00000063180
    :return: Intermine_Result object
    """
    mine = Service(intermine_url)
    feature_query = mine.new_query("SequenceFeature")
    feature_query.add_view("primaryIdentifier")

    # Constraint A: the lookup itself; constraint B: organism filter.
    lookup_value = "{}".format(gene_id)
    feature_query.add_constraint(
        "SequenceFeature", "LOOKUP", lookup_value, code="A")
    feature_query.add_constraint(
        "organism.shortName", "=", "M. musculus", code="B")

    # Collect the primary identifier of every returned row as a string.
    identifiers = []
    for record in feature_query.rows():
        identifiers.append("{}".format(record['primaryIdentifier']))
    return intermine_response_factory(identifiers, gene_id)
Exemple #46
0
def templates(request):
    """Render the list of query templates offered by the known InterMines.

    The optional ``mines`` GET parameter is split on ``'+'`` into the names of
    the mines to inspect; when it is absent every ``InterMine`` record is
    inspected. Mines whose ``Service`` cannot be constructed are skipped.

    :param request: request object providing ``GET.get``.
    :returns: the result of ``render`` for ``intermine_mgr/templates.html``
        with the context keys ``existing_mines`` (all mine names),
        ``existing_templates`` (template dicts sorted case-insensitively by
        title, each listing the mines offering it) and ``user_mines`` (the
        selected mine names, or ``None``).
    """
    # Determine available InterMines and associated templates
    selected_mines = request.GET.get('mines')
    if selected_mines is not None:
        selected_mines = selected_mines.split('+')

    existing_mines = []
    existing_templates = {}
    for im in InterMine.objects.all():
        existing_mines.append(im.name)
        if selected_mines is not None and im.name not in selected_mines:
            continue
        base_url = im.url.rstrip('/')
        try:
            service = Service(base_url)
        except Exception:
            # Service is inaccessible, or some other error: skip this mine.
            # (Not a bare ``except`` so Ctrl-C / SystemExit still propagate.)
            continue
        for t_name in service.templates:
            known = existing_templates.get(t_name)
            if known is not None:
                # Already described by an earlier mine: just record this one.
                known['mines'].append(im.name)
                continue
            # Only fetch the template when its details are actually needed.
            t = service.get_template(t_name)
            existing_templates[t_name] = {
                'name': t.name,
                'title': t.title,
                'description': t.description,
                'mines': [im.name],
            }

    # Sort each template's mines, then the templates themselves, ignoring case.
    for entry in existing_templates.values():
        entry['mines'].sort(key=lambda m: m.lower())
    existing_templates = sorted(existing_templates.values(),
                                key=lambda t: t['title'].lower())

    context = {
        'existing_mines': existing_mines,
        'existing_templates': existing_templates,
        'user_mines': selected_mines,
    }
    return render(request, 'intermine_mgr/templates.html', context)
Exemple #47
0
def humanmine():
    """Build a dict of HumanMine gene attributes for a HumanMine identifier.

    For the first identifier yielded by ``filter_identifiers`` a ``LOOKUP``
    query is run against HumanMine; the string form of every result row is
    scanned for ``name=value`` pairs and the recognised ones are copied into
    the returned dict under snake_case keys.

    NOTE(review): ``gene`` is neither a parameter nor a local here; it has to
    be supplied by an enclosing or global scope - confirm.
    NOTE(review): the ``return`` sits inside the identifier loop, so only the
    first identifier is processed and ``None`` is returned implicitly when
    there is none - confirm this is intended.

    :returns: dict of the attributes found (values are stripped strings).
    """
    # Row attribute name -> key used in the returned dict.
    attribute_keys = {
        "description": "description",
        "cytoLocation": "cytogenetic_location",
        "id": "id",
        "length": "length",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "scoreType": "score_type",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }
    wanted_types = [
        "humanmine primary id", "humanmine primary identifier",
        "humanmine primary gene id", "humanmine primary gene identifier"
    ]
    pair_pattern = r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})"

    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(
            gene.identifiers, wanted_types):

        s = Service("www.humanmine.org/humanmine")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP",
                                            iden["identifier"])
        gene_object = {}
        for row in q.rows():
            for attr_name, raw_value in re.findall(pair_pattern,
                                                   row.__str__()):
                # Drop the surrounding single quotes of quoted values.
                if raw_value[0] == "'" and raw_value[-1] == "'":
                    raw_value = raw_value[1:-1]

                target_key = attribute_keys.get(attr_name)
                if target_key is not None:
                    gene_object[target_key] = raw_value.strip()

        return gene_object
Exemple #48
0
def flymine(gene):
    """Collect FlyMine gene records for the Ensembl identifiers of ``gene``.

    Every entry of ``gene.identifiers`` whose ``identifier_type`` is one of
    the Ensembl spellings (compared case-insensitively) is looked up in
    FlyMine, and one dict per result row is appended to the returned list.

    A ``WebserviceError`` raised while reading rows is reported on stdout and
    the rows gathered so far are kept. The original ``else:`` clause on the
    ``try`` ran on *success* and printed "Something else went wrong." after
    every successful query, so it has been removed; other exceptions
    propagate as before.

    :param gene: object with an ``identifiers`` iterable of dicts carrying
        ``identifier_type`` and ``identifier`` keys.
    :returns: list of dicts with the keys ``id``, ``primary_id``,
        ``secondary_id``, ``symbol``, ``name``, ``cyto_location``,
        ``brief_description``, ``description``, ``length``, ``score`` and
        ``score_type``.
    """
    ensembl_types = ("ensembl", "ensembl id", "ensembl identifier",
                     "ensembl gene id")
    obj_array = []

    for ident in gene.identifiers:
        if ident["identifier_type"].lower() not in ensembl_types:
            continue

        s = Service("www.flymine.org/query")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP",
                                            ident["identifier"])
        try:
            for row in q.rows():
                obj_array.append({
                    'id': row["id"],
                    'primary_id': row["primaryIdentifier"],
                    'secondary_id': row["secondaryIdentifier"],
                    'symbol': row["symbol"],
                    'name': row["name"],
                    'cyto_location': row["cytoLocation"],
                    'brief_description': row["briefDescription"],
                    'description': row["description"],
                    'length': row["length"],
                    'score': row["score"],
                    'score_type': row["scoreType"],
                })
        except intermine.errors.WebserviceError:
            print("A webservice error occurred. Please contact Intermine support.")
    return obj_array
Exemple #49
0
def fetchGene(GeneName):
    """Fetch a yeast gene from YeastMine and wrap it in a ``SeqRecord``.

    Runs the ``Gene_GenomicDNA`` template with a ``LOOKUP`` on ``GeneName``
    (extra value ``S. cerevisiae``) and uses only the first returned row,
    printing a short summary of it to stdout.

    :param GeneName: value passed to the template's ``LOOKUP`` constraint.
    :returns: ``SeqRecord`` built from the row's ``sequence.residues`` with
        ``id`` set to the row's ``secondaryIdentifier`` and ``name`` set to
        ``GeneName``.
    :raises ValueError: if the template returns no rows (previously this
        surfaced as an ``UnboundLocalError`` further down).
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')

    rows = template.rows(
        E={"op": "LOOKUP", "value": GeneName, "extra_value": "S. cerevisiae"}
    )

    # The service seems to return multiple similar genes, but only the first
    # one is wanted.
    first_row = next(iter(rows), None)
    if first_row is None:
        raise ValueError("No gene found for %r" % (GeneName,))

    GeneSeq = Seq(first_row["sequence.residues"])
    GeneSysName = first_row["secondaryIdentifier"]
    print(" ")
    print("I think you want...... " + first_row["secondaryIdentifier"])
    print(first_row["description"])
    print(" ")
    print(first_row["sequence.residues"])
    print(" ")
    print("Good choice! I have a feeling you're going to get lucky with this one.")
    print(" ")
    print("Give me a second to put some of my ducks in a circle...")

    # Create a record for the gene ...
    GeneRecord = SeqRecord(GeneSeq, id=GeneSysName)

    # ... and add some more information to make it useful.
    GeneRecord.name = GeneName
    # NOTE(review): this stores the systematic name (a string) in `features`;
    # kept as-is for callers, but confirm that is really what is wanted.
    GeneRecord.features = GeneSysName

    return GeneRecord
def getData(mine):
    """
    A function to get datasets corresponding to a mine
    ================================================
    The mine is resolved through the InterMine registry, its ``DataSet``
    names are collected, sorted and printed one per line.

    example:

        >>> from intermine import registry
        >>> registry.getData('flymine')
        Name: Affymetrix array: Drosophila1
        Name: Affymetrix array: Drosophila2
        Name: Affymetrix array: GeneChip Drosophila Genome 2.0 Array
        Name: Affymetrix array: GeneChip Drosophila Genome Array
        Name: Anoph-Expr data set
        Name: BDGP cDNA clone data set.....

    :param mine: mine name appended to the registry ``instances`` URL.
    :returns: ``None`` after printing, or the string
        ``"No such mine available"`` when a ``KeyError`` is raised while
        resolving or querying the mine.
    """
    x = "http://registry.intermine.org/service/instances/" + mine
    try:
        r = requests.get(x)
        # Local names no longer shadow the builtins `dict` and `list`.
        registry_info = json.loads(r.text)
        link = registry_info["instance"]["url"]
        service = Service(link)
        query = service.new_query("DataSet")
        query.add_view("name", "url")
        names = []

        for row in query.rows():
            try:
                names.append(row["name"])
            except KeyError:
                print("No info available")
        names.sort()
        for name in names:
            print("Name: " + name)
        return None
    except KeyError:
        return "No such mine available"
Exemple #51
0
def wormmine(gene):
    """Build a dict of WormMine gene attributes for a WormMine identifier.

    For the first identifier yielded by ``filter_identifiers`` a ``LOOKUP``
    query is run against WormMine; the string form of every result row is
    scanned for ``name=value`` pairs and the recognised ones are copied into
    the returned dict.

    NOTE(review): the ``return`` sits inside the identifier loop, so only the
    first identifier is processed and ``None`` is returned implicitly when
    there is none - confirm this is intended.

    :param gene: object whose ``identifiers`` are passed to
        ``filter_identifiers``.
    :returns: dict of the attributes found (values are stripped strings).
    """
    # Row attribute name -> key used in the returned dict.
    attribute_keys = {
        "id": "id",
        "lastUpdated": "last_updated",
        "length": "length",
        "name": "name",
        "operon": "operon",
        "primary_id": "primary_id",
        "score": "score",
        "score_type": "score_type",
        "secondary_id": "secondary_id",
        "symbol": "symbol",
    }
    wanted_types = ["wormmine primary id", "wormmine primary identifier", "wormmine primary gene id", "wormmine primary gene identifier"]
    pair_pattern = r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})"

    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(gene.identifiers, wanted_types):

        s = Service("http://intermine.wormbase.org/tools/wormmine")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP", iden["identifier"])
        gene_object = {}
        for row in q.rows():
            for attr_name, raw_value in re.findall(pair_pattern, row.__str__()):
                # Drop the surrounding single quotes of quoted values.
                if raw_value[0] == "'" and raw_value[-1] == "'":
                    raw_value = raw_value[1:-1]

                target_key = attribute_keys.get(attr_name)
                if target_key is not None:
                    gene_object[target_key] = raw_value.strip()

        return gene_object
    def test_user_registration(self):
        """Register a user, check its service and lists, then deregister it.

        After deregistration, ``get_all_lists`` on the user's service is
        expected to raise ``WebserviceError``.
        """
        username = '******'.format(uuid.uuid4())
        password = '******'
        try:
            s = Service(self.SERVICE.root, username, password)
            s.deregister(s.get_deregistration_token())
        except Exception:
            # Best-effort cleanup of a possibly left-over account; failure
            # here is expected when the user does not exist. Not a bare
            # ``except`` so that Ctrl-C / SystemExit still propagate.
            pass

        s = self.SERVICE.register(username, password)

        self.assertEqual(s.root, self.SERVICE.root)
        self.assertEqual(2, len(s.get_all_lists()))

        drt = s.get_deregistration_token()
        s.deregister(drt)

        self.assertRaises(WebserviceError, s.get_all_lists)
Exemple #53
0
from intermine.webservice import Service
# Connect to YeastMine with an API token.
service = Service(
    "http://yeastmine.yeastgenome.org/yeastmine/service",
    token="YOUR-API-KEY"
)

# Query the Gene class; the view lists the output columns.
query = service.new_query("Gene")
query.add_view(
    "primaryIdentifier",
    "secondaryIdentifier",
    "organism.shortName",
    "symbol",
    "name"
)

# Constraint A: genes that are members of the named list.
query.add_constraint("Gene", "IN", "systematic gene names", code="A")

for row in query.rows():
    print(row["primaryIdentifier"], row["secondaryIdentifier"],
          row["organism.shortName"], row["symbol"], row["name"])
Exemple #54
0
def fetch_yeast_locus_sequence(locus_name, flanking_size=0):
    """Acquire a sequence from SGD http://www.yeastgenome.org.

    With ``flanking_size > 0`` the flanking-region sequence of the first
    matching row is returned; with ``flanking_size == 0`` the locus sequence
    of the first matching row is returned. Any other value prints a warning
    and yields an empty sequence.

    :param locus_name: Common name or systematic name for the locus (e.g. ACT1
                       or YFL039C).
    :type locus_name: str
    :param flanking_size: The length of flanking DNA (on each side) to return
    :type flanking_size: int
    :returns: ``coral.DNA`` built from the first result's residues, or
              ``coral.DNA("")`` for an invalid ``flanking_size``.
    :raises StopIteration: if the query returns no rows.

    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

    # Get a new query on the class (table) you will be querying:
    query = service.new_query("Gene")

    if flanking_size > 0:

        # The view specifies the output columns
        # secondaryIdentifier: the systematic name (e.g. YFL039C)
        # symbol: short name (e.g. ACT1)
        # length: sequence length
        # flankingRegions.direction: Upstream or downstream (or both) of locus
        # flankingRegions.sequence.length: length of the flanking regions
        # flankingRegions.sequence.residues: sequence of the flanking regions
        query.add_view("secondaryIdentifier", "symbol", "length",
                       "flankingRegions.direction",
                       "flankingRegions.sequence.length",
                       "flankingRegions.sequence.residues")

        query.add_constraint("flankingRegions.direction", "=", "both",
                             code="A")
        query.add_constraint("Gene", "LOOKUP", locus_name, "S. cerevisiae",
                             code="B")
        # The distance is sent as kilobases with one decimal, e.g. "1.0kb".
        query.add_constraint("flankingRegions.distance", "=",
                             "{:.1f}kb".format(flanking_size / 1000.),
                             code="C")
        query.set_logic("A and B and C")

        # TODO: What to do when there's more than one result?
        # next() instead of the Python-2-only iterator.next() method.
        first_result = next(query.rows())

        seq = coral.DNA(first_result["flankingRegions.sequence.residues"])
        # TODO: add more metadata

    elif flanking_size == 0:
        # The view specifies the output columns
        query.add_view("primaryIdentifier", "secondaryIdentifier", "symbol",
                       "name", "sgdAlias", "organism.shortName",
                       "sequence.length", "sequence.residues", "description",
                       "qualifier")

        query.add_constraint("status", "IS NULL", code="D")
        query.add_constraint("status", "=", "Active", code="C")
        query.add_constraint("qualifier", "IS NULL", code="B")
        query.add_constraint("qualifier", "!=", "Dubious", code="A")
        query.add_constraint("Gene", "LOOKUP", locus_name, "S. cerevisiae",
                             code="E")

        # Qualifier is not "Dubious" or unset, status is "Active" or unset,
        # and the locus matches.
        query.set_logic("(A or B) and (C or D) and E")

        first_result = next(query.rows())
        seq = coral.DNA(first_result["sequence.residues"])
    else:
        # Reached for a negative flanking_size.
        # TODO: use the logging module instead of printing.
        print("Problem with the flanking region size....")
        seq = coral.DNA("")

    return seq
                sys.stderr.write(".")

else:

    # To run your query
    # to use it you will require the intermine python client.
    # To install the client, run the following command from a terminal:
    #
    #     sudo easy_install intermine
    #
    # For further documentation you can visit:
    #     http://intermine.readthedocs.org/en/latest/web-services/

    # The following two lines will be needed in every python script:
    from intermine.webservice import Service
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    query = service.new_query("SequenceFeature")
    query.add_view(
        "primaryIdentifier", "featureType", "secondaryIdentifier", "description",
        "sgdAlias", "symbol"
    )
    query.add_constraint("featureType", "=", "telomerase_RNA_gene", code = "Z")
    query.add_constraint("qualifier", "IS NULL", code = "W")
    query.add_constraint("qualifier", "!=", "Dubious", code = "V")
    query.add_constraint("status", "=", "Active", code = "U")
    query.add_constraint("featureType", "=", "transposable_element_gene", code = "S")
    query.add_constraint("featureType", "=", "telomeric_repeat", code = "R")
    query.add_constraint("featureType", "=", "telomere", code = "Q")
    query.add_constraint("featureType", "=", "tRNA_gene", code = "P")
    query.add_constraint("featureType", "=", "snoRNA_gene", code = "O")
    query.add_constraint("featureType", "=", "snRNA_gene", code = "N")
#!/usr/bin/python

from intermine.webservice import Service

# Print the symbol and name of every gene known to SynBioMine.
service = Service('http://synbiomine.org/query/service')
query = service.new_query()
query.add_view('Gene.symbol', 'Gene.name')
for row in query.results():
    # print() with one argument behaves the same on Python 2 and Python 3.
    print(row)
"""Examples of querying yeastmine with intermine webservice"""

__author__ = "Aaron Brooks"
__copyright__ = "Copyright 2015, "
__credits__ = ["Aaron Brooks"]
__license__ = "GPL"
__version__ = "0.0.1"
__maintainer__ = "Aaron Brooks"
__email__ = "*****@*****.**"
__status__ = "Development"

from intermine.webservice import Service
import pandas as pd

service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

#-------------------------------------------------------------------#
# Gene Info
#-------------------------------------------------------------------#
# First gene whose symbol is HFA1.
gene = service.model.Gene.where(symbol='HFA1').first()

# print() with a single argument behaves the same on Python 2 and Python 3.
print(gene.symbol + "\n" + gene.description)
print(gene)

#-------------------------------------------------------------------#
# Model templates
#-------------------------------------------------------------------#
# Run the "Gene_Pathways" template with constraint A set to symbol HFA1.
template = service.get_template("Gene_Pathways")
for row in template.results(A={"symbol": "HFA1"}):
    print(row)