Beispiel #1
0
def main(args):
    """Write mouse gene records to standard output as one JSON document.

    Runs the gene query and a series of per-gene detail queries against
    MOUSEMINE, each restricted through ``args.identifiers``, attaches the
    detail rows to the matching gene record under the query's label, and
    prints ``{"metaData": ..., "data": [...]}`` with one converted object
    per gene.
    """
    # Substitution values applied to every query template below.
    substitutions = {
        'extraConstraint':
        makeOneOfConstraint('Gene.primaryIdentifier', args.identifiers),
    }
    # The gene query creates the records; every other query only decorates
    # records that already exist.
    geneQuery = mouseGenes
    detailQueries = [
        ('synonyms', mouseSynonyms),
        ('expressed', mouseExpressedGenes),
        ('expressedImages', mouseExpressedGenesWithImages),
        ('location', mouseLocations),
        ('proteinIds', mouseProteinIds),
        ('xrefs', mouseXrefs),
        ('pantherId', mousePantherIds),
        ('myGeneLink', mouseMyGeneLinks),
    ]

    # Identifier sets used to compute the two boolean flags at output time.
    genesWithPheno = {
        row['primaryIdentifier'] for row in doQuery(mouseHasPheno, MOUSEMINE)
    }
    genesWithImpc = {
        row['primaryIdentifier'] for row in doQuery(mouseHasImpc, MOUSEMINE)
    }

    genesById = {}
    for gene in doQuery(geneQuery % substitutions, MOUSEMINE):
        gene['mgiid'] = gene['primaryIdentifier']
        genesById[gene['primaryIdentifier']] = gene

    for label, template in detailQueries:
        for row in doQuery(template % substitutions, MOUSEMINE):
            gene = genesById.get(row['primaryIdentifier'])
            if not gene:
                # Detail row for a gene the gene query did not return.
                continue
            gene.setdefault(label, []).append(row)

    metaJson = json.dumps(buildMetaObject(MOUSEMINE), indent=2)
    print('{\n  "metaData": %s,\n  "data": [' % metaJson)
    for position, gene in enumerate(genesById.values()):
        if position:
            # Separator between consecutive array elements.
            print(',', end='')
        gene["hasPheno"] = gene["primaryIdentifier"] in genesWithPheno
        gene["hasImpc"] = gene["primaryIdentifier"] in genesWithImpc
        print(json.dumps(getJsonObj(gene), indent=2))
    print(']\n}')
Beispiel #2
0
def getSamples(url):
    """Index the rows of the ``htSamples`` query by experiment and sample.

    Returns a dict mapping each ``experimentId`` to a dict keyed by the
    tuple (sample name, age, sex, structure identifier). When several rows
    share the same key within an experiment, the last row wins.
    """
    keyFields = ("samples.name", "samples.age", "samples.sex",
                 "samples.structure.identifier")
    samplesByExperiment = {}
    for row in doQuery(htSamples, url):
        sampleKey = tuple(row[field] for field in keyFields)
        experimentSamples = samplesByExperiment.setdefault(
            row['experimentId'], {})
        experimentSamples[sampleKey] = row
    return samplesByExperiment
Beispiel #3
0
def main():
    """Write genotype records to standard output as one JSON document.

    Parses the command line, restricts all queries through
    ``args.identifiers``, indexes the genotype/allele component rows by
    genotype id, collects the ids of the alleles returned by ``q_alleles``,
    and then prints ``{"metaData": ..., "data": [...]}`` with one object per
    genotype for which ``getJsonObj`` returns a truthy value. Genotypes
    whose id is in ``SKIP`` are omitted.
    """
    args = parseCmdLine()
    ids = args.identifiers
    xtra = makeOneOfConstraint('Genotype.alleles.feature.primaryIdentifier',
                               ids)
    xtra2 = makeOneOfConstraint('Allele.feature.primaryIdentifier', ids)

    # Process Genotype-AllelePairs. Build index from genotype id to list of
    # components (allele+state).
    id2components = {}
    toDelete = set(SKIP)
    for r in doQuery(q_genotypeAlleles % xtra, MOUSEMINE):
        gid = r["primaryIdentifier"]
        try:
            id2components.setdefault(gid, []).append(r)
        except Exception:
            # Best effort: a genotype whose component cannot be indexed is
            # dropped from the output. Only ordinary errors are handled here
            # so that KeyboardInterrupt / SystemExit still stop the script
            # instead of silently marking the genotype for deletion.
            toDelete.add(gid)

    # Build set of MGI ids of alleles being sent to the alliance.
    includedAlleles = set()
    for r in doQuery(q_alleles % xtra2, MOUSEMINE):
        includedAlleles.add(r['primaryIdentifier'])

    # Process genotypes. For each one, find / attach its components if any
    # and output. Screen for genotypes to be deleted.
    print('{\n  "metaData": %s,\n  "data": [' %
          json.dumps(buildMetaObject(MOUSEMINE), indent=2))
    first = True
    for g in doQuery(q_genotypes % xtra, MOUSEMINE):
        gid = g["primaryIdentifier"]
        if gid in toDelete:
            continue
        g["components"] = id2components.get(gid, [])
        gobj = getJsonObj(g, includedAlleles)
        if gobj:
            # Comma goes before every element except the first one written.
            if not first:
                print(",", end=' ')
            print(json.dumps(gobj, indent=2))
            first = False
    print("]}")
Beispiel #4
0
def loadEMAPAParents(url):
    """Load the direct EMAPA parent relations from the mine at ``url``.

    Returns a dict mapping each child term identifier to the list of its
    direct parent term identifiers, in the order the query returns them.
    The number of relation rows read is logged.
    """
    log('Loading EMAPA parents...')

    q = '''<query
    name=""
    model="genomic"
    view="OntologyRelation.childTerm.identifier
    OntologyRelation.parentTerm.identifier"
    longDescription=""
    sortOrder="OntologyRelation.childTerm.identifier asc"
    >
        <constraint path="OntologyRelation.childTerm" type="EMAPATerm"/>
        <constraint path="OntologyRelation.parentTerm" type="EMAPATerm"/>
        <constraint path="OntologyRelation.direct" op="=" value="true"/>
    </query>'''
    id2pids = {}
    # Pre-set the counter so an empty result logs 0 instead of raising on an
    # unbound loop variable; counting from 1 makes the final value the true
    # number of rows rather than the last zero-based index.
    count = 0
    for count, r in enumerate(doQuery(q, url), 1):
        id2pids.setdefault(r["childTerm.identifier"],
                           []).append(r["parentTerm.identifier"])
    log('Loaded %d parent/child relations.' % count)
    return id2pids
Beispiel #5
0
def loadEMAPA(url):
    """Load all EMAPA terms from the mine at ``url``.

    Returns a dict mapping term identifier to the query row for that term.
    The row's ``startsAt`` and ``endsAt`` values are converted to ``int``
    in place before the row is stored.
    """
    log('Loading EMAPA...')
    q = '''
    <query
      model="genomic"
      view="
      EMAPATerm.identifier
      EMAPATerm.name
      EMAPATerm.startsAt
      EMAPATerm.endsAt
      "
      >
      </query>
    '''
    termsById = {}
    for term in doQuery(q, url):
        # Normalize the two stage-bound fields to integers.
        for field in ("startsAt", "endsAt"):
            term[field] = int(term[field])
        termsById[term["identifier"]] = term
    termCount = len(termsById)
    log('Loaded %d EMAPA terms.' % termCount)
    return termsById
Beispiel #6
0
def getExpressionData(url, ids):
    """Yield expression result rows from the mine at ``url``.

    The query is restricted through ``ids`` (applied to
    ``GXDExpression.feature.primaryIdentifier``) and sorted by assay id,
    structure identifier and stage. A row is yielded only when its
    ``assayId``, ``stage`` or ``structure.identifier`` differs from the
    immediately preceding row, so consecutive rows that agree on all three
    collapse to the first one.

    This is a generator: the summary log line is written only once the
    caller has consumed it to the end.
    """
    log('Getting expression data...')
    q = '''<query
    model="genomic"
    view="
        GXDExpression.assayId
        GXDExpression.assayType
        GXDExpression.feature.primaryIdentifier
        GXDExpression.stage
        GXDExpression.structure.identifier
        GXDExpression.publication.mgiId
        GXDExpression.publication.pubMedId"
    sortOrder="GXDExpression.assayId asc GXDExpression.structure.identifier asc GXDExpression.stage asc"
    constraintLogic="A and (B or (C and D)) and E"
    >
      <constraint path="GXDExpression.detected" code="A" op="=" value="true"/>
      <constraint path="GXDExpression.genotype.hasMutantAllele" code="B" op="=" value="false"/>
      <constraint path="GXDExpression.assayType" code="C" op="=" value="In situ reporter (knock in)"/>
      <constraint path="GXDExpression.genotype.zygosity" code="D" op="=" value="ht"/>
      %s
    </query>
  ''' % makeOneOfConstraint('GXDExpression.feature.primaryIdentifier', ids)
    prev = None
    qcount = 0  # rows returned by the query
    ycount = 0  # rows actually yielded
    # Query the service the caller asked for; previously the ``url``
    # argument was ignored in favor of the MOUSEMINE global.
    for r in doQuery(q, url):
        qcount += 1
        if not prev \
        or r["assayId"] != prev["assayId"] \
        or r["stage"] != prev["stage"] \
        or r["structure.identifier"] != prev["structure.identifier"]:
            ycount += 1
            yield r
        # Always compare against the previous row, yielded or not.
        prev = r
    log('getExpressionData: %d results => %d unique results' %
        (qcount, ycount))
Beispiel #7
0
def loadSubmittedAlleles():
    """Return the set of ``primaryIdentifier`` values from the ``qAlleles``
    query run against MOUSEMINE."""
    return {row["primaryIdentifier"] for row in doQuery(qAlleles, MOUSEMINE)}
Beispiel #8
0
def getExperiments(url):
    """Yield every row of the ``htExperiments`` query run against ``url``."""
    yield from doQuery(htExperiments, url)
Beispiel #9
0
def getVariables(url):
    """Group the rows of the ``htVariables`` query by experiment.

    Returns a dict mapping ``experimentId`` to the list of rows for that
    experiment. Rows whose ``variables.name`` is falsy are skipped.
    """
    varsByExperiment = {}
    for row in doQuery(htVariables, url):
        if not row["variables.name"]:
            continue
        experimentId = row['experimentId']
        bucket = varsByExperiment.get(experimentId)
        if bucket is None:
            bucket = varsByExperiment[experimentId] = []
        bucket.append(row)
    return varsByExperiment
Beispiel #10
0
def getReferences(url):
    """Group the rows of the ``htReferences`` query by experiment.

    Returns a dict mapping ``experimentId`` to the list of rows for that
    experiment, in query order.
    """
    refsByExperiment = {}
    for row in doQuery(htReferences, url):
        experimentId = row['experimentId']
        bucket = refsByExperiment.get(experimentId)
        if bucket is None:
            bucket = refsByExperiment[experimentId] = []
        bucket.append(row)
    return refsByExperiment
Beispiel #11
0
def annotations(url, okind, skind, ids=None):
    """Yield formatted annotation records, one per evidence entry.

    Runs three queries against ``url`` (annotations, their evidence, and
    base annotations), merges their rows by annotation id, converts each
    merged record with ``applyConversions`` and formats it with
    ``formatDafJsonRecord``.

    Parameters:
        url:   service passed to ``doQuery``.
        okind: ontology term type used in the ``type=`` constraints; the
               value "DOTerm" selects the "disease" format, anything else
               selects "phenotype".
        skind: annotation subject type used in the ``type=`` constraints;
               "Allele" additionally adds the subject's feature identifier
               to the annotation and evidence views.
        ids:   passed to ``makeOneOfConstraint`` to restrict the subjects.

    Yields:
        Each truthy value returned by ``formatDafJsonRecord``.
    """
    # Substitution values shared by the three query templates below.
    qopts = {
        'alleleFeatView':
        "OntologyAnnotation.subject.feature.primaryIdentifier"
        if skind == "Allele" else "",
        'xtraConstraint':
        makeOneOfConstraint("OntologyAnnotation.subject.primaryIdentifier",
                            ids),
        'xtraConstraint2':
        makeOneOfConstraint(
            "OntologyAnnotationEvidence.annotation.subject.primaryIdentifier",
            ids),
        'okind':
        okind,
        'skind':
        skind,
    }

    # Query 1: the annotations themselves, sorted by annotation id.
    qAnnots = '''<query
    model="genomic"
    view="
        OntologyAnnotation.id
        OntologyAnnotation.subject.primaryIdentifier
        OntologyAnnotation.subject.symbol
        OntologyAnnotation.subject.name
        OntologyAnnotation.ontologyTerm.identifier
        OntologyAnnotation.ontologyTerm.name
        OntologyAnnotation.qualifier
        %(alleleFeatView)s
        "
    sortOrder="OntologyAnnotation.id asc"
    >
    <constraint path="OntologyAnnotation.ontologyTerm" type="%(okind)s"/>
    <constraint path="OntologyAnnotation.subject" type="%(skind)s"/>
    <constraint path="OntologyAnnotation.subject.organism.taxonId" op="=" value="10090"/>
    %(xtraConstraint)s
    </query>
    ''' % qopts

    # Query 2: evidence and publications per annotation, sorted by
    # annotation id and then evidence id.
    qEvidence = '''<query
    model="genomic"
    view="
        OntologyAnnotation.id
        OntologyAnnotation.evidence.id
        OntologyAnnotation.evidence.annotationDate
        OntologyAnnotation.evidence.code.code
        OntologyAnnotation.evidence.publications.id
        OntologyAnnotation.evidence.publications.pubMedId
        OntologyAnnotation.evidence.publications.mgiJnum
        OntologyAnnotation.evidence.publications.mgiId
        %(alleleFeatView)s
        "
    sortOrder="OntologyAnnotation.id asc OntologyAnnotation.evidence.id asc"
    >
    <constraint path="OntologyAnnotation.ontologyTerm" type="%(okind)s"/>
    <constraint path="OntologyAnnotation.subject" type="%(skind)s"/>
    <constraint path="OntologyAnnotation.subject.organism.taxonId" op="=" value="10090"/>
    %(xtraConstraint)s
    </query>
    ''' % qopts

    # Query 3: base annotations reached through the evidence objects,
    # sorted by annotation id and then evidence id.
    qBaseAnnots = '''<query
        model="genomic"
        view="
            OntologyAnnotationEvidence.id
            OntologyAnnotationEvidence.publications.id
            OntologyAnnotationEvidence.annotation.id
            OntologyAnnotationEvidence.baseAnnotations.subject.primaryIdentifier
            OntologyAnnotationEvidence.baseAnnotations.evidence.annotationDate
            OntologyAnnotationEvidence.baseAnnotations.evidence.publications.pubMedId
            "
        sortOrder="OntologyAnnotationEvidence.annotation.id asc OntologyAnnotationEvidence.id asc"
        >
        <constraint path="OntologyAnnotationEvidence.annotation.ontologyTerm" type="%(okind)s"/>
        <constraint path="OntologyAnnotationEvidence.annotation.subject" type="%(skind)s"/>
        <constraint path="OntologyAnnotationEvidence.baseAnnotations.evidence.publications" op="=" loopPath="OntologyAnnotationEvidence.publications"/>
        %(xtraConstraint2)s
        </query>
    ''' % qopts

    # Three streams of (annotation id, kind tag, rows-for-that-id) tuples.
    # Each groups consecutive query rows that share an annotation id.
    # NOTE(review): heapq.merge requires every stream to already be ordered
    # by the tuple's first element; this relies on the sortOrder requested in
    # each query matching Python's ordering of the returned id values --
    # confirm against what doQuery returns.
    qs = [
        map(lambda x: (x[0], 'annotation', list(x[1])),
            groupby(doQuery(qAnnots, url), lambda e: e['id'])),
        map(lambda x: (x[0], 'evidence', list(x[1])),
            groupby(doQuery(qEvidence, url), lambda e: e['id'])),
        map(lambda x: (x[0], 'baseAnnots', list(x[1])),
            groupby(doQuery(qBaseAnnots, url), lambda e: e['annotation.id'])),
    ]
    # Merge the streams and regroup by annotation id, so each iteration sees
    # the annotation / evidence / baseAnnots entries of one annotation.
    for x in groupby(heapq.merge(*qs), lambda x: x[0]):
        r = {}
        for y in x[1]:
            if y[1] == 'annotation':
                # Only the first annotation row for this id is used.
                r.update(y[2][0])
            elif y[1] == 'evidence':
                r['evidence'] = y[2]
            elif y[1] == 'baseAnnots':
                # Note that these are *all* the base annotations. Each one is associated with a
                # specific evidence object. Look for matching ids
                # the field named 'id' in the base annot record should equal the 'evidence.id'
                # in the evidence object.
                r['baseAnnots'] = y[2]
        rr = applyConversions(r, okind, skind)
        if rr:
            # One output record per entry of rr["invevidence"], paired by
            # position with rr["invbaseannots"]. Both keys are presumably
            # populated by applyConversions -- TODO confirm. The same rr dict
            # is reused and overwritten for every entry.
            for n, e in enumerate(rr["invevidence"]):
                rr["agrevidence"] = e
                rr["agrbaseannots"] = rr["invbaseannots"][n]
                rr2 = formatDafJsonRecord(
                    rr, "disease" if okind == "DOTerm" else "phenotype", skind)
                if rr2:
                    yield rr2
Beispiel #12
0
def getAlleles(url, ids):
    """Yield allele rows from the mine at ``url``, with synonyms attached.

    Three queries are run: one collecting the ids of alleles that have an
    expressed component, one collecting synonyms per allele, and the main
    allele query. The synonym and main queries are restricted through
    ``ids`` (applied to ``Allele.feature.primaryIdentifier``).

    Each yielded row gets:
      * ``synonyms``  - list of the allele's synonym values (possibly empty;
                        order is not defined, it comes from a set);
      * ``construct`` - ``<allele id>_con``, only when the row's
                        ``drivenBy`` is truthy or the allele has an
                        expressed component.
    """
    qopts = {
        'xtraConstraint':
        makeOneOfConstraint('Allele.feature.primaryIdentifier', ids)
    }

    # Query for alleles that have expressed component
    qexpressors = '''<query
        model="genomic"
        view="
            MGIExpressesComponent.allele.primaryIdentifier
            MGIExpressesComponent.allele.symbol
            "
        ></query>
        '''
    expressors = set()
    for r in doQuery(qexpressors, url):
        expressors.add(r['allele.primaryIdentifier'])

    # Query allele synonyms, build index of id -> synonyms
    # 2020-12-18: change constraints:
    #    - drop ontology annotation requirement
    #    - drop null allele type restriction
    #    - add exclusion when germline transmission = 'cell line'
    # The germline-transmission filter is written as "(D or E)" exactly as in
    # the main allele query below, so alleles with no glTransmission value
    # that the main query returns also get their synonyms.
    qsynonyms = '''<query
      model="genomic"
      view="
      Allele.primaryIdentifier
      Allele.synonyms.value
      "
      constraintLogic="A and B and C and (D or E)"
      sortOrder="Allele.primaryIdentifier asc"
      >
      <constraint code="A" path="Allele.organism.taxonId" op="=" value="10090" />
      <constraint code="B" path="Allele.alleleType" op="NONE OF">
        <value>QTL</value>
      </constraint>
      <constraint code="C" path="Allele.isWildType" op="=" value="false" />
      <constraint code="D" path="Allele.glTransmission" op="!=" value="Cell Line"/>
      <constraint code="E" path="Allele.glTransmission" op="IS NULL" />
      %(xtraConstraint)s
      </query>
    ''' % qopts
    aid2syns = {}
    for r in doQuery(qsynonyms, url):
        aid2syns.setdefault(r['primaryIdentifier'],
                            set()).add(r['synonyms.value'])

    # Main allele query.
    qalleles = '''<query
      model="genomic"
      view="
      Allele.primaryIdentifier
      Allele.symbol
      Allele.name
      Allele.alleleType
      Allele.molecularNote
      Allele.feature.primaryIdentifier
      Allele.feature.mgiType
      Allele.drivenBy
      "
      constraintLogic="A and B and C and (D or E)"
      sortOrder="Allele.primaryIdentifier asc"
      >
      <constraint code="A" path="Allele.organism.taxonId" op="=" value="10090" />
      <constraint code="B" path="Allele.alleleType" op="NONE OF">
        <value>QTL</value>
      </constraint>
      <constraint code="C" path="Allele.isWildType" op="=" value="false" />
      <constraint code="D" path="Allele.glTransmission" op="!=" value="Cell Line"/>
      <constraint code="E" path="Allele.glTransmission" op="IS NULL" />
      %(xtraConstraint)s
      </query>
    ''' % qopts

    for r in doQuery(qalleles, url):
        aid = r['primaryIdentifier']
        r['synonyms'] = list(aid2syns.get(aid, []))
        # If the allele has a driver or has expressed components, then the
        # allele has a "construct". At the Alliance, constructs must have an
        # ID, but at MGI they don't (they're not objects). So we create a
        # fake ID for it. These are not displayed and are not used to create
        # links. Constructs are dumped separately. Here we just need the ID.
        if r['drivenBy'] or aid in expressors:
            r['construct'] = aid + "_con"
        yield r