Exemples Python de n3encode : exemples d'utilisation de namespaces.n3encode

Exemple #1

0

Afficher le fichier

 def getDataBySPO(self, thingy, thingytype="s", context=None):
     """Query the triple store for statements matching a single term.

     ``thingy`` is N3-encoded and stored in the query under the key given
     by ``thingytype`` (default ``"s"``). A truthy ``context`` is
     N3-encoded and stored under the ``'c'`` key. Returns whatever
     ``self.tsc.query_statements`` returns for that query dictionary.
     """
     # The context entry is created first, so a thingytype of 'c' still
     # replaces it.
     query = {'c': namespaces.n3encode(context)} if context else {}
     query[thingytype] = namespaces.n3encode(thingy)
     return self.tsc.query_statements(query)

Exemple #2

0

Afficher le fichier

Fichier : adsrdf.py Projet : kayebohemier/semflow

 def getDataBySPO(self, thingy, thingytype="s", context=None):
     """Look up statements in the triple store by one N3-encoded term.

     The query dictionary maps ``thingytype`` (default ``"s"``) to the
     N3-encoded ``thingy``; when ``context`` is truthy its N3-encoded form
     is included under ``'c'``. The result of
     ``self.tsc.query_statements`` is returned unchanged.
     """
     criteria = {}
     if context:
         criteria.update(c=namespaces.n3encode(context))
     # Assigned last so that it wins if thingytype happens to be 'c'.
     criteria[thingytype] = namespaces.n3encode(thingy)
     return self.tsc.query_statements(criteria)

Exemple #3

0

Afficher le fichier

Fichier : adsrdf.py Projet : kayebohemier/semflow

 def getDataByDict(self, thedict, context=None):
     """Query the triple store using several terms at once.

     Every key of ``thedict`` becomes a key of the query dictionary. A
     list value is N3-encoded member by member into a new list; any other
     value is N3-encoded directly. A truthy ``context`` is N3-encoded and
     stored under the ``'c'`` key. Returns whatever
     ``self.tsc.query_statements`` returns for that query dictionary.
     """
     qdict = {}
     if context:
         qdict['c'] = namespaces.n3encode(context)
     for key, value in thedict.items():
         # isinstance() also accepts list subclasses and avoids the
         # Python-2-only types.ListType alias.
         if isinstance(value, list):  # NOT SUPPORTED BY SESAME
             qdict[key] = [namespaces.n3encode(member) for member in value]
         else:
             qdict[key] = namespaces.n3encode(value)
     return self.tsc.query_statements(qdict)

Exemple #4

0

Afficher le fichier

 def getDataByDict(self, thedict, context=None):
     """Run a statement query built from a dictionary of terms.

     Each entry of ``thedict`` is copied into the query dictionary with
     its value N3-encoded; list values are encoded element by element
     into a new list. When ``context`` is truthy its N3-encoded form is
     added under ``'c'``. The result of ``self.tsc.query_statements`` is
     returned unchanged.
     """
     qdict = {}
     if context:
         qdict['c'] = namespaces.n3encode(context)
     for key, value in thedict.items():
         # isinstance() replaces the exact-type comparison against the
         # Python-2-only types.ListType, so list subclasses are handled too.
         if isinstance(value, list):  # NOT SUPPORTED BY SESAME
             qdict[key] = [namespaces.n3encode(member) for member in value]
         else:
             qdict[key] = namespaces.n3encode(value)
     return self.tsc.query_statements(qdict)

Exemple #5

0

Afficher le fichier

 def getDataBySP(self, thingy, propthingy, context=None):
     """Return the objects of statements matching a subject and predicate.

     The predicate ``propthingy`` is always N3-encoded. The subject
     ``thingy`` is N3-encoded only when it contains a ':'; otherwise it is
     sent unchanged. A truthy ``context`` is N3-encoded and sent under the
     'c' key. The response of ``self.tsc.query_statements`` is parsed into
     a ConjunctiveGraph, and the third element of every parsed triple is
     returned, UTF-8 encoded and passed through str(), as a list.
     """
     query = {}
     if context:
         query['c'] = namespaces.n3encode(context)
     query['s'] = namespaces.n3encode(thingy) if ':' in thingy else thingy
     query['p'] = namespaces.n3encode(propthingy)
     response = self.tsc.query_statements(query)
     graph = ConjunctiveGraph()
     namespaces.bindgraph(graph)
     # this bnode handling is very fragile TODO:replace
     parsed = graph.parse(StringIO.StringIO(response))
     return [str(triple[2].encode('utf-8')) for triple in parsed]

Exemple #6

0

Afficher le fichier

Fichier : adsrdf.py Projet : DougBurke/semflow

 def getDataBySP(self, thingy, propthingy, context=None):
     """Fetch the object values for a given subject and predicate.

     Builds a query with 's' set to ``thingy`` (N3-encoded only if
     splitting it on ':' yields more than one part), 'p' set to the
     N3-encoded ``propthingy`` and, for a truthy ``context``, 'c' set to
     its N3-encoded form. The data returned by
     ``self.tsc.query_statements`` is parsed with a ConjunctiveGraph; the
     result is a list holding, for each parsed triple, its third element
     UTF-8 encoded and passed through str().
     """
     criteria = {}
     if context:
         criteria['c'] = namespaces.n3encode(context)
     has_prefix = len(thingy.split(':')) > 1
     if not has_prefix:
         criteria['s'] = thingy
     else:
         criteria['s'] = namespaces.n3encode(thingy)
     criteria['p'] = namespaces.n3encode(propthingy)
     raw = self.tsc.query_statements(criteria)
     store = ConjunctiveGraph()
     namespaces.bindgraph(store)
     # this bnode handling is very fragile TODO:replace
     triples = store.parse(StringIO.StringIO(raw))
     objects = [str(t[2].encode('utf-8')) for t in triples]
     return objects

Exemple #7

0

Afficher le fichier

 def getDataInContext(self, context=None):
     """Return the data held by the store, optionally limited to a context.

     With a truthy ``context`` the N3-encoded context is passed to
     ``self.tsc.get_in_context``; otherwise that method is called with no
     arguments. Its return value is handed back unchanged.
     """
     if not context:
         return self.tsc.get_in_context()
     return self.tsc.get_in_context(namespaces.n3encode(context))

Exemple #8

0

Afficher le fichier

 def deleteData(self, context=None):
     """Ask the store client to delete data, optionally for one context.

     With a truthy ``context`` the N3-encoded context is passed to
     ``self.tsc.deletedata``; otherwise that method is called with no
     arguments. Nothing is returned.
     """
     if not context:
         self.tsc.deletedata()
         return
     self.tsc.deletedata(namespaces.n3encode(context))

Exemple #9

0

Afficher le fichier

 def addFile(self, thefile, context=None):
     """Post a file to the store, optionally into a given context.

     ``thefile`` is passed to ``self.tsc.postfile``; when ``context`` is
     truthy its N3-encoded form is passed as a second argument. Nothing is
     returned.
     """
     if not context:
         self.tsc.postfile(thefile)
         return
     self.tsc.postfile(thefile, namespaces.n3encode(context))

Exemple #10

0

Afficher le fichier

Fichier : rdf2solarfuncs.py Projet : rahuldave/semflow-core

def getInfoForObsuri(c, solr, theuri, mission, project, othersbool=None, entrybool=False):
    daprops = []
    thedict = {}
    print "OBSURI", theuri
    # There must be better ways to do this
    themission, theproject, theobsid, junkdataid, uritail = getTailFromSplit(mission, project, theuri)
    is_mast = theuri.find("MAST") != -1
    is_chandra = theuri.find("CHANDRA") != -1
    # For this to work chandra URI's now become CHANDRA/chandra'

    debug("URITAIL", uritail)
    if is_mast:
        pquery0 = """
        SELECT ?tname WHERE {
        %s adsbase:target ?tnode.
        ?tnode adsbase:name ?tname.            
        }
        """ % (
            n3encode("uri_obs:" + uritail)
        )
        # Sprint pquery0
        res1 = c.makeQuery(pquery0)
        debug("RES1", res1)
        if len(res1) > 0:
            target = res1[0]["tname"]["value"]
        else:
            target = "Unspecified"
        # target=res1[0]['tname']['value']
        thetarget = themission + "/" + target
    elif is_chandra:
        titleray = c.getDataBySP("uri_obs:" + uritail, "adsbase:title")
        print "titleray", titleray
        if len(titleray) == 0:
            title = "Unspecified"
        else:
            title = titleray[0]
        thetarget = themission + "/" + title  # do mission title on the hope MAST naming is common.
        # In the long run it dosent matter as we need to look it up anyways
    else:
        thetarget = "None"

    # print "The target", thetarget
    thedict["targets_s"] = thetarget
    # print "::::::::::::::::", theobsid, theuri, themission, thevariable
    # thedict['obsids_s']=rinitem(theobsid)
    thedict["obsids_s"] = theproject + "/" + theobsid
    if theproject == themission:
        thedict["obsv_mission_s"] = theproject
    else:
        thedict["obsv_mission_s"] = themission + "/" + theproject
    # print theobsid, c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')
    obstypes = c.getDataBySP("uri_obs:" + uritail, "adsobsv:observationType")
    # print obstypes, ":::"
    if len(obstypes) > 0:
        if theproject == themission:
            thedict["obsvtypes_s"] = theproject + "/" + obstypes[0]
        else:
            thedict["obsvtypes_s"] = themission + "/" + theproject + "/" + obsvtypes[0]
    else:
        if theproject == themission:
            thedict["obsvtypes_s"] = theproject + "/Unspecified"
        else:
            thedict["obsvtypes_s"] = themission + "/" + theproject + "/Unspecified"

    # Hut dosent have obsvtypes. Call it MAST_HUT/None

    # Chandra data was being created using adsobsv:tExpTime when it should have been
    # adsobsv:tExptime. This should now be fixed but this check is left in to catch
    # any oddities.
    #
    theinstruments = c.getDataBySP("uri_obs:" + uritail, "adsbase:usingInstrument")
    # print "DAIN", theinstruments
    tvals = c.getDataBySP("uri_obs:" + uritail, "adsobsv:tExptime")
    # print "UTITAIL", uritail, tvals
    if len(tvals) == 0:
        raise IOError("Unable to find adsobsv:tExptime for uri_obs:{0}".format(uritail))

    else:
        if len(tvals) > 1:
            debug("MULTI-EXP", "uri_obs:{0} has adsobsv:tExptime={1}".format(uritail, tvals))

        thedict["exptime_f"] = float(tvals[0])

    tdt = c.getDataBySP("uri_obs:" + uritail, "adsbase:atTime")[0]
    # print "TDT", tdt
    obsvtime = datetime.datetime.strptime(tdt, "%Y-%m-%dT%H:%M:%S")
    # month, day, year, thehour=tdt.split()

    # th, tmin=thehour[:-2].split(':')
    # th=int(th)
    # tmin=int(tmin)
    # if thehour[-2:]=='PM' and th < 12:
    #    th=int(th)+12
    #    print "TDT", tdt
    # obsvtime=datetime.datetime(int(year), list(calendar.month_abbr).index(month), int(day), th, tmin)
    thedict["obsvtime_d"] = obsvtime.isoformat() + "Z"

    # hasDatum is a subset of hasDataProduct. How do we get sparql to fo up inhertitance hierarchy
    # Currently we have no way of knowing as the owl file hasnt been loaded in
    #    pquery="""
    #        SELECT ?dtype WHERE {
    #        {%s adsobsv:hasDataProduct ?daturi.} UNION {%s adsobsv:hasDatum ?daturi.}
    #        ?daturi adsbase:dataType ?dtype.
    #        }
    #    """ % (n3encode('uri_obs:'+uritail),n3encode('uri_obs:'+uritail) )
    #    res=c.makeQuery(pquery)
    #    #print "RES", res, pquery
    #    tempdt={}
    #    if len(res)>0:
    #        for ele in res:
    #            tkey=ele['dtype']['value']
    #            if tempdt.has_key('tkey'):
    #                tempdt[tkey]+=1
    #            else:
    #                tempdt[tkey]=1
    #        thedict['datatypes_s']=tempdt.keys()
    #    else:
    #        thedict['datatypes_s']=[]

    pquery = """
        SELECT ?daturi WHERE {
        {%s adsobsv:hasDataProduct ?daturi.} UNION {%s adsobsv:hasDatum ?daturi.}
        }
    """ % (
        n3encode("uri_obs:" + uritail),
        n3encode("uri_obs:" + uritail),
    )
    res = c.makeQuery(pquery)
    dtypes = set()
    duriset = set()
    for ele in res:
        thedatauri = ele["daturi"]["value"]
        # print "THEDATAURI", thedatauri
        datauritail = getTailFromSplit(mission, project, thedatauri, "DATA")[-1]
        dtype = c.getDataBySP("uri_dat:" + datauritail, "adsbase:dataType")[0]
        dtypes.add(dtype)
        duriset.add(datauritail)
    # BUG: Still assume one istrument. This will change, point is how? There will be both
    # multiple stuff for non-simple obs and hierarchical stuff for simple obs like gratings
    # how will we model this?
    thedict["datatypes_s"] = list(dtypes)
    debug("DATATYPES", thedict["datatypes_s"])

    # Assumes observation has instrument specified. Or we will fail
    # What if an observation used multiple instruments. Like Chandra image and spectrum.
    # BUG: What do we currently do for Chandra. And how do we organize instruments?
    theinstruments = c.getDataBySP("uri_obs:" + uritail, "adsbase:usingInstrument")
    if len(theinstruments) > 0:
        theinstrument = theinstruments[0]
    else:
        raise IOError("Unable to find adsbase:usingInstrument for uri_obs:{0}".format(uritail))
    theinstrumentname = theinstrument.split("/")[-1]

    # TODO: should be able to query the RDF store for the label to use for the instrument
    # but for now just extract the information from the URI, and remove any %-encoding
    # done
    theinstrumentname = unquote(theinstrumentname)
    thedict["instruments_s"] = "/".join(theinstrumentname.split("_"))

    # BUG: Still assume one telescope, this will change
    thetelescope = c.getDataBySP("uri_obs:" + uritail, "adsobsv:atTelescope")[0]
    thetelescopename = thetelescope.split("/")[-1]
    thedict["telescopes_s"] = "/".join(thetelescopename.split("_"))

    # BUG/TODO: note here telescope is different from instrument. Shouldnt this be
    # combined in.
    # print thedict['instruments_s']
    # pointing=c.getDataBySP('uri_obs:'+theobsid, 'adsobsv:associatedPosition')[0]
    # FAIL dune to bnode crapola ra=c.getDataBySP(pointing, 'adsobsv:ra')
    # BUG we should first even see if Pointing exists before going for ra or dec

    # This will need special handling as it is multivalued array even within obsv.
    # So it will need flattening within publications
    theemdomains = c.getDataBySP("uri_obs:" + uritail, "adsobsv:wavelengthDomain")
    # BUG:Note that by doing this emdomains is optional...Not sure we want that
    if len(theemdomains) > 0:
        thedict["emdomains_s"] = []
        for domain in theemdomains:
            thedict["emdomains_s"].append(domain.split("_")[-1])
    else:
        raise IOError("Unable to find wavelength domains for uri_obs:{0}".format(uritail))
    thepointings = c.getDataBySP("uri_obs:" + uritail, "adsobsv:associatedPosition")

    if len(thepointings) > 0:
        pquery = """
        SELECT ?ra ?dec WHERE {
        %s adsobsv:associatedPosition ?position.
        ?position adsobsv:ra ?ra.
        ?position adsobsv:dec ?dec.
            
        }
        """ % (
            n3encode("uri_obs:" + uritail)
        )

        # print pquery
        res = c.makeQuery(pquery)
        # print "POINTING", res
        ra = None
        dec = None
        if len(res) != 0:
            ra = res[0]["ra"]["value"]
            dec = res[0]["dec"]["value"]
            # print "RADEC", ra, dec
        if ra != "None" and dec != "None":
            thedict["ra_f"] = float(ra)
            thedict["dec_f"] = float(dec)
    else:
        print "******************************************No pointings for ", uritail

    # Stuff below in a separate func for props
    # proposal stuff...not searching abstracts yet

    if entrybool == True:
        # (Get the other properties relevant for observations)
        # PRIMPROPSOBSVONLY=['access_url_s', 'access_format_s', 'calib_level_i', 'datacollection_s', 'res_d', 'tres_d', 'fov_d', 'title', 'emmin', 'emmax'
        # first start with obsv, not data
        doSPQuerySingle(thedict, c, "uri_obs:" + uritail, "adsobsv:wavelengthStart", vtype="f")
        doSPQuerySingle(thedict, c, "uri_obs:" + uritail, "adsobsv:wavelengthEnd", vtype="f")
        doSPQuerySingle(thedict, c, "uri_obs:" + uritail, "adsobsv:resolution", vtype="f")
        doSPQuerySingle(thedict, c, "uri_obs:" + uritail, "adsobsv:tResolution", vtype="f")
        doSPQuerySingle(thedict, c, "uri_obs:" + uritail, "adsobsv:title", name="obsv_title")
        doSPQuerySingle(thedict, c, "uri_obs:" + uritail, "adsobsv:fov", vtype="f")
        thedict["data_collection_s"] = []
        for dtail in duriset:
            thedict["data_id_s"] = dtail
            doSPQueryMultiple(thedict, c, "uri_dat:" + dtail, "adsobsv:dataProductId")
            doSPQueryMultiple(thedict, c, "uri_dat:" + dtail, "adsobsv:dataFormat")
            doSPQueryMultiple(thedict, c, "uri_dat:" + dtail, "adsobsv:calibLevel", vtype="i")
            doSPQueryMultiple(thedict, c, "uri_dat:" + dtail, "adsobsv:dataURL")
            pquery0 = """
                SELECT ?tname WHERE {
                %s adsbase:fromDataCollection ?tnode.
                ?tnode adsbase:name ?tname.            
            }
            """ % (
                n3encode("uri_dat:" + dtail)
            )
            # Sprint pquery0
            res1 = c.makeQuery(pquery0)
            print "RES1", res1
            debug("RES1", res1)
            if len(res1) > 0:
                thedict["data_collection_s"].append(res1[0]["tname"]["value"])
            else:
                thedict["data_collection_s"].append("Unspecified")
    # Now iterate over proposals and get proposal info, then merge in.
    # print "thedict now is", thedict

    if not othersbool:
        return thedict
    if othersbool.has_key("prop") and othersbool["prop"] == True:
        daprops.extend(DAPROPSPROP)
        # print ';;;;;;',daprops
        props = c.getDataBySP("uri_obs:" + uritail, "adsbase:asAResultOfProposal")
        debug("PROPS", props)
        propray = []
        for propuri in props:
            propdict = getInfoForPropuri(c, solr, propuri, mission, project, othersbool)
            propray.append(propdict)
        # print "???????????", thedict, propray
        doAugment(daprops, thedict, propray)
        # return thedict
    # print 'post propping',thedict
    if othersbool.has_key("bib") and othersbool["bib"] == True:
        # print ';;;;;;',daprops
        daprops.extend(DAPROPSBIB)
        print ";;;;;;", daprops
        # get papers
        papersray = []
        # for each paper get dict
        aqstr = "SELECT ?pap {{ ?pap adsbase:aboutScienceProcess <{0}>.}}".format(theuri)
        # print "QUERY", aqstr
        papersen = c.makeQuery(aqstr)
        print "PAPERSEN", papersen
        # BUG: it seems there can be observations with no papers. should we have some default stuff?
        papers = set()
        for pa in papersen:
            papers.add(pa["pap"]["value"])
        for papuri in papers:
            bibcode = papuri.split("#")[-1]
            print "bibcode", bibcode, papuri
            papdict = getInfoForBibcode(c, solr, bibcode, mission, project, othersbool)
            # print "PAPDICT", papdict
            # Following guards against 0 entries which happens with bibcodes with tmp in them
            if len(papdict.keys()) > 0:
                papersray.append(papdict)
        # do an augment
        # print "+++++", thedict
        # print '=====', daprops
        thedict["numpubs_i"] = len(papersray)
        doAugment(daprops, thedict, papersray)
        # retufn the dict
        # return thedict
    print "at end", thedict
    return thedict

Exemple #11

0

Afficher le fichier

Fichier : adsrdf.py Projet : kayebohemier/semflow

 def getDataInContext(self, context=None):
     """Fetch data from the store client, optionally for a single context.

     Calls ``self.tsc.get_in_context`` with the N3-encoded ``context`` when
     one is given (truthy), or with no arguments otherwise, and returns
     its result unchanged.
     """
     fetch = self.tsc.get_in_context
     if context:
         return fetch(namespaces.n3encode(context))
     return fetch()

Exemple #12

0

Afficher le fichier

Fichier : adsrdf.py Projet : kayebohemier/semflow

 def deleteData(self, context=None):
     """Delete data through the store client, optionally for one context.

     Calls ``self.tsc.deletedata`` with the N3-encoded ``context`` when one
     is given (truthy), or with no arguments otherwise. Returns nothing.
     """
     delete = self.tsc.deletedata
     if context:
         delete(namespaces.n3encode(context))
     else:
         delete()

Exemple #13

0

Afficher le fichier

Fichier : adsrdf.py Projet : kayebohemier/semflow

 def addFile(self, thefile,context=None):
     """Send a file to the store client, optionally into one context.

     Calls ``self.tsc.postfile`` with ``thefile`` and, when ``context`` is
     truthy, the N3-encoded context as a second argument. Returns nothing.
     """
     post = self.tsc.postfile
     if context:
         post(thefile, namespaces.n3encode(context))
     else:
         post(thefile)

Exemple #14

0

Afficher le fichier

Fichier : rdf2solarfuncs.py Projet : nuhaltinsoy/semflow

def getInfoForObsuri(c, solr, theuri, mission, project, othersbool=None, entrybool=False):
    daprops=[]
    thedict={}
    print "OBSURI", theuri
    # There must be better ways to do this
    themission, theproject, theobsid, junkdataid, uritail=getTailFromSplit(mission, project, theuri)
    is_mast = theuri.find('MAST') != -1
    is_chandra = theuri.find('CHANDRA') != -1
    #For this to work chandra URI's now become CHANDRA/chandra'
    
    
 
    
    debug("URITAIL", uritail)
    if is_mast:
        pquery0="""
        SELECT ?tname WHERE {
        %s adsbase:target ?tnode.
        ?tnode adsbase:name ?tname.            
        }
        """ % (n3encode('uri_obs:'+uritail))
        #Sprint pquery0
        res1=c.makeQuery(pquery0)
        debug("RES1", res1)
        if len(res1) > 0:
            target=res1[0]['tname']['value']
        else:
            target='Unspecified'
        #target=res1[0]['tname']['value']
        thetarget=themission+"/"+target
    elif is_chandra:
        titleray=c.getDataBySP('uri_obs:'+uritail, 'adsbase:title')
        if len(titleray)==0:
            title="Unspecified"
        else:
            title=titleray[0]
        thetarget=themission+"/"+title#do mission title on the hope MAST naming is common.
        #In the long run it dosent matter as we need to look it up anyways
    else:
        thetarget="None"

    # print "The target", thetarget
    thedict['targets_s']=thetarget
    #print "::::::::::::::::", theobsid, theuri, themission, thevariable
    #thedict['obsids_s']=rinitem(theobsid)
    thedict['obsids_s']=theproject+"/"+theobsid
    if theproject==themission:
            thedict['obsv_mission_s']=theproject
        else:
            thedict['obsv_mission_s']=themission+"/"+theproject
    #print theobsid, c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')
    obstypes=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')
    if len(obstypes)>0:
        if theproject==themission:
            thedict['obsvtypes_s']=theproject+"/"+obstypes[0]
        else:
            thedict['obsvtypes_s']=themission+"/"+theproject+"/"+obsvtypes[0]
    else:
        if theproject==themission:
            thedict['obsvtypes_s']=theproject+"/Unspecified"
        else:
            thedict['obsvtypes_s']=themission+"/"+theproject+"/Unspecified"
            
    #Hut dosent have obsvtypes. Call it MAST_HUT/None

    # Chandra data was being created using adsobsv:tExpTime when it should have been
    # adsobsv:tExptime. This should now be fixed but this check is left in to catch
    # any oddities.
    #
    tvals = c.getDataBySP('uri_obs:'+uritail, 'adsobsv:tExptime')
    if len(tvals) == 0:
        raise IOError("Unable to find adsobsv:tExptime for uri_obs:{0}".format(uritail))

    else:
        if len(tvals) > 1:
            debug("MULTI-EXP", "uri_obs:{0} has adsobsv:tExptime={1}".format(uritail, tvals))

        thedict['exptime_f'] = float(tvals[0])
        
    tdt=c.getDataBySP('uri_obs:'+uritail, 'adsbase:atTime')[0]
    #print "TDT", tdt
    obsvtime=datetime.datetime.strptime(tdt,"%Y-%m-%dT%H:%M:%S")
    #month, day, year, thehour=tdt.split()

    #th, tmin=thehour[:-2].split(':')
    #th=int(th)
    #tmin=int(tmin)
    #if thehour[-2:]=='PM' and th < 12:
    #    th=int(th)+12
    #    print "TDT", tdt
    #obsvtime=datetime.datetime(int(year), list(calendar.month_abbr).index(month), int(day), th, tmin)
    thedict['obsvtime_d']=obsvtime.isoformat()+"Z"
    
    #hasDatum is a subset of hasDataProduct. How do we get sparql to fo up inhertitance hierarchy
    #Currently we have no way of knowing as the owl file hasnt been loaded in
#    pquery="""
#        SELECT ?dtype WHERE {
#        {%s adsobsv:hasDataProduct ?daturi.} UNION {%s adsobsv:hasDatum ?daturi.}
#        ?daturi adsbase:dataType ?dtype.
#        }
#    """ % (n3encode('uri_obs:'+uritail),n3encode('uri_obs:'+uritail) )
#    res=c.makeQuery(pquery)
#    #print "RES", res, pquery
#    tempdt={}
#    if len(res)>0:
#        for ele in res:
#            tkey=ele['dtype']['value']
#            if tempdt.has_key('tkey'):
#                tempdt[tkey]+=1
#            else:
#                tempdt[tkey]=1
#        thedict['datatypes_s']=tempdt.keys()
#    else:
#        thedict['datatypes_s']=[]
    
    pquery="""
        SELECT ?daturi WHERE {
        {%s adsobsv:hasDataProduct ?daturi.} UNION {%s adsobsv:hasDatum ?daturi.}
        }
    """ % (n3encode('uri_obs:'+uritail),n3encode('uri_obs:'+uritail) )
    res=c.makeQuery(pquery)
    dtypes=set()
    duriset=set()
    for ele in res:
        thedatauri=ele['daturi']['value']
        #print "THEDATAURI", thedatauri
        datauritail=getTailFromSplit(mission, project, thedatauri,'DATA')[-1]
        dtype=c.getDataBySP('uri_dat:'+datauritail, 'adsbase:dataType')[0]
        dtypes.add(dtype)
        duriset.add(datauritail)
    #BUG: Still assume one istrument. This will change, point is how? There will be both
    #multiple stuff for non-simple obs and hierarchical stuff for simple obs like gratings
    #how will we model this?
    thedict['datatypes_s']=list(dtypes)
    debug("DATATYPES", thedict['datatypes_s'])
    
    #Assumes observation has instrument specified. Or we will fail
    #What if an observation used multiple instruments. Like Chandra image and spectrum.
    #BUG: What do we currently do for Chandra. And how do we organize instruments?
    theinstruments=c.getDataBySP('uri_obs:'+uritail, 'adsbase:usingInstrument')
    if len(theinstruments)>0:
        theinstrument=theinstruments[0]
    else:
        raise IOError("Unable to find adsbase:usingInstrument for uri_obs:{0}".format(uritail))
    theinstrumentname=theinstrument.split('/')[-1]

    # TODO: should be able to query the RDF store for the label to use for the instrument
    # but for now just extract the information from the URI, and remove any %-encoding
    # done
    theinstrumentname = unquote(theinstrumentname)
    thedict['instruments_s']="/".join(theinstrumentname.split('_'))

    #BUG: Still assume one telescope, this will change
    thetelescope=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:atTelescope')[0]
    thetelescopename=thetelescope.split('/')[-1]
    thedict['telescopes_s']="/".join(thetelescopename.split('_'))
    
    #BUG/TODO: note here telescope is different from instrument. Shouldnt this be
    #combined in.
    #print thedict['instruments_s']
    #pointing=c.getDataBySP('uri_obs:'+theobsid, 'adsobsv:associatedPosition')[0]
    #FAIL dune to bnode crapola ra=c.getDataBySP(pointing, 'adsobsv:ra')
    #BUG we should first even see if Pointing exists before going for ra or dec
    
    #This will need special handling as it is multivalued array even within obsv.
    #So it will need flattening within publications
    theemdomains=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:wavelengthDomain')
    #BUG:Note that by doing this emdomains is optional...Not sure we want that
    if len(theemdomains) > 0:
        thedict['emdomains_s']=[]
        for domain in theemdomains:
            thedict['emdomains_s'].append(domain.split('_')[-1])
    else:
        raise IOError("Unable to find wavelength domains for uri_obs:{0}".format(uritail))    
    thepointings=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:associatedPosition')
    
    if len(thepointings) > 0:
        pquery="""
        SELECT ?ra ?dec WHERE {
        %s adsobsv:associatedPosition ?position.
        ?position adsobsv:ra ?ra.
        ?position adsobsv:dec ?dec.
            
        }
        """ % (n3encode('uri_obs:'+uritail))
    
        #print pquery
        res=c.makeQuery(pquery)
        #print "POINTING", res
        ra=None
        dec=None
        if len(res)!=0:
            ra=res[0]['ra']['value']
            dec=res[0]['dec']['value']
            #print "RADEC", ra, dec
        if ra!='None' and dec!='None':
            thedict['ra_f']=float(ra)
            thedict['dec_f']=float(dec)
    else:
        print "******************************************No pointings for ", uritail
    
    #Stuff below in a separate func for props    
    #proposal stuff...not searching abstracts yet
    
    if entrybool==True:
        #(Get the other properties relevant for observations)
        #PRIMPROPSOBSVONLY=['access_url_s', 'access_format_s', 'calib_level_i', 'datacollection_s', 'res_d', 'tres_d', 'fov_d', 'title', 'emmin', 'emmax'
        #first start with obsv, not data
        doSPQuerySingle(thedict, c, 'uri_obs:'+uritail, 'adsobsv:wavelengthStart', vtype='f')
        doSPQuerySingle(thedict, c, 'uri_obs:'+uritail, 'adsobsv:wavelengthEnd', vtype='f')
        doSPQuerySingle(thedict, c, 'uri_obs:'+uritail, 'adsobsv:resolution', vtype='f')
        doSPQuerySingle(thedict, c, 'uri_obs:'+uritail, 'adsobsv:tResolution', vtype='f')
        doSPQuerySingle(thedict, c, 'uri_obs:'+uritail, 'adsobsv:title', name='obsv_title')
        doSPQuerySingle(thedict, c, 'uri_obs:'+uritail, 'adsobsv:fov', vtype='f')
        thedict['data_collection_s']=[]
        for dtail in duriset:
            thedict['data_id_s']=dtail
            doSPQueryMultiple(thedict, c, 'uri_dat:'+dtail, 'adsobsv:dataProductId')
            doSPQueryMultiple(thedict, c, 'uri_dat:'+dtail, 'adsobsv:dataFormat')
            doSPQueryMultiple(thedict, c, 'uri_dat:'+dtail, 'adsobsv:calibLevel', vtype='i')
            doSPQueryMultiple(thedict, c, 'uri_dat:'+dtail, 'adsobsv:dataURL')
            pquery0="""
                SELECT ?tname WHERE {
                %s adsbase:fromDataCollection ?tnode.
                ?tnode adsbase:name ?tname.            
            }
            """ % (n3encode('uri_dat:'+dtail))
        #Sprint pquery0
            res1=c.makeQuery(pquery0)
            print "RES1", res1
            debug("RES1", res1)
            if len(res1) > 0:
                thedict['data_collection_s'].append(res1[0]['tname']['value'])
            else:
                thedict['data_collection_s'].append('Unspecified')
    #Now iterate over proposals and get proposal info, then merge in.
    #print "thedict now is", thedict
    if not othersbool:
        return thedict
    if othersbool.has_key('prop') and othersbool['prop']==True:
        daprops.extend(DAPROPSPROP)
        #print ';;;;;;',daprops
        props=c.getDataBySP('uri_obs:'+uritail, 'adsbase:asAResultOfProposal')
        debug("PROPS", props)
        propray=[]
        for propuri in props:
            propdict=getInfoForPropuri(c, solr, propuri, mission, project, othersbool)
            propray.append(propdict)
        #print "???????????", thedict, propray
        doAugment(daprops, thedict, propray)
        #return thedict
    #print thedict
    if othersbool.has_key('bib') and othersbool['bib']==True:
        print ';;;;;;',daprops
        daprops.extend(DAPROPSBIB)
        print ';;;;;;',daprops
        #get papers
        papersray=[]
        #for each paper get dict
        aqstr = "SELECT ?pap {{ ?pap adsbase:aboutScienceProcess <{0}>.}}".format(theuri)
        #print "QUERY", aqstr
        papersen = c.makeQuery(aqstr)
        #print "PAPERSEN", papersen
        papers = set()
        for pa in papersen:
            papers.add(pa["pap"]["value"])
        for papuri in papers:
            bibcode=papuri.split('#')[-1]
            #print "bibcode", bibcode, papuri
            papdict=getInfoForBibcode(c, solr, bibcode, mission, project, othersbool)
            #print "PAPDICT", papdict
            papersray.append(papdict)
        #do an augment
        #print "+++++", thedict
        #print '=====', daprops
        doAugment(daprops, thedict, papersray)
        #retufn the dict
        #return thedict
    #print thedict    
    return thedict

Exemple #15

0

Afficher le fichier

Fichier : rdf2solr5.py Projet : DougBurke/semflow

def getInfoForBibcode(c, solr, bibcode, mission, project):
    bibcodeuri='uri_bib:'+bibcode
    result={}
    iduri=c.getDataBySP(bibcodeuri, 'fabio:isRealizationOf')
    debug("returned", "{0} {1}".format(iduri, bibcodeuri))
    iduri=iduri[0]

    # we use the original URI when accessing the author names
    idurifull = iduri
    
    result['id']=iduri.split('#')[1]
    theid=result['id']
    iduri='uri_bib:'+result['id']
    debug("IDURI", "{0} {1}".format(iduri, result['id']))
        
    result['bibcode']=bibcode

    # Should get the rdf:label for the concept (caching it)
    # rather than decoding the URI, but needs the label added to the
    # store. Note that we unquote the fragment to ensure %3B and
    # other keywords are displayed sensibly.
    # 
    result['keywords']=[unquote(e.split('#')[1]).replace('_',' ') for e in c.getDataBySP(iduri, 'adsbib:keywordConcept')]
    
    result['title']=c.getDataBySP(iduri, 'adsbase:title')[0].decode("utf-8") # DJB added decode statement as I think we want to send across a unicode string
    pquery0="""
        SELECT ?atext WHERE {
            uri_bib:%s adsbib:hasAbstract [ adsbib:abstractText ?atext ] .            
        }
     """ % (result['id'])

    #print pquery0
    res1=c.makeQuery(pquery0)
    #print res1[0]
    result['abstract']=res1[0]['atext']['value']

    debug("TITLE", result['title'].encode("ascii", "replace")) ## can contain UTF-8
    citationcount=len(c.getDataBySP(iduri, 'cito:cites'))
    result['citationcount_i']=citationcount

    # Paper type handling:
    # 
    # The adsbib:paperType is currently only added for Chandra data, but
    # this means that papers with data from Chandra + MAST missions will
    # have this setting. Since the predicate does not indicate which mission
    # catagorised the paper as "science", we have to either guess, leave
    # as "science" (i.e. with no mission attribution), or ignore.
    # Doug has elected to go for the ignore route since it doesn't seem
    # to be useful at the present time.
    """
    ptray=c.getDataBySP(bibcodeuri, 'adsbib:paperType')

    The following is broken since a Chandra paper with MAST/euve data
    will lose the "chandra/science" setting if the Chandra data is added
    to Solr before EUVE.

    if len(ptray)>0:
        # DJB:
        #   for papers with Chandra and MAST data will have
        #   a paper type of "science", which results in an
        #   entry of mission+"/science" -> "MAST/science"
        #   as well as (added later on) project+"/Regular"
        #
        #   We switch to using project rather than mission here,
        #   so get "chandra/science", "iue/science", ...
        #   although the MAST ones will get re-added later on
        #   (but duplication left in since not all MAST missions
        #   will have an entry added here).
        #
        #paptypes=[mission+"/"+ele for ele in ptray]
        paptypes=[project+"/"+ele for ele in ptray]
        debug("PTYPE", "{0} {1}".format(bibcode, ptray))
    else:
        paptypes=[]
        debug("PTYPE", "{0} {1}".format(bibcode, "NONE"))

    """    
    
    # TODO:
    #
    # We want to store an "author list" as well as the individual
    # authors, so that we can get the ordering correct, but we do not
    # have that information in the RDF store at present. Storing the
    # author list should remove the issue we have when a paper has the
    # same author name appear more than once. Storing an author list
    # is neat, but then how can we have a "only display the first n
    # authors"?  One option would be to create two versions: the full
    # list and a short form, but this is a bit messy.

    # NOTE:
    #
    # Since each author name is stored with a UUID at the end, and
    # will be added multiple times to a paper, if the paper uses data
    # from multiple missions, then we get multiple copies of an
    # author. So we go to the effort of decoding the authors to get a
    # unique set, which means that if an authorname is repeated twice
    # - e.g. Terlevich and Terlevich - then we will lose information
    # if the names match completely. Also, we now query the RDF store
    # for the agents:normName field for each author rather than decode
    # from the URI, although this may slow things down.
    """
    authoren=c.getDataBySP(iduri, 'pav:authoredBy')
    #print authoren
    #BUG: one slash too many in authors you think?
    result['author']=[unquote(e.split('/')[-2]).replace('_',' ') for e in authoren]
    """

    aqstr = "SELECT ?name {{ <{0}> <http://swan.mindinformatics.org/ontologies/1.2/pav/authoredBy> [ <http://swan.mindinformatics.org/ontologies/1.2/agents/normName> ?name ].}}".format(idurifull)
    authoren = c.makeQuery(aqstr)
    authorlist = set()
    for au in authoren:
        authorlist.add(au["name"]["value"])
        
    result['author'] = list(authorlist)
    
    #print result['author']
    result['keywords_s']=result['keywords']
    result['author_s']=result['author']
    #get the publication uri
    result['pubyear_i']=int(c.getDataBySP(bibcodeuri, 'adsbib:pubDate')[0].split()[1])
    theobjects=c.getDataBySP(bibcodeuri, 'adsbase:hasAstronomicalSource')
    debug("THEOBJECTS", "{0} {1}".format(bibcode, len(theobjects)))
    objectlist=[]
    for theobj in theobjects:
        #print "theobj", theobj
        odata=c.getDataBySP('uri_source:'+theobj.split('/')[-1], 'adsbase:hasMetadataString')
        #print "theobj", theobj, odata
        if len(odata)>0:
            odict=eval(odata[0]) # why does this need to be an eval? aha, because we are storing a Python dictionary in the string!
            oid=odict['id']
            # Strip out the leading 'NAME ' from object identifiers
            if oid.startswith("NAME "):
                oid = oid[5:]
            otype=odict['otype']
            ouri=theobj
            objectlist.append({'oid':oid, 'otype':otype, 'ouri':ouri})
        else:
            print "PROBLEM", bibcode, theobj, odata
    result['objectnames']=[e['oid'] for e in objectlist]
    result['objecttypes']=[e['otype'] for e in objectlist]
    result['objectnames_s']=result['objectnames']
    result['objecttypes_s']=result['objecttypes']
    ######FLAG
    #if mission=='CHANDRA':
    #    result['missions_s']=mission
    #else:
    #    result['missions_s']=mission+"/"+project
    #print result['objectnames']
    #theobsids=[rinitem(splitns(e)) for e in c.getDataBySP(bibcodeuri, 'adsbase:aboutScienceProduct')]
    theobsiduris=c.getDataBySP(bibcodeuri, 'adsbase:aboutScienceProcess')
    #print "OBSIDS", bibcodeuri, theobsiduris
    obsray=[]
    #TESTnotice by this we dont uniq telescopes or data types...what does this mean for the numbers, if anything?
    #daprops=['obsids_s','obsvtypes_s','exptime_f','obsvtime_d','instruments_s', 'telescopes_s', 'emdomains_s', 'missions_s', 'targets_s', 'ra_f','dec_f', 'datatypes_s','propids_s', 'proposaltitle', 'proposalpi', 'proposalpi_s', 'proposaltype_s']
    #daprops=['obsids_s','obsvtypes_s','exptime_f','obsvtime_d','instruments_s', 'telescopes_s', 'emdomains_s',  'targets_s', 'ra_f','dec_f', 'datatypes_s','propids_s', 'proposaltitle', 'proposalpi', 'proposalpi_s', 'proposaltype_s']
    daprops=['obsids_s','obsvtypes_s','exptime_f','obsvtime_d','instruments_s', 'telescopes_s', 'emdomains_s',  'targets_s', 'ra_f','dec_f', 'datatypes_s','propids_s', 'proposaltitle', 'proposalpi', 'proposalpi_s', 'proposaltype_s']
    
    debug("THEOBSIDURIS", theobsiduris)
    datatypes=[]
    #olddict=solr.search('id:'+theid)
    missions=set()
    papertypes=set()
    for theuri in theobsiduris:
        thedict={}

        # There must be better ways to do this
        is_mast = theuri.find('MAST') != -1
        is_chandra = theuri.find('CHANDRA') != -1
        
        #BUG: make this polymorphic
        if is_mast:
            themission, theproject,thevariable, theobsid=splitns(theuri, atposition=-3)
            uritail=themission+"/"+theproject+"/"+thevariable+"/"+theobsid
            #thedict['missions_s']=themission+"/"+theproject

            """ For now Doug is excluding the paper type facet

            # See the discussion for the creation of paptypes above
            # since there is some awkwardness here (some MAST papers
            # end up having an adsbib:paperType value since they also
            # contain Chandra data).
            #
            # papertypes.add(theproject+"/Regular")
            papertypes.add(theproject+"/science")
            """

            missions.add(themission+"/"+theproject)

        elif is_chandra:
            themission, thevariable, theobsid=splitns(theuri)
            theproject=themission#like Chandra/Chandra
            uritail=themission+"/"+thevariable+"/"+theobsid
            #thedict['missions_s']=themission # this should be in RDF!!

            """ Paper type handling is currently removed

            # this is needed to handle papers with multiple missions
            papertypes.add(theproject+"/science")
            """

            missions.add(theproject)
        else:
            raise ValueError("Unable to decode URI for mission: " + theuri)
        
        debug("URITAIL", uritail)
        if is_mast:
            pquery0="""
            SELECT ?tname WHERE {
            %s adsbase:target ?tnode.
            ?tnode adsbase:name ?tname.            
            }
            """ % (n3encode('uri_obs:'+uritail))
            #Sprint pquery0
            res1=c.makeQuery(pquery0)
            debug("RES1", res1)
            if len(res1) > 0:
                target=res1[0]['tname']['value']
            else:
                target='Unspecified'
            #target=res1[0]['tname']['value']
            thetarget=themission+"/"+target
        elif is_chandra:
            titleray=c.getDataBySP('uri_obs:'+uritail, 'adsbase:title')
            if len(titleray)==0:
                title="Unspecified"
            else:
                title=titleray[0]
            thetarget=themission+"/"+title
        else:
            thetarget="None"

        # print "The target", thetarget
        thedict['targets_s']=thetarget
        #print "::::::::::::::::", theobsid, theuri, themission, thevariable
        #thedict['obsids_s']=rinitem(theobsid)
        thedict['obsids_s']=theproject+"/"+theobsid

        #print theobsid, c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')
        obstypes=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')
        if len(obstypes)>0:
            if theproject==themission:
                thedict['obsvtypes_s']=theproject+"/"+obstypes[0]
            else:
                thedict['obsvtypes_s']=themission+"/"+theproject+"/"+obsvtypes[0]
        else:
            if theproject==themission:
                thedict['obsvtypes_s']=theproject+"/Unspecified"
            else:
                thedict['obsvtypes_s']=themission+"/"+theproject+"/Unspecified"
                
        #Hut dosent have obsvtypes. Call it MAST_HUT/None

        # Chandra data was being created using adsobsv:tExpTime when it should have been
        # adsobsv:tExptime. This should now be fixed but this check is left in to catch
        # any oddities.
        #
        tvals = c.getDataBySP('uri_obs:'+uritail, 'adsobsv:tExptime')
        if len(tvals) == 0:
            raise IOError("Unable to find adsobsv:tExptime for uri_obs:{0}".format(uritail))

        else:
            if len(tvals) > 1:
                debug("MULTI-EXP", "uri_obs:{0} has adsobsv:tExptime={1}".format(uritail, tvals))

            thedict['exptime_f'] = float(tvals[0])
            
        tdt=c.getDataBySP('uri_obs:'+uritail, 'adsbase:atTime')[0]
        #print "TDT", tdt
        obsvtime=datetime.datetime.strptime(tdt,"%Y-%m-%dT%H:%M:%S")
        #month, day, year, thehour=tdt.split()

        #th, tmin=thehour[:-2].split(':')
        #th=int(th)
        #tmin=int(tmin)
        #if thehour[-2:]=='PM' and th < 12:
        #    th=int(th)+12
        #    print "TDT", tdt
        #obsvtime=datetime.datetime(int(year), list(calendar.month_abbr).index(month), int(day), th, tmin)
        thedict['obsvtime_d']=obsvtime.isoformat()+"Z"
        
        #hasDatum is a subset of hasDataProduct. How do we get sparql to fo up inhertitance hierarchy
        #Currently we have no way of knowing as the owl file hasnt been loaded in
        pquery="""
            SELECT ?dtype WHERE {
            {%s adsobsv:hasDataProduct ?daturi.} UNION {%s adsobsv:hasDatum ?daturi.}
            ?daturi adsbase:dataType ?dtype.
            }
        """ % (n3encode('uri_obs:'+uritail),n3encode('uri_obs:'+uritail) )
        res=c.makeQuery(pquery)
        #print "RES", res, pquery
        tempdt={}
        if len(res)>0:
            for ele in res:
                tkey=ele['dtype']['value']
                if tempdt.has_key('tkey'):
                    tempdt[tkey]+=1
                else:
                    tempdt[tkey]=1
            thedict['datatypes_s']=tempdt.keys()
        else:
            thedict['datatypes_s']=[]

        #BUG: Still assume one istrument. This will change, point is how? There will be both
        #multiple stuff for non-simple obs and hierarchical stuff for simple obs like gratings
        #how will we model this?
        debug("DATATYPES", thedict['datatypes_s'])
        theinstrument=c.getDataBySP('uri_obs:'+uritail, 'adsbase:usingInstrument')[0]
        theinstrumentname=theinstrument.split('/')[-1]

        # TODO: should be able to query the RDF store for the label to use for the instrument
        # but for now just extract the information from the URI, and remove any %-encoding
        # done
        theinstrumentname = unquote(theinstrumentname)
        thedict['instruments_s']="/".join(theinstrumentname.split('_'))

        #BUG: Still assume one telescope, this will change
        thetelescope=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:atTelescope')[0]
        thetelescopename=thetelescope.split('/')[-1]
        thedict['telescopes_s']="/".join(thetelescopename.split('_'))
        #print thedict['instruments_s']
        #pointing=c.getDataBySP('uri_obs:'+theobsid, 'adsobsv:associatedPosition')[0]
        #FAIL dune to bnode crapola ra=c.getDataBySP(pointing, 'adsobsv:ra')
        #BUG we should first even see if Pointing exists before going for ra or dec
        
        #This will need special handling as it is multivalued array even within obsv.
        #So it will need flattening within publications
        theemdomains=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:wavelengthDomain')
        #BUG:Note that by doing this emdomains is optional...Not sure we want that
        if len(theemdomains) > 0:
            thedict['emdomains_s']=[]
            for domain in theemdomains:
                thedict['emdomains_s'].append(domain.split('_')[-1])
            
        thepointings=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:associatedPosition')
        
        if len(thepointings) > 0:
            pquery="""
            SELECT ?ra ?dec WHERE {
            %s adsobsv:associatedPosition ?position.
            ?position adsobsv:ra ?ra.
            ?position adsobsv:dec ?dec.
                
            }
            """ % (n3encode('uri_obs:'+uritail))
        
            #print pquery
            res=c.makeQuery(pquery)
            #print "POINTING", res
            ra=None
            dec=None
            if len(res)!=0:
                ra=res[0]['ra']['value']
                dec=res[0]['dec']['value']
                #print "RADEC", ra, dec
            if ra!='None' and dec!='None':
                thedict['ra_f']=float(ra)
                thedict['dec_f']=float(dec)
        else:
            print "******************************************No pointings for ", uritail
            
        #proposal stuff...not searching abstracts yet
        props=c.getDataBySP('uri_obs:'+uritail, 'adsbase:asAResultOfProposal')
        debug("PROPS", props)
        #BUG: again assuming only one proposal here. When we get paper proposals this will
        #Not be true any more. We should also disambiguate observational from paper proposals.
        #though paper proposals will be assoced with papers, not here, so this should be obsvprop
        #only
        #what happens when like in 2002ApJ...573..157N, this shows up for multiple missions
        if len(props)>0:
            propuri=props[0]
            debug("PROPURI", propuri)
            if propuri.find('MAST')!=-1:
                themission, theproject,thevariable, thepropid=splitns(propuri, atposition=-3)
                proptail=themission+"/"+theproject+"/"+thevariable+"/"+thepropid
            else:
                themission, thevariable, thepropid=splitns(propuri)
                theproject=themission#like Chandra/Chandra
                proptail=themission+"/"+thevariable+"/"+thepropid
            debug("PROPTAIL", proptail)
            #themission, thevariable, thepropid=splitns(propuri)
            #proptail=themission+"/"+thevariable+"/"+thepropid
            thedict['propids_s']=theproject+"/"+thepropid
            #print proptail, n3encode('uri_prop:'+proptail), c.getDataBySP('uri_prop:'+proptail, 'adsbase:title')
            proposaltitles=c.getDataBySP('uri_prop:'+proptail, 'adsbase:title')
            if len(proposaltitles)>0:
                thedict['proposaltitle']=proposaltitles[0]
            else:
                thedict['proposaltitle']='No Info'
                
            #proposal type already has project or mission included
            proposaltypes=c.getDataBySP('uri_prop:'+proptail, 'adsobsv:observationProposalType')
            if len(proposaltypes)>0:
                thedict['proposaltype_s']=proposaltypes[0]
            else:
                # The map between bibcode, obsid and proposal can contain proposals
                # for which we have no other data. Instead of saying 'No Info' we can
                # at least add in the mission name.
                #
                # TODO: should we just remove proposaltype_s for these records? If not,
                # do we want to add in fake proposals for other missions that do not have
                # them (probably not)?
                #
                #thedict['proposaltype_s']='No Info'
                if themission == "MAST":
                    thedict['proposaltype_s'] = theproject + '/None'
                elif themission == "CHANDRA":
                    thedict['proposaltype_s'] = 'CHANDRA/None'
                else:
                    raise ValueError("Unexpected mission '{0}' for {1}".format(mission, propuri))

            qstr = "SELECT ?name WHERE { <" + propuri + "> adsbase:principalInvestigator [ agent:fullName ?name ] . }"
            pinameres = c.makeQuery(qstr)
            nres = len(pinameres)
            if nres == 0:
                piname = 'No Info'
            else:
                if nres != 1:
                    print("DBG: found {0} proposal pis for {1}, using first from {2}".format(nres, propuri, pinameres))
                piname = pinameres[0]["name"]["value"]
                # the following should not occur but just in case
                if piname.strip() == "":
                    logger.debug("PINAME: found ' ' so converting to 'No Info'; should not happen")
                    piname = "No Info"

            thedict['proposalpi'] = piname
            thedict['proposalpi_s']=thedict['proposalpi']
            
        #BUG: SHOULD we have something like this associating None's where there is no proposal?????'    
        #else:
        #    thedict['propids_s']=themission+"/None"
            
            #print thedict
        obsray.append(thedict)
        
    result['missions_s']=list(missions)

    """ paper types are currently removed

    paptypes.extend(list(papertypes))
    if len(paptypes) == 0:
        debug("PTYPE", "bibcode={0} has no paper type!".format(bibcode))

    result['papertype_s']=paptypes
    """

    #print "OBSRAY", obsray
    if len(obsray)>0:
        for tkey in daprops:
            #print "tkey is ", tkey
            temptkey=[e[tkey] for e in obsray if e.has_key(tkey)]
            #print "temptkey", temptkey
            temp2=[item if hasattr(item,'__iter__') else [item] for item in temptkey]
            #print "temp2", temp2
            if len(temp2) >0:
                result[tkey]=reduce(lambda x,y: x+y, temp2)
            else:
                result[tkey]=[]
    return result

Exemple #16

0

Afficher le fichier

def getInfoForBibcode(bibcode):
    c=adsrdf.ADSConnection(SESAME, REPOSITORY)
    bibcodeuri='uri_bib:'+bibcode
    result={}
    iduri=c.getDataBySP(bibcodeuri, 'fabio:isRealizationOf')
    print "returned", iduri, bibcodeuri
    iduri=iduri[0]

    result['id']=iduri.split('#')[1]
    iduri='uri_bib:'+result['id']
    print "IDURI", iduri, result['id']
        
    result['bibcode']=bibcode
    result['keywords']=[e.split('#')[1].replace('_',' ') for e in c.getDataBySP(iduri, 'adsbib:keywordConcept')]
    result['title']=c.getDataBySP(iduri, 'adsbase:title')[0]
    pquery0="""
        SELECT ?atext WHERE {
            uri_bib:%s adsbib:hasAbstract ?anode.
            ?anode adsbib:abstractText ?atext.            
        }
     """ % (result['id'])
        
    #print pquery0
    res1=c.makeQuery(pquery0)
    #print res1[0]
    result['abstract']=res1[0]['atext']['value']
    print "TITLE", result['title']
    citationcount=len(c.getDataBySP(iduri, 'cito:cites'))
    result['citationcount_i']=citationcount
    #this is the first thing that can have multiple stuff from chandra, hut and other
    #we still dont handle this
    ptray=c.getDataBySP(bibcodeuri, 'adsbib:paperType')
    if len(ptray)>0:
        result['papertype_s']=ptray
        print "PTYPE", bibcode, ptray
    else:
        result['papertype_s']=["None"]
        print "PTYPE", bibcode, "NONE"
    
    #Above is only accurate when we dont do overlaps. For HUT/Chandra overlap, we should
    #be doing None/Something overlap but i do this as just Something should be fine
    #Itake the position that "None", if you want to institutionalize it, should be put in the rdf    
    #print "PAPERTYPE", result['papertype_s']
    authoren=c.getDataBySP(iduri, 'pav:authoredBy')
    #print authoren
    #BUG: one slash too many in authors you think?
    result['author']=[unquote(e.split('/')[-2]).replace('_',' ') for e in authoren]
    #print result['author']
    result['keywords_s']=result['keywords']
    result['author_s']=result['author']
    #get the publication uri
    result['pubyear_i']=int(c.getDataBySP(bibcodeuri, 'adsbib:pubDate')[0].split()[1])
    theobjects=c.getDataBySP(bibcodeuri, 'adsbase:hasAstronomicalSource')
    objectlist=[]
    for theobj in theobjects:
        #print "theobj", theobj
        odata=c.getDataBySP('uri_source:'+theobj.split('/')[-1], 'adsbase:hasMetadataString')
        odict=eval(odata[0])
        oid=odict['id']
        otype=odict['otype']
        ouri=theobj
        objectlist.append({'oid':oid, 'otype':otype, 'ouri':ouri})
    result['objectnames']=[e['oid'] for e in objectlist]
    result['objecttypes']=[e['otype'] for e in objectlist]
    result['objectnames_s']=result['objectnames']
    result['objecttypes_s']=result['objecttypes']
    print result['objectnames']
    #theobsids=[rinitem(splitns(e)) for e in c.getDataBySP(bibcodeuri, 'adsbase:aboutScienceProduct')]
    theobsiduris=c.getDataBySP(bibcodeuri, 'adsbase:aboutScienceProcess')
    #print "OBSIDS", bibcodeuri, theobsiduris
    obsray=[]
    #TESTnotice by this we dont uniq telescopes or data types...what does this mean for the numbers, if anything?
    daprops=['obsids_s','obsvtypes_s','exptime_f','obsvtime_d','instruments_s', 'telescopes_s', 'emdomains_s', 'missions_s', 'targets_s', 'ra_f','dec_f', 'datatypes_s','propids_s', 'proposaltitle', 'proposalpi', 'proposalpi_s', 'proposaltype_s']
    print "THEOBSIDURIS", theobsiduris
    datatypes=[]
    for theuri in theobsiduris:
        thedict={}
        #BUG: make this polymorphic
        themission, thevariable, theobsid=splitns(theuri)
        uritail=themission+"/"+thevariable+"/"+theobsid
        print "URITAIL", uritail
        thedict['missions_s']=themission # this should be in RDF!!
        

        if theuri.find('MAST')!=-1:
            pquery0="""
            SELECT ?tname WHERE {
            %s adsbase:target ?tnode.
            ?tnode adsbase:name ?tname.            
            }
            """ % (n3encode('uri_obs:'+uritail))
            #Sprint pquery0
            res1=c.makeQuery(pquery0)
            target=res1[0]['tname']['value']
            thetarget=themission+"/"+target
        elif theuri.find('CHANDRA'):
            titleray=c.getDataBySP('uri_obs:'+uritail, 'adsbase:title')
            if len(titleray)==0:
                title="Unspecified"
            else:
                title=titleray[0]
            thetarget=themission+"/"+title
        else:
            thetarget="None"
        print "The target", thetarget
        thedict['targets_s']=thetarget
        #print "::::::::::::::::", theobsid, theuri, themission, thevariable
        #thedict['obsids_s']=rinitem(theobsid)
        thedict['obsids_s']=themission+"/"+theobsid

        #print theobsid, c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')
        obstypes=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')
        if len(obstypes)>0:
            thedict['obsvtypes_s']=obstypes[0]
        else:
            thedict['obsvtypes_s']=themission+"/None"
        #Hut dosent have obsvtypes. Caal it MAST_HUT/None
        print "???", c.getDataBySP('uri_obs:'+uritail, 'adsobsv:tExptime'), c.getDataBySP('uri_obs:'+uritail, 'adsobsv:tExpTime')
        try:
            thedict['exptime_f']=float(c.getDataBySP('uri_obs:'+uritail, 'adsobsv:tExpTime')[0])
        except:
            thedict['exptime_f']=float(c.getDataBySP('uri_obs:'+uritail, 'adsobsv:tExptime')[0])
        tdt=c.getDataBySP('uri_obs:'+uritail, 'adsbase:atTime')[0]
        #print "TDT", tdt
        obsvtime=datetime.datetime.strptime(tdt,"%Y-%m-%dT%H:%M:%S")
        #month, day, year, thehour=tdt.split()

        #th, tmin=thehour[:-2].split(':')
        #th=int(th)
        #tmin=int(tmin)
        #if thehour[-2:]=='PM' and th < 12:
        #    th=int(th)+12
        #    print "TDT", tdt
        #obsvtime=datetime.datetime(int(year), list(calendar.month_abbr).index(month), int(day), th, tmin)
        thedict['obsvtime_d']=obsvtime.isoformat()+"Z"
        
        #hasDatum is a subset of hasDataProduct. How do we get sparql to fo up inhertitance hierarchy
        #Currently we have no way of knowing as the owl file hasnt been loaded in
        pquery="""
            SELECT ?dtype WHERE {
            {%s adsobsv:hasDataProduct ?daturi.} UNION {%s adsobsv:hasDatum ?daturi.}
            ?daturi adsbase:dataType ?dtype.
            }
        """ % (n3encode('uri_obs:'+uritail),n3encode('uri_obs:'+uritail) )
        res=c.makeQuery(pquery)
        #print "RES", res, pquery
        tempdt={}
        if len(res)>0:
            for ele in res:
                tkey=ele['dtype']['value']
                if tempdt.has_key('tkey'):
                    tempdt[tkey]+=1
                else:
                    tempdt[tkey]=1
            thedict['datatypes_s']=tempdt.keys()
        else:
            thedict['datatypes_s']=[]
        #BUG: Still assume one istrument. This will change, point is how? There will be both
        #multiple stuff for non-simple obs and hierarchical stuff for simple obs like gratings
        #how will we model this?
        print "DATATYPES", thedict['datatypes_s']
        theinstrument=c.getDataBySP('uri_obs:'+uritail, 'adsbase:usingInstrument')[0]
        theinstrumentname=theinstrument.split('/')[-1]
        thedict['instruments_s']="/".join(theinstrumentname.split('_'))
        #BUG: Still assume one telescope, this will change
        thetelescope=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:atTelescope')[0]
        thetelescopename=thetelescope.split('/')[-1]
        thedict['telescopes_s']="/".join(thetelescopename.split('_'))
        #print thedict['instruments_s']
        #pointing=c.getDataBySP('uri_obs:'+theobsid, 'adsobsv:associatedPosition')[0]
        #FAIL dune to bnode crapola ra=c.getDataBySP(pointing, 'adsobsv:ra')
        #BUG we should first even see if Pointing exists before going for ra or dec
        
        #This will need special handling as it is multivalued array even within obsv.
        #So it will need flattening within publications
        theemdomains=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:wavelengthDomain')
        #BUG:Note that by doing this emdomains is optional...Not sure we want that
        if len(theemdomains) > 0:
            thedict['emdomains_s']=[]
            for domain in theemdomains:
                thedict['emdomains_s'].append(domain.split('_')[-1])
            
        thepointings=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:associatedPosition')
        
        if len(thepointings) > 0:
            pquery="""
            SELECT ?ra ?dec WHERE {
            %s adsobsv:associatedPosition ?position.
            ?position adsobsv:ra ?ra.
            ?position adsobsv:dec ?dec.
                
            }
            """ % (n3encode('uri_obs:'+uritail))
        
            #print pquery
            res=c.makeQuery(pquery)
            #print "POINTING", res
            ra=None
            dec=None
            if len(res)!=0:
                ra=res[0]['ra']['value']
                dec=res[0]['dec']['value']
                #print "RADEC", ra, dec
            if ra!='None' and dec!='None':
                thedict['ra_f']=float(ra)
                thedict['dec_f']=float(dec)
        else:
            print "******************************************No pointings for ", uritail
            
        #proposal stuff...not searching abstracts yet
        props=c.getDataBySP('uri_obs:'+uritail, 'adsbase:asAResultOfProposal')
        #BUG: again assuming only one proposal here. When we get paper proposals this will
        #Not be true any more. We should also disambiguate observational from paper proposals.
        #though paper proposals will be assoced with papers, not here, so this should be obsvprop
        #only
        if len(props)>0:
            propuri=props[0]
            #print "PROPURI", propuri
            themission, thevariable, thepropid=splitns(propuri)
            proptail=themission+"/"+thevariable+"/"+thepropid
            thedict['propids_s']=themission+"/"+thepropid
            #print proptail, n3encode('uri_prop:'+proptail), c.getDataBySP('uri_prop:'+proptail, 'adsbase:title')
            thedict['proposaltitle']=c.getDataBySP('uri_prop:'+proptail, 'adsbase:title')[0]
            thedict['proposaltype_s']=c.getDataBySP('uri_prop:'+proptail, 'adsobsv:observationProposalType')[0]
            e=c.getDataBySP('uri_prop:'+proptail, 'adsbase:principalInvestigator')[0]
            #print "PI", e
            thedict['proposalpi']=unquote(e.split('/')[-2]).replace('_',' ')
            thedict['proposalpi_s']=thedict['proposalpi']
        
        #BUG: SHOULD we have something like this associating None's where there is no proposal?????'    
        #else:
        #    thedict['propids_s']=themission+"/None"
            
            #print thedict
        obsray.append(thedict)
    
    #print "OBSRAY", obsray
    if len(obsray)>0:
        for tkey in daprops:
            #print "tkey is ", tkey
            temptkey=[e[tkey] for e in obsray if e.has_key(tkey)]
            #print "temptkey", temptkey
            temp2=[item if hasattr(item,'__iter__') else [item] for item in temptkey]
            #print "temp2", temp2
	    if len(temp2) >0:
            	result[tkey]=reduce(lambda x,y: x+y, temp2)
	    else:
		result[tkey]=[]
    return result

Exemple #17

0

Afficher le fichier

Fichier : rdf2solr2.py Projet : DougBurke/semflow

def getInfoForBibcode(bibcode):
    c=adsrdf.ADSConnection(SESAME, REPOSITORY)
    bibcodeuri='uri_bib:'+bibcode
    result={}
    iduri=c.getDataBySP(bibcodeuri, 'fabio:isRealizationOf')
    print "returned", iduri
    iduri=iduri[0]

    result['id']=iduri.split('#')[1]
    iduri='uri_bib:'+result['id']
    print "IDURI", iduri, result['id']
        
    result['bibcode']=bibcode
    result['keywords']=[e.split('#')[1].replace('_',' ') for e in c.getDataBySP(iduri, 'adsbib:keywordConcept')]
    result['title']=c.getDataBySP(iduri, 'adsbase:title')[0]
    pquery0="""
        SELECT ?atext WHERE {
            uri_bib:%s adsbib:hasAbstract ?anode.
            ?anode adsbib:abstractText ?atext.            
        }
     """ % (result['id'])
        
    print pquery0
    res1=c.makeQuery(pquery0)
    #print res1[0]
    result['abstract']=res1[0]['atext']['value']
    #print "TITLE", result['title']
    citationcount=len(c.getDataBySP(iduri, 'cito:cites'))
    result['citationcount_i']=citationcount
    ptray=c.getDataBySP(bibcodeuri, 'adsbib:paperType')
    if len(ptray)>0:
        result['papertype_s']=ptray[0]
        #print "PAPERTYPE", result['papertype_s']
    authoren=c.getDataBySP(iduri, 'pav:authoredBy')
    #print authoren
    #BUG: one slash too many in authors you think?
    result['author']=[unquote(e.split('/')[-2]).replace('_',' ') for e in authoren]
    #print result['author']
    result['keywords_s']=result['keywords']
    result['author_s']=result['author']
    #get the publication uri
    result['pubyear_i']=int(c.getDataBySP(bibcodeuri, 'adsbib:pubDate')[0].split()[1])
    theobjects=c.getDataBySP(bibcodeuri, 'adsbase:hasAstronomicalSource')
    objectlist=[]
    for theobj in theobjects:
        print "theobj", theobj
        odata=c.getDataBySP('uri_source:'+theobj.split('/')[-1], 'adsbase:hasMetadataString')
        odict=eval(odata[0])
        oid=odict['id']
        otype=odict['otype']
        ouri=theobj
        objectlist.append({'oid':oid, 'otype':otype, 'ouri':ouri})
    result['objectnames']=[e['oid'] for e in objectlist]
    result['objecttypes']=[e['otype'] for e in objectlist]
    result['objectnames_s']=result['objectnames']
    result['objecttypes_s']=result['objecttypes']
    print result['objectnames']
    #theobsids=[rinitem(splitns(e)) for e in c.getDataBySP(bibcodeuri, 'adsbase:aboutScienceProduct')]
    theobsiduris=c.getDataBySP(bibcodeuri, 'adsbase:aboutScienceProcess')
    print theobsiduris
    obsray=[]
    daprops=['obsids_s','obsvtypes_s','exptime_f','obsvtime_d','instruments_s','ra_f','dec_f', 'propids_s', 'proposaltitle', 'proposalpi', 'proposalpi_s', 'proposaltype_s']
    for theuri in theobsiduris:
        thedict={}
        themission, thevariable, theobsid=splitns(theuri)
        print "::::::::::::::::", theobsid, theuri
        thedict['obsids_s']=rinitem(theobsid)
        uritail=themission+"/"+thevariable+"/"+theobsid+"/"
        print theobsid, c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')
        thedict['obsvtypes_s']=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')[0]
        print c.getDataBySP('uri_obs:'+uritail, 'adsobsv:tExptime')
        thedict['exptime_f']=float(c.getDataBySP('uri_obs:'+uritail, 'adsobsv:tExpTime')[0])
        tdt=c.getDataBySP('uri_obs:'+uritail, 'adsbase:atTime')[0]
        print "TDT", tdt
        obsvtime=datetime.datetime.strptime(tdt,"%Y-%m-%dT%H:%M:%S")
        #month, day, year, thehour=tdt.split()

        #th, tmin=thehour[:-2].split(':')
        #th=int(th)
        #tmin=int(tmin)
        #if thehour[-2:]=='PM' and th < 12:
        #    th=int(th)+12
        #    print "TDT", tdt
        #obsvtime=datetime.datetime(int(year), list(calendar.month_abbr).index(month), int(day), th, tmin)
        thedict['obsvtime_d']=obsvtime.isoformat()+"Z"
        
        theinstrument=c.getDataBySP('uri_obs:'+uritail, 'adsbase:usingInstrument')[0]
        #print theinstrument
        theinstrumentname=theinstrument.split('/')[-1]
        #print theinstrumentname
        thedict['instruments_s']="/".join(theinstrumentname.split('_'))
        #print thedict['instruments_s']
        #pointing=c.getDataBySP('uri_obs:'+theobsid, 'adsobsv:associatedPosition')[0]
        #FAIL dune to bnode crapola ra=c.getDataBySP(pointing, 'adsobsv:ra')
        #BUG we should first even see if Pointing exists before going for ra or dec
        thepointings=c.getDataBySP('uri_obs:'+uritail, 'adsobsv:associatedPosition')
        if len(thepointings) > 0:
            pquery="""
            SELECT ?ra ?dec WHERE {
            %s adsobsv:associatedPosition ?position.
            ?position adsobsv:ra ?ra.
            ?position adsobsv:dec ?dec.
                
            }
            """ % (n3encode('uri_obs:'+uritail))
        
            #print pquery
            res=c.makeQuery(pquery)
            #print "POINTING", res
            ra=None
            dec=None
            if len(res)!=0:
                ra=res[0]['ra']['value']
                dec=res[0]['dec']['value']
                #print "RADEC", ra, dec
            if ra!='None' and dec!='None':
                thedict['ra_f']=float(ra)
                thedict['dec_f']=float(dec)
        else:
            print "******************************************No pointings for ", uritail
            
        #proposal stuff...not searching abstracts yet
        propuri=c.getDataBySP('uri_obs:'+uritail, 'adsbase:asAResultOfProposal')[0]
        #print "PROPURI", propuri
        themission, thevariable, thepropid=splitns(propuri)
        proptail=themission+"/"+thevariable+"/"+thepropid+"/"
        thedict['propids_s']=themission+"/"+thepropid
        #print proptail, n3encode('uri_prop:'+proptail), c.getDataBySP('uri_prop:'+proptail, 'adsbase:title')
        thedict['proposaltitle']=c.getDataBySP('uri_prop:'+proptail, 'adsbase:title')[0]
        thedict['proposaltype_s']=c.getDataBySP('uri_prop:'+proptail, 'adsobsv:observationProposalType')[0]
        e=c.getDataBySP('uri_prop:'+proptail, 'adsbase:principalInvestigator')[0]
        #print "PI", e
        thedict['proposalpi']=unquote(e.split('/')[-2]).replace('_',' ')
        thedict['proposalpi_s']=thedict['proposalpi']
        #print thedict
        obsray.append(thedict)
    
    #print "OBSRAY", obsray
    if len(obsray)>0:
        for tkey in daprops:
            result[tkey]=[e[tkey] for e in obsray if e.has_key(tkey)]
    return result

Exemple #18

0

Afficher le fichier

Fichier : rdf2solr5.py Projet : nuhaltinsoy/semflow

def getInfoForBibcode(c, solr, bibcode, mission, project):
    bibcodeuri = 'uri_bib:' + bibcode
    result = {}
    iduri = c.getDataBySP(bibcodeuri, 'fabio:isRealizationOf')
    debug("returned", "{0} {1}".format(iduri, bibcodeuri))
    iduri = iduri[0]

    # we use the original URI when accessing the author names
    idurifull = iduri

    result['id'] = iduri.split('#')[1]
    theid = result['id']
    iduri = 'uri_bib:' + result['id']
    debug("IDURI", "{0} {1}".format(iduri, result['id']))

    result['bibcode'] = bibcode

    # Should get the rdf:label for the concept (caching it)
    # rather than decoding the URI, but needs the label added to the
    # store. Note that we unquote the fragment to ensure %3B and
    # other keywords are displayed sensibly.
    #
    result['keywords'] = [
        unquote(e.split('#')[1]).replace('_', ' ')
        for e in c.getDataBySP(iduri, 'adsbib:keywordConcept')
    ]

    result['title'] = c.getDataBySP(iduri, 'adsbase:title')[0].decode(
        "utf-8"
    )  # DJB added decode statement as I think we want to send across a unicode string
    pquery0 = """
        SELECT ?atext WHERE {
            uri_bib:%s adsbib:hasAbstract [ adsbib:abstractText ?atext ] .            
        }
     """ % (result['id'])

    #print pquery0
    res1 = c.makeQuery(pquery0)
    #print res1[0]
    result['abstract'] = res1[0]['atext']['value']

    debug("TITLE", result['title'].encode("ascii",
                                          "replace"))  ## can contain UTF-8
    citationcount = len(c.getDataBySP(iduri, 'cito:cites'))
    result['citationcount_i'] = citationcount

    # Paper type handling:
    #
    # The adsbib:paperType is currently only added for Chandra data, but
    # this means that papers with data from Chandra + MAST missions will
    # have this setting. Since the predicate does not indicate which mission
    # catagorised the paper as "science", we have to either guess, leave
    # as "science" (i.e. with no mission attribution), or ignore.
    # Doug has elected to go for the ignore route since it doesn't seem
    # to be useful at the present time.
    """
    ptray=c.getDataBySP(bibcodeuri, 'adsbib:paperType')

    The following is broken since a Chandra paper with MAST/euve data
    will lose the "chandra/science" setting if the Chandra data is added
    to Solr before EUVE.

    if len(ptray)>0:
        # DJB:
        #   for papers with Chandra and MAST data will have
        #   a paper type of "science", which results in an
        #   entry of mission+"/science" -> "MAST/science"
        #   as well as (added later on) project+"/Regular"
        #
        #   We switch to using project rather than mission here,
        #   so get "chandra/science", "iue/science", ...
        #   although the MAST ones will get re-added later on
        #   (but duplication left in since not all MAST missions
        #   will have an entry added here).
        #
        #paptypes=[mission+"/"+ele for ele in ptray]
        paptypes=[project+"/"+ele for ele in ptray]
        debug("PTYPE", "{0} {1}".format(bibcode, ptray))
    else:
        paptypes=[]
        debug("PTYPE", "{0} {1}".format(bibcode, "NONE"))

    """

    # TODO:
    #
    # We want to store an "author list" as well as the individual
    # authors, so that we can get the ordering correct, but we do not
    # have that information in the RDF store at present. Storing the
    # author list should remove the issue we have when a paper has the
    # same author name appear more than once. Storing an author list
    # is neat, but then how can we have a "only display the first n
    # authors"?  One option would be to create two versions: the full
    # list and a short form, but this is a bit messy.

    # NOTE:
    #
    # Since each author name is stored with a UUID at the end, and
    # will be added multiple times to a paper, if the paper uses data
    # from multiple missions, then we get multiple copies of an
    # author. So we go to the effort of decoding the authors to get a
    # unique set, which means that if an authorname is repeated twice
    # - e.g. Terlevich and Terlevich - then we will lose information
    # if the names match completely. Also, we now query the RDF store
    # for the agents:normName field for each author rather than decode
    # from the URI, although this may slow things down.
    """
    authoren=c.getDataBySP(iduri, 'pav:authoredBy')
    #print authoren
    #BUG: one slash too many in authors you think?
    result['author']=[unquote(e.split('/')[-2]).replace('_',' ') for e in authoren]
    """

    aqstr = "SELECT ?name {{ <{0}> <http://swan.mindinformatics.org/ontologies/1.2/pav/authoredBy> [ <http://swan.mindinformatics.org/ontologies/1.2/agents/normName> ?name ].}}".format(
        idurifull)
    authoren = c.makeQuery(aqstr)
    authorlist = set()
    for au in authoren:
        authorlist.add(au["name"]["value"])

    result['author'] = list(authorlist)

    #print result['author']
    result['keywords_s'] = result['keywords']
    result['author_s'] = result['author']
    #get the publication uri
    result['pubyear_i'] = int(
        c.getDataBySP(bibcodeuri, 'adsbib:pubDate')[0].split()[1])
    theobjects = c.getDataBySP(bibcodeuri, 'adsbase:hasAstronomicalSource')
    debug("THEOBJECTS", "{0} {1}".format(bibcode, len(theobjects)))
    objectlist = []
    for theobj in theobjects:
        #print "theobj", theobj
        odata = c.getDataBySP('uri_source:' + theobj.split('/')[-1],
                              'adsbase:hasMetadataString')
        #print "theobj", theobj, odata
        if len(odata) > 0:
            odict = eval(
                odata[0]
            )  # why does this need to be an eval? aha, because we are storing a Python dictionary in the string!
            oid = odict['id']
            # Strip out the leading 'NAME ' from object identifiers
            if oid.startswith("NAME "):
                oid = oid[5:]
            otype = odict['otype']
            ouri = theobj
            objectlist.append({'oid': oid, 'otype': otype, 'ouri': ouri})
        else:
            print "PROBLEM", bibcode, theobj, odata
    result['objectnames'] = [e['oid'] for e in objectlist]
    result['objecttypes'] = [e['otype'] for e in objectlist]
    result['objectnames_s'] = result['objectnames']
    result['objecttypes_s'] = result['objecttypes']
    ######FLAG
    #if mission=='CHANDRA':
    #    result['missions_s']=mission
    #else:
    #    result['missions_s']=mission+"/"+project
    #print result['objectnames']
    #theobsids=[rinitem(splitns(e)) for e in c.getDataBySP(bibcodeuri, 'adsbase:aboutScienceProduct')]
    theobsiduris = c.getDataBySP(bibcodeuri, 'adsbase:aboutScienceProcess')
    #print "OBSIDS", bibcodeuri, theobsiduris
    obsray = []
    #TESTnotice by this we dont uniq telescopes or data types...what does this mean for the numbers, if anything?
    #daprops=['obsids_s','obsvtypes_s','exptime_f','obsvtime_d','instruments_s', 'telescopes_s', 'emdomains_s', 'missions_s', 'targets_s', 'ra_f','dec_f', 'datatypes_s','propids_s', 'proposaltitle', 'proposalpi', 'proposalpi_s', 'proposaltype_s']
    #daprops=['obsids_s','obsvtypes_s','exptime_f','obsvtime_d','instruments_s', 'telescopes_s', 'emdomains_s',  'targets_s', 'ra_f','dec_f', 'datatypes_s','propids_s', 'proposaltitle', 'proposalpi', 'proposalpi_s', 'proposaltype_s']
    daprops = [
        'obsids_s', 'obsvtypes_s', 'exptime_f', 'obsvtime_d', 'instruments_s',
        'telescopes_s', 'emdomains_s', 'targets_s', 'ra_f', 'dec_f',
        'datatypes_s', 'propids_s', 'proposaltitle', 'proposalpi',
        'proposalpi_s', 'proposaltype_s'
    ]

    debug("THEOBSIDURIS", theobsiduris)
    datatypes = []
    #olddict=solr.search('id:'+theid)
    missions = set()
    papertypes = set()
    for theuri in theobsiduris:
        thedict = {}

        # There must be better ways to do this
        is_mast = theuri.find('MAST') != -1
        is_chandra = theuri.find('CHANDRA') != -1

        #BUG: make this polymorphic
        if is_mast:
            themission, theproject, thevariable, theobsid = splitns(
                theuri, atposition=-3)
            uritail = themission + "/" + theproject + "/" + thevariable + "/" + theobsid
            #thedict['missions_s']=themission+"/"+theproject
            """ For now Doug is excluding the paper type facet

            # See the discussion for the creation of paptypes above
            # since there is some awkwardness here (some MAST papers
            # end up having an adsbib:paperType value since they also
            # contain Chandra data).
            #
            # papertypes.add(theproject+"/Regular")
            papertypes.add(theproject+"/science")
            """

            missions.add(themission + "/" + theproject)

        elif is_chandra:
            themission, thevariable, theobsid = splitns(theuri)
            theproject = themission  #like Chandra/Chandra
            uritail = themission + "/" + thevariable + "/" + theobsid
            #thedict['missions_s']=themission # this should be in RDF!!
            """ Paper type handling is currently removed

            # this is needed to handle papers with multiple missions
            papertypes.add(theproject+"/science")
            """

            missions.add(theproject)
        else:
            raise ValueError("Unable to decode URI for mission: " + theuri)

        debug("URITAIL", uritail)
        if is_mast:
            pquery0 = """
            SELECT ?tname WHERE {
            %s adsbase:target ?tnode.
            ?tnode adsbase:name ?tname.            
            }
            """ % (n3encode('uri_obs:' + uritail))
            #Sprint pquery0
            res1 = c.makeQuery(pquery0)
            debug("RES1", res1)
            if len(res1) > 0:
                target = res1[0]['tname']['value']
            else:
                target = 'Unspecified'
            #target=res1[0]['tname']['value']
            thetarget = themission + "/" + target
        elif is_chandra:
            titleray = c.getDataBySP('uri_obs:' + uritail, 'adsbase:title')
            if len(titleray) == 0:
                title = "Unspecified"
            else:
                title = titleray[0]
            thetarget = themission + "/" + title
        else:
            thetarget = "None"

        # print "The target", thetarget
        thedict['targets_s'] = thetarget
        #print "::::::::::::::::", theobsid, theuri, themission, thevariable
        #thedict['obsids_s']=rinitem(theobsid)
        thedict['obsids_s'] = theproject + "/" + theobsid

        #print theobsid, c.getDataBySP('uri_obs:'+uritail, 'adsobsv:observationType')
        obstypes = c.getDataBySP('uri_obs:' + uritail,
                                 'adsobsv:observationType')
        if len(obstypes) > 0:
            if theproject == themission:
                thedict['obsvtypes_s'] = theproject + "/" + obstypes[0]
            else:
                thedict[
                    'obsvtypes_s'] = themission + "/" + theproject + "/" + obsvtypes[
                        0]
        else:
            if theproject == themission:
                thedict['obsvtypes_s'] = theproject + "/Unspecified"
            else:
                thedict[
                    'obsvtypes_s'] = themission + "/" + theproject + "/Unspecified"

        #Hut dosent have obsvtypes. Call it MAST_HUT/None

        # Chandra data was being created using adsobsv:tExpTime when it should have been
        # adsobsv:tExptime. This should now be fixed but this check is left in to catch
        # any oddities.
        #
        tvals = c.getDataBySP('uri_obs:' + uritail, 'adsobsv:tExptime')
        if len(tvals) == 0:
            raise IOError(
                "Unable to find adsobsv:tExptime for uri_obs:{0}".format(
                    uritail))

        else:
            if len(tvals) > 1:
                debug(
                    "MULTI-EXP", "uri_obs:{0} has adsobsv:tExptime={1}".format(
                        uritail, tvals))

            thedict['exptime_f'] = float(tvals[0])

        tdt = c.getDataBySP('uri_obs:' + uritail, 'adsbase:atTime')[0]
        #print "TDT", tdt
        obsvtime = datetime.datetime.strptime(tdt, "%Y-%m-%dT%H:%M:%S")
        #month, day, year, thehour=tdt.split()

        #th, tmin=thehour[:-2].split(':')
        #th=int(th)
        #tmin=int(tmin)
        #if thehour[-2:]=='PM' and th < 12:
        #    th=int(th)+12
        #    print "TDT", tdt
        #obsvtime=datetime.datetime(int(year), list(calendar.month_abbr).index(month), int(day), th, tmin)
        thedict['obsvtime_d'] = obsvtime.isoformat() + "Z"

        #hasDatum is a subset of hasDataProduct. How do we get sparql to fo up inhertitance hierarchy
        #Currently we have no way of knowing as the owl file hasnt been loaded in
        pquery = """
            SELECT ?dtype WHERE {
            {%s adsobsv:hasDataProduct ?daturi.} UNION {%s adsobsv:hasDatum ?daturi.}
            ?daturi adsbase:dataType ?dtype.
            }
        """ % (n3encode('uri_obs:' + uritail), n3encode('uri_obs:' + uritail))
        res = c.makeQuery(pquery)
        #print "RES", res, pquery
        tempdt = {}
        if len(res) > 0:
            for ele in res:
                tkey = ele['dtype']['value']
                if tempdt.has_key('tkey'):
                    tempdt[tkey] += 1
                else:
                    tempdt[tkey] = 1
            thedict['datatypes_s'] = tempdt.keys()
        else:
            thedict['datatypes_s'] = []

        #BUG: Still assume one istrument. This will change, point is how? There will be both
        #multiple stuff for non-simple obs and hierarchical stuff for simple obs like gratings
        #how will we model this?
        debug("DATATYPES", thedict['datatypes_s'])
        theinstrument = c.getDataBySP('uri_obs:' + uritail,
                                      'adsbase:usingInstrument')[0]
        theinstrumentname = theinstrument.split('/')[-1]

        # TODO: should be able to query the RDF store for the label to use for the instrument
        # but for now just extract the information from the URI, and remove any %-encoding
        # done
        theinstrumentname = unquote(theinstrumentname)
        thedict['instruments_s'] = "/".join(theinstrumentname.split('_'))

        #BUG: Still assume one telescope, this will change
        thetelescope = c.getDataBySP('uri_obs:' + uritail,
                                     'adsobsv:atTelescope')[0]
        thetelescopename = thetelescope.split('/')[-1]
        thedict['telescopes_s'] = "/".join(thetelescopename.split('_'))
        #print thedict['instruments_s']
        #pointing=c.getDataBySP('uri_obs:'+theobsid, 'adsobsv:associatedPosition')[0]
        #FAIL dune to bnode crapola ra=c.getDataBySP(pointing, 'adsobsv:ra')
        #BUG we should first even see if Pointing exists before going for ra or dec

        #This will need special handling as it is multivalued array even within obsv.
        #So it will need flattening within publications
        theemdomains = c.getDataBySP('uri_obs:' + uritail,
                                     'adsobsv:wavelengthDomain')
        #BUG:Note that by doing this emdomains is optional...Not sure we want that
        if len(theemdomains) > 0:
            thedict['emdomains_s'] = []
            for domain in theemdomains:
                thedict['emdomains_s'].append(domain.split('_')[-1])

        thepointings = c.getDataBySP('uri_obs:' + uritail,
                                     'adsobsv:associatedPosition')

        if len(thepointings) > 0:
            pquery = """
            SELECT ?ra ?dec WHERE {
            %s adsobsv:associatedPosition ?position.
            ?position adsobsv:ra ?ra.
            ?position adsobsv:dec ?dec.
                
            }
            """ % (n3encode('uri_obs:' + uritail))

            #print pquery
            res = c.makeQuery(pquery)
            #print "POINTING", res
            ra = None
            dec = None
            if len(res) != 0:
                ra = res[0]['ra']['value']
                dec = res[0]['dec']['value']
                #print "RADEC", ra, dec
            if ra != 'None' and dec != 'None':
                thedict['ra_f'] = float(ra)
                thedict['dec_f'] = float(dec)
        else:
            print "******************************************No pointings for ", uritail

        #proposal stuff...not searching abstracts yet
        props = c.getDataBySP('uri_obs:' + uritail,
                              'adsbase:asAResultOfProposal')
        debug("PROPS", props)
        #BUG: again assuming only one proposal here. When we get paper proposals this will
        #Not be true any more. We should also disambiguate observational from paper proposals.
        #though paper proposals will be assoced with papers, not here, so this should be obsvprop
        #only
        #what happens when like in 2002ApJ...573..157N, this shows up for multiple missions
        if len(props) > 0:
            propuri = props[0]
            debug("PROPURI", propuri)
            if propuri.find('MAST') != -1:
                themission, theproject, thevariable, thepropid = splitns(
                    propuri, atposition=-3)
                proptail = themission + "/" + theproject + "/" + thevariable + "/" + thepropid
            else:
                themission, thevariable, thepropid = splitns(propuri)
                theproject = themission  #like Chandra/Chandra
                proptail = themission + "/" + thevariable + "/" + thepropid
            debug("PROPTAIL", proptail)
            #themission, thevariable, thepropid=splitns(propuri)
            #proptail=themission+"/"+thevariable+"/"+thepropid
            thedict['propids_s'] = theproject + "/" + thepropid
            #print proptail, n3encode('uri_prop:'+proptail), c.getDataBySP('uri_prop:'+proptail, 'adsbase:title')
            proposaltitles = c.getDataBySP('uri_prop:' + proptail,
                                           'adsbase:title')
            if len(proposaltitles) > 0:
                thedict['proposaltitle'] = proposaltitles[0]
            else:
                thedict['proposaltitle'] = 'No Info'

            #proposal type already has project or mission included
            proposaltypes = c.getDataBySP('uri_prop:' + proptail,
                                          'adsobsv:observationProposalType')
            if len(proposaltypes) > 0:
                thedict['proposaltype_s'] = proposaltypes[0]
            else:
                # The map between bibcode, obsid and proposal can contain proposals
                # for which we have no other data. Instead of saying 'No Info' we can
                # at least add in the mission name.
                #
                # TODO: should we just remove proposaltype_s for these records? If not,
                # do we want to add in fake proposals for other missions that do not have
                # them (probably not)?
                #
                #thedict['proposaltype_s']='No Info'
                if themission == "MAST":
                    thedict['proposaltype_s'] = theproject + '/None'
                elif themission == "CHANDRA":
                    thedict['proposaltype_s'] = 'CHANDRA/None'
                else:
                    raise ValueError("Unexpected mission '{0}' for {1}".format(
                        mission, propuri))

            qstr = "SELECT ?name WHERE { <" + propuri + "> adsbase:principalInvestigator [ agent:fullName ?name ] . }"
            pinameres = c.makeQuery(qstr)
            nres = len(pinameres)
            if nres == 0:
                piname = 'No Info'
            else:
                if nres != 1:
                    print(
                        "DBG: found {0} proposal pis for {1}, using first from {2}"
                        .format(nres, propuri, pinameres))
                piname = pinameres[0]["name"]["value"]
                # the following should not occur but just in case
                if piname.strip() == "":
                    logger.debug(
                        "PINAME: found ' ' so converting to 'No Info'; should not happen"
                    )
                    piname = "No Info"

            thedict['proposalpi'] = piname
            thedict['proposalpi_s'] = thedict['proposalpi']

        #BUG: SHOULD we have something like this associating None's where there is no proposal?????'
        #else:
        #    thedict['propids_s']=themission+"/None"

        #print thedict
        obsray.append(thedict)

    result['missions_s'] = list(missions)
    """ paper types are currently removed

    paptypes.extend(list(papertypes))
    if len(paptypes) == 0:
        debug("PTYPE", "bibcode={0} has no paper type!".format(bibcode))

    result['papertype_s']=paptypes
    """

    #print "OBSRAY", obsray
    if len(obsray) > 0:
        for tkey in daprops:
            #print "tkey is ", tkey
            temptkey = [e[tkey] for e in obsray if e.has_key(tkey)]
            #print "temptkey", temptkey
            temp2 = [
                item if hasattr(item, '__iter__') else [item]
                for item in temptkey
            ]
            #print "temp2", temp2
            if len(temp2) > 0:
                result[tkey] = reduce(lambda x, y: x + y, temp2)
            else:
                result[tkey] = []
    return result