Example #1
 def getdata_helper(self, url, params, expire, headers=None, post=None):
     "Helper function to get data from SiteDB or local cache"
     cname = url.split('/')[-1].replace('-', '_')
     conn  = db_connection(self.dburi)
     col   = conn[self.name][cname]
     local = find_one(col, {'expire':{'$gt':expire_timestamp(time.time())}})
     data  = None
     if  local:
         msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
         self.logger.info(msg)
         try: # get data from local cache
             data = [r for r in col.find() if 'expire' not in r][0]
             del data['_id']
         except Exception as exc:
             print_exc(exc)
             data = {}
     if  not data or not local:
         headers = {'Accept':'application/json'}
         datastream, expire = getdata(\
                 url, params, headers, expire, post,
                 self.error_expire, self.verbose, self.ckey, self.cert,
                 system=self.name)
         try: # read data and write it to local cache
             data = json.load(datastream)
             datastream.close()
             col.remove()
             col.insert(data)
             col.insert({'expire':expire_timestamp(expire)})
         except Exception as exc:
             print_exc(exc)
     return data, expire
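All of these snippets delegate to a find_one utility instead of calling PyMongo directly. Its source is not part of this listing, so the following is only a minimal sketch of what such a wrapper might look like; the fields handling and the error fallback are assumptions, not the actual DAS implementation.

 import traceback

 def find_one(col, spec, fields=None):
     "Hypothetical sketch of the find_one wrapper used throughout these examples"
     try:
         # PyMongo accepts a list of field names as the projection argument
         return col.find_one(spec, fields) if fields else col.find_one(spec)
     except Exception:
         traceback.print_exc()  # log and fall through, mirroring the examples
         return None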
Example #2
    def lookup_query(self, rawtext):
        """
        Check the parser cache for a given rawtext query.
        Search is done with the qhash of this string.
        Returns a tuple (status, value) for the cases
        (PARSERCACHE_VALID, mongo_query) - valid query found
        (PARSERCACHE_INVALID, error) - error message for invalid query
        (PARSERCACHE_NOTFOUND, None) - not in the cache
        """
        result = find_one(self.col, {'qhash':genkey(rawtext)}, \
                        fields=['query', 'error'])

        if result and result['query']:
            if self.verbose:
                self.logger.debug("DASParserCache: found valid %s->%s" %\
                                  (rawtext, result['query']))
            query = decode_mongo_query(result['query'])
            return (PARSERCACHE_VALID, query)
        elif result and result['error']:
            if self.verbose:
                self.logger.debug("DASParserCache: found invalid %s->%s" %\
                                  (rawtext, result['error']))
            return (PARSERCACHE_INVALID, result['error'])
        else:
            if self.verbose:
                self.logger.debug("DASParserCache: not found %s" %\
                                  (rawtext))
            return (PARSERCACHE_NOTFOUND, None)
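A plausible way to consume the three statuses (the caller, parse_query and the re-caching step are hypothetical, not part of the DAS sources shown here):

 status, value = cache.lookup_query(rawtext)
 if status == PARSERCACHE_VALID:
     mongo_query = value        # cache hit, already decoded
 elif status == PARSERCACHE_INVALID:
     raise Exception(value)     # cached error message for a known-bad query
 else:
     mongo_query = parse_query(rawtext)  # hypothetical parse; result would
                                         # then be written back to the cache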
Example #3
 def find(self, dasquery):
     """
     Find provided query in DAS cache.
     """
     cond = {'qhash': dasquery.qhash, 'das.system':'das',
             'das.expire': {'$gt':time.time()}}
     return find_one(self.col, cond)
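For the condition above to match, a cached record needs at least the following shape (illustrative only; the full DAS record schema is not shown in this listing). The das_record example at the end of this page uses the same expire-based pattern.

 record = {'qhash': dasquery.qhash,               # hash of the DAS query
           'das': {'system': 'das',               # written by DAS itself
                   'expire': time.time() + 600}}  # still valid, e.g. 10 min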
Example #4
 def isexpired(self):
     """
     Check if data is expired in DB.
     """
     spec = {'ts': {'$lt': time.time() + self.expire}}
     if  self.coll and find_one(self.coll, spec):
         return False
     return True
Example #5
 def das_presentation_map(self):
     "Read DAS presentation map"
     spec  = {'type':'presentation'}
     data  = find_one(self.col, spec)
     if  data:
         for _, uilist in data.get('presentation', {}).items():
             for row in uilist:
                 if  'link' in row:
                     yield row
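das_presentation_map (and init_presentationcache further below) read a single record of type 'presentation'. Judging from the keys the code accesses, that record might look roughly like this (an assumed shape, not the authoritative DAS mapping schema):

 {'type': 'presentation',
  'presentation': {
      'dataset': [                    # one list of UI rows per DAS key
          {'ui': 'Dataset',           # label shown to the user
           'das': 'dataset.name',     # record key the value comes from
           'link': None},             # optional; 'diff' is optional too
      ]}}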
Example #6
 def add_mapreduce(self, name, fmap, freduce):
     """
     Add mapreduce record and assign it to given name.
     """
     print("Add %s map/reduce function" % name)
     exists = find_one(self.mapreduce, {'name':name})
     if  exists:
         raise Exception('Map/reduce functions for %s already exist' % name)
     self.mapreduce.insert(dict(name=name, map=fmap, reduce=freduce))
     create_indexes(self.mapreduce, [('name', DESCENDING)])
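A hypothetical registration call; the map and reduce bodies are JavaScript strings that MongoDB executes server-side (the mgr handle, the dataset_counter name, and the counting logic are illustrative):

 fmap = "function() { emit(this.dataset, 1); }"
 freduce = "function(key, values) { return Array.sum(values); }"
 mgr.add_mapreduce('dataset_counter', fmap, freduce)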
Example #7
    def update(self):
        """
        Update DBS collection with a fresh copy of datasets. Upon the first
        insert of datasets we add a dataset:__POPULATED__ record to serve as
        a flag that this cache has been populated.
        """
        if SKIP_UPDATES:
            return None

        dbc = self.col
        if not dbc:
            print "%s DBSDaemon %s, no connection to DB" % (dastimestamp(), self.dbcoll)
            return

        try:
            time0 = round(time.time())
            udict = {"$set": {"ts": time0}}
            cdict = {"dataset": "__POPULATED__"}
            gen = self.datasets()
            msg = ""
            if not dbc.count():
                try:  # perform bulk insert operation
                    while True:
                        if not dbc.insert(itertools.islice(gen, self.cache_size)):
                            break
                except InvalidOperation as err:
                    # inspect the error message to distinguish a genuine
                    # InvalidOperation from generator exhaustion
                    if str(err) == "cannot do an empty bulk insert":
                        dbc.insert(cdict)
                    pass
                except Exception as err:
                    pass
                # remove records with old ts
                dbc.remove({"ts": {"$lt": time0 - self.expire}})
                msg = "inserted new"
            else:  # we already have records, update their ts
                for row in gen:
                    spec = dict(dataset=row["dataset"])
                    dbc.update(spec, udict, upsert=True)
                msg = "updated old"

            if find_one(dbc, cdict):
                dbc.update(cdict, udict)
            print "%s DBSDaemon %s, %s %s records in %s sec" % (
                dastimestamp(),
                self.dbcoll,
                msg,
                dbc.count(),
                round(time.time() - time0),
            )
        except Exception as exc:
            print "%s DBSDaemon %s, fail to update, reason %s" % (dastimestamp(), self.dbcoll, str(exc))
Example #8
 def primary_key(self, das_system, urn):
     """
     Return the DAS primary key for the provided system and urn. The DAS
     primary key is the first entry in the *lookup* attribute of the DAS
     API record.
     """
     spec = {"system": das_system, "urn": urn}
     record = find_one(self.col, spec)
     if not record:
         return None
     pkey = record["lookup"]
     if pkey.find(",") != -1:
         pkey = pkey.split(",")[0]
     return pkey
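primary_key (and primary_mapkey below) read a DAS API mapping record. A record consistent with the code's accesses might look like this, where lookup may hold a comma-separated key list (an assumed shape with illustrative values):

 {'system': 'dbs', 'urn': 'datasets',
  'lookup': 'dataset',     # first comma-separated entry is the primary key
  'das_map': [{'das_key': 'dataset', 'rec_key': 'dataset.name'}]}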
Example #9
    def update(self):
        """
        Update DBS collection with a fresh copy of datasets. Upon the first
        insert of datasets we add a dataset:__POPULATED__ record to serve as
        a flag that this cache has been populated.
        """
        if SKIP_UPDATES:
            return None

        dbc = self.col
        if  not dbc:
            print("%s DBSDaemon %s, no connection to DB" \
                % (dastimestamp(), self.dbcoll))
            return

        try:
            time0 = round(time.time())
            udict = {'$set':{'ts':time0}}
            cdict = {'dataset':'__POPULATED__'}
            gen = self.datasets()
            msg = ''
            if  not dbc.count():
                try: # perform bulk insert operation
                    res = dbc.insert_many(gen)
                except InvalidOperation as err:
                    # inspect the error message to distinguish a genuine
                    # InvalidOperation from generator exhaustion
                    if  str(err) == 'cannot do an empty bulk insert':
                        dbc.insert(cdict)
                    pass
                except Exception as err:
                    pass
                # remove records with old ts
                spec = {'ts':{'$lt':time0-self.expire}}
                dbc.delete_many(spec)
                msg = 'inserted'
            else: # we already have records, update their ts
                for row in gen:
                    spec = dict(dataset=row['dataset'])
                    dbc.update(spec, udict, upsert=True)
                msg = 'updated'

            if  find_one(dbc, cdict):
                dbc.update(cdict, udict)
            print("%s DBSDaemon %s, %s %s records in %s sec" \
            % (dastimestamp(), self.dbcoll, msg, dbc.count(),
                    round(time.time()-time0)))
        except Exception as exc:
            print("%s DBSDaemon %s, fail to update, reason %s" \
                % (dastimestamp(), self.dbcoll, str(exc)))
Example #10
 def primary_mapkey(self, das_system, urn):
     """
     Return the DAS primary map key for the provided system and urn. For
     example, the file DAS key is mapped to file.name, so this API returns
     file.name.
     """
     spec = {'system':das_system, 'urn':urn}
     record = find_one(self.col, spec)
     mapkey = []
     for row in record['das_map']:
         lkey = record['lookup']
         if  lkey.find(',') != -1:
             lkey = lkey.split(',')[0]
         if  row['das_key'] == lkey:
             return row['rec_key']
     return mapkey
Example #11
 def _map_reduce(self, coll, mapreduce, spec=None):
     """
     Perform map/reduce operation over DAS cache using provided
     collection, mapreduce name and optional conditions.
     """
     self.logger.debug("(%s, %s)" % (mapreduce, spec))
     record = find_one(self.mrcol, {'name': mapreduce})
     if not record:
         raise Exception("Map/reduce function '%s' not found" % mapreduce)
     fmap = record['map']
     freduce = record['reduce']
     if spec:
         result = coll.map_reduce(Code(fmap), Code(freduce), query=spec)
     else:
         result = coll.map_reduce(Code(fmap), Code(freduce))
     msg = "found %s records in %s" % (result.count(), result.name)
     self.logger.info(msg)
     self.logger.debug(fmap)
     self.logger.debug(freduce)
     return result
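Tying this to add_mapreduce above: a hypothetical call that runs the registered dataset_counter functions over a cache collection (the das handle and das.col are assumed, as is the old PyMongo behavior where map_reduce returns a result collection, which the result.count()/result.name accesses imply):

 result = das._map_reduce(das.col, 'dataset_counter')
 for row in result.find():
     print(row['_id'], row['value'])  # emitted key -> reduced count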
Example #12
 def init_presentationcache(self):
     """
     Initialize presentation cache by reading presentation map.
     """
     spec  = {'type':'presentation'}
     data  = find_one(self.col, spec)
     if  data:
         self.presentationcache = data['presentation']
         for daskey, uilist in self.presentationcache.items():
             for row in uilist:
                 link = None
                 if  'link' in row:
                     link = row['link']
                 if  'diff' in row:
                     self.diffkeycache[daskey] = row['diff']
                 tdict = {daskey : {'mapkey': row['das'], 'link': link}}
                 if  row['ui'] in self.reverse_presentation:
                     self.reverse_presentation[row['ui']].update(tdict)
                 else:
                     self.reverse_presentation[row['ui']] = \
                             {daskey : {'mapkey': row['das'], 'link': link}}
Example #13
 def check_filters(self, collection, spec, fields):
     "Check that given filters can be applied to records found with spec"
     if  not fields:
         return
     conn = db_connection(self.dburi)
     mdb  = conn[self.dbname]
     mdb.add_son_manipulator(self.das_son_manipulator)
     col  = mdb[collection]
     data = find_one(col, spec)
     if  not data:
         return
     found = False
     for fltr in fields:
         row = dict(data)
         if  fltr in row or 'error' in row:
             found = True
             break
         for key in fltr.split('.'):
             if  isinstance(row, dict):
                 if  key in row:
                     row = row[key]
                     found = True
                 else:
                     found = False
             elif isinstance(row, list):
                 for row in list(row):
                     if  key in row:
                         row = row[key]
                         found = True
                         break
                     else:
                         found = False
     if  not found:
         err  = "check_filters unable to find filter=%s" % fltr
         err += "\nrecord=%s" % data
         raise Exception(err)
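A hypothetical call site: fail fast when a requested filter cannot be matched against a sample record (the collection name and filter value are illustrative):

 self.check_filters('merge', {'qhash': dasquery.qhash}, ['site.name'])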
Example #14
 def das_record(self, dasquery):
     "Retrieve DAS record for given query"
     cond = {'qhash': dasquery.qhash, 'das.expire':{'$gt':time.time()}}
     return find_one(self.col, cond)