Example #1
0
 def __init__(self, config=None):
     """
     Set up storage managers, the spec map and access counters.

     :param config: configuration object. Although it defaults to None,
         its attributes are read unconditionally, so a real object is
         required. Attributes used: ``short_storage_uri``,
         ``long_storage_thr``, ``specmap`` (path of a comma separated
         key,value file) and, only when ``LTS`` is truthy,
         ``long_storage_uri``, ``wmauri`` and ``yarn``.
     """
     # define DB names to work with. These names should correspond to
     # dtype of documents we assign, see find_dtype and encode method
     self.dbnames = ['fwjr', 'crab']
     # Short-Term Storage: one manager per DB name
     self.sts = {}
     for dbname in self.dbnames:
         self.sts[dbname] = STSManager(config.short_storage_uri,
                                       dbname=dbname)
     self.sts_agg = STSManager(config.short_storage_uri,
                               dbname='aggregated')
     # Long-Term Storage
     self.tls_thr = config.long_storage_thr
     if LTS:  # we'll use this module if it's loaded
         self.lts = LTSManager(config.long_storage_uri, config.wmauri,
                               config.yarn)
     else:  # fallback
         self.lts = self.sts['fwjr']
     # specmap translates user keys into storage keys, e.g. lfn:LFNArray
     self.specmap = {}
     with open(config.specmap, 'r') as istream:
         for line in istream:
             line = line.replace('\n', '')
             if not line.strip():
                 # tolerate blank lines (e.g. an empty last line) which
                 # would otherwise fail with IndexError below
                 continue
             pair = line.split(',')
             self.specmap[pair[0]] = pair[1]
     msg = "Short-Term Storage %s, Long-Term Storage %s, specmap %s" % (
         self.sts, self.lts, self.specmap)
     print(tstamp("WMArchiveManager::init"), msg)
     # bookkeeping: start time and read/write access counters
     self.time0 = time.time()
     self.read_access = 0
     self.write_access = 0
Example #2
0
 def __init__(self, config=None):
     """
     Set up the storage managers and the spec map.

     :param config: configuration object. Although it defaults to None,
         its attributes are read unconditionally, so a real object is
         required. Attributes used: ``short_storage_uri``,
         ``long_storage_thr``, ``specmap`` (path of a comma separated
         key,value file) and, only when ``LTS`` is truthy,
         ``long_storage_uri``, ``wmauri`` and ``yarn``.
     """
     # Short-Term Storage
     self.sts = STSManager(config.short_storage_uri)
     # Long-Term Storage
     self.tls_thr = config.long_storage_thr
     if LTS:  # we'll use this module if it's loaded
         self.lts = LTSManager(config.long_storage_uri, config.wmauri,
                               config.yarn)
     else:  # fallback
         self.lts = self.sts
     # specmap translates user keys into storage keys, e.g. lfn:LFNArray
     self.specmap = {}
     with open(config.specmap, 'r') as istream:
         for line in istream:
             line = line.replace('\n', '')
             if not line.strip():
                 # tolerate blank lines (e.g. an empty last line) which
                 # would otherwise fail with IndexError below
                 continue
             pair = line.split(',')
             self.specmap[pair[0]] = pair[1]
     msg = "Short-Term Storage %s, Long-Term Storage %s, specmap %s" % (
         self.sts, self.lts, self.specmap)
     print(tstamp("WMArchiveManager::init"), msg)
Example #3
0
 def __init__(self, config=None):
     """
     Set up storage, Monit and NATS managers, the spec map and counters.

     :param config: configuration object. Although it defaults to None,
         its attributes are read unconditionally, so a real object is
         required. Attributes used: ``short_storage_uri``,
         ``long_storage_thr``, ``specmap`` (path of a comma separated
         key,value file), ``monit_credentials``, ``monit_attributes``;
         ``long_storage_uri``, ``wmauri`` and ``yarn`` only when ``LTS``
         is truthy; ``nats_server`` and ``nats_topics`` only when the
         optional ``use_nats`` attribute is present and truthy.
     """
     # define DB names to work with. These names should correspond to
     # dtype of documents we assign, see find_dtype and encode method
     self.dbnames = ['fwjr', 'crab']
     # Short-Term Storage: one manager per DB name
     self.sts = {}
     for dbname in self.dbnames:
         self.sts[dbname] = STSManager(config.short_storage_uri,
                                       dbname=dbname)
     self.sts_agg = STSManager(config.short_storage_uri,
                               dbname='aggregated')
     # Long-Term Storage
     self.tls_thr = config.long_storage_thr
     if LTS:  # we'll use this module if it's loaded
         self.lts = LTSManager(config.long_storage_uri, config.wmauri,
                               config.yarn)
     else:  # fallback
         self.lts = self.sts['fwjr']
     # specmap translates user keys into storage keys, e.g. lfn:LFNArray
     self.specmap = {}
     with open(config.specmap, 'r') as istream:
         for line in istream:
             line = line.replace('\n', '')
             if not line.strip():
                 # tolerate blank lines (e.g. an empty last line) which
                 # would otherwise fail with IndexError below
                 continue
             pair = line.split(',')
             self.specmap[pair[0]] = pair[1]
     # Monit manager
     self.monit = MonitManager(config.monit_credentials,
                               config.monit_attributes)
     # NATS manager, enabled only when config.use_nats exists and is truthy
     if getattr(config, 'use_nats', False):
         self.nats = NATSManager(config.nats_server,
                                 topics=config.nats_topics,
                                 default_topic='cms.wmarchive',
                                 cms_filter=cms_filter)
     else:
         self.nats = None
     msg = "Short-Term Storage %s, Long-Term Storage %s, specmap %s" \
             % (self.sts, self.lts, self.specmap)
     msg += '\nMonit {}'.format(self.monit)
     msg += '\nNATS {}'.format(self.nats)
     print(tstamp("WMArchiveManager::init"), msg)
     # bookkeeping: start time and read/write access counters
     self.time0 = time.time()
     self.read_access = 0
     self.write_access = 0
Example #4
0
 def __init__(self, config=None):
     """
     Set up the storage managers and the spec map.

     :param config: configuration object. Although it defaults to None,
         its attributes are read unconditionally, so a real object is
         required. Attributes used: ``short_storage_uri``,
         ``long_storage_thr``, ``specmap`` (path of a comma separated
         key,value file) and, only when ``LTS`` is truthy,
         ``long_storage_uri``, ``wmauri`` and ``yarn``.
     """
     # Short-Term Storage
     self.sts = STSManager(config.short_storage_uri)
     # Long-Term Storage
     self.tls_thr = config.long_storage_thr
     if LTS:  # we'll use this module if it's loaded
         self.lts = LTSManager(config.long_storage_uri, config.wmauri,
                               config.yarn)
     else:  # fallback
         self.lts = self.sts
     # specmap translates user keys into storage keys, e.g. lfn:LFNArray
     self.specmap = {}
     with open(config.specmap, 'r') as istream:
         for line in istream:
             line = line.replace('\n', '')
             if not line.strip():
                 # tolerate blank lines (e.g. an empty last line) which
                 # would otherwise fail with IndexError below
                 continue
             pair = line.split(',')
             self.specmap[pair[0]] = pair[1]
     msg = "Short-Term Storage %s, Long-Term Storage %s, specmap %s" % (
         self.sts, self.lts, self.specmap)
     print(tstamp("WMArchiveManager::init"), msg)
Example #5
0
class WMArchiveManager(object):
    """
    Front-end manager which routes WMArchive reads and writes to storage.

    The manager holds one Short-Term Storage (STS) manager and one Long-Term
    Storage (LTS) manager; when the LTS module is not loaded the STS manager
    is used as LTS too. It also keeps a ``specmap`` dictionary used to
    translate user supplied keys/fields into the ones used by the storage.
    """
    def __init__(self, config=None):
        """
        Set up the storage managers and the spec map.

        :param config: configuration object. Although it defaults to None,
            its attributes are read unconditionally, so a real object is
            required. Attributes used: ``short_storage_uri``,
            ``long_storage_thr``, ``specmap`` (path of a comma separated
            key,value file) and, only when ``LTS`` is truthy,
            ``long_storage_uri``, ``wmauri`` and ``yarn``.
        """
        # Short-Term Storage
        self.sts = STSManager(config.short_storage_uri)
        # Long-Term Storage
        self.tls_thr = config.long_storage_thr
        if LTS:  # we'll use this module if it's loaded
            self.lts = LTSManager(config.long_storage_uri, config.wmauri,
                                  config.yarn)
        else:  # fallback
            self.lts = self.sts
        self.specmap = self._load_specmap(config.specmap)
        msg = "Short-Term Storage %s, Long-Term Storage %s, specmap %s" % (
            self.sts, self.lts, self.specmap)
        print(tstamp("WMArchiveManager::init"), msg)

    @staticmethod
    def _load_specmap(fname):
        """
        Read comma separated ``key,value`` lines from given file and return
        them as a dictionary, e.g. the line ``lfn,LFNArray`` yields
        ``{'lfn': 'LFNArray'}``. Blank lines are skipped.
        """
        specmap = {}
        with open(fname, 'r') as istream:
            for line in istream:
                line = line.replace('\n', '')
                if not line.strip():
                    # a blank line has no second field and would otherwise
                    # fail with IndexError
                    continue
                pair = line.split(',')
                specmap[pair[0]] = pair[1]
        return specmap

    def status(self):
        "Return current status about WMArchive queue"
        sdict = {}
        sdict.update(self.sts.status())
        # LTS may be the very same manager as STS (fallback), report it once
        if self.lts != self.sts:
            sdict.update(self.lts.status())
        return sdict

    def jobs(self):
        "Return jobs from WMArchive STS"
        return self.sts.jobs()

    def performance(self, **kwargs):
        "Return stats docs from WMArchive STS"
        return self.sts.performance(**kwargs)

    def qmap(self, mgr, spec, fields):
        """
        Map user based spec and fields into WMArchive storage QL.

        Keys of ``spec`` and entries of ``fields`` are translated via
        ``self.specmap`` (unknown names are kept as is). If the given manager
        provides its own ``qmap`` method its result is returned, otherwise
        the tuple ``(newspec, newfields)`` is returned.
        """
        newspec = dict((self.specmap.get(key, key), val)
                       for key, val in spec.items())
        newfields = [self.specmap.get(field, field) for field in fields]
        if hasattr(mgr, 'qmap'):
            # NOTE(review): the manager receives the original, unmapped
            # fields while spec is mapped -- confirm this is intended
            return mgr.qmap(newspec, fields)
        return newspec, newfields

    def encode(self, docs):
        """
        Encode given set of documents into appropriate format for long term storage.
        This method will consume documents in DMWM JSON format.
        Yield encoded documents to the client.

        Documents are updated in place: missing (or empty) ``wmaid``,
        ``wmats`` and ``stype`` attributes are filled in.
        """
        for doc in docs:
            if not doc.get('wmaid', ''):
                doc['wmaid'] = wmaHash(doc)
            if not doc.get('wmats', 0):
                doc['wmats'] = time.time()
            if not doc.get('stype', ''):
                doc['stype'] = self.sts.stype
            yield doc

    def decode(self, docs):
        """
        Decode given set of documents into DMWM JSON format.
        Yield decoded documents to the client.
        """
        for doc in docs:
            yield doc

    def _raise_write_error(self, label, data, exp):
        "Log failed write operation and raise HTTPError 500 with document ids"
        print(tstamp("WMArchiveManager::write") + " exception: %s" % str(exp))
        traceback.print_exc()
        ids = extractFWJRids(data)
        raise HTTPError(500, '%s, ids=%s, exception=%s' % (label, ids, str(exp)))

    def write(self, data):
        """
        Write given data chunk (a WM document or list of WM documents) into
        Short-Term Storage.

        Return dictionary with ``stype``, ``ids`` and ``status`` keys; the
        status is 'unknown' if storage returned no ids for non-empty data.
        Raise HTTPError 500 if data is neither dict nor list or if the
        write operation fails.
        """
        if isinstance(data, dict):
            data = [data]
        # validate input outside of try block, otherwise this error would be
        # caught and re-wrapped by the generic exception handler below
        if not isinstance(data, list):
            raise HTTPError(500, "WMArchive exception, invalid data format: %s" % type(data))
        ids = []
        status = 'ok'
        try:
            docs = list(self.encode(data))
            ids = self.sts.write(docs)
            if not ids and data:  # somehow we got empty list for given data
                status = 'unknown'
        except WriteError as exp:
            self._raise_write_error('WMArhchive WriteError', data, exp)
        except Exception as exp:
            self._raise_write_error('WMArhchive exception', data, exp)
        return {'stype': self.sts.stype, 'ids': ids, 'status': status}

    def read(self, spec, fields):
        """
        Read data from storage for given query.

        :param spec: either a dictionary query, which must contain a
            ``timerange`` key, or a non-dict (list of wmaids) look-up which
            is always served by STS.
        :param fields: list of fields to retrieve.

        Return dictionary with ``input``, ``results``, ``storage``,
        ``status`` and ``data`` keys, where ``data`` is what the storage
        manager returned. Raise HTTPError 400 if timerange is missing or
        invalid or if the storage manager fails.
        """
        result = {'input': {'spec': spec, 'fields': fields},
                  'results': [], 'storage': self.sts.stype, 'status': 'ok'}
        # by default (including look-up by list of wmaids, which represents
        # results of LTS data look-up) we use STS
        mgr = self.sts
        # convert given spec into query suitable for sts/lts
        if isinstance(spec, dict):
            try:
                # NOTE: timerange is removed from caller's dictionary
                trange = spec.pop('timerange')
            except KeyError:
                print(tstamp("WMArchiveManager::read"), "timerange is not provided in spec", spec)
                raise HTTPError(400, 'WMArhchive no timerange, spec=%s' % spec)

            if trange_check(trange):
                print(tstamp("WMArchiveManager::read"), "bad timerange: %s" % trange)
                raise HTTPError(400, 'WMArhchive unable to parse timerange, spec=%s' % spec)

            # based on given time range define which manager
            # we'll use for data look-up
            if use_lts(trange, self.tls_thr):
                spec['timerange'] = trange  # put back timerange for HDFS hdir constraint
                mgr = self.lts

            # convert spec into WMArchive one
            spec, fields = self.qmap(mgr, spec, fields)
        try:
            # request data from back-end
            data = mgr.read(spec, fields)
        except ReadError as exp:
            print(tstamp("WMArchiveManager::read"), "exception: %s" % str(exp))
            traceback.print_exc()
            raise HTTPError(400, 'WMArhchive ReadError, exception %s' % str(exp))
        except Exception as exp:
            print(tstamp("WMArchiveManager::read"), "exception: %s" % str(exp))
            traceback.print_exc()
            raise HTTPError(400, 'WMArhchive exception %s' % str(exp))
        result['data'] = data
        return result
Example #6
0
class WMArchiveManager(object):
    """
    Front-end manager which routes WMArchive reads and writes to storage.

    The manager holds one Short-Term Storage (STS) manager per DB name
    (see ``self.dbnames``), an STS manager for the 'aggregated' DB and a
    Long-Term Storage (LTS) manager; when the LTS module is not loaded the
    'fwjr' STS manager is used as LTS. It also keeps a ``specmap``
    dictionary used to translate user supplied keys/fields into the ones
    used by the storage, and simple read/write access counters.
    """
    def __init__(self, config=None):
        """
        Set up storage managers, the spec map and access counters.

        :param config: configuration object. Although it defaults to None,
            its attributes are read unconditionally, so a real object is
            required. Attributes used: ``short_storage_uri``,
            ``long_storage_thr``, ``specmap`` (path of a comma separated
            key,value file) and, only when ``LTS`` is truthy,
            ``long_storage_uri``, ``wmauri`` and ``yarn``.
        """
        # define DB names to work with. These names should correspond to
        # dtype of documents we assign, see find_dtype and encode method
        self.dbnames = ['fwjr', 'crab']
        # Short-Term Storage: one manager per DB name
        self.sts = {}
        for dbname in self.dbnames:
            self.sts[dbname] = STSManager(config.short_storage_uri,
                                          dbname=dbname)
        self.sts_agg = STSManager(config.short_storage_uri,
                                  dbname='aggregated')
        # Long-Term Storage
        self.tls_thr = config.long_storage_thr
        if LTS:  # we'll use this module if it's loaded
            self.lts = LTSManager(config.long_storage_uri, config.wmauri,
                                  config.yarn)
        else:  # fallback
            self.lts = self.sts['fwjr']
        self.specmap = self._load_specmap(config.specmap)
        msg = "Short-Term Storage %s, Long-Term Storage %s, specmap %s" % (
            self.sts, self.lts, self.specmap)
        print(tstamp("WMArchiveManager::init"), msg)
        # bookkeeping: start time and read/write access counters
        self.time0 = time.time()
        self.read_access = 0
        self.write_access = 0

    @staticmethod
    def _load_specmap(fname):
        """
        Read comma separated ``key,value`` lines from given file and return
        them as a dictionary, e.g. the line ``lfn,LFNArray`` yields
        ``{'lfn': 'LFNArray'}``. Blank lines are skipped.
        """
        specmap = {}
        with open(fname, 'r') as istream:
            for line in istream:
                line = line.replace('\n', '')
                if not line.strip():
                    # a blank line has no second field and would otherwise
                    # fail with IndexError
                    continue
                pair = line.split(',')
                specmap[pair[0]] = pair[1]
        return specmap

    def status(self):
        """
        Return current status about WMArchive queue.

        The returned dictionary contains a ``server`` part (process status
        plus uptime and access counters), the thread stack information and
        the status of every STS manager (``sts``) and of LTS (``lts``).
        """
        server = processStatus()
        server.update({
            'uptime': time.time() - self.time0,
            'read_access': self.read_access,
            'write_access': self.write_access
        })
        sdict = {'server': server}
        sdict.update(threadStack())
        sts = {}
        for dbname in self.dbnames:
            sts[dbname] = self.sts[dbname].status()
        sdict['sts'] = sts
        sdict['lts'] = self.lts.status()
        return sdict

    def jobs(self):
        "Return jobs from WMArchive STS as a dictionary keyed by DB name"
        jobs = {}
        for dbname in self.dbnames:
            jobs[dbname] = self.sts[dbname].jobs()
        return jobs

    def performance(self, **kwargs):
        "Return stats docs from WMArchive aggregated STS"
        return self.sts_agg.performance(**kwargs)

    def qmap(self, mgr, spec, fields):
        """
        Map user based spec and fields into WMArchive storage QL.

        Keys of ``spec`` and entries of ``fields`` are translated via
        ``self.specmap`` (unknown names are kept as is). If the given manager
        provides its own ``qmap`` method its result is returned, otherwise
        the tuple ``(newspec, newfields)`` is returned.
        """
        newspec = dict((self.specmap.get(key, key), val)
                       for key, val in spec.items())
        newfields = [self.specmap.get(field, field) for field in fields]
        if hasattr(mgr, 'qmap'):
            # NOTE(review): the manager receives the original, unmapped
            # fields while spec is mapped -- confirm this is intended
            return mgr.qmap(newspec, fields)
        return newspec, newfields

    def encode(self, docs):
        """
        Encode given set of documents into appropriate format for long term storage.
        This method will consume documents in DMWM JSON format.
        Yield encoded documents to the client.

        Documents are updated in place: missing (or empty) ``wmaid``,
        ``wmats``, ``dtype`` and ``stype`` attributes are filled in.
        """
        for doc in docs:
            dtype = find_dtype(doc)
            if not doc.get('wmaid', ''):
                doc['wmaid'] = wmaHash(doc)
            if not doc.get('wmats', 0):
                doc['wmats'] = time.time()
            if not doc.get('dtype', ''):
                doc['dtype'] = dtype
            if not doc.get('stype', ''):
                # here dtype is dbname in STS
                doc['stype'] = self.sts[dtype].stype
            yield doc

    def decode(self, docs):
        """
        Decode given set of documents into DMWM JSON format.
        Yield decoded documents to the client.
        """
        for doc in docs:
            yield doc

    def _raise_write_error(self, label, data, exp):
        "Log failed write operation and raise HTTPError 500 with document ids"
        print(tstamp("WMArchiveManager::write") + " exception: %s" % str(exp))
        traceback.print_exc()
        ids = extractFWJRids(data)
        raise HTTPError(500, '%s, ids=%s, exception=%s' % (label, ids, str(exp)))

    def write(self, data):
        """
        Write given data chunk (a WM document or list of WM documents) into
        Short-Term Storage.

        Return dictionary with ``stype``, ``ids`` and ``status`` keys; the
        status is 'unknown' if storage returned no ids for non-empty data.
        For an empty chunk nothing is written and stype is 'unknown'.
        Raise HTTPError 500 if data is neither dict nor list or if the
        write operation fails.
        """
        self.write_access += 1
        if isinstance(data, dict):
            data = [data]
        # validate input outside of try block, otherwise this error would be
        # caught and re-wrapped by the generic exception handler below
        if not isinstance(data, list):
            raise HTTPError(
                500, "WMArchive exception, invalid data format: %s" %
                type(data))
        status = 'ok'
        stype = 'unknown'
        ids = []
        try:
            docs = list(self.encode(data))
            if docs:  # empty chunk has no dtype to look at, nothing to write
                # NOTE: the whole chunk goes to the store which corresponds
                # to dtype of the first document
                dtype = docs[0]['dtype']
                ids = self.sts[dtype].write(docs)
                stype = self.sts[dtype].stype
                if not ids:  # somehow we got empty list for given data
                    status = 'unknown'
        except WriteError as exp:
            self._raise_write_error('WMArhchive WriteError', data, exp)
        except Exception as exp:
            self._raise_write_error('WMArhchive exception', data, exp)
        return {'stype': stype, 'ids': ids, 'status': status}

    def read(self, spec, fields):
        """
        Read data from storage for given query.

        :param spec: either a dictionary query, which must contain a
            ``timerange`` key and may contain ``dtype`` to select the STS
            DB (default 'fwjr'), or a non-dict (list of wmaids) look-up
            which is always served by the 'fwjr' STS.
        :param fields: list of fields to retrieve.

        Return dictionary with ``input``, ``results``, ``storage``,
        ``status`` and ``data`` keys, where ``data`` is what the storage
        manager returned. Raise HTTPError 400 if dtype is unknown, if
        timerange is missing or invalid or if the storage manager fails.
        """
        self.read_access += 1
        is_query = isinstance(spec, dict)
        # only dictionary spec can carry dtype, a list of wmaids can not
        dbname = spec.get('dtype', 'fwjr') if is_query else 'fwjr'
        if dbname not in self.sts:
            raise HTTPError(
                400, 'WMArchive unsupported dtype=%s, supported ones %s' %
                (dbname, self.dbnames))
        result = {
            'input': {
                'spec': spec,
                'fields': fields
            },
            'results': [],
            'storage': self.sts[dbname].stype,
            'status': 'ok'
        }
        # by default (including look-up by list of wmaids, which represents
        # results of LTS data look-up) we use STS
        mgr = self.sts[dbname]
        # convert given spec into query suitable for sts/lts
        if is_query:
            try:
                # NOTE: timerange is removed from caller's dictionary
                trange = spec.pop('timerange')
            except KeyError:
                print(tstamp("WMArchiveManager::read"),
                      "timerange is not provided in spec", spec)
                raise HTTPError(400, 'WMArhchive no timerange, spec=%s' % spec)

            if trange_check(trange):
                print(tstamp("WMArchiveManager::read"),
                      "bad timerange: %s" % trange)
                raise HTTPError(
                    400,
                    'WMArhchive unable to parse timerange, spec=%s' % spec)

            # based on given time range define which manager
            # we'll use for data look-up
            if use_lts(trange, self.tls_thr):
                # put back timerange for HDFS hdir constraint
                spec['timerange'] = trange
                mgr = self.lts

            # convert spec into WMArchive one
            spec, fields = self.qmap(mgr, spec, fields)
        try:
            # request data from back-end
            data = mgr.read(spec, fields)
        except ReadError as exp:
            print(tstamp("WMArchiveManager::read"), "exception: %s" % str(exp))
            traceback.print_exc()
            raise HTTPError(400,
                            'WMArhchive ReadError, exception %s' % str(exp))
        except Exception as exp:
            print(tstamp("WMArchiveManager::read"), "exception: %s" % str(exp))
            traceback.print_exc()
            raise HTTPError(400, 'WMArhchive exception %s' % str(exp))
        result['data'] = data
        return result