Example #1
def worker(query, expire):
    """
    Worker function which invokes DAS core to update the cache for the input query
    """
    dascore = DASCore()
    status  = dascore.call(query)
    return status
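The worker above takes a DAS query plus an expiration time and returns the status of the DASCore call. A minimal sketch of how such a worker might be driven in parallel; the process pool, pool size, and sample queries are illustrative assumptions, not part of the original code:

# Hypothetical driver for the worker above; pool size and queries are
# illustrative assumptions only.
from multiprocessing import Pool

def populate(queries, expire=600):
    """Run the cache-population worker for each query in a process pool."""
    with Pool(processes=4) as pool:
        tasks = [pool.apply_async(worker, (query, expire)) for query in queries]
        # each task returns whatever DASCore.call returned, e.g. "ok"
        return [task.get() for task in tasks]

if __name__ == '__main__':
    print(populate(["zip=14850", "ip=137.138.141.145"]))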
Example #2
class testDASCore(unittest.TestCase):
    """
    A test class for the DAS core module
    """
    def setUp(self):
        """
        set up DAS core module
        """
        debug = 0
        self.das = DASCore(debug=debug, multitask=False)
        config = deepcopy(das_readconfig())
        dburi = config['mongodb']['dburi']
        connection = MongoClient(dburi)
        connection.drop_database('das')

    def testAggregators(self):
        """test DASCore aggregators via zip service"""
        # test DAS workflow
        query = "file dataset=/ZMM/Summer11-DESIGN42_V11_428_SLHC1-v1/GEN-SIM | grep file.size | sum(file.size)"
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        if  'das' in result:
            del result['das'] # strip off DAS info
        expect = {"function": "sum", "result": {"value": 5658838455}, 
                  "key": "file.size", "_id":0}
        # the result may have value == 'N/A' when the test is run without
        # certificates (e.g. on Travis); in that case we skip the check
        if result['result']['value'] != 'N/A':
            self.assertEqual(expect, result)
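The test case above is meant to run under the standard unittest machinery. A minimal runner sketch follows; the commented DAS import paths follow the usual DAS source layout but are assumptions here, not taken from this page:

# Sketch of a standalone runner for the test case above, listing the
# dependencies the test body uses.
import unittest
from copy import deepcopy

from pymongo import MongoClient

# assumed DAS module paths, shown for orientation only:
# from DAS.core.das_core import DASCore
# from DAS.core.das_query import DASQuery
# from DAS.utils.das_config import das_readconfig

if __name__ == '__main__':
    unittest.main()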
Example #3
class testDASCore(unittest.TestCase):
    """
    A test class for the DAS core module
    """
    def setUp(self):
        """
        set up DAS core module
        """
        debug = 0
        self.das = DASCore(debug=debug, multitask=False)
        config = deepcopy(das_readconfig())
        dburi = config['mongodb']['dburi']
        connection = MongoClient(dburi)
        connection.drop_database('das')

    def testAggregators(self):
        """test DASCore aggregators via zip service"""
        # test DAS workflow
        query  = "zip=14850 | grep zip.code | count(zip.code)"
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        if  'das' in result:
            del result['das'] # strip off DAS info
        expect = {"function": "count", "result": {"value": 1}, 
                  "key": "zip.code", "_id":0}
        self.assertEqual(expect, result)

    def testIPService(self):
        """test DASCore with IP service"""
        ipaddr = socket.gethostbyname('cmsweb.cern.ch')
        # test DAS workflow
        query  = "ip=%s" % ipaddr
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        # test results
        query  = "ip=%s | grep ip.address" % ipaddr
        dquery = DASQuery(query)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        result = DotDict(result).get('ip.address')
        expect = ipaddr
        self.assertEqual(expect, result)
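The DotDict(result).get('ip.address') idiom used above reads a nested value with a dotted key. DAS ships its own DotDict; the stand-in below only mimics the lookup behavior these tests rely on and is not the real implementation:

# Illustrative stand-in for DAS's DotDict dotted-key lookup; not the
# actual DAS implementation.
class DotDict(dict):
    def get(self, key, default=None):
        """Walk nested dictionaries following a dotted key such as 'ip.address'."""
        value = self
        for part in key.split('.'):
            if isinstance(value, dict) and part in value:
                value = value[part]
            else:
                return default
        return value

row = {'ip': {'address': '137.138.141.145'}}
assert DotDict(row).get('ip.address') == '137.138.141.145'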
Example #4
class testDASCore(unittest.TestCase):
    """
    A test class for the DAS core module
    """
    def setUp(self):
        """
        set up DAS core module
        """
        debug = 0
        self.das = DASCore(debug=debug)
        config = deepcopy(das_readconfig())
        dburi = config['mongodb']['dburi']
        connection = MongoClient(dburi)
        connection.drop_database('das')

    def testAggregators(self):
        """test DASCore aggregators via zip service"""
        # test DAS workflow
        query  = "zip=14850 | grep zip.Placemark.address | count(zip.Placemark.address)"
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        expect = {"function": "count", "result": {"value": 1}, 
                  "key": "zip.Placemark.address", "_id":0}
        self.assertEqual(expect, result)

    def testIPService(self):
        """test DASCore with IP service"""
        # test DAS workflow
        query  = "ip=137.138.141.145"
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        # test results
        query  = "ip=137.138.141.145 | grep ip.City"
        dquery = DASQuery(query)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        result = DotDict(result).get('ip.City')
        expect = 'Geneva'
        self.assertEqual(expect, result)
Example #5
class Robot(object):
    """
    DAS Robot (daemon) class to fetch data from provided URL/API
    and store them into DAS cache.
    """
    def __init__(self, config=None, query=None, sleep=600):
        self.dascore = DASCore(config, nores=True)
        logdir       = getarg(config, 'logdir', '/tmp')
        self.pidfile = os.path.join(logdir, 'robot-%s.pid' % genkey(query))

        if hasattr(os, "devnull"):
            devnull  = os.devnull
        else:
            devnull  = "/dev/null"

        self.stdin   = devnull # we do not read from stdinput
        self.stdout  = getarg(config, 'stdout', devnull)
        self.stderr  = getarg(config, 'stderr', devnull)
        self.query   = query
        self.sleep   = sleep

    def daemonize(self):
        """
        do the UNIX double-fork magic, see Stevens' "Advanced
        Programming in the UNIX Environment" for details (ISBN 0201563177)
        http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
        """
        try:
            pid = os.fork()
            if  pid > 0:
                # exit first parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write("fork #1 failed: %d (%s)\n" \
                % (err.errno, err.strerror))
            sys.exit(1)

        # decouple from parent environment
        os.chdir("/")
        os.umask(0)
        os.setsid()

        # do second fork
        try:
            pid = os.fork()
            if  pid > 0:
                # exit from second parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write("fork #2 failed: %d (%s)\n" \
                % (err.errno, err.strerror))
            sys.exit(1)

        # redirect standard file descriptors
        sys.stdout.flush()
        sys.stderr.flush()
        stdi = open(self.stdin, 'r')
        stdo = open(self.stdout, 'a+')
        stde = open(self.stderr, 'a+')
        os.dup2(stdi.fileno(), sys.stdin.fileno())
        os.dup2(stdo.fileno(), sys.stdout.fileno())
        os.dup2(stde.fileno(), sys.stderr.fileno())

        # write pidfile
        atexit.register(self.delpid)
        pid = str(os.getpid())
        with open(self.pidfile, 'w+') as fpid:
            fpid.write("%s\n" % pid)

    def delpid(self):
        """Delete PID file"""
        os.remove(self.pidfile)

    def start(self):
        """
        Start the daemon
        """
        # Check for a pidfile to see if the daemon already runs
        try:
            with open(self.pidfile, 'r') as pidf:
                pid = int(pidf.read().strip())
        except IOError:
            pid = None

        if pid:
            message = "pidfile %s already exist. Daemon already running?\n"
            sys.stderr.write(message % self.pidfile)
            sys.exit(1)

        # Start the daemon
        self.daemonize()
        self.run()

    def stop(self):
        """
        Stop the daemon
        """
        # Get the pid from the pidfile
        try:
            with open(self.pidfile, 'r') as pidf:
                pid = int(pidf.read().strip())
        except IOError:
            pid = None

        if not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return # not an error in a restart

        # Try killing the daemon process
        try:
            while 1:
                os.kill(pid, SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            if 'No such process' in str(err):
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print_exc(err)
                sys.exit(1)

    def restart(self):
        """
        Restart the daemon
        """
        self.stop()
        self.start()

    def status(self):
        """
        Return status information about Robot instance.
        """
        # Get the pid from the pidfile
        try:
            with open(self.pidfile, 'r') as pidf:
                pid = int(pidf.read().strip())
        except IOError:
            pid = None

        if  not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return # not an error in a restart

        print "DAS populator information"
        print "PID    :", pid
        print "pidfile:", self.pidfile
        print "stdin  :", self.stdin
        print "stdout :", self.stdout
        print "stderr :", self.stderr
        print "sleep  :", self.sleep
        print "query  :", self.query


    def run(self):
        """
        Method which will be called after the process has been
        daemonized by start() or restart().
        """
        if  not self.query:
            print "DAS query is not provided"
            sys.exit(1)

        while True:
            self.dascore.call(self.query, add_to_analytics=False)
            time.sleep(self.sleep)
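A daemon class like this is normally driven by a small command-line script that maps start/stop/restart/status onto the methods above. A hedged sketch follows; the config dict and the query are placeholders, not part of the original code:

# Hypothetical command-line driver for the Robot daemon above; config
# values and the query are placeholders.
import sys

def main():
    config = {'logdir': '/tmp'}  # placeholder configuration
    robot = Robot(config=config, query='zip=14850', sleep=600)
    actions = {'start': robot.start, 'stop': robot.stop,
               'restart': robot.restart, 'status': robot.status}
    if len(sys.argv) != 2 or sys.argv[1] not in actions:
        print("usage: %s start|stop|restart|status" % sys.argv[0])
        sys.exit(2)
    actions[sys.argv[1]]()

if __name__ == '__main__':
    main()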
Example #6
class testCMSFakeDataServices(unittest.TestCase):
    """
    A test class for the DAS core module
    """
    def setUp(self):
        """
        set up DAS core module
        """
        debug = 0

        # read DAS config and make fake Mapping DB entry
        collname      = 'test_collection'
        self.dasmerge = 'test_merge'
        self.dascache = 'test_cache'
        self.dasmr    = 'test_mapreduce'
        self.collname = collname
#        config        = deepcopy(das_readconfig())
        config        = das_readconfig()
        dburi         = config['mongodb']['dburi']
        self.dburi    = dburi
        logger        = PrintManager('TestCMSFakeDataServices', verbose=debug)
        self.base     = 'http://localhost:8080' # URL of DASTestDataService
        self.expire   = 100
        config['logger']    = logger
        config['loglevel']  = debug
        config['verbose']   = debug
        config['mappingdb'] = dict(dburi=dburi, dbname='mapping', collname=collname)
        config['analyticsdb'] = dict(dbname='analytics', collname=collname, history=100)
        config['dasdb'] = {'dbname': 'das',
                           'cachecollection': self.dascache,
                           'mrcollection': self.dasmr,
                           'mergecollection': self.dasmerge}
        config['keylearningdb'] = {'collname': collname, 'dbname': 'keylearning'}
        config['parserdb'] = {'collname': collname, 'dbname': 'parser', 
                                'enable': True, 'sizecap': 10000}
        config['services'] = ['dbs', 'phedex', 'sitedb', 'google_maps', 'ip']

        # mongo parser
        self.mongoparser = ql_manager(config)
        config['mongoparser'] = self.mongoparser

        # setup DAS mapper
        self.mgr = DASMapping(config)

        # create fresh DB
        self.clear_collections()
        self.mgr.delete_db_collection()
        self.mgr.create_db()

        # Add fake mapping records
        self.add_service('ip', 'ip.yml')
        self.add_service('google_maps', 'google_maps.yml')
        self.add_service('dbs', 'dbs.yml')
        self.add_service('phedex', 'phedex.yml')
        self.add_service('sitedb', 'sitedb.yml')

        # create DAS handler
        self.das = DASCore(config)

        # start TestDataService
        self.server = Root(config)
        self.server.start()

    def add_service(self, system, ymlfile):
        """
        Add fake data-service mapping records. We provide a system name
        which matches the corresponding name in DASTestDataService and is
        associated with that system's YML map file.
        """
        fname  = os.path.join(DASPATH, 'services/maps/%s' % ymlfile)
        url    = self.base + '/%s' % system
        for record in read_service_map(fname):
            record['url'] = url
            record['system'] = system
            self.mgr.add(record)
        for record in read_service_map(fname, 'notations'):
            record['system'] = system
            self.mgr.add(record)

    def clear_collections(self):
        """clean-up test collections"""
        conn = MongoClient(host=self.dburi)
        for dbname in ['mapping', 'analytics', 'das', 'parser', 'keylearning']:
            db = conn[dbname]
            if  dbname != 'das':
                db.drop_collection(self.collname)
            else:
                db.drop_collection(self.dascache)
                db.drop_collection(self.dasmerge)
                db.drop_collection(self.dasmr)
            

    def tearDown(self):
        """Invoke after each test"""
        self.server.stop()
#        self.mgr.delete_db_collection()
#        self.clear_collections()

    def testDBSService(self):
        """test DASCore with test DBS service"""
        query  = "primary_dataset=abc" # invoke query to fill DAS cache
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query  = "primary_dataset=abc" # invoke query to get results from DAS cache
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        result = DotDict(result[0]).get('primary_dataset.name')
        expect = 'abc'
        self.assertEqual(expect, result)

    def testPhedexAndSiteDBServices(self):
        """test DASCore with test PhEDEx and SiteDB services"""
        query  = "site=T3_US_Cornell" # invoke query to fill DAS cache
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query  = "site=T3_US_Cornell | grep site.name" # invoke query to get results from DAS cache
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        expect = 'T3_US_Cornell'
        self.assertEqual(expect, DotDict(result[0]).get('site.name'))
        expect = ['_id', 'das_id', 'site', 'cache_id', 'das', 'qhash']
        expect.sort()
        rkeys = sorted(result[0].keys())
        self.assertEqual(expect, rkeys)

    def testAggregators(self):
        """test DASCore aggregators via zip service"""
        query  = "zip=1000"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query  = "zip=1000 | count(zip.place.city)"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        expect = {"function": "count", "result": {"value": 2}, 
                  "key": "zip.place.city", "_id":0}
        self.assertEqual(expect, result[0])

    def testIPService(self):
        """test DASCore with IP service"""
        query  = "ip=137.138.141.145"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query  = "ip=137.138.141.145 | grep ip.address"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        result = DotDict(result[0]).get('ip.address')
        expect = '137.138.141.145'
        self.assertEqual(expect, result)

    def testRecords(self):
        """test records DAS keyword with all services"""
        query  = "ip=137.138.141.145"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query  = "site=T3_US_Cornell"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query  = "records | grep ip.address"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        result = DotDict(result[0]).get('ip.address')
        expect = '137.138.141.145'
        self.assertEqual(expect, result)

        query  = "records | grep site.name"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        expect = 'T3_US_Cornell'
        self.assertEqual(expect, DotDict(result[0]).get('site.name'))

        query  = "records"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        res    = []
        for row in result:
            if  'ip' in row:
                res.append(DotDict(row).get('ip.address'))
            if  'site' in row:
                for item in row['site']:
                    if  'name' in item and item['name'] not in res:
                        res.append(item['name'])
        res.sort()
        expect = ['137.138.141.145', 'T3_US_Cornell']
        self.assertEqual(expect, res)
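All of the tests above follow the same two-step DAS workflow: DASCore.call populates the cache for a query, then DASCore.get_from_cache yields the stored records. A condensed sketch of that recurring pattern; the helper name is an illustrative assumption:

# The recurring DAS workflow used throughout these tests; 'fetch' is a
# hypothetical helper, not part of the DAS API.
def fetch(das, query):
    """Populate the DAS cache for a query, then read the records back."""
    status = das.call(query)  # fills the cache, returns e.g. "ok"
    if status != 'ok':
        return status, []
    records = list(das.get_from_cache(query))
    return status, records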
Example #7
class Robot(object):
    """
    DAS Robot (daemon) class to fetch data from provided URL/API
    and store them into DAS cache.
    """
    def __init__(self, config=None, query=None, sleep=600):
        self.dascore = DASCore(config, nores=True)
        logdir       = getarg(config, 'logdir', '/tmp')
        self.pidfile = os.path.join(logdir, 'robot-%s.pid' % genkey(query))

        if hasattr(os, "devnull"):
            devnull  = os.devnull
        else:
            devnull  = "/dev/null"

        self.stdin   = devnull # we do not read from stdinput
        self.stdout  = getarg(config, 'stdout', devnull)
        self.stderr  = getarg(config, 'stderr', devnull)
        self.query   = query
        self.sleep   = sleep

    def daemonize(self):
        """
        do the UNIX double-fork magic, see Stevens' "Advanced
        Programming in the UNIX Environment" for details (ISBN 0201563177)
        http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
        """
        try:
            pid = os.fork()
            if  pid > 0:
                # exit first parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write("fork #1 failed: %d (%s)\n" \
                % (err.errno, err.strerror))
            sys.exit(1)

        # decouple from parent environment
        os.chdir("/")
        os.umask(0)
        os.setsid()

        # do second fork
        try:
            pid = os.fork()
            if  pid > 0:
                # exit from second parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write("fork #2 failed: %d (%s)\n" \
                % (err.errno, err.strerror))
            sys.exit(1)

        # redirect standard file descriptors
        sys.stdout.flush()
        sys.stderr.flush()
        stdi = open(self.stdin, 'r')
        stdo = open(self.stdout, 'a+')
        stde = open(self.stderr, 'a+')
        os.dup2(stdi.fileno(), sys.stdin.fileno())
        os.dup2(stdo.fileno(), sys.stdout.fileno())
        os.dup2(stde.fileno(), sys.stderr.fileno())

        # write pidfile
        atexit.register(self.delpid)
        pid = str(os.getpid())
        with open(self.pidfile, 'w+') as fpid:
            fpid.write("%s\n" % pid)

    def delpid(self):
        """Delete PID file"""
        os.remove(self.pidfile)

    def start(self):
        """
        Start the daemon
        """
        # Check for a pidfile to see if the daemon already runs
        try:
            with open(self.pidfile, 'r') as pidf:
                pid = int(pidf.read().strip())
        except IOError:
            pid = None

        if pid:
            message = "pidfile %s already exist. Daemon already running?\n"
            sys.stderr.write(message % self.pidfile)
            sys.exit(1)

        # Start the daemon
        self.daemonize()
        self.run()

    def stop(self):
        """
        Stop the daemon
        """
        # Get the pid from the pidfile
        try:
            with open(self.pidfile, 'r') as pidf:
                pid = int(pidf.read().strip())
        except IOError:
            pid = None

        if not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return # not an error in a restart

        # Try killing the daemon process
        try:
            while 1:
                os.kill(pid, SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            if 'No such process' in str(err):
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print_exc(err)
                sys.exit(1)

    def restart(self):
        """
        Restart the daemon
        """
        self.stop()
        self.start()

    def status(self):
        """
        Return status information about Robot instance.
        """
        # Get the pid from the pidfile
        try:
            with open(self.pidfile, 'r') as pidf:
                pid = int(pidf.read().strip())
        except IOError:
            pid = None

        if  not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return # not an error in a restart

        print("DAS populator information")
        print("PID    :", pid)
        print("pidfile:", self.pidfile)
        print("stdin  :", self.stdin)
        print("stdout :", self.stdout)
        print("stderr :", self.stderr)
        print("sleep  :", self.sleep)
        print("query  :", self.query)


    def run(self):
        """
        Method which will be called after the process has been
        daemonized by start() or restart().
        """
        if  not self.query:
            print("DAS query is not provided")
            sys.exit(1)

        while True:
            self.dascore.call(self.query)
            time.sleep(self.sleep)