Example #1
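PLYSubmitter plugs the DAS PLY parser into the generic Submitter interface. The constructor pulls the parser directory, service list, and DAS keys out of a live DASCore configuration and builds the parser once; submit then parses a query and converts it to MongoDB form, printing each stage, so queries are validated without anything being submitted.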
class PLYSubmitter(Submitter):
    """
    Submits queries to the DAS PLY yacc/lex parser to check validity
    (no further submission is done).
    """
    def __init__(self, producer, **kwargs):
        assert HAVE_DAS
        core = DASCore()
        parserdir   = core.dasconfig['das']['parserdir']
        dasservices = core.dasconfig['services']
        daskeys = []
        for val in core.mapping.daskeys().values():
            for item in val:
                daskeys.append(item)
        
        self.dasply = DASPLY(parserdir, daskeys, dasservices)
        self.dasply.build()
        Submitter.__init__(self, producer, **kwargs)
    def submit(self, query):
        """submit query"""
        print "RAW: ", query
        ply = self.dasply.parser.parse(query)
        print "PLY: ", ply
        mongo = ply2mongo(ply)
        print "MONGO: ", mongo
        return True
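For reference, the test fixtures in Example #5 below spell out the dictionary shape ply2mongo is expected to produce. One query/result pair copied from those fixtures (shown for illustration, not produced by running the code above):

query = "site=T1_CH_CERN"
mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}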
Example #2
File: das_parser.py Project: ktf/DAS
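ply_output is a small debugging helper: when verbose is set it builds a throwaway DASPLY instance and runs only the lexer over the query, printing the token stream; with verbose left false it does nothing.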
def ply_output(query, keys, services, pdir='/tmp', verbose=False):
    """Print PLY/lexer output"""
    if  verbose:
        dasply = DASPLY(pdir, keys, services, verbose=verbose)
        dasply.build()
        print "input query='%s'" % query
        dasply.test_lexer(query)
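A hypothetical invocation, for illustration only; the key and service lists are sample values borrowed from the test fixtures later in this listing:

keys = ['dataset', 'file', 'block', 'run', 'site']
services = ['dbs', 'phedex']
ply_output("dataset=/a/b/c run=123", keys, services, pdir='/tmp', verbose=True)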
Example #3
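This unit test builds a parser from a minimal key/service configuration and checks that routing the parse call through the spawn helper returns the same PLY result as calling dasply.parser.parse directly.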
    def test_spawn_manager(self):
        """Test spawn_queue function"""
        dasservices = ['dbs', 'dbs3']
        daskeys = ['dataset']
        parserdir = os.getcwd()
        query = "dataset=/ZMM*/*/*"
        dasply = DASPLY(parserdir, daskeys, dasservices, verbose=0)
        dasply.build()
        ply_q1 = dasply.parser.parse(query)
        ply_q2 = spawn(dasply.parser.parse, query)
        self.assertEqual(ply_q1, ply_q2)
Example #4
File: das_parser.py Project: ktf/DAS
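ply_parse_query is the parse entry point with failure handling: it builds the parser, then retries the parse on failure before giving up, as the loop below shows.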
def ply_parse_query(query, keys, services, pdir='/tmp', verbose=False):
    """Get ply object for given query."""
    dasply = DASPLY(pdir, keys, services, verbose=verbose)
    dasply.build()
#    ply_query = dasply.parser.parse(query)
#    ply_query = spawn(dasply.parser.parse, query)
#    return ply_query
    error = None
    for trial in xrange(1, 3):
        try:
            ply_query = dasply.parser.parse(query)
            return ply_query
        except Exception as exc:
            msg = "Fail to parse query=%s, trial=%s, exception=%s" \
                    % (query, trial, str(exc))
            print dastimestamp('DAS WARNING ') + ' ' + msg
            error = exc
        time.sleep(trial/10.)
    raise error
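The commented-out lines preserve earlier single-shot and spawn-based variants. With xrange(1, 3) the loop makes exactly two attempts, sleeping 0.1 s after the first failure and 0.2 s after the second before re-raising the last exception; adjusting the range or the trial/10. divisor trades latency against resilience to transient parser failures.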
Example #5
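testDASPLY exercises the parser against a table of DAS queries mapped to their expected MongoDB translations. setUp builds the parser (silencing yacc error output via ply.yacc.NullLogger) and fills self.queries; the test methods then run the lexer over every query, compare ply2mongo output with the expected dictionaries, and verify that malformed queries raise.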
class testDASPLY(unittest.TestCase):
    """
    A test class for the DAS PLY parser/lexer
    """
    def setUp(self):
        """
        set up DAS core module
        """
        self.debug = 0
        dassystems = ['dbs', 'sitedb', 'phedex', 'google_maps', 
                      'postalcode', 'ip_services']
        daskeys = ['dataset', 'file', 'block', 'run', 'site', 
                   'latitude', 'longitude', 'city', 'ip', 'date', 'system', 'zip']
        parserdir = '/tmp'

        self.dasply = DASPLY(parserdir, daskeys, dassystems, verbose=self.debug)
        args = {'errorlog' : ply.yacc.NullLogger()}
        self.dasply.build(**args)

        self.queries = {}

        query = "queries"
        mongo = {'fields': ['queries'], 'spec': {'queries': '*'}}
        self.queries[query] = mongo

        query = "popular queries"
        mongo = {'fields': ['popular', 'queries'], 'spec': {'queries': '*'}}
        self.queries[query] = mongo

        query = "popular queries date last 24h"
        mongo = {'fields': ['popular', 'queries'], 'spec': {'date': '24h'}}
#        date1 = time.time() - 24*60*60
#        date2 = time.time()
#        mongo = {'fields': ['popular', 'queries'], 
#                 'spec': {'date': {'$gte': long(date1), '$lte': long(date2)}}}
        self.queries[query] = mongo

        query = "records"
        mongo = {'fields': ['records'], 'spec': {'records': '*'}}
        self.queries[query] = mongo

        query = "records site=T1_CH_CERN"
        mongo = {'fields': ['records'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo

        query = "site=T1_CH_CERN"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo

        query = "site site=T1_CH_CERN"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo

        query = "site site=srm-cms.cern.ch"
        mongo = {'fields': ['site'], 'spec': {'site': 'srm-cms.cern.ch'}}
        self.queries[query] = mongo

        query = "site site=cmssrm.fnal.gov"
        mongo = {'fields': ['site'], 'spec': {'site': 'cmssrm.fnal.gov'}}
        self.queries[query] = mongo

        query = "site=T1_CH_CERN site"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo

        query = "dataset=/a/b/c run=123 | grep dataset.size"
        mongo = {'filters': {'grep': ['dataset.size']}, 'fields': None, 
                 'spec': {'dataset': '/a/b/c', 'run': 123}}
        self.queries[query] = mongo

        query = "site=T1_CH_CERN system=sitedb"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}, 'system': 'sitedb'}
        self.queries[query] = mongo

        query = "zip=10000 | grep zip.Placemark.address | count(zip.Placemark.address)"
        mongo = {'fields': ['zip'], 'spec': {'zip': 10000}, 
                 'filters': {'grep': ['zip.Placemark.address']},
                 'aggregators': [('count', 'zip.Placemark.address')] }
        self.queries[query] = mongo

        query = "city=Ithaca"
        mongo = {'fields': ['city'], 'spec': {'city': 'Ithaca'}}
        self.queries[query] = mongo

        query = "zip=14850"
        mongo = {'fields': ['zip'], 'spec': {'zip': 14850}}
        self.queries[query] = mongo

        query = "ip=137.138.141.145 | grep ip.City"
        mongo = {'fields': ['ip'], 'spec': {'ip': '137.138.141.145'},
                 'filters': {'grep': ['ip.City']}}
        self.queries[query] = mongo

        query = 'latitude=11.1 longitude=-72'
        mongo = {'fields': None, 'spec':{'latitude':11.1, 'longitude': -72}}
        self.queries[query] = mongo

        query = "site=T1_CH_CERN"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo

        query = "run=20853"
        mongo = {'fields': ['run'], 'spec': {'run': 20853}}
        self.queries[query] = mongo

        query = "run between [20853,20859]"
        mongo = {'fields': ['run'], 'spec': {'run': {'$gte': 20853, '$lte': 20859}}}
        self.queries[query] = mongo

        query = "file block=123 | grep file.size | sum(file.size)"
        mongo = {'fields': ['file'], 'spec': {'block': 123},
                 'filters': {'grep': ['file.size']},
                 'aggregators': [('sum', 'file.size')]}
        self.queries[query] = mongo

        query = "block=/a/b/RECO#9f5c396b-b6a1"
        mongo = {'fields': ['block'], 'spec': {'block': '/a/b/RECO#9f5c396b-b6a1'}}
        self.queries[query] = mongo

        query = "block dataset=/W/a_2/RECO"
        mongo = {'fields': ['block'], 'spec': {'dataset': '/W/a_2/RECO'}}
        self.queries[query] = mongo

        query = "run date last 24h"
#        date1 = time.time() - 24*60*60
#        date2 = time.time()
#        mongo = {'fields': ['run'], 'spec': {'date': {'$gte': long(date1), '$lte': long(date2)}}}
        mongo = {'fields': ['run'], 'spec': {'date': '24h'}}
        self.queries[query] = mongo

        date1 = 20101201
        date2 = 20101202
        query = "run date between [%s, %s]" % (date1, date2)
        mongo = {'fields': ['run'], 'spec': {'date': {'$lte': long(1291248000), '$gte': long(1291161600)}}}
        self.queries[query] = mongo

        query = "dataset file=/a/b run between [1,2] | grep file.name, file.age | unique | sum(file.size),max(file.size)"
        mongo = {'fields': ['dataset'], 'spec': 
                        {'run': {'$lte': 2, '$gte': 1}, 'file': '/a/b'}, 
                 'filters': {'grep': ['file.name', 'file.age'], 'unique': 1},
                 'aggregators': [('sum', 'file.size'), ('max', 'file.size')]}
        self.queries[query] = mongo
        
        query = "city = camelCase"
        mongo = {'fields': ['city'], 'spec':{'city': 'camelCase'}}
        self.queries[query] = mongo
        
        query = "city = lowercase"
        mongo = {'fields': ['city'], 'spec':{'city': 'lowercase'}}
        self.queries[query] = mongo
        
        query = "city = 'two words'"
        mongo = {'fields': ['city'], 'spec':{'city': 'two words'}}
        self.queries[query] = mongo
        
        query = 'city = "two words"'
        mongo = {'fields': ['city'], 'spec':{'city': 'two words'}}
        self.queries[query] = mongo
        
        #query=DASKEY
        query = 'city = dataset'
        mongo = {'fields': ['city'], 'spec':{'city': 'dataset'}}
        self.queries[query] = mongo
        
        #query=DASKEYtext
        query = 'city = datasetPostfix'
        mongo = {'fields': ['city'], 'spec':{'city': 'datasetPostfix'}}
        self.queries[query] = mongo
        
        #query=OPERATORtext (I don't expect query=OPERATOR to ever work)
        query = 'city = betweenPostfix'
        mongo = {'fields': ['city'], 'spec':{'city': 'betweenPostfix'}}
        self.queries[query] = mongo

        # query w/ filter which contains a key/value pair
        query = 'block=/a/b/c | grep site=T1 '
        mongo = {'fields': ['block'], 'spec': {'block': '/a/b/c'},
                 'filters': {'grep': ['site=T1']}}
        self.queries[query] = mongo

        # query w/ filter which contains a filter conditions
        query = 'run dataset=/a/b/c | grep run.run_number>1, run.run_number<10 '
        mongo = {'fields': ['run'], 'spec': {'dataset': '/a/b/c'}, 
                 'filters': {'grep': ['run.run_number>1', 'run.run_number<10']}}
        self.queries[query] = mongo

        # query w/ filter which contains a filter conditions
        query = 'run dataset=/a/b/c | grep run.a>0, run.b>=0, run.c<=0'
        mongo = {'fields': ['run'], 'spec': {'dataset': '/a/b/c'}, 
                 'filters': {'grep': ['run.a>0', 'run.b>=0', 'run.c<=0']}}
        self.queries[query] = mongo

        # query with DASKEY, date=value
        query = 'dataset date=20110124'
        mongo = {'fields': ['dataset'], 'spec': {'date': 1295827200}}
        self.queries[query] = mongo
        
        # query with DASKEY, date between [value1, value2]
        query = 'dataset date between [20110124,20110126]'
        mongo = {'fields': ['dataset'], 'spec': {'date': {'$gte': 1295827200, '$lte': 1296000000}}}
        self.queries[query] = mongo
        query = 'file=abcdeasdf'
        mongo = {'fields': ['file'], 'spec': {'file': 'abcdeasdf'}}
        self.queries[query] = mongo

        query = 'file=abcdeasdf dataset=abcdes'
        mongo = {'fields': None, 'spec': {'file': 'abcdeasdf', 'dataset': 'abcdes'}}
        self.queries[query] = mongo

        query = 'dataset date = 20080201'
        mongo = {'fields': ['dataset'], 'spec': {'date': 1201824000}}
        self.queries[query] = mongo

        query = 'file dataset date = 20080201'
        mongo = {'fields': ['file', 'dataset'], 'spec': {'date': 1201824000}}
        self.queries[query] = mongo

        query = 'dataset dataset=abcdes date = 20080201'
        mongo = {'fields': ['dataset'], 'spec': {'date': 1201824000, 'dataset': 'abcdes'}}
        self.queries[query] = mongo

        query = 'file dataset dataset=abcdes date = 20080201'
        mongo = {'fields': ['file', 'dataset'], 'spec': {'date': 1201824000, 'dataset': 'abcdes'}}
        self.queries[query] = mongo

        query = 'file=abcdeasdf file dataset dataset=abcdes date = 20080201'
        mongo = {'fields': ['file', 'dataset'], 'spec': {'date': 1201824000, 'file': 'abcdeasdf', 'dataset': 'abcdes'}}
        self.queries[query] = mongo

        query = 'file dataset=bla | grep file.creation_time<20080201'
        mongo = {'fields': ['file'], 'spec': {'dataset': 'bla'},
                 'filters': {'grep': ['file.creation_time<1201824000']}}
        self.queries[query] = mongo

    def test_instance(self):
        """Test appearance of instance in a DAS query"""
        query = 'dataset=/a/b/c instance=global'
        mongo = {'fields': ['dataset'], 'spec': {'dataset':'/a/b/c'}, 'instance':'global'}
        ply_query = self.dasply.parser.parse(query)
        result = ply2mongo(ply_query)
        self.assertEqual(mongo, result)

    def test_lexer(self):
        """Test DAS PLY lexer"""
        for query, expect in self.queries.items():
            if  self.debug:
                print("\n%s" % query)
            self.dasply.test_lexer(query)

    def test_parser(self):
        """Test DAS PLY parser"""
        for query, expect in self.queries.items():
            try:
                ply_query = self.dasply.parser.parse(query)
            except Exception:
                self.dasply.parser.parse(query, debug=1)
                print("Input query:", query)
                raise
            if  self.debug:
                print()
                print("input query", query)
                print("ply query  ", ply_query)
            result = ply2mongo(ply_query)
            self.assertEqual(expect, result)

    def test_parser_negate(self):
        """Test DAS PLY parser with negative results"""
        mongo = {}
        queries = {}

        query = 'run last 24h'
        queries[query] = mongo

        query = 'run last dataset'
        queries[query] = mongo

        query = 'dateset in 2010'
        queries[query] = mongo

        query = 'date in 24h'
        queries[query] = mongo

        query = 'date last [20101010,20101012]'
        queries[query] = mongo

        query = 'dataset in [/a/b/c,/c/d/e]'
        queries[query] = mongo

        query = 'detaset = /a/b/c'
        queries[query] = mongo

        query = 'detaset = /a/b/c dataset.size'
        queries[query] = mongo

        query = """dataset date in [20110124,20110126]"""
        queries[query] = mongo

        query = "run in [20853,20859]"
        self.queries[query] = mongo

        query = "dataset" # prevent usage of single keys
        self.queries[query] = mongo

        for query, expect in queries.items():
            self.assertRaises(Exception, self.dasply.parser.parse, query)
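The suite runs standalone in the usual unittest fashion, assuming the module-level imports (DASPLY, ply2mongo, ply.yacc) resolve:

if __name__ == '__main__':
    unittest.main()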
Example #6
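QLManager is the production wrapper around the same parser. It assembles the full DAS key list from the mapping DB, builds a DASPLY instance, optionally caches parse results in a parser DB, validates the resulting MongoDB query, and maps selection keys and conditions onto the services and APIs that can answer them.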
class QLManager(object):
    """
    DAS QL manager.
    """
    def __init__(self, config=None):
        if  not config:
            config = das_readconfig()
        if  not config.has_key('dasmapping'):
            config['dasmapping'] = DASMapping(config)
        if  not config.has_key('dasanalytics'):
            config['dasanalytics'] = DASAnalytics(config)
        if  not config['dasmapping'].check_maps():
            msg = "No DAS maps found in MappingDB"
            raise Exception(msg)
        self.map         = config['dasmapping']
        self.analytics   = config['dasanalytics']
        self.dasservices = config['services']
        self.daskeysmap  = self.map.daskeys()
        self.operators   = list(das_operators())
        self.daskeys     = list(das_special_keys())
        self.verbose     = config['verbose']
        self.logger      = PrintManager('QLManager', self.verbose)
        for val in self.daskeysmap.values():
            for item in val:
                self.daskeys.append(item)
        parserdir   = config['das']['parserdir']
        self.dasply = DASPLY(parserdir, self.daskeys, self.dasservices, 
                verbose=self.verbose)

        self.enabledb = config['parserdb']['enable']
        if  self.enabledb:
            self.parserdb = DASParserDB(config)

    def parse(self, query):
        """
        Parse input query and return query in MongoDB form.
        Optionally parsed query can be written into analytics DB.
        """
        mongo_query = self.mongo_query(query)
        self.convert2skeys(mongo_query)
        return mongo_query

    def add_to_analytics(self, query, mongo_query):
        "Add DAS query to analytics DB"
        self.analytics.add_query(query, mongo_query)

    def mongo_query(self, query):
        """
        Return mongo query for provided input query
        """
        # NOTE: somehow I need to keep build call just before using
        # PLY parser, otherwise it fails to parse.
        self.dasply.build()
        if  self.verbose:
            msg = "input query='%s'" % query
            self.logger.debug(msg)
            self.dasply.test_lexer(query)
        if  self.enabledb:
            status, value = self.parserdb.lookup_query(query)
            if status == PARSERCACHE_VALID and \
                len(last_key_pattern.findall(query)) == 0:
                mongo_query = value
            elif status == PARSERCACHE_INVALID:
                raise Exception(value)
            else:
                try:
                    ply_query = self.dasply.parser.parse(query)
                    mongo_query = ply2mongo(ply_query)
                    self.parserdb.insert_valid_query(query, mongo_query)
                except Exception as exp:
                    self.parserdb.insert_invalid_query(query, exp)
                    print "Input query=%s" % query
                    raise exp
        else:
            try:
                ply_query   = self.dasply.parser.parse(query)
                mongo_query = ply2mongo(ply_query)
            except Exception as exc:
                msg = "Fail to convert input query='%s' into MongoDB format" \
                    % query
                print_exc(msg, print_traceback=False)
                raise exc
        if  set(mongo_query.keys()) & set(['fields', 'spec']) != \
                set(['fields', 'spec']):
            raise Exception('Invalid MongoDB query %s' % mongo_query)
        if  not mongo_query['fields'] and len(mongo_query['spec'].keys()) > 1:
            raise Exception(ambiguous_msg(query, mongo_query['spec'].keys()))
        for key, val in mongo_query['spec'].iteritems():
            if  isinstance(val, list):
                raise Exception(ambiguos_val_msg(query, key, val))
        return mongo_query

    def convert2skeys(self, mongo_query):
        """
        Convert DAS input keys into DAS selection keys.
        """
        if  not mongo_query['spec']:
            for key in mongo_query['fields']:
                for system in self.map.list_systems():
                    mapkey = self.map.find_mapkey(system, key)
                    if  mapkey:
                        mongo_query['spec'][mapkey] = '*'
            return
        spec = mongo_query['spec']
        to_replace = []
        for key, val in spec.iteritems():
            for system in self.map.list_systems():
                mapkey = self.map.find_mapkey(system, key, val)
                if  mapkey and mapkey != key and \
                    mongo_query['spec'].has_key(key):
                    to_replace.append((key, mapkey))
                    continue
        for key, mapkey in to_replace:
            if  mongo_query['spec'].has_key(key):
                mongo_query['spec'][mapkey] = mongo_query['spec'][key]
                del mongo_query['spec'][key]
        
    def services(self, query):
        """Find out DAS services to use for provided query"""
        skeys, cond = decompose(query)
        if  not skeys:
            skeys = []
        if  isinstance(skeys, str):
            skeys = [skeys]
        slist = []
        # look-up services from Mapping DB
        for key in skeys + [i for i in cond.keys()]:
            for service, keys in self.daskeysmap.iteritems():
                if  service not in self.dasservices:
                    continue
                value = cond.get(key, None)
                daskeys = self.map.find_daskey(service, key, value)
                if  set(keys) & set(daskeys) and service not in slist:
                    slist.append(service)
        # look-up special key condition
        requested_system = query.get('system', None)
        if  requested_system:
            if  isinstance(requested_system, str):
                requested_system = [requested_system]
            return list( set(slist) & set(requested_system) )
        return slist

    def service_apis_map(self, query):
        """
        Find out which APIs correspond to provided query.
        Return a map of found services and their apis.
        """
        skeys, cond = decompose(query)
        if  not skeys:
            skeys = []
        if  isinstance(skeys, str):
            skeys = [skeys]
        adict = {}
        mapkeys = [key for key in cond.keys() if key not in das_special_keys()]
        services = self.services(query)
        for srv in services:
            alist = self.map.list_apis(srv)
            for api in alist:
                daskeys = self.map.api_info(api)['daskeys']
                maps = [r['map'] for r in daskeys]
                if  set(mapkeys) & set(maps) == set(mapkeys): 
                    if  adict.has_key(srv):
                        new_list = adict[srv] + [api]
                        adict[srv] = list( set(new_list) )
                    else:
                        adict[srv] = [api]
        return adict

    def params(self, query):
        """
        Return dictionary of parameters to be used in DAS Core:
        selection keys, conditions and services.
        """
        skeys, cond = decompose(query)
        services = []
        for srv in self.services(query):
            if  srv not in services:
                services.append(srv)
        return dict(selkeys=skeys, conditions=cond, services=services)
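Hypothetical usage, assuming a working DAS configuration and a populated MappingDB; the quoted parser output comes from the corresponding fixture in Example #5:

mgr = QLManager()
mongo_query = mgr.parse("site=T1_CH_CERN")
# Raw parser output, per the fixtures above (parse() may further rewrite
# spec keys through convert2skeys):
# {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}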