Code Example #1
File: value_matching_dataset.py  Project: ktf/DAS
def match_value_dataset(kwd, dbs_inst=None):
    """ return keyword matches to dataset values in dbsmanager """
    # if no specific dbs_inst passed, get the current one from request
    if not dbs_inst:
        if not hasattr(request, 'dbs_inst'):
            return None, None
        dbs_inst = request.dbs_inst

    dataset_score = None

    # make sure the kwd is unicode
    if not isinstance(kwd, unicode) and isinstance(kwd, str):
        kwd = unicode(kwd)

    upd_kwd = kwd

    # dbsmgr.find returns a generator, check whether it is non-empty
    match = find_datasets(kwd, dbs_inst, limit=1)
    if next(match, False):
        if DEBUG:
            print 'Dataset matched by keyword %s' % kwd
        # if the keyword contains wildcards the score is a bit lower
        if '*' in kwd and '/' not in kwd:
            dataset_score = 0.8
        elif '*' in kwd and '/' in kwd:
            dataset_score = 0.9
        elif '*' not in kwd and '/' not in kwd:
            if next(find_datasets('*%s*' % kwd, dbs_inst, limit=1), False):
                dataset_score = 0.7
                upd_kwd = '*%s*' % kwd
        else:
            dataset_score = 1.0

        # prevent number-only keywords from being matched as datasets
        if kwd.isnumeric():
            dataset_score -= 0.3

    # add extra wildcard to make sure the query will work...
    if not RE_3SLAHES.match(upd_kwd):
        upd_kwd0 = upd_kwd
        if not upd_kwd.startswith('*') and not upd_kwd.startswith('/'):
            upd_kwd = '*' + upd_kwd
        if not upd_kwd0.endswith('*') or '*' not in upd_kwd0:
            upd_kwd += '*'

    return dataset_score, {'map_to': 'dataset.name',
                           'adjusted_keyword': upd_kwd}
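
A minimal usage sketch for the function above. The keyword and the DBS instance name 'prod/global' are illustrative assumptions, not values taken from the listing, and the call requires a working DAS/DBS environment:

# Hypothetical call -- the keyword and dbs_inst value are assumptions
score, hint = match_value_dataset('ZMM', dbs_inst='prod/global')
# score is None when the keyword matches no dataset; hint always carries
# the target field and the adjusted (wildcarded) keyword
if score is not None:
    print 'matched %s as %s with score %s' \
        % (hint['map_to'], hint['adjusted_keyword'], score)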
Code Example #2
File: das_query.py  Project: ktf/DAS
    def __init__(self, query, **flags):
        """
        Accepts general form of DAS query, supported formats are
        DAS input query, DAS mongo query, DAS storage query. The
        supplied flags can carry any query attributes, e.g.
        filters, aggregators, system, instance, etc.
        """
        self._mongoparser   = None
        self._params        = {}
        self._service_apis_map = {}
        self._str           = ''
        self._query         = ''
        self._query_full    = ''
        self._storage_query = {}
        self._mongo_query   = {}
        self._qhash         = None
        self._hashes        = None
        self._system        = None
        self._instance      = None
        self._loose_query   = None
        self._pattern_query = None
        self._sortkeys      = []
        self._filters       = {}
        self._mapreduce     = []
        self._aggregators   = []
        self._flags         = flags

        # loop over flags and set available attributes
        for key, val in flags.iteritems():
            setattr(self, '_%s' % key, val)

        # test data type of input query and apply appropriate initialization
        if  isinstance(query, basestring):
            self._query = query
            try:
                self._mongo_query = self.mongoparser.parse(query)
                for key, val in flags.iteritems():
                    if  key in self.NON_CACHEABLE_FLAGS:
                        continue
                    if  key not in self._mongo_query:
                        self._mongo_query[key] = val
            except Exception as exp:
                msg = "Fail to parse DAS query='%s', %s" % (query, str(exp))
                print_exc(msg, print_traceback=True)
                raise exp
        elif isinstance(query, dict):
            newquery = {}
            for key, val in query.iteritems():
                newquery[key] = val
            if  isinstance(newquery.get('spec'), dict): # mongo query
                self._mongo_query = newquery
            else: # storage query
                self._storage_query = newquery
        elif isinstance(query, object) and hasattr(query, '__class__')\
            and query.__class__.__name__ == 'DASQuery':
            self._query = query.query
            self._hashes = query.hashes
            self._mongo_query = query.mongo_query
            self._storage_query = query.storage_query
        else:
            raise Exception('Unsupported data type of DAS query')
        self.update_attr()

        # check dataset wild-cards
        for key, val in self._mongo_query['spec'].items():
            if  key == 'dataset.name':
                # only match dataset.name, not primary_dataset.name
                if  not RE_3SLAHES.match(val):

                    # TODO: we currently do not support wildcard matching
                    #       from command line interface
                    if not self._instance:
                        continue

                    # apply 3-slash pattern look-up, continuing only if one
                    # interpretation exists here, ticket #3071
                    self._handle_dataset_slashes(key, val)
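
A minimal construction sketch for the constructor above. The query text, the 'prod/global' instance, and the mongo-query dict are illustrative assumptions; parsing the string form also assumes a configured mongoparser/DAS environment:

# Hypothetical usage -- query text and instance value are assumptions;
# flags (e.g. instance) are stored as private attributes and, for the
# string form, merged into the parsed mongo query unless they appear
# in NON_CACHEABLE_FLAGS
q1 = DASQuery('dataset=/ZMM*/*/*', instance='prod/global')

# a pre-parsed mongo query (a dict whose 'spec' value is a dict)
# bypasses the string parser
q2 = DASQuery({'fields': ['dataset'],
               'spec': {'dataset.name': '/ZMM*/*/*'}})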