Example #1
def keyword_schema_weights(keyword, kwd_idx=-1):
    """
    for each schema term (entity, entity attribute), calculate the likelihood
    that the keyword matches it
    """
    entities = get_schema().entity_names.items()
    result = [(string_distance(keyword, entity_short), entity_long)
              for entity_long, entity_short in entities]

    # check synonyms
    entity_synonyms = get_schema().cms_synonyms['daskeys'].items()
    for entity_long, synonyms in entity_synonyms:
        for synonym in synonyms:
            result.extend([
                (string_distance(keyword, synonym), entity_long),
            ])

    # apply some simple patterns
    if kwd_idx == 0:
        if keyword == 'where':
            result.extend([
                (0.75, 'site.name'),
            ])
        if keyword == 'who':
            result.extend([
                (0.5, 'user.name'),
            ])

    result = [item for item in result if item[0] > 0]
    result.sort(key=lambda item: item[0], reverse=True)
    return result
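A minimal usage sketch for keyword_schema_weights, runnable on its own: string_distance and get_schema below are hypothetical stand-ins for the DAS helpers, so the concrete scores are illustrative only.

# Hypothetical stand-ins so the function above can be tried in isolation;
# the real helpers live in the DAS keyword-search package.
from difflib import SequenceMatcher

def string_distance(keyword, term):
    # similarity in [0, 1]; the real DAS metric may differ
    return SequenceMatcher(None, keyword.lower(), term.lower()).ratio()

class _FakeSchema(object):
    # entity_names maps long entity names to short DAS keys
    entity_names = {'dataset.name': 'dataset', 'site.name': 'site'}
    # cms_synonyms['daskeys'] maps long entity names to synonym lists
    cms_synonyms = {'daskeys': {'site.name': ['location']}}

def get_schema():
    return _FakeSchema()

# keyword_schema_weights('location') would then return a list of
# (score, entity) pairs sorted best-first, e.g. [(1.0, 'site.name'), ...]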
Example #2
def keyword_schema_weights(keyword, kwd_idx=-1):
    """
    for each schema term (entity, entity attribute), calculate the likelihood
    that the keyword matches it
    """
    entities = get_schema().entity_names.iteritems()
    result = [(string_distance(keyword, entity_short), entity_long)
              for entity_long, entity_short in entities]

    # check synonyms
    entity_synonyms = get_schema().cms_synonyms['daskeys'].iteritems()
    for entity_long, synonyms in entity_synonyms:
        for synonym in synonyms:
            result.extend([(string_distance(keyword, synonym), entity_long), ])

    # apply some simple patterns
    if kwd_idx == 0:
        if keyword == 'where':
            result.extend([(0.75, 'site.name'), ])
        if keyword == 'who':
            result.extend([(0.5, 'user.name'), ])

    result = [item for item in result
              if item[0] > 0]
    result.sort(key=lambda item: item[0], reverse=True)
    return result
Example #3
def keyword_regexp_weights(keyword):
    """ evaluate keyword regexp matches """
    regexps = get_schema().compiled_input_regexps
    for re_compiled, constraint, apis in regexps:
        # do not allow # in dataset
        if '#' in keyword:
            apis = [api
                    for api in apis
                    if api['key'] != 'dataset']
            if not apis:
                continue

        score = 0
        # We prefer non-empty constraints
        # We may also have different weights for different types of regexps
        if re.search(re_compiled, keyword):
            if constraint.startswith('^') and constraint.endswith('$'):
                score = 0.7
            elif constraint.startswith('^') or constraint.endswith('$'):
                score = 0.6
            elif constraint != '':
                score = 0.5

        if score:
            for api in apis:
                yield score, api['entity_long']

    # append date match...
    if regex.date_yyyymmdd_pattern.match(keyword):
        yield 0.95, 'date'
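The scoring ladder above rewards anchored constraints: 0.7 when the pattern is anchored at both ends, 0.6 when anchored at one end, 0.5 otherwise. A self-contained sketch with made-up patterns (not the actual DAS compiled_input_regexps):

import re

# Illustrative (constraint, keyword) pairs; the patterns are invented.
examples = [
    ('^/[^/]+/[^/]+/[A-Z-]+$', '/Zmm/Summer11/AOD'),  # anchored on both ends
    ('^cmsrun', 'cmsrun_cfg.py'),                      # anchored at start only
    ('[0-9]{8}', 'run 20120131'),                      # unanchored
]

for constraint, keyword in examples:
    score = 0
    if re.search(constraint, keyword):
        if constraint.startswith('^') and constraint.endswith('$'):
            score = 0.7
        elif constraint.startswith('^') or constraint.endswith('$'):
            score = 0.6
        elif constraint != '':
            score = 0.5
    print(keyword, '->', score)  # 0.7, 0.6 and 0.5 respectively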
Example #5
def dasql_to_nl(dasql_tuple):
    """
    Returns a natural-language representation of a generated DAS query,
    so as to explain to users what it means.
    """
    # TODO: get rid of dasql_tuple, use a namedtuple or dict!?
    (result_type, short_input_params, result_projections, result_filters,
     result_operators) = dasql_tuple

    filters = ['%s=%s' % (f, v) for (f, v) in short_input_params]
    get_title = lambda field: \
        get_schema().get_result_field_title(result_type, field,
                                            technical=True, html=True)
    if result_filters:
        # TODO: add verbose name if any
        filters.extend([
            '{0:s} {1:s} {2:s}'.format(get_title(field), op, val)
            for (field, op, val) in result_filters])

    filters = ' <b>AND</b> '.join(filters)

    if result_projections:
        projections = ', '.join(str(get_title(field))
                                for field in result_projections)
        return '<b>find</b> {projections:s} ' \
               '<b>for each</b> {result_type:s} ' \
               '<b>where</b> {filters:s}'.format(projections=projections,
                                                 result_type=result_type,
                                                 filters=filters)
    else:
        return '<b>find</b> {result_type:s} <b>where</b> {filters:s}'.format(
            result_type=result_type, filters=filters)
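To make the expected tuple layout concrete, a toy sketch: get_schema() is stubbed and the entities/fields are invented, so the rendered string is only indicative.

class _FakeSchema(object):
    @staticmethod
    def get_result_field_title(result_type, field, technical=True, html=True):
        # the real adapter returns a human-readable (HTML) field title
        return field

def get_schema():
    return _FakeSchema()

dasql_tuple = (
    'dataset',                          # result_type
    [('site', 'T1_CH_CERN')],           # short_input_params
    ['dataset.nevents'],                # result_projections
    [('dataset.nevents', '>', '10')],   # result_filters
    [],                                 # result_operators
)
# dasql_to_nl(dasql_tuple) would then produce roughly:
# <b>find</b> dataset.nevents <b>for each</b> dataset <b>where</b>
#   site=T1_CH_CERN <b>AND</b> dataset.nevents > 10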
Example #6
def dasql_to_nl(dasql_tuple):
    """
    Returns a natural-language representation of a generated DAS query,
    so as to explain to users what it means.
    """
    # TODO: get rid of dasql_tuple, use a namedtuple or dict!?
    (result_type, short_input_params, result_projections, result_filters,
     result_operators) = dasql_tuple

    filters = ['%s=%s' % (f, v) for (f, v) in short_input_params]
    get_title = lambda field: \
        get_schema().get_result_field_title(result_type, field,
                                            technical=True, html=True)
    if result_filters:
        # TODO: add verbose name if any
        filters.extend([
            '{0:s} {1:s} {2:s}'.format(get_title(field), op, val)
            for (field, op, val) in result_filters
        ])

    filters = ' <b>AND</b> '.join(filters)

    if result_projections:
        projections = ', '.join(
            str(get_title(field)) for field in result_projections)
        return '<b>find</b> {projections:s} ' \
               '<b>for each</b> {result_type:s} ' \
               '<b>where</b> {filters:s}'.format(projections=projections,
                                                 result_type=result_type,
                                                 filters=filters)
    else:
        return '<b>find</b> {result_type:s} <b>where</b> {filters:s}'.format(
            result_type=result_type, filters=filters)
Example #7
    def __init__(self, cms_rep, render_template):
        self.cms_rep = cms_rep
        self.dasmgr = self.cms_rep.dasmgr
        self.entity_names = self._build_short_daskeys(self.dasmgr)
        # schema adapter from kws
        # TODO: get_field_list_for_entity_by_pk could be moved to DAS Core or...
        self.schema_adapter = get_schema(dascore=self.dasmgr)
        self.render_template = render_template
Example #9
def add_full_fieldmatch(kwd, matches):
    """ check for full match to am attribute, e.g. dataset.nevents """
    if '.' in kwd:
        match = get_schema().check_result_field_match(kwd)
        if match:
            entity, field = match
            result = {'field': field,
                      'len': 1,
                      'tokens_required': [kwd, ],
                      'score': 20.0}
            matches[entity].append(result)
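A hedged sketch of the calling convention: matches is assumed to be a defaultdict(list) keyed by entity, and check_result_field_match is stubbed with an invented mapping.

from collections import defaultdict

class _FakeSchema(object):
    @staticmethod
    def check_result_field_match(kwd):
        # the real adapter resolves e.g. 'dataset.nevents' to (entity, field)
        known = {'dataset.nevents': ('dataset', 'dataset.nevents')}
        return known.get(kwd)

def get_schema():
    return _FakeSchema()

matches = defaultdict(list)
# add_full_fieldmatch('dataset.nevents', matches) would append
# {'field': 'dataset.nevents', 'len': 1,
#  'tokens_required': ['dataset.nevents'], 'score': 20.0}
# to matches['dataset'].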
Example #10
def manual_tests():
    """
    manual tests
    """
    from DAS.keywordsearch.metadata.schema_adapter_factory import get_schema
    from DAS.core.das_core import DASCore

    schema_adapter = get_schema(DASCore(multitask=False))
    fields_by_entity = schema_adapter.list_result_fields()
    ir_matcher = SimpleIREntityAttributeMatcher(fields_by_entity)

    def print_results(*args, **kwargs):
        """ run search and print results - used for testsing """
        ir_matcher.search_index(*args, **kwargs)

    if False:
        print_results(
            keywords=u'files of Zmm with number of events more than 10',
            result_type=u'dataset')
        print_results(keywords=u'number events', result_type=u'dataset')
        print_results(keywords=u'number evented', result_type=u'dataset')
        print_results(keywords=u'dataset.nevents', result_type=u'dataset')
        print_results(keywords=u'dataset.numevents', result_type=u'dataset')

        # block.replica.subscribed vs block.replica.custodial
        #  (the deepest name in here is the most important)
        print_results(keywords=u'replica fraction', result_type=u'block')
        print_results(keywords=u'replica fraction', result_type=u'site')
        print_results(keywords=u'custodial replica', result_type=u'block')
        print_results(keywords=u'replica_fraction', result_type=u'site')

        print('=========================================================')

        print_results(keywords=u'number', result_type=u'dataset')
        print_results(keywords=u'of', result_type=u'dataset')
        print_results(keywords=u'events', result_type=u'dataset')
        print_results(keywords=u'number of', result_type=u'dataset')
        print_results(keywords=u'of events', result_type=u'dataset')
        print_results(keywords=u'Number OF Events', result_type=u'dataset')
    print('Q: dataset_fraction')
    print_results(keywords=u'dataset_fraction', result_type=u'site')
    print('Q: dataset fraction')
    print_results(keywords=u'dataset fraction', result_type=u'site')
    print('Q: dataset part')
    print_results(keywords=u'dataset part', result_type=u'site')
    print('============================================')
    print('Q: file')
    print_results(keywords=u'file in', result_type='file', limit=4)
    print('============================================')
    print('Q: file in')
    print_results(keywords=u'file in', result_type='file', limit=4)
Example #11
def manual_tests():
    """
    manual tests
    """
    from DAS.keywordsearch.metadata.schema_adapter_factory import get_schema
    from DAS.core.das_core import DASCore

    schema_adapter = get_schema(DASCore(multitask=False))
    fields_by_entity = schema_adapter.list_result_fields()
    ir_matcher = SimpleIREntityAttributeMatcher(fields_by_entity)

    def print_results(*args, **kwargs):
        """ run search and print results - used for testsing """
        ir_matcher.search_index(*args, **kwargs)

    if False:
        print_results(
            keywords=u'files of Zmm with number of events more than 10',
            result_type=u'dataset')
        print_results(keywords=u'number events', result_type=u'dataset')
        print_results(keywords=u'number evented', result_type=u'dataset')
        print_results(keywords=u'dataset.nevents', result_type=u'dataset')
        print_results(keywords=u'dataset.numevents', result_type=u'dataset')

        # block.replica.subscribed vs block.replica.custodial
        #  (the deepest name in here is the most important)
        print_results(keywords=u'replica fraction', result_type=u'block')
        print_results(keywords=u'replica fraction', result_type=u'site')
        print_results(keywords=u'custodial replica', result_type=u'block')
        print_results(keywords=u'replica_fraction', result_type=u'site')

        print '========================================================='

        print_results(keywords=u'number', result_type=u'dataset')
        print_results(keywords=u'of', result_type=u'dataset')
        print_results(keywords=u'events', result_type=u'dataset')
        print_results(keywords=u'number of', result_type=u'dataset')
        print_results(keywords=u'of events', result_type=u'dataset')
        print_results(keywords=u'Number OF Events', result_type=u'dataset')
    print 'Q: dataset_fraction'
    print_results(keywords=u'dataset_fraction', result_type=u'site')
    print 'Q: dataset fraction'
    print_results(keywords=u'dataset fraction', result_type=u'site')
    print 'Q: dataset part'
    print_results(keywords=u'dataset part', result_type=u'site')
    print '============================================'
    print 'Q: file'
    print_results(keywords=u'file in', result_type='file', limit=4)
    print '============================================'
    print 'Q: file in'
    print_results(keywords=u'file in', result_type='file', limit=4)
Example #12
    def __init__(self, dascore):
        self.schema = get_schema(dascore)

        # import and initialize the ranker
        from DAS.extensions import fast_recursive_ranker
        self.ranker = fast_recursive_ranker
        self.ranker.initialize_ranker(self.schema)

        # build and load the whoosh index (listing fields in service outputs)
        fields = self.schema.list_result_fields()
        self.multi_kwd_searcher = MultiKwdAttributeMatcher(fields)

        # initialize the value trackers (primary_dataset, release, etc)
        init_value_trackers()
Example #14
def manual_tests():
    """
    manual tests
    """
    from DAS.keywordsearch.metadata.schema_adapter_factory import get_schema
    from DAS.core.das_core import DASCore

    schema_adapter = get_schema(DASCore(multitask=False))
    fields_by_entity = schema_adapter.list_result_fields()
    ir_matcher = SimpleIREntityAttributeMatcher(fields_by_entity)

    def print_results(*args, **kwargs):
        """ run search and print results - used for testsing """
        ir_matcher.search_index(*args, **kwargs)

    if False:
        print_results(keywords=u"files of Zmm with number of events more than 10", result_type=u"dataset")
        print_results(keywords=u"number events", result_type=u"dataset")
        print_results(keywords=u"number evented", result_type=u"dataset")
        print_results(keywords=u"dataset.nevents", result_type=u"dataset")
        print_results(keywords=u"dataset.numevents", result_type=u"dataset")

        # block.replica.subscribed vs block.replica.custodial
        #  (the deepest name in here is the most important)
        print_results(keywords=u"replica fraction", result_type=u"block")
        print_results(keywords=u"replica fraction", result_type=u"site")
        print_results(keywords=u"custodial replica", result_type=u"block")
        print_results(keywords=u"replica_fraction", result_type=u"site")

        print("=========================================================")

        print_results(keywords=u"number", result_type=u"dataset")
        print_results(keywords=u"of", result_type=u"dataset")
        print_results(keywords=u"events", result_type=u"dataset")
        print_results(keywords=u"number of", result_type=u"dataset")
        print_results(keywords=u"of events", result_type=u"dataset")
        print_results(keywords=u"Number OF Events", result_type=u"dataset")
    print("Q: dataset_fraction")
    print_results(keywords=u"dataset_fraction", result_type=u"site")
    print("Q: dataset fraction")
    print_results(keywords=u"dataset fraction", result_type=u"site")
    print("Q: dataset part")
    print_results(keywords=u"dataset part", result_type=u"site")
    print("============================================")
    print("Q: file")
    print_results(keywords=u"file in", result_type="file", limit=4)
    print("============================================")
    print("Q: file in")
    print_results(keywords=u"file in", result_type="file", limit=4)
Example #15
def add_full_fieldmatch(kwd, matches):
    """ check for full match to am attribute, e.g. dataset.nevents """
    if '.' in kwd:
        match = get_schema().check_result_field_match(kwd)
        if match:
            entity, field = match
            result = {
                'field': field,
                'len': 1,
                'tokens_required': [
                    kwd,
                ],
                'score': 20.0
            }
            matches[entity].append(result)
Example #16
def need_res_fields_bootsrap():
    """ return whether the list of entity attributes is available;
    if not, it needs to be bootstrapped
    """
    dascore = DASCore(multitask=False)
    schema_adapter = schema_adapter_factory.get_schema(dascore)
    try:
        field_list = schema_adapter.list_result_fields()
        if not field_list:
            return True
        ir_entity_attributes.SimpleIREntityAttributeMatcher(field_list)
    except Exception as exc:
        print exc
        return True
    return False
Example #17
    def append_subquery_matches(self, keywords, matches):
        """
        get matches to individual and nearby keywords (non phrase)
        """

        # check for full-name matches to an attribute, e.g. dataset.nevents
        for kwd in keywords:
            add_full_fieldmatch(kwd, matches)

        fields_by_entity = get_schema().list_result_fields()
        str_len = len(keywords)
        max_len = min(len(keywords), MAX_TOKEN_COMBINATION_LEN)
        for length in xrange(1, max_len + 1):
            for start in xrange(0, str_len - length + 1):
                chunk = keywords[start:start + length]
                # exclude phrases with "a b c" (as these were processed earlier)
                if any(c for c in chunk if ' ' in c):
                    continue
                # only the last term in the chunk is allowed to contain operator
                if any(test_operator_containment(kw) for kw in chunk[:-1]):
                    continue
                if DEBUG:
                    print('chunk:', chunk)
                    print('len=', length, '; start=', start, 'chunk:', chunk)

                s_chunk = ' '.join(get_keyword_without_operator(kw)
                                   for kw in chunk)
                results = self.fields_idx.search_index(
                    kwds=s_chunk,
                    limit=CHUNK_N_TOKEN_COMBINATION_RESULTS)
                max_score = results and results[0]['score']
                for result in results:
                    result['len'] = len(result['keywords_matched'])
                    entity = result['result_type']
                    if not check_validity(result, fields_by_entity):
                        continue
                    result['field'] = fields_by_entity[entity][result['field']]
                    result['tokens_required'] = chunk
                    if USE_IR_SCORE_NORMALIZATION_LOCAL:
                        result['score'] /= max_score
                    matches[entity].append(result)
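The nested loops above enumerate every contiguous run of up to MAX_TOKEN_COMBINATION_LEN keywords. A standalone sketch (assuming a limit of 3) shows which chunks would be sent to the index:

MAX_TOKEN_COMBINATION_LEN = 3  # assumed value, for illustration only

keywords = ['number', 'of', 'events', 'dataset']
str_len = len(keywords)
max_len = min(str_len, MAX_TOKEN_COMBINATION_LEN)
for length in range(1, max_len + 1):
    for start in range(0, str_len - length + 1):
        print(keywords[start:start + length])
# -> ['number'], ['of'], ['events'], ['dataset'],
#    ['number', 'of'], ['of', 'events'], ['events', 'dataset'],
#    ['number', 'of', 'events'], ['of', 'events', 'dataset']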
Example #18
    def append_subquery_matches(self, keywords, matches):
        """
        get matches to individual and nearby keywords (non phrase)
        """

        # check for full-name matches to an attribute, e.g. dataset.nevents
        for kwd in keywords:
            add_full_fieldmatch(kwd, matches)

        fields_by_entity = get_schema().list_result_fields()
        str_len = len(keywords)
        max_len = min(len(keywords), MAX_TOKEN_COMBINATION_LEN)
        for length in xrange(1, max_len + 1):
            for start in xrange(0, str_len - length + 1):
                chunk = keywords[start:start + length]
                # exclude phrases with "a b c" (as these were processed earlier)
                if any(c for c in chunk if ' ' in c):
                    continue
                # only the last term in the chunk is allowed to contain operator
                if any(test_operator_containment(kw) for kw in chunk[:-1]):
                    continue
                if DEBUG:
                    print('chunk:', chunk)
                    print('len=', length, '; start=', start, 'chunk:', chunk)

                s_chunk = ' '.join(
                    get_keyword_without_operator(kw) for kw in chunk)
                results = self.fields_idx.search_index(
                    kwds=s_chunk, limit=CHUNK_N_TOKEN_COMBINATION_RESULTS)
                max_score = results and results[0]['score']
                for result in results:
                    result['len'] = len(result['keywords_matched'])
                    entity = result['result_type']
                    if not check_validity(result, fields_by_entity):
                        continue
                    result['field'] = fields_by_entity[entity][result['field']]
                    result['tokens_required'] = chunk
                    if USE_IR_SCORE_NORMALIZATION_LOCAL:
                        result['score'] /= max_score
                    matches[entity].append(result)
Example #19
    def get_phrase_matches(self, keywords):
        """
        get phrase matches from IR index
        """
        fields_by_entity = get_schema().list_result_fields()

        # first filter out the phrases (we won't combine them with anything)
        phrase_kwds = [kw for kw in keywords if ' ' in kw]

        matches = defaultdict(list)
        for kwd in phrase_kwds:
            # remove operators, e.g. "number of events">10 => number of events
            phrase = get_keyword_without_operator(kwd)
            # get ranked list of matches
            results = self.fields_idx.search_index(
                kwds=phrase, limit=CHUNK_N_PHRASE_RESULTS)

            max_score = results and results[0]['score']
            for result in results:
                #r['len'] =  1
                result['len'] = len(result['keywords_matched'])
                entity = result['result_type']
                if not check_validity(result, fields_by_entity):
                    continue

                # TODO: this shall be done in presentation level
                result['field'] = fields_by_entity[entity][result['field']]
                result['tokens_required'] = [kwd]

                # penalize terms that have multiple matches
                result['score'] *= W_PHRASE
                if USE_IR_SCORE_NORMALIZATION_LOCAL:
                    result['score'] /= max_score

                matches[entity].append(result)

        return matches
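Phrase keywords are simply the tokens that still contain a space after tokenization (typically quoted chunks such as "number of events">10); a tiny illustration with invented input:

keywords = ['dataset', 'number of events>10', 'Zmm']
phrase_kwds = [kw for kw in keywords if ' ' in kw]
print(phrase_kwds)  # ['number of events>10']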
Example #20
    def get_phrase_matches(self, keywords):
        """
        get phrase matches from IR index
        """
        fields_by_entity = get_schema().list_result_fields()

        # first filter out the phrases (we won't combine them with anything)
        phrase_kwds = [kw for kw in keywords if ' ' in kw]

        matches = defaultdict(list)
        for kwd in phrase_kwds:
            # remove operators, e.g. "number of events">10 => number of events
            phrase = get_keyword_without_operator(kwd)
            # get ranked list of matches
            results = self.fields_idx.search_index(kwds=phrase,
                                                   limit=CHUNK_N_PHRASE_RESULTS)

            max_score = results and results[0]['score']
            for result in results:
                #r['len'] =  1
                result['len'] = len(result['keywords_matched'])
                entity = result['result_type']
                if not check_validity(result, fields_by_entity):
                    continue

                # TODO: this shall be done in presentation level
                result['field'] = fields_by_entity[entity][result['field']]
                result['tokens_required'] = [kwd]

                # penalize terms that have multiple matches
                result['score'] *= W_PHRASE
                if USE_IR_SCORE_NORMALIZATION_LOCAL:
                    result['score'] /= max_score

                matches[entity].append(result)

        return matches
Example #21
def result_to_dasql(result,
                    frmt='text',
                    shorten_html=True,
                    max_value_len=UI_MAX_DISPLAYED_VALUE_LEN):
    """
    returns the proposed query as DASQL in these formats:

    - text, standard DASQL
    - html, colorified DASQL with long values shortened down (if shorten_html
        is specified)
    """
    patterns = DASQL_PATTERNS[frmt]

    def tmpl(name, params=None):
        """
        gets a pattern, formats it with params if any,
        and applies an escape function if needed
        """
        # a helper function to map values of dict
        # TODO: in Py2.7: {k: f(v) for k, v in my_dictionary.items()}
        map_dict_values = lambda f, my_dict: dict(
            (k, f(v)) for k, v in my_dict.items())

        if frmt == 'html':
            # shorten the value if it's longer than max_value_len
            if isinstance(params, dict) and 'value' in params and shorten_html:
                value = params['value']
                if len(value) > max_value_len:
                    params['value'] = shorten_value(value, max_value_len)
                    params['field'] = fescape(params['field'])
            else:
                # for html, make sure to escape the inputs
                if isinstance(params, tuple) or isinstance(params, list):
                    params = tuple(fescape(param) for param in params)
                elif isinstance(params, dict):
                    params = map_dict_values(fescape, params)
                else:
                    params = params and fescape(params)

        pattern = patterns[name]

        if params is not None:
            return pattern % params
        return pattern

    if isinstance(result, dict):
        score = result['score']
        result_type = result['result_type']
        input_params = result['input_values']
        projections_filters = result['result_filters']
        trace = result['trace']
    else:
        (score, result_type, input_params, projections_filters, trace) = result

    # short entity names
    s_result_type = get_schema().entity_names.get(result_type, result_type)
    s_input_params = [(get_schema().entity_names.get(field, field), value)
                      for (field, value) in input_params]
    s_input_params.sort(key=lambda item: item[0])

    s_query = tmpl('RESULT_TYPE', s_result_type) + ' ' + \
        ' '.join(tmpl('INPUT_FIELD_AND_VALUE',
                      {'field': field, 'value': value})
                 for (field, value) in s_input_params)

    result_projections = [
        p for p in projections_filters if not isinstance(p, tuple)
    ]

    result_filters = [p for p in projections_filters if isinstance(p, tuple)]

    if result_projections or result_filters:
        if DEBUG:
            print('selections before:', result_projections)
        result_projections = list(result_projections)

        # automatically add wildcard fields to selections (if any),
        # so they would be displayed in the results
        for field, value in input_params:
            if '*' in value and not field in result_projections:
                result_projections.append(field)

        # add formatted projections
        result_grep = [tmpl('PROJECTION', prj) for prj in result_projections]
        # add filters to grep
        s_result_filters = [
            tmpl('RESULT_FILTER_OP_VALUE', {
                'field': field,
                'op': op,
                'value': val
            }) for (field, op, val) in result_filters
        ]
        result_grep.extend(s_result_filters)
        s_query += tmpl('GREP') + ', '.join(result_grep)

        if DEBUG:
            print('projections after:', result_projections)
            print('filters after:', result_filters)

    return {
        'result': s_query,
        'query': s_query,
        'trace': trace,
        'score': score,
        'entity': s_result_type,
        'das_ql_tuple': (s_result_type, s_input_params, result_projections,
                         result_filters, [])
    }
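The projections/filters split above is purely type-based: plain field names count as projections, (field, op, value) tuples as filters. A toy illustration:

projections_filters = ['dataset.nevents',
                       ('dataset.size', '>', '100'),
                       'dataset.name']

result_projections = [p for p in projections_filters
                      if not isinstance(p, tuple)]
result_filters = [p for p in projections_filters
                  if isinstance(p, tuple)]

print(result_projections)  # ['dataset.nevents', 'dataset.name']
print(result_filters)      # [('dataset.size', '>', '100')]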
Example #22
def result_to_dasql(result, frmt='text', shorten_html=True,
                    max_value_len=UI_MAX_DISPLAYED_VALUE_LEN):
    """
    returns the proposed query as DASQL in these formats:
    * text - standard DASQL
    * html - colorified DASQL with long values shortened down (if shorten_html
        is specified)
    """
    patterns = DASQL_PATTERNS[frmt]

    def tmpl(name, params=None):
        """
        gets a pattern, formats it with params if any,
        and applies an escape function if needed
        """
        # a helper function to map values of dict
        # TODO: in Py2.7: {k: f(v) for k, v in my_dictionary.items()}
        map_dict_values = lambda f, my_dict: dict(
            (k, f(v)) for k, v in my_dict.iteritems())

        if frmt == 'html':
            # shorten the value if it's longer than max_value_len
            if isinstance(params, dict) and 'value' in params and shorten_html:
                value = params['value']
                if len(value) > max_value_len:
                    params['value'] = shorten_value(value, max_value_len)
                    params['field'] = fescape(params['field'])
            else:
                # for html, make sure to escape the inputs
                if isinstance(params, tuple) or isinstance(params, list):
                    params = tuple(fescape(param) for param in params)
                elif isinstance(params, dict):
                    params = map_dict_values(fescape, params)
                else:
                    params = params and fescape(params)

        pattern = patterns[name]

        if params is not None:
            return pattern % params
        return pattern

    if isinstance(result, dict):
        score = result['score']
        result_type = result['result_type']
        input_params = result['input_values']
        projections_filters = result['result_filters']
        trace = result['trace']
    else:
        (score, result_type, input_params, projections_filters, trace) = result

    # short entity names
    s_result_type = get_schema().entity_names.get(result_type, result_type)
    s_input_params = [(get_schema().entity_names.get(field, field), value) for
                      (field, value) in input_params]
    s_input_params.sort(key=lambda item: item[0])

    s_query = tmpl('RESULT_TYPE', s_result_type) + ' ' + \
        ' '.join(tmpl('INPUT_FIELD_AND_VALUE',
                      {'field': field, 'value': value})
                 for (field, value) in s_input_params)

    result_projections = [p for p in projections_filters
                          if not isinstance(p, tuple)]

    result_filters = [p for p in projections_filters
                      if isinstance(p, tuple)]

    if result_projections or result_filters:
        if DEBUG:
            print 'selections before:', result_projections
        result_projections = list(result_projections)

        # automatically add wildcard fields to selections (if any),
        # so they would be displayed in the results
        for field, value in input_params:
            if '*' in value and not field in result_projections:
                result_projections.append(field)

        # add formatted projections
        result_grep = [tmpl('PROJECTION', prj) for prj in result_projections]
        # add filters to grep
        s_result_filters = [tmpl('RESULT_FILTER_OP_VALUE',
                                 {'field': field, 'op': op, 'value': val})
                            for (field, op, val) in result_filters]
        result_grep.extend(s_result_filters)
        s_query += tmpl('GREP') + ', '.join(result_grep)

        if DEBUG:
            print 'projections after:', result_projections
            print 'filters after:', result_filters

    return {
        'result': s_query,
        'query': s_query,
        'trace': trace,
        'score': score,
        'entity': s_result_type,
        'das_ql_tuple': (s_result_type, s_input_params, result_projections,
                         result_filters, [])
    }