Example 1
def match_dataset(kwd, cur_inst):
    """ check for dataset match in the current DBS instance """
    # ignore very short keywords, which would match too broadly
    if len(kwd) < 3:
        return None
    score, data = match_value_dataset(kwd, cur_inst)
    if score:
        # prefer the adjusted (normalized) keyword when available
        return data.get('adjusted_keyword', kwd)
    return None
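For illustration, a minimal way to exercise match_dataset, assuming the function above is in scope. The stubbed match_value_dataset below is a hypothetical stand-in for the real DAS matcher; it returns a (score, data) pair where data may carry an 'adjusted_keyword' entry, and the dataset name is made up.

# hypothetical stub standing in for the real DAS matcher
def match_value_dataset(kwd, inst):
    known = ['/ZMM/Example-Campaign-v1/GEN-SIM']  # made-up dataset name
    for name in known:
        if kwd.lower() in name.lower():
            # a match: return a score and the normalized keyword
            return 0.9, {'adjusted_keyword': name}
    return None, {}

print(match_dataset('ZMM', 'prod/global'))
# -> /ZMM/Example-Campaign-v1/GEN-SIM
print(match_dataset('ZM', 'prod/global'))
# -> None (keyword shorter than 3 characters)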
Example 2
def match_dataset_all_inst(kwd, cur_inst):
    """ list matching dataset patterns in all DBS instances """
    if len(kwd) < 3:
        return []
    matches = []
    for inst in list_dbs_instances():
        score, data = match_value_dataset(kwd, inst)
        if not score:
            continue
        data['inst'] = inst
        data['match'] = data.get('adjusted_keyword', kwd)
        # score matches in other DBS instances lower
        score = score - 0.15 if inst != cur_inst else score
        data['score'] = score
        matches.append(data)
    return sorted(matches, key=lambda item: item['score'], reverse=True)
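A quick sketch of the cross-instance penalty in action, using hypothetical stubs for list_dbs_instances and match_value_dataset (the real helpers live elsewhere in the DAS keyword-search code):

# hypothetical stubs, for illustration only
def list_dbs_instances():
    return ['prod/global', 'prod/phys03']

def match_value_dataset(kwd, inst):
    # pretend both instances match equally well
    return 0.8, {'adjusted_keyword': '/%s/*/*' % kwd}

for match in match_dataset_all_inst('ZMM', 'prod/global'):
    print(match['inst'], match['score'])
# prod/global 0.8     <- current instance keeps its score
# prod/phys03 0.65    <- other instances are penalized by 0.15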
Example 3
def keyword_value_weights(keyword):
    """
    for each attribute, calculate the likelihood that the given keyword
    is a value of that attribute (we are mostly interested in API parameters)
    """
    # to minimize false positives, we exclude from regexp matching the fields
    # for which we have a list of possible values (the fairly static ones)
    fields_tracked = input_values_tracker.get_fields_tracked(only_stable=True)

    scores_dict = _select_best_scores(
        (score, field) for score, field in keyword_regexp_weights(keyword)
        if field not in fields_tracked)

    # check for matches against existing datasets and override the
    # regexp-based score
    dataset_score, data = match_value_dataset(keyword)
    if dataset_score:
        scores_dict['dataset.name'] = (dataset_score, data)

    # check for matching fields whose values are fairly static (site, release, ...)
    scores_dict.update(input_values_tracker.input_value_matches(keyword))

    return sorted(scores_dict.values(),
                  key=lambda item: item[0], reverse=True)
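_select_best_scores itself is not shown in this snippet; a plausible minimal sketch, assuming it keeps only the highest-scoring (score, field) pair per field, keyed by field name:

def _select_best_scores(score_field_pairs):
    # hypothetical reimplementation: keep the best score seen per field
    best = {}
    for score, field in score_field_pairs:
        if field not in best or score > best[field][0]:
            best[field] = (score, field)
    return best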