def prefix_search(prefix, dataset):
    """Return ``(candidate, entity_id)`` pairs whose candidate string starts
    with the normalized *prefix*.

    Only the first matching candidate per entity is kept, so each
    ``entity_id`` appears at most once in the result.
    """
    needle = normalize(prefix, dataset)
    seen_entities = set()
    results = []
    for cand, ent_id in get_candidates(dataset):
        if not cand.startswith(needle):
            continue
        if ent_id in seen_entities:
            continue
        seen_entities.add(ent_id)
        results.append((cand, ent_id))
    return results
def match(text, dataset, query=None):
    """Score every candidate of *dataset* against *text*.

    Args:
        text: raw text to normalize and score.
        dataset: provides the normalization rules, the candidate list and
            the scoring algorithm name (``dataset.algorithm``).
        query: optional substring filter; candidates whose lower-cased form
            does not contain it are skipped. ``None`` disables filtering.

    Returns:
        List of ``(candidate, entity_id, score)`` triples, highest score
        first, with at most one entry per ``entity_id``.
    """
    query = '' if query is None else query.strip()
    text_normalized = normalize(text, dataset)
    candidates = get_candidates(dataset)
    begin = time.time()
    # Fall back to plain levenshtein when the configured algorithm is unknown.
    func = ALGORITHMS.get(dataset.algorithm, levenshtein)
    matches = []
    for candidate, entity_id in candidates:
        # NOTE(review): the query is compared case-sensitively against the
        # lower-cased candidate, so an upper-case query never matches —
        # confirm whether query should be lower-cased here.
        if query and query not in candidate.lower():
            continue
        matches.append((candidate, entity_id, func(text_normalized, candidate)))
    # Tuple-parameter lambdas (``lambda (c, e, s): s``) were removed in
    # Python 3 (PEP 3113) and raise SyntaxError; index the tuple instead.
    matches.sort(key=lambda m: m[2], reverse=True)
    # Keep only the best-scoring candidate per entity.
    entities = set()
    matches_uniq = []
    for candidate, entity_id, score in matches:
        if entity_id in entities:
            continue
        entities.add(entity_id)
        matches_uniq.append((candidate, entity_id, score))
    duration = time.time() - begin
    log.info("Matching %s candidates took: %sms",
             len(matches_uniq), duration * 1000)
    return matches_uniq
def match(text, dataset, query=None):
    """Score every candidate of *dataset* against *text*.

    NOTE(review): this re-defines ``match`` and shadows the identical
    definition that appears earlier in the file — it looks like a
    copy/paste duplicate; consider deleting one of the two.

    Args:
        text: raw text to normalize and score.
        dataset: provides the normalization rules, the candidate list and
            the scoring algorithm name (``dataset.algorithm``).
        query: optional substring filter; candidates whose lower-cased form
            does not contain it are skipped. ``None`` disables filtering.

    Returns:
        List of ``(candidate, entity_id, score)`` triples, highest score
        first, with at most one entry per ``entity_id``.
    """
    query = '' if query is None else query.strip()
    text_normalized = normalize(text, dataset)
    candidates = get_candidates(dataset)
    begin = time.time()
    # Unknown algorithm names fall back to plain levenshtein scoring.
    func = ALGORITHMS.get(dataset.algorithm, levenshtein)
    matches = []
    for candidate, entity_id in candidates:
        # NOTE(review): query is matched case-sensitively against the
        # lower-cased candidate — verify whether it should be lower-cased.
        if query and query not in candidate.lower():
            continue
        matches.append((candidate, entity_id, func(text_normalized, candidate)))
    # ``lambda (c, e, s): s`` is Python 2 tuple-parameter unpacking, removed
    # in Python 3 (PEP 3113); use a plain index-based key instead.
    matches.sort(key=lambda m: m[2], reverse=True)
    # Deduplicate: keep the best-scoring candidate for each entity.
    entities = set()
    matches_uniq = []
    for candidate, entity_id, score in matches:
        if entity_id in entities:
            continue
        entities.add(entity_id)
        matches_uniq.append((candidate, entity_id, score))
    duration = time.time() - begin
    log.info("Matching %s candidates took: %sms",
             len(matches_uniq), duration * 1000)
    return matches_uniq