Exemple #1
0
def spec(instance):
    """Build overlap features from the two pipe-delimited spec fields.

    Reads ``instance["querySpec"]`` and ``instance["productSpec"]``, splits
    each on ``|`` and de-duplicates the pieces into sets, then hands both
    sets to ``_overlap``. The values ``_overlap`` yields are collected into
    a list and passed to ``util_tools.get_feature_by_list``; that call's
    result is returned unchanged.
    """
    # Fetch both fields up front so a missing key fails before any splitting.
    query_raw, product_raw = instance["querySpec"], instance["productSpec"]

    query_tokens = set(query_raw.split(u"|"))
    product_tokens = set(product_raw.split(u"|"))

    collected = []
    collected.extend(_overlap(query_tokens, product_tokens))
    return util_tools.get_feature_by_list(collected)
Exemple #2
0
def levenshtein_distance(instance):
    """Compute edit-distance features for the name and spec field pairs.

    Calls ``_levenshtein_distance`` once on (queryName, productName) and
    once on (querySpec, productSpec), in that order, and returns the result
    of ``util_tools.get_feature_by_list`` applied to the two values.
    """
    # Keys are read in this exact order: both query fields, then both
    # product fields.
    wanted = ("queryName", "querySpec", "productName", "productSpec")
    q_name, q_spec, p_name, p_spec = [instance[key] for key in wanted]

    name_distance = _levenshtein_distance(q_name, p_name)
    spec_distance = _levenshtein_distance(q_spec, p_spec)

    return util_tools.get_feature_by_list([name_distance, spec_distance])
Exemple #3
0
def lss_length(instance):
    """Compute longest-common-substring length features.

    For the (queryName, productName) pair and then the (querySpec,
    productSpec) pair, takes ``len()`` of whatever
    ``_longest_common_substring`` returns. The two lengths are passed as a
    list to ``util_tools.get_feature_by_list``, whose result is returned.
    """
    q_name, q_spec = instance["queryName"], instance["querySpec"]
    p_name, p_spec = instance["productName"], instance["productSpec"]

    name_match = _longest_common_substring(q_name, p_name)
    name_length = len(name_match)

    spec_match = _longest_common_substring(q_spec, p_spec)
    spec_length = len(spec_match)

    return util_tools.get_feature_by_list([name_length, spec_length])
Exemple #4
0
def bleu(instance):
    """Compute BLEU-score features for the name and spec field pairs.

    Each of the four fields is expanded with ``list()`` into a sequence of
    its elements (individual characters if the fields are strings, which
    is presumed but not shown here) before being scored. ``_bleu_score`` is
    called for (queryName, productName) and then (querySpec, productSpec);
    the two scores go to ``util_tools.get_feature_by_list`` and its result
    is returned.
    """
    # Read and expand the fields in a fixed order: query fields first.
    wanted = ("queryName", "querySpec", "productName", "productSpec")
    q_name, q_spec, p_name, p_spec = [list(instance[key]) for key in wanted]

    name_score = _bleu_score(q_name, p_name)
    spec_score = _bleu_score(q_spec, p_spec)

    return util_tools.get_feature_by_list([name_score, spec_score])
Exemple #5
0
def overlap(instance):
    """Build element-overlap features for the name and spec field pairs.

    Each of the four fields is reduced to the set of its elements (its
    distinct characters if the fields are strings, which is presumed but
    not shown here). ``_overlap`` is then called on the name sets and on the
    spec sets, in that order; everything it yields is concatenated and
    passed to ``util_tools.get_feature_by_list``, whose result is returned.
    """
    # All four keys are read before any set is built.
    wanted = ("queryName", "querySpec", "productName", "productSpec")
    raw_values = [instance[key] for key in wanted]

    # set(x) visits the same elements as set(list(x)).
    q_name, q_spec, p_name, p_spec = [set(value) for value in raw_values]

    collected = []
    for query_set, product_set in ((q_name, p_name), (q_spec, p_spec)):
        collected.extend(_overlap(query_set, product_set))

    return util_tools.get_feature_by_list(collected)
Exemple #6
0
def note(instance):
    """Compare both query fields against the product note.

    The feature list is assembled in this order, with each metric applied
    to queryName first and querySpec second, always against productNote:

    1. ``len()`` of the ``_longest_common_subsequence`` result
    2. ``len()`` of the ``_longest_common_substring`` result
    3. the ``_levenshtein_distance`` value
    4. whatever ``_overlap`` yields for the element sets of the two fields

    The list is passed to ``util_tools.get_feature_by_list`` and that
    call's result is returned.
    """
    q_name, q_spec = instance["queryName"], instance["querySpec"]
    p_note = instance["productNote"]

    queries = (q_name, q_spec)

    collected = [
        len(_longest_common_subsequence(query, p_note)) for query in queries
    ]
    collected += [
        len(_longest_common_substring(query, p_note)) for query in queries
    ]
    collected += [_levenshtein_distance(query, p_note) for query in queries]

    # Sets are built only after the sequence-based metrics, which need the
    # original ordered values.
    name_elems = set(q_name)
    spec_elems = set(q_spec)
    note_elems = set(p_note)
    for query_elems in (name_elems, spec_elems):
        collected.extend(_overlap(query_elems, note_elems))

    return util_tools.get_feature_by_list(collected)