def spec(instance):
    """Build overlap features between query and product spec tokens.

    The ``"querySpec"`` and ``"productSpec"`` strings are each split on
    ``"|"`` and de-duplicated into sets, which are then compared by
    ``_overlap``.

    Args:
        instance: Mapping that provides the ``"querySpec"`` and
            ``"productSpec"`` entries (both must support ``.split``).

    Returns:
        The result of ``util_tools.get_feature_by_list`` applied to the list
        of values produced by ``_overlap``.
    """
    # Read both fields before splitting so a missing key is reported first.
    query_spec, product_spec = instance["querySpec"], instance["productSpec"]

    query_tokens = set(query_spec.split(u"|"))
    product_tokens = set(product_spec.split(u"|"))

    features = list(_overlap(query_tokens, product_tokens))
    return util_tools.get_feature_by_list(features)
def levenshtein_distance(instance):
    """Build edit-distance features for the name and spec fields.

    Calls ``_levenshtein_distance`` twice: once on the query/product names
    and once on the query/product specs, in that order.

    Args:
        instance: Mapping that provides ``"queryName"``, ``"querySpec"``,
            ``"productName"`` and ``"productSpec"``.

    Returns:
        The result of ``util_tools.get_feature_by_list`` applied to the
        two-element list ``[name_distance, spec_distance]``.
    """
    # Fetch every field up front, in the same order as the keys below.
    field_keys = ("queryName", "querySpec", "productName", "productSpec")
    query_name, query_spec, product_name, product_spec = (
        instance[key] for key in field_keys
    )

    compared_pairs = (
        (query_name, product_name),
        (query_spec, product_spec),
    )
    features = [
        _levenshtein_distance(query_text, product_text)
        for query_text, product_text in compared_pairs
    ]
    return util_tools.get_feature_by_list(features)
def lss_length(instance):
    """Build longest-common-substring length features for name and spec.

    For the name pair and then the spec pair, takes ``len`` of whatever
    ``_longest_common_substring`` returns for the query and product values.

    Args:
        instance: Mapping that provides ``"queryName"``, ``"querySpec"``,
            ``"productName"`` and ``"productSpec"``.

    Returns:
        The result of ``util_tools.get_feature_by_list`` applied to the
        two-element list ``[name_length, spec_length]``.
    """
    query_name = instance["queryName"]
    query_spec = instance["querySpec"]
    product_name = instance["productName"]
    product_spec = instance["productSpec"]

    features = []
    for query_text, product_text in (
        (query_name, product_name),
        (query_spec, product_spec),
    ):
        common = _longest_common_substring(query_text, product_text)
        features.append(len(common))

    return util_tools.get_feature_by_list(features)
def bleu(instance):
    """Build ``_bleu_score`` features for the name and spec fields.

    Each of the four fields is first expanded with ``list(...)`` (for a
    string this yields its individual characters) and the resulting lists
    are scored pairwise: names first, then specs.

    Args:
        instance: Mapping that provides iterable ``"queryName"``,
            ``"querySpec"``, ``"productName"`` and ``"productSpec"`` values.

    Returns:
        The result of ``util_tools.get_feature_by_list`` applied to the
        two-element list ``[name_score, spec_score]``.
    """
    field_keys = ("queryName", "querySpec", "productName", "productSpec")
    # Each field is read and converted before the next one is touched.
    query_name, query_spec, product_name, product_spec = (
        list(instance[key]) for key in field_keys
    )

    name_score = _bleu_score(query_name, product_name)
    spec_score = _bleu_score(query_spec, product_spec)
    return util_tools.get_feature_by_list([name_score, spec_score])
def overlap(instance):
    """Build element-level overlap features for the name and spec fields.

    Each field is reduced to the set of its elements (for a string, its
    distinct characters). ``_overlap`` is then applied to the name sets and
    to the spec sets, and the two results are concatenated in that order.

    Args:
        instance: Mapping that provides iterable ``"queryName"``,
            ``"querySpec"``, ``"productName"`` and ``"productSpec"`` values.

    Returns:
        The result of ``util_tools.get_feature_by_list`` applied to the
        concatenated ``_overlap`` outputs.
    """
    raw_query_name = instance["queryName"]
    raw_query_spec = instance["querySpec"]
    raw_product_name = instance["productName"]
    raw_product_spec = instance["productSpec"]

    # set() consumes any iterable directly; no intermediate list is needed.
    query_name_items = set(raw_query_name)
    query_spec_items = set(raw_query_spec)
    product_name_items = set(raw_product_name)
    product_spec_items = set(raw_product_spec)

    features = list(_overlap(query_name_items, product_name_items))
    features.extend(_overlap(query_spec_items, product_spec_items))
    return util_tools.get_feature_by_list(features)
def note(instance):
    """Build similarity features between the query fields and product note.

    The feature list is assembled in this order, each entry computed for
    the query name first and the query spec second, always against
    ``instance["productNote"]``:

    1. ``len`` of ``_longest_common_subsequence``
    2. ``len`` of ``_longest_common_substring``
    3. ``_levenshtein_distance``
    4. the outputs of ``_overlap`` on the element sets of each value

    Args:
        instance: Mapping that provides ``"queryName"``, ``"querySpec"``
            and ``"productNote"``.

    Returns:
        The result of ``util_tools.get_feature_by_list`` applied to the
        assembled feature list.
    """
    query_name = instance["queryName"]
    query_spec = instance["querySpec"]
    note_text = instance["productNote"]

    query_fields = (query_name, query_spec)

    # Sequence-based features: six scalars, grouped by metric.
    features = [
        len(_longest_common_subsequence(field, note_text))
        for field in query_fields
    ]
    features.extend(
        len(_longest_common_substring(field, note_text))
        for field in query_fields
    )
    features.extend(
        _levenshtein_distance(field, note_text) for field in query_fields
    )

    # Set-based features: compare the distinct elements of each value.
    name_items = set(query_name)
    spec_items = set(query_spec)
    note_items = set(note_text)
    features.extend(_overlap(name_items, note_items))
    features.extend(_overlap(spec_items, note_items))

    return util_tools.get_feature_by_list(features)