def count_synonyms(candidate, reference, model, n):
    '''
    Count candidate n-grams matched in the reference, with partial credit.

    A candidate n-gram found verbatim among the remaining reference n-grams
    scores 1.0 and uses up one occurrence of it.  Otherwise it is scored
    against every remaining non-empty reference n-gram with
    ``n_similarity``; the highest score is added to the total and the
    best-scoring reference n-gram has its remaining count reduced by that
    score.

    Args:
        candidate: candidate sequence, passed to ``ngram_list``.
        reference: reference sequence, passed to ``ngram_counter``.
        model: forwarded unchanged to ``n_similarity``.
        n: n-gram order.

    Returns:
        The accumulated match score (float).
    '''
    matches = 0.0
    # Mapping of reference n-gram tuple -> remaining count.
    ng_refs = ngram_counter(reference, n)
    for cand in ngram_list(candidate, n):
        key = tuple(cand)
        if key in ng_refs:
            matches += 1.0
            ng_refs[key] -= 1.0
            if ng_refs[key] <= 0.0:
                del ng_refs[key]
            continue

        try:
            sims = {rr: n_similarity(model, cand, rr) for rr in ng_refs if rr}
        except KeyError:
            # NOTE(review): presumably raised by the model for words it does
            # not know -- confirm.  Such a candidate earns no credit.
            continue
        if not sims:
            continue

        # Select by score: max() over the mapping alone would compare the
        # n-gram keys themselves, not their similarities.
        best = max(sims, key=sims.get)
        best_sim = sims[best]
        matches += best_sim
        logging.info('%s %s %f', cand, best, best_sim)
        ng_refs[best] -= best_sim
        if ng_refs[best] <= 0.0:
            del ng_refs[best]
    return matches
def count_s_synonyms(candidate, reference, model, n, threshold):
    '''
    Count unigram matches, crediting similar words above a threshold.

    A candidate n-gram found verbatim among the remaining reference n-grams
    scores 1.0 and uses up one occurrence of it.  For ``n == 1`` only, an
    unmatched candidate is scored against every remaining reference n-gram
    with the absolute value of ``n_similarity``; when the highest score
    exceeds ``threshold`` it is added to the total and the best-scoring
    reference n-gram has its remaining count reduced by that score.  For
    any other ``n`` only exact matches count.

    Args:
        candidate: candidate sequence, passed to ``ngram_list``.
        reference: reference sequence, passed to ``ngram_counter``.
        model: forwarded unchanged to ``n_similarity``.
        n: n-gram order.
        threshold: minimum similarity (exclusive) for partial credit;
            converted with ``float``.

    Returns:
        The accumulated match score (float).
    '''
    matches = 0.0
    # Mapping of reference n-gram tuple -> remaining count.
    ng_refs = ngram_counter(reference, n)
    for cand in ngram_list(candidate, n):
        key = tuple(cand)
        if key in ng_refs:
            matches += 1.0
            ng_refs[key] -= 1.0
            if ng_refs[key] <= 0.0:
                del ng_refs[key]
            continue
        if n != 1:
            # Similarity credit is only given to unigrams.
            continue

        try:
            sims = {rr: math.fabs(n_similarity(model, cand, rr))
                    for rr in ng_refs}
        except KeyError:
            # NOTE(review): presumably raised by the model for words it does
            # not know -- confirm.  Such a candidate earns no credit.
            continue
        if not sims:
            continue

        # Select by score: max() over the mapping alone would compare the
        # n-gram keys themselves, not their similarities.
        best = max(sims, key=sims.get)
        best_sim = sims[best]
        if float(best_sim) > float(threshold):
            matches += best_sim
            ng_refs[best] -= best_sim
            # "<=" rather than "==": a fractional decrement rarely lands
            # exactly on zero, so an exhausted entry would otherwise linger.
            if ng_refs[best] <= 0.0:
                del ng_refs[best]
    return matches
def count_synonyms(candidate, reference, model, n):
    '''
    Count candidate n-grams matched in the reference, with partial credit.

    A candidate n-gram found verbatim among the remaining reference n-grams
    scores 1.0 and uses up one occurrence of it.  Otherwise it is scored
    against every remaining reference n-gram with the absolute value of
    ``n_similarity``; the highest score is added to the total and the
    best-scoring reference n-gram has its remaining count reduced by that
    score.

    Args:
        candidate: candidate sequence, passed to ``ngram_list``.
        reference: reference sequence, passed to ``ngram_counter``.
        model: forwarded unchanged to ``n_similarity``.
        n: n-gram order.

    Returns:
        The accumulated match score (float).
    '''
    matches = 0.0
    # Mapping of reference n-gram tuple -> remaining count.
    ng_refs = ngram_counter(reference, n)
    for cand in ngram_list(candidate, n):
        key = tuple(cand)
        if key in ng_refs:
            matches += 1.0
            ng_refs[key] -= 1.0
            if ng_refs[key] <= 0.0:
                del ng_refs[key]
            continue

        try:
            sims = {rr: math.fabs(n_similarity(model, cand, rr))
                    for rr in ng_refs}
        except KeyError:
            # NOTE(review): presumably raised by the model for words it does
            # not know -- confirm.  Such a candidate earns no credit.
            continue
        if not sims:
            continue

        # Select by score: max() over the mapping alone would compare the
        # n-gram keys themselves, not their similarities.
        best = max(sims, key=sims.get)
        best_sim = sims[best]
        matches += best_sim
        logging.info(
            "n %i cand %s ref %s sim %f cnt %f",
            n, cand, best, best_sim, matches)
        ng_refs[best] -= best_sim
        if ng_refs[best] <= 0.0:
            del ng_refs[best]
    return matches
def count_all_ngrams(n, reference):
    ''' return the number of order-n ngrams that ngram_list yields for reference '''
    return len(ngram_list(reference, n))
def count_matches(candidate, reference, n):
    '''
    Count candidate n-grams that occur verbatim in the reference.

    Every hit uses up one occurrence of the matched reference n-gram, so a
    candidate cannot match the same reference n-gram more often than the
    reference contains it.  Returns the number of hits as a float.
    '''
    remaining = ngram_counter(reference, n)
    hits = 0.0
    for gram in ngram_list(candidate, n):
        key = tuple(gram)
        if key not in remaining:
            # No occurrence left to match against; contributes nothing.
            continue
        hits += 1.0
        remaining[key] -= 1.0
        if remaining[key] == 0.0:
            del remaining[key]
    return hits
def count_synonyms(candidate, reference, model, n):
    '''
    Count unigram matches with similarity credit, logging each step.

    A candidate n-gram found verbatim among the remaining reference n-grams
    scores 1.0 and uses up one occurrence of it.  For ``n == 1`` only, an
    unmatched candidate is scored against every remaining reference n-gram
    with the absolute value of ``n_similarity``; the highest score is added
    to the total and the best-scoring reference n-gram has its remaining
    count reduced by that score.  For any other ``n`` only exact matches
    count.  Each step of the similarity path is reported via
    ``logging.info``.

    Args:
        candidate: candidate sequence, passed to ``ngram_list``.
        reference: reference sequence, passed to ``ngram_counter``.
        model: forwarded unchanged to ``n_similarity``.
        n: n-gram order.

    Returns:
        The accumulated match score (float).
    '''
    matches = 0.0
    # Mapping of reference n-gram tuple -> remaining count.
    ng_refs = ngram_counter(reference, n)
    for cand in ngram_list(candidate, n):
        key = tuple(cand)
        if key in ng_refs:
            matches += 1.0
            ng_refs[key] -= 1.0
            if ng_refs[key] <= 0.0:
                del ng_refs[key]
            continue
        if n != 1:
            # Similarity credit is only given to unigrams.
            continue

        logging.info(
            "no exact match for %s in %s",
            str(cand), str(ng_refs.keys()))
        sims = {}
        try:
            for rr in ng_refs:
                sims[rr] = math.fabs(n_similarity(model, cand, rr))
                logging.info(
                    "similarity between %s and %s is %f",
                    cand, rr, sims[rr])
        except KeyError:
            # NOTE(review): presumably raised by the model for words it does
            # not know -- confirm.  Such a candidate earns no credit.
            continue
        if not sims:
            continue

        # Select by score: max() over the mapping alone would compare the
        # n-gram keys themselves, not their similarities.
        best = max(sims, key=sims.get)
        best_sim = sims[best]
        logging.info(
            "match count for %s before incrementing is %f",
            str(cand[0]), matches)
        matches += best_sim
        logging.info(
            "%s matched %s count incremented by %f yield %f",
            str(cand[0]), str(best), best_sim, matches)
        ng_refs[best] -= best_sim
        # "<=" rather than "==": a fractional decrement rarely lands exactly
        # on zero, so an exhausted entry would otherwise linger.
        if ng_refs[best] <= 0.0:
            logging.info(
                "removing %s from %s", str(best), ng_refs)
            del ng_refs[best]
            logging.info(
                "yields %s", ng_refs)
    return matches
def count_x_synonyms(candidate, reference, model, n, total_precision, threshold):
    '''
    Count n-gram matches, with similarity credit across n-gram orders.

    Two pools of remaining reference n-grams are kept: the order-``n`` pool
    used for exact matching, and a combined pool holding every order from 1
    to ``total_precision``.  A candidate n-gram found verbatim in the
    order-``n`` pool scores 1.0 and uses up one occurrence in both pools.
    Otherwise it is scored against every non-empty n-gram of the combined
    pool with ``n_similarity``; when the highest score exceeds
    ``threshold`` it is added to the total and the best-scoring reference
    n-gram has its remaining count reduced by that score.

    Args:
        candidate: candidate sequence, passed to ``ngram_list``.
        reference: reference sequence, passed to ``ngram_counter``.
        model: forwarded unchanged to ``n_similarity``.
        n: order of the candidate n-grams being scored.
        total_precision: highest n-gram order included in the combined pool.
        threshold: minimum similarity (exclusive) for partial credit;
            converted with ``float``.

    Returns:
        The accumulated match score (float).
    '''
    matches = 0.0
    # Mapping of order-n reference n-gram tuple -> remaining count.
    ng_refs = ngram_counter(reference, n)
    xng_refs = Counter()
    for order in range(1, total_precision + 1):
        xng_refs += ngram_counter(reference, order)

    for cand in ngram_list(candidate, n):
        key = tuple(cand)
        if key in ng_refs:
            matches += 1.0
            ng_refs[key] -= 1.0
            xng_refs[key] -= 1.0
            if ng_refs[key] <= 0.0:
                del ng_refs[key]
            if xng_refs[key] <= 0.0:
                del xng_refs[key]
            continue

        try:
            sims = {rr: n_similarity(model, cand, rr) for rr in xng_refs if rr}
        except KeyError:
            # NOTE(review): presumably raised by the model for words it does
            # not know -- confirm.  Such a candidate earns no credit.
            continue
        if not sims:
            continue

        # Select by score: max() over the mapping alone would compare the
        # n-gram keys themselves, not their similarities.
        best = max(sims, key=sims.get)
        best_sim = sims[best]
        if float(best_sim) > float(threshold):
            matches += best_sim
            logging.info('%s %s %f', cand, best, best_sim)
            xng_refs[best] -= best_sim
            if xng_refs[best] <= 0.0:
                del xng_refs[best]
            # The best match may be of a different order than n, in which
            # case the order-n pool has nothing to consume.
            if best in ng_refs:
                ng_refs[best] -= best_sim
                if ng_refs[best] <= 0.0:
                    del ng_refs[best]
    return matches
def count_all_ngrams(n, reference, max_order=3):
    '''
    Count the n-grams of every order from 1 to ``max_order`` in a reference.

    Args:
        n: accepted for call compatibility; the body does not read it.
        reference: reference sequence, passed to ``ngram_list``.
        max_order: highest n-gram order included in the total.  Defaults
            to 3, i.e. orders 1, 2 and 3 are counted.

    Returns:
        The sum of ``len(ngram_list(reference, order))`` over the orders.
    '''
    return sum(
        len(ngram_list(reference, order))
        for order in range(1, max_order + 1))