def run(q, d1, d2_s):
    '''Score two term distributions against each other for a query.

    Args:
        q: Query string. Only its whitespace-separated token count is
            used here, as the denominator of the coverage ratio.
        d1: First distribution, passed as-is to ``return_keys`` and
            ``mk_vector`` (presumably a term -> weight mapping; confirm
            against the callers).
        d2_s: Second distribution in serialised string form; it is parsed
            with ``convert_string_to_dict`` before use.

    Returns:
        A ``(similarity, coverage)`` tuple. ``similarity`` is the result
        of ``cosine_similarity`` on the normalised vectors. ``coverage``
        is the sum of the element-wise product of the binarised vectors
        divided by the number of query tokens, or ``0.0`` when the query
        contains no tokens.
    '''
    d2 = convert_string_to_dict(d2_s)
    dimensions = return_keys(d1, d2)
    v1 = mk_vector(d1, dimensions)
    v2 = mk_vector(d2, dimensions)

    v1_bin = binarise(v1)
    v2_bin = binarise(v2)
    overlap = sum(v1_bin * v2_bin)

    # An empty or whitespace-only query splits into zero tokens; dividing
    # by that count would fail, so report zero coverage instead.
    n_query_tokens = len(q.split())
    coverage = overlap / n_query_tokens if n_query_tokens else 0.0

    d1_vec = normalise(v1)
    d2_vec = normalise(v2)
    return cosine_similarity(d1_vec, d2_vec), coverage
def compute_pod_summary(name):
    '''Build the summary vector and top-terms string for one pod.

    Original author's note: this function is very similar to 'self' in
    PeARS-pod.

    Every ``Urls`` row whose ``pod`` equals ``name`` contributes its
    ``vector`` to a running 400-element sum and its ``freqs`` entries to
    per-term integer totals.

    Args:
        name: Pod name used to filter the ``Urls`` query.

    Returns:
        A ``(vector_string, word_vector)`` tuple. ``vector_string`` is the
        summed vector after ``normalise`` and ``convert_to_string``.
        ``word_vector`` is a string of ``term:count `` items (each followed
        by a space) for at most the 300 terms with the highest totals, in
        descending order of total.
    '''
    pod_vector = np.zeros(400)
    totals = {}
    for entry in db.session.query(Urls).filter_by(pod=name).all():
        pod_vector += convert_to_array(entry.vector)
        for term, count in convert_string_to_dict(entry.freqs).items():
            totals[term] = totals.get(term, 0) + int(count)

    vector_string = convert_to_string(normalise(pod_vector))

    # Keep only the 300 most frequent terms in the summary string.
    top_terms = sorted(totals, key=totals.get, reverse=True)[:300]
    word_vector = ''.join(
        term + ':' + str(totals[term]) + ' ' for term in top_terms
    )
    return vector_string, word_vector