Example #1
def run(q, d1, d2_s):
    # d1 is already a dict; d2 arrives as a 'word:count' string and is parsed first.
    d2 = convert_string_to_dict(d2_s)

    # Build dense vectors over the union of both documents' dimensions.
    dimensions = return_keys(d1, d2)
    v1 = mk_vector(d1, dimensions)
    v2 = mk_vector(d2, dimensions)
    v1_bin = binarise(v1)
    v2_bin = binarise(v2)

    # Coverage: number of dimensions active in both vectors, divided by the query length.
    coverage = sum(v1_bin * v2_bin) / len(q.split())

    # Cosine similarity is computed on the normalised (non-binary) vectors.
    d1_vec = normalise(v1)
    d2_vec = normalise(v2)

    return cosine_similarity(d1_vec, d2_vec), coverage
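For reference, below is a minimal sketch of what the helpers used by run() might look like. These definitions (convert_string_to_dict, return_keys, mk_vector, binarise, normalise, cosine_similarity) are reconstructed from how they are called above and are only assumptions, not the project's actual implementations.

import numpy as np

def convert_string_to_dict(s):
    # Hypothetical parser for a 'word:count word:count ...' string.
    return dict(pair.split(':') for pair in s.split())

def return_keys(d1, d2):
    # Union of the dimensions present in either document dict.
    return sorted(set(d1) | set(d2))

def mk_vector(d, dimensions):
    # Dense vector over the shared dimensions; missing keys count as 0.
    return np.array([float(d.get(dim, 0)) for dim in dimensions])

def binarise(v):
    # 1 where a dimension is active, 0 elsewhere.
    return (v > 0).astype(int)

def normalise(v):
    # Scale to unit length; leave zero vectors untouched.
    norm = np.linalg.norm(v)
    return v / norm if norm > 0 else v

def cosine_similarity(v1, v2):
    # Inputs are already unit-length, so the dot product is the cosine.
    return float(np.dot(v1, v2))

With these stand-ins in scope, run('cat mat', {'cat': 2, 'mat': 1}, 'cat:1 dog:3') returns the cosine between the two normalised document vectors together with a coverage score of 0.5 (one shared dimension, two query words).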
Example #2
def compute_pod_summary(name):
    '''This function is very similar to 'self' in PeARS-pod'''
    DS_vector = np.zeros(400)
    word_vector = ""
    freqs = {}
    # Sum the document vectors and merge the word frequencies of every URL in the pod.
    for u in db.session.query(Urls).filter_by(pod=name).all():
        DS_vector += convert_to_array(u.vector)
        for k, v in convert_string_to_dict(u.freqs).items():
            if k in freqs:
                freqs[k] += int(v)
            else:
                freqs[k] = int(v)
    DS_vector = convert_to_string(normalise(DS_vector))
    # Serialise the 300 most frequent words as 'word:count ' pairs.
    c = 0
    for w in sorted(freqs, key=freqs.get, reverse=True):
        word_vector += w + ':' + str(freqs[w]) + ' '
        c += 1
        if c == 300:
            break
    return DS_vector, word_vector
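The frequency aggregation and top-300 truncation above can also be written with collections.Counter. The hypothetical summarise_pod_rows below is only an illustrative alternative that keeps the same 'word:count ' output format; it assumes the project's convert_to_array, convert_string_to_dict, convert_to_string and normalise helpers are in scope, and that url_rows is the list returned by the Urls query.

from collections import Counter
import numpy as np

def summarise_pod_rows(url_rows, top_n=300):
    # Same logic as compute_pod_summary, minus the database query.
    ds_vector = np.zeros(400)
    freqs = Counter()
    for u in url_rows:
        ds_vector += convert_to_array(u.vector)
        freqs.update({k: int(v) for k, v in convert_string_to_dict(u.freqs).items()})
    # most_common(top_n) replaces the manual sort-and-break loop.
    word_vector = ''.join(w + ':' + str(n) + ' ' for w, n in freqs.most_common(top_n))
    return convert_to_string(normalise(ds_vector)), word_vector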
Example #3
def compute_query_vectors(query):
    """ Make distribution for query """
    words = query.rstrip('\n').split()
    # Only retain words which are in the distributional semantic space
    vecs_to_add = []
    words_for_freqs = []
    for word in words:
        words_for_freqs.append(word)
        if word in dm_dict_en:
            vecs_to_add.append(word)

    vbase = np.array([])
    # Add vectors together
    if vecs_to_add:
        # Take first word in vecs_to_add to start addition
        vbase = dm_dict_en[vecs_to_add[0]]
        for vec in vecs_to_add[1:]:
            vbase = vbase + dm_dict_en[vec]

    vbase = normalise(vbase)
    freqs = compute_freq_vector(words_for_freqs)
    return vbase, freqs
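To exercise compute_query_vectors in isolation, one could drop in toy stand-ins for the semantic space and its helpers, as in the sketch below. The two-entry dm_dict_en, the counting compute_freq_vector and the normalise shown here are assumptions for illustration only, and the call assumes everything lives in one module so the function picks up the toy dm_dict_en as its global.

import numpy as np
from collections import Counter

# Toy two-word semantic space, for illustration only.
dm_dict_en = {
    'cat': np.array([1.0, 0.0, 0.0]),
    'dog': np.array([0.0, 1.0, 0.0]),
}

def compute_freq_vector(words):
    # Hypothetical stand-in: plain surface-form counts.
    return dict(Counter(words))

def normalise(v):
    norm = np.linalg.norm(v)
    return v / norm if norm > 0 else v

vec, freqs = compute_query_vectors("the cat and the dog\n")
# vec is the normalised sum of the 'cat' and 'dog' vectors: [0.707..., 0.707..., 0.0]
# freqs counts every token, including out-of-vocabulary ones:
# {'the': 2, 'cat': 1, 'and': 1, 'dog': 1}

Note that if none of the query words are in dm_dict_en, vbase stays an empty array, so callers may want to guard against that case.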
Example #4
def pod_from_scratch(name, url, language, description):
    if not db.session.query(Pods).filter_by(url=url).all():
        p = Pods(url=url)
        db.session.add(p)
        db.session.commit()
    p = Pods.query.filter(Pods.url == url).first()
    p.name = name
    p.description = description
    p.language = language
    # Using compute_query_vectors as a hack to get vectors from the pod's name and description
    vector, freqs = compute_query_vectors(name.lower() + ' ' + description.lower())
    p.DS_vector = convert_to_string(normalise(vector))
    word_vector = ""
    c = 0
    for w in sorted(freqs, key=freqs.get, reverse=True):
        word_vector += w + ':' + str(freqs[w]) + ' '
        c += 1
        if c == 300:
            break
    p.word_vector = word_vector
    if not p.registered:
        p.registered = False
    db.session.commit()
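The 'word:count ' truncation loop here is the same as the one in compute_pod_summary (example #2); if deduplication were wanted, both could call a small shared helper such as the hypothetical top_n_word_string below.

def top_n_word_string(freqs, n=300):
    # Serialise the n most frequent entries as 'word:count ' pairs,
    # mirroring the loop used in examples #2 and #4.
    top_words = sorted(freqs, key=freqs.get, reverse=True)[:n]
    return ''.join(w + ':' + str(freqs[w]) + ' ' for w in top_words)

p.word_vector = top_n_word_string(freqs) would then replace the explicit counter loop.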