예제 #1
0
def compute_vectors(target_url, keyword):
    """Store a new ``Urls`` row for *target_url* unless one already exists.

    The page is read with ``extract_from_url``; its title and body are joined,
    passed through ``clean_text``, and fed to ``compute_dist_vector`` and
    ``compute_freq_vector``. The string forms of both results are saved on
    the new row, which is assigned to the pod "Me".

    Args:
        target_url: Address of the page to index.
        keyword: Keyword stored with the page; an empty string is recorded
            as "generic".

    Returns:
        True if the URL was already in the database or has just been added;
        False if the extracted title is empty (nothing is stored then).
    """
    print("Computing vectors for", target_url)

    # Already indexed: nothing to do.
    if db.session.query(Urls).filter_by(url=target_url).all():
        return True

    entry = Urls(url=target_url)
    title, body_str, snippet, cc = extract_from_url(target_url)

    # A page without a title is not indexed.
    if title == "":
        return False

    text = clean_text(title + " " + body_str)
    vector = compute_dist_vector(text, dm_dict_en)
    freqs = compute_freq_vector(text)

    entry.title = str(title)
    entry.vector = convert_to_string(vector)
    entry.freqs = convert_dict_to_string(freqs)
    entry.keyword = "generic" if keyword == "" else keyword
    entry.pod = "Me"
    # Fall back to the title when the page yields no snippet.
    entry.snippet = str(snippet) if snippet != "" else entry.title
    if cc:
        entry.cc = True

    db.session.add(entry)
    db.session.commit()
    return True
예제 #2
0
def compute_pod_summary(name):
    """Summarise every stored URL that belongs to the pod *name*.

    This function is very similar to 'self' in PeARS-pod.

    The vectors of all matching ``Urls`` rows are summed into a 400-element
    array and the per-word counts from their ``freqs`` fields are added up.

    Args:
        name: Value of the ``pod`` column to select rows by.

    Returns:
        A tuple ``(DS_vector, word_vector)``. ``DS_vector`` is the string
        form of the normalised vector sum. ``word_vector`` lists at most the
        300 highest-count words as ``word:count`` entries, each followed by
        a single space.
    """
    vector_sum = np.zeros(400)
    totals = {}
    for entry in db.session.query(Urls).filter_by(pod=name).all():
        vector_sum += convert_to_array(entry.vector)
        for word, count in convert_string_to_dict(entry.freqs).items():
            totals[word] = totals.get(word, 0) + int(count)

    summary_vector = convert_to_string(normalise(vector_sum))

    # Highest counts first; the sort is stable, so ties keep insertion order.
    top_words = sorted(totals, key=totals.get, reverse=True)[:300]
    word_vector = "".join(w + ':' + str(totals[w]) + ' ' for w in top_words)
    return summary_vector, word_vector
예제 #3
0
def pod_from_scratch(name, url, language, description):
    """Create the ``Pods`` row for *url* if needed, then fill in its fields.

    A bare row is inserted and committed when no row with this URL exists.
    The row is then fetched and given its name, description, language,
    ``DS_vector`` and ``word_vector``, and the session is committed again.

    Args:
        name: Pod name; also used, lower-cased, as input for the vectors.
        url: URL identifying the pod row.
        language: Value stored in the row's ``language`` field.
        description: Pod description; lower-cased and appended to the name
            when computing the vectors.
    """
    existing_rows = db.session.query(Pods).filter_by(url=url).all()
    if not existing_rows:
        db.session.add(Pods(url=url))
        db.session.commit()

    pod = Pods.query.filter(Pods.url == url).first()
    pod.name = name
    pod.description = description
    pod.language = language

    # compute_query_vectors is borrowed as a hack to get vectors from the
    # pod's name and description.
    summary_text = name.lower() + ' ' + description.lower()
    vector, freqs = compute_query_vectors(summary_text, dm_dict_en)
    pod.DS_vector = convert_to_string(normalise(vector))

    # Keep at most the 300 highest-count words, highest first.
    ranked_words = sorted(freqs, key=freqs.get, reverse=True)[:300]
    pod.word_vector = "".join(
        w + ':' + str(freqs[w]) + ' ' for w in ranked_words
    )

    # A truthy flag is left alone; any falsy value is stored as False.
    if not pod.registered:
        pod.registered = False
    db.session.commit()