예제 #1
0
파일: autoreply.py 프로젝트: strin/surrobot
def query_by_body(body, top_k=5):
    ''' query top_k most probable replies based on raw email body '''
    doc = body2doc(body)
    print '[doc]', doc
    vec = avg_word_vec(doc)
    # retrieve top-k past inquery emails.
    candidates = query_by_vec_online('inbox', vec, top_k)
    # get past replies.
    db = EmailDB('outbox')
    replies = []
    for (email, score) in candidates:
        reply = db.select(
                    where='message_id=:message_id',
                    data=dict(message_id=email.message_id)
                )
        reply = list(reply)[0].body
        replies.append((reply, score))
    return replies
예제 #2
0
파일: embedding.py 프로젝트: strin/surrobot
def query_by_vec_online(table, vec, top_k=5):
    init_word2vec()
    db = EmailDB(table)
    scores = []
    for email in db.select():
        if email.message_id in CACHE:
            candidate_vec = CACHE[email.message_id]
        else:
            doc = body2doc(email.body)
            print "[cache doc]", doc
            candidate_vec = avg_word_vec(doc)
            CACHE[email.message_id] = candidate_vec
        score = np.dot(vec, candidate_vec) / npla.norm(vec) / npla.norm(candidate_vec)  # cosine similarity.
        score = float(score)
        scores.append((email, score))

    scores = [pair for pair in scores if pair[1] == pair[1]]
    scores = sorted(scores, key=lambda pair: pair[1], reverse=True)

    print "[matched]", scores[:top_k]
    return scores[:top_k]
예제 #3
0
def sync(csv_path):
    inbox_db = EmailDB("inbox")
    outbox_db = EmailDB("outbox")

    in_mails = []
    out_mails = []
    for (in_mail, out_mail) in extract(csv_path):
        in_mails.append(in_mail)
        out_mails.append(out_mail)

    def get_in_mail():
        for (mi, in_mail) in enumerate(in_mails):
            print "[inbox][%d/%d]" % (mi, len(in_mails))
            yield in_mail

    def get_out_mail():
        for (mi, out_mail) in enumerate(out_mails):
            print "[outbox][%d/%d]" % (mi, len(out_mails))
            yield out_mail

    inbox_db.update_all("*****@*****.**", get_in_mail())
    outbox_db.update_all("*****@*****.**", get_out_mail())