Beispiel #1
0
def update_query(query,
                 relevant=None,
                 irrelevant=None,
                 alpha=0.9,
                 beta=0.5,
                 gamma=0.1):
    """
    Adjust the stored vector of a query with Rocchio-style relevance feedback.

    The query string itself is not changed. The vector stored for it in the query map is moved
    towards the relevant documents and away from the irrelevant ones, committed, and then
    ``set_query_results(query)`` is called (presumably to refresh the stored results for the
    query -- confirm against that function).
    :param query: query string, must not be empty
    :param relevant: list of doc_ids judged relevant (None is treated as an empty list)
    :param irrelevant: list of doc_ids judged irrelevant (None is treated as an empty list)
    :param alpha: weight of original query. NOTE: accepted for interface compatibility but not
        applied; the stored query vector is used unscaled.
    :param beta: weight of relevant docs
    :param gamma: weight of irrelevant docs
    :return: True if successful, False if the query has never been stored
    """
    if relevant is None:
        relevant = []
    if irrelevant is None:
        irrelevant = []
    assert (query != "")
    query_map = SqliteDict(query_map_path)
    try:
        try:
            q0 = query_map[query]
        except KeyError:
            # Can't update queries we've never seen
            return False
        if not isinstance(q0, DictVector):
            q0 = DictVector(q0)

        doc_vec_db = SqliteDict(doc_vecs_db_path)
        try:
            # Each contribution is normalised by the size of the submitted list, so doc_ids
            # that are missing from the db still count towards the denominator.
            Nr = len(relevant)
            for doc_id in relevant:
                try:
                    doc_vec = doc_vec_db[doc_id]
                except KeyError:
                    # Unknown documents are skipped rather than treated as an error
                    continue
                if not isinstance(doc_vec, DictVector):
                    doc_vec = DictVector(doc_vec)
                q0 = q0 + (beta / Nr) * doc_vec

            Ni = len(irrelevant)
            for doc_id in irrelevant:
                try:
                    doc_vec = doc_vec_db[doc_id]
                except KeyError:
                    continue
                if not isinstance(doc_vec, DictVector):
                    doc_vec = DictVector(doc_vec)
                q0 = q0 - (gamma / Ni) * doc_vec
        finally:
            # Read-only handle; always release it, even if vector arithmetic fails
            doc_vec_db.close()

        query_map[query] = q0
        query_map.commit()
    finally:
        query_map.close()

    # Called only after the new vector has been committed and the handle released
    set_query_results(query)
    return True
Beispiel #2
0
def undo_update(query,
                relevant=None,
                irrelevant=None,
                alpha=0.9,
                beta=0.5,
                gamma=0.1):
    """
    Reverse a relevance-feedback adjustment on the stored vector of a query.

    Intended for the case where a user decides a post that was marked relevant isn't actually
    relevant. The arithmetic mirrors ``update_query`` with the signs flipped: contributions of
    ``relevant`` docs are subtracted and contributions of ``irrelevant`` docs are added back.
    The result is committed and ``set_query_results(query)`` is called afterwards.
    :param query: query string, must not be empty
    :param relevant: list of doc_ids whose "relevant" contribution should be removed
    :param irrelevant: list of doc_ids whose "irrelevant" contribution should be removed
    :param alpha: weight of original query. NOTE: accepted for interface compatibility but not
        applied; the stored query vector is used unscaled.
    :param beta: weight of relevant docs
    :param gamma: weight of irrelevant docs
    :return: True if successful, False if the query has never been stored
    """
    if relevant is None:
        relevant = []
    if irrelevant is None:
        irrelevant = []
    assert (query != "")
    query_map = SqliteDict(query_map_path)
    try:
        try:
            q0 = query_map[query]
        except KeyError:
            # Can't update queries we've never seen
            return False
        if not isinstance(q0, DictVector):
            q0 = DictVector(q0)

        doc_vec_db = SqliteDict(doc_vecs_db_path)
        try:
            # Same normalisation as the forward update: divide by the size of the submitted
            # list, whether or not every doc_id is present in the db.
            Nr = len(relevant)
            for doc_id in relevant:
                try:
                    doc_vec = doc_vec_db[doc_id]
                except KeyError:
                    # Unknown documents are skipped rather than treated as an error
                    continue
                if not isinstance(doc_vec, DictVector):
                    doc_vec = DictVector(doc_vec)
                q0 = q0 - (beta / Nr) * doc_vec

            Ni = len(irrelevant)
            for doc_id in irrelevant:
                try:
                    doc_vec = doc_vec_db[doc_id]
                except KeyError:
                    continue
                if not isinstance(doc_vec, DictVector):
                    doc_vec = DictVector(doc_vec)
                q0 = q0 + (gamma / Ni) * doc_vec
        finally:
            # Read-only handle; always release it, even if vector arithmetic fails
            doc_vec_db.close()

        query_map[query] = q0
        query_map.commit()
    finally:
        query_map.close()

    # Called only after the new vector has been committed and the handle released
    set_query_results(query)
    return True
Beispiel #3
0
 def testSubtraction(self):
     """Subtracting a vector with keys "a" and "e" must give 0 for "a" and -2 for "e"."""
     subtrahend = DictVector()
     for key, weight in (("a", 1), ("e", 2)):
         subtrahend[key] = weight

     difference = self.vec - subtrahend

     for key, expected in (("a", 0), ("e", -2)):
         self.assertEqual(difference[key], expected)
Beispiel #4
0
 def testAdditionWithNonepmty(self):
     """Adding a vector with keys "a" and "b" must give a=2, b=4 and leave c=3."""
     addend = DictVector()
     for key, weight in (("a", 1), ("b", 2)):
         addend[key] = weight

     total = self.vec + addend

     for key, expected in (("a", 2), ("b", 4), ("c", 3)):
         self.assertEqual(total[key], expected)
Beispiel #5
0
 def testWithDifferentKeys(self):
     """Adding a vector that brings a new key "e" must keep b and c and expose e=2."""
     addend = DictVector()
     for key, weight in (("a", 1), ("e", 2)):
         addend[key] = weight

     total = self.vec + addend

     for key, expected in (("a", 2), ("b", 2), ("c", 3), ("e", 2)):
         self.assertEqual(total[key], expected)
Beispiel #6
0
 def tfidf_from_doc(self, doc: Document, doc_freqs: DocFreqs) -> dict:
     """
     Build the tf-idf vector of a document.

     Term frequencies come from ``self.query_expander.compute_tf`` when a query expander is
     set, otherwise from the free function ``compute_tf``. Every term frequency is then
     multiplied by ``log(N / (1 + doc_freqs[word]))`` with ``N = doc_freqs.get_num_docs()``.
     :param doc: document to vectorise
     :param doc_freqs: document frequencies, indexable by word
     :return: DictVector mapping each word of the term-frequency table to its tf-idf weight
     """
     if self.query_expander is None:
         term_freqs = compute_tf(doc, doc_freqs, self.term_weights)
     else:
         term_freqs = self.query_expander.compute_tf(doc, self.term_weights)

     num_docs = doc_freqs.get_num_docs()
     weights = {
         word: term_freqs[word] * np.log(num_docs / (1 + doc_freqs[word]))
         for word in term_freqs.keys()
     }
     return DictVector(weights)
Beispiel #7
0
 def tfidf_from_tf(self, tf, doc_freqs):
     """
     Turn a term-frequency table into a tf-idf vector.

     Each term frequency is multiplied by ``log(N / (1 + doc_freqs[word]))`` with
     ``N = doc_freqs.get_num_docs()``.
     :param tf: mapping of word to term frequency
     :param doc_freqs: document frequencies, indexable by word
     :return: DictVector mapping each word in ``tf`` to its tf-idf weight
     """
     num_docs = doc_freqs.get_num_docs()
     weights = {
         word: tf[word] * np.log(num_docs / (1 + doc_freqs[word]))
         for word in tf.keys()
     }
     return DictVector(weights)
Beispiel #8
0
 def testConstructor(self):
     """A DictVector built from a dict must expose that dict's values by key."""
     source = {"a": 1, "b": 2}
     built = DictVector(source)

     for key, expected in (("a", 1), ("b", 2)):
         self.assertEqual(built[key], expected)
Beispiel #9
0
 def setUp(self) -> None:
     """Create the fixture vector ``self.vec`` with a=1, b=2, c=3."""
     self.vec = DictVector()
     for key, weight in (("a", 1), ("b", 2), ("c", 3)):
         self.vec[key] = weight
Beispiel #10
0
def query2vec(query):
    """
    Convert a query into a DictVector.

    The query is passed to ``processer.tfidf_from_query`` together with the module-level
    ``doc_freqs``, and the result is wrapped in a DictVector.
    :param query: query to convert
    :return: DictVector built from the processed query
    """
    return DictVector(processer.tfidf_from_query(query, doc_freqs))
Beispiel #11
0
# Directory under the app root that holds all SQLite-backed stores
db_path = os.path.join(app.root_path, "db")
# exist_ok avoids the race between a separate existence check and the creation
os.makedirs(db_path, exist_ok=True)
# query db stores the results of queries for later use
query_db_path = os.path.join(db_path, "queries.db")
# query map is a map of queries to vectors
query_map_path = os.path.join(db_path, "query_map.db")
# doc vecs db maps doc_ids to their document vectors
doc_vecs_db_path = os.path.join(db_path, "doc_vecs.db")

# Upload all doc vectors to db
doc_vec_db = SqliteDict(doc_vecs_db_path)
try:
    for doc_id, doc_vec in docs_tfidf:
        # Only vectors that are not stored yet are inserted; existing entries are left as-is.
        # A membership test avoids loading the stored vector just to throw it away.
        if doc_id not in doc_vec_db:
            doc_vec_db[doc_id] = DictVector(doc_vec)
    doc_vec_db.commit()
finally:
    # Release the handle even if building or storing a vector fails
    doc_vec_db.close()


# Convert document to vector, then upload
def upload_doc_vec(doc):
    """
    Placeholder for converting a document to a vector and uploading it.

    Not implemented: the argument is ignored and nothing is stored.
    :param doc: document to convert (currently unused)
    :return: None
    """
    # TODO Maybe implement this if time permits
    return None


@app.route("/docs", methods=["POST"])
def upload_doc():
    doc_db = SqliteDict(os.path.join(db_path, "docs.db"))
    data = json.loads(request.data)
    doc = {