def get_document(self, doc_id):
    """Fetch a single document by its id through a fresh DbManager instance.

    Returns whatever DbManager.get_document yields (None when not found,
    judging by the callers' checks).
    """
    return DbManager().get_document(doc_id)
def add_snippets(self, ranked_docs, query):
    """Attach the two most query-relevant sentences of each ranked document.

    For every (doc_id, score) pair in ``ranked_docs``, scores each sentence of
    the document by cosine similarity against ``query`` using TF-IDF term
    weights, and returns a list of dicts::

        {"docId": ..., "score": ..., "title": ..., "snippets": [up to 2 str]}

    Documents missing from the DB are skipped.  The first sentence of a
    document is treated as its title and excluded from snippet scoring.
    """
    dbManager = DbManager()
    builder = StructureBuilder()
    docs_with_snippets = []

    # --- Pre-compute the TF-IDF weight of each query term, treating the
    # query itself as a small document. ---
    tf_idf_q_terms = {}
    q_terms = builder.get_stemmed_tems(query)
    # Loop-invariant: max term frequency within the query (hoisted out of loop).
    max_q_freq = self.get_local_max_freq(q_terms)
    for q_term in q_terms:
        # Number of documents in the collection in which q_term appears
        # at least once; 0 when the term is absent from the index.
        n_docs_q_term = len(self.q_terms_freqs.get(q_term, ()))
        if n_docs_q_term != 0:
            freq_q = q_terms.count(q_term)
            tf_idf_q_terms[q_term] = self.calc_tf_idf(
                freq_q, max_q_freq, self.docs_count, n_docs_q_term)
        else:
            tf_idf_q_terms[q_term] = 0

    for ranked_doc in ranked_docs:
        doc_id = ranked_doc[0]
        doc = dbManager.get_document(doc_id)
        if doc is None:
            continue
        sentences = self.get_doc_sentences(doc)
        # BUG FIX: pop(0) on an empty list raised IndexError for docs
        # with no sentences at all.
        if not sentences:
            continue
        title = sentences.pop(0)['content']

        sentence_scores = {}
        for sentence in sentences:
            sentence_content = sentence['content']
            # A "sentence" of 2 characters or fewer is probably noise.
            if len(sentence_content) <= 2:
                continue
            sentence_id = sentence['id']
            tf_idf_sum = 0
            denom_di_sum = 0
            denom_qi_sum = 0
            index_sentence = builder.get_stemmed_terms_frequencies_from_doc(
                sentence)
            for q_term in q_terms:
                # Only terms that actually occur in this sentence contribute.
                if q_term not in index_sentence.Terms:
                    continue
                q_sentence_freq = index_sentence.get_term_freq(q_term)
                max_freq = index_sentence.get_max_freq()
                # No frequency info at all -> this term cannot contribute.
                if q_sentence_freq == 0 and max_freq == 0:
                    continue
                # BUG FIX: original used self.q_terms_freqs[q_term] here,
                # raising KeyError for a query term absent from the index
                # (the outer loop guarded this lookup; this one did not).
                n_docs_q_term = len(self.q_terms_freqs.get(q_term, ()))
                if n_docs_q_term == 0:
                    # tf_idf_q_terms[q_term] is 0 here, so the product
                    # would be 0 anyway; skip safely.
                    continue
                tf_idf_doc = self.calc_tf_idf(
                    q_sentence_freq, max_freq, self.docs_count,
                    n_docs_q_term)
                tf_idf_q = tf_idf_q_terms[q_term]
                # Accumulate cosine-similarity components (TF-IDF weighting).
                tf_idf_sum += tf_idf_doc * tf_idf_q
                denom_di_sum += tf_idf_doc ** 2
                denom_qi_sum += tf_idf_q ** 2
            denom = math.sqrt(denom_di_sum) * math.sqrt(denom_qi_sum)
            score = tf_idf_sum / denom if denom != 0 else 0
            sentence_scores[sentence_id] = round(score, 3)

        # The (up to) two sentences with the highest cosine similarity.
        # BUG FIX: original indexed top_sentences[1] unconditionally and
        # crashed with IndexError when fewer than 2 sentences were scored.
        top_sentences = sorted(sentence_scores.items(),
                               key=operator.itemgetter(1),
                               reverse=True)[:2]
        top_ids = {sid for sid, _ in top_sentences}
        # Iterate `sentences` so snippets keep document order, as before.
        top_snippets = [s['content'] for s in sentences
                        if s['id'] in top_ids]
        docs_with_snippets.append({
            "docId": doc_id,
            "score": ranked_doc[1],
            "title": title,
            "snippets": top_snippets,
        })
    return docs_with_snippets