def generate_snippet(self, doc, query):
    """Return *doc* with every word that also appears in *query* wrapped
    in double quotes, for display as a search-result snippet.

    :param doc: document text (whitespace-separated words)
    :param query: query string (whitespace-separated terms)
    :return: the document text with matching words quoted; every emitted
        word is followed by a single space (preserves the original
        output format, including the trailing space).
    """
    # NOTE(review): the original fetched FileAccess().get_stop_words()
    # but never used it — the dead call (and its file I/O) is removed.
    # Set membership is O(1) per word; the original list/index() dance
    # was O(n*m) and only quoted the FIRST occurrence of each term.
    query_terms = set(query.split())
    pieces = []
    for word in doc.split():
        if word in query_terms:
            pieces.append('"' + word + '" ')
        else:
            pieces.append(word + ' ')
    return ''.join(pieces)
def build_stopped_corpus(self):
    """Build the 'stopped_cacm' corpus by removing stop words from every
    cleaned ``*.html`` file found in 'clean_cacm' (both directories are
    resolved under the current working directory).

    Side effects: creates the directories if missing, writes one stopped
    file per input file, and — like the original — leaves the process
    chdir'ed into the clean_cacm directory.
    """
    cwd = os.getcwd()
    clean_cacm = os.path.join(cwd, 'clean_cacm')
    stopped_cacm = os.path.join(cwd, 'stopped_cacm')
    fa = FileAccess()

    if not os.path.exists(clean_cacm):
        # Nothing to process yet: create the folder, tell the user, bail.
        # (Fixed "PLease" typo in the user-facing message.)
        print("Clean corpus doesn't exist. It is created now. "
              "Please put cleaned files inside the corpus folder")
        os.makedirs(clean_cacm, 0o755)
        return
    if not os.path.exists(stopped_cacm):
        os.makedirs(stopped_cacm, 0o755)

    # set() makes per-word membership tests O(1) instead of scanning a list.
    stop_words = set(fa.get_stop_words())
    os.chdir(clean_cacm)
    for eachfile in glob.glob('*.html'):
        print(eachfile)
        # 'with' guarantees both handles are closed; the original leaked
        # the read handle entirely.
        with open(eachfile) as infile:
            words = infile.read().split()
        final_content = " ".join(x for x in words if x not in stop_words)
        with open(os.path.join(stopped_cacm, eachfile), 'w') as outfile:
            outfile.write(final_content)
def get_stopped_queries(self, query_dict):
    """Return a copy of *query_dict* with stop words removed from each query.

    :param query_dict: mapping of query id -> query string
    :return: new dict mapping the same ids to stop-word-filtered query
        strings (words re-joined with single spaces)
    """
    fa = FileAccess()
    # set() makes per-word membership tests O(1); the original also had a
    # pointless `query_dict = query_dict` self-assignment, now removed.
    stop_words = set(fa.get_stop_words())
    stopped_queries = {}
    for qid, query in query_dict.items():
        kept = [word for word in query.split() if word not in stop_words]
        stopped_queries[qid] = " ".join(kept)
    return stopped_queries