def main(query, ranker):
    """Build the index from the crawled pages, then rank them for `query`
    using either cosine similarity or query-dependent PageRank."""
    crawler_tuple = pk.open_pickle("crawler_tuple_pages.pkl")
    global inverted
    global documents
    global max_freq
    global cosine
    global N
    global len_dict
    for url in crawler_tuple.keys():
        N += 1
        a, b = url, crawler_tuple[url][1]  # b is the page's token list
        documents.update({a: b})
        inverted_index(a, b)
        # Count of the most frequent token, used to normalise term frequencies
        res = max(set(b), key=b.count)
        max_freq.update({a: b.count(res)})
    len_dict = {}
    for docno, file in documents.items():
        val = tf_idf(docno, file)
        len_dict.update({docno: val})
    if ranker == 'cosine':
        return cosine_calc(query)
    elif ranker == 'PageRank':
        return page_rank_calc(query)

# Example usage:
#   main('computer', 'cosine')
#   main('computer', 'PageRank')
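# `inverted_index`, `tf_idf`, and `cosine_calc` are defined elsewhere in this
# module. For reference, a minimal sketch of what `cosine_calc` might look
# like, assuming `inverted` maps each term to per-document weights and
# `len_dict` holds document vector lengths -- the names and shapes here are
# illustrative, not the project's actual implementation:
def cosine_calc_sketch(query):
    cleaned_query = process_query(query)
    scores = {}
    for docno in documents:
        # Dot product of the query terms with the document's term weights,
        # normalised by the precomputed document vector length.
        dot = sum(inverted.get(term, {}).get(docno, 0) for term in cleaned_query)
        if len_dict.get(docno):
            scores[docno] = dot / len_dict[docno]
    return [k for k, v in sorted(scores.items(), key=lambda item: item[1], reverse=True)]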
def page_rank_calc(query):
    """Rank pages for `query` by their query-dependent PageRank scores."""
    cleaned_query = process_query(query)
    pageranks = pk.open_pickle('qdpr.pkl')
    pr_rank = score(pageranks, cleaned_query)
    # Sort URLs by score, highest first
    qdpr_rank = [k for k, v in sorted(pr_rank.items(), key=lambda item: item[1], reverse=True)]
    return qdpr_rank
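# A minimal sketch of what `score` might do here, assuming `pageranks` maps
# each URL to a dict of per-term QD-PageRank values (the structure built by
# the qdpr script below) -- an illustration, not the project's actual code:
def score_sketch(pageranks, cleaned_query):
    pr_rank = {}
    for url, term_scores in pageranks.items():
        # A page's score for the query is the sum of its query-dependent
        # PageRank values over the query terms it contains.
        pr_rank[url] = sum(term_scores.get(term, 0) for term in cleaned_query)
    return pr_rank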
def open_peaks(self, filename):
    peaks = open_pickle(self.peaks, filename)
    self.set_peaks(peaks)
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 27 13:50:24 2020

@author: Sheetal

Calculates the tf-idf weight for each term on each crawled page and
computes the inlinks.
"""
import math
import pickle_functions as pk

if __name__ == "__main__":
    word_count = pk.open_pickle('word_count.pkl')
    vocab = pk.open_pickle('vocab.pkl')
    crawler_tuple = pk.open_pickle('crawler_tuple_pages.pkl')

    tfidf = {}
    N = len(word_count)  # N is the total number of webpages scraped
    for url in word_count:
        tfidf[url] = {}
        for token in word_count[url]:
            # Term frequency, normalised by the most frequent term on the page
            tf = word_count[url][token] / max(word_count[url].values())
            # Inverse document frequency: vocab[token] is the number of
            # pages containing the token
            idf = math.log(N / vocab[token], 2)
            tfidf[url][token] = tf * idf
    pk.save_pickle('tfidf.pkl', tfidf)

    inlink = {}
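# Worked example of the tf-idf computation above on toy data (illustrative):
#   word_count = {'u1': {'data': 4, 'mining': 2}, 'u2': {'data': 1}}
#   vocab      = {'data': 2, 'mining': 1}    # document frequencies
#   N = 2
#   tf('u1', 'mining')    = 2 / 4 = 0.5      (normalised by u1's max count)
#   idf('mining')         = log2(2 / 1) = 1.0
#   tfidf['u1']['mining'] = 0.5 * 1.0 = 0.5
#   idf('data')           = log2(2 / 2) = 0.0, so 'data' scores 0 on both pages.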
            a_tags = bs.find_all('a')
            for a in a_tags:
                try:
                    if re.search('.+?uic.edu', a["href"]) is not None:
                        if not any(ext in a["href"] for ext in skip_exten):
                            parse = urlparse(a["href"])
                            host_path = parse.netloc + parse.path
                            # Remove a leading "www." prefix explicitly;
                            # str.lstrip("www.") would strip those *characters*,
                            # not the prefix, mangling hosts like "ww2.uic.edu"
                            if host_path.startswith("www."):
                                host_path = host_path[len("www."):]
                            temp_href = host_path.rstrip("/")
                            if (uic_domain in a["href"]
                                    and temp_href not in links_dict.values()
                                    and temp_href not in visited):
                                url_queue.append(temp_href)
                except KeyError:
                    # <a> tag without an href attribute
                    continue
            print(page_num)
            if page_num > search_limit:
                break
        except Exception:
            print("Connection failed for ", url)
            continue

    web_crawler = scrape(visited, vocab)
    pk.save_pickle('crawler_tuple_pages.pkl', web_crawler)
    pk.save_pickle('word_count.pkl', word_count)
    pk.save_pickle('vocab.pkl', vocab)
    pk.save_pickle('page_content.pkl', page_content)
else:
    web_crawler = pk.open_pickle('crawler_tuple_pages.pkl')
    word_count = pk.open_pickle('word_count.pkl')
    vocab = pk.open_pickle('vocab.pkl')
    page_content = pk.open_pickle('page_content.pkl')
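# Example of the link normalisation above (illustrative values):
#   urlparse("https://www.cs.uic.edu/people/")
#     -> netloc = "www.cs.uic.edu", path = "/people/"
#   after stripping the "www." prefix and the trailing "/":
#     temp_href = "cs.uic.edu/people"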
    # Iterate the QD-PageRank update a fixed number of times
    while count < 10:
        for url in tfidf:
            for token in tfidf[url]:
                # Rank flowing in from the pages that link to `url`
                s = 0
                for i in inlink[url]:
                    s += ((qdpr_dict[i][token] if token in qdpr_dict[i] else 0)
                          * pqitoj(token, i, url, tfidf))
                # Query-dependent "random jump": this page's share of the
                # collection's total tf-idf mass for the token
                prQuery = tfidf[url][token] / sum(
                    tfidf[i][token] if token in tfidf[i] else 0 for i in tfidf)
                qdpr_dict[url][token] = (1 - df) * prQuery + df * s
        count += 1
    return qdpr_dict


if __name__ == "__main__":
    crawler_tuple = pk.open_pickle("crawler_tuple_pages.pkl")
    tfidf = pk.open_pickle("tfidf.pkl")
    # Build the inlink map once and cache it on disk
    if os.path.exists('inlink.pkl'):
        inlink = pk.open_pickle("inlink.pkl")
    else:
        inlink = inlinkFunc(tfidf, crawler_tuple)
        pk.save_pickle("inlink.pkl", inlink)
    qdpr_fin = qdpr(tfidf, crawler_tuple, inlink)
    print(qdpr_fin)
    pk.save_pickle("qdpr.pkl", qdpr_fin)
    qr = pk.open_pickle('qdpr.pkl')
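# `pqitoj` is the query-dependent transition probability P_q(i -> j): a surfer
# on page i follows the link to page j with probability proportional to j's
# tf-idf weight for the token, normalised over all of i's outlinks. A minimal
# sketch, assuming an `outlink` dict mapping each page to the pages it links
# to (the real pqitoj takes no such parameter and may differ):
def pqitoj_sketch(token, i, j, tfidf, outlink):
    denom = sum(tfidf[k].get(token, 0) for k in outlink[i] if k in tfidf)
    if denom == 0:
        return 0
    return tfidf[j].get(token, 0) / denom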