Python shelve Examples, shelve.shelve Python Examples

Example #1

0

Show file

File: indexer.py Project: satyasashi/Web-Crawler

 def set_collection_length(self, database):
     """
     Record the term-vector length and return the size of the collection.

     Sets ``self.vec_length`` to the number of entries stored under the
     'id2term' key of the 'temp/terms_integerid' shelf, then returns the
     number of keys held by the ``database`` shelf.

     database -- filename of the shelf to count; it is opened with flag 'r'.
     """
     db = shelve(database, 'r')
     try:
         temp_file = shelve('temp/terms_integerid', 'r')
         try:
             self.vec_length = len(temp_file['id2term'])
         finally:
             temp_file.close()
         # len() on a shelf gives its key count directly.
         return len(db)
     finally:
         # Close explicitly instead of leaving it to garbage collection.
         db.close()

Example #2

0

Show file

File: linkanalyser.py Project: satyasashi/Web-Crawler

    def analyze(self, url, links):
        """Create or update the stored document for ``url``.

        The document is stored in the 'database1' shelf under the MD5 hex
        digest of ``url``. An existing entry is reused, otherwise a new
        ``Document(url)`` is created. In both cases ``links`` is handed to
        ``insertOL``, the page text is read from the file named by
        ``self.retriever.filename(url)``, and the term attributes
        (``all_terms``, ``unique_terms_freq``) are recomputed from it.

        url   -- address of the page being analysed.
        links -- outgoing links passed to the document's ``insertOL``.
        """
        self.db = shelve('database1', 'w')
        try:
            key = md5(url).hexdigest()
            # Direct membership test on the shelf; no need to walk every key.
            if key in self.db:
                doc = self.db[key]
            else:
                doc = Document(url)
            doc.insertOL(links)
            doc.url = url
            with open(self.retriever.filename(url)) as page:
                document = page.read()
            doc.all_terms = self.term_extractor.get_terms(document)
            unique_terms = self.term_extractor.get_unique_terms(document)
            doc.unique_terms_freq = self.term_extractor.count_term_frequencies(
                unique_terms, document)
            # The shelf is opened without writeback, so an object fetched from
            # it is a copy: it must be stored again or the changes are lost.
            self.db[key] = doc
        finally:
            self.db.close()

Example #3

0

Show file

File: indexer.py Project: satyasashi/Web-Crawler

 def yield_documents(self):
         """
         Generator yielding every document stored in the ``self.db`` shelf.

         The shelf stays open while the caller iterates and is closed once
         the generator is exhausted, closed, or discarded.
         """
         db = shelve(self.db, 'w')
         try:
             for key in db:
                 yield db[key]
         finally:
             # Previously the handle was never closed, so every call to this
             # generator left one more open shelf behind.
             db.close()

Example #4

0

Show file

File: tokenizedocuments.py Project: ABYARTH/Web-Crawler

 def get_all_terms(self, pod = 0.6):
     """
     Collect the terms of a portion of the documents in the ``self.db`` shelf.

     pod is the fraction of the stored documents whose ``all_terms`` lists
     are concatenated, in the shelf's key order, into the returned list.

     At least one document is always read when the shelf is not empty, even
     if ``int(pod * number_of_documents)`` is 0. An empty shelf yields an
     empty list, and a pod above 1 simply covers every document.
     """
     self.database = shelve(self.db, 'w')
     try:
         no_of_docs = int(pod * len(self.database))
         # Keep the long-standing behaviour of always taking the first
         # document, whatever the computed count is.
         wanted = max(no_of_docs, 1)
         all_terms = []
         for position, key in enumerate(self.database):
             if position >= wanted:
                 break
             all_terms.extend(self.database[key].all_terms)
     finally:
         # Release the shelf even when reading a document fails.
         self.database.close()
     return all_terms

Example #5

0

Show file

File: linkanalyser.py Project: praveen97uma/Web-Crawler

    def analyze(self, url, links):
        """Create or update the stored document for ``url``.

        The document is stored in the "database1" shelf under the MD5 hex
        digest of ``url``. An existing entry is reused, otherwise a new
        ``Document(url)`` is created. In both cases ``links`` is handed to
        ``insertOL``, the page text is read from the file named by
        ``self.retriever.filename(url)``, and the term attributes
        (``all_terms``, ``unique_terms_freq``) are recomputed from it.

        url   -- address of the page being analysed.
        links -- outgoing links passed to the document's ``insertOL``.
        """
        self.db = shelve("database1", "w")
        try:
            key = md5(url).hexdigest()
            # Direct membership test on the shelf; no need to walk every key.
            if key in self.db:
                doc = self.db[key]
            else:
                doc = Document(url)
            doc.insertOL(links)
            doc.url = url
            with open(self.retriever.filename(url)) as page:
                document = page.read()
            doc.all_terms = self.term_extractor.get_terms(document)
            unique_terms = self.term_extractor.get_unique_terms(document)
            doc.unique_terms_freq = self.term_extractor.count_term_frequencies(unique_terms, document)
            # The shelf is opened without writeback, so an object fetched from
            # it is a copy: it must be stored again or the changes are lost.
            self.db[key] = doc
        finally:
            self.db.close()

Example #6

0

Show file

    def get_all_terms(self, pod=0.6):
        """
        Collect the terms of a portion of the documents in the ``self.db``
        shelf.

        pod is the fraction of the stored documents whose ``all_terms`` lists
        are concatenated, in the shelf's key order, into the returned list.

        At least one document is always read when the shelf is not empty,
        even if ``int(pod * number_of_documents)`` is 0. An empty shelf
        yields an empty list, and a pod above 1 simply covers every document.
        """
        self.database = shelve(self.db, 'w')
        try:
            no_of_docs = int(pod * len(self.database))
            # Keep the long-standing behaviour of always taking the first
            # document, whatever the computed count is.
            wanted = max(no_of_docs, 1)
            all_terms = []
            for position, key in enumerate(self.database):
                if position >= wanted:
                    break
                all_terms.extend(self.database[key].all_terms)
        finally:
            # Release the shelf even when reading a document fails.
            self.database.close()
        return all_terms

Example #7

0

Show file

File: indexer.py Project: satyasashi/Web-Crawler

    def set_doc_vector(self):
        """
        Compute, normalise and store a weighted term vector for each document.

        For every document in the ``self.db`` shelf a vector is created with
        ``zeros(self.vec_length)``. For each keyword of the 'term2id' mapping
        that occurs in the document's ``unique_terms_freq``, the slot given
        by the keyword's integer id is set to
        ``mathutils.calculate_term_weight(tf, df, self.vec_length)``, where
        ``tf`` comes from the document and ``df`` from the 'doc_frequencies'
        shelf. The vector is passed through ``mathutils.normalise_vector``,
        printed, and saved in the 'documentVectors' shelf under
        ``document.key``.
        """
        opened = []
        try:
            doc_freq = shelve('doc_frequencies', 'w')
            opened.append(doc_freq)
            # 'c' creates the shelf when it is missing and opens it for
            # reading and writing, so no separate create step is needed.
            dv = shelve('documentVectors', 'c')
            opened.append(dv)
            keyword_database = shelve('temp/terms_to_integer', 'r')
            opened.append(keyword_database)
            keywords = keyword_database['term2id']
            db = shelve(self.db, 'w')
            opened.append(db)

            for doc_key in db:
                document = db[doc_key]
                doc_terms = document.unique_terms_freq
                doc_vector = zeros(self.vec_length)
                for kw, term_id in keywords.items():
                    if kw in doc_terms:
                        term_weight = mathutils.calculate_term_weight(
                            doc_terms[kw], doc_freq[kw], self.vec_length)
                        doc_vector[term_id] = term_weight

                doc_vector = mathutils.normalise_vector(doc_vector)
                print(doc_vector)
                dv[document.key] = doc_vector
        finally:
            # Closing flushes the stored vectors and releases every handle,
            # including when a document fails part-way through.
            for handle in reversed(opened):
                handle.close()

Example #8

0

Show file

File: indexer.py Project: satyasashi/Web-Crawler

 def calculate_document_frequency(self):
     """
     Calculate the document frequencies of the tokens. Document frequency is
     the number of documents in which a keyword appears. This will be used
     to calculate the inverse document frequencies.

     At most ``self.vec_length`` keywords from ``self.yield_keyword()`` are
     processed. For each one, the documents from ``self.yield_documents()``
     whose ``all_terms`` contain it are counted. The count is stored in the
     'doc_frequencies' shelf under the keyword and printed as
     "<keyword> <count>".
     """
     from itertools import islice

     self.get_document_keywords()
     # 'c' creates the shelf when needed and opens it for writing.
     doc_freq = shelve('doc_frequencies', 'c')
     try:
         # islice stops quietly if fewer keywords than vec_length exist.
         for kw in islice(self.yield_keyword(), self.vec_length):
             count = sum(1 for document in self.yield_documents()
                         if kw in document.all_terms)
             doc_freq[kw] = count
             print('%s %s' % (kw, count))
     finally:
         doc_freq.close()

Example #9

0

Show file

File: tokenizedocuments.py Project: ABYARTH/Web-Crawler

 def assign_id_to_terms(self, list_of_terms):
     """
     Assign an integer id to all the terms for mathematical manipulation.

     The ids are the positions of the terms in ``list_of_terms``. Both
     directions of the mapping are saved for future reference:
     id2term (integer -> term) in the 'temp/terms_integerid' shelf and
     term2id (term -> integer) in the 'temp/terms_to_integer' shelf.

     Returns the term2id dictionary.
     """
     int_to_terms = dict(enumerate(list_of_terms))
     terms_to_int = dict(
         (term, term_id) for term_id, term in int_to_terms.items())

     # 'c' creates each shelf when missing; both are closed explicitly so
     # the data is flushed to disk before returning.
     temp_file = shelve('temp/terms_integerid', 'c')
     try:
         temp_file['id2term'] = int_to_terms
     finally:
         temp_file.close()

     temp_file1 = shelve('temp/terms_to_integer', 'c')
     try:
         temp_file1['term2id'] = terms_to_int
     finally:
         temp_file1.close()
     return terms_to_int

Example #10

0

Show file

 def assign_id_to_terms(self, list_of_terms):
     """
     Assign an integer id to all the terms for mathematical manipulation.

     The ids are the positions of the terms in ``list_of_terms``. Both
     directions of the mapping are saved for future reference:
     id2term (integer -> term) in the 'temp/terms_integerid' shelf and
     term2id (term -> integer) in the 'temp/terms_to_integer' shelf.

     Returns the term2id dictionary.
     """
     int_to_terms = dict(enumerate(list_of_terms))
     terms_to_int = dict(
         (term, term_id) for term_id, term in int_to_terms.items())

     # 'c' creates each shelf when missing; both are closed explicitly so
     # the data is flushed to disk before returning.
     temp_file = shelve('temp/terms_integerid', 'c')
     try:
         temp_file['id2term'] = int_to_terms
     finally:
         temp_file.close()

     temp_file1 = shelve('temp/terms_to_integer', 'c')
     try:
         temp_file1['term2id'] = terms_to_int
     finally:
         temp_file1.close()
     return terms_to_int

Example #11

0

Show file

File: linkanalyser.py Project: satyasashi/Web-Crawler

 def __init__(self):
     """
     Make sure the 'database1' shelf exists and build the helper objects
     (a ``parser.ExtractTerms`` term extractor and a ``Retriever``).
     """
     # Opening with 'c' creates the shelf if it is missing; close the handle
     # straight away instead of leaving it to the garbage collector.
     shelve('database1', 'c').close()
     self.term_extractor = parser.ExtractTerms()
     self.retriever = Retriever()

Example #12

0

Show file

File: indexer.py Project: satyasashi/Web-Crawler

 def yield_keyword(self):
         """
         Generator yielding every term of the 'id2term' mapping stored in
         the 'temp/terms_integerid' shelf.
         """
         temp_file = shelve('temp/terms_integerid', 'w')
         try:
             id_to_term = temp_file['id2term']
         finally:
             # The mapping is fully loaded by the lookup above, so the shelf
             # can be closed before any value is handed out.
             temp_file.close()
         for term_id in id_to_term:
             yield id_to_term[term_id]

Example #13

0

Show file

from datetime import timedelta, time, datetime, date
from tkinter import *
from os import popen
from shelve import open as shelve
from time import sleep
from pyperclip import copy
from json import loads
from functools import partial
from subprocess import Popen
from sys import exit
today = date.today()
day = today.weekday()
# weekday() numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
if day in (5, 6):
    exit(0)

# A date key present in the 'holidays' shelf means today is a holiday.
memory = shelve('holidays')
try:
    memory[today.strftime('%d-%m-%Y')]
except KeyError:
    memory.close()
else:
    memory.close()
    exit(0)

week = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday')

# From here on ``memory`` holds the parsed settings instead of the shelf.
with open('settings.json') as f:
    memory = loads(f.read())
wait = timedelta(minutes=memory['alert'])
start_meeting = partial(Popen, [memory['exe_path']])
cmd_class = memory['command_after_class']
cmd_day = memory['command_after_day']
# 'over by' is sliced as two hour characters, a separator, then the minutes.
over = memory['over by']
over = datetime.combine(
    today, time(hour=int(over[:2]), minute=int(over[3:])))
dow = week[day]

Example #14

0

Show file

File: linkanalyser.py Project: praveen97uma/Web-Crawler

 def __init__(self):
     """
     Make sure the "database1" shelf exists and build the helper objects
     (a ``parser.ExtractTerms`` term extractor and a ``Retriever``).
     """
     # Opening with "c" creates the shelf if it is missing; close the handle
     # straight away instead of leaving it to the garbage collector.
     shelve("database1", "c").close()
     self.term_extractor = parser.ExtractTerms()
     self.retriever = Retriever()

Example #15

0

Show file

File: search.py Project: satyasashi/Web-Crawler

"""
This module implements the class for handling the user queries.

temp/terms_to_integer stores the basis vector in a dictionary with the
terms mapped to integers.
"""
from shelve import DbfilenameShelf as shelve
from Models.document import Document
from porter import PorterStemmer
import mathutils
from counter import Counter
from numpy import zeros
from Models.document import Document
from numpy import dot

# Module-level state, built once when this module is imported.
# The shelf of terms mapped to integers is opened with flag 'r'; the terms
# themselves are the keys of its 'term2id' entry.
keyword_database = shelve('temp/terms_to_integer', 'r')
keywords = keyword_database['term2id'].keys()
# Number of known terms.
vec_length = len(keywords)
porter_stemmer = PorterStemmer()
# Two more shelves, also opened with flag 'r' and kept in module globals.
dv = shelve('documentVectors', 'r')
doc_database = shelve('database1', 'r')


def query_parser(query):
    """
    The query string is split into words or terms. The terms are then
    checked if they are present in our basis vector. The terms which are
    found in the basis vector are then mapped to their integer ids and 
    returned as a vector.
    """

Example #16

0

Show file

File: search.py Project: ABYARTH/Web-Crawler

"""
This module implements the class for handling the user queries.

temp/terms_to_integer stores the basis vector in a dictionary with the
terms mapped to integers.
"""
from shelve import DbfilenameShelf as shelve
from Models.document import Document
from porter import PorterStemmer
import mathutils
from counter import Counter
from numpy import zeros
from Models.document import Document
from numpy import dot

# Module-level state, built once when this module is imported.
# The shelf of terms mapped to integers is opened with flag 'r'; the terms
# themselves are the keys of its 'term2id' entry.
keyword_database = shelve('temp/terms_to_integer', 'r')
keywords = keyword_database['term2id'].keys()
# Number of known terms.
vec_length = len(keywords)
porter_stemmer = PorterStemmer()
# Two more shelves, also opened with flag 'r' and kept in module globals.
dv = shelve('documentVectors', 'r')
doc_database = shelve('database1', 'r')

def query_parser(query):
    """
    The query string is split into words or terms. The terms are then
    checked if they are present in our basis vector. The terms which are
    found in the basis vector are then mapped to their integer ids and 
    returned as a vector.
    """
   
    query_terms = query.split()