Ejemplo n.º 1
0
 def hash(self):
     """Recompute ``self.size`` and ``self.hash`` for the file at ``self.path_pdf``.

     When ``self.path_pdf`` is the empty string no file is touched: the
     hash is set to ``""`` and the size to ``0``.

     NOTE: assigning ``self.hash`` stores the digest under the same name as
     this method, so on that instance the attribute shadows the method
     after the first call.
     """
     if self.path_pdf != '':
         # Query the filesystem only once a path is known to be set;
         # os.path.getsize('') raises instead of returning 0.
         self.size = os.path.getsize(self.path_pdf)
         self.hash = md5sum_file(self.path_pdf)
     else:
         self.size = 0
         self.hash = ""
Ejemplo n.º 2
0
 def __init__(self, url="", path_pdf="", path_txt=None):
     """Initialise the record and, when a PDF path is given, hash and size it.

     Args:
         url: Stored unchanged on ``self.url``.
         path_pdf: Path of the PDF file. An empty string means "no file":
             ``hash`` becomes ``""`` and ``size`` becomes ``0``.
         path_txt: Path of the companion text file. When omitted (``None``)
             it defaults to ``path_pdf + ".txt"``.
     """
     # A default argument cannot refer to another parameter (defaults are
     # evaluated once, at definition time), so derive it here instead.
     if path_txt is None:
         path_txt = path_pdf + ".txt"

     self.url = url
     self.path_pdf = path_pdf
     self.path_txt = path_txt
     if self.path_pdf == '':
         self.hash = ""
         self.size = 0
     else:
         self.hash = md5sum_file(path_pdf)
         self.size = os.path.getsize(self.path_pdf)
     # processed: 0 = not yet processed, 1 = successfully processed,
     # -1 = cannot be processed.
     self.processed = 0
     self.words = []
     self.shingles = []
Ejemplo n.º 3
0
 def makehash(self):
     """Set ``self.hash`` from the file at ``self.path``.

     When ``self.path`` is ``None`` the hash is set to the empty string;
     otherwise it is whatever ``md5sum_file(self.path)`` returns
     (presumably the file's MD5 digest; the helper is defined elsewhere).
     """
     # Identity test: ``!= None`` can be overridden by a custom ``__ne__``.
     if self.path is not None:
         self.hash = md5sum_file(self.path)
     else:
         self.hash = ""