Beispiel #1
0
 def get_file(self, key, hashsum=True):
     """
     Reads this document's file from the folder belonging to key.
     The path is the folder returned by _get_folder_by_key(key) 
     concatenated with this document's id (self[self.DOC_ID]).
     @param key: Specifies which data to look at, e. g. "NOUNS" 
                (str).
     @param hashsum: If true (default) return not only the file's 
                     contents, but also their hashsum, otherwise 
                     return only the file's contents.
     @return: If hashsum is true a tuple (content, sha512sum), 
              otherwise only content. content is the list of lines
              read from the file; sha512sum is the hex digest (str)
              of the joined lines, encoded with the default 
              encoding. If the file cannot be read (IOError), the
              values are None.
     """
     content = None
     sha512sum = None
     
     path = self._get_folder_by_key(key) + self[self.DOC_ID]
     try:
         # The with block closes the file even if reading fails.
         with open(path, "r", encoding=get_def_enc()) as f:
             content = f.readlines()
     except IOError:
         # Deliberate best effort: an unreadable or missing file is
         # reported to the caller through None values.
         pass
     else:
         raw = "".join(content).encode(get_def_enc())
         sha512sum = sha512(raw).hexdigest()
     
     if not hashsum:
         return content
     # Otherwise: Return a tuple
     return content, sha512sum
Beispiel #2
0
 def write_file(self, key, hashsum=True):
     """
     Prepares writing this document's data for key to the folder
     that belongs to key.
     The data (self[key]) is joined with newlines and encoded with
     the default encoding. If requested, its sha512 hex digest is
     stored in a HashDict under folder + document id and the
     HashDict is saved. Finally the folder is created if missing.
     NOTE(review): the code visible here never writes the document
     itself to disk; confirm whether that step is missing.
     @param key: Specifies which data to write, based on the 
                 key of the data stored in this object, 
                 e. g. "WORDS" or "NOUNS".
     @param hashsum: Defaults to True; only if it equals True the
                     hashsum of the data is recorded.
     """
     target_dir = self._get_folder_by_key(key)
     
     name = self[self.DOC_ID]
     lines = self[key]
     payload = "\n".join(lines).encode(get_def_enc())
     digest = ""
     if hashsum == True:
         registry = HashDict()
         digest = sha512(payload).hexdigest()
         registry[target_dir + name] = digest
         registry.save()
     
     if not exists(target_dir):
         print("Folder " + target_dir + " doesn't exist.")
         try:
             makedirs(target_dir)
             print("Folder " + target_dir + " created.")
         except Exception as err:
             # Creation failures are only reported, not re-raised.
             print(str(err))
Beispiel #3
0
 def write_symbols(self):
     """ 
     Writes a file with all the symbols (i. e. this class's keys)
     to the path returned by get_symbols_file(), using the default
     encoding. The symbols are written in reverse sorted order,
     one directly after the other without any separator.
     """
     # The with block closes the file even if a write fails.
     with open(get_symbols_file(), "w", encoding=get_def_enc()) as f:
         for symbol in sorted(self.keys(), reverse=True):
             f.write(symbol)
Beispiel #4
0
    def write_content(self, content_format="line", content_type="raw"):
        """
        Write this document's tokens, one token per line, to a file 
        named after the document id (self[self.DOC_ID]) inside the
        folder returned by get_mailfolder(). The folder is created 
        if it does not exist yet.
        @param content_format: Passed on to get_mailfolder() to pick
                             the target folder. For now the output 
                             is always written line per line.
        @param content_type: If "raw", write tokens as is; other values
                            like "words" may (become) possible. 
                            Currently not evaluated.
        """
        d = get_mailfolder(content_format=content_format)
        if not exists(d):
            print("Folder " + d + " not availabe. Create it.")
            makedirs(d)

        # For now only "raw" content_type exists, and "line" 
        # content_format
        # The with block closes the file even if a write fails.
        with open(d + self[self.DOC_ID], "w", encoding=get_def_enc()) as f:
            for t in self.get_tokens():
                f.write(t + "\n")
Beispiel #5
0
 def _create_symbols_dict(self):
     """
     Opens a file (for now) with all text available and stores its
     symbols as keys (for each of whom a determined attribute is stored
     as value). Meant as internal method.
     The file is the one returned by get_raw_file(), read with the
     default encoding. Every distinct character in it is a symbol; 
     the value stored for a symbol is 
     self._classify_symbol(sym).get(sym).
     """     
     # Get unique symbols first. The file is only needed for reading
     # and is closed as soon as the with block ends.
     with open(get_raw_file(), "r", encoding=get_def_enc()) as f:
         symset = set(f.read())
         
     # For each symbol (key) determine an attribute to store in this
     # instance
     for sym in symset:
         symobj = self._classify_symbol(sym) 
         self[sym] = symobj.get(sym)