def get_file(self, key, hashsum=True):
    """
    Read this document's file from the folder selected by C{key}.

    @param key: Specifies which data to look at, e. g. "NOUNS" (str).
    @param hashsum: If true (default) return not only the file's
        content but also its hashsum, otherwise return only the content.
    @return: C{(content, sha512sum)} if C{hashsum} is true, otherwise
        just C{content}. C{content} is the list of lines returned by
        C{readlines()}; C{sha512sum} is the SHA-512 hex digest of the
        joined lines, encoded with the encoding from C{get_def_enc()}.
        If the file cannot be opened or read (IOError), both values
        are None.
    """
    content = None
    sha512sum = None
    encoding = get_def_enc()
    path = self._get_folder_by_key(key) + self[self.DOC_ID]
    try:
        # The context manager closes the handle even if reading fails.
        with open(path, "r", encoding) as f:
            content = f.readlines()
    except IOError:
        # Best effort: a missing or unreadable file yields None values.
        pass
    else:
        sha512sum = sha512("".join(content).encode(encoding)).hexdigest()
    if not hashsum:
        return content
    # Otherwise: return a tuple
    return content, sha512sum
def write_file(self, key, hashsum=True):
    """
    Writes a file (name: document name) to a specified folder.

    The items of C{self[key]} are joined with newlines, encoded with the
    encoding from C{get_def_enc()} and written to C{folder + doc_id}.
    The folder is created first if it does not exist. Errors while
    creating the folder or writing the file are printed, and the method
    returns without recording a hashsum.

    @param key: Specifies which data to write, based on the key of the
        data stored in this object, e. g. "WORDS" or "NOUNS".
    @param hashsum: Defaults to True; if true, the SHA-512 hex digest of
        the written bytes is stored in a HashDict under the file's path
        and the HashDict is saved.
    """
    folder = self._get_folder_by_key(key)
    doc_id = self[self.DOC_ID]
    path = folder + doc_id
    doc_as_str = "\n".join(self[key]).encode(get_def_enc())

    if not exists(folder):
        print("Folder " + folder + " doesn't exist.")
        try:
            makedirs(folder)
            print("Folder " + folder + " created.")
        except Exception as e:
            print(str(e))
            # Without the folder there is nothing to write or to hash.
            return

    try:
        # The data is already encoded, so write the bytes unmodified;
        # the recorded digest then matches the bytes on disk.
        with open(path, "wb") as f:
            f.write(doc_as_str)
    except (IOError, OSError) as e:
        print(str(e))
        return

    # Record the hash only once the file really exists on disk.
    if hashsum:
        hashdict = HashDict()
        hashdict[path] = sha512(doc_as_str).hexdigest()
        hashdict.save()
def write_symbols(self):
    """
    Writes a file with all the symbols (i. e. this class's keys).

    The keys are written in descending sort order, one directly after
    another without any separator, to the path returned by
    C{get_symbols_file()}, using the encoding from C{get_def_enc()}.
    """
    # The context manager closes the file even if a write fails.
    with open(get_symbols_file(), "w", encoding=get_def_enc()) as f:
        for symbol in sorted(self.keys(), reverse=True):
            f.write(symbol)
def write_content(self, content_format="line", content_type="raw"):
    """
    Write this document's tokens to a file in the mail folder on disk.

    The target folder is obtained from C{get_mailfolder()} and created
    if it does not exist; the file is named after the document id. Each
    token from C{self.get_tokens()} is written on its own line.

    @param content_format: Passed on to C{get_mailfolder()} to select
        the folder. If "line" writes content unit line per line.
    @param content_type: If "raw", write tokens as is; other values
        like "words" may (become) possible. Currently not evaluated.
    """
    d = get_mailfolder(content_format=content_format)
    if not exists(d):
        print("Folder " + d + " not availabe. Create it.")
        makedirs(d)
    # For now only "raw" content_type exists, and "line" content_format.
    # The context manager closes the file even if writing a token fails.
    with open(d + self[self.DOC_ID], "w", get_def_enc()) as f:
        for t in self.get_tokens():
            f.write(t + "\n")
def _create_symbols_dict(self):
    """
    Opens a file (for now) with all text available and stores its
    symbols as keys (for each of whom a determined attribute is stored
    as value). Meant as internal method.

    The file is the one returned by C{get_raw_file()}, read with the
    encoding from C{get_def_enc()}. Every distinct character in it
    becomes a key; its value is C{self._classify_symbol(sym).get(sym)}.
    """
    # Get unique symbols first. The file is only needed for this read,
    # so it is closed before the classification work starts.
    with open(get_raw_file(), "r", encoding=get_def_enc()) as f:
        symset = set(f.read())
    # For each symbol (key) determine an attribute to store in this
    # instance
    for sym in symset:
        symobj = self._classify_symbol(sym)
        self[sym] = symobj.get(sym)