def get(self, resource):
    """Print the plain text of each note in the StickyNotes blob ``resource``.

    ``resource`` is a URL-quoted blob key.  The blob is read fully into
    memory, opened as an OLE compound file, version-checked, and every
    matching RTF stream is converted with ``Rtf2Txt.getTxt`` and printed.

    Raises:
        AssertionError: if the ``Version`` stream is not the bytes
            02 00 00 00.
    """
    resource = str(urllib.unquote(resource))

    # Open the blob named by the request rather than a fixed key, so the
    # handler actually serves what the caller asked for.
    blob_reader = blobstore.BlobReader(resource, buffer_size=1048576)
    ole = OleFileIO_PL.OleFileIO(StringIO(blob_reader.read()))

    # make sure StickyNotes.snt comes from Win 7
    # Raised explicitly (same exception type as an ``assert``) so the check
    # is not stripped when Python runs with -O.
    if ole.openstream('Version').read() != '\x02\x00\x00\x00':
        raise AssertionError('unexpected StickyNotes Version stream')

    for entry in sorted(ole.listdir()):
        # listdir() yields paths as lists of name components; only
        # two-component paths ending in '0' are treated as note RTF streams.
        if len(entry) == 2 and entry[1] == '0':
            rtf = ole.openstream(entry).read()
            # Keep only the data before the first NUL.  partition() returns
            # the whole stream when there is no NUL, where index() would
            # raise ValueError.
            rtf = rtf.partition('\0')[0]
            # The \ansicpg936 code-page control word is deliberately left in
            # the RTF in this handler.
            txt = Rtf2Txt.getTxt(rtf)
            print(txt)
def get(self):
    """Print the blob key and the note texts of every stored ``UserNote``.

    For each entity the referenced blob is read fully into memory, opened as
    an OLE compound file, version-checked, and every matching RTF stream is
    converted with ``Rtf2Txt.getTxt`` and printed.

    Raises:
        AssertionError: if a blob's ``Version`` stream is not the bytes
            02 00 00 00.
    """
    # NOTE(review): no filter is applied to the query, so this walks the
    # notes of all users -- confirm that is intended.
    for usernote in UserNote.all():
        print(usernote.blob_key)
        blob_reader = blobstore.BlobReader(usernote.blob_key,
                                           buffer_size=1048576)
        ole = OleFileIO_PL.OleFileIO(StringIO(blob_reader.read()))

        # make sure StickyNotes.snt comes from Win 7
        # Raised explicitly (same exception type as an ``assert``) so the
        # check is not stripped when Python runs with -O.
        if ole.openstream('Version').read() != '\x02\x00\x00\x00':
            raise AssertionError('unexpected StickyNotes Version stream')

        for entry in sorted(ole.listdir()):
            # listdir() yields paths as lists of name components; only
            # two-component paths ending in '0' are treated as note RTF
            # streams.
            if len(entry) == 2 and entry[1] == '0':
                rtf = ole.openstream(entry).read()
                # Keep only the data before the first NUL.  partition()
                # returns the whole stream when there is no NUL, where
                # index() would raise ValueError.
                rtf = rtf.partition('\0')[0]
                # Drop the code-page 936 declaration before conversion.
                rtf = rtf.replace(r'\ansicpg936', '')
                txt = Rtf2Txt.getTxt(rtf)
                print(txt)
def post(self):
    """Handle an uploaded StickyNotes file: validate, print notes, record it.

    The first upload is read fully into memory, opened as an OLE compound
    file and version-checked; every matching RTF stream is converted with
    ``Rtf2Txt.getTxt`` and printed.  The ``UserNote`` entity is stored only
    after the whole file has parsed, so a rejected upload leaves no record
    behind.  Any failure is logged and answered with a redirect to
    ``/upload_failure.html``.
    """
    import logging  # local import keeps this block self-contained

    try:
        upload = self.get_uploads()[0]
        tmp_key = upload.key()

        blob_reader = blobstore.BlobReader(tmp_key, buffer_size=1048576)
        oleobj = OleFileIO_PL.OleFileIO(StringIO(blob_reader.read()))

        # make sure StickyNotes.snt comes from Win 7
        # Raised explicitly so the check is not stripped under -O.
        if oleobj.openstream('Version').read() != '\x02\x00\x00\x00':
            raise AssertionError('unexpected StickyNotes Version stream')

        for entry in sorted(oleobj.listdir()):
            # listdir() yields paths as lists of name components; only
            # two-component paths ending in '0' are treated as note RTF
            # streams.
            if len(entry) == 2 and entry[1] == '0':
                rtf = oleobj.openstream(entry).read()
                # Keep only the data before the first NUL.  partition()
                # returns the whole stream when there is no NUL, where
                # index() would raise ValueError.
                rtf = rtf.partition('\0')[0]
                # TODO: multiple language support
                rtf = rtf.replace(r'\ansicpg936', '')
                txt = Rtf2Txt.getTxt(rtf)
                print(txt)

        # Store it in the GAE datastore only once the file proved readable.
        user_note = UserNote(user=users.get_current_user(), blob_key=tmp_key)
        db.put(user_note)
        print(tmp_key)
    except Exception:
        # Catch Exception rather than everything, so interpreter-exit
        # signals propagate, and keep the traceback for debugging.
        logging.exception('StickyNotes upload could not be processed')
        self.redirect('/upload_failure.html')
def extract_rtf(self, file, file_name):
    """Run ``file`` through ``Rtf2Txt.getTemRtf`` and index the result.

    The converted value is handed to ``self.inverted.inverted_index``
    together with ``file_name``.  Nothing is returned.
    """
    converted = Rtf2Txt.getTemRtf(file)
    indexer = self.inverted
    indexer.inverted_index(converted, file_name)
def extract_rtf(self, file, file_name):
    """Run ``file`` through ``Rtf2Txt.getTemRtf`` and index the result.

    The converted value is handed to ``self.positive.positive_index``
    together with ``file_name``.  Nothing is returned.
    """
    converted = Rtf2Txt.getTemRtf(file)
    indexer = self.positive
    indexer.positive_index(converted, file_name)