def _hdoc(self):
    """
    Build the headers document for this mail and cache it.

    The document is a plain dict keyed by the constants in ``fields``.
    It is built on the first call, stored in ``self._hd`` and handed back
    unchanged on later calls.

    :return: the headers document
    :rtype: dict
    """
    if self._hd:
        return self._hd

    hd = {}
    hd[fields.HEADERS_KEY] = self.headers
    # 'Date' is read by subscript, 'Subject' through .get().
    hd[fields.DATE_KEY] = self.headers['Date']
    hd[fields.CONTENT_HASH_KEY] = self._get_chash()
    hd[fields.MSGID_KEY] = ''
    hd[fields.MULTIPART_KEY] = True
    hd[fields.SUBJECT_KEY] = self.headers.get('Subject')
    hd[fields.TYPE_KEY] = fields.TYPE_HEADERS_VAL
    hd[fields.BODY_KEY] = self._get_body_phash()
    # Keep only the 'part_map' entry of the walked tree under the parts-map
    # key, rather than the whole walk result.
    hd[fields.PARTS_MAP_KEY] = walk.walk_msg_tree(
        walk.get_parts(self._mime_multipart),
        body_phash=self._get_body_phash())['part_map']

    self._hd = hd
    return hd
def _hdoc(self):
    """
    Return the headers document for this mail, building it on first use.

    The document is a dict keyed by the ``fields`` constants; once built
    it is kept in ``self._hd`` and reused by subsequent calls.

    :return: the headers document
    :rtype: dict
    """
    cached = self._hd
    if cached:
        return cached

    doc = {
        fields.HEADERS_KEY: self.headers,
        fields.DATE_KEY: self.headers['Date'],
        fields.CONTENT_HASH_KEY: self._get_chash(),
        fields.MSGID_KEY: '',
        fields.MULTIPART_KEY: True,
        fields.SUBJECT_KEY: self.headers.get('Subject'),
        fields.TYPE_KEY: fields.TYPE_HEADERS_VAL,
        fields.BODY_KEY: self._get_body_phash(),
        # Only the 'part_map' entry of the walked tree is stored.
        fields.PARTS_MAP_KEY: walk.walk_msg_tree(
            walk.get_parts(self._mime_multipart),
            body_phash=self._get_body_phash())['part_map'],
    }

    self._hd = doc
    return doc
def _do_parse(self, raw):
    """
    Parse a raw message and collect information about its outer level.

    This is meant to run in a separate thread; its result is handed over
    to the `_do_add_msg` method as a callback argument.

    :param raw: the raw message
    :type raw: StringIO or basestring
    :return: msg, parts, chash, size, multi
    :rtype: tuple
    """
    parsed = message_from_string(raw)
    outer_parts = walk.get_parts(parsed)
    raw_size = len(raw)
    # Content hash: SHA-256 hex digest of the raw message.
    digest = sha256.SHA256(raw).hexdigest()
    is_multi = parsed.is_multipart()
    return parsed, outer_parts, digest, raw_size, is_multi
def _hdoc(self):
    """
    Return the headers document for this mail, building it on first use.

    The document is built from a copy of ``self.headers`` with a 'From'
    entry added, and is kept in ``self._hd`` for later calls.

    :return: the headers document
    :rtype: dict
    """
    cached = self._hd
    if cached:
        return cached

    # InputMail carries no From header of its own, yet one is needed once
    # the document is persisted into soledad; work on a copy so that
    # self.headers itself is left untouched.
    patched = self.headers.copy()
    patched['From'] = InputMail.FROM_EMAIL_ADDRESS

    doc = {
        HEADERS_KEY: patched,
        DATE_KEY: patched['Date'],
        CONTENT_HASH_KEY: self._get_chash(),
        MSGID_KEY: '',
        MULTIPART_KEY: True,
        SUBJECT_KEY: patched.get('Subject'),
        TYPE_KEY: fields.HEADERS,
        BODY_KEY: self._get_body_phash(),
        # Only the 'part_map' entry of the walked tree is stored.
        PARTS_MAP_KEY: walk.walk_msg_tree(
            walk.get_parts(self._mime_multipart),
            body_phash=self._get_body_phash())['part_map'],
    }

    self._hd = doc
    return doc
def _hdoc(self):
    """
    Return the headers document for this mail, building it on first use.

    The document is a dict keyed by the ``fields`` constants. It is built
    from a copy of ``self.headers`` with a 'From' entry added, and is kept
    in ``self._hd`` for later calls.

    :return: the headers document
    :rtype: dict
    """
    cached = self._hd
    if cached:
        return cached

    # InputMail carries no From header of its own, yet one is needed once
    # the document is persisted into soledad; work on a copy so that
    # self.headers itself is left untouched.
    patched = self.headers.copy()
    patched['From'] = InputMail.FROM_EMAIL_ADDRESS

    doc = {
        fields.HEADERS_KEY: patched,
        fields.DATE_KEY: patched['Date'],
        fields.CONTENT_HASH_KEY: self._get_chash(),
        fields.MSGID_KEY: '',
        fields.MULTIPART_KEY: True,
        fields.SUBJECT_KEY: patched.get('Subject'),
        fields.TYPE_KEY: fields.TYPE_HEADERS_VAL,
        fields.BODY_KEY: self._get_body_phash(),
        # Only the 'part_map' entry of the walked tree is stored.
        fields.PARTS_MAP_KEY: walk.walk_msg_tree(
            walk.get_parts(self._mime_multipart),
            body_phash=self._get_body_phash())['part_map'],
    }

    self._hd = doc
    return doc
# Message to parse: first command-line argument, or the default sample.
if len(sys.argv) > 1:
    FILENAME = sys.argv[1]
else:
    FILENAME = "rfc822.multi-signed.message"

# Alternative samples:
# FILENAME = "rfc822.plain.message"
# FILENAME = "rfc822.multi-minimal.message"

# Parse inside a ``with`` block so the file handle is closed once parsing
# is done instead of being left open.
with open(FILENAME) as message_file:
    msg = p.parse(message_file)
DO_CHECK = False
#################################################

parts = W.get_parts(msg)

if DEBUG:

    def trim(item):
        # NOTE(review): this only rebinds the local name; nothing is
        # returned and the caller's value is left as it was.
        item = item[:10]

    for part in parts:
        if part.get('phash', None):
            trim(part["phash"])

raw_docs = list(W.get_raw_docs(msg, parts))

# Pick the body-phash helper that matches the message structure.
if msg.is_multipart():
    body_phash_fun = W.get_body_phash_multi
else:
    body_phash_fun = W.get_body_phash_simple
body_phash = body_phash_fun(W.get_payloads(msg))
parts_map = W.walk_msg_tree(parts, body_phash=body_phash)

# TODO add missing headers!
def _parse_msg(raw):
    """
    Parse a raw message and gather its outer-level information.

    :param raw: the raw message
    :return: msg, parts, chash, multi -- the parsed message, the result of
             ``walk.get_parts`` on it, the SHA-256 hex digest of ``raw``
             and whether the message is multipart
    :rtype: tuple
    """
    parsed = message_from_string(raw)
    outer_parts = walk.get_parts(parsed)
    digest = sha256.SHA256(raw).hexdigest()
    is_multi = parsed.is_multipart()
    return parsed, outer_parts, digest, is_multi
# Input from hell

# Message to parse: first command-line argument, or the default sample.
if len(sys.argv) > 1:
    FILENAME = sys.argv[1]
else:
    FILENAME = "rfc822.multi-signed.message"

# Alternative samples:
# FILENAME = "rfc822.plain.message"
# FILENAME = "rfc822.multi-minimal.message"

# Parse inside a ``with`` block so the file handle is closed once parsing
# is done instead of being left open.
with open(FILENAME) as message_file:
    msg = p.parse(message_file)
DO_CHECK = False
#################################################

parts = W.get_parts(msg)

if DEBUG:

    def trim(item):
        # NOTE(review): this only rebinds the local name; nothing is
        # returned and the caller's value is left as it was.
        item = item[:10]

    for part in parts:
        if part.get('phash', None):
            trim(part["phash"])

raw_docs = list(W.get_raw_docs(msg, parts))

# Pick the body-phash helper that matches the message structure.
if msg.is_multipart():
    body_phash_fun = W.get_body_phash_multi
else:
    body_phash_fun = W.get_body_phash_simple
body_phash = body_phash_fun(W.get_payloads(msg))
parts_map = W.walk_msg_tree(parts, body_phash=body_phash)
def _parse_msg(raw):
    """
    Parse a raw message and gather its outer-level information.

    :param raw: the raw message
    :return: msg, parts, chash, multi -- the parsed message, the result of
             ``walk.get_parts`` on it, the result of ``walk.get_hash`` on
             ``raw`` and whether the message is multipart
    :rtype: tuple
    """
    parsed = message_from_string(raw)
    outer_parts = walk.get_parts(parsed)
    digest = walk.get_hash(raw)
    is_multi = parsed.is_multipart()
    return parsed, outer_parts, digest, is_multi