def main():
    """Render every pep-*.rst in the CWD to HTML under build/ and write an index.

    For each PEP source file the converted HTML is emitted next to an
    index.html linking every generated page by PEP number and title.
    """
    # BUG FIX: the original called os.path.dirname('__file__') with the name
    # quoted, which always returns '' (outdir silently became a CWD-relative
    # 'build').  Use the real module attribute.
    outdir = os.path.join(os.path.dirname(__file__), 'build')
    # Start from a clean output directory; tolerate it not existing yet.
    try:
        shutil.rmtree(outdir)
    except FileNotFoundError:
        pass
    os.mkdir(outdir)
    filenames = []
    names = {}  # pep number -> title, for the index page
    for filename in sorted(glob.glob("pep-*.rst")):
        outbasename = os.path.basename(filename[:-4] + '.html')
        filenames.append(outbasename)
        outfilename = os.path.join(outdir, outbasename)
        pepnum = get_pepnum(outfilename)
        print(filename, '->', outfilename)
        with open(filename) as inf, open(outfilename, 'w') as outf:
            fix_rst_pep(inf, outf, filename, pepnum)
        # Re-open to read the RFC-822-style PEP headers for the title.
        with open(filename) as inf:
            parser = HeaderParser()
            metadata = parser.parse(inf)
        names[pepnum] = metadata['Title']
    index_filename = os.path.join(outdir, 'index.html')
    print(index_filename)
    with open(index_filename, 'w') as f:
        f.write('<html><head><title>Draft PEP index</title></head>')
        f.write('<body><h1>Draft PEP index</h1><ul>')
        for filename in filenames:
            pepnum = get_pepnum(filename)
            f.write('<li>{num}: <a href="{link}">{name}</a></li>'.format(
                link=filename, num=pepnum, name=names[pepnum]))
        f.write('</ul></body></html>')
def load_files(file_names):
    """Parse each file as an RFC-822 message and collect its Subject line.

    Subjects are cleaned of commas, newlines and tabs.  Files whose subject
    is empty are counted separately; files that fail to parse (including
    decoding errors, or a missing Subject header) are counted as errors.
    Prints a short summary and returns the list of cleaned subjects.
    """
    subjects = []
    errors = []
    empty = 0
    parser = HeaderParser()
    for file_name in file_names:
        with open(file_name, 'r') as fh:
            try:
                msg = parser.parse(fh)
                subject = msg['Subject'].replace(',', '').replace(
                    '\n', '').replace('\t', '').strip()
                if subject:
                    subjects.append(subject)
                else:
                    empty += 1
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit); behavior is still best-effort:
            # any parse or decode failure just records the file as an error.
            except Exception:
                errors.append(file_name)
    print('file_names', len(file_names))
    print('valid subjects', len(subjects))
    print('errors', len(errors))
    print('empty', empty)
    print()
    return subjects
def __init__(self, rep_file):
    """Init object from an open REP file object.

    Validates header order/presence against self.headers, then extracts
    number, title, type, status and authors.  Raises REPError on any
    structural problem and REPParseError on a non-integer REP number.
    """
    # Parse the headers.
    self.filename = rep_file
    rep_parser = HeaderParser()
    metadata = rep_parser.parse(rep_file)
    header_order = iter(self.headers)
    try:
        for header_name in metadata.keys():
            # Py3 fix: iterator.next() no longer exists; use next() builtin.
            current_header, required = next(header_order)
            # Skip optional headers the file chose not to include.
            while header_name != current_header and not required:
                current_header, required = next(header_order)
            if header_name != current_header:
                raise REPError("did not deal with "
                               "%r before having to handle %r" %
                               (header_name, current_header),
                               rep_file.name)
    except StopIteration:
        raise REPError("headers missing or out of order", rep_file.name)
    # Any *required* header still left in the iterator is missing.
    required = False
    try:
        while not required:
            current_header, required = next(header_order)
        else:
            raise REPError("REP is missing its %r" % (current_header,),
                           rep_file.name)
    except StopIteration:
        pass
    # 'REP'.
    try:
        self.number = int(metadata['REP'])
    except ValueError:
        raise REPParseError("REP number isn't an integer", rep_file.name)
    # 'Title'.
    self.title = metadata['Title']
    # 'Type'.
    type_ = metadata['Type']
    if type_ not in self.type_values:
        raise REPError('%r is not a valid Type value' % (type_,),
                       rep_file.name, self.number)
    self.type_ = type_
    # 'Status'.
    status = metadata['Status']
    if status not in self.status_values:
        raise REPError("%r is not a valid Status value" % (status,),
                       rep_file.name, self.number)
    # Special case for Active REPs.
    if (status == u"Active" and
            self.type_ not in ("Process", "Informational")):
        raise REPError("Only Process and Informational REPs may "
                       "have an Active status", rep_file.name, self.number)
    self.status = status
    # 'Author'.
    authors_and_emails = self._parse_author(metadata['Author'])
    if len(authors_and_emails) < 1:
        raise REPError("no authors found", rep_file.name, self.number)
    # Py3 fix: map() is lazy; materialize so self.authors is reusable.
    self.authors = [Author(ae) for ae in authors_and_emails]
def handle_read(self):
    """Accumulate incoming HTTP response bytes; parse the status line and
    headers once the full header block has arrived.

    Incremental-read callback (asyncore/dispatcher style — presumably; the
    base class is not visible here).  Bytes are buffered in self.data; the
    header section is parsed exactly once (guarded by self.header).
    """
    payload = self.recv(8192)
    self.log.debug("handle read: %d bytes" % len(payload))
    self.data.write(payload)
    # Stop reading once we exceed the caller-imposed byte budget.
    if self.maxBytes is not None and self.data.tell() > self.maxBytes:
        # hopefully we have enough data
        self.handle_close()
    if not self.header:
        # parse header
        self.data.seek(0)
        try:
            # End of the header block: blank line after the last header.
            self.hdrEnd = self.data.getvalue().index("\r\n\r\n") + 4
        except ValueError:
            return  # continue until we have all the headers
        # status line is "HTTP/version status message"
        status = self.data.readline()
        self.status = status.split(" ", 2)  # -> [version, code, message]
        # followed by a rfc822-style message header
        parser = HeaderParser()
        self.header = parser.parse(self.data)
        # Leave the buffer positioned at the end for subsequent writes.
        self.data.seek(0, os.SEEK_END)
        if self.log.isEnabledFor(logging.DEBUG):
            self.log.debug("version %s " % self.status[0])
            self.log.debug("status %s %s " % tuple(self.status[1:]))
            for key, value in self.header.items():
                self.log.debug("header %s = %s" % (key, value))
        # Anything other than 200 aborts the connection.
        if self.status[1] != "200":
            self.log.error("status = %s %s" % tuple(self.status[1:]))
            self.close()
def reindexCallBack(arg, dirname, fnames):
    """
    Verifies that all files on disk are saved with the proper name (sha1).
    This function is useful if we ever change how the SHA1 is computed. It
    allows us to re-index the local storage and adjust our structures without
    having to re-download all the mail.
    """
    (msgIndex, fldIndex, backupDir) = arg
    progress("\r%s" % dirname)
    # BUG FIX: dict.has_key() was removed in Python 3; use `in`.
    for oldSha1 in [f for f in fnames if f in msgIndex]:
        msg = msgIndex[oldSha1]
        fp = open(os.path.join(dirname, oldSha1), 'r')
        firstLine = fp.readline()
        if firstLine.startswith('>From - '):
            # Broken headers that will trip up HeaderParser — skip the
            # mbox "From " line and parse from the second line onward.
            pass
        else:
            fp.seek(0)
        parser = HeaderParser()
        pMsg = parser.parse(fp, headersonly=True)
        fp.close()
        sha1 = EmailMsg.computeSha1(msg['internaldate'], pMsg)
        if sha1 != oldSha1:
            # If the message is malformed, the parser could fail here, even
            # though it succeeded in EmailMsg.__init__ because in EmailMsg the
            # full headers were parsed by Google, and here by HeaderParser.
            logger.debug("Mismatch %s vs %s", sha1, oldSha1)
            set_trace()
            # Re-parse from scratch for inspection under the debugger.
            fp = open(os.path.join(dirname, oldSha1), 'r')
            parser = HeaderParser()
            pMsg = parser.parse(fp, headersonly=True)
            fp.close()
            sha1 = EmailMsg.computeSha1(msg['internaldate'], pMsg)
            if options.dryRun:
                continue
            EmailMsg.move(oldSha1, sha1, backupDir)
            msgIndex[sha1] = msg
            del msgIndex[oldSha1]
            # Repoint every folder UID that referenced the old hash.
            # NOTE(review): `is` compares identity, not equality — presumably
            # the index stores the very same string object; confirm.
            for folder in msgIndex[sha1]['folder']:
                for UID in [item[0] for item in fldIndex[folder].msgs.items()
                            if item[1] is oldSha1]:
                    fldIndex[folder].msgs[UID] = sha1
        else:
            pass
def __init__(self, rep_file): """Init object from an open REP file object.""" # Parse the headers. self.filename = rep_file rep_parser = HeaderParser() metadata = rep_parser.parse(rep_file) header_order = iter(self.headers) try: for header_name in metadata.keys(): current_header, required = next(header_order) while header_name != current_header and not required: current_header, required = next(header_order) if header_name != current_header: raise REPError("did not deal with " "%r before having to handle %r" % (header_name, current_header), rep_file.name) except StopIteration: raise REPError("headers missing or out of order", rep_file.name) required = False try: while not required: current_header, required = next(header_order) else: raise REPError("REP is missing its %r" % (current_header,), rep_file.name) except StopIteration: pass # 'REP'. try: self.number = int(metadata['REP']) except ValueError: raise REPParseError("REP number isn't an integer", rep_file.name) # 'Title'. self.title = metadata['Title'] # 'Type'. type_ = metadata['Type'] if type_ not in self.type_values: raise REPError('%r is not a valid Type value' % (type_,), rep_file.name, self.number) self.type_ = type_ # 'Status'. status = metadata['Status'] if status not in self.status_values: raise REPError("%r is not a valid Status value" % (status,), rep_file.name, self.number) # Special case for Active REPs. if (status == u"Active" and self.type_ not in ("Process", "Informational")): raise REPError("Only Process and Informational REPs may " "have an Active status", rep_file.name, self.number) self.status = status # 'Author'. authors_and_emails = self._parse_author(metadata['Author']) if len(authors_and_emails) < 1: raise REPError("no authors found", rep_file.name, self.number) self.authors = [Author(x) for x in authors_and_emails]
def version_from_pkginfo():
    """Retrieve the version from an sdist's PKG-INFO file, or None on failure."""
    from email.parser import HeaderParser
    parser = HeaderParser()
    try:
        with open('PKG-INFO') as file:
            pkg_info = parser.parse(file)
    except FileNotFoundError:
        print('This is not a regular source distribution!')
        return None
    # BUG FIX: message said "PKG-SOURCES" although the code reads PKG-INFO.
    print('Retrieving the distribution version from PKG-INFO.')
    return pkg_info['Version']
def version_from_pkginfo():
    """Retrieve the version from an sdist's PKG-INFO file or None on failure"""
    from email.parser import HeaderParser
    # No PKG-INFO means we are not running from an sdist.
    try:
        fp = open('PKG-INFO')
    except FileNotFoundError:
        return None
    # The file is a block of RFC-822 headers; pull the Version field out.
    with fp:
        pkg_info = HeaderParser().parse(fp)
    print('Retrieving the distribution version from PKG-INFO.')
    return pkg_info['Version']
def readAuthHeaders(filename):
    """Parse the RFC-822 headers of *filename* and print its SPF test result."""
    with open(filename) as f:
        parser = HeaderParser()
        h = parser.parse(f)
    # Removed a dead debug block that was parked in a triple-quoted string
    # statement (it printed every parsed header); a string expression is not
    # a comment and allocates at runtime for no effect.
    print('spf test: {}'.format(testSPF(h)))
def parse():
    """Parse the headers of a user-chosen email file into the global `msg`.

    On success calls out() and returns the parsed message; if no file was
    chosen (open raises FileNotFoundError) prints a hint and exits.
    """
    global msg
    parser = HeaderParser()
    try:
        # BUG FIX: the original passed open(...) directly and never closed
        # the handle; the with-statement guarantees it is released.
        with open(openfile()) as fp:
            msg = parser.parse(fp)
    except FileNotFoundError:
        print(
            "No file was selected, Please re-run the program and choose a file."
        )
        exit()
    else:
        out()
        return msg
def _read_metadata(cls, fpath):
    """
    Read the original format which is stored as RFC-822 headers.
    """
    metadata = odict()
    if fpath and isfile(fpath):
        # FIXME: Is this a correct assumption for the encoding?
        # This was needed due to some errors on windows
        with open(fpath) as stream:
            metadata = HeaderParser().parse(stream)
    return cls._message_to_dict(metadata)
def __init__(self, pep_file):
    """Build a PEP from an open file object, parsing headers and Created date."""
    super(PEP, self).__init__(pep_file)
    # The base constructor consumed the stream; rewind before re-parsing.
    pep_file.seek(0)
    self.metadata = metadata = HeaderParser().parse(pep_file)
    created = metadata['Created']
    if created:
        # Strip known-bad trailing text before handing the date over.
        self.created = parse_date(RE_BAD_SUFFIX.sub('', created))
    else:
        self.created = None
def parsemulti_no_bl():
    """Ask the user for a directory of email files and run the per-message
    analysis pipeline (SPF/DKIM/DMARC/attachments/risk, no blacklist check)
    on every file, resetting the global counters before each message.
    """
    global msg
    global spfCount
    global spfPassCount
    global spfFailCount
    global dkimCount
    global dkimPassCount
    global dkimFailCount
    global dmarcCount
    global dmarcPassCount
    global dmarcFailCount
    global totalChecksCount
    global totalChecksFailCount
    global AttachmentCount
    global ShellAttchCount
    global pdfAttachCount
    global pngAttachCount
    global jpegAttachCount
    global zipAttachCount
    root = Tk()
    root.withdraw()
    folder_selected = filedialog.askdirectory(
        title='Choose the directory where you have your email files stored')
    print(folder_selected)
    parser = HeaderParser()
    num_files = 0
    #headers = parser.parsestr(msg.as_string())
    for path, dirs, files in os.walk(folder_selected):
        for f in files:
            num_files += 1
            # Reset all per-message counters before analyzing this file.
            spfCount = 0
            spfPassCount = 0
            spfFailCount = 0
            dkimCount = 0
            dkimPassCount = 0
            dkimFailCount = 0
            dmarcCount = 0
            dmarcPassCount = 0
            dmarcFailCount = 0
            totalChecksCount = 0
            totalChecksFailCount = 0
            AttachmentCount = 0
            ShellAttchCount = 0
            pdfAttachCount = 0
            pngAttachCount = 0
            jpegAttachCount = 0
            zipAttachCount = 0
            # BUG FIX: the original passed open(...) directly into parse()
            # and leaked one file handle per message.
            # NOTE(review): joining with folder_selected (not `path`) means
            # files in subdirectories are looked up at the wrong location —
            # kept as-is to preserve behavior; confirm intent.
            with open(os.path.join(folder_selected, f)) as fp:
                msg = parser.parse(fp)
            print('')
            print(
                '================================================================='
            )
            print('Email Number: ' + str(num_files))
            print(
                '================================================================='
            )
            print('')
            out()
            spf()
            dkim()
            dmarc()
            attach()
            attachtype()
            clientip()
            risk_no_bl()
def __init__(self, pep_file):
    """Init object from an open PEP file object.

    Validates header order/presence against self.headers, then extracts
    number, title, type, status, authors, delegates and creation date.
    Raises PEPError on structural problems, PEPParseError on a bad number.
    """
    # Parse the headers.
    self.filename = pep_file
    pep_parser = HeaderParser()
    metadata = pep_parser.parse(pep_file)
    header_order = iter(self.headers)
    try:
        for header_name in metadata.keys():
            # Py3 fix: iterator.next() no longer exists; use next() builtin.
            current_header, required = next(header_order)
            # Skip optional headers the file chose not to include.
            while header_name != current_header and not required:
                current_header, required = next(header_order)
            if header_name != current_header:
                raise PEPError(
                    "did not deal with "
                    "%r before having to handle %r" %
                    (header_name, current_header), pep_file.name)
    except StopIteration:
        raise PEPError("headers missing or out of order", pep_file.name)
    # Any *required* header still left in the iterator is missing.
    required = False
    try:
        while not required:
            current_header, required = next(header_order)
        else:
            raise PEPError("PEP is missing its %r" % (current_header, ),
                           pep_file.name)
    except StopIteration:
        pass
    # 'PEP'.
    try:
        self.number = int(metadata['PEP'])
    except ValueError:
        raise PEPParseError("PEP number isn't an integer", pep_file.name)
    # 'Title'.
    self.title = metadata['Title']
    # 'Type'.
    type_ = metadata['Type']
    if type_ not in self.type_values:
        raise PEPError('%r is not a valid Type value' % (type_, ),
                       pep_file.name, self.number)
    self.type_ = type_
    # 'Status'.
    status = metadata['Status']
    if status not in self.status_values:
        if status == "April Fool!":
            # See PEP 401 :)
            status = "Rejected"
        else:
            raise PEPError("%r is not a valid Status value" % (status, ),
                           pep_file.name, self.number)
    # Special case for Active PEPs.
    if (status == u"Active" and
            self.type_ not in ("Process", "Informational")):
        raise PEPError(
            "Only Process and Informational PEPs may "
            "have an Active status", pep_file.name, self.number)
    self.status = status
    # 'Author'.
    authors_and_emails = self._parse_author(metadata['Author'])
    if len(authors_and_emails) < 1:
        raise PEPError("no authors found", pep_file.name, self.number)
    # Py3 fix: map() is lazy; materialize so the attribute is reusable.
    self.authors = [Author(ae) for ae in authors_and_emails]
    # 'BDFL-Delegate'
    self.delegates = []
    if 'BDFL-Delegate' in metadata:
        delegates_and_emails = self._parse_author(
            metadata['BDFL-Delegate'])
        self.delegates = [Author(de) for de in delegates_and_emails]
    # 'Created' for creation date of the PEP
    self.created = _parse_date(metadata['Created'])
def extractStats(backupDir, msgIndexFile, fldIndexFile):
    """Compute per-mailbox statistics and write them to stats-<email>.xml.

    Counts total/list/sent/received messages, aggregates send/receive times,
    and tallies per-sender and per-recipient message counts.
    TODO:
        Messages received per day
        Messages sent per day
    """
    msgIndex = deserialize(msgIndexFile)
    fldIndex = deserialize(fldIndexFile)
    timeStats = {'Yrs': {}, 'DOW': {}, 'Hrs': {}}
    stats = {'CountTotalMsgs': 0, 'CountListMsgs': 0, 'CountSentMsgs': 0,
             'CountRcvdMsgs': 0, 'TimeAll': deepcopy(timeStats),
             'TimeSent': deepcopy(timeStats), 'TimeRcvd': deepcopy(timeStats)}
    hFrom = {}
    hTo = {}
    SentFolder = [f[0] for f in fldIndex.items() if '\\Sent' in f[1]['Type']][0]
    logger.debug("SentFolder=" + SentFolder)
    idx = 0
    status("Computing stats for %d message(s).\n" % (len(msgIndex)))
    for sha1 in msgIndex.keys():
        idx += 1
        fp = open(os.path.join(backupDir, sha1[0:2], sha1), 'r')
        firstLine = fp.readline()
        if firstLine.startswith('>From - '):
            # Broken headers that will trip up HeaderParser — skip the
            # mbox "From " line and parse from the second line onward.
            pass
        else:
            fp.seek(0)
        parser = HeaderParser()
        pMsg = parser.parse(fp, headersonly=True)
        fp.close()
        stats['CountTotalMsgs'] = stats['CountTotalMsgs'] + 1
        # Mailing-list traffic is counted but otherwise ignored.
        if pMsg.get('list-id', ""):
            stats['CountListMsgs'] = stats['CountListMsgs'] + 1
            continue
        date = parsedate_tz(pMsg.get('date', ""))
        # Idiom fix: `is not None` instead of `!= None`.
        if date is not None:
            date = time.localtime(mktime_tz(date))
            Stats.saveDateStats(stats['TimeAll'], date)
        #set_trace()
        if SentFolder in msgIndex[sha1]['folder']:
            stats['CountSentMsgs'] = stats['CountSentMsgs'] + 1
            try:
                receivers = pMsg.get_all('to', []) + pMsg.get_all('cc', [])
                for toEmail in [m[1].lower() for m in getaddresses(receivers)
                                if len(m[1]) > 0]:
                    hTo[toEmail] = hTo.setdefault(toEmail, 0) + 1
                Stats.saveDateStats(stats['TimeSent'], date)
            # Narrowed from bare `except:`; still best-effort, just logged.
            except Exception:
                logger.exception("Exception parsing recipient data")
        else:
            stats['CountRcvdMsgs'] = stats['CountRcvdMsgs'] + 1
            try:
                for fromEmail in [m[1].lower()
                                  for m in getaddresses(pMsg.get_all('from', []))]:
                    hFrom[fromEmail] = hFrom.setdefault(fromEmail, 0) + 1
                Stats.saveDateStats(stats['TimeRcvd'], date)
            # Narrowed from bare `except:`; still best-effort, just logged.
            except Exception:
                logger.exception("Exception parsing sender data")
        progress('\r%.0f%% %d/%d ' % (idx * 100.0 / len(msgIndex), idx,
                                      len(msgIndex)))
    xml = dict2xml({'Stats': stats})
    senders = xml.doc.createElement("Senders")
    xml.root.appendChild(senders)
    for email, count in sorted(hFrom.items(), key=lambda x: x[1], reverse=True):
        sender = xml.doc.createElement("Sender")
        sender.setAttribute("Email", email)
        sender.setAttribute("Count", str(count))
        senders.appendChild(sender)
    rcvrs = xml.doc.createElement("Receivers")
    xml.root.appendChild(rcvrs)
    for email, count in sorted(hTo.items(), key=lambda x: x[1], reverse=True):
        rcvr = xml.doc.createElement("Receiver")
        rcvr.setAttribute("Email", email)
        rcvr.setAttribute("Count", str(count))
        rcvrs.appendChild(rcvr)
    # with-statement replaces the manual try/finally close.
    with open("stats-{0}.xml".format(options.email), 'w') as output:
        xml.doc.writexml(output, encoding='utf-8', indent='    ',
                         addindent='    ', newl="\n")
def _get_version_from_pkg_info(pkg_info_filename): """get the version from a PKG-INFO (see pep-0314) file""" with open(pkg_info_filename, 'r') as f: parser = HeaderParser() headers = parser.parse(f) return headers.get('version')