Exemple #1
0
def main():
    """Convert every ``pep-*.rst`` draft to HTML and write an index page.

    The ``build`` output directory is removed and recreated, each
    ``pep-*.rst`` matched in the current working directory is passed through
    ``fix_rst_pep``, and ``build/index.html`` is written with one link per
    generated page, labelled with the draft's number and ``Title`` header.
    """
    import html  # local import: only needed to escape the index entries

    # NOTE(review): '__file__' is a quoted string here, so dirname() returns ''
    # and outdir is simply 'build' relative to the working directory --
    # confirm whether the unquoted __file__ was intended.
    outdir = os.path.join(os.path.dirname('__file__'), 'build')
    try:
        shutil.rmtree(outdir)
    except FileNotFoundError:
        # Nothing to clean up when the directory does not exist yet.
        pass
    os.mkdir(outdir)

    # (html file name, pep number, title) for every converted draft, in order.
    entries = []
    for filename in sorted(glob.glob("pep-*.rst")):
        outbasename = os.path.basename(filename[:-4] + '.html')
        outfilename = os.path.join(outdir, outbasename)
        pepnum = get_pepnum(outfilename)
        print(filename, '->', outfilename)
        with open(filename) as inf, open(outfilename, 'w') as outf:
            fix_rst_pep(inf, outf, filename, pepnum)
        with open(filename) as inf:
            metadata = HeaderParser().parse(inf)
        entries.append((outbasename, pepnum, metadata['Title']))

    index_filename = os.path.join(outdir, 'index.html')
    print(index_filename)
    with open(index_filename, 'w') as f:
        f.write('<html><head><title>Draft PEP index</title></head>')
        f.write('<body><h1>Draft PEP index</h1><ul>')
        for link, pepnum, title in entries:
            # Titles and file names come straight from the source files;
            # escape them so '<', '&' or '"' cannot break the markup.
            f.write('<li>{num}: <a href="{link}">{name}</a></li>'.format(
                link=html.escape(link, quote=True),
                num=pepnum,
                name=html.escape(str(title))))
        f.write('</ul></body></html>')
def load_files(file_names):
    """Collect the cleaned ``Subject`` header of every file in *file_names*.

    Each file is parsed with ``HeaderParser``; commas, newlines and tabs are
    removed from the subject and surrounding whitespace is stripped.  A short
    summary (files seen, usable subjects, failures, empty subjects) is
    printed before returning.

    Args:
        file_names: Sized collection of paths to message files.

    Returns:
        The non-empty cleaned subjects, in input order.
    """
    # Deletion table: one pass instead of three chained replace() calls.
    unwanted = str.maketrans('', '', ',\n\t')
    subjects = []
    errors = []
    empty = 0
    parser = HeaderParser()
    for file_name in file_names:
        with open(file_name, 'r') as fh:
            try:
                msg = parser.parse(fh)
                subject = msg['Subject'].translate(unwanted).strip()
            except Exception:
                # Undecodable files and messages without a Subject header
                # (msg['Subject'] is None) are both recorded as errors.
                # Unlike a bare ``except:``, KeyboardInterrupt and
                # SystemExit are allowed to propagate.
                errors.append(file_name)
                continue
        if subject:
            subjects.append(subject)
        else:
            empty += 1
    print('file_names', len(file_names))
    print('valid subjects', len(subjects))
    print('errors', len(errors))
    print('empty', empty)
    print()
    return subjects
Exemple #3
0
 def __init__(self, rep_file):
     """Init object from an open REP file object.

     Parses the RFC 822-style headers of *rep_file*, checks them against
     ``self.headers`` (iterated as ``(name, required)`` pairs), and stores
     the number, title, type, status and authors on the instance.

     Raises:
         REPError: a header is unexpected, out of order or missing, or the
             Type/Status/Author value is invalid.
         REPParseError: the ``REP`` header is not an integer.
     """
     # Parse the headers.
     self.filename = rep_file
     rep_parser = HeaderParser()
     metadata = rep_parser.parse(rep_file)
     header_order = iter(self.headers)
     # The built-in next() works on Python 2.6+ and Python 3 alike, whereas
     # the iterator's .next() method only exists on Python 2.
     try:
         for header_name in metadata.keys():
             current_header, required = next(header_order)
             # Optional expected headers that are absent may be skipped.
             while header_name != current_header and not required:
                 current_header, required = next(header_order)
             if header_name != current_header:
                 raise REPError("did not deal with "
                                "%r before having to handle %r" %
                                (header_name, current_header),
                                rep_file.name)
     except StopIteration:
         raise REPError("headers missing or out of order",
                             rep_file.name)
     # Whatever is left in the expected sequence must be optional.
     required = False
     try:
         while not required:
             current_header, required = next(header_order)
         else:
             raise REPError("REP is missing its %r" % (current_header,),
                            rep_file.name)
     except StopIteration:
         pass
     # 'REP'.
     try:
         self.number = int(metadata['REP'])
     except ValueError:
         raise REPParseError("REP number isn't an integer", rep_file.name)
     # 'Title'.
     self.title = metadata['Title']
     # 'Type'.
     type_ = metadata['Type']
     if type_ not in self.type_values:
         raise REPError('%r is not a valid Type value' % (type_,),
                        rep_file.name, self.number)
     self.type_ = type_
     # 'Status'.
     status = metadata['Status']
     if status not in self.status_values:
         raise REPError("%r is not a valid Status value" %
                        (status,), rep_file.name, self.number)
     # Special case for Active REPs.
     if (status == u"Active" and
             self.type_ not in ("Process", "Informational")):
         raise REPError("Only Process and Informational REPs may "
                        "have an Active status", rep_file.name,
                        self.number)
     self.status = status
     # 'Author'.
     authors_and_emails = self._parse_author(metadata['Author'])
     if len(authors_and_emails) < 1:
         raise REPError("no authors found", rep_file.name,
                        self.number)
     # Build a real list: on Python 3 map() would leave a one-shot iterator.
     self.authors = [Author(x) for x in authors_and_emails]
Exemple #4
0
def main():
    """Convert every ``pep-*.rst`` draft to HTML and write an index page.

    The ``build`` output directory is removed and recreated, each
    ``pep-*.rst`` matched in the current working directory is passed through
    ``fix_rst_pep``, and ``build/index.html`` is written with one link per
    generated page, labelled with the draft's number and ``Title`` header.
    """
    import html  # local import: only needed to escape the index entries

    # NOTE(review): '__file__' is a quoted string here, so dirname() returns ''
    # and outdir is simply 'build' relative to the working directory --
    # confirm whether the unquoted __file__ was intended.
    outdir = os.path.join(os.path.dirname('__file__'), 'build')
    try:
        shutil.rmtree(outdir)
    except FileNotFoundError:
        # Nothing to clean up when the directory does not exist yet.
        pass
    os.mkdir(outdir)

    # (html file name, pep number, title) for every converted draft, in order.
    entries = []
    for filename in sorted(glob.glob("pep-*.rst")):
        outbasename = os.path.basename(filename[:-4] + '.html')
        outfilename = os.path.join(outdir, outbasename)
        pepnum = get_pepnum(outfilename)
        print(filename, '->', outfilename)
        with open(filename) as inf, open(outfilename, 'w') as outf:
            fix_rst_pep(inf, outf, filename, pepnum)
        with open(filename) as inf:
            metadata = HeaderParser().parse(inf)
        entries.append((outbasename, pepnum, metadata['Title']))

    index_filename = os.path.join(outdir, 'index.html')
    print(index_filename)
    with open(index_filename, 'w') as f:
        f.write('<html><head><title>Draft PEP index</title></head>')
        f.write('<body><h1>Draft PEP index</h1><ul>')
        for link, pepnum, title in entries:
            # Titles and file names come straight from the source files;
            # escape them so '<', '&' or '"' cannot break the markup.
            f.write('<li>{num}: <a href="{link}">{name}</a></li>'.format(
                link=html.escape(link, quote=True),
                num=pepnum,
                name=html.escape(str(title))))
        f.write('</ul></body></html>')
Exemple #5
0
 def handle_read(self):
     """Buffer incoming data and parse the HTTP response head once complete.

     Every call appends up to 8192 units from ``recv`` to ``self.data``.
     Until ``self.header`` is set, the buffer is searched for the blank
     line ending the head; when found, ``self.hdrEnd``, ``self.status``
     (the status line split into at most three parts) and ``self.header``
     are filled in.  A status other than "200" is logged as an error and
     the connection is closed.
     """
     payload = self.recv(8192)
     self.log.debug("handle read: %d bytes" % len(payload))
     self.data.write(payload)
     if self.maxBytes is not None and self.data.tell() > self.maxBytes:
         # hopefully we have enough data
         self.handle_close()
     if self.header:
         return  # head already parsed
     try:
         self.hdrEnd = self.data.getvalue().index("\r\n\r\n") + 4
     except ValueError:
         # Head still incomplete.  The buffer position must stay at the end
         # so the next chunk is appended; rewinding before this check made
         # the following write() overwrite the data received so far.
         return # continue until we have all the headers
     # parse header
     self.data.seek(0)
     # status line is "HTTP/version status message"
     status = self.data.readline()
     self.status = status.split(" ", 2)
     # followed by a rfc822-style message header
     parser = HeaderParser()
     self.header = parser.parse(self.data)
     # Back to the end of the buffer so later writes append.
     self.data.seek(0, os.SEEK_END)
     if self.log.isEnabledFor(logging.DEBUG):
         self.log.debug("version %s " % self.status[0])
         self.log.debug("status  %s %s " % tuple(self.status[1:]))
         for key, value in self.header.items():
             self.log.debug("header  %s = %s" % (key, value))
     if self.status[1] != "200":
         self.log.error("status = %s %s" % tuple(self.status[1:]))
         self.close()
Exemple #6
0
def reindexCallBack(arg, dirname, fnames):
    """
        Verifies that all files on disk are saved with the proper name (sha1).

        This function is useful if we ever change how the SHA1 is computed. It
        allows us to re-index the local storage and adjust our structures
        without having to re-download all the mail.

        arg     -- (msgIndex, fldIndex, backupDir) tuple
        dirname -- directory containing the files listed in fnames
        fnames  -- file names in dirname; only those present as keys of
                   msgIndex are examined
    """
    (msgIndex, fldIndex, backupDir) = arg
    progress("\r%s" % dirname)
    # Snapshot the matching names first: msgIndex is modified in the loop.
    for oldSha1 in [f for f in fnames if f in msgIndex]:
        msg = msgIndex[oldSha1]
        with open(os.path.join(dirname, oldSha1), 'r') as fp:
            firstLine = fp.readline()
            if not firstLine.startswith('>From - '):
                fp.seek(0)
            # else: the first line is a broken header that would trip up
            # HeaderParser, so parsing starts on the second line.
            pMsg = HeaderParser().parse(fp, headersonly=True)
        sha1 = EmailMsg.computeSha1(msg['internaldate'], pMsg)
        if sha1 == oldSha1:
            continue

        # If the message is malformed, the parser could fail here, even
        # though it succeeded in EmailMsg.__init__ because in EmailMsg the
        # full headers were parsed by Google, and here by HeaderParser.
        logger.debug("Mismatch %s vs %s", sha1, oldSha1)
        # A leftover set_trace() breakpoint and a duplicated re-parse (which
        # skipped the '>From - ' handling above) used to sit here; the hash
        # computed above is the one used for the rename.

        if options.dryRun:
            continue

        EmailMsg.move(oldSha1, sha1, backupDir)
        msgIndex[sha1] = msg
        del msgIndex[oldSha1]
        for folder in msg['folder']:
            folderMsgs = fldIndex[folder].msgs
            # Compare by value: 'is' only matched when both names happened to
            # be the very same string object, so entries were left stale.
            for UID in [uid for uid, name in folderMsgs.items() if name == oldSha1]:
                folderMsgs[UID] = sha1
Exemple #7
0
 def __init__(self, rep_file):
     """Build the object from an already-open REP file.

     The headers of *rep_file* are parsed and checked against
     ``self.headers``, ``self.type_values`` and ``self.status_values``;
     number, title, type, status and authors are then stored on the
     instance.  ``REPError`` or ``REPParseError`` is raised when a check
     fails.
     """
     self.filename = rep_file
     fields = HeaderParser().parse(rep_file)

     # Walk the headers found in the file alongside the expected
     # (name, required) sequence; optional expected entries may be skipped.
     expected = iter(self.headers)
     try:
         for found in fields.keys():
             wanted, mandatory = next(expected)
             while not (found == wanted or mandatory):
                 wanted, mandatory = next(expected)
             if found != wanted:
                 message = ("did not deal with %r before having to handle %r"
                            % (found, wanted))
                 raise REPError(message, rep_file.name)
     except StopIteration:
         raise REPError("headers missing or out of order", rep_file.name)

     # Anything still left in the expected sequence has to be optional.
     try:
         while True:
             wanted, mandatory = next(expected)
             if mandatory:
                 raise REPError("REP is missing its %r" % (wanted,),
                                rep_file.name)
     except StopIteration:
         pass

     # Number.
     try:
         self.number = int(fields['REP'])
     except ValueError:
         raise REPParseError("REP number isn't an integer", rep_file.name)

     # Title.
     self.title = fields['Title']

     # Type.
     kind = fields['Type']
     if kind not in self.type_values:
         raise REPError('%r is not a valid Type value' % (kind,),
                        rep_file.name, self.number)
     self.type_ = kind

     # Status; "Active" is reserved for Process and Informational REPs.
     state = fields['Status']
     if state not in self.status_values:
         raise REPError("%r is not a valid Status value" % (state,),
                        rep_file.name, self.number)
     if state == u"Active" and self.type_ not in ("Process", "Informational"):
         raise REPError("Only Process and Informational REPs may "
                        "have an Active status", rep_file.name, self.number)
     self.status = state

     # Authors.
     people = self._parse_author(fields['Author'])
     if len(people) == 0:
         raise REPError("no authors found", rep_file.name, self.number)
     self.authors = list(map(Author, people))
Exemple #8
0
def version_from_pkginfo():
    """Return the ``Version`` header of ``PKG-INFO`` in the working directory.

    Returns None, after printing a notice, when the file does not exist.
    The result is also None when the file has no ``Version`` header.
    """
    from email.parser import HeaderParser

    parser = HeaderParser()
    try:
        with open('PKG-INFO') as file:
            pkg_info = parser.parse(file)
    except FileNotFoundError:
        print('This is not a regular source distribution!')
        return None
    # The message previously named "PKG-SOURCES", a file never read here.
    print('Retrieving the distribution version from PKG-INFO.')
    return pkg_info['Version']
Exemple #9
0
def version_from_pkginfo():
    """Return the version recorded in an sdist's PKG-INFO, or None if the file is missing."""
    from email.parser import HeaderParser

    try:
        pkg_info_file = open('PKG-INFO')
    except FileNotFoundError:
        return None
    with pkg_info_file:
        headers = HeaderParser().parse(pkg_info_file)
    print('Retrieving the distribution version from PKG-INFO.')
    return headers['Version']
Exemple #10
0
def version_from_pkginfo():
    """Return the ``Version`` header of ``PKG-INFO`` in the working directory.

    Returns None, after printing a notice, when the file does not exist.
    The result is also None when the file has no ``Version`` header.
    """
    from email.parser import HeaderParser

    parser = HeaderParser()
    try:
        with open('PKG-INFO') as file:
            pkg_info = parser.parse(file)
    except FileNotFoundError:
        print('This is not a regular source distribution!')
        return None
    # The message previously named "PKG-SOURCES", a file never read here.
    print('Retrieving the distribution version from PKG-INFO.')
    return pkg_info['Version']
Exemple #11
0
def readAuthHeaders(filename):
    """Parse the headers of *filename* and print the result of ``testSPF`` on them."""
    with open(filename) as message_file:
        headers = HeaderParser().parse(message_file)
        # The original kept a disabled debug dump of every header here
        # (inside a bare string literal); it never executed.
        spf_result = testSPF(headers)
        print('spf test: {}'.format(spf_result))
Exemple #12
0
def parse():
    """Parse the headers of the file chosen via ``openfile()`` into global ``msg``.

    On success ``out()`` is called and the parsed message is returned.  If
    the path returned by ``openfile()`` cannot be found, a notice is printed
    and the program exits.
    """
    global msg
    parser = HeaderParser()
    try:
        # Context manager so the handle is closed once the headers are read;
        # the original left the file object open.
        with open(openfile()) as fh:
            msg = parser.parse(fh)
    except FileNotFoundError:
        print(
            "No file was selected, Please re-run the program and choose a file."
        )
        exit()
    else:
        out()
    return msg
Exemple #13
0
    def _read_metadata(cls, fpath):
        """
        Read the original format which is stored as RFC-822 headers.

        Returns ``cls._message_to_dict`` applied to the parsed headers, or to
        an empty ``odict`` when *fpath* is falsy or not an existing file.
        """
        data = odict()
        if fpath and isfile(fpath):
            parser = HeaderParser()

            # Decode explicitly as UTF-8 rather than with the locale's
            # default encoding, which caused errors on Windows.
            # FIXME: Is this a correct assumption for the encoding?
            with open(fpath, encoding='utf-8') as fp:
                data = parser.parse(fp)

        return cls._message_to_dict(data)
Exemple #14
0
    def _read_metadata(cls, fpath):
        """
        Read the original format which is stored as RFC-822 headers.

        Returns ``cls._message_to_dict`` applied to the parsed headers, or to
        an empty ``odict`` when *fpath* is falsy or not an existing file.
        """
        data = odict()
        if fpath and isfile(fpath):
            parser = HeaderParser()

            # Decode explicitly as UTF-8 rather than with the locale's
            # default encoding, which caused errors on Windows.
            # FIXME: Is this a correct assumption for the encoding?
            with open(fpath, encoding='utf-8') as fp:
                data = parser.parse(fp)

        return cls._message_to_dict(data)
Exemple #15
0
    def __init__(self, pep_file):
        """Initialise via the base class, then record headers and creation date.

        ``self.metadata`` holds the parsed headers of *pep_file*;
        ``self.created`` is the parsed ``Created`` header with any text
        matching ``RE_BAD_SUFFIX`` removed, or None when the header is
        missing or empty.
        """
        super(PEP, self).__init__(pep_file)

        # Rewind so the headers are read from the start of the file.
        pep_file.seek(0)
        self.metadata = HeaderParser().parse(pep_file)

        created_raw = self.metadata['Created']
        if not created_raw:
            self.created = None
        else:
            cleaned = RE_BAD_SUFFIX.sub('', created_raw)
            self.created = parse_date(cleaned)
Exemple #16
0
def parsemulti_no_bl():
    """Prompt for a directory and run the header checks on every file below it.

    For each file found by ``os.walk`` the module-level counters are reset
    to zero, the headers are parsed into the global ``msg``, a numbered
    banner is printed, and ``out``, ``spf``, ``dkim``, ``dmarc``, ``attach``,
    ``attachtype``, ``clientip`` and ``risk_no_bl`` are called in that order.
    """
    global msg
    global spfCount, spfPassCount, spfFailCount
    global dkimCount, dkimPassCount, dkimFailCount
    global dmarcCount, dmarcPassCount, dmarcFailCount
    global totalChecksCount, totalChecksFailCount
    global AttachmentCount, ShellAttchCount
    global pdfAttachCount, pngAttachCount, jpegAttachCount, zipAttachCount
    root = Tk()
    root.withdraw()
    folder_selected = filedialog.askdirectory(
        title='Choose the directory where you have your email files stored')

    print(folder_selected)
    parser = HeaderParser()
    num_files = 0
    for path, _dirs, files in os.walk(folder_selected):
        for f in files:
            num_files += 1
            # Every message starts from a clean set of counters.
            spfCount = spfPassCount = spfFailCount = 0
            dkimCount = dkimPassCount = dkimFailCount = 0
            dmarcCount = dmarcPassCount = dmarcFailCount = 0
            totalChecksCount = totalChecksFailCount = 0
            AttachmentCount = ShellAttchCount = 0
            pdfAttachCount = pngAttachCount = 0
            jpegAttachCount = zipAttachCount = 0

            # Join with the directory currently being walked: joining with
            # the root folder failed for any file inside a subdirectory.
            # The context manager also closes the handle after parsing.
            with open(os.path.join(path, f)) as fh:
                msg = parser.parse(fh)
            print('')
            print(
                '================================================================='
            )
            print('Email Number: ' + str(num_files))
            print(
                '================================================================='
            )
            print('')

            out()
            spf()
            dkim()
            dmarc()
            attach()
            attachtype()
            clientip()
            risk_no_bl()
Exemple #17
0
 def __init__(self, pep_file):
     """Init object from an open PEP file object.

     Parses the RFC 822-style headers of *pep_file*, checks them against
     ``self.headers`` (iterated as ``(name, required)`` pairs), and stores
     the number, title, type, status, authors, delegates and creation
     date on the instance.

     Raises:
         PEPError: a header is unexpected, out of order or missing, or the
             Type/Status/Author value is invalid.
         PEPParseError: the ``PEP`` header is not an integer.
     """
     # Parse the headers.
     self.filename = pep_file
     pep_parser = HeaderParser()
     metadata = pep_parser.parse(pep_file)
     header_order = iter(self.headers)
     # The built-in next() works on Python 2.6+ and Python 3 alike, whereas
     # the iterator's .next() method only exists on Python 2.
     try:
         for header_name in metadata.keys():
             current_header, required = next(header_order)
             # Optional expected headers that are absent may be skipped.
             while header_name != current_header and not required:
                 current_header, required = next(header_order)
             if header_name != current_header:
                 raise PEPError(
                     "did not deal with "
                     "%r before having to handle %r" %
                     (header_name, current_header), pep_file.name)
     except StopIteration:
         raise PEPError("headers missing or out of order", pep_file.name)
     # Whatever is left in the expected sequence must be optional.
     required = False
     try:
         while not required:
             current_header, required = next(header_order)
         else:
             raise PEPError("PEP is missing its %r" % (current_header, ),
                            pep_file.name)
     except StopIteration:
         pass
     # 'PEP'.
     try:
         self.number = int(metadata['PEP'])
     except ValueError:
         raise PEPParseError("PEP number isn't an integer", pep_file.name)
     # 'Title'.
     self.title = metadata['Title']
     # 'Type'.
     type_ = metadata['Type']
     if type_ not in self.type_values:
         raise PEPError('%r is not a valid Type value' % (type_, ),
                        pep_file.name, self.number)
     self.type_ = type_
     # 'Status'.
     status = metadata['Status']
     if status not in self.status_values:
         if status == "April Fool!":
             # See PEP 401 :)
             status = "Rejected"
         else:
             raise PEPError("%r is not a valid Status value" % (status, ),
                            pep_file.name, self.number)
     # Special case for Active PEPs.
     if (status == u"Active"
             and self.type_ not in ("Process", "Informational")):
         raise PEPError(
             "Only Process and Informational PEPs may "
             "have an Active status", pep_file.name, self.number)
     self.status = status
     # 'Author'.
     authors_and_emails = self._parse_author(metadata['Author'])
     if len(authors_and_emails) < 1:
         raise PEPError("no authors found", pep_file.name, self.number)
     # Build real lists: on Python 3 map() would leave one-shot iterators.
     self.authors = [Author(x) for x in authors_and_emails]
     # 'BDFL-Delegate'
     self.delegates = []
     if 'BDFL-Delegate' in metadata:
         delegates_and_emails = self._parse_author(
             metadata['BDFL-Delegate'])
         self.delegates = [Author(x) for x in delegates_and_emails]
     # 'Created' for creation date of the PEP
     self.created = _parse_date(metadata['Created'])
Exemple #18
0
    def extractStats(backupDir, msgIndexFile, fldIndexFile):
        """
        Compute message statistics and write them to ``stats-<email>.xml``.

        Every message in the index is counted.  Messages carrying a
        ``List-Id`` header are counted as list mail and otherwise skipped.
        A remaining message counts as sent when the folder whose type
        contains ``\\Sent`` is among its folders, and as received otherwise;
        recipient or sender addresses are tallied accordingly.

        backupDir    -- directory holding messages as <sha1[:2]>/<sha1>
        msgIndexFile -- passed to deserialize() to load the message index
        fldIndexFile -- passed to deserialize() to load the folder index

        TODO:
            Messages received per day
            Messages sent per day
        """
        msgIndex = deserialize(msgIndexFile)
        fldIndex = deserialize(fldIndexFile)

        timeStats = {'Yrs':{}, 'DOW':{}, 'Hrs':{}}
        stats = {'CountTotalMsgs':0, 'CountListMsgs':0,
                 'CountSentMsgs':0, 'CountRcvdMsgs':0,
                 'TimeAll':deepcopy(timeStats),
                 'TimeSent':deepcopy(timeStats),
                 'TimeRcvd':deepcopy(timeStats)}
        hFrom = {}
        hTo = {}

        # Raises IndexError when no folder type contains '\\Sent'.
        SentFolder = [f[0] for f in fldIndex.items() if '\\Sent' in f[1]['Type']][0]
        logger.debug("SentFolder=" + SentFolder)
        total = len(msgIndex)
        status("Computing stats for %d message(s).\n" % total)

        for idx, sha1 in enumerate(msgIndex.keys(), 1):
            # The context manager closes the file even if parsing raises.
            with open(os.path.join(backupDir, sha1[0:2], sha1), 'r') as fp:
                firstLine = fp.readline()
                if not firstLine.startswith('>From - '):
                    fp.seek(0)
                # else: broken first line that would trip up HeaderParser;
                # parsing starts on the second line.
                pMsg = HeaderParser().parse(fp, headersonly=True)

            stats['CountTotalMsgs'] += 1
            if pMsg.get('list-id', ""):
                stats['CountListMsgs'] += 1
                continue

            date = parsedate_tz(pMsg.get('date', ""))
            if date is not None:
                date = time.localtime( mktime_tz(date) )
            Stats.saveDateStats(stats['TimeAll'], date)

            if SentFolder in msgIndex[sha1]['folder']:
                stats['CountSentMsgs'] += 1
                try:
                    receivers = pMsg.get_all('to', []) + pMsg.get_all('cc', [])
                    for toEmail in [m[1].lower() for m in getaddresses(receivers) if len(m[1]) > 0]:
                        hTo[toEmail] = hTo.get(toEmail, 0) + 1
                    Stats.saveDateStats(stats['TimeSent'], date)
                except Exception:
                    # Best effort: log and move on, but do not swallow
                    # KeyboardInterrupt/SystemExit as a bare except would.
                    logger.exception("Exception parsing recipient data")
            else:
                stats['CountRcvdMsgs'] += 1
                try:
                    for fromEmail in [m[1].lower() for m in getaddresses(pMsg.get_all('from', []))]:
                        hFrom[fromEmail] = hFrom.get(fromEmail, 0) + 1
                    Stats.saveDateStats(stats['TimeRcvd'], date)
                except Exception:
                    logger.exception("Exception parsing sender data")

            progress('\r%.0f%% %d/%d ' % (idx * 100.0 / total, idx, total))


        xml = dict2xml({'Stats': stats})

        # Senders and receivers are emitted most frequent first.
        senders = xml.doc.createElement("Senders")
        xml.root.appendChild(senders)
        for email, count in sorted(hFrom.items(), key = lambda x: x[1], reverse=True):
            sender = xml.doc.createElement("Sender")
            sender.setAttribute("Email", email)
            sender.setAttribute("Count", str(count))
            senders.appendChild(sender)

        rcvrs = xml.doc.createElement("Receivers")
        xml.root.appendChild(rcvrs)
        for email, count in sorted(hTo.items(), key = lambda x: x[1], reverse=True):
            rcvr = xml.doc.createElement("Receiver")
            rcvr.setAttribute("Email", email)
            rcvr.setAttribute("Count", str(count))
            rcvrs.appendChild(rcvr)

        with open("stats-{0}.xml".format(options.email), 'w') as output:
            xml.doc.writexml(output, encoding='utf-8', indent='  ', addindent='  ', newl="\n")
Exemple #19
0
def _get_version_from_pkg_info(pkg_info_filename):
    """Return the ``version`` header of a PKG-INFO file (see PEP 314), or None if absent."""
    with open(pkg_info_filename, 'r') as pkg_info:
        metadata = HeaderParser().parse(pkg_info)
    return metadata.get('version')