예제 #1
0
    def __analyze_mailing_list(self, url_or_dirpath, compressed_dir):
        """Fetch the archives of one mailing list and analyze them.

        Builds a ``MailingList`` from the given arguments, records it in the
        database together with the current timestamp, picks an archive
        backend, fetches the archives and hands the ones selected by
        ``__set_archives_to_analyze`` to ``__analyze_list_of_files``.

        Returns the ``(total, stored, non_parsed)`` triple produced by
        ``__analyze_list_of_files``, or ``(0, 0, 0)`` when an ``IOError``
        is raised while fetching or analyzing.
        """
        ml = MailingList(url_or_dirpath, compressed_dir)

        # Register (or refresh) the list in the database. A datetime object
        # is passed as-is rather than a pre-formatted string.
        now = datetime.datetime.today()
        self.db.update_mailing_list(ml.location, ml.alias, now)

        # Local paths are handled by LocalArchive; anything else is
        # delegated to __get_backend.
        archive_backend = (LocalArchive(ml) if ml.is_local()
                           else self.__get_backend(ml))
        archive_backend._create_download_dirs()

        try:
            # Materialize everything the backend yields before selecting
            # which archives to analyze.
            fetched = list(archive_backend.fetch())
            pending = self.__set_archives_to_analyze(ml, fetched)
            total, stored, non_parsed = self.__analyze_list_of_files(ml,
                                                                     pending)
        except IOError:
            self.__print_output("Unknown URL or directory: " +
                                url_or_dirpath + ". Skipping.")
            # Nothing was analyzed: report zeroed counters.
            return 0, 0, 0

        return total, stored, non_parsed
예제 #2
0
    def __analyze_mailing_list(self, url_or_dirpath, compressed_dir):
        """Fetch the archives of one mailing list and analyze them.

        Builds a ``MailingList`` from the given arguments, records it in the
        database together with the current timestamp, picks an archive
        backend, fetches the archives and hands the ones selected by
        ``__set_archives_to_analyze`` to ``__analyze_list_of_files``.

        Returns the ``(total, stored, non_parsed)`` triple produced by
        ``__analyze_list_of_files``, or ``(0, 0, 0)`` when an ``IOError``
        is raised while fetching or analyzing.
        """
        ml = MailingList(url_or_dirpath, compressed_dir)

        # Register (or refresh) the list in the database. A datetime object
        # is passed as-is rather than a pre-formatted string.
        now = datetime.datetime.today()
        self.db.update_mailing_list(ml.location, ml.alias, now)

        # Local paths are handled by LocalArchive; anything else is
        # delegated to __get_backend.
        archive_backend = (LocalArchive(ml) if ml.is_local()
                           else self.__get_backend(ml))
        archive_backend._create_download_dirs()

        try:
            # Materialize everything the backend yields before selecting
            # which archives to analyze.
            fetched = list(archive_backend.fetch())
            pending = self.__set_archives_to_analyze(ml, fetched)
            total, stored, non_parsed = self.__analyze_list_of_files(ml,
                                                                     pending)
        except IOError:
            self.__print_output("Unknown URL or directory: " +
                                url_or_dirpath + ". Skipping.")
            # Nothing was analyzed: report zeroed counters.
            return 0, 0, 0

        return total, stored, non_parsed
예제 #3
0
        """Walk the mailing list directory looking for archives"""

        mailing_list = self.mailing_list

        if os.path.isfile(mailing_list.location):
            yield MBoxArchive(mailing_list.location, mailing_list.location)
        else:
            for root, dirs, files in os.walk(mailing_list.location):
                for filename in sorted(files):
                    location = os.path.join(root, filename)
                    yield MBoxArchive(location, location)


if __name__ == '__main__':
    import pprint
    import sys

    # Manual smoke test. Usage: <script> URL_OR_DIRPATH COMPRESSED_DIR
    # Prints the ``url`` attribute of every archive the backend fetches.
    target = MailingList(url_or_dirpath=sys.argv[1],
                         compressed_dir=sys.argv[2])

    # Choose the archive class first, then instantiate it once.
    if target.is_local():
        archive_cls = LocalArchive
    elif target.location.startswith(GMANE_URL):
        archive_cls = GmaneArchive
    else:
        archive_cls = MailmanArchive

    fetcher = archive_cls(target)
    fetcher._create_download_dirs()

    for archive in fetcher.fetch():
        pprint.pprint(archive.url)
예제 #4
0
        """Walk the mailing list directory looking for archives"""

        mailing_list = self.mailing_list

        if os.path.isfile(mailing_list.location):
            yield MBoxArchive(mailing_list.location, mailing_list.location)
        else:
            for root, dirs, files in os.walk(mailing_list.location):
                for filename in sorted(files):
                    location = os.path.join(root, filename)
                    yield MBoxArchive(location, location)


if __name__ == '__main__':
    import pprint
    import sys

    # Manual smoke test. Usage: <script> URL_OR_DIRPATH COMPRESSED_DIR
    # Prints the ``url`` attribute of every archive the backend fetches.
    target = MailingList(url_or_dirpath=sys.argv[1],
                         compressed_dir=sys.argv[2])

    # Choose the archive class first, then instantiate it once.
    if target.is_local():
        archive_cls = LocalArchive
    elif target.location.startswith(GMANE_URL):
        archive_cls = GmaneArchive
    else:
        archive_cls = MailmanArchive

    fetcher = archive_cls(target)
    fetcher._create_download_dirs()

    for archive in fetcher.fetch():
        pprint.pprint(archive.url)