def __analyze_mailing_list(self, url_or_dirpath, compressed_dir):
    """Retrieve the mbox archives of one mailing list and analyze them.

    A MailingList is built from the given location, its database record
    is updated with the current timestamp, and the archives returned by
    the selected backend are filtered and handed over for analysis.

    :param url_or_dirpath: location of the mailing list (passed to
        MailingList and echoed in the error message)
    :param compressed_dir: forwarded unchanged to MailingList
    :returns: the ``(total, stored, non_parsed)`` tuple produced by
        ``__analyze_list_of_files``, or ``(0, 0, 0)`` when an IOError
        is raised while fetching or analyzing
    """
    mlist = MailingList(url_or_dirpath, compressed_dir)

    # Stamp the list's database record with the current local time.
    now = datetime.datetime.today()
    self.db.update_mailing_list(mlist.location, mlist.alias, now)

    # Local lists use LocalArchive; everything else uses whatever
    # __get_backend() selects.
    archive_backend = (LocalArchive(mlist) if mlist.is_local()
                       else self.__get_backend(mlist))
    archive_backend._create_download_dirs()

    # Defaults reported when the fetch/analysis step fails with IOError.
    total = stored = non_parsed = 0
    try:
        fetched = list(archive_backend.fetch())
        pending = self.__set_archives_to_analyze(mlist, fetched)
        total, stored, non_parsed = self.__analyze_list_of_files(mlist,
                                                                 pending)
    except IOError:
        notice = "Unknown URL or directory: " + url_or_dirpath + ". Skipping."
        self.__print_output(notice)

    return total, stored, non_parsed
def __analyze_mailing_list(self, url_or_dirpath, compressed_dir):
    """Retrieve the mbox archives of one mailing list and analyze them.

    A MailingList is built from the given location, its database record
    is updated with the current timestamp, and the archives returned by
    the selected backend are filtered and handed over for analysis.

    :param url_or_dirpath: location of the mailing list (passed to
        MailingList and echoed in the error message)
    :param compressed_dir: forwarded unchanged to MailingList
    :returns: the ``(total, stored, non_parsed)`` tuple produced by
        ``__analyze_list_of_files``, or ``(0, 0, 0)`` when an IOError
        is raised while fetching or analyzing
    """
    mlist = MailingList(url_or_dirpath, compressed_dir)

    # Stamp the list's database record with the current local time.
    now = datetime.datetime.today()
    self.db.update_mailing_list(mlist.location, mlist.alias, now)

    # Local lists use LocalArchive; everything else uses whatever
    # __get_backend() selects.
    archive_backend = (LocalArchive(mlist) if mlist.is_local()
                       else self.__get_backend(mlist))
    archive_backend._create_download_dirs()

    # Defaults reported when the fetch/analysis step fails with IOError.
    total = stored = non_parsed = 0
    try:
        fetched = list(archive_backend.fetch())
        pending = self.__set_archives_to_analyze(mlist, fetched)
        total, stored, non_parsed = self.__analyze_list_of_files(mlist,
                                                                 pending)
    except IOError:
        notice = "Unknown URL or directory: " + url_or_dirpath + ". Skipping."
        self.__print_output(notice)

    return total, stored, non_parsed
# NOTE(review): the line below is a whitespace-collapsed fragment. It opens
# with the docstring and body of a generator whose `def` line is not visible
# in this chunk, and ends with a manual test driver guarded by
# `if __name__ == '__main__':`.
#
# Generator body: when `mailing_list.location` is a regular file, a single
# MBoxArchive(location, location) is yielded; otherwise the tree is traversed
# with os.walk() and one MBoxArchive is yielded per file, visiting each
# directory's files in sorted() order.
#
# Test driver: builds a MailingList from sys.argv[1] (url_or_dirpath) and
# sys.argv[2] (compressed_dir), picks LocalArchive, GmaneArchive or
# MailmanArchive based on is_local() and a GMANE_URL prefix check, calls
# _create_download_dirs() (presumably for every backend -- the original
# indentation is lost, confirm) and pretty-prints the `url` of each archive
# returned by fetch().
"""Walk the mailing list directory looking for archives""" mailing_list = self.mailing_list if os.path.isfile(mailing_list.location): yield MBoxArchive(mailing_list.location, mailing_list.location) else: for root, dirs, files in os.walk(mailing_list.location): for filename in sorted(files): location = os.path.join(root, filename) yield MBoxArchive(location, location) if __name__ == '__main__': import sys import pprint # Test case ml = MailingList(url_or_dirpath=sys.argv[1], compressed_dir=sys.argv[2]) if ml.is_local(): o = LocalArchive(ml) elif ml.location.startswith(GMANE_URL): o = GmaneArchive(ml) else: o = MailmanArchive(ml) o._create_download_dirs() for mla in o.fetch(): pprint.pprint(mla.url)