def notify(config, result, result_k, coherence, extension):
    # NOTE(review): in this file chunk the body of notify() is split across
    # non-contiguous lines; this span covers the header through the "mod"
    # attachment handling only — the function continues elsewhere.
    #
    # Send an e-mail notification of blog changes over SMTP.
    #   config    : dict of SMTP settings (SERVER, PORT, USERNAME, PASSWORD,
    #               FROM, TO, CC, BCC, SUBJECT, BODY) plus attachment base
    #               names (NEW, MOD, DEL, NEW_K, MOD_K) and PATH_TEMP
    #   result    : dict that may contain "new"/"mod"/"del" report dictionaries
    #   result_k  : keyword-based report dictionary (handled later in the body)
    #   coherence : numeric severity used to choose the subject prefix
    #   extension : attachment format selector — "c" csv, "x" xml, "j" json
    now = datetime.datetime.now()
    # data format: YYYYMMDD
    now_month = now.month
    if int(now_month) < 10:
        now_month = "0" + str(now.month)
    # NOTE(review): the day is NOT zero-padded, so before the 10th this yields
    # YYYYMMD rather than the YYYYMMDD announced above — confirm intent.
    today = str(now.year) + str(now_month) + str(now.day)
    logging.info("- EmailNotifier is sending...")
    smtp_server = config["SERVER"]
    smtp_port = config["PORT"]
    # Credentials are read here; the login presumably happens later in the
    # function (not visible in this span) — TODO confirm.
    smtp_username = config["USERNAME"]
    smtp_password = config["PASSWORD"]
    smtp_from = config["FROM"]
    smtp_to = config["TO"]
    smtp_cc = config["CC"]
    smtp_bcc = config["BCC"]
    smtp_subject = config["SUBJECT"]
    smtp_body = config["BODY"]
    # Attachments
    smtp_new = config["NEW"] + "_" + today
    smtp_mod = config["MOD"] + "_" + today
    smtp_del = config["DEL"] + "_" + today
    smtp_new_k = config["NEW_K"] + "_" + today
    smtp_mod_k = config["MOD_K"] + "_" + today
    # Attachment format: <dir><file_name>_<date>
    smtp_file_new = config["PATH_TEMP"] + smtp_new
    smtp_file_mod = config["PATH_TEMP"] + smtp_mod
    smtp_file_del = config["PATH_TEMP"] + smtp_del
    smtp_file_new_k = config["PATH_TEMP"] + smtp_new_k
    smtp_file_mod_k = config["PATH_TEMP"] + smtp_mod_k
    try:
        logging.info("- EmailNotifier is sending...")
        server = SMTP(smtp_server, smtp_port)
        server.set_debuglevel(True)
        msg = MIMEMultipart()
        msg["From"] = smtp_from
        # "; ".join(...) implies smtp_to is an iterable of addresses.
        msg["To"] = "; ".join(smtp_to)
        # NOTE(review): this wraps smtp_to in another list ([smtp_to]) while
        # CC/BCC below are concatenated flat with += — looks inconsistent;
        # verify how `emails` is consumed later.
        emails = [smtp_to]
        # NOTE(review): `is not ""` is an identity comparison with a literal,
        # not an equality test — almost certainly meant `!= ""`.
        if smtp_cc is not "":
            msg["CC"] = "; ".join(smtp_cc)
            emails += smtp_cc
        if smtp_bcc is not "":
            msg["BCC"] = "; ".join(smtp_bcc)
            emails += smtp_bcc
        # Added priority to subject
        # coherence 1 -> Info, 2..3 -> Warning, anything else truthy -> Danger.
        priority = ""
        if coherence:
            if coherence == 1:
                priority = "[Info] "
            elif 1 < coherence < 4:
                priority = "[Warning] "
            else:
                priority = "[Danger] "
        msg["Subject"] = priority + smtp_subject
        body = MIMEText(smtp_body, "html")
        msg.attach(body)
        # logging.debug(msg.as_string())
        # If dictionary of new posts exists, it creates attachment
        if "new" in result:
            try:
                # Serialize result["new"] to disk in the requested format,
                # then attach the produced file.
                if extension == "c":
                    smtp_new += ".csv"
                    smtp_file_new += ".csv"
                    CsvConverter.save_dictionary_to_file(smtp_file_new, result["new"], "new")
                elif extension == "x":
                    smtp_new += ".xml"
                    smtp_file_new += ".xml"
                    XmlConverter.save_dictionary_to_file(smtp_file_new, result["new"])
                elif extension == "j":
                    smtp_new += ".json"
                    smtp_file_new += ".json"
                    JsonConverter.save_dictionary_to_file(smtp_file_new, result["new"])
                logging.info("- Attaching file: {0}".format(str(smtp_file_new)))
                with open(smtp_file_new, "rb") as fil:
                    part = MIMEApplication(fil.read(), "text/plain", filename=smtp_new)
                part.add_header("Content-Disposition", "attachment", filename=smtp_new)
                msg.attach(part)
            except OSError, e:
                # NOTE(review): `dir` here is the builtin, not a path — the
                # log message prints "<built-in function dir>".
                logging.error("- Error walking dir '%s': %s" % (dir, e))
                raise OSError
        # If dictionary of post changes exists, it creates attachment
        if "mod" in result:
            try:
                if extension == "c":
                    smtp_mod += ".csv"
                    smtp_file_mod += ".csv"
                    CsvConverter.save_dictionary_to_file(smtp_file_mod, result["mod"], "mod")
                elif extension == "x":
                    smtp_mod += ".xml"
                    smtp_file_mod += ".xml"
                    XmlConverter.save_dictionary_to_file(smtp_file_mod, result["mod"])
                elif extension == "j":
                    smtp_mod += ".json"
                    smtp_file_mod += ".json"
                    JsonConverter.save_dictionary_to_file(smtp_file_mod, result["mod"])
                logging.info("- Attaching file: {0}".format(str(smtp_file_mod)))
                with open(smtp_file_mod, "rb") as fil:
                    part = MIMEApplication(fil.read(), "text/plain", filename=smtp_mod)
                part.add_header("Content-Disposition", "attachment", filename=smtp_mod)
                msg.attach(part)
            except OSError, e:
                logging.error("- Error walking dir '%s': %s" % (dir, e))
                raise OSError
def check_changes(self, new_dict_path, on_tor):
    """Diff the stored post-hash dictionary against a freshly scraped one.

    Both dictionaries are loaded via JsonConverter and are shaped
    {year: {month: [post, ...]}}, where each post dict carries at least the
    keys 'url', 'body', 'text', 'title', 'media', 'comments', 'img',
    'plain url' and 'plain title' (assumed from usage below — confirm
    against the scraper).

    Returns a report dict with up to three keys:
      'new' : posts, whole months or whole years absent from the old data
      'mod' : per-post field changes, filtered through the self._verbose flags
      'del' : posts, months or years present before but now missing

    Side effects: merges the detected changes into the stored dictionary and
    saves it back to self._hash_path. `on_tor` is only forwarded to
    Intercept.report_me when self._debug is set.
    """
    logging.info('- Starting check of changes -')
    old_dict_hash = JsonConverter.open_dictionary_from_file(self._hash_path)
    new_dict_hash = JsonConverter.open_dictionary_from_file(new_dict_path)
    # past_dict_hash is a pruned working copy of the old data restricted to
    # the configured (year, month) window; old_dict_hash itself is kept
    # intact because it is merged into and re-saved at the end.
    past_dict_hash = copy.deepcopy(old_dict_hash)
    if self._year_start == self._year_end:
        # Single-year window: drop months outside [month_start, month_end].
        # Keys are strings (JSON), hence the str()/int() conversions.
        for month in old_dict_hash[str(self._year_start)]:
            if int(month) < self._month_start:
                del past_dict_hash[str(self._year_start)][month]
            elif int(month) > self._month_end:
                del past_dict_hash[str(self._year_start)][month]
        for year in old_dict_hash.keys():
            if int(year) < self._year_start:
                del past_dict_hash[year]
    else:
        # Multi-year window: drop years before the start, and trim the edge
        # years to their partial month ranges.
        for year in old_dict_hash.keys():
            if int(year) < self._year_start:
                del past_dict_hash[year]
            elif int(year) == self._year_start:
                for month in old_dict_hash[year]:
                    if int(month) < self._month_start:
                        del past_dict_hash[year][month]
            elif int(year) == self._year_end:
                for month in old_dict_hash[year]:
                    if int(month) > self._month_end:
                        del past_dict_hash[year][month]
    # #################### #
    #    REPORT CHANGES    #
    #       OR NEWS        #
    # #################### #
    report_new = {}
    report_mod = {}
    report_mod_verbosity = {}
    for year in new_dict_hash.keys():
        report_dict_new = {}
        report_dict_mod = {}
        report_dict_mod_verbosity = {}
        if year in past_dict_hash.keys():
            for month in new_dict_hash[year]:
                report_list_new = []
                report_list_mod = []
                report_list_mod_verbosity = []
                if month in past_dict_hash[year]:
                    if past_dict_hash[year][month] != new_dict_hash[year][month]:
                        # Month differs: compare every new post against the
                        # past post with the same 'url', field by field.
                        # post_change records everything; post_change_verbosity
                        # only the fields enabled by the self._verbose flags.
                        for new_post in new_dict_hash[year][month]:
                            post_change_verbosity = {}
                            post_change = {}
                            for past_post in past_dict_hash[year][month]:
                                if new_post['url'] == past_post['url']:
                                    if new_post['body'] != past_post['body']:
                                        post_change['body'] = 'changed'
                                        if self._verbose[4]:
                                            post_change_verbosity['body'] = 'changed'
                                        # Debug hook: report a body change upstream.
                                        if self._debug:
                                            Intercept.report_me(year, month, new_post['plain title'], on_tor)
                                    if new_post['text'] != past_post['text']:
                                        post_change['text'] = 'changed'
                                        if self._verbose[5]:
                                            post_change_verbosity['text'] = 'changed'
                                    if new_post['title'] != past_post['title']:
                                        # Titles keep both old and new values
                                        # instead of the 'changed' marker.
                                        post_change['old title'] = past_post['plain title']
                                        post_change['plain title'] = new_post['plain title']
                                        if self._verbose[0]:
                                            post_change_verbosity['old title'] = past_post['plain title']
                                            post_change_verbosity['plain title'] = new_post['plain title']
                                    if new_post['media'] != past_post['media']:
                                        post_change['media'] = 'changed'
                                        if self._verbose[2]:
                                            post_change_verbosity['media'] = 'changed'
                                    if new_post['comments'] != past_post['comments']:
                                        post_change['comments'] = 'changed'
                                        if self._verbose[3]:
                                            post_change_verbosity['comments'] = 'changed'
                                    if new_post['img'] != past_post['img']:
                                        post_change['img'] = 'changed'
                                        if self._verbose[1]:
                                            post_change_verbosity['img'] = 'changed'
                            # Tag non-empty change records with the post identity.
                            if len(post_change):
                                post_change['plain url'] = new_post['plain url']
                                post_change['plain title'] = new_post['plain title']
                                report_list_mod.append(post_change)
                            if len(post_change_verbosity):
                                post_change_verbosity['plain url'] = new_post['plain url']
                                post_change_verbosity['plain title'] = new_post['plain title']
                                report_list_mod_verbosity.append(post_change_verbosity)
                        if len(report_list_mod):
                            report_dict_mod[month] = report_list_mod
                        if len(report_list_mod_verbosity):
                            report_dict_mod_verbosity[month] = report_list_mod_verbosity
                    # Searching for new post
                    # A post is new when no past post shares its 'url'.
                    for new_post in new_dict_hash[year][month]:
                        proof = True
                        for past_post in past_dict_hash[year][month]:
                            if new_post['url'] == past_post['url']:
                                proof = False
                        if proof:
                            report_list_new.append(new_post)
                    if len(report_list_new):
                        report_dict_new[month] = report_list_new
                # Whole new month
                else:
                    logging.info('- New MONTH found -')
                    report_dict_new[month] = new_dict_hash[year][month]
            if len(report_dict_mod):
                report_mod[year] = report_dict_mod
            if len(report_dict_mod_verbosity):
                report_mod_verbosity[year] = report_dict_mod_verbosity
            if len(report_dict_new):
                report_new[year] = report_dict_new
        else:
            # Whole new year
            logging.info('- New YEAR found -')
            report_new[year] = new_dict_hash[year]
    # #################### #
    #    REPORT DELETES    #
    # #################### #
    report_del = {}
    for year in past_dict_hash.keys():
        report_dict_del = {}
        if year in new_dict_hash.keys():
            for month in past_dict_hash[year]:
                report_list_del = []
                if month not in new_dict_hash[year]:
                    # miss one whole month
                    for past_post in past_dict_hash[year][month]:
                        post_change_del = {'title': past_post['plain title'], 'url': past_post['plain url']}
                        report_list_del.append(post_change_del)
                    if len(report_list_del):
                        report_dict_del[month] = report_list_del
                else:
                    # A past post is deleted when its 'url' matches none of
                    # the new posts (proof counts the mismatches).
                    for past_post in past_dict_hash[year][month]:
                        post_change_del = {}
                        proof = 0
                        for new_post in new_dict_hash[year][month]:
                            if past_post['url'] != new_post['url']:
                                proof += 1
                        if proof == len(new_dict_hash[year][month]):
                            # miss one or more posts
                            post_change_del['title'] = past_post['plain title']
                            post_change_del['url'] = past_post['plain url']
                        if len(post_change_del):
                            report_list_del.append(post_change_del)
                    if len(report_list_del):
                        report_dict_del[month] = report_list_del
        else:
            # miss one whole year
            for month in past_dict_hash[year]:
                report_list_del = []
                for past_post in past_dict_hash[year][month]:
                    post_change_del = {'title': past_post['plain title'], 'url': past_post['plain url']}
                    report_list_del.append(post_change_del)
                if len(report_list_del):
                    report_dict_del[month] = report_list_del
        if len(report_dict_del):
            report_del[year] = report_dict_del
    report = {}
    # report_new: new post dictionary
    # report_mod: changed post dictionary
    # report_del: deleted post dictionary
    # NOTE: the 'mod' entry uses the verbosity-filtered report; the full
    # report_mod is only used below for persisting changes.
    if report_new:
        report['new'] = report_new
        logging.info('- REPORT NEWS -')
    if report_mod_verbosity:
        report['mod'] = report_mod_verbosity
        logging.info('- REPORT CHANGES -')
    if report_del:
        report['del'] = report_del
        logging.info('- REPORT DELETES -')
    logging.info('- END CHECK.....................................................OK')
    # saving changes on file
    if report_new:
        for year in report_new.keys():
            if year not in old_dict_hash.keys():
                old_dict_hash[year] = new_dict_hash[year]
            else:
                for month in report_new[year]:
                    old_dict_hash[year][month] = new_dict_hash[year][month]
    # All changes are saved, no matter about verbosity
    if report_mod:
        for year in report_mod.keys():
            if year not in old_dict_hash.keys():
                old_dict_hash[year] = new_dict_hash[year]
            else:
                for month in report_mod[year]:
                    old_dict_hash[year][month] = new_dict_hash[year][month]
    if report_del:
        for year in report_del.keys():
            if year in new_dict_hash.keys():
                for month in report_del[year]:
                    if month in new_dict_hash[year]:
                        old_dict_hash[year][month] = new_dict_hash[year][month]
                    else:
                        del old_dict_hash[year][month]
            else:
                del old_dict_hash[year]
    JsonConverter.save_dictionary_to_file(self._hash_path, old_dict_hash)
    logging.info('- SAVING CHANGES ...............................................OK')
    # return report_new, report_del and report_mod_verbosity
    return report
raise OSError # If dictionary of deletes exists, it creates attachment if "del" in result: try: if extension == "c": smtp_del += ".csv" smtp_file_del += ".csv" CsvConverter.save_dictionary_to_file(smtp_file_del, result["del"], "del") elif extension == "x": smtp_del += ".xml" smtp_file_del += ".xml" XmlConverter.save_dictionary_to_file(smtp_file_del, result["del"]) elif extension == "j": smtp_del += ".json" smtp_file_del += ".json" JsonConverter.save_dictionary_to_file(smtp_file_del, result["del"]) logging.info("- Attaching file: {0}".format(str(smtp_file_del))) with open(smtp_file_del, "rb") as fil: part = MIMEApplication(fil.read(), "text/plain", filename=smtp_del) part.add_header("Content-Disposition", "attachment", filename=smtp_del) msg.attach(part) except OSError, e: logging.error("- Error walking dir '%s': %s" % (dir, e)) raise OSError # ################### # # KEYWORDS DICTIONARY # # ################### # # If dictionary of keywords new exists, it creates attachment if result_k.has_key("new"): if len(result_k["new"]):
def scrape_and_save(self):
    """Scrape the target blog month by month over the configured window and
    persist the resulting hash dictionary.

    Builds dict_tot shaped {year: {month: [post_data, ...]}} for every month
    in [(_year_start, _month_start) .. (_year_end, _month_end)] and saves it
    to self._hash_path via JsonConverter. Each month's archive page is
    downloaded with Utility.request_and_save, parsed with BeautifulSoup and
    reduced to a list by Utility.strip_post.

    Side effects: network requests (including an IP check over
    icanhazip.com), HTML files written under self._temp_dir, and the hash
    file written at self._hash_path.
    """
    logging.info('- IP address over tor service: ' + urllib2.urlopen('http://icanhazip.com').read())
    dict_tot = {}
    # scrape the blog and save all html pages per month
    logging.info('- Scraping and saving hash from {0} {1} to {2} {3} -'.format(
        str(self._month_start), str(self._year_start),
        str(self._month_end), str(self._year_end)))
    if self._year_end == self._year_start:
        # Single-year window: only the configured month range.
        dict_hash = self._scrape_months(self._year_start, self._month_start, self._month_end)
        if dict_hash:
            dict_tot[self._year_start] = dict_hash
    else:
        for year in range(self._year_start, self._year_end + 1):
            # First year starts at _month_start, last year stops at
            # _month_end, full years in between cover months 1..12.
            if year == self._year_start:
                first_month, last_month = self._month_start, 12
            elif year == self._year_end:
                first_month, last_month = 1, self._month_end
            else:
                first_month, last_month = 1, 12
            dict_hash = self._scrape_months(year, first_month, last_month)
            if dict_hash:
                dict_tot[year] = dict_hash
    JsonConverter.save_dictionary_to_file(self._hash_path, dict_tot)
    logging.info('- Scraped and saved all hash with no errors -')

def _scrape_months(self, year, first_month, last_month):
    """Download, cache and strip one archive page per month of `year`.

    Returns {month: [post_data, ...]} for months in
    [first_month, last_month] whose Utility.strip_post result is non-empty.
    (Extracted from scrape_and_save, where this sequence was duplicated
    four times with only the month range differing.)
    """
    dict_hash = {}
    for month in range(first_month, last_month + 1):
        # The URL month is zero-padded ("01".."12"); the cached file name
        # keeps the unpadded month, matching the original naming scheme.
        url = "{0}/{1}/{2}".format(str(self._target), str(year), str(month).zfill(2))
        page_path = "{0}{1}_{2}.html".format(str(self._temp_dir), str(year), str(month))
        Utility.request_and_save(url, page_path, self._user_agent,
                                 self._attempts, self._pause, self._time_out)
        # `with` closes the page file deterministically; the original passed
        # an open() handle straight to BeautifulSoup and leaked it.
        with open(page_path) as page:
            soup = BeautifulSoup(page)
        buffer_list = Utility.strip_post(soup, year, month, self._temp_dir,
                                         self._user_agent, self._pause,
                                         self._attempts, self._time_out,
                                         self._res_to_skip, self._target_prefix)
        if buffer_list:
            dict_hash[month] = buffer_list
    return dict_hash