def getPlainText(html, links=True):
    """Return a plain-text (unicode) rendering of an HTML fragment.

    html  -- the markup to convert; byte strings are decoded as latin1 first.
    links -- accepted for interface compatibility; not used by this function.

    When html2text is not available the result is an empty unicode string.
    If the conversion raises, the error is logged and the text produced by
    getException() is returned in place of the converted document.
    """
    if not isinstance(html, unicode):
        html = html.decode('latin1')

    if not has_html2text:
        return u''

    # html2text does not appear to be thread-safe, so every conversion is
    # serialized through a module-level lock.
    html2text_lock.acquire()
    try:
        try:
            converted = html2text(html).strip()
        except:
            converted = getException ()
            mylog.exception ('Error en getPlainText')
    finally:
        html2text_lock.release()

    if isinstance(converted, unicode):
        return converted
    return converted.decode('utf-8')
def MainLoop():
    """Run the read-config / fetch-feeds / send-emails cycle.

    Each pass re-reads the configuration, reconfigures logging, loads the
    OPML file named by config['opml'], pushes every active feed through a
    pool of FeedWorker threads, sends the e-mails the workers produced and
    finally saves the feed/post histories and purges the cache.

    The loop ends when config['sleep_time'] is 0 (single run); otherwise it
    sleeps that many minutes and starts over.  A KeyboardInterrupt during
    the sleep makes the function return.  Any other exception raised inside
    a pass is logged and re-raised, which stops the loop.
    """
    global post_history
    global feed_history
    global cache
    global DEBUG

    semaforo = _threading.BoundedSemaphore()
    feed_history, post_history = None, None

    while True:
        # the configuration is re-read on every pass
        config = ReadConfig()

        DEBUG = config['debug'] == '1'

        log_dir = os.path.normpath(os.path.join(HOME, '.newspipe/log'))
        mylog.logFile(config['log_console'] == '1', 'newspipe', log_dir, DEBUG)

        gc.collect()

        if DEBUG:
            mylog.warning ('DEBUG MODE')

        mylog.debug ('Home directory: ' + HOME)

        try:
            # dump the effective settings to the debug log
            mylog.debug ('Configuration settings:')
            mylog.debug ('-'*30)
            for x,y in config.items():
                mylog.debug ('%s: %s', x, y)
            mylog.debug ('-'*30)

            setPriority (int(config['priority']))

            cache.offline = config['offline'] == '1'
            if cache.offline:
                mylog.warning('Working offline')

            cache.debug = DEBUG

            if CheckOnline(config):
                NUM_WORKERS = int(config['workers'])

                if not has_threading:
                    mylog.warning ('Running without threads support')
                    NUM_WORKERS = 1

                filename = config['opml']

                opml = None
                try:
                    # look for the opml file in the current directory first,
                    # then next to the script; otherwise the configured
                    # value is handed to cache.urlopen unchanged
                    source_path = os.path.split(sys.argv[0])[0]
                    for p in ('.', source_path):
                        if os.path.exists (os.path.join(p, filename)):
                            filename = os.path.join(p, filename)
                            break

                    fp = cache.urlopen(filename, max_age=60, can_pipe=False).content
                    opml = flatten_tree(ParseOPML(fp), OPML_DEFAULTS)
                    mylog.debug ('Processing file: '+filename)
                except cache.URLError:
                    mylog.error ('Cannot find the opml file: '+filename)
                    opml = None
                except:
                    mylog.exception ('Error parsing file: '+filename)
                    opml = None

                if opml:
                    # (name, address) pairs for the e-mail and the optional
                    # mobile destination; movil_destino is False when the
                    # opml head has no 'ownerMobile' entry
                    email_destino = (opml['head']['ownerName'].strip('"'), opml['head']['ownerEmail'])
                    if( opml['head'].has_key('ownerMobile') ):
                        movil_destino = (opml['head']['ownerName'].strip('"'), opml['head']['ownerMobile'])
                    else:
                        movil_destino = False

                    # a 'fromEmail' in the opml head overrides the sender
                    # address stored in the configuration
                    if opml['head'].has_key('fromEmail'):
                        name = opml['head'].get('fromName', 'Newspipe')
                        from_address = name.strip('"') + ' <' + opml['head']['fromEmail'] + '>'
                        config['from_address'] = from_address

                    # histories are loaded lazily; they are reset to None
                    # before each sleep further below
                    if not feed_history or not post_history:
                        feed_history, post_history = CargarHistoricos(opml['head']['title'])

                    feeds_queue = Queue.Queue(0)
                    email_queue = Queue.Queue(0)

                    mylog.debug ('Inserting the feeds into the pending queue')
                    for feed in opml['body']:
                        if feed['active'] == '1':
                            feeds_queue.put(feed)
                        else:
                            mylog.debug ('Ignoring the Inactive feed: '+feed['xmlUrl'])

                    # one None marker per worker is queued after the feeds
                    mylog.debug ('Inserting the end-of-work markers in the queue')
                    for x in range(NUM_WORKERS):
                        feeds_queue.put(None)

                    mylog.debug ('Starting working threads')
                    workers = []
                    for x in range(NUM_WORKERS):
                        w = FeedWorker (feeds_queue, email_queue, config, email_destino, movil_destino, semaforo)
                        workers.append(w)
                        w.start()

                    mylog.debug ('Waiting for all the threads to finish')
                    for w in workers:
                        w.join()

                    # drain the results queue without blocking
                    mylog.debug ('Extracting the emails from the results queue')
                    emails = []
                    while True:
                        try:
                            email = email_queue.get_nowait()
                            emails += [email,]
                        except Queue.Empty:
                            break

                    try:
                        send_emails (emails, config['send_method'], config['smtp_server'], config['smtp_auth'] == '1',config['smtp_user'],config['smtp_pass'], config['procmail'], config['reverse'] == '1')
                        mensajes_enviados = True
                    except:
                        mylog.exception ('Unhandled exception while sending emails')
                        mensajes_enviados = False

                    mylog.debug (filename + ' finished.')

                    # the histories are only cleaned and saved when the
                    # e-mails were sent without an exception
                    if mensajes_enviados:
                        # delete the history entries that are too old
                        for hash, value in post_history.items():
                            # 'modified' is the dirty flag, not a post entry
                            if hash == 'modified':
                                continue

                            timestamp = value['timestamp']
                            delta = timedelta(days = 30) # delete anything older than 30 days - maybe this should be configurable too
                            if (datetime.now() - delta) > timestamp:
                                del post_history[hash]
                                post_history['modified'] = True

                        if post_history['modified']:
                            GrabarHistorico (post_history, opml['head']['title'], '.posts')
                        if feed_history['modified']:
                            GrabarHistorico (feed_history, opml['head']['title'], '.feeds')

            # erase from the cache anything older than 10 days - to be made configurable?
            try:
                cache.purge(10)
            except:
                mylog.exception ('Unhandled exception when purging the cache')

            # a sleep_time of 0 means "run once and exit"
            if int(config['sleep_time']) == 0:
                break
            else:
                del(feed_history)
                del(post_history)
                feed_history, post_history = None, None

            mylog.debug ('Going to sleep for %s minutes' % (config['sleep_time'],))
            try:
                sleep(int(config['sleep_time'])*60)
            except KeyboardInterrupt:
                return

        except:
            mylog.exception ('Unhandled exception')
            raise # stop the loop, to avoid infinite exceptions loops ;)
def send_emails(msgs, method, server, auth, auth_user, auth_pass, procmail, reverse):
    """Deliver a list of e-mail messages through SMTP, procmail, or both.

    msgs      -- sequence of message objects (they must support msg['From'],
                 msg['To'] and as_string()); None entries are skipped.  The
                 caller's sequence is never modified.
    method    -- 'smtp', 'procmail' or 'both' (case-insensitive); any other
                 value sends nothing.
    server    -- SMTP server passed to smtplib.SMTP.
    auth      -- when true, log in with auth_user / auth_pass.
    procmail  -- command line each message is piped to.
    reverse   -- when true, every group of messages is sent in reverse order.

    Messages are processed in groups of 10; with SMTP a new connection is
    opened for every group.  An exception raised while sending a message is
    logged and re-raised.  The socket default timeout is disabled for the
    duration of the call and always restored afterwards.
    """
    # disable the defaulttimeout to avoid a bug with starttls()
    # the defaulttimeout will be restored at the end of the method
    backup_timeout = socket.getdefaulttimeout ()
    socket.setdefaulttimeout (None)

    try:
        transport = method.lower()
        use_smtp = transport in ('smtp', 'both')
        use_procmail = transport in ('procmail', 'both')

        # compiled once instead of once per message
        address_re = re.compile('<(.+?)>')

        # split the message list in groups of 10 messages; each group is a
        # fresh list, so reversing it does not touch the caller's sequence
        group_size = 10
        for start in range(0, len(msgs), group_size):
            batch = list(msgs[start:start + group_size])
            if reverse:
                batch.reverse()

            if use_smtp:
                _send_batch_smtp(batch, server, auth, auth_user, auth_pass, address_re)

            if use_procmail:
                _send_batch_procmail(batch, procmail)
    finally:
        socket.setdefaulttimeout (backup_timeout)


def _failed_note(total, sent):
    """Return the ' (N failed)' suffix for the summary log line, or ''."""
    if sent != total:
        return " (" + str(total - sent) + " failed)"
    return ""


def _send_batch_smtp(batch, server, auth, auth_user, auth_pass, address_re):
    """Send one group of messages over a new SMTP connection."""
    smtp = smtplib.SMTP(server)
    smtp.set_debuglevel(DEBUG)

    respuesta = smtp.ehlo()
    mylog.debug(repr(respuesta))

    # initiate TLS if necessary
    if len(respuesta) == 2 and 'starttls' in respuesta[1].lower():
        mylog.debug('Initiating TLS conection')
        smtp.starttls()
        # keep the new response so the log shows the post-TLS greeting
        # rather than repeating the one received before STARTTLS
        respuesta = smtp.ehlo()
        mylog.debug(repr(respuesta))

    # authenticate with SMTP server when there's need to
    if auth:
        smtp.login(auth_user, auth_pass)

    count = 0
    for msg in batch:
        if msg is None:
            continue

        fromaddr = msg['From']
        toaddr = address_re.findall(msg['To'])

        try:
            # build envelope and send message
            smtp.sendmail(fromaddr, toaddr, msg.as_string(unixfrom=False))
            count = count + 1
            mylog.debug('mail sent to %s from %s ' % (toaddr, fromaddr))
        except:
            mylog.exception("Error sending mail")
            mylog.error(str(msg))
            raise

    try:
        smtp.quit()
    except socket.sslerror:
        pass # connecting to smtp.gmail.com gives this false error

    note = _failed_note(len(batch), count)
    mylog.info ('%d emails sent successfully%s via SMTP' % (count,note,))


def _send_batch_procmail(batch, procmail):
    """Pipe one group of messages, one process per message, to procmail."""
    count = 0
    for msg in batch:
        # same rule as the SMTP path: placeholders are skipped, not sent
        if msg is None:
            continue

        try:
            fp = os.popen(procmail, 'w')
            try:
                fp.write(msg.as_string(unixfrom=True))
            finally:
                # always close the pipe, even when the write failed
                status = fp.close()
        except:
            mylog.exception ('IOError executing '+procmail)
            raise

        # close() returns None when the command exited successfully
        if status is None:
            count += 1

    note = _failed_note(len(batch), count)
    mylog.info ('%d emails sent successfully%s via PROCMAIL' % (count,note,))
def __init__(self, original, channel, encoding="utf-8", remove=None):
    """Initialise the item from a parsed feed entry.

    original -- mapping-like feed entry; its byte-string values are decoded
                in place using `encoding`.
    channel  -- the feed the entry belongs to; htmlUrl, xmlUrl and
                download_link are read from it.
    encoding -- charset used to decode byte strings; '' means utf-8.
    remove   -- optional regular expression; every match is stripped from
                the item text.
    """
    global post_history

    if encoding == '':
        encoding="utf-8"
    # decode every plain (byte) string value of the entry, replacing
    # undecodable bytes
    for key in original.keys():
        if type(original.get(key)) == type(""):
            original[key] = original[key].decode(encoding, "replace")

    self.original = original
    # fall back to the channel's home page when the entry has no link
    self.link = GetValue(original.get('link', channel.htmlUrl))
    if original.has_key('enclosures'):
        self.enclosures = original.enclosures
    else:
        self.enclosures = ()

    self.new_text = ''
    self.text_key = 'None'
    # take the text from the first of these keys that holds a non-empty value
    for k in 'content body content_encoded description summary summary_detail'.split():
        if k in original.keys():
            if original[k]:
                self.new_text = original[k]
                self.text_key = k
                break

    # NOTE(review): text_key is initialised to the string 'None' above, so
    # this comparison against None looks like it can never be true - confirm
    # the intended sentinel before relying on this fallback.
    if self.text_key == None and 'summary_detail' in original.keys() and 'value' in original['summary_detail'].keys():
        self.new_text = original['summary_detail']['value']
        self.text_key = "summary_detail/value"

    self.new_text = GetValue (self.new_text)

    # optionally replace the feed-supplied text with the content of the
    # linked page
    if channel.download_link:
        try:
            downloaded_file = cache.urlopen(self.link, max_age=999999, can_pipe=False);
            message = downloaded_file.info['Cache-Result']
            mylog.debug (message + ' ' + self.link)
        except KeyboardInterrupt:
            raise
        except:
            # best effort: a failed download keeps the text from the feed
            mylog.exception ('Cannot download '+self.link)
            downloaded_file = None

        if downloaded_file:
            self.new_text = downloaded_file.content.read()

    if remove:
        # case-insensitive, dot-matches-newline, verbose pattern
        rc = re.compile (remove, re.I+re.S+re.X)
        self.new_text = re.sub(rc, '', self.new_text)

    # make sure the text ends up as unicode: strict decode first, then
    # retry replacing the offending bytes
    if type(self.new_text) == type(""):
        try:
            self.new_text = self.new_text.decode(encoding)
        except UnicodeDecodeError, e:
            mylog.debug("Error in " + channel.xmlUrl + ", " + original.get('title', original.get('url', '?')) + ": " + str(e))
            self.new_text = self.new_text.decode(encoding, 'replace')
# get the time until next check, 60 minutos by default delay = int(feed['delay']) ###semaforo.acquire() feed_history[url]['ultimo_check'] = datetime.now() feed_history[url]['proximo_check'] = datetime.now() + timedelta(minutes=delay) if items.__len__() > 0: feed_history[url]['ultima_actualizacion'] = datetime.now() feed_history[url]['delay'] = delay feed_history['modified'] = True ###semaforo.release() except: mylog.exception ('Exception processing '+url) def setPriority (priority): # 0 = Low priority # 1 = Normal priority # 2 = High priority if priority == 1: pass elif priority == 2: raise NotImplementedError('High priority mode not implemented yet') elif priority == 0: if sys.platform.lower().startswith('win'):