def GrabarHistorico(dicc, name, extension):
    """Persist the history dict *dicc* to ~/.newspipe/data/<name><extension>.

    Uses a write-new / rotate-backup scheme: the pickle is written to a
    '.new' file first, the previous archive is rotated to '.bak', and the
    '.new' file is renamed into place.  On success dicc['modified'] is
    cleared so callers can skip redundant saves.

    dicc      -- history dict (pickled as-is; its 'modified' flag is reset)
    name      -- archive base name; unicode is encoded to latin1
    extension -- e.g. '.posts' or '.feeds'
    """
    if isinstance(name, unicode):
        name = name.encode('latin1', 'replace')
    data_dir = os.path.normpath(os.path.join(HOME, '.newspipe/data'))
    mylog.debug('Saving archive ' + name + extension)
    base = os.path.join(data_dir, name + extension)
    # Write to the temporary '.new' file and close it explicitly so the
    # data is flushed to disk before the renames below.  The original
    # dump(dicc, open(...)) relied on CPython's refcounting to close the
    # handle, which is not guaranteed on other interpreters.
    f = open(base + '.new', 'w')
    try:
        dump(dicc, f)
    finally:
        f.close()
    # Rotate: drop the old backup (if any), move the current archive to
    # '.bak' (if any), then promote the freshly written file.
    try:
        os.remove(base + '.bak')
    except OSError:
        pass
    try:
        os.rename(base, base + '.bak')
    except OSError:
        pass
    os.rename(base + '.new', base)
    dicc['modified'] = False
def CargarHistoricos(name):
    """Load the feed and post history archives for the OPML titled *name*.

    Looks in ~/.newspipe/data (creating it if missing) for '<name>.feeds'
    and '<name>.posts'.  Each archive is loaded best-effort: if the main
    pickle cannot be read, the '.bak' copy is tried; if that fails too an
    empty dict is used.  Both dicts get their 'modified' flag cleared.

    Returns (feed_history, post_history).
    """
    if isinstance(name, unicode):
        name = name.encode('latin1', 'replace')
    data_dir = os.path.normpath(os.path.join(HOME, '.newspipe/data'))
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    # NOTE: the original contained a dead 'try: if feed_history: del(...)
    # except UnboundLocalError: pass' for each dict; the names were
    # unassigned locals, so the guard could never do anything. Removed.

    def _load_pickle(path):
        # Unpickle *path*, always closing the file handle (the original
        # left load(open(...)) handles to the garbage collector).
        f = open(path)
        try:
            return load(f)
        finally:
            f.close()

    def _load_archive(ext, loading_msg):
        # Best-effort load of one archive: main file, then backup, then {}.
        # 'except Exception' (not bare except) so Ctrl-C still propagates.
        try:
            mylog.debug(loading_msg)
            return _load_pickle(os.path.join(data_dir, name + ext))
        except Exception:
            mylog.debug('Archive not found. Trying backup file ' + name + ext + '.bak')
            try:
                return _load_pickle(os.path.join(data_dir, name + ext + '.bak'))
            except Exception:
                return {}

    feed_history = _load_archive('.feeds', 'Loading feed archive ' + name + '.feeds')
    post_history = _load_archive('.posts', 'Loading post archive ' + name + '.posts')
    feed_history['modified'] = False
    post_history['modified'] = False
    return feed_history, post_history
def setPriority(priority):
    """Adjust the scheduling priority of the current thread.

    priority -- 0 = low, 1 = normal (no-op), 2 = high (not implemented).

    Raises NotImplementedError for priority 2, or for priority 0 on
    non-Windows platforms; raises ValueError for any other value.
    Lowering the priority uses the Win32 SetThreadPriority API via ctypes;
    if ctypes is unavailable the request is logged and ignored.
    """
    if priority == 1:
        return  # normal priority: nothing to change
    if priority == 2:
        raise NotImplementedError('High priority mode not implemented yet')
    if priority != 0:
        raise ValueError('The parameter "priority" has an invalid value (%d)' % priority)
    # priority == 0: lower the current thread's priority (Windows only).
    if not sys.platform.lower().startswith('win'):
        raise NotImplementedError('Priority settings only implemented in Windows')
    try:
        import ctypes
    except ImportError:
        mylog.error('CTypes module is not available. The parameter "priority" will be ignored')
        return
    kernel32 = ctypes.windll.kernel32
    # THREAD_PRIORITY_IDLE == -15: run only when the system is idle.
    kernel32.SetThreadPriority(kernel32.GetCurrentThread(), -15)
    mylog.debug("Thread priority lowered.")
def MainLoop():
    """Top-level daemon loop.

    Each pass: reads the config, fetches and parses the OPML subscription
    list, feeds the active subscriptions to a pool of FeedWorker threads,
    mails the produced messages, expires old history entries, saves state,
    and sleeps.  Runs until sleep_time is 0 (single pass), the user hits
    Ctrl-C during the sleep, or an unhandled exception escapes.

    NOTE(review): the indentation below was reconstructed from a
    whitespace-mangled source; the nesting of the cache-purge step is a
    best guess — confirm against the project's history.
    """
    global post_history
    global feed_history
    global cache
    global DEBUG
    # Serializes FeedWorker access to the shared history dicts.
    semaforo = _threading.BoundedSemaphore()
    feed_history, post_history = None, None
    while True:
        config = ReadConfig()
        DEBUG = config['debug'] == '1'
        log_dir = os.path.normpath(os.path.join(HOME, '.newspipe/log'))
        mylog.logFile(config['log_console'] == '1', 'newspipe', log_dir, DEBUG)
        gc.collect()
        if DEBUG:
            mylog.warning('DEBUG MODE')
        mylog.debug('Home directory: ' + HOME)
        try:
            # Dump the active configuration to the debug log.
            mylog.debug('Configuration settings:')
            mylog.debug('-'*30)
            for x, y in config.items():
                mylog.debug('%s: %s', x, y)
            mylog.debug('-'*30)
            setPriority(int(config['priority']))
            cache.offline = config['offline'] == '1'
            if cache.offline:
                mylog.warning('Working offline')
            cache.debug = DEBUG
            if CheckOnline(config):
                NUM_WORKERS = int(config['workers'])
                if not has_threading:
                    mylog.warning('Running without threads support')
                    NUM_WORKERS = 1  # no threading module: fall back to one worker
                filename = config['opml']
                opml = None
                try:
                    # Prefer a local OPML file (cwd or script dir) before
                    # letting the cache fetch it as a URL.
                    source_path = os.path.split(sys.argv[0])[0]
                    for p in ('.', source_path):
                        if os.path.exists(os.path.join(p, filename)):
                            filename = os.path.join(p, filename)
                            break
                    fp = cache.urlopen(filename, max_age=60, can_pipe=False).content
                    opml = flatten_tree(ParseOPML(fp), OPML_DEFAULTS)
                    mylog.debug('Processing file: ' + filename)
                except cache.URLError:
                    mylog.error('Cannot find the opml file: ' + filename)
                    opml = None
                except:
                    mylog.exception('Error parsing file: ' + filename)
                    opml = None
                if opml:
                    # Destination (and optional mobile) addresses come from the OPML head.
                    email_destino = (opml['head']['ownerName'].strip('"'), opml['head']['ownerEmail'])
                    if( opml['head'].has_key('ownerMobile') ):
                        movil_destino = (opml['head']['ownerName'].strip('"'), opml['head']['ownerMobile'])
                    else:
                        movil_destino = False
                    if opml['head'].has_key('fromEmail'):
                        name = opml['head'].get('fromName', 'Newspipe')
                        from_address = name.strip('"') + ' <' + opml['head']['fromEmail'] + '>'
                        config['from_address'] = from_address
                    # Load the persisted feed/post state on the first pass
                    # (it is dropped again before each sleep below).
                    if not feed_history or not post_history:
                        feed_history, post_history = CargarHistoricos(opml['head']['title'])
                    feeds_queue = Queue.Queue(0)
                    email_queue = Queue.Queue(0)
                    mylog.debug('Inserting the feeds into the pending queue')
                    for feed in opml['body']:
                        if feed['active'] == '1':
                            feeds_queue.put(feed)
                        else:
                            mylog.debug('Ignoring the Inactive feed: ' + feed['xmlUrl'])
                    mylog.debug('Inserting the end-of-work markers in the queue')
                    # One None sentinel per worker so every thread terminates.
                    for x in range(NUM_WORKERS):
                        feeds_queue.put(None)
                    mylog.debug('Starting working threads')
                    workers = []
                    for x in range(NUM_WORKERS):
                        w = FeedWorker(feeds_queue, email_queue, config, email_destino, movil_destino, semaforo)
                        workers.append(w)
                        w.start()
                    mylog.debug('Waiting for all the threads to finish')
                    for w in workers:
                        w.join()
                    mylog.debug('Extracting the emails from the results queue')
                    emails = []
                    while True:
                        try:
                            email = email_queue.get_nowait()
                            emails += [email,]
                        except Queue.Empty:
                            break
                    try:
                        send_emails(emails, config['send_method'], config['smtp_server'], config['smtp_auth'] == '1', config['smtp_user'], config['smtp_pass'], config['procmail'], config['reverse'] == '1')
                        mensajes_enviados = True
                    except:
                        mylog.exception('Unhandled exception while sending emails')
                        mensajes_enviados = False
                    mylog.debug(filename + ' finished.')
                    if mensajes_enviados:
                        # Expire history entries that are too old.
                        for hash, value in post_history.items():
                            if hash == 'modified':
                                continue
                            timestamp = value['timestamp']
                            delta = timedelta(days = 30)
                            # delete anything older than 30 days - maybe this should be configurable too
                            if (datetime.now() - delta) > timestamp:
                                del post_history[hash]
                                post_history['modified'] = True
                        if post_history['modified']:
                            GrabarHistorico(post_history, opml['head']['title'], '.posts')
                        if feed_history['modified']:
                            GrabarHistorico(feed_history, opml['head']['title'], '.feeds')
                # erase from the cache anything older than 10 days - to be made configurable?
                try:
                    cache.purge(10)
                except:
                    mylog.exception('Unhandled exception when purging the cache')
            if int(config['sleep_time']) == 0:
                break
            else:
                # Drop the in-memory state; it is reloaded on the next pass.
                del(feed_history)
                del(post_history)
                feed_history, post_history = None, None
                mylog.debug('Going to sleep for %s minutes' % (config['sleep_time'],))
                try:
                    sleep(int(config['sleep_time'])*60)
                except KeyboardInterrupt:
                    return
        except:
            mylog.exception('Unhandled exception')
            raise  # stop the loop, to avoid infinite exceptions loops ;)
def run(self):
    """Worker-thread main loop (FeedWorker).

    Pulls feeds from self.feeds_queue until a None sentinel arrives.  For
    each feed: honours its optional check_time window, registers it in the
    shared feed_history (under the semaphore), skips it if its next-check
    time has not arrived, resolves per-feed email/auth overrides, and
    downloads/parses it through the cache.

    NOTE(review): this fragment ends inside the download try-block; the
    remainder of the method (processing of the parsed feed) is outside the
    visible source.
    """
    config = self.config
    _email_destino = self.email_destino
    movil_destino = self.movil_destino
    semaforo = self.semaforo
    while True:
        gc.collect()
        feed = self.feeds_queue.get()
        if feed is None:
            break  # end-of-work sentinel
        url = feed['xmlUrl']
        try:
            # Optional time window: only check this feed inside the range.
            time = feed.get('check_time', None)
            if time:
                parsed_time = parseTimeRange(time)
                if parsed_time:
                    if not checkTime(parsed_time):
                        mylog.debug('Ignoring the feed ' + url)
                        continue
                else:
                    mylog.error('Error parsing the time range "%s" in the feed %s' % (time, url))
            items = []
            # First sight of this feed: create its history record.
            # The semaphore guards the shared feed_history dict.
            semaforo.acquire()
            if not feed_history.has_key(url):
                feed_history[url] = {}
                feed_history[url]['ultimo_check'] = None
                feed_history[url]['proximo_check'] = None
                feed_history[url]['ultima_actualizacion'] = None
                feed_history[url]['delay'] = None
                feed_history['modified'] = True
            semaforo.release()
            ultimo_check = feed_history[url]['ultimo_check']
            proximo_check = feed_history[url]['proximo_check']
            ultima_actualizacion = feed_history[url].get('ultima_actualizacion', None)
            delay = feed_history[url].get('delay', None)
            ahora = datetime.now()
            # Not yet due for a new check: skip this feed.
            if proximo_check and ahora < proximo_check:
                continue
            title = feed.get('title', feed.get('text', url))
            mylog.debug('Processing ' + title)
            # Per-feed recipient override (keeps the configured owner name).
            email = feed.get('ownerEmail', None)
            if email:
                email_destino = _email_destino[0], email
            else:
                email_destino = _email_destino
            # Optional HTTP auth, given as "user:password".
            auth = feed.get('auth', None)
            if auth:
                if ':' in auth:
                    username, password = auth.split(':')
                else:
                    mylog.error('The "auth" parameter for the feed ' + title + ' is invalid')
                    continue
            else:
                username, password = None, None
            xml = None
            try:
                xml = cache.feed_parse(url, config['can_pipe'] == '1', username, password)
            except cache.SocketTimeoutError:
                mylog.info('Timeout error downloading %s' % url)
                mylog.debug('Will retry in the the next pass')
                continue
            except cache.HTTPError, e:
                mylog.info('HTTP Error %d downloading %s' % (e.code, url,))
            except cache.URLError, e:
                mylog.info('URLError (%s) downloading %s' % (e.reason, url,))
            except cache.OfflineError:
                mylog.info('Resource unavailable when offline (%s)' % url)
def send_emails(msgs, method, server, auth, auth_user, auth_pass, procmail, reverse):
    """Deliver the given email.Message objects.

    msgs      -- list of messages (None entries are skipped in SMTP delivery)
    method    -- 'smtp', 'procmail' or 'both'
    server    -- SMTP server host
    auth      -- truthy to authenticate with auth_user/auth_pass
    procmail  -- shell command each message is piped to (procmail delivery)
    reverse   -- truthy to reverse the order within each group of 10

    Messages are sent in groups of 10, with one SMTP session per group.
    Any send failure is logged and re-raised to the caller.
    """
    # Disable the default socket timeout to avoid a bug with starttls();
    # it is restored in the finally clause below.
    backup_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(None)
    try:
        # Split the message list into groups of 10 messages.
        groups = []
        aux = []
        for each in msgs[:]:
            if len(aux) == 10:
                groups.append(aux)
                aux = []
            aux.append(each)
        if aux:
            groups.append(aux)
        for group in groups:
            if reverse:
                group.reverse()
            if method.lower() in ('smtp', 'both'):
                _send_group_smtp(group, server, auth, auth_user, auth_pass)
            if method.lower() in ('procmail', 'both'):
                _send_group_procmail(group, procmail)
    finally:
        socket.setdefaulttimeout(backup_timeout)


def _send_group_smtp(group, server, auth, auth_user, auth_pass):
    # Deliver one group of messages over a single SMTP session.
    smtp = smtplib.SMTP(server)
    smtp.set_debuglevel(DEBUG)
    respuesta = smtp.ehlo()
    mylog.debug(repr(respuesta))
    # Initiate TLS if the server advertises it in the EHLO response.
    if len(respuesta) == 2 and 'starttls' in respuesta[1].lower():
        mylog.debug('Initiating TLS conection')
        smtp.starttls()
        smtp.ehlo()
        mylog.debug(repr(respuesta))
    # Authenticate with the SMTP server when there's need to.
    if auth:
        smtp.login(auth_user, auth_pass)
    count = 0
    # Extracts the bare address from "Name <addr>" headers; compiled once
    # instead of per message.
    addr_re = re.compile('<(.+?)>')
    for msg in group:
        if msg is None:
            continue
        fromaddr = msg['From']
        toaddr = addr_re.findall(msg['To'])
        try:
            # Build envelope and send message.
            smtp.sendmail(fromaddr, toaddr, msg.as_string(unixfrom=False))
            count = count + 1
            mylog.debug('mail sent to %s from %s ' % (toaddr, fromaddr))
        except Exception:
            mylog.exception("Error sending mail")
            mylog.error(str(msg))
            raise
    try:
        smtp.quit()
    except socket.sslerror:
        pass  # connecting to smtp.gmail.com gives this false error
    # NOTE: None placeholders are skipped above but still counted in
    # len(group), so they show up as "failed" here (original behavior).
    if count != len(group):
        note = " (" + str(len(group) - count) + " failed)"
    else:
        note = ""
    mylog.info('%d emails sent successfully%s via SMTP' % (count, note,))


def _send_group_procmail(group, procmail):
    # Deliver one group of messages by piping each to the procmail command.
    count = 0
    for msg in group:
        try:
            fp = os.popen(procmail, 'w')
            fp.write(msg.as_string(unixfrom=True))
            status = fp.close()  # None means the command exited successfully
        except Exception:
            mylog.exception('IOError executing ' + procmail)
            # (the original also set status = 1 here, but the raise below
            # made that assignment dead code)
            raise
        if status is None:
            count += 1
    if count != len(group):
        note = " (" + str(len(group) - count) + " failed)"
    else:
        note = ""
    mylog.info('%d emails sent successfully%s via PROCMAIL' % (count, note,))
def __init__(self, original, channel, encoding="utf-8", remove=None):
    """Build an item wrapper around a parsed feed entry.

    original -- feedparser-style entry dict (values decoded in place)
    channel  -- owning channel; provides htmlUrl, download_link, xmlUrl
    encoding -- charset used to decode byte-string values ('' means utf-8)
    remove   -- optional verbose/ignorecase/dotall regex stripped from the text
    """
    global post_history
    if encoding == '':
        encoding = "utf-8"
    # Decode every plain byte-string value of the entry up front.
    for key in original.keys():
        if isinstance(original.get(key), str):
            original[key] = original[key].decode(encoding, "replace")
    self.original = original
    self.link = GetValue(original.get('link', channel.htmlUrl))
    if 'enclosures' in original:
        self.enclosures = original.enclosures
    else:
        self.enclosures = ()
    # Pick the first non-empty content-like field as the item text.
    self.new_text = ''
    # BUG FIX: this was initialised to the *string* 'None', which made the
    # summary_detail fallback below unreachable (text_key == None was never
    # true). Use the real None sentinel the comparison clearly intended.
    self.text_key = None
    for k in 'content body content_encoded description summary summary_detail'.split():
        if k in original.keys():
            if original[k]:
                self.new_text = original[k]
                self.text_key = k
                break
    if self.text_key == None and 'summary_detail' in original.keys() and 'value' in original['summary_detail'].keys():
        self.new_text = original['summary_detail']['value']
        self.text_key = "summary_detail/value"
    self.new_text = GetValue(self.new_text)
    # Optionally replace the feed text with the full linked page.
    if channel.download_link:
        try:
            downloaded_file = cache.urlopen(self.link, max_age=999999, can_pipe=False)
            message = downloaded_file.info['Cache-Result']
            mylog.debug(message + ' ' + self.link)
        except KeyboardInterrupt:
            raise
        except:
            mylog.exception('Cannot download ' + self.link)
            downloaded_file = None
        if downloaded_file:
            self.new_text = downloaded_file.content.read()
    # Strip the user-supplied pattern from the text, if any.
    if remove:
        rc = re.compile(remove, re.I | re.S | re.X)
        self.new_text = re.sub(rc, '', self.new_text)
    # Downloaded pages arrive as raw bytes: decode, falling back to
    # 'replace' on bad byte sequences.
    if isinstance(self.new_text, str):
        try:
            self.new_text = self.new_text.decode(encoding)
        except UnicodeDecodeError as e:
            mylog.debug("Error in " + channel.xmlUrl + ", " + original.get('title', original.get('url', '?')) + ": " + str(e))
            self.new_text = self.new_text.decode(encoding, 'replace')
xml = cache.feed_parse(url, config['can_pipe'] == '1', username, password) except cache.SocketTimeoutError: mylog.info ('Timeout error downloading %s' % url) mylog.debug ('Will retry in the the next pass') continue except cache.HTTPError, e: mylog.info ('HTTP Error %d downloading %s' % (e.code, url,)) except cache.URLError, e: mylog.info ('URLError (%s) downloading %s' % (e.reason, url,)) except cache.OfflineError: mylog.info ('Resource unavailable when offline (%s)' % url) except Exception, e: mylog.exception ('Error %s downloading %s' % (str(e), url)) if xml: mylog.debug (xml['channel']['Cache-Result'] + ' ' + url) channel = Channel(title, xml['channel'], url, feed['htmlUrl'], feed['download_link'] == '1', feed['diff'] == '1', feed['download_images'] == '1', feed) for elemento in xml['items']: item = channel.NewItem(elemento, xml["encoding"], feed['remove']) # for k in item.keys(): # mylog.debug('Key: ' + str(k)) if post_history.has_key(item.urlHash): post_history[item.urlHash]['timestamp'] = datetime.now() post_history['modified'] = True check_text = feed['check_text'] == '1' if check_text: if item.new_text.strip() == post_history[item.urlHash]['text'].strip():