Beispiel #1
0
def getPlainText(html, links=True):
    """Return a plain-text rendering of *html* as a unicode string.

    html  -- markup to convert; byte strings are decoded as latin1 first.
    links -- accepted for interface compatibility; not used by this function.

    When html2text is not available (``has_html2text`` is false) an empty
    unicode string is returned.  If the conversion fails, the error is
    logged and the value of ``getException()`` is returned in place of the
    converted text.  A byte-string result is decoded as UTF-8 before it is
    returned.
    """
    if not isinstance(html, unicode):
        html = html.decode('latin1')

    plain_text = u''
    if has_html2text:
        # html2text seems to be not-thread-safe, so the conversion is
        # serialized through a module-level lock.
        html2text_lock.acquire()
        try:
            try:
                plain_text = html2text(html).strip()
            except Exception:
                # Only ordinary errors are turned into text; Ctrl-C and
                # interpreter shutdown requests must keep propagating.
                plain_text = getException ()
                mylog.exception ('Error en getPlainText')
        finally:
            html2text_lock.release()

    if not isinstance(plain_text, unicode):
        plain_text = plain_text.decode('utf-8')

    return plain_text
Beispiel #2
0
def _load_opml(config):
    """Locate, fetch and parse the OPML file named by ``config['opml']``.

    The configured name is looked up relative to the current directory and
    to the directory of the running script; if neither candidate exists the
    name is passed to the cache unchanged.

    Returns an ``(opml, filename)`` tuple.  ``opml`` is None when the file
    could not be fetched or parsed (the failure is logged here);
    ``filename`` is the resolved name, for use in later log messages.
    """
    filename = config['opml']
    opml = None
    try:
        source_path = os.path.split(sys.argv[0])[0]
        for p in ('.', source_path):
            candidate = os.path.join(p, filename)
            if os.path.exists (candidate):
                filename = candidate
                break

        fp = cache.urlopen(filename, max_age=60, can_pipe=False).content
        opml = flatten_tree(ParseOPML(fp), OPML_DEFAULTS)
        mylog.debug ('Processing file: '+filename)
    except cache.URLError:
        mylog.error ('Cannot find the opml file: '+filename)
        opml = None
    except Exception:
        # Any other ordinary failure is treated as a parse error; Ctrl-C is
        # deliberately not caught here so it can stop the program.
        mylog.exception ('Error parsing file: '+filename)
        opml = None

    return opml, filename


def _fetch_feeds(feeds, num_workers, config, email_destino, movil_destino, semaforo):
    """Process the active *feeds* with FeedWorker objects; return their emails.

    Every active feed is queued, followed by one None end-of-work marker per
    worker.  ``num_workers`` FeedWorker objects are started and joined, and
    whatever they left in the results queue is returned as a list.
    """
    feeds_queue = Queue.Queue(0)
    email_queue = Queue.Queue(0)

    mylog.debug ('Inserting the feeds into the pending queue')
    for feed in feeds:
        if feed['active'] == '1':
            feeds_queue.put(feed)
        else:
            mylog.debug ('Ignoring the Inactive feed: '+feed['xmlUrl'])

    mylog.debug ('Inserting the end-of-work markers in the queue')
    for x in range(num_workers):
        feeds_queue.put(None)

    mylog.debug ('Starting working threads')
    workers = []
    for x in range(num_workers):
        w = FeedWorker (feeds_queue, email_queue, config, email_destino, movil_destino, semaforo)
        workers.append(w)
        w.start()

    mylog.debug ('Waiting for all the threads to finish')
    for w in workers:
        w.join()

    mylog.debug ('Extracting the emails from the results queue')
    emails = []
    while True:
        try:
            emails.append(email_queue.get_nowait())
        except Queue.Empty:
            break

    return emails


def _prune_post_history(history, max_age_days=30):
    """Delete the entries of *history* older than *max_age_days* days.

    The 'modified' bookkeeping key is never removed; it is set to True
    whenever an entry is deleted.  (Maybe the age should be configurable.)
    """
    cutoff = datetime.now() - timedelta(days=max_age_days)
    # Iterate over a snapshot: entries are deleted while looping.
    for key, value in list(history.items()):
        if key == 'modified':
            continue
        if cutoff > value['timestamp']:
            del history[key]
            history['modified'] = True


def MainLoop():
    """Run the fetch / send / sleep cycle until told to stop.

    On each pass the configuration is re-read, logging is set up, the OPML
    file is loaded, the active feeds are processed by worker objects, the
    resulting emails are sent, the histories are saved when they changed,
    and the cache is purged.  The loop ends when ``sleep_time`` is 0 or when
    the sleep is interrupted with Ctrl-C.  Any other exception is logged and
    re-raised, which also stops the loop.
    """
    global post_history
    global feed_history
    global cache
    global DEBUG

    # A single semaphore instance is handed to every worker of every pass.
    semaforo = _threading.BoundedSemaphore()
    feed_history, post_history = None, None

    while True:
        config = ReadConfig()

        DEBUG = config['debug'] == '1'

        log_dir = os.path.normpath(os.path.join(HOME, '.newspipe/log'))
        mylog.logFile(config['log_console']  == '1', 'newspipe', log_dir, DEBUG)

        gc.collect()

        if DEBUG:
            mylog.warning ('DEBUG MODE')

        mylog.debug ('Home directory: ' + HOME)

        try:
            mylog.debug ('Configuration settings:')
            mylog.debug ('-'*30)
            for x,y in config.items():
                mylog.debug ('%s: %s', x, y)
            mylog.debug ('-'*30)

            setPriority (int(config['priority']))

            cache.offline = config['offline'] == '1'
            if cache.offline:
                mylog.warning('Working offline')

            cache.debug = DEBUG

            if CheckOnline(config):
                NUM_WORKERS = int(config['workers'])

                if not has_threading:
                    mylog.warning ('Running without threads support')
                    NUM_WORKERS = 1

                opml, filename = _load_opml(config)

                if opml:
                    head = opml['head']
                    owner_name = head['ownerName'].strip('"')

                    email_destino = (owner_name, head['ownerEmail'])
                    if 'ownerMobile' in head:
                        movil_destino = (owner_name, head['ownerMobile'])
                    else:
                        movil_destino = False

                    if 'fromEmail' in head:
                        name = head.get('fromName', 'Newspipe')
                        from_address = name.strip('"') + ' <' + head['fromEmail'] + '>'
                        config['from_address'] = from_address

                    # The histories are loaded lazily and reused until they
                    # are dropped before the next sleep.
                    if not feed_history or not post_history:
                        feed_history, post_history = CargarHistoricos(head['title'])

                    emails = _fetch_feeds(opml['body'], NUM_WORKERS, config, email_destino, movil_destino, semaforo)

                    try:
                        send_emails (emails, config['send_method'], config['smtp_server'], config['smtp_auth'] == '1',config['smtp_user'],config['smtp_pass'], config['procmail'], config['reverse'] == '1')
                        mensajes_enviados = True
                    except Exception:
                        mylog.exception ('Unhandled exception while sending emails')
                        mensajes_enviados = False

                    mylog.debug (filename + ' finished.')

                    # The histories are only persisted when the emails went
                    # out, so unsent items are picked up again next time.
                    if mensajes_enviados:
                        # drop the history entries that are too old
                        _prune_post_history(post_history)

                        if post_history['modified']:
                            GrabarHistorico (post_history, head['title'], '.posts')
                        if feed_history['modified']:
                            GrabarHistorico (feed_history, head['title'], '.feeds')

            # erase from the cache anything older than 10 days - to be made configurable?
            try:
                cache.purge(10)
            except Exception:
                mylog.exception ('Unhandled exception when purging the cache')

            if int(config['sleep_time']) == 0:
                break
            else:
                # Drop the histories so they are not kept in memory while
                # sleeping; they are reloaded on the next pass.
                feed_history, post_history = None, None

                mylog.debug ('Going to sleep for %s minutes' % (config['sleep_time'],))
                try:
                    sleep(int(config['sleep_time'])*60)
                except KeyboardInterrupt:
                    return

        except:
            mylog.exception ('Unhandled exception')
            raise  # stop the loop, to avoid infinite exceptions loops ;)
Beispiel #3
0
def _send_batch_smtp(msgs, server, auth, auth_user, auth_pass):
    """Send one batch of messages through a fresh SMTP connection.

    None entries are skipped (and reported as failed in the summary log
    line).  A failure while sending is logged and re-raised; the connection
    is closed in every case.
    """
    address_re = re.compile('<(.+?)>')

    smtp = smtplib.SMTP(server)
    try:
        smtp.set_debuglevel(DEBUG)
        respuesta = smtp.ehlo()
        mylog.debug(repr(respuesta))

        # initiate TLS if necessary
        if len(respuesta) == 2 and 'starttls' in respuesta[1].lower():
            mylog.debug('Initiating TLS conection')
            smtp.starttls()
            # Say EHLO again over the encrypted channel and log THAT reply
            # rather than the stale pre-TLS one.
            respuesta = smtp.ehlo()
            mylog.debug(repr(respuesta))

        # authenticate with SMTP server when there's need to
        if auth:
            smtp.login(auth_user, auth_pass)

        count = 0
        for msg in msgs:
            if msg is None:
                continue

            fromaddr = msg['From']
            toaddr = address_re.findall(msg['To'])

            try:
                # build envelope and send message
                smtp.sendmail(fromaddr, toaddr, msg.as_string(unixfrom=False))
                count = count + 1
                mylog.debug('mail sent to %s from %s ' % (toaddr, fromaddr))
            except:
                mylog.exception("Error sending mail")
                mylog.error(str(msg))
                raise

        try:
            smtp.quit()
        except socket.sslerror:
            pass  # connecting to smtp.gmail.com gives this false error
    finally:
        # Harmless after a successful quit(); releases the socket when
        # anything above raised.
        smtp.close()

    if count != len(msgs):
        note = " (" + str(len(msgs)-count) +" failed)"
    else:
        note=""
    mylog.info ('%d emails sent successfully%s via SMTP' % (count,note,))


def _send_batch_procmail(msgs, procmail):
    """Pipe every message of the batch into the *procmail* command.

    None entries are skipped, as in the SMTP path.  A message counts as
    sent when closing the pipe reports no exit status.
    """
    count = 0
    for msg in msgs:
        if msg is None:
            continue

        try:
            fp = os.popen(procmail, 'w')
            try:
                fp.write(msg.as_string(unixfrom=True))
            finally:
                # Always close the pipe, even when the write failed.
                status = fp.close()
        except:
            mylog.exception ('IOError executing '+procmail)
            raise

        if status is None:
            count += 1

    if count != len(msgs):
        note = " (" + str(len(msgs)-count) +" failed)"
    else:
        note=""
    mylog.info ('%d emails sent successfully%s via PROCMAIL' % (count,note,))


def send_emails(msgs, method, server, auth, auth_user, auth_pass, procmail, reverse):
    """Deliver *msgs* by SMTP, procmail or both, in batches of 10.

    msgs      -- list of message objects; None entries are skipped.  The
                 list itself is never modified.
    method    -- 'smtp', 'procmail' or 'both' (case-insensitive); any other
                 value delivers nothing.
    server    -- SMTP server passed to smtplib.SMTP.
    auth      -- when true, log in with auth_user / auth_pass.
    procmail  -- command line each message is piped to.
    reverse   -- when true, the messages are sent in reverse order.

    Delivery errors are logged and re-raised.  The socket default timeout
    is disabled while sending and restored on exit.
    """
    # disable the defaulttimeout to avoid a bug with starttls()
    # the defaulttimeout will be restored at the end of the method
    backup_timeout = socket.getdefaulttimeout ()
    socket.setdefaulttimeout (None)

    try:
        pending = msgs[:]
        # Reverse the whole list before batching, so the overall order is
        # reversed and not just the order inside each group of 10.
        if reverse:
            pending.reverse()

        wanted = method.lower()
        use_smtp = wanted in ('smtp', 'both')
        use_procmail = wanted in ('procmail', 'both')

        # split the message list in groups of 10 messages
        for start in range(0, len(pending), 10):
            batch = pending[start:start + 10]

            if use_smtp:
                _send_batch_smtp(batch, server, auth, auth_user, auth_pass)

            if use_procmail:
                _send_batch_procmail(batch, procmail)
    finally:
        socket.setdefaulttimeout (backup_timeout)
Beispiel #4
0
    def __init__(self, original, channel, encoding="utf-8", remove=None):
        """Build the item from a parsed feed entry.

        original -- mapping with the entry data; its byte-string values are
                    decoded to unicode IN PLACE.
        channel  -- feed settings; ``htmlUrl``, ``download_link`` and
                    ``xmlUrl`` are read from it.
        encoding -- charset used to decode byte strings; an empty or None
                    value falls back to utf-8.
        remove   -- optional regular expression (compiled with the
                    IGNORECASE, DOTALL and VERBOSE flags); every match is
                    deleted from the item text.

        Sets ``original``, ``link``, ``enclosures``, ``new_text`` and
        ``text_key`` on the instance.
        """
        global post_history

        if not encoding:
            encoding = "utf-8"

        for key in original.keys():
            if isinstance(original.get(key), str):
                original[key] = original[key].decode(encoding, "replace")

        self.original = original
        self.link = GetValue(original.get('link', channel.htmlUrl))
        if original.has_key('enclosures'):
            self.enclosures = original.enclosures
        else:
            self.enclosures = ()

        # Use the first non-empty candidate field as the item text and
        # remember which one it was; text_key stays at the string 'None'
        # when no field matches.
        self.new_text = ''
        self.text_key = 'None'
        for k in ('content', 'body', 'content_encoded', 'description', 'summary', 'summary_detail'):
            if k in original.keys() and original[k]:
                self.new_text = original[k]
                self.text_key = k
                break

        # NOTE: an extra fallback to original['summary_detail']['value']
        # used to follow here.  It compared text_key with None although the
        # "not found" marker is the string 'None', so it could never run;
        # 'summary_detail' is already tried by the loop above.

        self.new_text = GetValue (self.new_text)

        if channel.download_link:
            # Replace the feed text with the content of the linked page.
            try:
                downloaded_file = cache.urlopen(self.link, max_age=999999, can_pipe=False)
                message = downloaded_file.info['Cache-Result']
                mylog.debug (message + ' ' + self.link)
            except KeyboardInterrupt:
                raise
            except Exception:
                mylog.exception ('Cannot download '+self.link)
                downloaded_file = None

            if downloaded_file:
                self.new_text = downloaded_file.content.read()

        if remove:
            rc = re.compile (remove, re.I | re.S | re.X)
            self.new_text = rc.sub('', self.new_text)

        if isinstance(self.new_text, str):
            try:
                self.new_text = self.new_text.decode(encoding)
            except UnicodeDecodeError:
                # sys.exc_info() is used instead of binding the exception in
                # the except clause, which has no spelling that works on
                # every Python version.
                e = sys.exc_info()[1]
                mylog.debug("Error in " + channel.xmlUrl + ", " + original.get('title', original.get('url', '?')) + ": " + str(e))
                # Fall back to a lossy decode rather than failing the item.
                self.new_text = self.new_text.decode(encoding, 'replace')
Beispiel #5
0
                

                    # get the time until next check, 60 minutos by default
                    delay = int(feed['delay'])

                    ###semaforo.acquire()
                    feed_history[url]['ultimo_check'] = datetime.now()
                    feed_history[url]['proximo_check'] = datetime.now() + timedelta(minutes=delay)
                    if items.__len__() > 0:
                        feed_history[url]['ultima_actualizacion'] = datetime.now()
                    feed_history[url]['delay'] = delay
                    feed_history['modified'] = True
                    ###semaforo.release()
            
            except:
                mylog.exception ('Exception processing '+url)
    



def setPriority (priority):
    # 0 = Low priority
    # 1 = Normal priority
    # 2 = High priority

    if priority == 1:
        pass
    elif priority == 2:
        raise NotImplementedError('High priority mode not implemented yet')
    elif priority == 0:
        if sys.platform.lower().startswith('win'):