Example #1
    def __store_article(self, title, url, category, content, date_published, author_name, feed_link, feed):
        # Skip duplicates: an article with the same title has already been stored.
        try:
            entry = FeedsResult.objects.get(title=title)
            return False
        except FeedsResult.DoesNotExist:
            entry = FeedsResult(
                title=htmllib.decoding(title),
                link=url,
                excerpt=content,
                author_name=htmllib.decoding(author_name),
                category=category,
                feed=self.model.objects.get(feedurl=feed_link),
                date=datetime.now(),
            )

            # Disabled: parse the published date from the feed string instead of
            # using the current time.
            # try:
            #     entry.date = datetime.strptime(date_published[:-6], '%a, %d %b %Y %H:%M:%S')
            # except ValueError:
            #     try:
            #         entry.date = datetime.strptime(date_published[0:19], '%Y-%m-%d %H:%M:%S')
            #     except ValueError:
            #         entry.date = datetime.now()

            entry.save()
            return True
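The disabled block above hints at how the feed's published-date string could be parsed instead of falling back to datetime.now(). A minimal standalone sketch of that idea follows; the helper name and the sample date strings in the comments are assumptions, not values from the original project:

    from datetime import datetime

    def parse_feed_date(date_published):
        # RFC 822 style, e.g. 'Mon, 06 Sep 2010 12:34:56 +0000' -- drop the trailing ' +0000'
        try:
            return datetime.strptime(date_published[:-6], '%a, %d %b %Y %H:%M:%S')
        except ValueError:
            pass
        # ISO style, e.g. '2010-09-06 12:34:56+08:00' -- keep only the first 19 characters
        try:
            return datetime.strptime(date_published[0:19], '%Y-%m-%d %H:%M:%S')
        except ValueError:
            return datetime.now()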
Example #2
    def __parse_feed(self, feed_content, feed_url, stop_target, category, feed_latest, start_target, mid_target, end_target, allow_target):
        feed = feedparser.parse(feed_content)
        i = 0
        dead_i = 0
        for entry in feed.entries:
            logger.info('start parsing feed, the dead_i is %s', dead_i)
            title = htmllib.decoding(entry.title)
            categorie_keys = []
            content = ''
            date_published = datetime.now()
            author_name = ''
            Mystat = True
            if not self.__feedslist_check(title):
                try:
                    i += 1
                    url = ''
                    logger.info('beginning to add new article No. %s', i)
                    # Prefer the original article URL when the feed comes from FeedBurner.
                    if 'feedburner_origlink' in entry:
                        url = entry.feedburner_origlink
                    else:
                        url = entry.link
                    if 'content' in entry:
                        content = entry.content[0].value
                    else:
                        content = entry.description
                    if 'author' in entry:
                        author_name = entry.author
                    else:
                        author_name = "转载"  # "reprinted" -- used when the feed names no author
                    # Strip HTML tags from the title before storing it.
                    stripper = HTMLStripper()
                    stripper.feed(title)
                    title = stripper.get_data()
                    content = htmllib.decoding(content)
                    content = htmllib.GetFeedclean(url, content, stop_target)
                    if 'updated_parsed' in entry:
                        date_published = datetime(*entry.updated_parsed[:6])
                    else:
                        date_published = datetime.now()
                except Exception as data:
                    logger.warning('it looks like something happened, the error is %s', data)

                try:
                    feedresult = self.__store_article(title, url, category, content, date_published, author_name, feed_url, feed)
                    if feedresult:
                        logger.info('The No.%s is fetched to the db', i)
                    else:
                        logger.error('The No.%s fetch failed', i)
                        Mystat = False
                except Exception as data:
                    logger.warning('the error is %s', data)
                    Mystat = False
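For context, feedparser normalizes RSS and Atom into the same entry structure used above; each entry behaves like a dictionary, so optional fields can be tested with `in`. A minimal sketch of that access pattern, with the feed URL being a placeholder rather than one from the original project:

    import feedparser
    from datetime import datetime

    d = feedparser.parse('http://example.com/feed.xml')  # placeholder URL
    for entry in d.entries:
        title = entry.title
        url = entry.link
        # Full content when available, otherwise the summary/description field.
        body = entry.content[0].value if 'content' in entry else entry.get('description', '')
        if 'updated_parsed' in entry:
            published = datetime(*entry.updated_parsed[:6])  # time.struct_time -> datetime
        else:
            published = datetime.now()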
Example #3
    def __store_entry(self, feed):
        try:
            # get_or_create returns (object, created); reuse an existing entry
            # with the same title instead of inserting a duplicate.
            entry, result = Entry.published.get_or_create(title=feed.title)
            entry.excerpt = htmllib.Filter_html(feed.excerpt).strip()[:80] + u'……'
            entry.status = 2
            entry.author_name = htmllib.decoding(feed.author_name)
            entry.date = feed.date
            entry.slug = htmllib.sid()
            entry.content = htmllib.decoding(self.__Parse_image(feed.content))
            entry.categories.add(feed.feed.category)
            entry.sites = [Site.objects.get_current(), ]
            entry.save()
            feed.fetch_stat = 4
            feed.save()
        except Exception as data:
            logger.error('the db saved error is: %s', data)
            feed.fetch_stat = 3
            feed.save()
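Django's get_or_create returns an (object, created) tuple, which is why the snippet unpacks two values even though only the entry is used. A small sketch of the pattern, reusing the Entry manager from above with a hypothetical title:

    entry, created = Entry.published.get_or_create(title=u'Some fetched title')  # hypothetical title
    if created:
        logger.info('created a new entry: %s', entry.title)
    else:
        logger.info('reusing existing entry: %s', entry.title)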
Example #4
    def __store_article(self, contenthtml, feed):
        entry = FeedsResult.objects.get(pk=feed.pk)
        try:
            entry.content = htmllib.decoding(contenthtml)
            entry.fetch_stat = 1
            # Record every image URL found in the article so it can be fetched later.
            images = htmllib.Parse_images_url(contenthtml)
            for image in images:
                obj, result = TempImages.objects.get_or_create(oldurl=image, entry=entry)
        except Exception as data:
            entry.fetch_stat = 2
            logger.info('the db saved error is: %s', data)
Example #5
    def __Parse_image(self, content):
        images = htmllib.Parse_images_url(content)

        if images:
            try:
                for image in images:
                    # Swap each original image URL for the locally cached copy, if one exists.
                    tmpimage = TempImages.objects.get(oldurl=image)
                    if tmpimage is not None:
                        content = gbtools.stringQ2B(content)
                        content = htmllib.decoding(content).replace(image, tmpimage.newurl)
            except Exception as data:
                logger.info(data)
        return content
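gbtools.stringQ2B is a project-internal helper; the name suggests it converts full-width ("Q", 全角) characters to their half-width ("B", 半角) ASCII equivalents so that image URLs embedded in Chinese text can be matched and replaced. The following is only a rough sketch of that kind of conversion, an assumption rather than the project's actual implementation:

    def stringQ2B(ustring):
        # Assumed behaviour: map full-width characters to half-width ASCII.
        chars = []
        for ch in ustring:
            code = ord(ch)
            if code == 0x3000:                 # full-width space
                code = 0x20
            elif 0xFF01 <= code <= 0xFF5E:     # full-width '!' .. '~'
                code -= 0xFEE0
            chars.append(unichr(code))
        return u''.join(chars)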