Esempio n. 1
0
    def _write_to_db(self, item):
        """Store ``item`` as a ``News`` row keyed by its title.

        An existing row with the same title is reused; otherwise a new,
        unsaved ``News`` is created with that title. The remaining fields are
        then overwritten from ``item`` and the row is saved.

        :param item: mapping providing the keys ``title``, ``description``,
            ``order``, ``link`` and ``date``.
        :returns: primary key of the saved ``News`` row.
        """
        title = item["title"]
        try:
            record = News.objects.get(title=title)
        except News.DoesNotExist:
            record = News(title=title)

        # (model attribute, item key) pairs; only ``body`` is named
        # differently from the item key it is filled from.
        field_sources = (
            ("body", "description"),
            ("order", "order"),
            ("link", "link"),
            ("date", "date"),
        )
        for attribute, key in field_sources:
            setattr(record, attribute, item[key])
        record.save()

        return record.pk
Esempio n. 2
0
 def handle(self, *arguments, **options):
     """Save one ``News`` record per ``item-container`` div found in ``soup``.

     For each container, numbered from 1:

     * ``title`` is set to ``'Title <n>'``;
     * ``content`` is the concatenated text of every ``<p>`` inside the next
       ``div.issue-item`` following the container;
     * ``urls`` is the ``href`` of the container's ``a.issue-item-title``
       link, or ``'http'`` when that link has no ``href`` attribute.

     Progress is printed to stdout. ``arguments`` and ``options`` are
     accepted but not used.

     NOTE(review): ``soup`` is not defined in this method; it is presumably
     a parsed BeautifulSoup document available at module level -- confirm.
     """
     # Single-argument print(...) behaves the same on Python 2 and Python 3.
     print('START')
     containers = soup.find_all('div', {'class': 'item-container'})
     for index, container in enumerate(containers, 1):
         record = News()
         record.title = 'Title %s' % index
         # NOTE(review): find_next()/find() return None when nothing matches,
         # so a container without the expected markup raises AttributeError
         # here, exactly as before.
         issue = container.find_next('div', {'class': 'issue-item'})
         record.content = ''.join(p.text for p in issue.find_all('p'))
         record.urls = container.find('a', {
             'class': 'issue-item-title'
         }).get('href', 'http')
         record.save()
         print('Saving %s' % index)
Esempio n. 3
0
 def process_item(self, item, spider):
     """Persist a scraped item according to its ``type`` and pass it on.

     Items whose ``type`` is ``"news"`` are saved as ``News``; items whose
     ``type`` is ``"tweet"`` are saved as ``Tweet``. Any other item is left
     untouched. Missing keys are stored as ``None``.

     :param item: mapping-like scraped item (read via ``.get``).
     :param spider: not used by this method.
     :returns: the same ``item`` that was passed in.
     """
     kind = item.get('type')
     if kind == "news":
         # Note: the model field is ``byline`` but the item key is ``byLine``.
         news_fields = {
             'headline': item.get('headline'),
             'body': item.get('body'),
             'url': item.get('url'),
             'byline': item.get('byLine'),
             'section': item.get('section'),
             'picture': item.get('picture'),
         }
         News(**news_fields).save()
     elif kind == "tweet":
         tweet_keys = ('tweet', 'time', 'user', 'user_name', 'link',
                       'user_picture')
         tweet_fields = {key: item.get(key) for key in tweet_keys}
         Tweet(**tweet_fields).save()
     return item
Esempio n. 4
0
 def each_article(self, category, url_list):
     """Fetch article bodies for ``url_list`` and save them as ``News`` rows.

     A new ``Category`` named ``category`` is created and saved first. Each
     URL is then parsed with ``self.soup``; the text of every ``<p>`` inside
     its ``div.bdaia-post-content`` is joined, UTF-8 encoded and appended to
     ``self.article``. Finally ``self.title_list``, ``self.href_list``,
     ``self.description`` and ``self.article`` are zipped together and one
     ``News`` row is saved per tuple, attached to the new category.

     :param category: name for the ``Category`` that will own the rows.
     :param url_list: iterable of article URLs to fetch.

     NOTE(review): ``self.article`` is only appended to here, never reset,
     and ``zip`` stops at the shortest of the four lists -- confirm that the
     caller keeps those lists aligned.
     """
     main = Category(name=category)
     main.save()
     for link in url_list:
         soup = self.soup(link)
         # NOTE(review): find() returns None when the div is absent, which
         # raises AttributeError on the next line, exactly as before.
         content = soup.find("div", {"class": "bdaia-post-content"})
         text = u''.join(p.get_text() for p in content.find_all("p"))
         self.article.append(text.encode('utf-8'))
     rows = zip(self.title_list, self.href_list, self.description,
                self.article)
     for title, href, short_descr, article in rows:
         item = News(item_title=title,
                     item_link=href,
                     item_short_descr=short_descr,
                     article=article,
                     category=main)
         item.save()
Esempio n. 5
0
def news_add(request, **kwargs):
    """Create and save a ``News`` built from ``kwargs``.

    :param request: not used by this function.
    :param kwargs: passed unchanged to the ``News`` constructor.
    :returns: dict with the single key ``'news'`` holding the result of the
        saved instance's ``tojson()``.
    """
    created = News(**kwargs)
    created.save()
    payload = created.tojson()
    return {'news': payload}
Esempio n. 6
0
        )

    def handle(self, *args, **options):
        # ...
        print 'Start'
<<<<<<< HEAD
        #News.objects.delete()
        #for r in News.objects.filter(pk__gt=10):
        #    print r.delete()
        
        return True
        for i in range(1,200):
            record = News()
            record.title = 'Title %s' % i
            record.content = 'Content %s' % i
            record.save() 
            print 'Saving %s' % i
        
        
        if options['url']:
            print 'Loadd from %s' % options['url']
        # ...
        print 'End'
        
=======
        import requests
        from bs4 import BeautifulSoup

        url = 'https://pythondigest.ru/'
        r = requests.get( url )
        encoded_page = r.text.encode( 'utf-8' )