Example 1
def doCron(videos):
    # Fetch the YouTube GData "on_the_web" standard feed and parse it with
    # BeautifulSoup 3, treating <category> as a self-closing tag.
    raw_data = http.getHttp("https://gdata.youtube.com/feeds/api/standardfeeds/on_the_web")
    soup = BeautifulSoup(raw_data, selfClosingTags=['category'])
    entries=soup.findAll('entry')
    for entry in entries:
        titles=entry('title')
        if len(titles)>0:
            mykey=titles[0].text
            # Skip feed entries that have already been stored.
            if mykey and not getVideo(videos, mykey):
                video=Video()
                video.title=mykey
                video.mykey=mykey
                video.text=entry('content')[0].text if len(entry('content'))>0 else ''
                # BeautifulSoup 3 exposes tag attributes as a list of
                # (name, value) tuples; the watch URL is expected as the
                # third attribute of the <link> element.
                links=entry(lambda tag: tag.name=='link' and tag.attrs[2][0]=='href' and '/watch?' in tag.attrs[2][1])
                if len(links)==0:
                    continue
                video.link=links[0].attrs[2][1]
                # Small (120x90) and big (480x360) thumbnails come from the
                # media:thumbnail elements; skip the entry if either is missing.
                imgs=entry('media:thumbnail', height='90', width='120')
                if len(imgs)==0:
                    continue
                video.img=imgs[0].attrs[0][1]
                imgsBig=entry('media:thumbnail', height='360', width='480')
                if len(imgsBig)==0:
                    continue
                video.imgBig=imgsBig[0].attrs[0][1]
                video.tags=getTags(entry)
                video.categories=getCategories(entry)
                video.save()
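
The helpers used above (getVideo, getTags, getCategories) are defined elsewhere in the application and are not shown in this example. Below is a minimal sketch of what they might look like, assuming videos is an iterable of previously saved Video entities and that the GData entry carries media:keywords and category elements; these element names and the exact signatures are inferred from the call sites, not taken from the original code.

def getVideo(videos, mykey):
    # Return the already-stored Video with this key, or None if it is new.
    for v in videos:
        if v.mykey == mykey:
            return v
    return None

def getTags(entry):
    # media:keywords is assumed to hold a comma-separated tag list.
    keywords = entry('media:keywords')
    if len(keywords) == 0 or not keywords[0].text:
        return []
    return [t.strip() for t in keywords[0].text.split(',')]

def getCategories(entry):
    # Each category element is assumed to keep its value in a 'term'
    # attribute; dict() turns BeautifulSoup 3's list of (name, value)
    # tuples into a map.
    return [dict(c.attrs).get('term', '') for c in entry('category')]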
Example 2
def doCron(stories):
    # Fetch the Google Hot Trends page, extract the trend titles with the
    # custom HotTrendsScraper, and turn each one into a Story.
    raw_data = http.getHttp("http://www.google.com/trends/hottrends?sa=X")
    scraper=HotTrendsScraper()
    scraper.feed(raw_data)
    data = scraper.trends
    for d in data:
        buildStoryFromString(d, stories)
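
HotTrendsScraper is a custom parser defined elsewhere and not shown here. The sketch below shows the shape such a scraper could take with Python 2's HTMLParser; the 'hotSearch' class used to recognize trend links is a placeholder assumption, since the real markup of the old Hot Trends page is not part of this example.

from HTMLParser import HTMLParser

class HotTrendsScraper(HTMLParser):
    def __init__(self):
        HTMLParser.__init__(self)
        self.trends = []        # collected trend titles
        self._in_trend = False  # True while inside a trend anchor

    def handle_starttag(self, tag, attrs):
        # 'hotSearch' is a hypothetical class name; the real page markup
        # would determine the actual filter.
        if tag == 'a' and ('class', 'hotSearch') in attrs:
            self._in_trend = True

    def handle_endtag(self, tag):
        if tag == 'a':
            self._in_trend = False

    def handle_data(self, data):
        if self._in_trend and data.strip():
            self.trends.append(data.strip())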
Example 3
def buildStoryFromString(data, stories):
    story=findStory(data, stories)
    if not story:
        # Build a (naively escaped) Google search URL for the trend string.
        url="http://www.google.com/search?q="+data.replace(' ', '+')
        logging.info(url)
        try:
            raw_data = http.getHttp(url)
            soup = BeautifulSoup(raw_data)
            story=None
            # First external result link: BeautifulSoup 3 keeps attributes as
            # (name, value) tuples, so attrs[0] is expected to be href.
            a=soup.find(lambda tag: tag.name=='a' and tag.attrs[0][0]=='href' and not tag.attrs[0][1].startswith('/') and 'google' not in tag.attrs[0][1])
            if a and a.text:
                story=Story()
                story.deleteFlag=False
                story.mykey=data
                story.title=''
                # Flatten the anchor's mixed Tag/NavigableString children
                # into a plain title string.
                for c in a.contents:
                    if type(c) == Tag:
                        story.title+=c.text
                    else:
                        story.title+=c
                story.link=a.attrs[0][1]
                story.text=''
                # The snippet text is expected in the fifth child of the
                # link's parent element; flatten it the same way.
                for c in a.parent.contents[4].contents:
                    if type(c) == Tag:
                        story.text+=c.text
                    else:
                        story.text+=c
                story.put()
        except DownloadError: #@UndefinedVariable
            # The search page could not be fetched; log and skip this trend.
            logging.error(url + ' failed to load')
    
    # Disabled alternative: parse the result page with a custom SearchScraper.
    '''
    scraper=SearchScraper()
    scraper.feed(raw_data)
    return scraper.story
    '''
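
findStory, used at the top of the function, is the lookup that prevents rebuilding an existing Story. A minimal sketch follows, assuming stories is an iterable of Story entities and that mykey is the field set above; the signature is inferred from the call site, not taken from the original module.

def findStory(data, stories):
    # Return the Story already built for this trend string, or None.
    for s in stories:
        if s.mykey == data:
            return s
    return None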