def crawl(url='http://www.google.com'):
    """Fetch *url* and push its title and a meta-derived link to the output pipe.

    Args:
        url: Page to crawl. Defaults to the previously hard-coded Google URL,
            so existing callers keep the original behavior.
    """
    # Use logger to debug the code.
    Logger.debug('Hello google')
    # Get the html knowledge by parsing the url (project helper; presumably
    # returns a BeautifulSoup-like tree — verify against get_b_soup).
    soup = get_b_soup(url)
    # Send the data to the output pipeline: first the page title...
    Queues.send_to_output(soup.head.title.text)
    # ...then an absolute link built from the first <meta> tag's content.
    # Built from `url` rather than a second hard-coded copy of the address,
    # so the fetch target and the emitted link always agree.
    # NOTE(review): assumes <head> has a <meta> with a 'content' attribute —
    # raises AttributeError/KeyError otherwise; TODO confirm that is intended.
    Queues.send_to_output(url + soup.head.meta['content'])
def crawl(soup):
    """Extract the meta title from *soup* and send it via the output pipe."""
    # Locate the <meta name="title"> tag and read its 'content' attribute.
    meta_tag = soup.find('meta', {'name': 'title'})
    page_title = meta_tag['content']
    # Hand the extracted title off to the output queue.
    Queues.send_to_output(page_title)