def crawl():
    # Use the logger to debug the code
    Logger.debug('Hello google')
    # Get the HTML by parsing the URL
    soup = get_b_soup('http://www.google.com')
    # Send the data to the output pipeline
    Queues.send_to_output(soup.head.title.text)
    Queues.send_to_output('http://www.google.com' + soup.head.meta['content'])
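The get_b_soup() helper is not shown in this snippet. A minimal sketch of what it could look like, assuming it simply downloads the URL with requests and parses the markup with BeautifulSoup (the body below is an assumption, not the project's actual code):

import requests
from bs4 import BeautifulSoup

def get_b_soup(url):
    # Hypothetical helper: fetch the page and parse it into a BeautifulSoup tree
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')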
def execute_output():
    Logger()
    Logger.debug('Start Output Process')
    while True:
        try:
            result = Queues.get_output()
            Logger.info(result)
            # Your output logic goes here
        except Exception:
            break
    Logger.debug('End Output Process')
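The loop above only ends when Queues.get_output() raises, so Queues presumably wraps a queue shared between processes whose get() times out once nothing more arrives. A rough sketch of such a wrapper, assuming a fork start method so child processes inherit the class-level multiprocessing.Queue; the project's real implementation may differ:

import multiprocessing

class Queues:
    # Hypothetical wrapper: one shared output queue, addressed through classmethods
    output = None

    def __init__(self):
        # Create the queue once in the parent, before any child process starts
        Queues.output = multiprocessing.Queue()

    @classmethod
    def send_to_output(cls, item):
        cls.output.put(item)

    @classmethod
    def get_output(cls, timeout=30):
        # multiprocessing.Queue.get raises queue.Empty after the timeout,
        # which is what drops execute_output() into its except branch
        return cls.output.get(timeout=timeout)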
def main():
    # Create Logger file and output queue
    Logger(parent=True)
    Queues()
    # target is in the output.py file
    output_process = Process(target=output.execute_output, name='OutputProcess')
    output_process.start()
    pool = MyPool(processes=config.NUMBER_OF_WORKERS)
    pool.map(worker, get_bot_list())
    output_process.join()
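main() would normally sit behind the standard import guard, so that processes started by multiprocessing never re-run it when the module is imported:

if __name__ == '__main__':
    main()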
def crawl(soup):
    # Get the title from the parsed soup
    title = soup.find('meta', {'name': 'title'})['content']
    # And send it via the output pipeline
    Queues.send_to_output(title)
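Since this variant of crawl() receives an already parsed page, the surrounding framework presumably fetches each bot's URL itself. Purely as an illustration, a hypothetical driver for a bot module exposing a URL constant and this crawl(soup) signature might look like:

import requests
from bs4 import BeautifulSoup

def run_bot(bot):
    # Hypothetical driver: fetch the bot's page, parse it, and hand
    # the soup to the bot's crawl() entry point
    response = requests.get(bot.URL, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    bot.crawl(soup)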