        data = datas.get('data').get('list')
        if data:
            for da in data:
                print(da)
                insert_data = da
                insert_data['singer_id'] = datas.get('data').get('singer_id')
                insert_data['singer_mid'] = datas.get('data').get('singer_mid')
                insert_data['singer_name'] = datas.get('data').get('singer_name')
                insert_data['total'] = datas.get('data').get('total')
                insert_data['_id'] = da.get('albumID')
                # save the record to the database
                # self.pipeline.process_item(insert_data, collection_name)


def go(singer_mids):
    start()
    obj_spider = SpiderMain()
    obj_spider.craw(singer_mids)


if __name__ == '__main__':
    singer_mids = ['0025NhlN2yWrP4']
    go(singer_mids)
    # blocking: the joins must be written here so the main thread waits
    # until both queues are fully drained
    work_queue.join()
    save_queue.join()
    print('done')
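# ----------------------------------------------------------------------------
# The pipeline object used above (self.pipeline.process_item) is not defined
# in these fragments. Below is a minimal sketch of what such a pipeline might
# look like, assuming pymongo and a local MongoDB instance; the class name,
# URI, and database name are placeholders, not the project's real ones.
# ----------------------------------------------------------------------------
import pymongo


class MongoPipeline:
    def __init__(self, mongo_uri='mongodb://localhost:27017', db_name='spider'):
        self.client = pymongo.MongoClient(mongo_uri)
        self.db = self.client[db_name]

    def process_item(self, insert_data, collection_name):
        # Upsert keyed on _id so re-crawling the same album or proxy refreshes
        # the existing document instead of raising a duplicate-key error.
        self.db[collection_name].replace_one(
            {'_id': insert_data['_id']},
            insert_data,
            upsert=True,
        )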
                       source=website_name)
        logger.info(msg)
        # MongoDB collection name
        insert_data = {}
        insert_data['_id'] = _ip + '_' + target_url
        insert_data['ip'] = _ip
        insert_data['source'] = website_name
        insert_data['response_time'] = response_time
        insert_data['target_url'] = target_url
        insert_data['insert_time'] = time.strftime('%Y-%m-%d %H:%M:%S')
        # save the record to the database
        self.pipeline.process_item(insert_data, self.collection_name)


if __name__ == '__main__':
    # test code
    spidermain = SpiderMain()
    spidermain.run()
    # blocking
    work_queue.join()
    save_queue.join()
    # finished crawling origin IPs
    logger.info('available proxy has been saved in your database, please check!')
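# ----------------------------------------------------------------------------
# work_queue and save_queue are joined in both __main__ blocks above but are
# defined elsewhere in the project. A minimal sketch of the assumed setup,
# using queue.Queue plus daemon worker threads: join() blocks until every
# task that was put() has been marked done with task_done(). The handler
# defaults below are placeholders, not the project's real callables, and the
# guess that start() spins up these threads is an assumption.
# ----------------------------------------------------------------------------
import queue
import threading

work_queue = queue.Queue()   # crawl tasks (e.g. URLs or singer mids)
save_queue = queue.Queue()   # parsed items waiting to be persisted


def _worker(q, handle):
    # Consume tasks forever; calling task_done() is what lets q.join() return.
    while True:
        task = q.get()
        try:
            handle(task)
        finally:
            q.task_done()


def start(crawl_handler=print, save_handler=print, workers=4):
    # Presumably what the start() called in go() above does: launch daemon
    # threads for both queues so the main thread can simply block on join().
    for _ in range(workers):
        threading.Thread(target=_worker, args=(work_queue, crawl_handler),
                         daemon=True).start()
        threading.Thread(target=_worker, args=(save_queue, save_handler),
                         daemon=True).start()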