Example #1
File: test.py Project: njxshr/codes
        # cache the 'data' payload instead of re-fetching it for every field
        detail = datas.get('data', {})
        data = detail.get('list')
        if data:
            for da in data:
                print(da)
                # enrich the album entry with the singer-level fields
                insert_data = da
                insert_data['singer_id'] = detail.get('singer_id')
                insert_data['singer_mid'] = detail.get('singer_mid')
                insert_data['singer_name'] = detail.get('singer_name')
                insert_data['total'] = detail.get('total')
                # the album ID doubles as the primary key, so re-crawls overwrite
                insert_data['_id'] = da.get('albumID')

                # save the record to the database
                # self.pipeline.process_item(insert_data, collection_name)


def go(singer_mids):
    # start the worker threads, then crawl the given singer mids
    start()
    obj_spider = SpiderMain()
    obj_spider.craw(singer_mids)


if __name__ == '__main__':
    singer_mids = ['0025NhlN2yWrP4']
    go(singer_mids)

    # blocking: it has to be written this way, so the main thread
    # waits until both queues are fully drained
    work_queue.join()
    save_queue.join()
    print('done')
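
The snippet does not show how work_queue and save_queue are wired up, but the final join() calls only make sense if background worker threads call task_done() for every item they pull. Below is a minimal sketch of that pattern; the thread bodies, start() implementation, and sample task are assumptions for illustration, not the project's actual code.

import threading
from queue import Queue

work_queue = Queue()   # crawl tasks
save_queue = Queue()   # parsed records waiting to be saved

def crawl_worker():
    while True:
        task = work_queue.get()
        try:
            record = {'_id': task}   # assumption: fetch and parse here
            save_queue.put(record)
        finally:
            work_queue.task_done()   # join() unblocks once every task is marked done

def save_worker():
    while True:
        record = save_queue.get()
        try:
            print('saving', record)  # assumption: write to the database here
        finally:
            save_queue.task_done()

def start():
    # daemon threads die with the main thread, so the join() calls are
    # what keep the program alive until both queues drain
    threading.Thread(target=crawl_worker, daemon=True).start()
    threading.Thread(target=save_worker, daemon=True).start()

if __name__ == '__main__':
    start()
    work_queue.put('0025NhlN2yWrP4')
    work_queue.join()   # all crawl tasks processed (records already queued for saving)
    save_queue.join()   # all records saved
    print('done')

Note the ordering: every record is put on save_queue before its crawl task is marked done, so by the time work_queue.join() returns, save_queue already holds everything that save_queue.join() needs to wait for.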
Example #2
                                                                                            source=website_name)
        logger.info(msg)
        # MongoDB collection name

        # build the record; the composite _id deduplicates by ip + target url
        insert_data = {
            '_id': _ip + '_' + target_url,
            'ip': _ip,
            'source': website_name,
            'response_time': response_time,
            'target_url': target_url,
            'insert_time': time.strftime('%Y-%m-%d %H:%M:%S'),
        }

        # save the record to the database
        self.pipeline.process_item(insert_data, self.collection_name)



if __name__ == '__main__':
    # test code
    spidermain = SpiderMain()
    spidermain.run()

    # blocking: wait until both queues are fully drained
    work_queue.join()
    save_queue.join()

    # finished crawling the origin IPs
    logger.info('available proxies have been saved to your database, please check!')
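
Both examples funnel their records through pipeline.process_item(insert_data, collection_name) and set _id explicitly (the albumID in the first, _ip + '_' + target_url in the second), so a re-crawl overwrites the earlier record instead of duplicating it. The pipeline class itself is not shown in either snippet; the following is only a sketch of what such a MongoDB-backed pipeline could look like using pymongo, with the class name, database name, and connection details assumed for illustration.

from pymongo import MongoClient

class MongoPipeline:
    """Upsert records into a MongoDB collection, keyed on '_id'."""

    def __init__(self, db_name='spider', host='localhost', port=27017):
        self.db = MongoClient(host, port)[db_name]

    def process_item(self, insert_data, collection_name):
        # replace_one with upsert=True inserts the record the first time
        # and overwrites it on later crawls, so '_id' acts as a dedup key
        self.db[collection_name].replace_one(
            {'_id': insert_data['_id']}, insert_data, upsert=True)

# usage sketch
pipeline = MongoPipeline()
pipeline.process_item(
    {'_id': '1.2.3.4_http://example.com', 'ip': '1.2.3.4'}, 'proxy_ips')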