def create_and_schedule_producer():
    """Instantiate every custom spider and schedule one producer per spider.

    The spider classes are looked up with
    ``list_subclasses_in_specific_module(CUSTOM_SPIDER_SCRIPT_NAME, Spider)``.
    Each class is instantiated with no arguments, and for every instance a
    ``producer(spider_inst)`` is scheduled on the module-level ``loop``.

    Returns:
        None. The scheduled futures are not returned to the caller.
    """
    # Collect the user-defined spider classes.
    spider_cls_list = list_subclasses_in_specific_module(
        CUSTOM_SPIDER_SCRIPT_NAME, Spider)

    # Instantiate all spiders up front, so a failing constructor aborts
    # before anything has been scheduled.
    spider_inst_list = [spider_cls() for spider_cls in spider_cls_list]

    # Create a producer for each spider and hand it to the event loop.
    for spider_inst in spider_inst_list:
        # ``asyncio.ensure_future`` replaces the deprecated ``asyncio.async``
        # spelling, which no longer parses now that ``async`` is a reserved
        # keyword (Python 3.7+).
        asyncio.ensure_future(producer(spider_inst), loop=loop)
def create_pipeline():
    """Build the list of custom pipeline instances, ordered by weight.

    The pipeline classes are looked up with
    ``list_subclasses_in_specific_module(..., tuple_with_cls_name=True)``,
    which is expected to yield ``(cls_name, cls_obj)`` entries. They are
    sorted in ascending order of ``pipeline_weight[cls_name]`` and then
    instantiated with no arguments.

    Returns:
        list: One instance per pipeline class, in weight order.

    Raises:
        KeyError: If a class name is missing from the weight mapping
            (assuming the mapping is a dict -- TODO confirm).
    """
    # (cls_name, cls_obj) entries for every custom pipeline subclass.
    named_classes = list_subclasses_in_specific_module(
        CUSTOM_PIPELINE_SCRIPT_NAME,
        BasePipeline,
        tuple_with_cls_name=True,
    )

    # User-supplied weights, indexed by class name.
    weights = import_a_name_from_a_module(CUSTOM_PIPELINE_WEIGHT_NAME)

    def weight_of(named_class):
        """Return the configured weight for a (cls_name, cls_obj) entry."""
        return weights[named_class[0]]

    # Sort in place by weight.
    named_classes.sort(key=weight_of)

    # Instantiate in sorted order so the result keeps the weight ordering.
    instances = []
    for named_class in named_classes:
        pipeline_cls = named_class[1]
        instances.append(pipeline_cls())
    return instances