def updteTotalKnowled(self):
    '''Add the question-count field ``total`` to the knowledge points.

    Opens the knowledge-point collection once and hands it to
    ``updteTotalKnowledByCid`` for every (subject, stage) pair listed
    in ``subjects``.
    '''
    db = MongoDB()
    knowled_coll = db.getCollection(COLL.knowled)
    for subject in subjects.values():
        # ``xds`` holds the stages configured for this subject.
        for stage in subject['xds']:
            self.updteTotalKnowledByCid(subject, stage, knowled_coll)
def parseParperPropAll(self, url=URL.paper_url):
    '''Parse the common properties of exam papers for all subjects.

    Calls ``parsePaperProp`` once for every (subject, stage) pair listed
    in ``subjects``, sharing a single ``MongoDB`` handle.

    :param url: paper URL forwarded unchanged to ``parsePaperProp``.
    '''
    db = MongoDB()
    # Unique-index creation is currently disabled:
    # for key, value in COLL.type.items():
    #     coll = db.getCollection(value)
    #     coll.create_index([(key+'_id',pymongo.ASCENDING)],unique=True)
    for subject in subjects.values():
        for stage in subject['xds']:
            self.parsePaperProp(subject, stage, db, url)
def mainKnowled(self):
    '''Parse the knowledge points of every subject and store them.

    For each subject in ``subjects`` and each stage in ``xds`` whose
    ``'xd'`` value is listed in the subject's ``'xds'``, the result of
    ``parseKnowled`` is inserted into the knowledge-point collection.
    A failure for one (subject, stage) pair is logged and does not stop
    the remaining pairs from being processed.
    '''
    mongo = MongoDB()
    coll = mongo.getCollection(COLL.knowled)
    # Unique-index creation is currently disabled:
    # coll.create_index([("id",pymongo.ASCENDING)],unique=True)
    for value in subjects.values():
        for xdValue in xds.values():
            # Skip stages that are not configured for this subject.
            if xdValue['xd'] not in value['xds']:
                continue
            try:
                coll.insert(self.parseKnowled(value, xdValue))
            except Exception as e:
                # Log the exception object itself: ``e.message`` does not
                # exist on Python 3 exceptions, so reading it here would
                # raise AttributeError and abort the whole loop.
                logger.exception(u'处理学科:%s,学段:%s;出现异常,异常信息:%s',
                                 value['name'], xdValue['name'], e)
def generateLastKnowledPgScrapyUrl(self):
    '''Generate the crawl URLs for the last knowledge points and store them.

    For every (subject, stage) pair listed in ``subjects``, the documents
    produced by ``generateLastKnowledPgScrapyUrlByCid`` are passed to
    ``insert_many`` on the page-URL collection. A failure for one pair is
    logged and does not stop the remaining pairs from being processed.
    '''
    mongo = MongoDB()
    coll = mongo.getCollection(COLL.knowled)
    coll_pg = mongo.getCollection(COLL.pg_url)
    # Unique-index creation is currently disabled:
    # coll_pg.create_index([("kid", pymongo.ASCENDING),("pg", pymongo.ASCENDING)], unique=True)
    for value in subjects.values():
        for xd in value['xds']:
            try:
                coll_pg.insert_many(
                    self.generateLastKnowledPgScrapyUrlByCid(value, xd, coll))
            except Exception as e:
                # Log the exception object itself: ``e.message`` does not
                # exist on Python 3 exceptions, so reading it here would
                # raise AttributeError and abort the whole loop.
                logger.exception(u'生成知识点url错误,学科:%s,学段:%d,错误信息:%s',
                                 value['name'], xd, e)
def parseQuestionAllType(self):
    '''Parse all question types for every subject.

    Calls ``parseQuesiontAllTypeByCid`` once for every (subject, stage)
    pair listed in ``subjects``, sharing a single ``MongoDB`` handle.
    '''
    db = MongoDB()
    for subject in subjects.values():
        for stage in subject['xds']:
            self.parseQuesiontAllTypeByCid(subject, stage, db)