def insertByLine(self, line):
    """Parse one dictionary line and insert a row for every type it lists.

    The line is handed to ``drawFieldFromLine`` together with a field
    configuration; afterwards slot 2 of each entry is read as that field's
    value.  The ``types`` value is split on ``'|'``; each label is looked
    up through ``InheritType`` (and inserted first when unknown), then one
    row per label is written with ``insert_ignore`` and committed.

    Ordinary errors during the insert/commit are handled best-effort: the
    transaction is rolled back and processing continues with the next
    label.  ``KeyboardInterrupt``/``SystemExit`` also trigger a rollback
    but are re-raised instead of being silently swallowed.

    :param line: raw text line to parse.
    :returns: None.
    """
    fieldConfig = {
        # key: [S, E, V] -- presumably start marker, end marker and the
        # value slot filled in by drawFieldFromLine (TODO confirm).
        'text' : ['#', '/*', ''],
        'cate' : ['Cate=', ';', ''],
        'seg' : ['Seg=', ';', ''],
        'infos' : ['infos=', ';', ''],
        'types' : ['Value=onto_value:prop=', ';', ''],
        'from_where' : ['from_where=', ';', '']
    }
    drawFieldFromLine(line, fieldConfig)

    typesValue = fieldConfig['types'][2]
    # NOTE(review): the default here is '' (not None), so this guard only
    # fires if drawFieldFromLine itself stores None -- confirm whether an
    # empty 'types' value should be skipped as well.
    if typesValue is None:
        return

    inheritType = InheritType(None, db = self.db)
    for typeLabel in typesValue.split('|'):
        typeRow = inheritType.selectByLabel(typeLabel)
        if typeRow is None:
            # Unknown type: create it, then re-read to obtain its id.
            inheritType.insert(typeLabel)
            typeRow = inheritType.selectByLabel(typeLabel)
        type_id = typeRow['id']
        try:
            self.db.insert_ignore(self.__tableName, {
                'type_id': type_id,
                'text': fieldConfig['text'][2],
                'cate': fieldConfig['cate'][2],
                'seg': fieldConfig['seg'][2],
                'infos': fieldConfig['infos'][2],
                'from_where': fieldConfig['from_where'][2],
                'create_date': None,
            })
            self.db.commit()
        except Exception:
            # Rollback in case there is any error; keep going with the
            # remaining labels (best-effort insert).
            self.db.rollback()
        except BaseException:
            # Interrupt / interpreter exit: undo the pending work but do
            # not hide the signal from the caller.
            self.db.rollback()
            raise
def selectBySql(self, sql, args = None, ms_update = False):
    """Run ``sql`` page by page and fold the rows into a combined result.

    A ``limit offset,count`` suffix is appended to ``sql`` for every page,
    so the statement passed in must be one that can take that suffix.
    Rows whose ``type_lable`` is in the active ignore list are skipped,
    ``text`` is stripped, and each remaining row is passed to
    ``self.dealWithRow``.  Buffered rows are flushed through
    ``self.combineType`` whenever the text changes and the buffer has
    reached the write threshold, and once more after the last page.

    :param sql: base SELECT statement (without the paging suffix).
    :param args: arguments forwarded unchanged to ``self.db.select``.
    :param ms_update: when true, filter with ``MS_PROPS_IGNORE``;
        otherwise filter with ``PROPS_IGNORE``.
    :returns: the ``result`` list that was handed to ``self.combineType``.
    """
    self.MS_PROPS_IGNORE, self.PROPS_IGNORE = load_list()

    result = []
    maxQuerySize = 100000
    maxWriteSize = maxQuerySize / 10

    # Kept deliberately simple: one map plus one list are used to deal
    # with the priority of the type ids and the order in which they
    # appear, i.e. the pids.
    textToPids = {}   # {text, [pids,pids, infos, used]}
    tempList = []
    preText = ''

    inheritType = InheritType(None, db = self.db)
    if self.typesWithParentIds is None:
        self.typesWithParentIds = inheritType.selectAllWithParentIds()

    # In ms_update mode, dictionaries that are not part of the main-search
    # directory are not imported; otherwise the regular ignore list applies.
    if ms_update:
        ignoredLabels = self.MS_PROPS_IGNORE
    else:
        ignoredLabels = self.PROPS_IGNORE

    pageNo = 0
    while True:
        pageSql = sql + ' limit ' + str(pageNo * maxQuerySize) + ',' + str(maxQuerySize)
        pageNo += 1
        rows = self.db.select(sql = pageSql, args = args, is_dict = True)
        if len(rows) == 0:
            break

        for row in rows:
            if row['type_lable'] in ignoredLabels:
                continue

            # Some source data still carries trailing line breaks.
            row['text'] = row['text'].strip()

            # Flush only on a text boundary so rows sharing one text are
            # always combined together.
            if preText != row['text'] and len(tempList) >= maxWriteSize:
                self.combineType(tempList, textToPids, result)
                textToPids = {}
                tempList = []

            self.dealWithRow(textToPids, row, tempList)
            preText = row['text']

        # Release this page before the next one is fetched.
        del rows

    self.db.cursor.close()

    # Flush whatever is still buffered after the final page.
    self.combineType(tempList, textToPids, result)
    return result
def deleteByLine(self, line):
    """Parse one dictionary line and delete its row for every listed type.

    The line is handed to ``drawFieldFromLine`` together with a field
    configuration; afterwards slot 2 of each entry is read as that field's
    value.  When a ``types`` value is present it is split on ``'|'``;
    labels unknown to ``InheritType`` are skipped, and for every known
    label the row matching ``(type_id, text)`` is deleted and committed.

    Ordinary errors during the delete/commit are handled best-effort: the
    transaction is rolled back and processing continues with the next
    label.  ``KeyboardInterrupt``/``SystemExit`` also trigger a rollback
    but are re-raised instead of being silently swallowed.

    :param line: raw text line to parse.
    :returns: None.
    """
    fieldConfig = {
        # key: [S, E, V] -- presumably start marker, end marker and the
        # value slot filled in by drawFieldFromLine (TODO confirm).
        'text' : ['#', '/*', None],
        'types' : ['Value=onto_value:prop=', ';', None]
    }
    drawFieldFromLine(line, fieldConfig)

    typesValue = fieldConfig['types'][2]
    if typesValue is None:
        # No types value was extracted: nothing to delete.
        return

    inheritType = InheritType(None, db = self.db)
    for typeLabel in typesValue.split('|'):
        typeRow = inheritType.selectByLabel(typeLabel)
        if typeRow is None:
            # Unknown type label: there can be no row to remove for it.
            continue
        type_id = typeRow['id']
        try:
            self.db.delete(self.__tableName, {
                'type_id': type_id,
                'text': fieldConfig['text'][2],
            })
            self.db.commit()
        except Exception:
            # Rollback in case there is any error; keep going with the
            # remaining labels (best-effort delete).
            self.db.rollback()
        except BaseException:
            # Interrupt / interpreter exit: undo the pending work but do
            # not hide the signal from the caller.
            self.db.rollback()
            raise