class QueryManager:
    """Evaluate boolean word queries (AND / OR with parentheses).

    Posting lists are looked up first in an in-memory cache index and then
    in the backing database.  ``andQ`` merges its operands with a
    two-pointer walk, so posting lists are expected to be sorted ascending.
    """

    def __init__(self, db, cdb):
        """Build the cache index from ``cdb.findAll()`` and keep ``db`` as fallback."""
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db

    def toArray(self, query):
        """Split ``query`` into tokens.

        Parentheses are always separate tokens, spaces separate words and
        are dropped.  Example: ``'(a AND b)'`` -> ``['(', 'a', 'AND', 'b', ')']``.
        """
        tokens = []
        word = ''
        for ch in query:
            if ch == '(' or ch == ')' or ch == ' ':
                # Any delimiter terminates the word collected so far.
                if word:
                    tokens.append(word)
                    word = ''
                if ch != ' ':
                    tokens.append(ch)
            else:
                word += ch
        if word:
            tokens.append(word)
        return tokens

    def _lookup(self, word):
        """Return what the cache holds for ``word``, else what the database holds."""
        found = self.cacheIndex.find(word)
        if not found:
            found = self.db.find(word)
        return found

    def _resolve(self, operand):
        """Turn an operand (a word or an already computed list) into a list.

        Raises ``Exception`` when a word is found neither in the cache nor
        in the database.
        """
        if isinstance(operand, list):
            return operand
        found = self._lookup(operand)
        if not found:
            raise Exception('Зборот "' + operand + '" не постои.')
        return found

    def andQ(self, A, B):
        """Return the elements common to ``A`` and ``B``.

        Each operand is either a word or a list; a word is replaced by its
        list taken from the cache or the database.  Both lists must be
        sorted ascending.  The operands are never modified (the previous
        implementation appended a ``None`` sentinel to them, which altered
        lists owned by the cache index).

        Raises ``Exception`` if an operand is empty/``None`` or a word is
        unknown.
        """
        if not A or not B:
            raise Exception('Грешка.')
        lstA = self._resolve(A)
        lstB = self._resolve(B)

        result = []
        i = 0
        j = 0
        lenA = len(lstA)
        lenB = len(lstB)
        while i < lenA and j < lenB:
            a = lstA[i]
            b = lstB[j]
            if a == b:
                result.append(a)
                i += 1
                j += 1
            elif a < b:
                i += 1
            else:
                j += 1
        return result

    def orQ(self, A, B):
        """Return the sorted union of ``A`` and ``B`` without duplicates.

        Operands follow the same word-or-list convention as ``andQ``.  The
        result is sorted so that it can safely be fed into ``andQ``, whose
        merge depends on ascending order.

        Raises ``Exception`` if an operand is ``None`` (a failed sub-query)
        or a word is unknown.
        """
        if A is None or B is None:
            raise Exception('Грешка.')
        return sorted(set(self._resolve(A) + self._resolve(B)))

    def buildTree(self, query):
        """Build the expression tree for a token list via ``Tree.buildOrTree``."""
        return Tree.buildOrTree(query)

    def execute(self, query):
        """Run ``query`` and return the matching list (``None`` on failure).

        A single-token query is answered directly from cache/database;
        anything else is parsed into a tree and evaluated.
        """
        qArr = self.toArray(query)
        if len(qArr) == 1:
            return self._lookup(qArr[0])

        result = self.executeQuery(self.buildTree(qArr))
        # If the whole tree collapsed to a single leaf, executeQuery hands
        # back the bare word; look it up so callers always receive a list.
        if result is not None and not isinstance(result, list):
            result = self._lookup(result)
        return result

    def executeQuery(self, tree):
        """Recursively evaluate an expression tree node.

        A node with both children is an operator node ('OR' / 'AND'); its
        children are evaluated and combined.  Any other node is treated as
        a leaf and its ``value`` is returned so that ``andQ`` / ``orQ`` can
        resolve it.
        NOTE(review): assumes leaf nodes store the query word in ``value``
        - confirm against the Tree implementation.

        Errors raised while combining are printed and turned into ``None``.
        """
        lnode = tree.left
        rnode = tree.right
        if not (lnode and rnode):
            return tree.value

        if tree.value == 'OR':
            combine = self.orQ
        elif tree.value == 'AND':
            combine = self.andQ
        else:
            # Unknown operator: nothing to evaluate.
            return None

        try:
            return combine(self.executeQuery(lnode), self.executeQuery(rnode))
        except Exception as e:
            # Best effort: report the problem and let the parent see None.
            print(str(e))
            return None
class QueryManager:
    """Evaluate boolean word queries (AND / OR with parentheses).

    Posting lists are looked up first in an in-memory cache index and then
    in the backing database.  ``andQ`` merges its operands with a
    two-pointer walk, so posting lists are expected to be sorted ascending.
    """

    def __init__(self, db, cdb):
        """Build the cache index from ``cdb.findAll()`` and keep ``db`` as fallback."""
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db

    def toArray(self, query):
        """Split ``query`` into tokens.

        Parentheses are always separate tokens, spaces separate words and
        are dropped.  Example: ``'(a AND b)'`` -> ``['(', 'a', 'AND', 'b', ')']``.
        """
        tokens = []
        word = ''
        for ch in query:
            if ch == '(' or ch == ')' or ch == ' ':
                # Any delimiter terminates the word collected so far.
                if word:
                    tokens.append(word)
                    word = ''
                if ch != ' ':
                    tokens.append(ch)
            else:
                word += ch
        if word:
            tokens.append(word)
        return tokens

    def _lookup(self, word):
        """Return what the cache holds for ``word``, else what the database holds."""
        found = self.cacheIndex.find(word)
        if not found:
            found = self.db.find(word)
        return found

    def _resolve(self, operand):
        """Turn an operand (a word or an already computed list) into a list.

        Raises ``Exception`` when a word is found neither in the cache nor
        in the database.
        """
        if isinstance(operand, list):
            return operand
        found = self._lookup(operand)
        if not found:
            raise Exception('Зборот "' + operand + '" не постои.')
        return found

    def andQ(self, A, B):
        """Return the elements common to ``A`` and ``B``.

        Each operand is either a word or a list; a word is replaced by its
        list taken from the cache or the database.  Both lists must be
        sorted ascending.  The operands are never modified (the previous
        implementation appended a ``None`` sentinel to them, which altered
        lists owned by the cache index).

        Raises ``Exception`` if an operand is empty/``None`` or a word is
        unknown.
        """
        if not A or not B:
            raise Exception('Грешка.')
        lstA = self._resolve(A)
        lstB = self._resolve(B)

        result = []
        i = 0
        j = 0
        lenA = len(lstA)
        lenB = len(lstB)
        while i < lenA and j < lenB:
            a = lstA[i]
            b = lstB[j]
            if a == b:
                result.append(a)
                i += 1
                j += 1
            elif a < b:
                i += 1
            else:
                j += 1
        return result

    def orQ(self, A, B):
        """Return the sorted union of ``A`` and ``B`` without duplicates.

        Operands follow the same word-or-list convention as ``andQ``.  The
        result is sorted so that it can safely be fed into ``andQ``, whose
        merge depends on ascending order.

        Raises ``Exception`` if an operand is ``None`` (a failed sub-query)
        or a word is unknown.
        """
        if A is None or B is None:
            raise Exception('Грешка.')
        return sorted(set(self._resolve(A) + self._resolve(B)))

    def buildTree(self, query):
        """Build the expression tree for a token list via ``Tree.buildOrTree``."""
        return Tree.buildOrTree(query)

    def execute(self, query):
        """Run ``query`` and return the matching list (``None`` on failure).

        A single-token query is answered directly from cache/database;
        anything else is parsed into a tree and evaluated.
        """
        qArr = self.toArray(query)
        if len(qArr) == 1:
            return self._lookup(qArr[0])

        result = self.executeQuery(self.buildTree(qArr))
        # If the whole tree collapsed to a single leaf, executeQuery hands
        # back the bare word; look it up so callers always receive a list.
        if result is not None and not isinstance(result, list):
            result = self._lookup(result)
        return result

    def executeQuery(self, tree):
        """Recursively evaluate an expression tree node.

        A node with both children is an operator node ('OR' / 'AND'); its
        children are evaluated and combined.  Any other node is treated as
        a leaf and its ``value`` is returned so that ``andQ`` / ``orQ`` can
        resolve it.
        NOTE(review): assumes leaf nodes store the query word in ``value``
        - confirm against the Tree implementation.

        Errors raised while combining are printed and turned into ``None``.
        """
        lnode = tree.left
        rnode = tree.right
        if not (lnode and rnode):
            return tree.value

        if tree.value == 'OR':
            combine = self.orQ
        elif tree.value == 'AND':
            combine = self.andQ
        else:
            # Unknown operator: nothing to evaluate.
            return None

        try:
            return combine(self.executeQuery(lnode), self.executeQuery(rnode))
        except Exception as e:
            # Best effort: report the problem and let the parent see None.
            print(str(e))
            return None
class QueryManagerITC:
    """Rank the documents that contain every query word by a tf-idf score.

    Posting lists are taken from an in-memory cache index, falling back to
    the database.  Each posting is indexed as ``posting[0]`` (document id)
    and ``posting[1]`` (numeric weight of the word in that document), and
    the lists are expected to be sorted ascending by document id because
    they are merged with a two-pointer walk.
    """

    def __init__(self, db, cdb, numDocs=127000):
        """Build the cache index from ``cdb.findAll()``.

        ``numDocs`` is the collection size used for the idf; it defaults to
        the value that used to be hard-coded.
        """
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db
        self.numDocs = numDocs

    def toArray(self, query):
        """Split ``query`` on whitespace, ignoring repeated/leading/trailing blanks."""
        return query.split()

    def getArray(self, word):
        """Return the posting list for ``word`` (cache first, then database), or ``[]``."""
        arr = self.cacheIndex.find(word)
        if not arr:
            arr = self.db.find(word)
        if not arr:
            arr = []
        return arr

    @staticmethod
    def _common(lstA, lstB):
        """Return the elements present in both ascending lists, without mutating them."""
        result = []
        i = 0
        j = 0
        lenA = len(lstA)
        lenB = len(lstB)
        while i < lenA and j < lenB:
            a = lstA[i]
            b = lstB[j]
            if a == b:
                result.append(a)
                i += 1
                j += 1
            elif a < b:
                i += 1
            else:
                j += 1
        return result

    def intersect(self, lstA, lstB):
        """Intersect two ascending lists.

        Kept for backward compatibility: when exactly one list is empty the
        other list is returned unchanged, and two empty lists give ``[]``.
        The arguments are no longer modified (a ``None`` sentinel used to be
        appended to both).
        """
        if not lstA and not lstB:
            return []
        if not lstA:
            return lstB
        if not lstB:
            return lstA
        return self._common(lstA, lstB)

    def execute(self, query):
        """Return ``[(doc_id, score), ...]`` sorted by descending score.

        Only documents containing all query words are scored.  An empty
        query, a word without postings, or an empty intersection yields
        ``[]`` instead of raising.
        """
        qArr = self.toArray(query)
        if not qArr:
            return []

        colRes = {}
        for word in qArr:
            colRes[word] = self.getArray(word)
        # A word that occurs nowhere makes the conjunction empty (and would
        # otherwise divide by zero in the idf below).
        if not all(colRes.values()):
            return []

        # Query-side weights: tf from the word counts in the query, idf
        # from the collection size, then length-normalised.
        qw = WordsList(1)
        qw.insertList(qArr)
        wt = {}
        val = 0
        for key in qw.list():
            tf = round(1 + math.log10(qw.getWord(key)), 2)
            # float() avoids truncating the ratio under Python 2 division.
            idf = round(math.log10(float(self.numDocs) / len(colRes[key])), 2)
            wt[key] = tf * idf
            val += tf * idf * tf * idf
        val = math.sqrt(val)
        if val:
            for key in wt:
                wt[key] = wt[key] / val

        # Document ids shared by all words.  The first word seeds the
        # running intersection; an empty running result stays empty.
        common = None
        for word in qArr:
            docIds = [posting[0] for posting in colRes[word]]
            if common is None:
                common = docIds
            else:
                common = self._common(common, docIds)
        if not common:
            return []

        # For each word keep only the postings of the shared documents, in
        # the same order as ``common`` so they can be addressed by position.
        matched = {}
        for word in qArr:
            postings = colRes[word]
            rows = []
            i = 0
            j = 0
            while i < len(postings) and j < len(common):
                docId = postings[i][0]
                if docId == common[j]:
                    rows.append(postings[i])
                    i += 1
                    j += 1
                elif docId > common[j]:
                    j += 1
                else:
                    i += 1
            matched[word] = rows

        result = []
        for pos, docId in enumerate(common):
            value = 0
            for key in matched:
                value += matched[key][pos][1] * wt[key]
            result.append((docId, round(value, 2)))
        return sorted(result, key=itemgetter(1), reverse=True)
class QueryManagerITC:
    """Rank the documents that contain every query word by a tf-idf score.

    Posting lists are taken from an in-memory cache index, falling back to
    the database.  Each posting is indexed as ``posting[0]`` (document id)
    and ``posting[1]`` (numeric weight of the word in that document), and
    the lists are expected to be sorted ascending by document id because
    they are merged with a two-pointer walk.
    """

    def __init__(self, db, cdb, numDocs=127000):
        """Build the cache index from ``cdb.findAll()``.

        ``numDocs`` is the collection size used for the idf; it defaults to
        the value that used to be hard-coded.
        """
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db
        self.numDocs = numDocs

    def toArray(self, query):
        """Split ``query`` on whitespace, ignoring repeated/leading/trailing blanks."""
        return query.split()

    def getArray(self, word):
        """Return the posting list for ``word`` (cache first, then database), or ``[]``."""
        arr = self.cacheIndex.find(word)
        if not arr:
            arr = self.db.find(word)
        if not arr:
            arr = []
        return arr

    @staticmethod
    def _common(lstA, lstB):
        """Return the elements present in both ascending lists, without mutating them."""
        result = []
        i = 0
        j = 0
        lenA = len(lstA)
        lenB = len(lstB)
        while i < lenA and j < lenB:
            a = lstA[i]
            b = lstB[j]
            if a == b:
                result.append(a)
                i += 1
                j += 1
            elif a < b:
                i += 1
            else:
                j += 1
        return result

    def intersect(self, lstA, lstB):
        """Intersect two ascending lists.

        Kept for backward compatibility: when exactly one list is empty the
        other list is returned unchanged, and two empty lists give ``[]``.
        The arguments are no longer modified (a ``None`` sentinel used to be
        appended to both).
        """
        if not lstA and not lstB:
            return []
        if not lstA:
            return lstB
        if not lstB:
            return lstA
        return self._common(lstA, lstB)

    def execute(self, query):
        """Return ``[(doc_id, score), ...]`` sorted by descending score.

        Only documents containing all query words are scored.  An empty
        query, a word without postings, or an empty intersection yields
        ``[]`` instead of raising.
        """
        qArr = self.toArray(query)
        if not qArr:
            return []

        colRes = {}
        for word in qArr:
            colRes[word] = self.getArray(word)
        # A word that occurs nowhere makes the conjunction empty (and would
        # otherwise divide by zero in the idf below).
        if not all(colRes.values()):
            return []

        # Query-side weights: tf from the word counts in the query, idf
        # from the collection size, then length-normalised.
        qw = WordsList(1)
        qw.insertList(qArr)
        wt = {}
        val = 0
        for key in qw.list():
            tf = round(1 + math.log10(qw.getWord(key)), 2)
            # float() avoids truncating the ratio under Python 2 division.
            idf = round(math.log10(float(self.numDocs) / len(colRes[key])), 2)
            wt[key] = tf * idf
            val += tf * idf * tf * idf
        val = math.sqrt(val)
        if val:
            for key in wt:
                wt[key] = wt[key] / val

        # Document ids shared by all words.  The first word seeds the
        # running intersection; an empty running result stays empty.
        common = None
        for word in qArr:
            docIds = [posting[0] for posting in colRes[word]]
            if common is None:
                common = docIds
            else:
                common = self._common(common, docIds)
        if not common:
            return []

        # For each word keep only the postings of the shared documents, in
        # the same order as ``common`` so they can be addressed by position.
        matched = {}
        for word in qArr:
            postings = colRes[word]
            rows = []
            i = 0
            j = 0
            while i < len(postings) and j < len(common):
                docId = postings[i][0]
                if docId == common[j]:
                    rows.append(postings[i])
                    i += 1
                    j += 1
                elif docId > common[j]:
                    j += 1
                else:
                    i += 1
            matched[word] = rows

        result = []
        for pos, docId in enumerate(common):
            value = 0
            for key in matched:
                value += matched[key][pos][1] * wt[key]
            result.append((docId, round(value, 2)))
        return sorted(result, key=itemgetter(1), reverse=True)