Ejemplo n.º 1
0
class QueryManager:
    """Evaluate boolean (AND / OR) word queries.

    Posting lists are looked up in an in-memory cache index first and in the
    database second. The AND operation is a sorted-merge, so posting lists
    are expected to be sorted in ascending order.
    """

    def __init__(self, db, cdb):
        """Populate the cache index from ``cdb`` and remember ``db``.

        db  -- object exposing ``find(word)``; consulted on a cache miss.
        cdb -- object exposing ``findAll()``; its result is handed to
               ``Index.createFromCursor`` to fill the cache.
        """
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db

    def toArray(self, query):
        """Split a query string into a list of tokens.

        '(' and ')' always become tokens of their own; spaces separate
        tokens and are dropped; any other run of characters is one token.
        """
        tokens = []
        current = ''
        for ch in query:
            if ch == '(' or ch == ')' or ch == ' ':
                # A delimiter closes the token collected so far.
                if current:
                    tokens.append(current)
                    current = ''
                if ch != ' ':
                    tokens.append(ch)
            else:
                current += ch
        if current:
            tokens.append(current)
        return tokens

    def _lookup(self, word):
        """Return what the cache holds for ``word``, else what the database holds."""
        lst = self.cacheIndex.find(word)
        if not lst:
            lst = self.db.find(word)
        return lst

    def _resolve(self, operand):
        """Turn an operand (a word or an already computed list) into a list.

        Lists are returned unchanged. Anything else is looked up as a word;
        an Exception is raised when neither cache nor database knows it.
        """
        if isinstance(operand, list):
            return operand
        lst = self._lookup(operand)
        if not lst:
            raise Exception('Зборот "' + operand + '" не постои.')
        return lst

    def andQ(self, A, B):
        """Return the elements common to both operands, in ascending order.

        A, B -- each is either a word (looked up in cache, then database) or
                a list produced by an earlier operation.

        Raises Exception when an operand is empty/None or a word is unknown.
        Neither input list is modified, so cached posting lists stay intact.
        """
        if not A or not B:
            raise Exception('Грешка.')

        lstA = self._resolve(A)
        lstB = self._resolve(B)

        result = []
        i = 0
        j = 0
        lenA = len(lstA)
        lenB = len(lstB)
        # Sorted-merge intersection bounded by the list lengths; no sentinel
        # is appended because the lists may be shared with the cache.
        while i < lenA and j < lenB:
            a = lstA[i]
            b = lstB[j]
            if a == b:
                result.append(a)
                i += 1
                j += 1
            elif a < b:
                i += 1
            else:
                j += 1

        return result

    def orQ(self, A, B):
        """Return the union of both operands without duplicates, sorted ascending.

        A, B -- each is either a word (looked up in cache, then database) or
                a list produced by an earlier operation.

        Raises Exception when a word is unknown. The result is sorted
        because ``andQ`` relies on sorted input when it consumes it.
        """
        lstA = self._resolve(A)
        lstB = self._resolve(B)
        return sorted(set(lstA + lstB))

    def buildTree(self, query):
        """Build the expression tree for a token list via ``Tree.buildOrTree``."""
        return Tree.buildOrTree(query)

    def execute(self, query):
        """Run a query string and return its result.

        A single-token query is answered directly from cache/database (the
        lookup result is returned as is, even when nothing was found). Any
        other query is parsed into a tree and evaluated.
        """
        qArr = self.toArray(query)
        if len(qArr) == 1:
            return self._lookup(qArr[0])
        qTree = self.buildTree(qArr)
        return self.executeQuery(qTree)

    def executeQuery(self, tree):
        """Recursively evaluate an expression tree.

        A node with two children and value 'OR' / 'AND' combines the results
        of its children with ``orQ`` / ``andQ``. A node without children is
        treated as a leaf and yields its ``value`` so the parent operation
        can resolve it.
        NOTE(review): the leaf rule assumes leaf nodes carry the word in
        ``value`` with ``left``/``right`` unset -- confirm against ``Tree``.

        Errors raised while combining are printed and turn into ``None``
        (best-effort behaviour kept from the original implementation).
        """
        lnode = tree.left
        rnode = tree.right

        if not lnode and not rnode:
            return tree.value

        if lnode and rnode:
            if tree.value == 'OR':
                combine = self.orQ
            elif tree.value == 'AND':
                combine = self.andQ
            else:
                return None
            try:
                a = self.executeQuery(lnode)
                b = self.executeQuery(rnode)
                return combine(a, b)
            except Exception as e:
                # ``message`` only exists on Python 2 exceptions; fall back
                # to the exception itself elsewhere.
                print(getattr(e, 'message', e))
                return None

        return None
Ejemplo n.º 2
0
class QueryManager:
    """Evaluate boolean (AND / OR) word queries.

    Posting lists are looked up in an in-memory cache index first and in the
    database second. The AND operation is a sorted-merge, so posting lists
    are expected to be sorted in ascending order.
    """

    def __init__(self, db, cdb):
        """Populate the cache index from ``cdb`` and remember ``db``.

        db  -- object exposing ``find(word)``; consulted on a cache miss.
        cdb -- object exposing ``findAll()``; its result is handed to
               ``Index.createFromCursor`` to fill the cache.
        """
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db

    def toArray(self, query):
        """Split a query string into a list of tokens.

        '(' and ')' always become tokens of their own; spaces separate
        tokens and are dropped; any other run of characters is one token.
        """
        tokens = []
        current = ''
        for ch in query:
            if ch == '(' or ch == ')' or ch == ' ':
                # A delimiter closes the token collected so far.
                if current:
                    tokens.append(current)
                    current = ''
                if ch != ' ':
                    tokens.append(ch)
            else:
                current += ch
        if current:
            tokens.append(current)
        return tokens

    def _lookup(self, word):
        """Return what the cache holds for ``word``, else what the database holds."""
        lst = self.cacheIndex.find(word)
        if not lst:
            lst = self.db.find(word)
        return lst

    def _resolve(self, operand):
        """Turn an operand (a word or an already computed list) into a list.

        Lists are returned unchanged. Anything else is looked up as a word;
        an Exception is raised when neither cache nor database knows it.
        """
        if isinstance(operand, list):
            return operand
        lst = self._lookup(operand)
        if not lst:
            raise Exception('Зборот "' + operand + '" не постои.')
        return lst

    def andQ(self, A, B):
        """Return the elements common to both operands, in ascending order.

        A, B -- each is either a word (looked up in cache, then database) or
                a list produced by an earlier operation.

        Raises Exception when an operand is empty/None or a word is unknown.
        Neither input list is modified, so cached posting lists stay intact.
        """
        if not A or not B:
            raise Exception('Грешка.')

        lstA = self._resolve(A)
        lstB = self._resolve(B)

        result = []
        i = 0
        j = 0
        lenA = len(lstA)
        lenB = len(lstB)
        # Sorted-merge intersection bounded by the list lengths; no sentinel
        # is appended because the lists may be shared with the cache.
        while i < lenA and j < lenB:
            a = lstA[i]
            b = lstB[j]
            if a == b:
                result.append(a)
                i += 1
                j += 1
            elif a < b:
                i += 1
            else:
                j += 1

        return result

    def orQ(self, A, B):
        """Return the union of both operands without duplicates, sorted ascending.

        A, B -- each is either a word (looked up in cache, then database) or
                a list produced by an earlier operation.

        Raises Exception when a word is unknown. The result is sorted
        because ``andQ`` relies on sorted input when it consumes it.
        """
        lstA = self._resolve(A)
        lstB = self._resolve(B)
        return sorted(set(lstA + lstB))

    def buildTree(self, query):
        """Build the expression tree for a token list via ``Tree.buildOrTree``."""
        return Tree.buildOrTree(query)

    def execute(self, query):
        """Run a query string and return its result.

        A single-token query is answered directly from cache/database (the
        lookup result is returned as is, even when nothing was found). Any
        other query is parsed into a tree and evaluated.
        """
        qArr = self.toArray(query)
        if len(qArr) == 1:
            return self._lookup(qArr[0])
        qTree = self.buildTree(qArr)
        return self.executeQuery(qTree)

    def executeQuery(self, tree):
        """Recursively evaluate an expression tree.

        A node with two children and value 'OR' / 'AND' combines the results
        of its children with ``orQ`` / ``andQ``. A node without children is
        treated as a leaf and yields its ``value`` so the parent operation
        can resolve it.
        NOTE(review): the leaf rule assumes leaf nodes carry the word in
        ``value`` with ``left``/``right`` unset -- confirm against ``Tree``.

        Errors raised while combining are printed and turn into ``None``
        (best-effort behaviour kept from the original implementation).
        """
        lnode = tree.left
        rnode = tree.right

        if not lnode and not rnode:
            return tree.value

        if lnode and rnode:
            if tree.value == 'OR':
                combine = self.orQ
            elif tree.value == 'AND':
                combine = self.andQ
            else:
                return None
            try:
                a = self.executeQuery(lnode)
                b = self.executeQuery(rnode)
                return combine(a, b)
            except Exception as e:
                # ``message`` only exists on Python 2 exceptions; fall back
                # to the exception itself elsewhere.
                print(getattr(e, 'message', e))
                return None

        return None
Ejemplo n.º 3
0
class QueryManagerITC:
    """Rank the documents that contain every word of a free-text query.

    Posting lists are looked up in an in-memory cache index first and in the
    database second. Each posting is indexed as ``posting[0]`` (the value
    documents are matched on) and ``posting[1]`` (a numeric weight), and the
    lists are expected to be sorted ascending by ``posting[0]``.
    """

    def __init__(self, db, cdb, numDocs=127000):
        """Populate the cache index from ``cdb`` and remember ``db``.

        db      -- object exposing ``find(word)``; consulted on a cache miss.
        cdb     -- object exposing ``findAll()``; its result is handed to
                   ``Index.createFromCursor`` to fill the cache.
        numDocs -- collection size used in the idf computation; defaults to
                   the value that used to be hard-coded.
        """
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db
        self.numDocs = numDocs

    def toArray(self, query):
        """Split the query on whitespace, dropping empty tokens."""
        return query.split()

    def getArray(self, word):
        """Return the postings of ``word`` (cache, then database) or ``[]``."""
        arr = self.cacheIndex.find(word)
        if not arr:
            arr = self.db.find(word)
        if not arr:
            arr = []
        return arr

    def _intersectSorted(self, lstA, lstB):
        """Return the common elements of two ascending lists as a new list.

        Strict intersection: an empty input gives an empty result. Neither
        input is modified.
        """
        result = []
        i = 0
        j = 0
        lenA = len(lstA)
        lenB = len(lstB)
        while i < lenA and j < lenB:
            a = lstA[i]
            b = lstB[j]
            if a == b:
                result.append(a)
                i += 1
                j += 1
            elif a < b:
                i += 1
            else:
                j += 1
        return result

    def intersect(self, lstA, lstB):
        """Intersect two ascending lists, treating an empty list as "no constraint".

        When exactly one argument is empty the other one is returned as is
        (the same list object); when both are empty ``[]`` is returned. This
        quirk is kept for existing callers. The inputs are not modified.
        """
        if not lstA and not lstB:
            return []
        if not lstA:
            return lstB
        if not lstB:
            return lstA
        return self._intersectSorted(lstA, lstB)

    def execute(self, query):
        """Return ``(doc, score)`` tuples for documents matching all query words.

        The result is sorted by score, highest first; scores are rounded to
        two decimals. An empty query, a word without postings, or an empty
        intersection yields ``[]``.
        """
        qArr = self.toArray(query)
        if not qArr:
            return []

        colRes = {}
        for word in qArr:
            colRes[word] = self.getArray(word)
            if not colRes[word]:
                # No document can contain every word; returning here also
                # keeps the idf step below from dividing by zero.
                return []

        qw = WordsList(1)
        qw.insertList(qArr)

        # Query-side weights: tf * idf per word, then length-normalised.
        wt = {}
        val = 0
        for key in qw.list():
            tf = round(1 + math.log10(qw.getWord(key)), 2)
            # float() forces true division on Python 2 as well.
            idf = round(math.log10(float(self.numDocs) / len(colRes[key])), 2)
            wt[key] = tf * idf
            val += tf * idf * tf * idf

        val = math.sqrt(val)
        if val:
            for key in wt:
                wt[key] = wt[key] / val
        # When val is 0 every weight is already 0, so nothing to normalise.

        # Documents that appear in the postings of every query word.
        arrIntersect = None
        for word in qArr:
            docs = [posting[0] for posting in colRes[word]]
            if arrIntersect is None:
                arrIntersect = docs
            else:
                arrIntersect = self._intersectSorted(docs, arrIntersect)
            if not arrIntersect:
                return []

        # For each word keep only the postings of the surviving documents,
        # index-aligned with arrIntersect.
        dictIntersect = {}
        for word in qArr:
            niza = colRes[word]
            kept = []
            i = 0
            j = 0
            while i < len(niza) and j < len(arrIntersect):
                if niza[i][0] == arrIntersect[j]:
                    kept.append(niza[i])
                    i += 1
                    j += 1
                elif niza[i][0] > arrIntersect[j]:
                    j += 1
                else:
                    i += 1
            dictIntersect[word] = kept

        result = []
        for pos, doc in enumerate(arrIntersect):
            value = 0
            for key in dictIntersect.keys():
                value += dictIntersect[key][pos][1] * wt[key]
            result.append((doc, round(value, 2)))

        return sorted(result, key=itemgetter(1), reverse=True)
class QueryManagerITC:
    """Rank the documents that contain every word of a free-text query.

    Posting lists are looked up in an in-memory cache index first and in the
    database second. Each posting is indexed as ``posting[0]`` (the value
    documents are matched on) and ``posting[1]`` (a numeric weight), and the
    lists are expected to be sorted ascending by ``posting[0]``.
    """

    def __init__(self, db, cdb, numDocs=127000):
        """Populate the cache index from ``cdb`` and remember ``db``.

        db      -- object exposing ``find(word)``; consulted on a cache miss.
        cdb     -- object exposing ``findAll()``; its result is handed to
                   ``Index.createFromCursor`` to fill the cache.
        numDocs -- collection size used in the idf computation; defaults to
                   the value that used to be hard-coded.
        """
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db
        self.numDocs = numDocs

    def toArray(self, query):
        """Split the query on whitespace, dropping empty tokens."""
        return query.split()

    def getArray(self, word):
        """Return the postings of ``word`` (cache, then database) or ``[]``."""
        arr = self.cacheIndex.find(word)
        if not arr:
            arr = self.db.find(word)
        if not arr:
            arr = []
        return arr

    def _intersectSorted(self, lstA, lstB):
        """Return the common elements of two ascending lists as a new list.

        Strict intersection: an empty input gives an empty result. Neither
        input is modified.
        """
        result = []
        i = 0
        j = 0
        lenA = len(lstA)
        lenB = len(lstB)
        while i < lenA and j < lenB:
            a = lstA[i]
            b = lstB[j]
            if a == b:
                result.append(a)
                i += 1
                j += 1
            elif a < b:
                i += 1
            else:
                j += 1
        return result

    def intersect(self, lstA, lstB):
        """Intersect two ascending lists, treating an empty list as "no constraint".

        When exactly one argument is empty the other one is returned as is
        (the same list object); when both are empty ``[]`` is returned. This
        quirk is kept for existing callers. The inputs are not modified.
        """
        if not lstA and not lstB:
            return []
        if not lstA:
            return lstB
        if not lstB:
            return lstA
        return self._intersectSorted(lstA, lstB)

    def execute(self, query):
        """Return ``(doc, score)`` tuples for documents matching all query words.

        The result is sorted by score, highest first; scores are rounded to
        two decimals. An empty query, a word without postings, or an empty
        intersection yields ``[]``.
        """
        qArr = self.toArray(query)
        if not qArr:
            return []

        colRes = {}
        for word in qArr:
            colRes[word] = self.getArray(word)
            if not colRes[word]:
                # No document can contain every word; returning here also
                # keeps the idf step below from dividing by zero.
                return []

        qw = WordsList(1)
        qw.insertList(qArr)

        # Query-side weights: tf * idf per word, then length-normalised.
        wt = {}
        val = 0
        for key in qw.list():
            tf = round(1 + math.log10(qw.getWord(key)), 2)
            # float() forces true division on Python 2 as well.
            idf = round(math.log10(float(self.numDocs) / len(colRes[key])), 2)
            wt[key] = tf * idf
            val += tf * idf * tf * idf

        val = math.sqrt(val)
        if val:
            for key in wt:
                wt[key] = wt[key] / val
        # When val is 0 every weight is already 0, so nothing to normalise.

        # Documents that appear in the postings of every query word.
        arrIntersect = None
        for word in qArr:
            docs = [posting[0] for posting in colRes[word]]
            if arrIntersect is None:
                arrIntersect = docs
            else:
                arrIntersect = self._intersectSorted(docs, arrIntersect)
            if not arrIntersect:
                return []

        # For each word keep only the postings of the surviving documents,
        # index-aligned with arrIntersect.
        dictIntersect = {}
        for word in qArr:
            niza = colRes[word]
            kept = []
            i = 0
            j = 0
            while i < len(niza) and j < len(arrIntersect):
                if niza[i][0] == arrIntersect[j]:
                    kept.append(niza[i])
                    i += 1
                    j += 1
                elif niza[i][0] > arrIntersect[j]:
                    j += 1
                else:
                    i += 1
            dictIntersect[word] = kept

        result = []
        for pos, doc in enumerate(arrIntersect):
            value = 0
            for key in dictIntersect.keys():
                value += dictIntersect[key][pos][1] * wt[key]
            result.append((doc, round(value, 2)))

        return sorted(result, key=itemgetter(1), reverse=True)