Ejemplo n.º 1
0
    def __intercept(self, dbcursor, sql, parameters):
        """Handle the custom ``UPDATEWORD`` pseudo-statement, if ``sql`` is one.

        When the first token of ``sql`` is ``UPDATEWORD`` (case-insensitive),
        the row for ``parameters[0]`` in ``BoWIDFIntelligence.wordTable`` is
        updated (DocumentCount + 1, TotalCount + ``parameters[2]``, Prediction
        re-averaged) or, if no such row exists, inserted.

        Args:
            dbcursor: Object providing ``execute()`` and ``fetchone()``.
            sql: Statement text; only its first token is inspected.
            parameters: Indexable sequence. ``[0]`` is the word, ``[2]`` the
                amount added to TotalCount and ``[3]`` the prediction
                (``None``, a ``str`` or a ``ValueTimes``). ``[1]`` is not read.

        Returns:
            bool: ``True`` if the statement was handled here, ``False`` if it
            is not an ``UPDATEWORD`` statement (including an empty one).
        """
        tokens = nltk.tokenize.word_tokenize(sql)
        # A blank statement yields no tokens; there is nothing to intercept.
        if not tokens or tokens[0].upper() != "UPDATEWORD":
            return False

        word = parameters[0]

        dbcursor.execute(
            """SELECT * 
                                                  FROM {0} 
                                                  WHERE Word=?
                                                  LIMIT 1""".format(
                BoWIDFIntelligence.wordTable), (word, ))
        # Row layout is assumed to be (Word, DocumentCount, TotalCount,
        # Prediction), matching the INSERT below -- TODO confirm with schema.
        wordrow = dbcursor.fetchone()

        count = parameters[2]
        new_prediction = parameters[3]

        if wordrow is not None:
            doccount = wordrow[1] + 1
            totalcount = wordrow[2] + count
            if new_prediction is None:
                prediction = None
            else:
                # Count-weighted mean of the incoming and the stored prediction.
                # NOTE(review): unlike the insert path, a ``str`` prediction is
                # not decoded here, and a stored NULL prediction is passed
                # straight to ValueTimes.loads -- confirm both are impossible.
                prediction = (
                    ((new_prediction * count) +
                     (ValueTimes.loads(wordrow[3]) * wordrow[2])) *
                    (1.0 / float(totalcount))).dumps()

            dbcursor.execute(
                """UPDATE {0} 
                                            SET DocumentCount=?,
                                                TotalCount=?,
                                                Prediction=?
                                            WHERE Word=?""".format(
                    BoWIDFIntelligence.wordTable), (
                        doccount,
                        totalcount,
                        prediction,
                        word,
                    ))
        else:
            # First sighting of the word: store the prediction in serialized
            # form; anything that is neither str nor ValueTimes becomes NULL.
            if isinstance(new_prediction, str):
                prediction = new_prediction
            elif isinstance(new_prediction, ValueTimes):
                prediction = new_prediction.dumps()
            else:
                prediction = None

            dbcursor.execute(
                """INSERT INTO {0} 
                                            VALUES (?,?,?,?)""".format(
                    BoWIDFIntelligence.wordTable), (
                        word,
                        1,
                        count,
                        prediction,
                    ))

        return True
Ejemplo n.º 2
0
    def __predicterWorker(self, aQueue, aStopToken):
        """Consume queue items and store a TF-IDF weighted prediction for each.

        Runs until an item whose ``item[1]`` equals ``aStopToken`` arrives.
        For every other item the news row is looked up by ``item[1][1][5]``:

        * no row: ``item[1]`` is handed to ``self.parserManager``;
        * row with ``row[7] is None``: a prediction is computed from the words
          returned by ``__getTextInformation(row[6])`` and written through
          ``self.bowidfdbconnection`` with an ``UPDATEARTICLE`` statement;
        * otherwise nothing is computed.

        Afterwards, items whose ``item[1][0]`` equals ``self.learnLabel`` are
        forwarded to ``self.learnerManager``.

        Args:
            aQueue: Queue-like object providing ``get()`` and ``task_done()``.
            aStopToken: Sentinel compared against ``item[1]`` to end the loop.
        """
        while True:
            item = aQueue.get()
            payload = item[1]

            if payload == aStopToken:
                break

            news_id = payload[1][5]
            newsitem = self.__getNewsItemByID(news_id)
            if newsitem is None:
                self.parserManager.put(payload)
            elif newsitem[7] is None:
                textinfo = self.__getTextInformation(newsitem[6])
                masterinfo = None
                wordcount = 0

                # One pass: total occurrences plus the master word's entry.
                for word in textinfo:
                    info = textinfo[word]
                    wordcount += info[0]
                    if word == self.masterWord:
                        masterinfo = info

                weights = {}
                weighttotal = 0.0

                # NOTE(review): if the master word is missing from textinfo,
                # masterinfo stays None and the idf line raises TypeError --
                # confirm __getTextInformation always includes it.
                for word in textinfo:
                    if word == self.masterWord:
                        continue
                    info = textinfo[word]
                    tf = float(info[0]) / float(wordcount)
                    idf = log(1.0 + (float(masterinfo[1][1]) /
                                     (1 + float(info[1][1]))))
                    tfidf = tf * idf
                    weights[word] = tfidf
                    weighttotal += tfidf

                mastervaltime = self.__getEmptyPrediction()
                if weighttotal != 0:
                    for word in weights:
                        valtime = ValueTimes.loads(textinfo[word][1][3])
                        if weights[word] > 0 and not valtime.isZero():
                            # Weights are scaled by 128 while accumulating and
                            # scaled back once after the loop.
                            mastervaltime = mastervaltime + valtime * (
                                (128.0 * weights[word]) / weighttotal)
                    mastervaltime = mastervaltime * (1 / 128.0)
                self.bowidfdbconnection.execute(
                    "UPDATEARTICLE {0} SET Prediction=? WHERE NewsID=?".format(
                        self.articleTable), (
                            mastervaltime,
                            news_id,
                        ),
                    block=True)

            if payload[0] == self.learnLabel:
                self.learnerManager.put(payload)

            aQueue.task_done()

        # Account for the stop-token item fetched just before the break.
        aQueue.task_done()
Ejemplo n.º 3
0
    def __predicterWorker(self, aQueue, aStopToken):
        """Consume queue items and store a TF-IDF weighted prediction for each.

        Runs until an item whose ``item[1]`` equals ``aStopToken`` arrives.
        For every other item the news row is looked up by ``item[1][1][5]``:

        * no row: ``item[1]`` is handed to ``self.parserManager``;
        * row with ``row[7] is None``: a prediction is computed from the words
          returned by ``__getTextInformation(row[6])`` and written through
          ``self.bowidfdbconnection`` with an ``UPDATEARTICLE`` statement;
        * otherwise nothing is computed.

        Afterwards, items whose ``item[1][0]`` equals ``self.learnLabel`` are
        forwarded to ``self.learnerManager``.

        Args:
            aQueue: Queue-like object providing ``get()`` and ``task_done()``.
            aStopToken: Sentinel compared against ``item[1]`` to end the loop.
        """
        while True:
            item = aQueue.get()
            payload = item[1]

            if payload == aStopToken:
                break

            news_id = payload[1][5]
            newsitem = self.__getNewsItemByID(news_id)
            if newsitem is None:
                self.parserManager.put(payload)
            elif newsitem[7] is None:
                textinfo = self.__getTextInformation(newsitem[6])
                masterinfo = None
                wordcount = 0

                # One pass: total occurrences plus the master word's entry.
                for word in textinfo:
                    info = textinfo[word]
                    wordcount += info[0]
                    if word == self.masterWord:
                        masterinfo = info

                weights = {}
                weighttotal = 0.0

                # NOTE(review): if the master word is missing from textinfo,
                # masterinfo stays None and the idf line raises TypeError --
                # confirm __getTextInformation always includes it.
                for word in textinfo:
                    if word == self.masterWord:
                        continue
                    info = textinfo[word]
                    tf = float(info[0]) / float(wordcount)
                    idf = log(1.0 + (float(masterinfo[1][1]) / (1 + float(info[1][1]))))
                    tfidf = tf * idf
                    weights[word] = tfidf
                    weighttotal += tfidf

                mastervaltime = self.__getEmptyPrediction()
                if weighttotal != 0:
                    for word in weights:
                        valtime = ValueTimes.loads(textinfo[word][1][3])
                        if weights[word] > 0 and not valtime.isZero():
                            # Weights are scaled by 128 while accumulating and
                            # scaled back once after the loop.
                            mastervaltime = mastervaltime + valtime * ((128.0 * weights[word]) / weighttotal)
                    mastervaltime = mastervaltime * (1 / 128.0)
                self.bowidfdbconnection.execute(
                    "UPDATEARTICLE {0} SET Prediction=? WHERE NewsID=?".format(self.articleTable),
                    (mastervaltime, news_id,),
                    block=True)

            if payload[0] == self.learnLabel:
                self.learnerManager.put(payload)

            aQueue.task_done()

        # Account for the stop-token item fetched just before the break.
        aQueue.task_done()
Ejemplo n.º 4
0
 def __intercept(self, dbcursor, sql, parameters):
     """Handle the custom ``UPDATEWORDS`` pseudo-statement, if ``sql`` is one.

     When the first token of ``sql`` is ``UPDATEWORDS`` (case-insensitive),
     every word in ``parameters[0]`` is updated in, or inserted into,
     ``BoWIDFIntelligence.wordTable``. Afterwards the row for
     ``BoWIDFIntelligence.masterWord`` is created if missing and its
     DocumentCount is incremented by one.

     Args:
         dbcursor: Object providing ``execute()`` and ``fetchone()``.
         sql: Statement text; only its first token is inspected.
         parameters: Indexable sequence. ``[0]`` is iterated for words and
             indexed by word for the amount added to TotalCount; ``[1]`` is
             the prediction shared by all words (``None``, a ``str`` or a
             ``ValueTimes``).

     Returns:
         bool: ``True`` if the statement was handled here, ``False`` if it
         is not an ``UPDATEWORDS`` statement (including an empty one).
     """
     tokens = nltk.tokenize.word_tokenize(sql)
     # A blank statement yields no tokens; there is nothing to intercept.
     if not tokens or tokens[0].upper() != "UPDATEWORDS":
         return False

     table = BoWIDFIntelligence.wordTable
     word_counts = parameters[0]

     for word in word_counts:
         count = word_counts[word]
         new_prediction = parameters[1]

         dbcursor.execute("""SELECT * FROM {0} WHERE Word=? LIMIT 1""".format(table), (word,))
         # Row layout is assumed to be (Word, DocumentCount, TotalCount,
         # Prediction), matching the INSERT below -- TODO confirm with schema.
         wordrow = dbcursor.fetchone()

         if wordrow is not None:
             doccount = wordrow[1] + 1
             totalcount = wordrow[2] + count
             if new_prediction is None:
                 prediction = None
             else:
                 # Count-weighted mean of the incoming and stored prediction.
                 # NOTE(review): unlike the insert path, a ``str`` prediction
                 # is not decoded here, and a stored NULL prediction is passed
                 # straight to ValueTimes.loads -- confirm both are impossible.
                 prediction = (((new_prediction * count) + (ValueTimes.loads(wordrow[3]) * wordrow[2])) * (1.0 / float(totalcount))).dumps()

             dbcursor.execute("""UPDATE {0} 
                                             SET DocumentCount=?,
                                                 TotalCount=?,
                                                 Prediction=?
                                             WHERE Word=?""".format(table),
                              (doccount, totalcount, prediction, word,))
         else:
             # First sighting of the word: store the prediction in serialized
             # form; anything that is neither str nor ValueTimes becomes NULL.
             if isinstance(new_prediction, str):
                 prediction = new_prediction
             elif isinstance(new_prediction, ValueTimes):
                 prediction = new_prediction.dumps()
             else:
                 prediction = None

             dbcursor.execute("""INSERT INTO {0} 
                                             VALUES (?,?,?,?)""".format(table),
                              (word, 1, count, prediction,))

     # Ensure the master-word row exists, then count this document on it.
     master = BoWIDFIntelligence.masterWord
     dbcursor.execute("""INSERT OR IGNORE INTO {0} VALUES (?,?,?,?)""".format(table), (master, 0, 0, None))
     dbcursor.execute("""UPDATE {0} SET DocumentCount=DocumentCount+1 WHERE Word=?""".format(table), (master,))

     return True
Ejemplo n.º 5
0
 def __getEmptyPrediction(self):
     """Return a ``ValueTimes`` built from 39 keys that all map to ``0``.

     The keys are 0, 1800, 3600, ..., 68400, i.e. multiples of ``30 * 60``
     (presumably 30-minute steps expressed in seconds -- confirm with
     ``ValueTimes``).
     """
     step = 30 * 60
     return ValueTimes({slot * step: 0 for slot in range(39)})