Esempio n. 1
0
    def __init__(self,
                 aCompanyInformationList,
                 aBoWIDFDatabaseName,
                 aIDFinanceClientIntelligent,
                 LearningThreads=3,
                 PredictingThreads=2,
                 ParsingThreads=4):
        self.status = "OPEN"

        self.companyinformationlist = aCompanyInformationList

        self.bowidfdbconnection = TSSQLConnection(aBoWIDFDatabaseName)
        UpdateWord(self.bowidfdbconnection)
        UpdateArticle(self.bowidfdbconnection)
        UpdateWords(self.bowidfdbconnection)

        self.intradayconnection = aIDFinanceClientIntelligent

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                Word text UNIQUE, 
                                DocumentCount int, 
                                TotalCount int, 
                                Prediction text)""".format(self.wordTable),
                                        block=True)

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate int,
                                        Link text,
                                        NewsID text UNIQUE,
                                        Content text,
                                        Prediction text,
                                        Actual text,
                                        PCount int)""".format(
            self.articleTable),
                                        block=True)

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate text,
                                        Link text,
                                        NewsID text UNIQUE,
                                        FullContent text,
                                        Caller text)""".format(
            self.predictQueueTable),
                                        block=True)

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate text,
                                        Link text,
                                        NewsID text UNIQUE,
                                        FullContent text,
                                        Caller text)""".format(
            self.learnQueueTable),
                                        block=True)

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate text,
                                        Link text,
                                        NewsID text UNIQUE,
                                        FullContent text,
                                        Caller text)""".format(
            self.parseQueueTable),
                                        block=True)

        self.longlearnqueue = queue.Queue()
        self.longlearntimer = IntervalTimer(60 * 60,
                                            self.__moveLongLearn2Regular)

        self.predicterManager = ThreadManager(self.predictLabel,
                                              PredictingThreads,
                                              self.__predicterWorker)
        self.learnerManager = ThreadManager(self.learnLabel, LearningThreads,
                                            self.__learnerWorker)
        self.parserManager = ThreadManager(self.parseLabel, ParsingThreads,
                                           self.__parserWorker)
        self.__tableQueues2Managers()
        self.longlearntimer.start()
 def __init__(self, aCompanyInformationList, aBoWIDFDatabaseName, aIDFinanceClientIntelligent,
                      LearningThreads=3, PredictingThreads=2, ParsingThreads=4):
     self.status = "OPEN"
     
     self.companyinformationlist = aCompanyInformationList
     
     self.bowidfdbconnection = TSSQLConnection(aBoWIDFDatabaseName)
     UpdateWord(self.bowidfdbconnection)
     UpdateArticle(self.bowidfdbconnection)
     UpdateWords(self.bowidfdbconnection)
     
     self.intradayconnection = aIDFinanceClientIntelligent
     
     self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                             Word text UNIQUE, 
                             DocumentCount int, 
                             TotalCount int, 
                             Prediction text)""".format(self.wordTable), block=True)
     
     self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                     Service text,
                                     Ticker text,
                                     Title text,
                                     PubDate int,
                                     Link text,
                                     NewsID text UNIQUE,
                                     Content text,
                                     Prediction text,
                                     Actual text,
                                     PCount int)""".format(self.articleTable), block=True)
             
     self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                     Service text,
                                     Ticker text,
                                     Title text,
                                     PubDate text,
                                     Link text,
                                     NewsID text UNIQUE,
                                     FullContent text,
                                     Caller text)""".format(self.predictQueueTable), block=True)
     
     self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                     Service text,
                                     Ticker text,
                                     Title text,
                                     PubDate text,
                                     Link text,
                                     NewsID text UNIQUE,
                                     FullContent text,
                                     Caller text)""".format(self.learnQueueTable), block=True)
     
     self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                     Service text,
                                     Ticker text,
                                     Title text,
                                     PubDate text,
                                     Link text,
                                     NewsID text UNIQUE,
                                     FullContent text,
                                     Caller text)""".format(self.parseQueueTable), block=True)
             
             
     self.longlearnqueue = queue.Queue()
     self.longlearntimer = IntervalTimer(60*60,  self.__moveLongLearn2Regular)
     
     self.predicterManager = ThreadManager(self.predictLabel, PredictingThreads, self.__predicterWorker)
     self.learnerManager = ThreadManager(self.learnLabel, LearningThreads, self.__learnerWorker)
     self.parserManager = ThreadManager(self.parseLabel, ParsingThreads, self.__parserWorker)
     self.__tableQueues2Managers()
     self.longlearntimer.start()
Esempio n. 3
0
class BoWIDFIntelligence:

    masterWord = "__MASTER_WORD__"

    wordTable = "Words"
    articleTable = "Articles"
    predictQueueTable = "ToPredictQueue"
    learnQueueTable = "ToLearnQueue"
    parseQueueTable = "ToParseQueue"

    predictLabel = "PREDICT"
    learnLabel = "LEARN"
    parseLabel = "PARSE"

    def __init__(self,
                 aCompanyInformationList,
                 aBoWIDFDatabaseName,
                 aIDFinanceClientIntelligent,
                 LearningThreads=3,
                 PredictingThreads=2,
                 ParsingThreads=4):
        self.status = "OPEN"

        self.companyinformationlist = aCompanyInformationList

        self.bowidfdbconnection = TSSQLConnection(aBoWIDFDatabaseName)
        UpdateWord(self.bowidfdbconnection)
        UpdateArticle(self.bowidfdbconnection)
        UpdateWords(self.bowidfdbconnection)

        self.intradayconnection = aIDFinanceClientIntelligent

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                Word text UNIQUE, 
                                DocumentCount int, 
                                TotalCount int, 
                                Prediction text)""".format(self.wordTable),
                                        block=True)

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate int,
                                        Link text,
                                        NewsID text UNIQUE,
                                        Content text,
                                        Prediction text,
                                        Actual text,
                                        PCount int)""".format(
            self.articleTable),
                                        block=True)

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate text,
                                        Link text,
                                        NewsID text UNIQUE,
                                        FullContent text,
                                        Caller text)""".format(
            self.predictQueueTable),
                                        block=True)

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate text,
                                        Link text,
                                        NewsID text UNIQUE,
                                        FullContent text,
                                        Caller text)""".format(
            self.learnQueueTable),
                                        block=True)

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate text,
                                        Link text,
                                        NewsID text UNIQUE,
                                        FullContent text,
                                        Caller text)""".format(
            self.parseQueueTable),
                                        block=True)

        self.longlearnqueue = queue.Queue()
        self.longlearntimer = IntervalTimer(60 * 60,
                                            self.__moveLongLearn2Regular)

        self.predicterManager = ThreadManager(self.predictLabel,
                                              PredictingThreads,
                                              self.__predicterWorker)
        self.learnerManager = ThreadManager(self.learnLabel, LearningThreads,
                                            self.__learnerWorker)
        self.parserManager = ThreadManager(self.parseLabel, ParsingThreads,
                                           self.__parserWorker)
        self.__tableQueues2Managers()
        self.longlearntimer.start()

    def predict(self,
                aService,
                aTicker,
                aTitle,
                aPubDate,
                aLink,
                aNewsID,
                aFullSource,
                doBlock=False,
                doLearn=True):
        if self.__canPredictInput(aService, aTicker, aTitle, aPubDate, aLink,
                                  aNewsID, aFullSource):
            self.predicterManager.put(
                (self.predictLabel, (aService, aTicker, aTitle, aPubDate,
                                     aLink, aNewsID, aFullSource)))

    def learn(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID,
              aFullSource):
        if self.__canLearnInput(aService, aTicker, aTitle, aPubDate, aLink,
                                aNewsID, aFullSource):
            self.learnerManager.put(
                (self.learnLabel, (aService, aTicker, aTitle, aPubDate, aLink,
                                   aNewsID, aFullSource)))

    def __canPredictInput(self, aService, aTicker, aTitle, aPubDate, aLink,
                          aNewsID, aFullSource):
        return True

    def __canLearnInput(self, aService, aTicker, aTitle, aPubDate, aLink,
                        aNewsID, aFullSource):
        return True

    def join(self):
        for _ in range(4):
            self.learnerManager.join()
            self.predicterManager.join()
            self.parserManager.join()

    def close(self, block=False):
        if (self.status != "CLOSED"):
            self.status = "CLOSED"

            if block:
                self.join()

            self.learnerManager.close()
            self.predicterManager.close()
            self.parserManager.close()

            self.longlearntimer.cancel()
            self.__moveLongLearn2Regular()

            self.__managers2TableQueues()

            self.bowidfdbconnection.close()

    def __del__(self):
        self.close()

    def __tableQueues2Managers(self):
        self.__tableQueue2Manager(self.predictQueueTable,
                                  self.predicterManager)
        self.__tableQueue2Manager(self.learnQueueTable, self.learnerManager)
        self.__tableQueue2Manager(self.parseQueueTable, self.parserManager)

    def __tableQueue2Manager(self, aTable, aManager):

        rows = self.bowidfdbconnection.execute(
            "SELECT * FROM {0};".format(aTable), block=True)
        for row in rows:
            toput = (row[7], (row[0], row[1], row[2], row[3], row[4], row[5],
                              row[6]))
            aManager.put(toput)
        self.bowidfdbconnection.execute("DELETE FROM {0};".format(aTable),
                                        block=True)
        self.bowidfdbconnection.execute("VACUUM;", block=True)

    def __managers2TableQueues(self):
        self.__manager2TableQueue(self.predictQueueTable,
                                  self.predicterManager)
        self.__manager2TableQueue(self.learnQueueTable, self.learnerManager)
        self.__manager2TableQueue(self.parseQueueTable, self.parserManager)

    def __manager2TableQueue(self, aTable, aManager):
        try:
            while True:
                item = aManager.queue.get(block=False)[1]
                toput = (
                    item[1][0],
                    item[1][1],
                    item[1][2],
                    item[1][3],
                    item[1][4],
                    item[1][5],
                    item[1][6],
                    item[0],
                )
                self.bowidfdbconnection.execute(
                    "INSERT OR IGNORE INTO {0} VALUES (?,?,?,?,?,?,?,?)".
                    format(aTable), toput)
                aManager.queue.task_done()
        except queue.Empty:
            pass

    def __predicterWorker(self, aQueue, aStopToken):
        while True:
            item = aQueue.get()

            if item[1] == aStopToken:
                break

            newsitem = self.__getNewsItemByID(item[1][1][5])
            if newsitem == None:
                self.parserManager.put(item[1])
            elif newsitem[7] == None:
                textinfo = self.__getTextInformation(newsitem[6])
                masterinfo = None
                wordcount = 0

                for word in textinfo:
                    wordcount += textinfo[word][0]
                    if word == self.masterWord:
                        masterinfo = textinfo[word]

                weights = {}
                weighttotal = 0.0

                for word in textinfo:
                    if word != self.masterWord:
                        tf = float(textinfo[word][0]) / float(wordcount)
                        idf = log(1.0 + (float(masterinfo[1][1]) /
                                         (1 + float(textinfo[word][1][1]))))
                        tfidf = tf * idf
                        weights[word] = tfidf
                        weighttotal += tfidf

                mastervaltime = self.__getEmptyPrediction()
                if weighttotal != 0:
                    for word in weights:
                        valtime = ValueTimes.loads(textinfo[word][1][3])
                        if weights[word] > 0 and not valtime.isZero():
                            mastervaltime = mastervaltime + valtime * (
                                (128.0 * weights[word]) / weighttotal)
                    mastervaltime = mastervaltime * (1 / 128.0)
                self.bowidfdbconnection.execute(
                    "UPDATEARTICLE {0} SET Prediction=? WHERE NewsID=?".format(
                        self.articleTable), (
                            mastervaltime,
                            item[1][1][5],
                        ),
                    block=True)

            if item[1][0] == self.learnLabel:
                self.learnerManager.put(item[1])

            aQueue.task_done()

        aQueue.task_done()

    def __learnerWorker(self, aQueue, aStopToken):
        while True:
            item = aQueue.get()

            if item[1] == aStopToken:
                break

            newsitem = self.__getNewsItemByID(item[1][1][5])
            if newsitem == None:
                self.parserManager.put(item[1])
            elif newsitem[7] == None or len(newsitem[7]) < 5:
                toput = (item[1][0],
                         (item[1][1][0], item[1][1][1], item[1][1][2],
                          item[1][1][3], item[1][1][4], item[1][1][5], None))
                self.predicterManager.put(toput)
            elif newsitem[8] == None:
                data = self.intradayconnection.getData(item[1][1][1],
                                                       newsitem[3])
                if data == None:
                    toput = (item[1][0], (item[1][1][0], item[1][1][1],
                                          item[1][1][2], item[1][1][3],
                                          item[1][1][4], item[1][1][5], None))
                    self.longlearnqueue.put(toput)
                else:
                    tokens = self.__tokenize(newsitem[6])
                    self.bowidfdbconnection.execute("UPDATEWORDS",
                                                    (tokens, data),
                                                    block=True)
                    self.bowidfdbconnection.execute(
                        "UPDATE {0} SET Actual=? WHERE NewsID=?".format(
                            self.articleTable), (
                                data.dumps(),
                                item[1][1][5],
                            ))
            aQueue.task_done()

        aQueue.task_done()

    def __parserWorker(self, aQueue, aStopToken):
        config = MyConfig()
        while True:
            item = aQueue.get()

            if item[1] == aStopToken:
                break

            newsitem = self.__getNewsItemByID(item[1][1][5])
            if newsitem == None:
                print("Parsing " + item[1][1][5])
                tse = None
                try:
                    tse = time.mktime(
                        time.strptime(item[1][1][3],
                                      "%a, %d %b %Y %H:%M:%S %Z"))
                except:
                    pass
                try:
                    tse = time.mktime(
                        time.strptime(item[1][1][3].replace(":", ""),
                                      "%Y-%m-%dT%H%M%S%z"))
                except:
                    pass

                if self.intradayconnection.hasProperStart(item[1][1][1],
                                                          tse) and tse != None:
                    for _ in range(3):
                        try:
                            nparticle = newspaper.article.Article(
                                item[1][1][4], config=config)
                            nparticle.download(html=item[1][1][6])
                            nparticle.parse()
                            text = nparticle.text

                            if text != None and len(
                                    text) > 40 and item[1][1][1] in text:
                                rowvalues = (item[1][1][0], item[1][1][1],
                                             item[1][1][2], tse, item[1][1][4],
                                             item[1][1][5], text, None, None,
                                             None)
                                self.bowidfdbconnection.execute(
                                    "INSERT INTO {0} VALUES (?,?,?,?,?,?,?,?,?,?)"
                                    .format(self.articleTable),
                                    rowvalues,
                                    block=True)
                                toput = (item[1][0],
                                         (item[1][1][0], item[1][1][1],
                                          item[1][1][2], item[1][1][3],
                                          item[1][1][4], item[1][1][5], None))
                                if item[1][0] == self.predictLabel:
                                    self.predicterManager.put(toput)
                                elif item[1][0] == self.learnLabel:
                                    self.learnerManager.put(toput)
                            break

                        except newspaper.article.ArticleException as e:
                            pass
                        except Exception as e:
                            pass
            else:
                toput = (item[1][0],
                         (item[1][1][0], item[1][1][1], item[1][1][2],
                          item[1][1][3], item[1][1][4], item[1][1][5], None))
                if item[1][0] == self.predictLabel:
                    self.predicterManager.put(toput)
                elif item[1][0] == self.learnLabel:
                    self.learnerManager.put(toput)

            aQueue.task_done()

        aQueue.task_done()

    def __getNewsItemByID(self, aNewsID):
        newsitemlist = self.bowidfdbconnection.execute(
            "SELECT * FROM {0} WHERE NewsID=? LIMIT 1".format(
                self.articleTable), (aNewsID, ))
        if not newsitemlist:
            return None
        return newsitemlist[0]

    def __getTextInformation(self, text):
        textinformation = {}
        wordcounts = self.__tokenize(text)

        wordlist = list(wordcounts.keys())
        wordlist.append(self.masterWord)

        blocksize = 500
        for i in range(math.ceil(len(wordlist) / blocksize)):
            curparam = tuple(wordlist[i * blocksize:(i + 1) * blocksize])

            sqlstatement = "SELECT * FROM {0} WHERE".format(
                self.wordTable) + " Word=? OR " * (len(curparam) -
                                                   1) + " Word=? "

            for wordrow in self.bowidfdbconnection.execute(
                    sqlstatement, curparam):
                if wordrow[0] == self.masterWord:
                    textinformation[wordrow[0]] = (0, wordrow)
                else:
                    textinformation[wordrow[0]] = (wordcounts[wordrow[0]],
                                                   wordrow)

        for word in wordcounts:
            if word not in textinformation:
                textinformation[word] = (wordcounts[word],
                                         (word, 0, 0,
                                          self.__getEmptyPrediction().dumps()))

        if self.masterWord not in textinformation:
            textinformation[self.masterWord] = (0, (
                self.masterWord, 0, 0, self.__getEmptyPrediction().dumps()))

        return textinformation

    def tokenize(self, text):
        return self.__tokenize(text)

    def __tokenize(self, text):
        ret = {}

        textwot = re.sub("\([.]+:[.]+\)", " ", text, flags=re.DOTALL)
        wordlist = nltk.tokenize.word_tokenize(textwot)
        stemmer = nltk.stem.snowball.EnglishStemmer()
        stopwords = set(nltk.corpus.stopwords.words('english'))
        for word in wordlist:
            try:
                word = stemmer.stem(word).lower()
                if word and word not in stopwords and word.isalpha():
                    if word not in ret:
                        ret[word] = 0
                    ret[word] += 1
            except:
                pass
        return ret

    def __moveLongLearn2Regular(self):
        try:
            while True:
                newsitem = self.longlearnqueue.get(False)
                self.learnerManager.put(newsitem)
        except queue.Empty:
            pass

    def __getEmptyPrediction(self):
        valtime = {}
        for i in range(39):
            valtime[i * 30 * 60] = 0
        return ValueTimes(valtime)
class BoWIDFIntelligence:
    
    masterWord = "__MASTER_WORD__"
        
    wordTable = "Words"
    articleTable = "Articles"
    predictQueueTable = "ToPredictQueue"
    learnQueueTable = "ToLearnQueue"
    parseQueueTable = "ToParseQueue"
    
    predictLabel = "PREDICT"
    learnLabel = "LEARN"
    parseLabel = "PARSE"
    
    
    def __init__(self, aCompanyInformationList, aBoWIDFDatabaseName, aIDFinanceClientIntelligent,
                         LearningThreads=3, PredictingThreads=2, ParsingThreads=4):
        self.status = "OPEN"
        
        self.companyinformationlist = aCompanyInformationList
        
        self.bowidfdbconnection = TSSQLConnection(aBoWIDFDatabaseName)
        UpdateWord(self.bowidfdbconnection)
        UpdateArticle(self.bowidfdbconnection)
        UpdateWords(self.bowidfdbconnection)
        
        self.intradayconnection = aIDFinanceClientIntelligent
        
        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                Word text UNIQUE, 
                                DocumentCount int, 
                                TotalCount int, 
                                Prediction text)""".format(self.wordTable), block=True)
        
        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate int,
                                        Link text,
                                        NewsID text UNIQUE,
                                        Content text,
                                        Prediction text,
                                        Actual text,
                                        PCount int)""".format(self.articleTable), block=True)
                
        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate text,
                                        Link text,
                                        NewsID text UNIQUE,
                                        FullContent text,
                                        Caller text)""".format(self.predictQueueTable), block=True)
        
        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate text,
                                        Link text,
                                        NewsID text UNIQUE,
                                        FullContent text,
                                        Caller text)""".format(self.learnQueueTable), block=True)
        
        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}(
                                        Service text,
                                        Ticker text,
                                        Title text,
                                        PubDate text,
                                        Link text,
                                        NewsID text UNIQUE,
                                        FullContent text,
                                        Caller text)""".format(self.parseQueueTable), block=True)
                
                
        self.longlearnqueue = queue.Queue()
        self.longlearntimer = IntervalTimer(60*60,  self.__moveLongLearn2Regular)
        
        self.predicterManager = ThreadManager(self.predictLabel, PredictingThreads, self.__predicterWorker)
        self.learnerManager = ThreadManager(self.learnLabel, LearningThreads, self.__learnerWorker)
        self.parserManager = ThreadManager(self.parseLabel, ParsingThreads, self.__parserWorker)
        self.__tableQueues2Managers()
        self.longlearntimer.start()

    def predict(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource, doBlock=False, doLearn=True):
        if self.__canPredictInput(aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
            self.predicterManager.put((self.predictLabel, (aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource)))
        
    def learn(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
        if self.__canLearnInput(aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
            self.learnerManager.put((self.learnLabel, (aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource)))
    
    def __canPredictInput(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
        return True
        
    def __canLearnInput(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
        return True
    
    def join(self):
        for _ in range(4):
            self.learnerManager.join()
            self.predicterManager.join()
            self.parserManager.join()
            
    def close(self, block=False):
        if(self.status != "CLOSED"):
            self.status = "CLOSED"
            
            if block:
                self.join()
                
            self.learnerManager.close()
            self.predicterManager.close()
            self.parserManager.close()
        
            self.longlearntimer.cancel()
            self.__moveLongLearn2Regular()
            
            self.__managers2TableQueues()
            
            self.bowidfdbconnection.close()
             
    def __del__(self):
        self.close()
    
    def __tableQueues2Managers(self):
        self.__tableQueue2Manager(self.predictQueueTable, self.predicterManager)
        self.__tableQueue2Manager(self.learnQueueTable, self.learnerManager)
        self.__tableQueue2Manager(self.parseQueueTable, self.parserManager)
        
    def __tableQueue2Manager(self, aTable, aManager):

        rows = self.bowidfdbconnection.execute("SELECT * FROM {0};".format(aTable), block=True)
        for row in rows:
            toput = (row[7], (row[0], row[1], row[2], row[3], row[4], row[5], row[6]))
            aManager.put(toput)
        self.bowidfdbconnection.execute("DELETE FROM {0};".format(aTable), block=True)
        self.bowidfdbconnection.execute("VACUUM;", block=True)
    
    def __managers2TableQueues(self):
        self.__manager2TableQueue(self.predictQueueTable, self.predicterManager)
        self.__manager2TableQueue(self.learnQueueTable, self.learnerManager)
        self.__manager2TableQueue(self.parseQueueTable, self.parserManager)
    
    def __manager2TableQueue(self, aTable, aManager):
        try:
            while True:
                item = aManager.queue.get(block=False)[1]
                toput = (item[1][0], item[1][1], item[1][2], item[1][3], item[1][4], item[1][5], item[1][6], item[0],)
                self.bowidfdbconnection.execute("INSERT OR IGNORE INTO {0} VALUES (?,?,?,?,?,?,?,?)".format(aTable), toput)
                aManager.queue.task_done()
        except queue.Empty:
            pass
    
    def __predicterWorker(self, aQueue, aStopToken):
        while True:
            item = aQueue.get()
                                    
            if item[1] == aStopToken:
                break
            
            newsitem = self.__getNewsItemByID(item[1][1][5])
            if newsitem == None:
                self.parserManager.put(item[1])
            elif newsitem[7] == None:
                textinfo = self.__getTextInformation(newsitem[6])
                masterinfo = None
                wordcount = 0
                
                for word in textinfo:
                    wordcount += textinfo[word][0]
                    if word == self.masterWord:
                        masterinfo = textinfo[word]
                        
                weights = {}
                weighttotal = 0.0
                
                for word in textinfo:
                    if word != self.masterWord:
                        tf = float(textinfo[word][0]) / float(wordcount)
                        idf = log( 1.0 + (float(masterinfo[1][1]) / (1 + float(textinfo[word][1][1]))) )
                        tfidf = tf*idf
                        weights[word] = tfidf
                        weighttotal += tfidf
                        
                mastervaltime = self.__getEmptyPrediction()
                if weighttotal != 0:
                    for word in weights:
                        valtime = ValueTimes.loads(textinfo[word][1][3])
                        if weights[word] > 0 and not valtime.isZero():
                            mastervaltime = mastervaltime + valtime*((128.0 * weights[word]) / weighttotal)
                    mastervaltime = mastervaltime * (1 / 128.0)
                self.bowidfdbconnection.execute("UPDATEARTICLE {0} SET Prediction=? WHERE NewsID=?".format(self.articleTable), (mastervaltime, item[1][1][5],), block=True)
                
            if item[1][0] == self.learnLabel:
                self.learnerManager.put(item[1])

            aQueue.task_done()

        aQueue.task_done()
        
    def __learnerWorker(self, aQueue, aStopToken):
        while True:
            item = aQueue.get()
                                               
            if item[1] == aStopToken:
                break
        
            newsitem = self.__getNewsItemByID(item[1][1][5])
            if newsitem == None:
                self.parserManager.put(item[1])
            elif newsitem[7] == None or len(newsitem[7]) < 5:
                toput = (item[1][0], (item[1][1][0], item[1][1][1], item[1][1][2], item[1][1][3], item[1][1][4], item[1][1][5], None))
                self.predicterManager.put(toput)
            elif newsitem[8] == None:
                data = self.intradayconnection.getData(item[1][1][1], newsitem[3])
                if data == None:
                    toput = (item[1][0], (item[1][1][0], item[1][1][1], item[1][1][2], item[1][1][3], item[1][1][4], item[1][1][5], None))
                    self.longlearnqueue.put(toput)
                else:
                    tokens = self.__tokenize(newsitem[6])
                    self.bowidfdbconnection.execute("UPDATEWORDS", (tokens, data), block=True)
                    self.bowidfdbconnection.execute("UPDATE {0} SET Actual=? WHERE NewsID=?".format(self.articleTable), (data.dumps(), item[1][1][5],))
            aQueue.task_done()
            
        aQueue.task_done()
        
    def __parserWorker(self, aQueue, aStopToken):    
        config = MyConfig()
        while True:
            item = aQueue.get()
                                                
            if item[1] == aStopToken:
                break
            
            newsitem = self.__getNewsItemByID(item[1][1][5])
            if newsitem == None:
                print("Parsing " + item[1][1][5])
                tse = None
                try:
                    tse = time.mktime(time.strptime(item[1][1][3], "%a, %d %b %Y %H:%M:%S %Z"))
                except:
                    pass
                try:
                    tse = time.mktime(time.strptime(item[1][1][3].replace(":", ""), "%Y-%m-%dT%H%M%S%z"))
                except:
                    pass
                        
                if self.intradayconnection.hasProperStart(item[1][1][1], tse) and tse != None:
                    for _ in range(3):
                        try:
                            nparticle = newspaper.article.Article(item[1][1][4], config=config)
                            nparticle.download(html=item[1][1][6])
                            nparticle.parse()
                            text = nparticle.text
                            
                            if text!= None and len(text) > 40 and item[1][1][1] in text:
                                rowvalues = (item[1][1][0], item[1][1][1], item[1][1][2], tse, item[1][1][4], item[1][1][5], text, None, None, None)
                                self.bowidfdbconnection.execute("INSERT INTO {0} VALUES (?,?,?,?,?,?,?,?,?,?)".format(self.articleTable), rowvalues, block=True)
                                toput = (item[1][0], (item[1][1][0], item[1][1][1], item[1][1][2], item[1][1][3], item[1][1][4], item[1][1][5], None))
                                if item[1][0] == self.predictLabel:
                                    self.predicterManager.put(toput)
                                elif item[1][0] == self.learnLabel:
                                    self.learnerManager.put(toput)
                            break
                    
                        except newspaper.article.ArticleException as e:
                            pass
                        except Exception as e:
                            pass
            else:
                toput = (item[1][0], (item[1][1][0], item[1][1][1], item[1][1][2], item[1][1][3], item[1][1][4], item[1][1][5], None))
                if item[1][0] == self.predictLabel:
                    self.predicterManager.put(toput)
                elif item[1][0] == self.learnLabel:
                    self.learnerManager.put(toput)
               
            aQueue.task_done() 

        aQueue.task_done()

    def __getNewsItemByID(self, aNewsID):
        newsitemlist = self.bowidfdbconnection.execute("SELECT * FROM {0} WHERE NewsID=? LIMIT 1".format(self.articleTable), (aNewsID,))
        if not newsitemlist:
            return None
        return newsitemlist[0]
    
    def __getTextInformation(self, text):
        textinformation = {}
        wordcounts = self.__tokenize(text)
        
        
        wordlist = list(wordcounts.keys())
        wordlist.append(self.masterWord)
        
        blocksize = 500
        for i in range(math.ceil(len(wordlist)/blocksize)):
            curparam = tuple(wordlist[i*blocksize:(i+1)*blocksize]) 
        
            sqlstatement = "SELECT * FROM {0} WHERE".format(self.wordTable) + " Word=? OR " * (len(curparam)-1) + " Word=? "
            
            for wordrow in self.bowidfdbconnection.execute(sqlstatement, curparam):
                if wordrow[0] == self.masterWord:
                    textinformation[wordrow[0]] = (0, wordrow)
                else:
                    textinformation[wordrow[0]] = (wordcounts[wordrow[0]], wordrow)
            
        
        for word in wordcounts:
            if word not in textinformation:
                textinformation[word] = (wordcounts[word], (word, 0, 0, self.__getEmptyPrediction().dumps()))
        
        if self.masterWord not in textinformation:
            textinformation[self.masterWord] = (0, (self.masterWord, 0, 0, self.__getEmptyPrediction().dumps()))
        
        return textinformation
        
    def tokenize(self, text):
        return self.__tokenize(text)
            
    def __tokenize(self, text):
        ret = {}
        
        textwot = re.sub("\([.]+:[.]+\)", " ", text, flags=re.DOTALL)
        wordlist = nltk.tokenize.word_tokenize(textwot)
        stemmer = nltk.stem.snowball.EnglishStemmer()
        stopwords = set( nltk.corpus.stopwords.words('english'))
        for word in wordlist:
            try:
                word = stemmer.stem(word).lower()
                if word and word not in stopwords and word.isalpha():
                    if word not in ret:
                        ret[word] = 0
                    ret[word] += 1
            except:
                pass
        return ret    
    
    def __moveLongLearn2Regular(self):
        try:
            while True:
                newsitem = self.longlearnqueue.get(False)
                self.learnerManager.put(newsitem)
        except queue.Empty:
            pass
   
    def __getEmptyPrediction(self):
        valtime = {}
        for i in range(39):
            valtime[i*30*60] = 0
        return ValueTimes(valtime)