コード例 #1
0
ファイル: tweetdb.py プロジェクト: vgoklani/TweetMiner
 def __init__(self):
     """Wire up the collaborators and reset the per-run state.

     A fresh Configuration is created and handed to both the text
     processor and the data set; the MongoDB wrapper is pointed at the
     database and collection exposed by the data set.
     """
     settings = Configuration()
     text_processor = TextProcess(settings)
     data_set = DataSet(settings)
     self.ptext = text_processor
     self.ds = data_set
     self.mongo = MongoDB(data_set.db, data_set.collection)
     # Scratch state: most recent tweet, its tokens, and a running counter.
     self.tweet = self.tokens = ""
     self.i = 0
     # Translation switches are copied from the text processor.
     self.enable_translation = text_processor.translation
     self.translation_store = text_processor.translation_store
コード例 #2
0
 def __init__(self, conf, q):
     """Set up the worker's collaborators and state, then the Thread base.

     Args:
         conf: configuration object passed to TextProcess and DataSet.
         q: queue of raw tweets this worker reads from.
     """
     text_processor = TextProcess(conf)
     self.ptext = text_processor
     self.ds = DataSet(conf)
     self.cleaner = KeyCleaner()
     # Translation switches are copied from the text processor.
     self.enable_translation = text_processor.translation
     self.translation_store = text_processor.translation_store
     # Tweets queue
     self.tweets = q
     # Scratch state: most recent tweet, its tokens, and a running counter.
     self.tweet = self.tokens = ""
     self.i = 0
     Thread.__init__(self)
コード例 #3
0
ファイル: tweetdb.py プロジェクト: mftaher/TweetMiner
 def __init__(self):
     """Create the helper objects and initialise the bookkeeping fields.

     One Configuration instance is shared by the text processor and the
     data set; the MongoDB wrapper uses the data set's ``db`` and
     ``collection`` attributes.
     """
     config = Configuration()
     processor = TextProcess(config)
     dataset = DataSet(config)
     self.ptext = processor
     self.ds = dataset
     self.mongo = MongoDB(dataset.db, dataset.collection)
     # Last tweet / token values and the running counter all start empty.
     self.tweet = self.tokens = ""
     self.i = 0
     # Mirror the text processor's translation switches on this object.
     self.enable_translation = processor.translation
     self.translation_store = processor.translation_store
コード例 #4
0
ファイル: processtweets.py プロジェクト: mftaher/TweetMiner
 def __init__(self, conf, q):
     """Prepare the worker: helpers first, then state, then the Thread base.

     Args:
         conf: configuration object given to TextProcess and DataSet.
         q: queue from which raw tweets are taken.
     """
     processor = TextProcess(conf)
     self.ptext = processor
     self.ds = DataSet(conf)
     self.cleaner = KeyCleaner()
     # Mirror the text processor's translation switches on this object.
     self.enable_translation = processor.translation
     self.translation_store = processor.translation_store
     # Tweets queue
     self.tweets = q
     # Last tweet / token values and the running counter all start empty.
     self.tweet = self.tokens = ""
     self.i = 0
     Thread.__init__(self)
コード例 #5
0
class ProcessTweets(Thread):
    """Worker thread that processes raw tweets from a queue and stores them.

    Each queue item that contains a ``"text"`` key is run through the text
    processor, gets a ``"tokens"`` entry, is passed through the key cleaner
    and is then (depending on the translation flags) written to the data set.
    Every item taken from the queue is acknowledged with ``task_done()``.
    """

    def __init__(self, conf, q):
        """Create the worker.

        Args:
            conf: configuration object passed to TextProcess and DataSet.
            q: queue of raw tweets; must provide ``get()`` and ``task_done()``.
        """
        # Initialise the Thread base before adding our own attributes.
        Thread.__init__(self)
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.cleaner = KeyCleaner()
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store
        self.tweets = q  # Tweets queue
        self.tweet = ""
        self.tokens = ""
        self.i = 0

    def run(self):
        """Consume the queue forever, processing one item per iteration."""
        while True:
            rawTweet = self.tweets.get()
            try:
                if "text" in rawTweet:
                    self._process(rawTweet)
            finally:
                # Acknowledge every item, including ones without "text"
                # and ones whose processing raised; otherwise a caller
                # blocked in Queue.join() would never be released.
                self.tweets.task_done()

    def _process(self, rawTweet):
        """Normalise one tweet in place and store it when it qualifies."""
        self.ptext.set_tweet_text(rawTweet['text'])
        self.ptext.set_tweet_source(rawTweet['source'])
        self.ptext.process_text()
        rawTweet['source'] = self.ptext.get_tweet_source()
        rawTweet['text'] = self.ptext.get_tweet_text()
        self.tokens = self.ptext.get_tweet_tokens()
        rawTweet.update({'tokens': self.tokens})
        self.tweet = self.cleaner.unset_tweet_keys(rawTweet)

        if self._should_store():
            self.ds.output_tweet(self.tweet)
            self.i += 1

    def _should_store(self):
        """Return True when the tweet just processed must be written out."""
        if not self.ptext.get_translate_status():
            return True
        if not self.translation_store:
            return False
        if not self.enable_translation:
            return True
        # Translation is enabled: keep the tweet only if it did not fail.
        return not self.ptext.get_translate_failed()

    def get_tweet_count(self):
        """Return the number of tweets written to the data set so far."""
        return self.i
コード例 #6
0
 def testText(self):
     """Build a TextProcess from 'config.ini' and have it read the sample file.

     The method makes no assertions; it only exercises construction and
     ``readfile`` on 'pubmed_result.txt'.
     """
     TextProcess('config.ini').readfile('pubmed_result.txt')
コード例 #7
0
ファイル: processtweets.py プロジェクト: mftaher/TweetMiner
class ProcessTweets(Thread):
    """Queue-driven worker thread that cleans tweets and writes them out.

    Items taken from the queue that carry a ``"text"`` key are processed by
    the text processor, extended with a ``"tokens"`` entry, stripped by the
    key cleaner and stored in the data set when the translation flags allow
    it. Every dequeued item is acknowledged with ``task_done()``.
    """

    def __init__(self, conf, q):
        """Create the worker.

        Args:
            conf: configuration object passed to TextProcess and DataSet.
            q: queue of raw tweets; must provide ``get()`` and ``task_done()``.
        """
        # Initialise the Thread base before adding our own attributes.
        Thread.__init__(self)
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.cleaner = KeyCleaner()
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store
        self.tweets = q         # Tweets queue
        self.tweet = ""
        self.tokens = ""
        self.i = 0

    def run(self):
        """Loop forever: take an item, process it, acknowledge it."""
        while True:
            rawTweet = self.tweets.get()
            try:
                if "text" in rawTweet:
                    self._process(rawTweet)
            finally:
                # task_done() must match every get(); skipping it for
                # items without "text" would leave Queue.join() blocked.
                self.tweets.task_done()

    def _process(self, rawTweet):
        """Normalise one tweet in place and store it when it qualifies."""
        self.ptext.set_tweet_text(rawTweet['text'])
        self.ptext.set_tweet_source(rawTweet['source'])
        self.ptext.process_text()
        rawTweet['source'] = self.ptext.get_tweet_source()
        rawTweet['text'] = self.ptext.get_tweet_text()
        self.tokens = self.ptext.get_tweet_tokens()
        rawTweet.update({'tokens': self.tokens})
        self.tweet = self.cleaner.unset_tweet_keys(rawTweet)

        if self._should_store():
            self.ds.output_tweet(self.tweet)
            self.i += 1

    def _should_store(self):
        """Return True when the tweet just processed must be written out."""
        if not self.ptext.get_translate_status():
            return True
        if not self.translation_store:
            return False
        if not self.enable_translation:
            return True
        # Translation is enabled: keep the tweet only if it did not fail.
        return not self.ptext.get_translate_failed()

    def get_tweet_count(self):
        """Return the number of tweets written to the data set so far."""
        return self.i
コード例 #8
0
from client import Client
from motormanager import MotorManager
from gallery import Gallery
from PyQt5 import uic
from PyQt5.QtCore import QFile, QRegExp
from PyQt5.QtWidgets import QApplication, QFileDialog, QMainWindow, QMenu, QMessageBox, QTableWidgetItem
#=========================================================
# Module-level setup: UI form loading and shared objects
#=========================================================
# UI config: path of the Qt Designer form file to load.
qtCreatorFile = "mainwindow.ui"
# uic.loadUiType returns the generated form class and its Qt base class.
Ui_MainWindow, QtBaseClass = uic.loadUiType(qtCreatorFile)

# Shared objects created once at import time: the motor manager is built
# around the client, and the text processor receives both of them.
client = Client()
mm = MotorManager(client)
# NOTE(review): TextProcess is not among the imports visible in this
# excerpt - confirm it is imported elsewhere in the file.
tp = TextProcess(client, mm)
#=========================================================
# a class that handles the signal and callbacks of the GUI
#=========================================================


class GUI(QMainWindow, Ui_MainWindow):
    """Main window built from QMainWindow plus the form class loaded from the .ui file."""
    def __init__(self):
        """Initialise both base classes, build the form, then run the setup hooks."""
        QMainWindow.__init__(self)
        Ui_MainWindow.__init__(self)
        # setupUi comes from the loaded form class; it populates this window.
        self.setupUi(self)

        # The setup* methods below are not visible in this excerpt;
        # presumably they are defined further down in this class - TODO confirm.
        self.setupFileMenu()
        self.setupHelpMenu()
        self.setupCallbacksLED()
        self.setupMotors()
コード例 #9
0
        "url": "http://pogledaj.to/art/zivot-je-cupav-i-dlakav/",
        "name": "Zivot je cupav i dlakav",
        "dictionary_path": "./Oznake vrsta rijeci/GRUPA1/6-oznake.txt",
        "content-selector": {
            "class": "main the-content"
        }
    },
]

# Only the first entry of `articles` is processed (slice [0:1]).
for article in articles[0:1]:
    ###
    # Text processing
    ###
    # Create an instance of the TextProcess class, which fetches the text from
    # the URL and filters it.
    tp = TextProcess(url=article["url"],
                     filename=article["name"],
                     content_selector_dict=article["content-selector"])

    # Get the filtered sentences as a list.
    filtered_sentences = tp.get_filtered_sentences()

    ###
    # Dictionary extraction
    ###
    # Create an instance of the Dictionary class, which compares the filtered
    # sentences with those in the dictionary and creates the node and edge lists.
    wt = Dictionary(dictionary_path=article["dictionary_path"])

    # Set the node and edge lists. The first parameter is the filtered sentences,
    # the second is an array of the wanted word types. Words that are not connected
    # to others go into the node list; those that are connected are stored in the
    # edge list.
コード例 #10
0
ファイル: tweetdb.py プロジェクト: vgoklani/TweetMiner
class TweetDB():
    """Re-process tweets that are already stored in MongoDB.

    Tweets having both a text and geo coordinates are fetched, run through
    the text processor, extended with a ``"tokens"`` entry and written to
    the data set when the translation flags allow it.
    """

    def __init__(self):
        """Create the helper objects and reset the per-run state."""
        conf = Configuration()
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.mongo = MongoDB(self.ds.db, self.ds.collection)
        # process_tweets() used self.cleaner without it ever being assigned,
        # which raised AttributeError on the first tweet. It is now an
        # optional hook: assign any object with unset_tweet_keys(tweet) to
        # strip keys. With None, tweets pass through as fetched; the query
        # projection already limits the returned fields.
        # NOTE(review): confirm whether a KeyCleaner should be built here.
        self.cleaner = None
        self.tweet = ""
        self.tokens = ""
        self.i = 0
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store

    def get_tweet_from_db(self):
        """Return the stored tweets that have both text and geo coordinates.

        Only the fields listed in the projection are requested.
        """
        # $exists takes a boolean; the string "true" only worked because
        # it is truthy (the string "false" would have matched as well).
        where = {
            "text": {"$exists": True},
            "geo.coordinates": {"$exists": True},
        }
        select = {
            "text": 1,
            "source": 1,
            "geo": 1,
            "user": 1,
            "retweet_count": 1,
            "created_at": 1,
        }
        return self.mongo.find(where, select)

    def process_tweets(self):
        """Process every fetched tweet and store the ones that qualify."""
        for rawTweet in self.get_tweet_from_db():
            if "text" not in rawTweet:
                continue
            self.ptext.set_tweet_text(rawTweet['text'])
            self.ptext.set_tweet_source(rawTweet['source'])
            self.ptext.process_text()
            rawTweet['source'] = self.ptext.get_tweet_source()
            rawTweet['text'] = self.ptext.get_tweet_text()
            self.tokens = self.ptext.get_tweet_tokens()
            rawTweet.update({'tokens': self.tokens})
            if self.cleaner is None:
                self.tweet = rawTweet
            else:
                self.tweet = self.cleaner.unset_tweet_keys(rawTweet)

            if self._should_store():
                self.ds.output_tweet(self.tweet)
                self.i += 1

    def _should_store(self):
        """Return True when the tweet just processed must be written out."""
        if not self.ptext.get_translate_status():
            return True
        if not self.translation_store:
            return False
        if not self.enable_translation:
            return True
        # Translation is enabled: keep the tweet only if it did not fail.
        return not self.ptext.get_translate_failed()

    def get_tweet_count(self):
        """Return the number of tweets written to the data set so far."""
        return self.i
コード例 #11
0
ファイル: tweetdb.py プロジェクト: mftaher/TweetMiner
class TweetDB():
    """Batch processor for tweets that are already stored in MongoDB.

    Tweets with both text and geo coordinates are fetched, passed through
    the text processor, given a ``"tokens"`` entry and written to the data
    set when the translation flags allow it.
    """

    def __init__(self):
        """Create the helper objects and reset the per-run state."""
        conf = Configuration()
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.mongo = MongoDB(self.ds.db, self.ds.collection)
        # process_tweets() referenced self.cleaner although nothing assigned
        # it, so the first tweet raised AttributeError. It is now an
        # optional hook: set it to an object with unset_tweet_keys(tweet) to
        # strip keys. With None, tweets are used as fetched; the query
        # projection already limits the returned fields.
        # NOTE(review): confirm whether a KeyCleaner should be built here.
        self.cleaner = None
        self.tweet = ""
        self.tokens = ""
        self.i = 0
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store

    def get_tweet_from_db(self):
        """Return the stored tweets that have both text and geo coordinates.

        Only the fields listed in the projection are requested.
        """
        # $exists takes a boolean; the string "true" only matched because
        # any non-empty string is truthy.
        where = {
            "text": {"$exists": True},
            "geo.coordinates": {"$exists": True},
        }
        select = {
            "text": 1,
            "source": 1,
            "geo": 1,
            "user": 1,
            "retweet_count": 1,
            "created_at": 1,
        }
        return self.mongo.find(where, select)

    def process_tweets(self):
        """Process every fetched tweet and store the ones that qualify."""
        for rawTweet in self.get_tweet_from_db():
            if "text" not in rawTweet:
                continue
            self.ptext.set_tweet_text(rawTweet['text'])
            self.ptext.set_tweet_source(rawTweet['source'])
            self.ptext.process_text()
            rawTweet['source'] = self.ptext.get_tweet_source()
            rawTweet['text'] = self.ptext.get_tweet_text()
            self.tokens = self.ptext.get_tweet_tokens()
            rawTweet.update({'tokens': self.tokens})
            if self.cleaner is None:
                self.tweet = rawTweet
            else:
                self.tweet = self.cleaner.unset_tweet_keys(rawTweet)

            if self._should_store():
                self.ds.output_tweet(self.tweet)
                self.i += 1

    def _should_store(self):
        """Return True when the tweet just processed must be written out."""
        if not self.ptext.get_translate_status():
            return True
        if not self.translation_store:
            return False
        if not self.enable_translation:
            return True
        # Translation is enabled: keep the tweet only if it did not fail.
        return not self.ptext.get_translate_failed()

    def get_tweet_count(self):
        """Return the number of tweets written to the data set so far."""
        return self.i