Beispiel #1
0
 def __init__(self):
     """Wire up the helpers from a freshly created Configuration.

     Creates the text processor, the data set and a MongoDB handle (using
     the data set's ``db`` and ``collection``), resets the per-tweet
     fields and the counter, and copies the text processor's
     ``translation`` and ``translation_store`` attributes.
     """
     settings = Configuration()
     self.ptext = processor = TextProcess(settings)
     self.ds = dataset = DataSet(settings)
     self.mongo = MongoDB(dataset.db, dataset.collection)

     # Per-tweet fields start as empty strings; the counter starts at zero.
     self.tweet = self.tokens = ""
     self.i = 0

     self.enable_translation = processor.translation
     self.translation_store = processor.translation_store
Beispiel #2
0
 def __init__(self, conf, q):
     """Prepare the helpers, then initialise the Thread base class.

     Args:
         conf: Configuration passed on to ``TextProcess`` and ``DataSet``.
         q: Queue of raw tweets, stored as ``self.tweets``.
     """
     self.ptext = processor = TextProcess(conf)
     self.ds = DataSet(conf)
     self.cleaner = KeyCleaner()

     # Copy the text processor's translation attributes onto this object.
     self.enable_translation = processor.translation
     self.translation_store = processor.translation_store

     self.tweets = q  # Tweets queue
     self.tweet = self.tokens = ""
     self.i = 0

     Thread.__init__(self)
Beispiel #3
0
 def __init__(self):
     """Create the processing helpers and reset the per-tweet state.

     A new ``Configuration`` is handed to both ``TextProcess`` and
     ``DataSet``; the MongoDB handle is opened with the data set's ``db``
     and ``collection`` values.
     """
     conf = Configuration()
     self.ptext = text_proc = TextProcess(conf)
     self.ds = data_set = DataSet(conf)
     self.mongo = MongoDB(data_set.db, data_set.collection)

     # Nothing processed yet: empty tweet/tokens, counter at zero.
     self.tweet, self.tokens = "", ""
     self.i = 0

     # Translation attributes are taken from the text processor.
     self.enable_translation = text_proc.translation
     self.translation_store = text_proc.translation_store
Beispiel #4
0
 def __init__(self, conf, q):
     """Build the helper objects and finish with ``Thread.__init__``.

     Args:
         conf: Configuration given to ``TextProcess`` and ``DataSet``.
         q: Queue holding the raw tweets; kept as ``self.tweets``.
     """
     text_proc = TextProcess(conf)
     self.ptext = text_proc
     self.ds = DataSet(conf)
     self.cleaner = KeyCleaner()
     self.enable_translation = text_proc.translation
     self.translation_store = text_proc.translation_store

     # Queue to read from, plus empty per-tweet fields and a zeroed counter.
     self.tweets = q
     self.tweet, self.tokens = "", ""
     self.i = 0

     Thread.__init__(self)
Beispiel #5
0
class ProcessTweets(Thread):
    """Worker thread that processes queued raw tweets and stores the result.

    Every item taken from the queue is treated as a dict-like raw tweet.
    Items with a ``"text"`` key are run through the text processor, gain a
    ``"tokens"`` entry, are passed through the key cleaner and are written
    with ``DataSet.output_tweet`` when the translation settings allow it.
    """

    def __init__(self, conf, q):
        """Create the processing helpers.

        Args:
            conf: Configuration passed on to ``TextProcess`` and ``DataSet``.
            q: Queue of raw tweets; only ``get()`` and ``task_done()`` are
                called on it.
        """
        Thread.__init__(self)
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.cleaner = KeyCleaner()
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store
        self.tweets = q  # Tweets queue
        self.tweet = ""
        self.tokens = ""
        self.i = 0  # number of tweets written so far

    def run(self):
        """Consume the queue forever, handling one raw tweet per iteration.

        ``task_done()`` is called exactly once for every fetched item, even
        when the item has no ``"text"`` key or processing raises. Otherwise
        the queue's unfinished-task count never drops to zero and anyone
        blocked in ``join()`` on the queue waits forever.
        """
        while True:
            raw_tweet = self.tweets.get()
            try:
                if "text" in raw_tweet:
                    self._process_tweet(raw_tweet)
            finally:
                self.tweets.task_done()

    def _process_tweet(self, raw_tweet):
        """Run one raw tweet through the text processor and store it if allowed."""
        self.ptext.set_tweet_text(raw_tweet['text'])
        self.ptext.set_tweet_source(raw_tweet['source'])
        self.ptext.process_text()
        raw_tweet['source'] = self.ptext.get_tweet_source()
        raw_tweet['text'] = self.ptext.get_tweet_text()
        self.tokens = self.ptext.get_tweet_tokens()
        raw_tweet.update({'tokens': self.tokens})
        self.tweet = self.cleaner.unset_tweet_keys(raw_tweet)

        if self._should_store_tweet():
            self.ds.output_tweet(self.tweet)
            self.i += 1

    def _should_store_tweet(self):
        """Decide whether the tweet just processed is written to the data set."""
        if not self.ptext.get_translate_status():
            return True
        if not self.translation_store:
            return False
        if not self.enable_translation:
            return True
        # Translation is enabled: keep the tweet only if it did not fail.
        return not self.ptext.get_translate_failed()

    def get_tweet_count(self):
        """Return the number of tweets written to the data set so far."""
        return self.i
Beispiel #6
0
 def testText(self):
     """Build a TextProcess from 'config.ini' and have it read 'pubmed_result.txt'."""
     TextProcess('config.ini').readfile('pubmed_result.txt')
Beispiel #7
0
class ProcessTweets(Thread):
    """Thread that pulls raw tweets off a queue, processes and stores them.

    Queue items are used as dict-like objects. Only items containing a
    ``"text"`` key are processed; the processed tweet is written through
    ``DataSet.output_tweet`` depending on the translation status and the
    two translation settings copied from the text processor.
    """

    def __init__(self, conf, q):
        """Create the helper objects used by :meth:`run`.

        Args:
            conf: Configuration handed to ``TextProcess`` and ``DataSet``.
            q: Queue of raw tweets (``get()`` / ``task_done()`` are used).
        """
        Thread.__init__(self)
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.cleaner = KeyCleaner()
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store
        self.tweets = q         # Tweets queue
        self.tweet = ""
        self.tokens = ""
        self.i = 0              # tweets written so far

    def run(self):
        """Process queue items in an endless loop.

        Each ``get()`` is paired with exactly one ``task_done()``, also for
        items that lack a ``"text"`` key and for items whose processing
        raises. Skipped items therefore no longer leave the queue with
        unfinished tasks, which would block ``join()`` on it indefinitely.
        """
        while True:
            raw_tweet = self.tweets.get()
            try:
                if "text" in raw_tweet:
                    self._handle_tweet(raw_tweet)
            finally:
                self.tweets.task_done()

    def _handle_tweet(self, raw_tweet):
        """Process a single raw tweet and write it out when permitted."""
        ptext = self.ptext
        ptext.set_tweet_text(raw_tweet['text'])
        ptext.set_tweet_source(raw_tweet['source'])
        ptext.process_text()
        raw_tweet['source'] = ptext.get_tweet_source()
        raw_tweet['text'] = ptext.get_tweet_text()
        self.tokens = ptext.get_tweet_tokens()
        raw_tweet.update({'tokens': self.tokens})
        self.tweet = self.cleaner.unset_tweet_keys(raw_tweet)

        if self._storage_allowed():
            self.ds.output_tweet(self.tweet)
            self.i += 1

    def _storage_allowed(self):
        """Return True when the tweet just processed should be written."""
        if not self.ptext.get_translate_status():
            return True
        # With a truthy translate status the tweet is kept only when
        # translation_store is set and, if translation is enabled, the
        # translation did not fail.
        return bool(self.translation_store) and (
            not self.enable_translation
            or not self.ptext.get_translate_failed()
        )

    def get_tweet_count(self):
        """Return how many tweets have been written to the data set."""
        return self.i
from client import Client
from motormanager import MotorManager
from gallery import Gallery
from PyQt5 import uic
from PyQt5.QtCore import QFile, QRegExp
from PyQt5.QtWidgets import QApplication, QFileDialog, QMainWindow, QMenu, QMessageBox, QTableWidgetItem
#=========================================================
# module-level setup: UI form classes and shared objects
#=========================================================
# UI config
# Path of the .ui form handed to uic.loadUiType(); the call's two results
# are bound to the form class and the Qt base class names below.
qtCreatorFile = "mainwindow.ui"
Ui_MainWindow, QtBaseClass = uic.loadUiType(qtCreatorFile)

# Shared objects, created once when this module is imported: the motor
# manager is built on the client, and the text processor receives both.
client = Client()
mm = MotorManager(client)
# NOTE(review): TextProcess is not among the imports visible above - confirm
# it is imported elsewhere in this file.
tp = TextProcess(client, mm)
#=========================================================
# a class that handles the signals and callbacks of the GUI
#=========================================================


class GUI(QMainWindow, Ui_MainWindow):
    """Main window built from the loaded UI form."""

    def __init__(self):
        """Initialise both base classes, build the form, then run the setup hooks."""
        for base in (QMainWindow, Ui_MainWindow):
            base.__init__(self)
        self.setupUi(self)

        # Hooks are looked up and invoked one at a time, in this order.
        setup_hooks = (
            "setupFileMenu",
            "setupHelpMenu",
            "setupCallbacksLED",
            "setupMotors",
        )
        for hook_name in setup_hooks:
            getattr(self, hook_name)()
Beispiel #9
0
        "url": "http://pogledaj.to/art/zivot-je-cupav-i-dlakav/",
        "name": "Zivot je cupav i dlakav",
        "dictionary_path": "./Oznake vrsta rijeci/GRUPA1/6-oznake.txt",
        "content-selector": {
            "class": "main the-content"
        }
    },
]

# Only the first entry of `articles` is processed (slice [0:1]).
for article in articles[0:1]:
    ###
    # Text processing
    ###
    # Create an instance of the TextProcess class, which fetches text from the URL and filters it
    tp = TextProcess(url=article["url"],
                     filename=article["name"],
                     content_selector_dict=article["content-selector"])

    # Get the filtered sentences as a list
    filtered_sentences = tp.get_filtered_sentences()

    ###
    # Dictionary extraction
    ###
    # Create an instance of the Dictionary class, which compares the filtered sentences
    # with those in the dictionary and creates the node and edge lists
    wt = Dictionary(dictionary_path=article["dictionary_path"])

    # Sets the node and edge lists. The first parameter is the filtered sentences, the
    # second is an array of wanted word types. Words that are not connected to others
    # are stored in the node list; those that are connected are stored in the edge list
Beispiel #10
0
class TweetDB():
    """Process tweets read from MongoDB and write the results to the data set.

    Tweets are fetched through the ``MongoDB`` handle, run through the text
    processor, extended with a ``"tokens"`` entry, passed through the key
    cleaner and written with ``DataSet.output_tweet`` when the translation
    settings allow it.
    """

    def __init__(self):
        """Create the helpers from a fresh ``Configuration``."""
        conf = Configuration()
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.mongo = MongoDB(self.ds.db, self.ds.collection)
        # process_tweets() calls self.cleaner.unset_tweet_keys(); the attribute
        # was never set, so the first tweet with a "text" key raised
        # AttributeError. KeyCleaner must be importable in this module.
        self.cleaner = KeyCleaner()
        self.tweet = ""
        self.tokens = ""
        self.i = 0  # number of tweets written so far
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store

    def get_tweet_from_db(self):
        """Return the result of querying MongoDB for tweets to process.

        The filter asks for documents where "text" and "geo.coordinates"
        exist; the projection selects the fields listed in ``select``.
        """
        # NOTE(review): the "$exists" values are the string "true", not the
        # boolean True - confirm this is what the MongoDB wrapper expects.
        where = {
            "text": {
                "$exists": "true"
            },
            "geo.coordinates": {
                "$exists": "true"
            }
        }
        select = {
            "text": 1,
            "source": 1,
            "geo": 1,
            "user": 1,
            "retweet_count": 1,
            "created_at": 1
        }
        return self.mongo.find(where, select)

    def process_tweets(self):
        """Process every fetched tweet that carries a "text" key."""
        for raw_tweet in self.get_tweet_from_db():
            if "text" in raw_tweet:
                self._process_tweet(raw_tweet)

    def _process_tweet(self, raw_tweet):
        """Run one raw tweet through the text processor and store it if allowed."""
        self.ptext.set_tweet_text(raw_tweet['text'])
        self.ptext.set_tweet_source(raw_tweet['source'])
        self.ptext.process_text()
        raw_tweet['source'] = self.ptext.get_tweet_source()
        raw_tweet['text'] = self.ptext.get_tweet_text()
        self.tokens = self.ptext.get_tweet_tokens()
        raw_tweet.update({'tokens': self.tokens})
        self.tweet = self.cleaner.unset_tweet_keys(raw_tweet)

        if self._should_store_tweet():
            self.ds.output_tweet(self.tweet)
            self.i += 1

    def _should_store_tweet(self):
        """Decide whether the tweet just processed is written to the data set."""
        if not self.ptext.get_translate_status():
            return True
        if not self.translation_store:
            return False
        if not self.enable_translation:
            return True
        # Translation is enabled: keep the tweet only if it did not fail.
        return not self.ptext.get_translate_failed()

    def get_tweet_count(self):
        """Return the number of tweets written to the data set so far."""
        return self.i
Beispiel #11
0
class TweetDB():
    """Read stored tweets from MongoDB, process them and write them out.

    Each fetched tweet with a ``"text"`` key goes through the text
    processor, receives a ``"tokens"`` entry, is passed through the key
    cleaner and is written via ``DataSet.output_tweet`` depending on the
    translation status and settings.
    """

    def __init__(self):
        """Build the helper objects from a new ``Configuration``."""
        conf = Configuration()
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.mongo = MongoDB(self.ds.db, self.ds.collection)
        # Required by process_tweets(), which calls
        # self.cleaner.unset_tweet_keys(); previously the attribute was
        # missing and processing failed with AttributeError. KeyCleaner must
        # be importable in this module.
        self.cleaner = KeyCleaner()
        self.tweet = ""
        self.tokens = ""
        self.i = 0  # tweets written so far
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store

    def get_tweet_from_db(self):
        """Query MongoDB and return the result of ``find``.

        The filter asks for documents where "text" and "geo.coordinates"
        exist; ``select`` lists the fields to return.
        """
        # NOTE(review): "$exists" is given the string "true" rather than the
        # boolean True - confirm the MongoDB wrapper expects that.
        where = {
            "text": {"$exists": "true"},
            "geo.coordinates": {"$exists": "true"},
        }
        select = {
            "text": 1,
            "source": 1,
            "geo": 1,
            "user": 1,
            "retweet_count": 1,
            "created_at": 1,
        }
        return self.mongo.find(where, select)

    def process_tweets(self):
        """Process all fetched tweets that contain a "text" key."""
        for raw_tweet in self.get_tweet_from_db():
            if "text" in raw_tweet:
                self._handle_tweet(raw_tweet)

    def _handle_tweet(self, raw_tweet):
        """Process a single raw tweet and write it out when permitted."""
        ptext = self.ptext
        ptext.set_tweet_text(raw_tweet['text'])
        ptext.set_tweet_source(raw_tweet['source'])
        ptext.process_text()
        raw_tweet['source'] = ptext.get_tweet_source()
        raw_tweet['text'] = ptext.get_tweet_text()
        self.tokens = ptext.get_tweet_tokens()
        raw_tweet.update({'tokens': self.tokens})
        self.tweet = self.cleaner.unset_tweet_keys(raw_tweet)

        if self._storage_allowed():
            self.ds.output_tweet(self.tweet)
            self.i += 1

    def _storage_allowed(self):
        """Return True when the tweet just processed should be written."""
        if not self.ptext.get_translate_status():
            return True
        # With a truthy translate status the tweet is kept only when
        # translation_store is set and, if translation is enabled, the
        # translation did not fail.
        return bool(self.translation_store) and (
            not self.enable_translation
            or not self.ptext.get_translate_failed()
        )

    def get_tweet_count(self):
        """Return how many tweets have been written to the data set."""
        return self.i