Exemple #1
0
class MainWindow(QMainWindow, Ui_MainWindow):
    def __init__(self):
        """Build the main window, wire up its signals and load the MongoDB settings.

        Sets up the Designer-generated UI, highlights the navigation button of
        the page shown at start-up, connects the buttons to their slots,
        creates the shared ``QProcess`` used for external commands and finally
        reads the server/port lines from ``settings.py``.
        """
        super(MainWindow, self).__init__()

        # Set up the user interface from Designer.
        self.setupUi(self)

        # Message shown when a URL yields no extractable article text.
        self.crawlNone = 'Nothing had been crawled, try another url.'
        self.exit.triggered.connect(self.close)

        # Highlight the navigation button of the page currently shown; any
        # index other than 0 or 1 maps to the settings button.
        nav_buttons = {0: self.simiBtn, 1: self.kwBtn}
        active_btn = nav_buttons.get(self.stackedWidget.currentIndex(),
                                     self.settingsBtn)
        active_btn.setStyleSheet("""
                                    color: #fff;
                                    text-decoration: none;
                                    background-color: #28a745;
                                    border-color:#28a745;
                                    text-decoration: none;""")

        # Connect up the buttons.
        self.url1CrawlBtn.clicked.connect(self.url1CrawlBtn_on_click)
        self.url2CrawlBtn.clicked.connect(self.url2CrawlBtn_on_click)
        self.detectBtn.clicked.connect(self.detectBtn_on_click)

        self.simiBtn.clicked.connect(lambda: self.from_to(0, self.simiBtn))
        self.kwBtn.clicked.connect(lambda: self.from_to(1, self.kwBtn))
        self.settingsBtn.clicked.connect(lambda: self.from_to(2, self.settingsBtn))
        self.kwDetectBtn.clicked.connect(self.kwDetectBtn_on_click)
        self.getlistBtn.clicked.connect(self.getlistBtn_on_click)

        self.searchBtn.clicked.connect(self.searchBtn_on_click)
        self.closeSearchBtn.clicked.connect(self.closeSearchBtn_on_click)

        self.mpComfirmBtn.clicked.connect(self.mpComfirmBtn_on_click)

        # QProcess object for external app
        self.process = QProcess(self)
        # QProcess emits `readyRead` when there is data to be read
        self.process.readyRead.connect(self.dataReady)

        # Just to prevent accidentally running multiple times
        # Disable the button when process starts, and enable it when it finishes
        self.process.started.connect(lambda: self.searchBtn.setEnabled(False))
        self.process.finished.connect(lambda: self.searchBtn.setEnabled(True))

        # Name of the collection selected by the last keyword search.
        self.coll_name = None

        self._load_mongo_settings()

    def _load_mongo_settings(self):
        """Re-read the server/port lines from ``settings.py`` and display them.

        Lines 74 and 75 of ``../news_spider/settings.py`` are stored verbatim
        (line endings included) in ``self.SERVERTEXT`` and ``self.PORTTEXT``
        and shown in ``self.configList``.  ``self.PORT`` receives the text
        between the first and second ``=`` of the port line, or an empty
        string when that line is missing or contains no ``=``.
        """
        settings_path = '../news_spider/settings.py'
        try:
            # Binary mode splits on '\n' only and leaves line endings as-is,
            # which matches what `sed -n 'Np'` used to print.
            with open(settings_path, 'rb') as settings_file:
                raw_lines = settings_file.readlines()
        except OSError as err:
            print('Cannot read {0}: {1}'.format(settings_path, err))
            raw_lines = []

        def line_text(number):
            """Return the 1-based line ``number`` decoded as UTF-8, or ''."""
            if len(raw_lines) < number:
                return ''
            return raw_lines[number - 1].decode('utf-8')

        # NOTE(review): the fixed line numbers break as soon as settings.py
        # gains or loses a line above them - consider matching on the key.
        self.SERVERTEXT = line_text(74)
        self.PORTTEXT = line_text(75)
        self.configList.setText(self.SERVERTEXT + self.PORTTEXT)

        port_fields = self.PORTTEXT.split('=')
        self.PORT = port_fields[1] if len(port_fields) > 1 else ''

    def mpComfirmBtn_on_click(self):
        """Write the port typed into ``mpValue`` to ``settings.py`` and refresh.

        Empty input is ignored.  Anything that is not a decimal port number in
        the range 1-65535 is rejected, because the value is spliced into a sed
        expression.  The edit runs synchronously so that the refreshed display
        always reflects the file after the change.
        """
        mpValue = self.mpValue.text()
        print(mpValue)
        if not mpValue:
            return

        port = mpValue.strip()
        is_number = bool(port) and all(ch in '0123456789' for ch in port)
        if not is_number or not 0 < int(port) <= 65535:
            print('Ignoring invalid MongoDB port: {0!r}'.format(mpValue))
            return

        # QProcess.execute blocks until sed has finished; the asynchronous
        # start() used before let the re-read race against the edit.
        exit_code = QProcess.execute(
            'sed',
            ['-i', 's/^MONGODB_PORT=.*/MONGODB_PORT={0}/'.format(port),
             '../news_spider/settings.py'])
        if exit_code != 0:
            print('sed exited with code {0} while updating the port'.format(exit_code))

        self._load_mongo_settings()

    def from_to(self, to, toBtn):
        """Show page ``to`` of the stacked widget and move the highlight to ``toBtn``.

        Does nothing when page ``to`` is already the current page.  Otherwise
        the stylesheet of the button belonging to the page being left is
        cleared (indices other than 0 and 1 map to the settings button), the
        page is switched and ``toBtn`` receives the highlight stylesheet.
        """
        shown = self.stackedWidget.currentIndex()
        if shown == to:
            return

        leaving_btn = {0: self.simiBtn, 1: self.kwBtn}.get(shown, self.settingsBtn)
        leaving_btn.setStyleSheet("")

        self.stackedWidget.setCurrentIndex(to)
        toBtn.setStyleSheet(""" color: #fff;
                                     text-decoration: none;
                                     background-color: #28a745;
                                     border-color:#28a745;
                                     text-decoration: none;
                                 """)

    # def closeStatBtn_on_click(self):
    #     print('stopStat button clicked')
    #     self.statBtn.setEnabled(True)
    #     print(self.p.returncode)
    #     # if self.process.isOpen():
    #     #     self.process.close()
    #     #     print('close process')
    #
    # def statBtn_on_click(self):
    #     print('stat button clicked')
    #     self.statBtn.setEnabled(False)
    #     self.output.clear()
    #     # self.process.start('ping', ['127.0.0.1'])
    #     self.p = subprocess.Popen("scrapy crawl news_spider", cwd='/home/watmel/PycharmProjects/news_similarity_detection/nnorder/news_spider/news_spider/spiders',shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    #     cursor = self.output.textCursor()
    #     for line in self.p.stdout.readlines():
    #         cursor.insertText(str(line, 'utf-8'))
    #     if self.p.returncode != 0:
    #         cursor.insertText("error")

    def closeSearchBtn_on_click(self):
        """Close the shared external process if it is currently open."""
        print('stopStat button clicked')
        if not self.process.isOpen():
            return
        self.process.close()
        print('close process')

    # def execute_this_fn(self):
    #     pc = post_crawl()
    #     tag_rank = pc.get_tag_rank()
    #     return tag_rank
    def print_output(self, s):
        """Render ``s`` as a two-column HTML table in the ``result`` widget.

        Each item of ``s`` is indexed with ``'_id'`` (first column, bold) and
        ``'value'`` (second column).
        """
        header = """<table><tr><td>{0}</td><td>{1}</td></tr>""".format('关键词', '热度值')
        body = ''.join(
            '<tr><td><b>{0}:</b></td><td>{1}</td></tr>'.format(entry['_id'], entry['value'])
            for entry in s
        )
        self.result.setText(header + body + """</table>""")

    def thread_start(self):
        """Disable ``statBtn`` and print a start marker."""
        self.statBtn.setEnabled(False)
        print("THREAD START!")

    def thread_complete(self):
        """Re-enable ``statBtn`` and print a completion marker."""
        self.statBtn.setEnabled(True)
        print("THREAD COMPLETE!")

    def searchBtn_on_click(self):
        """Crawl news for the keyword in ``keyword`` unless it was crawled before.

        An empty keyword is ignored.  ``check_kw(...).is_crawled()`` supplies a
        flag and the collection name, which is remembered in ``self.coll_name``.
        When the flag is set only a hint is shown.  Otherwise the keyword and
        the collection name are written into the spider sources with sed and
        ``scrapy crawl news_spider`` is started on the shared process, whose
        merged stdout/stderr is streamed into the output view by ``dataReady``.
        """
        self.process.setProcessChannelMode(QProcess.MergedChannels)
        kw_urlstyle = self.keyword.text()
        print(kw_urlstyle)
        if not kw_urlstyle:
            return

        self.output.clear()
        ck = check_kw(kw_urlstyle, self.PORT)
        flag, coll_name = ck.is_crawled()
        self.coll_name = coll_name
        if flag:
            self.output.setPlainText('该关键词已抓取,可以直接点击获取列表按钮')
            return

        def sed_literal(value):
            """Escape ``value`` for the replacement half of ``s/.../.../``."""
            return (str(value)
                    .replace('\\', '\\\\')
                    .replace('/', '\\/')
                    .replace('&', '\\&'))

        # Raw strings keep `\s` intact without relying on Python passing an
        # unknown escape sequence through.
        edits = (
            (r's/^\s*kw=".*"/    kw="{0}"/'.format(sed_literal(kw_urlstyle)),
             '../news_spider/spiders/news_spider.py'),
            (r's/^MONGODB_COLLECTION=".*"/MONGODB_COLLECTION="{0}"/'.format(
                sed_literal(self.coll_name)),
             '../news_spider/settings.py'),
        )
        for expression, target in edits:
            # QProcess.execute blocks until sed is done.  The previous
            # start() + waitForFinished(10) waited only 10 ms, so the next
            # start() could be issued while sed was still running.
            exit_code = QProcess.execute('sed', ['-i', expression, target])
            if exit_code != 0:
                self.output.setPlainText(
                    'sed failed (exit code {0}) while updating {1}'.format(exit_code, target))
                return

        # Program and arguments are passed separately instead of relying on
        # the single-string overload to split the command line.
        self.process.start('scrapy', ['crawl', 'news_spider'])

    def getlistBtn_on_click(self):
        """Fill both news combo boxes with the titles of the current collection.

        The boxes are cleared first so repeated clicks do not pile up duplicate
        entries, and the ``activated`` handlers are connected only once so a
        selection triggers each handler a single time.
        """
        pc = post_crawl(self.coll_name, self.PORT)
        title_list = pc.get_title_list()
        print("title_list:" + str(title_list))

        for combo in (self.leftnews, self.rightnews):
            combo.clear()
            combo.addItems(title_list)

        if not getattr(self, '_news_combos_connected', False):
            self.leftnews.activated.connect(self.left_combox_on_activate)
            self.rightnews.activated.connect(self.right_combox_on_activate)
            self._news_combos_connected = True

    def kwDetectBtn_on_click(self):
        """Compare the two selected news texts and show their similarity.

        Nothing happens unless both text views are non-empty.  The distance
        between the two simhash values is shown together with a percentage
        derived from it as ``(1 - distance / 32) * 100``.
        """
        left_text = self.leftnewsContent.toPlainText()
        right_text = self.rightnewsContent.toPlainText()
        if not (left_text and right_text):
            return

        left_hash, _ = self.get_simhash(text=left_text)
        right_hash, _ = self.get_simhash(text=right_text)
        dis = self.get_distance(left_hash, right_hash)
        similarity = (1 - dis / 32) * 100
        self.searchDistance.setText(
            '{0}%, 在32位哈希值有{1}位不同'.format(str(similarity), str(dis)))

    def left_combox_on_activate(self):
        """Load the text of the title selected in ``leftnews`` into ``leftnewsContent``."""
        selected_title = self.leftnews.currentText()
        store = post_crawl(self.coll_name, self.PORT)
        self.leftnewsContent.setText(store.get_text_by_title(selected_title))

    def right_combox_on_activate(self):
        """Load the text of the title selected in ``rightnews`` into ``rightnewsContent``."""
        selected_title = self.rightnews.currentText()
        store = post_crawl(self.coll_name, self.PORT)
        self.rightnewsContent.setText(store.get_text_by_title(selected_title))

    def dataReady(self):
        """Append whatever the external process has written to the output view.

        Output arrives in arbitrary chunks, so a multi-byte UTF-8 character can
        be split across two ``readyRead`` signals.  An incremental decoder
        keeps the incomplete tail until the next chunk instead of raising
        ``UnicodeDecodeError``; bytes that are not valid UTF-8 are replaced.
        """
        import codecs  # local import keeps this slot self-contained

        decoder = getattr(self, '_output_decoder', None)
        if decoder is None:
            decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
            self._output_decoder = decoder

        text = decoder.decode(bytes(self.process.readAll()))

        cursor = self.output.textCursor()
        cursor.movePosition(cursor.End)
        cursor.insertText(text)
        self.output.ensureCursorVisible()

    def url1CrawlBtn_on_click(self):
        """Fetch the article at the URL in ``url1`` and show its text and title.

        The sample URL is only filled in when the field is empty; previously it
        overwrote whatever had been typed.  When no text could be extracted the
        ``crawlNone`` message is shown instead.
        """
        if not self.url1.text():
            self.url1.setText('http://news.sina.com.cn/c/2018-06-07/doc-ihcscwwz9278602.shtml')

        clean_text, title = myGoose(url=self.url1.text()).get_cleaned_text()
        if clean_text:
            self.news1.setPlainText(clean_text)
            self.title1.setText(title)
        else:
            self.news1.setPlainText(self.crawlNone)

    def url2CrawlBtn_on_click(self):
        """Fetch the article at the URL in ``url2`` and show its text and title.

        The sample URL is only filled in when the field is empty; previously it
        overwrote whatever had been typed.  When no text could be extracted the
        ``crawlNone`` message is shown instead.
        """
        if not self.url2.text():
            self.url2.setText('http://china.chinadaily.com.cn/2018-06/08/content_36349835.htm')

        clean_text, title = myGoose(url=self.url2.text()).get_cleaned_text()
        if clean_text:
            self.news2.setPlainText(clean_text)
            self.title2.setText(title)
        else:
            self.news2.setPlainText(self.crawlNone)

    def tag_to_str(self, tag):
        # tag_to_str=''
        # for t in tag:
        #     tag_to_str+='<b>{0}</b>: {1}<br/>'.format(t[0],t[1])
        tag_to_str = """<table><tr><td>{0}</td><td>{1}</td></tr>""".format('关键词', '权重')
        for t in tag:
            tag_to_str += '<tr><td><b>{0}:</b></td><td>{1}</td></tr>'.format(t[0], t[1])
        tag_to_str += """</table>"""
        return tag_to_str

    def detectBtn_on_click(self):
        """Compare the two crawled articles and show keywords and similarity.

        Nothing is computed unless both text views are non-empty.  The keyword
        tables of both texts are shown in ``tag1``/``tag2`` and the distance
        between the simhash values is shown together with a percentage derived
        from it as ``(1 - distance / 32) * 100``.
        """
        print('detectBtn clicked')
        first_text = self.news1.toPlainText()
        second_text = self.news2.toPlainText()
        if not (first_text and second_text):
            return

        first_hash, first_tags = self.get_simhash(text=first_text)
        second_hash, second_tags = self.get_simhash(text=second_text)
        self.tag1.setText(self.tag_to_str(first_tags))
        self.tag2.setText(self.tag_to_str(second_tags))

        dis = self.get_distance(first_hash, second_hash)
        similarity = (1 - dis / 32) * 100
        self.distance.setText(
            '{0}%, 在32位哈希值有{1}位不同'.format(str(similarity), str(dis)))

    def get_simhash(self, text):
        """Return ``(simhash, weighted_tags)`` for ``text``.

        The tags are the top 20 keywords with weights extracted by jieba; the
        simhash is whatever ``MySimHash().get_simhash`` computes from them.
        """
        weighted_tags = jieba.analyse.extract_tags(text, topK=20, withWeight=True)
        fingerprint = MySimHash().get_simhash(weighted_tags)
        return fingerprint, weighted_tags

    def get_distance(self, s1, s2):
        """Return the distance between simhash values ``s1`` and ``s2`` as computed by ``MySimHash``."""
        hasher = MySimHash()
        return hasher.get_distance(s1, s2)
Exemple #2
0
class Heimdall(QObject):
    """Heimdall service

    This service maintains a Raspberry Pi or similar device as an automatic display that works without user interaction.

    Connection timeline:
        0. _setup_reestablish_tunnel - Set up a timer to try to reestablish the tunnel. This step is only performed
           if the tunnel was previously active and failed, and exists to provide a reconnection delay.

        1. start_tunnel - Run SSH to the Raspberry Pi

        2. try_connect - Attempt to use the SSH connection to contact i3/Sway.
           If this fails, an automatic retry after a timeout is done.

        3. setup - Take over the i3/Sway setup

    """
    def __init__(self, parent=None):
        """Register the service on the session D-Bus, load the config and start the tunnel.

        Args:
            parent: Optional parent object, forwarded to the base-class initializer.

        Raises:
            Exception: If the session bus is not connected.
        """
        super().__init__(parent)

        # Timer used by _setup_reestablish_tunnel to delay a reconnection attempt.
        self.reconnect_timer = QTimer()
        # Process object that runs the SSH tunnel (see start_tunnel).
        self.ssh_proc = QProcess()

        self.bus = QtDBus.QDBusConnection.sessionBus()
        self.dbus_adaptor = DBusAdaptor(self)
        self.contextual_executor = ContextualExecutor(self)

        if not self.bus.isConnected():
            raise Exception("Failed to connect to dbus!")

        # NOTE(review): the return values of both register calls are ignored,
        # so a failed registration goes unnoticed - confirm that is intended.
        self.bus.registerObject("/heimdall", self)
        self.bus.registerService("com.troshchinskiy.Heimdall")

        # Directory holding config.json and the local end of the Sway socket.
        self.homedir = os.environ['HOME'] + "/.heimdall"
        self.read_config()
        self.start_tunnel()

    def echo(self, text):
        """Return ``text`` unchanged."""
        return text

    def version(self):
        """Return the service version string."""
        return "0.1"

    def connect(self):
        """Launch a bare ``ssh`` process with stdout piped and keep it in ``self.ssh``.

        NOTE(review): no host or options are passed and nothing else in this
        class reads ``self.ssh`` - this looks like a leftover; confirm before
        relying on it.
        """
        self.ssh = Popen(["ssh"], stdout=PIPE)

    def read_config(self):
        filename = self.homedir + '/config.json'

        print("Loading config file {}...\n".format(filename))

        with open(filename, 'r') as conf_h:
            self.config = json.load(conf_h)

    def start_tunnel(self):
        """Start the SSH process that forwards the remote Sway IPC socket.

        Returns immediately when the SSH process is already open.  Otherwise
        the remote Sway pid, home directory and uid are queried over SSH, the
        remote socket path is derived from them, a stale local socket file is
        removed and SSH is started with a reverse port forward plus a forward
        of the local socket path to the remote socket.

        Raises:
            Exception: If no Sway pid could be obtained from the remote host.
        """
        if self.ssh_proc and self.ssh_proc.isOpen():
            print("Tunnel already running")
            return

        print("Starting tunnel...\n")

        # _run_remote returns a stripped string, never None, so an empty
        # result is what signals "no sway process" (or a failed SSH call).
        pidof_output = self._run_remote(["pidof", "sway"])
        if not pidof_output:
            raise Exception('Sway is not running!')
        # pidof prints all matching pids on one line; more than one would
        # corrupt the socket path, so only the first one is used.
        sway_pid = pidof_output.split()[0]

        home_dir = self._run_remote(["echo", '$HOME'])
        uid = self._run_remote(["echo", '$UID'])

        self.remote_socket = "/run/user/" + uid + "/sway-ipc." + uid + "." + sway_pid + ".sock"
        self.local_socket = self.homedir + "/sway.sock"

        print("Sway pid: '{}'".format(sway_pid))
        print("Home dir: '{}'".format(home_dir))
        print("UID     : '{}'".format(uid))
        print("Socket  : '{}'".format(self.remote_socket))

        if os.path.exists(self.local_socket):
            os.remove(self.local_socket)

        r = self.config['remote']

        command_args = [
            "-i", r['ssh-key'], "-p", r['port'], "-l", r['user'], "-R",
            r['backwards-port'] + ":127.0.0.1:" + r['local-ssh-port'], "-L",
            self.local_socket + ':' + self.remote_socket, r['server']
        ]

        print("Running command: ssh {}".format(command_args))

        # Connect the handlers only once: this method runs again on every
        # reconnect, and repeated connect() calls would invoke each handler
        # once per earlier start.
        if not getattr(self, '_ssh_signals_connected', False):
            self.ssh_proc.started.connect(self._ssh_process_started)
            self.ssh_proc.errorOccurred.connect(self._ssh_process_error)
            self.ssh_proc.finished.connect(self._ssh_process_finished)
            self._ssh_signals_connected = True

        self.ssh_proc.start(self.config['commands']['ssh'], command_args)

    def try_connect(self):
        """Attempt to open the i3/Sway IPC connection through the local socket.

        SSH needs a moment to set up the forwarding, so this is expected to be
        called repeatedly until it succeeds.  On success the polling timer is
        stopped and ``setup`` runs; a refused connection or a missing socket
        file is only reported.
        """
        socket_path = self.local_socket
        print("Trying to connect to Sway/i3 at socket {}...".format(socket_path))
        try:
            self.i3 = Connection(socket_path=self.local_socket)
        except ConnectionRefusedError:
            print("Not connected yet!")
        except FileNotFoundError:
            print("Socket doesn't exist yet!!")
        else:
            self.connect_timer.stop()
            self.setup()

    def setup(self):
        """Take over the remote i3/Sway session as described by the config.

        Kills the contents of every existing workspace, runs the configured
        remote start-up commands, then creates the configured workspaces
        (numbered from 1 and renamed to their configured names) and runs their
        commands.  A refused connection or missing socket at any point
        triggers ``_setup_reestablish_tunnel``.
        """
        try:
            print("Setting up Sway/i3...")
            self.wm_version = self.i3.get_version()
            print("Connected to Sway/i3 version {}".format(self.wm_version))

            print("Resetting workspace...")
            for existing in self.i3.get_workspaces():
                print("Deleting workspace {}".format(existing.name))
                self.i3.command('[workspace="{}"] kill'.format(existing.name))

            startup = self.config['startup']

            print("Executing commands...")
            for remote_cmd in startup['remote-run']:
                print("\tExecuting: {}".format(remote_cmd))
                self._run_remote(remote_cmd)

            print("Setting up workspaces...")
            for number, ws_conf in enumerate(self.config['startup']['workspaces'], start=1):
                self.i3.command("workspace {}".format(number))
                self.i3.command(
                    'rename workspace "{}" to "{}"'.format(number, ws_conf['name']))

                for ws_cmd in ws_conf['commands']:
                    self.i3_command(ws_cmd)

        except (ConnectionRefusedError, FileNotFoundError):
            self._setup_reestablish_tunnel()

    def i3_command(self, command):
        """Expand the ``$...`` placeholders in ``command`` and send it to i3/Sway.

        ``$TERM_EXEC_KEEP``, ``$TERM_EXEC`` and ``$TERM`` are replaced, in that
        order, by the matching ``remote`` config entries; ``$SSH_TO_HOST`` is
        replaced by an ssh command line built from the configured ssh binary,
        the backwards port and the local ``USER``.
        """
        terminal_keys = (
            ('$TERM_EXEC_KEEP', 'terminal-exec-keep'),
            ('$TERM_EXEC', 'terminal-exec'),
            ('$TERM', 'terminal'),
        )
        # Longer placeholders come first so `$TERM` does not clobber them.
        for placeholder, key in terminal_keys:
            command = command.replace(placeholder, self.config['remote'][key])

        ssh_to_host = (self.config['commands']['ssh'] + " -p " +
                       self.config['remote']['backwards-port'] + " -t " +
                       os.environ['USER'] + '@localhost ')
        command = command.replace('$SSH_TO_HOST', ssh_to_host)

        print("Executing command: " + command)
        self.i3.command(command)

    def contextual_action(self, environment, path, command):
        """Forward ``environment``, ``path`` and ``command`` to the contextual executor."""
        self.contextual_executor.execute(environment, path, command)

    def stop_tunnel(self):
        """Stop the tunnel, if it's running"""

        if self.ssh_proc and self.ssh_proc.isOpen():
            print("Stopping ssh\n")
            self.ssh_proc.kill()
            self.ssh_proc.close()

        if os.path.exists(self.local_socket):
            os.remove(self.local_socket)

    def _setup_reestablish_tunnel(self):
        """Tear down the SSH tunnel and schedule ``start_tunnel`` 100 ms later."""
        self.stop_tunnel()

        timer = self.reconnect_timer
        if not getattr(self, '_reconnect_timer_connected', False):
            # Pass the bound method itself.  Calling it here would start the
            # tunnel immediately and hand its None return value to connect().
            timer.timeout.connect(self.start_tunnel)
            self._reconnect_timer_connected = True

        # setSingleShot() configures this timer instance; QTimer.singleShot is
        # the static "call once after N ms" helper and takes other arguments.
        timer.setSingleShot(True)
        timer.start(100)

    def _ssh_process_started(self):
        """Begin polling ``try_connect`` every 50 ms once the SSH process has started."""
        print("SSH process started!")
        poll_timer = QTimer()
        self.connect_timer = poll_timer
        poll_timer.timeout.connect(self.try_connect)
        poll_timer.start(50)

    def _ssh_process_error(self, error):
        """Print the error reported by the SSH process."""
        print("SSH process failed with error {}!".format(error))

    def _ssh_process_finished(self, exit_code, exit_status):
        """Print the exit code and exit status of the finished SSH process."""
        print("SSH process exited with code {}, status {}!".format(
            exit_code, exit_status))

    def _run_remote(self, command):
        """Run ``command`` on the remote host over SSH and return its stdout.

        Args:
            command: The remote command, either as a list of arguments or as a
                single string holding the whole command line.

        Returns:
            str: The decoded standard output with surrounding whitespace
            stripped; empty when the command printed nothing.
        """
        r = self.config['remote']

        if isinstance(command, str):
            # `list += str` would append one element per character, so a
            # string command is passed to ssh as a single argument instead.
            command = [command]

        ssh_command = [
            self.config['commands']['ssh'], "-i", r['ssh-key'], "-p",
            r['port'], "-l", r['user'], r['server']
        ]
        ssh_command += list(command)

        print("Running: {}".format(ssh_command))
        result_raw = subprocess.run(ssh_command, stdout=subprocess.PIPE)
        return result_raw.stdout.decode('utf-8').strip()