예제 #1
0
def csvs(cfiles):
    """Load each CSV file in *cfiles* into a freshly created database table.

    Only paths for which ``fsys.ext`` returns ``'csv'`` are processed.  The
    table is named ``"file_" + fsys.normalized_name(file)``; any existing
    table of that name is dropped first.  The first CSV row supplies the
    column names (lower-cased, all typed ``text``) and an auto-increment
    ``id`` column is added.  The remaining rows are inserted with a single
    multi-row ``INSERT`` after each value has been passed through
    ``normalize_date`` and then ``remove_quotes``.

    A file without a header row is skipped (after its old table has been
    dropped), blank CSV lines are ignored, and no ``INSERT`` is issued when
    there are no data rows, because an empty ``VALUES`` list is not valid SQL.

    Args:
        cfiles: Iterable of file paths.
    """
    for file in cfiles:
        if fsys.ext(file) != 'csv':
            continue

        table = "file_" + fsys.normalized_name(file)

        # Foreign-key enforcement is toggled off around the drop, presumably
        # so that tables referencing this one do not block it.
        database.execute("PRAGMA foreign_keys = OFF;")
        database.execute("DROP TABLE IF EXISTS `%s`" % table)
        database.execute("PRAGMA foreign_keys = ON;")

        with open(file, 'r', encoding='latin1') as f:
            reader = csv.reader(f, delimiter=',')

            # next() with a default avoids StopIteration on an empty file.
            columns = next(reader, None)
            if not columns:
                continue

            column_defs = ','.join(
                "`%s` text" % column.lower() for column in columns)
            database.execute(
                ("CREATE TABLE `%s` (`id` integer PRIMARY KEY AUTOINCREMENT,{})"
                 % table).format(column_defs))

            # NOTE(review): values are interpolated straight into the SQL
            # text; this relies on remove_quotes() making them safe.
            rows = [
                '(' + ','.join(
                    "'%s'" % remove_quotes(normalize_date(value))
                    for value in values) + ')'
                for values in reader
                if values  # csv.reader yields [] for blank lines
            ]
            if not rows:
                continue

            database.execute(
                ("INSERT INTO `%s` ({}) VALUES {}" % table).format(
                    ','.join("`%s`" % column for column in columns),
                    ','.join(rows)))
예제 #2
0
    def clickedFind(self):
        """Recompute ``timeline_result`` and redraw the timeline canvas.

        The result table is dropped and re-created on every call.  When
        ``self.keyButtons`` is non-empty, one ``insert_timelines`` statement
        is executed per key for the date range chosen in ``self.deStart`` /
        ``self.deEnd``; the rows returned by ``sql_select`` are then grouped
        per key and handed to ``plot_draw_lines``.
        """
        database.execute("DROP TABLE IF EXISTS `timeline_result`")
        database.execute(sql_create)
        if not self.keyButtons:
            return

        # The date range is the same for every key.
        start = self.deStart.date().toString('yyyyMMdd')
        end = self.deEnd.date().toString('yyyyMMdd')
        for key in self.keyButtons.keys():
            database.execute(
                insert_timelines.format(key, start, end, len(key)))

        # Fetch the results from the database.
        records = database.execute(sql_select)

        # Prepare the plot data: key -> (color, dates, rates, hints).
        lines = {}
        for item in records:
            date, key, rate = tuple(item)
            if key not in lines:
                lines[key] = (self.colors[key], [], [], [])
            _, dates, rates, hints = lines[key]

            dates.append(datetime.datetime.strptime(date, "%Y%m%d").date())
            rates.append(rate)

            # The hint is the concatenation of the first column of every
            # header row returned for this date/key pair.
            header_records = database.execute(
                sql_select_header.format(date, key))
            hints.append(''.join(header[0] for header in header_records))

        # The canvas is hidden while it is being redrawn.
        self.canvasTimeline.setVisible(False)
        plot_draw_lines(lines, self.canvasTimeline)
        self.canvasTimeline.setVisible(True)
예제 #3
0
    def __init__(self, parent):
        """Initialise the widget and fill the module-level lookup lists.

        Appends every ``name`` from the ``all_files`` table to the global
        ``all_files`` list.  For each row of ``email_addresses`` it takes the
        part before ``@``, lower-cases it, splits it on ``.`` and appends the
        first two components to the global ``names`` list.  An address whose
        local part contains no dot contributes a single component.

        Args:
            parent: Passed through to the base-class initialiser.
        """
        super(QtWidgets.QWidget, self).__init__(parent)

        global all_files
        global names

        query = "SELECT `name` FROM `all_files`"
        for record in database.execute(query):
            all_files.append(record[0])

        for row in database.execute("SELECT address from email_addresses"):
            local_part = row[0].split('@')[0].lower()
            # Slicing instead of indexing [0] and [1] keeps addresses such as
            # "admin@example.com" from raising IndexError.
            names.extend(local_part.split('.')[:2])

        self.initUI(self)
예제 #4
0
    def clickedFind(self):
        """Recompute ``doc_result`` and rebuild the result buttons and bars.

        Widgets created by a previous call are scheduled for deletion and
        removed from ``self.contentButtons`` / ``self.rate``.  The result
        table is then dropped and re-created.  When ``self.keyButtons`` is
        non-empty, one ``insert_doc`` statement is executed per key against
        the checked files, and for every row returned by ``sql_select`` a
        ``QPushButton`` and a ``Rate`` widget are added to ``gridLayout_2``.
        """
        # Remove each entry as its widget is scheduled for deletion.  Leaving
        # the entries behind would make a later call invoke deleteLater() on
        # wrappers whose underlying Qt object is already gone.
        for name in list(self.contentButtons or ()):
            self.contentButtons.pop(name).deleteLater()
            self.rate.pop(name).deleteLater()

        database.execute("DROP TABLE IF EXISTS `doc_result`")
        database.execute(sql_create)
        if not self.keyButtons:
            return

        # NOTE(review): the tuple is rendered with str(); a single checked
        # file renders as "('name',)" with a trailing comma - confirm that
        # the insert_doc template tolerates this.
        files = tuple(
            name for name in self.fileCheck.keys()
            if self.fileCheck[name].checkState())

        for key in self.keyButtons.keys():
            database.execute(insert_doc.format(key, files, len(key)))

        records = database.execute(sql_select)
        if not records:
            return

        # The second column of the first record is the maximum used for
        # every bar.
        max_rate = records[0][1]
        colors = self.colors

        for row, record in enumerate(records):
            but = QtWidgets.QPushButton(self.groupBox_3)
            but.setText(record[0])
            but.setMaximumWidth(100)
            but.setToolTip(record[2])

            # Per-key rate for this record; 0 when the query returns nothing.
            rates = {}
            for key in self.keyButtons.keys():
                rate = database.execute(
                    sql_select_rate.format(key, record[0]))
                rates[key] = rate[0][0] if rate else 0

            wid = Rate()
            wid.setValue(rates)
            wid.setMax(max_rate)
            wid.setColors(colors)

            self.gridLayout_2.addWidget(but, row, 0, 1, 1)
            self.gridLayout_2.addWidget(wid, row, 1, 1, 1)
            self.contentButtons[record[0]] = but
            self.rate[record[0]] = wid
예제 #5
0
def files(cfiles):
    """Rebuild the ``all_files`` table from the given file paths.

    The table is dropped and re-created with an auto-increment ``id`` and
    the text columns ``name``, ``path`` and ``content``.  Each file is read
    as latin-1; its ``fsys.name``, its path and its content (after
    ``remove_quotes``) form one row, and all rows are inserted with a single
    multi-row ``INSERT``.  No ``INSERT`` is issued when *cfiles* is empty,
    because an empty ``VALUES`` list is not valid SQL.

    Args:
        cfiles: Iterable of file paths.
    """
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `all_files`")
    database.execute("PRAGMA foreign_keys = ON;")

    columns = ['name', 'path', 'content']
    query = "CREATE TABLE `all_files` (`id` integer PRIMARY KEY AUTOINCREMENT,{})".format(
        ','.join("`%s` text" % column.lower() for column in columns))
    database.execute(query)

    rows = []
    for file in cfiles:
        # The context manager closes the handle as soon as the file is read.
        with open(file, "r", encoding='latin1') as f:
            content = remove_quotes(f.read())

        # NOTE(review): the name and path are interpolated without any
        # escaping; a double quote in either would break the statement.
        rows.append('(' + ','.join(
            "\"%s\"" % value
            for value in (fsys.name(file), file, content)) + ')')

    if not rows:
        return

    query = "INSERT INTO `all_files` ({}) VALUES {}".format(
        ','.join("`%s`" % column.lower() for column in columns),
        ','.join(rows))
    database.execute(query)
예제 #6
0
    def clickedFind(self):
        """Recompute ``email_result`` and redraw the e-mail flow canvas.

        The result table is dropped and re-created, then ``insert_doc`` is
        executed once per key in ``self.keyButtons`` (or once with an empty
        key when there are none) for the checked addresses.  The rows
        returned by ``sql_select`` are turned into a sender-by-recipient
        count matrix plus the matching subject lists, and passed to
        ``plot_draw_lines``.
        """
        database.execute("DROP TABLE IF EXISTS `email_result`")
        database.execute(sql_create)

        # The tuple always starts with an empty string, followed by every
        # checked address.
        checked = [
            address for address in self.addressCheck.keys()
            if self.addressCheck[address].checkState()
        ]
        addresses = tuple([''] + checked)

        keys = self.keyButtons.keys() if self.keyButtons else ['']
        for key in keys:
            database.execute(insert_doc.format(key, addresses))
        records = database.execute(sql_select)

        # First pass: give every participant an index in order of first
        # appearance (sender before recipient within a row).
        index_of = {}
        for item in records:
            sender, recipient, _ = tuple(item)
            index_of.setdefault(sender, len(index_of))
            index_of.setdefault(recipient, len(index_of))

        # Second pass: count messages and collect subjects per pair.  Each
        # header cell starts with one empty string.
        size = len(index_of)
        flux = np.zeros((size, size))
        headers = [[[''] for _ in range(size)] for _ in range(size)]
        for item in records:
            sender, recipient, subject = tuple(item)
            row = index_of[sender]
            col = index_of[recipient]
            flux[row][col] += 1
            headers[row][col].append(subject)

        names = list(index_of.keys())
        colors = [self.colors[name] for name in names]

        # The canvas is hidden while it is being redrawn.
        self.canvasEmails.setVisible(False)
        plot_draw_lines([names, colors, flux, headers], self.canvasEmails)
        self.canvasEmails.setVisible(True)
예제 #7
0
    def __init__(self, parent):
        """Initialise the widget and cache e-mail addresses in module globals.

        Stores the ``id``/``address`` rows of ``email_addresses`` in the
        global ``records`` and builds the global ``all_addresses`` mapping of
        address to id, then sets up the UI and connects the events.

        Args:
            parent: Passed through to the base-class initialiser.
        """
        super(QtWidgets.QWidget, self).__init__(parent)

        global records, all_addresses

        records = database.execute("select id, address from email_addresses")

        # A falsy result leaves the mapping empty.
        all_addresses = (
            {row[1]: row[0] for row in records} if records else {}
        )

        self.init_dynamicUI()
        self.initUI(self)
        self.add_events()
예제 #8
0
    def __init__(self, parent):
        """Initialise the widget and cache employee names in module globals.

        Stores the ``lastname``/``firstname`` rows of ``file_employeerecords``
        in the global ``records`` and flattens them into the global
        ``all_names`` list (last name, then first name, per row), then sets
        up the UI and connects the events.

        Args:
            parent: Passed through to the base-class initialiser.
        """
        super(QWidget, self).__init__(parent)

        global records, all_names

        records = database.execute(
            "select lastname, firstname from file_employeerecords")

        # A falsy result leaves the list empty.
        all_names = []
        for row in records or ():
            all_names.extend((row[0], row[1]))

        self.init_dynamicUI()
        self.initUi(self)
        self.add_events()
예제 #9
0
def articles():
    """Rebuild ``articles_info`` from the files whose name is all digits.

    The table is dropped and re-created.  For every ``all_files`` row whose
    ``name`` matches ``^[0-9]+$``, the content is scanned line by line: the
    lines before the first one accepted by ``is_date`` form the header and
    that line, after ``normalize_date``, is the date (empty if there is no
    such line).  The cleaned header and its lemmatized form are stored.  No
    ``INSERT`` is issued when no file matches, because an empty ``VALUES``
    list is not valid SQL.
    """
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `articles_info`")
    database.execute("PRAGMA foreign_keys = ON;")

    query = "CREATE TABLE `articles_info` (" \
            "`id` integer PRIMARY KEY AUTOINCREMENT, " \
            "`file_id` integer, " \
            "`date` text, " \
            "`header` text, " \
            "`preprocessed_header` text, " \
            "FOREIGN KEY (`file_id`) REFERENCES `all_files`(`id`))"
    database.execute(query)

    query = "SELECT `id`, `content` " \
            "FROM `all_files` " \
            "WHERE REGEXP(`name`, '^[0-9]+$')"
    records = database.execute(query)

    rows = []
    for record in records:
        file_id = record[0]
        file_content = record[1]

        date = ''
        header = []
        for line in file_content.splitlines():
            if is_date(line):
                # Everything after the first date line is ignored.
                date = normalize_date(line)
                break
            header.append(line)

        header_str = remove_spaces(
            remove_newlines(remove_specchars(' '.join(header).lower())))

        # NOTE(review): values are interpolated straight into the SQL text;
        # this relies on the cleaning helpers removing quote characters.
        rows.append('(' + ','.join([
            "%d" % file_id,
            "'%s'" % date,
            "'%s'" % header_str,
            "'%s'" % lemmatize(header_str)
        ]) + ')')

    if not rows:
        return

    query = "INSERT INTO `articles_info` (`file_id`, `date`, `header`, `preprocessed_header`) VALUES {}".format(
        ','.join(rows))
    database.execute(query)
예제 #10
0
def emails():
    """Rebuild ``emailheaders_info`` with a lemmatized subject per e-mail.

    The table is dropped and re-created.  For every row of
    ``file_emailheaders`` the subject is lower-cased, passed through
    ``remove_newlines`` and ``remove_spaces``, lemmatized and stored next to
    the e-mail id.  No ``INSERT`` is issued when the source table is empty,
    because an empty ``VALUES`` list is not valid SQL.
    """
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `emailheaders_info`")
    database.execute("PRAGMA foreign_keys = ON;")

    query = "CREATE TABLE emailheaders_info (" \
            "`id` integer PRIMARY KEY AUTOINCREMENT, " \
            "`emailheader_id` integer, " \
            "`preprocessed_subject` text, " \
            "FOREIGN KEY (`emailheader_id`) REFERENCES `file_emailheaders`(`id`))"
    database.execute(query)

    query = "SELECT `id`, `subject` " \
            "FROM `file_emailheaders`"
    records = database.execute(query)

    rows = []
    for record in records:
        email_id = record[0]
        email_subject = record[1]

        subject = remove_spaces(remove_newlines(email_subject.lower()))

        # NOTE(review): the lemmatized subject is interpolated straight into
        # the SQL text; a quote character in it would break the statement.
        rows.append('(' + ','.join([
            "%d" % email_id,
            "'%s'" % lemmatize(subject)]) + ')')

    if not rows:
        return

    query = "INSERT INTO emailheaders_info (`emailheader_id`, `preprocessed_subject`) VALUES {}".format(
        ','.join(rows))
    database.execute(query)
예제 #11
0
def emails_graph():
    """Rebuild ``email_addresses`` and ``email_references``.

    ``email_addresses`` receives one row per distinct ``from`` value of
    ``file_emailheaders``.  ``email_references`` receives one row per
    (e-mail, recipient) pair, where the recipients are the ``to`` field
    split on ``", "``; sender and recipient are stored as ids looked up in
    ``email_addresses`` by sub-select.  Neither ``INSERT`` is issued when
    there is nothing to insert, because an empty ``VALUES`` list is not
    valid SQL.
    """
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `email_addresses`")
    database.execute("PRAGMA foreign_keys = ON;")

    query = "CREATE TABLE email_addresses (" \
            "`id` integer PRIMARY KEY AUTOINCREMENT, " \
            "`address` text)"
    database.execute(query)

    query = "SELECT DISTINCT `from` FROM `file_emailheaders`"
    senders = database.execute(query)

    rows = ["(" + "'%s'" % sender[0] + ")" for sender in senders]
    if rows:
        query = "INSERT INTO `email_addresses` (address) VALUES {}".format(','.join(rows))
        database.execute(query)

    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `email_references`")
    database.execute("PRAGMA foreign_keys = ON;")

    query = "CREATE TABLE email_references (" \
            "`id` integer PRIMARY KEY AUTOINCREMENT, " \
            "`emailheader_id` integer, " \
            "`from_id` integer, " \
            "`to_id` integer, " \
            "FOREIGN KEY (`emailheader_id`) REFERENCES `file_emailheaders`(`id`)," \
            "FOREIGN KEY (`from_id`) REFERENCES `email_addresses`(`id`)," \
            "FOREIGN KEY (`to_id`) REFERENCES `email_addresses`(`id`))"
    database.execute(query)

    query = "SELECT `from`, `to`, `id` FROM `file_emailheaders`"
    email_items = database.execute(query)

    # NOTE(review): email_addresses holds only `from` values, so the lookup
    # for a recipient that never appears as a sender finds no row - confirm
    # that a missing `to_id` is acceptable downstream.
    rows = []
    for email_item in email_items:
        sender = email_item[0]
        for recipient in email_item[1].split(", "):
            rows.append("(" + "(SELECT `id` FROM `email_addresses` WHERE `address` = '%s')" % sender + "," +
                        "(SELECT `id` FROM `email_addresses` WHERE `address` = '%s')" % recipient + "," +
                        "%d" % email_item[2] + ")")

    if not rows:
        return

    query = "INSERT INTO `email_references` (`from_id`, `to_id`, `emailheader_id`) VALUES {}".format(','.join(rows))
    database.execute(query)
예제 #12
0
def texts():
    """Rebuild ``files_preprocessed_content`` from ``all_files``.

    The table is dropped and re-created.  For every row of ``all_files`` the
    content is lower-cased, passed through ``remove_specchars``,
    ``remove_newlines`` and ``remove_spaces``, lemmatized and stored next to
    the file id.  No ``INSERT`` is issued when ``all_files`` is empty,
    because an empty ``VALUES`` list is not valid SQL.
    """
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `files_preprocessed_content`")
    database.execute("PRAGMA foreign_keys = ON;")

    # NOTE(review): the column name `prerocessed_content` looks like a typo
    # for "preprocessed"; it is kept because other queries may reference it.
    query = "CREATE TABLE `files_preprocessed_content` (" \
            "`id` integer PRIMARY KEY AUTOINCREMENT, " \
            "`file_id` integer, " \
            "`prerocessed_content` text, " \
            "FOREIGN KEY (`file_id`) REFERENCES `all_files`(`id`))"
    database.execute(query)

    query = "SELECT `id`, `content` FROM `all_files`"
    records = database.execute(query)

    rows = []
    for record in records:
        file_id = record[0]
        file_content = record[1]

        lemmatized_content = lemmatize(
            remove_spaces(
                remove_newlines(
                    remove_specchars(
                        file_content.lower()))))

        # The id is written as a quoted literal, unchanged from the original
        # statement text.
        rows.append('(' + ','.join(["'%s'" % file_id,
                                    "'%s'" % lemmatized_content]) + ')')

    if not rows:
        return

    query = "INSERT INTO `files_preprocessed_content` (`file_id`, `prerocessed_content`) VALUES {}".format(
        ','.join(rows))
    database.execute(query)