def csvs(cfiles):
    """Import every CSV file in *cfiles* into its own database table.

    For each path whose extension (as reported by ``fsys.ext``) is ``'csv'``,
    the table ``file_<fsys.normalized_name(path)>`` is dropped and recreated
    with an auto-increment ``id`` column plus one ``text`` column per header
    cell (lower-cased). All data rows are then inserted with one statement.
    Paths with any other extension are ignored.

    Args:
        cfiles: Iterable of file paths.
    """
    for file in cfiles:
        if fsys.ext(file) != 'csv':
            continue

        table = "file_" + fsys.normalized_name(file)

        # Foreign-key enforcement is switched off only for the duration of
        # the drop (presumably so referencing tables do not block it).
        database.execute("PRAGMA foreign_keys = OFF;")
        database.execute("DROP TABLE IF EXISTS `%s`" % table)
        database.execute("PRAGMA foreign_keys = ON;")

        with open(file, 'r', encoding='latin1') as f:
            reader = csv.reader(f, delimiter=',')

            # The first row is the header. A completely empty file has none;
            # skip it instead of aborting the whole import with StopIteration.
            columns = next(reader, None)
            if columns is None:
                continue

            column_defs = ','.join(
                ["`%s` text" % column.lower() for column in columns])
            query = (
                "CREATE TABLE `%s` (`id` integer PRIMARY KEY AUTOINCREMENT,{})"
                % table).format(column_defs)
            database.execute(query)

            # Each data row becomes one "('v1','v2',...)" tuple literal.
            rows = [
                '(' + ','.join([
                    "'%s'" % remove_quotes(normalize_date(value))
                    for value in values
                ]) + ')'
                for values in reader
            ]

            # A header-only file has no data rows; "VALUES" followed by
            # nothing is not valid SQL, so there is nothing to insert.
            if not rows:
                continue

            query = ("INSERT INTO `%s` ({}) VALUES {}" % table).format(
                ','.join(["`%s`" % column for column in columns]),
                ','.join(rows))
            database.execute(query)
def clickedFind(self):
    """Rebuild ``timeline_result`` and redraw the timeline plot.

    The result table is dropped and recreated. For every key in
    ``self.keyButtons`` one ``insert_timelines`` statement is executed for
    the date range selected in ``self.deStart`` / ``self.deEnd``. The rows
    returned by ``sql_select`` are grouped per key and passed to
    ``plot_draw_lines`` together with ``self.canvasTimeline``.
    """
    database.execute("DROP TABLE IF EXISTS `timeline_result`")
    database.execute(sql_create)

    if self.keyButtons:
        # The selected range is the same for every key.
        start = self.deStart.date().toString('yyyyMMdd')
        end = self.deEnd.date().toString('yyyyMMdd')
        for key in self.keyButtons:
            database.execute(
                insert_timelines.format(key, start, end, len(key)))

    # Fetch the results from the database.
    records = database.execute(sql_select)

    # Prepare the data for the plot: key -> (color, dates, rates, hints).
    lines = {}
    for item in records:
        date, key, rate = tuple(item)
        if key not in lines:
            lines[key] = (self.colors[key], [], [], [])
        _, dates, rates, hints = lines[key]

        dates.append(datetime.datetime.strptime(date, "%Y%m%d").date())
        rates.append(rate)

        # One hint per point: first column of every header row for this
        # (date, key) pair, concatenated.
        header_records = database.execute(
            sql_select_header.format(date, key))
        hints.append(''.join(header[0] for header in header_records))

    # The canvas is hidden while it is being redrawn.
    self.canvasTimeline.setVisible(False)
    plot_draw_lines(lines, self.canvasTimeline)
    self.canvasTimeline.setVisible(True)
def __init__(self, parent):
    """Initialise the widget and fill the module-level lookup lists.

    Appends every ``name`` from the ``all_files`` table to the global
    ``all_files`` list, and up to two dot-separated parts of each e-mail
    address's lower-cased local part to the global ``names`` list, then
    builds the UI.

    Args:
        parent: Parent object forwarded to the base-class initialiser.
    """
    super(QtWidgets.QWidget, self).__init__(parent)
    global all_files
    global names

    query = "SELECT `name` FROM `all_files`"
    for record in database.execute(query):
        all_files.append(record[0])

    for row in database.execute("SELECT address from email_addresses"):
        local_part = row[0].split('@')[0]
        name_parts = local_part.lower().split('.')
        # Only the first two parts are kept. Slicing (instead of indexing
        # [0] and [1]) keeps a local part without a dot from raising
        # IndexError; such an address contributes its single part.
        names.extend(name_parts[:2])

    self.initUI(self)
def clickedFind(self):
    """Run the document search and rebuild the result widgets.

    Widgets from the previous search are scheduled for deletion, the
    ``doc_result`` table is recreated and filled with one ``insert_doc``
    statement per key in ``self.keyButtons`` (restricted to the files
    checked in ``self.fileCheck``). For every row returned by ``sql_select``
    a button and a ``Rate`` widget are added to ``self.gridLayout_2`` and
    registered in ``self.contentButtons`` / ``self.rate``.
    """
    if self.contentButtons:
        for name in self.contentButtons:
            self.contentButtons[name].deleteLater()
            self.rate[name].deleteLater()
        # Forget the widgets that were just scheduled for deletion.
        # Otherwise entries that do not reappear in the new result stay in
        # the dicts and the next search calls deleteLater() on wrappers
        # whose underlying Qt object no longer exists.
        self.contentButtons.clear()
        self.rate.clear()

    database.execute("DROP TABLE IF EXISTS `doc_result`")
    database.execute(sql_create)

    if self.keyButtons:
        # Names of the files whose checkbox is not unchecked.
        files = tuple(
            name for name in self.fileCheck
            if self.fileCheck[name].checkState())
        for key in self.keyButtons:
            database.execute(insert_doc.format(key, files, len(key)))

    records = database.execute(sql_select)
    if records:
        # The second column of the first row is used as the scale maximum
        # for every Rate widget.
        max_rate = records[0][1]
        for row_index, record in enumerate(records):
            but = QtWidgets.QPushButton(self.groupBox_3)
            but.setText(record[0])
            but.setMaximumWidth(100)
            but.setToolTip(record[2])

            # Per-key rate for this document; 0 when the query has no rows.
            rates = {}
            for key in self.keyButtons:
                rate = database.execute(
                    sql_select_rate.format(key, record[0]))
                rates[key] = rate[0][0] if rate else 0

            wid = Rate()
            wid.setValue(rates)
            wid.setMax(max_rate)
            wid.setColors(self.colors)

            self.gridLayout_2.addWidget(but, row_index, 0, 1, 1)
            self.gridLayout_2.addWidget(wid, row_index, 1, 1, 1)
            self.contentButtons[record[0]] = but
            self.rate[record[0]] = wid
def files(cfiles):
    """Recreate the ``all_files`` table from the given files.

    The table gets an auto-increment ``id`` plus the text columns ``name``,
    ``path`` and ``content``. Each file is read as latin-1; its content is
    passed through ``remove_quotes`` before being stored.

    Args:
        cfiles: Iterable of file paths.
    """
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `all_files`")
    database.execute("PRAGMA foreign_keys = ON;")

    columns = ['name', 'path', 'content']
    query = "CREATE TABLE `all_files` (`id` integer PRIMARY KEY AUTOINCREMENT,{})".format(
        ','.join(["`%s` text" % column.lower() for column in columns]))
    database.execute(query)

    rows = []
    for file in cfiles:
        # The context manager closes each handle as soon as it is read;
        # previously every opened file stayed open.
        with open(file, "r", encoding='latin1') as f:
            content = remove_quotes(f.read())
        rows.append('(' + ','.join([
            "\"%s\"" % value
            for value in [fsys.name(file), file, content]
        ]) + ')')

    # "VALUES" followed by nothing is not valid SQL; with no files the
    # freshly created table simply stays empty.
    if not rows:
        return

    query = "INSERT INTO `all_files` ({}) VALUES {}".format(
        ','.join(["`%s`" % column.lower() for column in columns]),
        ','.join(rows))
    database.execute(query)
def clickedFind(self):
    """Rebuild ``email_result`` and redraw the e-mail flow plot.

    The result table is recreated and filled with one ``insert_doc``
    statement per key in ``self.keyButtons`` (or a single statement with an
    empty key when there are none), restricted to the addresses checked in
    ``self.addressCheck``. The rows returned by ``sql_select`` are turned
    into a sender x recipient count matrix plus the matching subjects, and
    handed to ``plot_draw_lines`` together with ``self.canvasEmails``.
    """
    database.execute("DROP TABLE IF EXISTS `email_result`")
    database.execute(sql_create)

    # The tuple always starts with an empty string, followed by every
    # address whose checkbox is not unchecked.
    checked = [
        address for address in self.addressCheck
        if self.addressCheck[address].checkState()
    ]
    addresses = tuple([''] + checked)

    if self.keyButtons:
        for key in self.keyButtons:
            database.execute(insert_doc.format(key, addresses))
    else:
        database.execute(insert_doc.format('', addresses))

    records = database.execute(sql_select)

    # First pass: give every participant an index in first-seen order.
    index_of = {}
    for item in records:
        sender, recipient, _subject = tuple(item)
        for person in (sender, recipient):
            if person not in index_of:
                index_of[person] = len(index_of)

    # Second pass: count messages and collect subjects per ordered pair.
    size = len(index_of)
    flux = np.zeros((size, size))
    headers = [[[''] for _ in range(size)] for _ in range(size)]
    for item in records:
        sender, recipient, subject = tuple(item)
        row = index_of[sender]
        col = index_of[recipient]
        flux[row][col] += 1
        headers[row][col].append(subject)

    names = list(index_of)
    colors = [self.colors[name] for name in names]

    # The canvas is hidden while it is being redrawn.
    self.canvasEmails.setVisible(False)
    plot_draw_lines([names, colors, flux, headers], self.canvasEmails)
    self.canvasEmails.setVisible(True)
def __init__(self, parent):
    """Initialise the widget and load the address lookup table.

    Stores the rows of ``email_addresses`` in the global ``records`` and
    builds the global ``all_addresses`` mapping (address -> id) from them,
    then sets up the dynamic UI, the static UI and the event handlers.

    Args:
        parent: Parent object forwarded to the base-class initialiser.
    """
    super(QtWidgets.QWidget, self).__init__(parent)
    global records
    global all_addresses

    records = database.execute("select id, address from email_addresses")
    # An empty or missing result leaves the mapping empty.
    all_addresses = (
        {row[1]: row[0] for row in records} if records else {}
    )

    self.init_dynamicUI()
    self.initUI(self)
    self.add_events()
def __init__(self, parent):
    """Initialise the widget and load the employee name list.

    Stores the rows of ``file_employeerecords`` in the global ``records``
    and fills the global ``all_names`` list with each row's last name
    followed by its first name, then sets up the dynamic UI, the static UI
    and the event handlers.

    Args:
        parent: Parent object forwarded to the base-class initialiser.
    """
    super(QWidget, self).__init__(parent)
    global records
    global all_names

    records = database.execute(
        "select lastname, firstname from file_employeerecords")

    # An empty or missing result leaves the list empty.
    all_names = []
    for row in records or []:
        all_names.extend((row[0], row[1]))

    self.init_dynamicUI()
    self.initUi(self)
    self.add_events()
def articles():
    """Recreate ``articles_info`` from the numerically named files.

    For every row of ``all_files`` whose ``name`` matches ``^[0-9]+$`` the
    content is scanned line by line: the lines before the first date line
    form the header, and that date line (normalised) becomes the date. The
    cleaned header and its lemmatised form are stored along with the file
    id.
    """
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `articles_info`")
    database.execute("PRAGMA foreign_keys = ON;")

    query = ("CREATE TABLE `articles_info` ("
             "`id` integer PRIMARY KEY AUTOINCREMENT, "
             "`file_id` integer, "
             "`date` text, "
             "`header` text, "
             "`preprocessed_header` text, "
             "FOREIGN KEY (`file_id`) REFERENCES `all_files`(`id`))")
    database.execute(query)

    query = ("SELECT `id`, `content` "
             "FROM `all_files` "
             "WHERE REGEXP(`name`, '^[0-9]+$')")
    records = database.execute(query)

    rows = []
    for record in records:
        file_id = record[0]
        file_content = record[1]

        # Collect header lines until the first date line; the date stays
        # empty when no line is recognised as a date.
        date = ''
        header = []
        for line in file_content.splitlines():
            if is_date(line):
                date = normalize_date(line)
                break
            header.append(line)

        header_str = remove_spaces(
            remove_newlines(remove_specchars(' '.join(header).lower())))
        rows.append('(' + ','.join([
            "%d" % file_id,
            "'%s'" % date,
            "'%s'" % header_str,
            "'%s'" % lemmatize(header_str)
        ]) + ')')

    # "VALUES" followed by nothing is not valid SQL; with no matching files
    # the freshly created table simply stays empty.
    if not rows:
        return

    query = "INSERT INTO `articles_info` (`file_id`, `date`, `header`, `preprocessed_header`) VALUES {}".format(
        ','.join(rows))
    database.execute(query)
def emails():
    """Recreate ``emailheaders_info`` with a preprocessed subject per e-mail.

    Every subject from ``file_emailheaders`` is lower-cased, passed through
    ``remove_newlines`` and ``remove_spaces``, lemmatised, and stored with
    the id of the header row it came from.
    """
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `emailheaders_info`")
    database.execute("PRAGMA foreign_keys = ON;")

    query = ("CREATE TABLE emailheaders_info ("
             "`id` integer PRIMARY KEY AUTOINCREMENT, "
             "`emailheader_id` integer, "
             "`preprocessed_subject` text, "
             "FOREIGN KEY (`emailheader_id`) REFERENCES `file_emailheaders`(`id`))")
    database.execute(query)

    query = ("SELECT `id`, `subject` "
             "FROM `file_emailheaders`")
    records = database.execute(query)

    rows = []
    for record in records:
        email_id = record[0]
        email_subject = record[1]
        subject = remove_spaces(remove_newlines(email_subject.lower()))
        rows.append('(' + ','.join([
            "%d" % email_id,
            "'%s'" % lemmatize(subject)]) + ')')

    # "VALUES" followed by nothing is not valid SQL; with no e-mail headers
    # the freshly created table simply stays empty.
    if not rows:
        return

    query = "INSERT INTO emailheaders_info (`emailheader_id`, `preprocessed_subject`) VALUES {}".format(
        ','.join(rows))
    database.execute(query)
def emails_graph():
    """Recreate ``email_addresses`` and ``email_references``.

    ``email_addresses`` receives one row per distinct ``from`` value of
    ``file_emailheaders``. ``email_references`` receives one row per
    (sender, recipient) pair, where the recipients of a header come from
    splitting its ``to`` field on ``", "``; both ends are resolved to
    address ids by sub-select.
    """
    # --- distinct sender addresses -------------------------------------
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `email_addresses`")
    database.execute("PRAGMA foreign_keys = ON;")

    query = ("CREATE TABLE email_addresses ("
             "`id` integer PRIMARY KEY AUTOINCREMENT, "
             "`address` text)")
    database.execute(query)

    query = "SELECT DISTINCT `from` FROM `file_emailheaders`"
    addresses = database.execute(query)
    rows = ["(" + "'%s'" % address[0] + ")" for address in addresses]

    # "VALUES" followed by nothing is not valid SQL; skip the insert when
    # there are no senders.
    if rows:
        query = "INSERT INTO `email_addresses` (address) VALUES {}".format(
            ','.join(rows))
        database.execute(query)

    # --- sender -> recipient references --------------------------------
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `email_references`")
    database.execute("PRAGMA foreign_keys = ON;")

    query = ("CREATE TABLE email_references ("
             "`id` integer PRIMARY KEY AUTOINCREMENT, "
             "`emailheader_id` integer, "
             "`from_id` integer, "
             "`to_id` integer, "
             "FOREIGN KEY (`emailheader_id`) REFERENCES `file_emailheaders`(`id`),"
             "FOREIGN KEY (`from_id`) REFERENCES `email_addresses`(`id`),"
             "FOREIGN KEY (`to_id`) REFERENCES `email_addresses`(`id`))")
    database.execute(query)

    query = "SELECT `from`, `to`, `id` FROM `file_emailheaders`"
    email_items = database.execute(query)

    # NOTE(review): email_addresses is filled from `from` values only, so a
    # recipient that never appears as a sender has no row there and its
    # sub-select yields NULL for `to_id` - confirm this is intended.
    rows = []
    for email_item in email_items:
        sender, recipients_field, header_id = (
            email_item[0], email_item[1], email_item[2])
        for recipient in recipients_field.split(", "):
            rows.append(
                "("
                + "(SELECT `id` FROM `email_addresses` WHERE `address` = '%s')" % sender
                + ","
                + "(SELECT `id` FROM `email_addresses` WHERE `address` = '%s')" % recipient
                + ","
                + "%d" % header_id
                + ")")

    # Same guard as above: nothing to insert without e-mail headers.
    if rows:
        query = "INSERT INTO `email_references` (`from_id`, `to_id`, `emailheader_id`) VALUES {}".format(
            ','.join(rows))
        database.execute(query)
def texts():
    """Recreate ``files_preprocessed_content`` from ``all_files``.

    The content of every file is lower-cased, passed through
    ``remove_specchars``, ``remove_newlines`` and ``remove_spaces``,
    lemmatised, and stored together with the file id.
    """
    database.execute("PRAGMA foreign_keys = OFF;")
    database.execute("DROP TABLE IF EXISTS `files_preprocessed_content`")
    database.execute("PRAGMA foreign_keys = ON;")

    # NOTE: the column name `prerocessed_content` (sic) is part of the
    # schema and is kept as-is; other queries may depend on it.
    query = ("CREATE TABLE `files_preprocessed_content` ("
             "`id` integer PRIMARY KEY AUTOINCREMENT, "
             "`file_id` integer, "
             "`prerocessed_content` text, "
             "FOREIGN KEY (`file_id`) REFERENCES `all_files`(`id`))")
    database.execute(query)

    query = "SELECT `id`, `content` FROM `all_files`"
    records = database.execute(query)

    rows = []
    for record in records:
        file_id = record[0]
        file_content = record[1]
        lemmatized_content = lemmatize(
            remove_spaces(
                remove_newlines(
                    remove_specchars(
                        file_content.lower()))))
        rows.append(
            '(' + ','.join(["'%s'" % file_id,
                            "'%s'" % lemmatized_content]) + ')')

    # "VALUES" followed by nothing is not valid SQL; with no files the
    # freshly created table simply stays empty.
    if not rows:
        return

    query = "INSERT INTO `files_preprocessed_content` (`file_id`, `prerocessed_content`) VALUES {}".format(
        ','.join(rows))
    database.execute(query)