def __init__(self, b64string, name):
    """Bind the database engine, ensure the table exists and store the fields.

    :param b64string: value stored on ``self.b64string`` (presumably the
        base64-encoded image data -- confirm against callers)
    :param name: value stored on ``self.name``
    """
    engine = Database().engine
    self.engine = engine
    # Only run ``create_all`` when this model's own table is missing.
    table_missing = not engine.dialect.has_table(engine, self.__tablename__)
    if table_missing:
        Base.metadata.create_all(engine)
    self.name = name
    self.b64string = b64string
def __init__(self, created_at, result):
    """Bind the database engine, ensure the table exists and store the fields.

    :param created_at: value stored on ``self.created_at``
    :param result: value stored on ``self.result``
    """
    self.engine = Database().engine
    dialect = self.engine.dialect
    if not dialect.has_table(self.engine, self.__tablename__):
        # This model's table (``__tablename__``) does not exist yet, so
        # create everything registered on ``Base.metadata``.
        Base.metadata.create_all(self.engine)
    self.created_at = created_at
    self.result = result
def __init__(self, accuracy, label, img):
    """Bind the database engine, ensure the table exists and store the fields.

    :param accuracy: value stored on ``self.accuracy``
    :param label: value stored on ``self.label``
    :param img: value stored on ``self.img``
    """
    self.engine = Database().engine
    have_table = self.engine.dialect.has_table(
        self.engine, self.__tablename__)
    if not have_table:
        # Create the missing table(s) registered on ``Base.metadata``.
        Base.metadata.create_all(self.engine)
    self.accuracy = accuracy
    self.label = label
    self.img = img
def create_classification(self):
    """Insert a ``DBClassification`` row built from this object and commit it.

    The new row receives ``self.created_at`` and ``self.result``. Any
    exception raised while adding or committing propagates to the caller.
    """
    sess = Database().session()
    try:
        sess.add(
            DBClassification(
                created_at=self.created_at,
                result=self.result,
            ))
        sess.commit()
    finally:
        # Always close, so a failing add/commit does not leave the session
        # (and its connection) open.
        sess.close()
def replace_by_popular_contained(text):
    """Replace ``text`` by the first popular organisation name it contains.

    The candidates come from ``Database.get_popular_organisation_names(100)``
    (first element of each returned row). The first candidate that is
    contained in ``text`` without being equal to it is printed and returned;
    if there is none, ``text`` is returned unchanged.
    """
    rows = Database.get_popular_organisation_names(100)
    candidates = [row[0] for row in rows]
    for candidate in candidates:
        if candidate in text and candidate != text:
            print(f'{text} -> {candidate}')
            return candidate
    return text
def total_predictions():
    """
    Count how many stored results carry each label.

    :return: tuple ``(predictions, total_amount)``; ``predictions`` maps each
        label to its number of rows and ``total_amount`` is the sum of those
        counts (``0`` when there are no rows at all)
    """
    sess = Database().session()
    try:
        # Each row is a one-element tuple holding the label.
        counts = Counter(row[0] for row in sess.query(DBResult.label))
    finally:
        # Close the session even when the query fails.
        sess.close()
    # ``sum`` copes with an empty table, where ``reduce`` without an
    # initial value raised ``TypeError``.
    return dict(counts), sum(counts.values())
def count_predictions(label):
    """
    Count the stored results, optionally restricted to a single label.

    :param label: label to count; a falsy value (``None``, ``""``) counts
        every row
    :return: number of matching rows
    """
    sess = Database().session()
    try:
        query = sess.query(func.count(DBResult.result_id))
        if label:
            # A plain WHERE clause gives the same count as the former
            # aggregate ``FILTER (WHERE ...)`` form but does not depend on
            # the backend supporting FILTER.
            query = query.filter(DBResult.label == str(label))
        return query.scalar()
    finally:
        # Close the session even when the query fails.
        sess.close()
def calc_accuracies():
    """
    Compute a per-label accuracy figure from the stored results.

    For every label the stored accuracies are summed and divided by
    ``DBClassification.count_predictions(label)``; the quotient is rounded
    to two decimals.

    :return: dict mapping each label to its rounded accuracy
    """
    accuracies = {}
    sess = Database().session()
    try:
        for row in sess.query(DBResult.label):
            label = row[0]
            if label in accuracies:
                # The query yields one row per result, so labels repeat;
                # each label only needs to be computed once.
                continue
            acc_sum = sess.query(func.sum(DBResult.accuracy)).filter(
                DBResult.label == str(label)).scalar()
            accuracies[label] = round(
                acc_sum / DBClassification.count_predictions(label), 2)
    finally:
        # Close the session even when a query or the division fails.
        sess.close()
    return accuracies
def resolve_answer(self, *args, **kwargs):
    """Return the first ``AnswerModel`` result of ``Database.query``.

    Positional and keyword arguments are accepted but not used.
    """
    answer_query = Database.query(AnswerModel)
    return answer_query.first()
from service.downloader import Downloader from service.database import Database from database.remiss import Remiss AMOUNT = 1000 RESET_DB = True if RESET_DB: Database.drop_tables() Database.create_tables() print(f'I-0 - Recreated database.\n') saved_remisser = Remiss.query.all() print(f'I-1 - Found {len(saved_remisser)} remisser in the database.\n') print('Querying regeringen.se...') remisser = Downloader.get_last_remisser(AMOUNT) nb_of_remisser = len(remisser) print(f'I-2 - Found {nb_of_remisser} remisser online.\n') for index, online_remiss in enumerate(remisser, start=1): found = False for saved_remiss in saved_remisser: if saved_remiss.url == online_remiss.url: print(f'{index}/{nb_of_remisser} remiss(er) - ' f'Already saved (id {saved_remiss.id})') found = True if found:
def create(self, collection_name):
    """Insert this object's JSON form into ``collection_name``.

    The dict returned by ``self.to_json()`` is stamped with a
    ``create_time`` (naive UTC ``datetime``) before being handed to
    ``Database.insert``.

    :param collection_name: passed to ``Database.insert`` as ``collection``
    :return: whatever ``Database.insert`` returns
    """
    payload = self.to_json()
    payload['create_time'] = datetime.datetime.utcnow()
    inserted = Database.insert(collection=collection_name, data=payload)
    return inserted
def update(self, collection_name, query):
    """Update the documents matching ``query`` with this object's JSON form.

    The dict returned by ``self.to_json()`` is stamped with an
    ``update_time`` (naive UTC ``datetime``) and sent wrapped in a ``$set``
    operation.

    :param collection_name: passed to ``Database.update`` as ``collection``
    :param query: passed to ``Database.update`` as ``query``
    :return: whatever ``Database.update`` returns
    """
    fields = self.to_json()
    fields['update_time'] = datetime.datetime.utcnow()
    set_operation = {"$set": fields}
    return Database.update(
        collection=collection_name,
        query=query,
        data=set_operation,
    )
from pdfminer.pdfparser import PDFSyntaxError from database.document import Document from database.consultee import Consultee from service.downloader import Downloader from service.document_parser import DocumentParser from service.database import Database from service.file_manager import FileManager from io import BytesIO RESET_DB = False RESET_FILES = False if RESET_DB: Database.delete_all(Consultee) Database.commit() print(f'Emptied the consultee table.\n') saved_documents = Document.query.filter(Document.type == 'consultee_list') for document in saved_documents: if not RESET_DB and document.consultee_list != []: print(f'Consultees for remiss {document.remiss_id} already saved.') continue elif RESET_DB: Consultee.query.filter( Consultee.consultee_list_id == document.id ).delete() filepath = f'tmp/{document.remiss_id}/{document.id}.pdf'
"""Package for DB connection"""
from service.database import Database

# Module-level Database instance, constructed when this package is imported.
db_instance = Database()
def get_role(user_id):
    """Look up the role document for ``user_id`` in ``ROLE_COLLECTION``.

    :param user_id: value matched against the ``user_id`` field
    :return: a ``Role`` built from the document's fields, or ``None`` when
        ``Database.find_one`` returns nothing truthy
    """
    record = Database.find_one(ROLE_COLLECTION, {"user_id": user_id})
    return Role(**record) if record else None
def shutdown_session(exception=None):
    """Call ``Database.remove()``; the ``exception`` argument is not used.

    NOTE(review): the signature looks like a framework teardown callback
    that discards the current session -- confirm where this is registered.
    """
    Database.remove()
def find_one(cls, collection_name, query):
    """Fetch a single document and return it in this class's JSON form.

    :param collection_name: passed to ``Database.find_one`` as ``collection``
    :param query: passed to ``Database.find_one`` as ``query``
    :return: ``cls(**document).to_json()``, or ``None`` when nothing truthy
        was found
    """
    document = Database.find_one(collection=collection_name, query=query)
    if not document:
        return None
    instance = cls(**document)
    return instance.to_json()
def find(cls, collection_name, query):
    """Fetch all documents matching ``query`` in this class's JSON form.

    :param collection_name: passed to ``Database.find`` as ``collection``
    :param query: passed to ``Database.find`` as ``query``
    :return: list with ``cls(**document).to_json()`` for every document
    """
    documents = Database.find(collection=collection_name, query=query)
    serialised = []
    for document in documents:
        serialised.append(cls(**document).to_json())
    return serialised
def get_all(cls, collection_name):
    """Fetch every document of ``collection_name`` in this class's JSON form.

    :param collection_name: passed to ``Database.find`` as ``collection``
        together with an empty query
    :return: list with ``cls(**document).to_json()`` for every document
    """
    def as_json(document):
        return cls(**document).to_json()

    documents = Database.find(collection=collection_name, query={})
    return list(map(as_json, documents))
def get_one_by_id(cls, collection_name, id):
    """Fetch the document whose ``_id`` equals ``id`` in this class's JSON form.

    :param collection_name: passed to ``Database.find_one`` as ``collection``
    :param id: value matched against the ``_id`` field
    :return: ``cls(**document).to_json()``, or ``None`` when no document
        matches
    """
    result = Database.find_one(collection=collection_name, query={'_id': id})
    if result is None:
        # Without this guard ``cls(**None)`` raises TypeError for an
        # unknown id.
        return None
    return cls(**result).to_json()
def resolve_answers(self, *args, **kwargs):
    """Return all ``AnswerModel`` results of ``Database.query``.

    Positional and keyword arguments are accepted but not used.
    """
    answer_query = Database.query(AnswerModel)
    return answer_query.all()
for answer in answers_for_remiss: if RESET_DB or answer.organisation is None: org_name = answer.files[0].name if len(answers_for_remiss) > 3: filenames = [a.files[0].name for a in answers_for_remiss] common = Cleaner.long_substr(filenames) if len(common) > 3: org_name = org_name.replace(common, '') org_name = Cleaner.get_organisation_name(org_name) answer.organisation = org_name Database.commit() print(f'{remiss_index}/{nb_of_remisser} - Cleaned') saved_lists = Document.query.filter(Document.type == 'consultee_list').all() print('II-2 Cleaning organisation names from consultee lists...') for document_index, consultee_list in enumerate(saved_lists, start=1): consultees_for_list = Consultee.query.filter_by( consultee_list_id=consultee_list.id).all() nb_of_consultee_lists = len(saved_lists) for consultee in consultees_for_list: if RESET_DB or consultee.cleaned_name is None: org_name = consultee.name