Exemple #1
0
    def test_get_docs_by_audit_id(self):
        """Print the id and filename of each PROTOCOL document of the first audit.

        Logs a warning and returns early when ``get_audits()`` returns
        nothing to work with.
        """
        all_audits = get_audits()
        if not len(all_audits):
            logger.warning('no audits')
            return

        # Only the first audit in the returned sequence is inspected.
        first_audit_id = all_audits[0]['_id']

        for protocol in get_docs_by_audit_id(first_audit_id, kind='PROTOCOL'):
            print(protocol['_id'], protocol['filename'])
Exemple #2
0
    def test_process_charters_phase_1(self):
        """Run the CHARTER processor's ``preprocess`` over the first audit's charters.

        Logs a warning and returns early when ``get_audits()`` returns
        nothing. Otherwise every CHARTER document of the first audit is
        wrapped in ``DbJsonDoc`` and handed to the processor registered under
        ``'CHARTER'`` together with a fresh ``AuditContext``.
        """
        # Local import: needed only for the annotation on ``docs`` below.
        from typing import List

        audits = get_audits()
        if len(audits) == 0:
            logger.warning('no audits')
            return

        audit_id = audits[0]['_id']
        # ``[dict]`` is a list literal, not a type; ``List[dict]`` is the valid
        # spelling of "list of dicts" for type checkers.
        docs: List[dict] = get_docs_by_audit_id(audit_id, kind='CHARTER')
        processor = document_processors.get('CHARTER')
        for _doc in docs:
            jdoc = DbJsonDoc(_doc)
            # A new AuditContext is created for every document.
            processor.preprocess(jdoc, AuditContext())
Exemple #3
0
 def _get_doc_from_db(self, kind):
     """Yield a document of the given ``kind`` from the earliest-created audit.

     Generator: the ``audits`` collection is sorted by ``createDate``
     ascending and limited to a single record. For that audit, document ids
     are looked up with ``states=[15]`` and ``id_only=True``; if any are
     found, the first id is printed and the document loaded through
     ``finalizer.get_doc_by_id`` is yielded. Nothing is yielded when no ids
     match.
     """
     audits_collection = get_mongodb_connection()['audits']
     oldest_first = audits_collection.find().sort([
         ("createDate", pymongo.ASCENDING)
     ])
     for audit_record in oldest_first.limit(1):
         matching_ids = get_docs_by_audit_id(
             audit_record['_id'],
             kind=kind,
             states=[15],
             id_only=True,
         )
         if not len(matching_ids) > 0:
             continue

         first_id = matching_ids[0]
         print(first_id)
         yield finalizer.get_doc_by_id(first_id)
Exemple #4
0
    def test_process_contracts_phase_1(self):
        """Run the CONTRACT processor's ``preprocess`` over the first audit's contracts.

        Logs a warning and returns early when ``get_audits()`` returns
        nothing. Otherwise each CONTRACT document of the first audit is
        wrapped in ``DbJsonDoc`` and passed, with a fresh ``AuditContext``,
        to the processor registered under ``'CONTRACT'``.
        """
        found_audits = get_audits()
        if not len(found_audits):
            logger.warning('no audits')
            return

        first_audit_id = found_audits[0]['_id']
        contracts = get_docs_by_audit_id(first_audit_id, kind='CONTRACT')
        contract_processor = document_processors.get('CONTRACT')

        for raw_contract in contracts:
            wrapped = DbJsonDoc(raw_contract)
            # A new AuditContext is created for every document.
            contract_processor.preprocess(wrapped, AuditContext())
Exemple #5
0
    def test_process_protocols_phase_1(self):
        """Parse and save every PROTOCOL document of every audit.

        Uses the runner returned by ``get_runner_instance_no_embedder()``.
        Each document is wrapped in ``DbJsonDoc``, converted with
        ``asLegalDoc()``, passed to the runner's protocol parser
        (``find_org_date_number``) with a fresh ``AuditContext``, and then
        handed to ``save_analysis`` with ``-1`` as the third argument.
        """
        runner = get_runner_instance_no_embedder()

        for current_audit in get_audits():
            protocols = get_docs_by_audit_id(current_audit['_id'], kind='PROTOCOL')

            for raw_protocol in protocols:
                json_doc = DbJsonDoc(raw_protocol)
                protocol_doc = json_doc.asLegalDoc()

                context = AuditContext()
                runner.protocol_parser.find_org_date_number(protocol_doc, context)
                save_analysis(json_doc, protocol_doc, -1)
Exemple #6
0
    def test_get_org_names(self):
        """Parse each charter of the first audit and print its tags.

        Logs a warning and returns early when ``get_audits()`` returns
        nothing. For every CHARTER document of the first audit the filename
        is printed, a ``CharterDocument`` is built from the stored ``parse``
        payload via ``join_paragraphs``, ``CharterParser.find_org_date_number``
        is run on it, and every tag from ``get_tags()`` is printed.
        """
        charter_parser = CharterParser()

        all_audits = get_audits()
        if not len(all_audits):
            logger.warning('no audits')
            return

        first_audit_id = all_audits[0]['_id']

        for record in get_docs_by_audit_id(first_audit_id, kind=CHARTER):
            print(record['filename'])

            parse_payload = record['parse']
            charter: CharterDocument = join_paragraphs(
                parse_payload, doc_id=record['_id'])

            # TODO: this step could be slow if embedding is required
            charter_parser.find_org_date_number(charter, AuditContext())

            for charter_tag in charter.get_tags():
                print(charter_tag)