def export_updated_contracts_to_json(document_ids, work_dir):
    """Dump the given documents into ``contracts_mongo.json`` in *work_dir*.

    Every id in *document_ids* is loaded with ``get_doc_by_id`` and stored in
    a mapping keyed by the stringified ``_id``. The mapping is then written
    as indented UTF-8 JSON; ``json_util.default`` is passed as the ``default``
    hook for values the ``json`` module cannot serialise by itself.

    A document without an ``auditId`` key is still exported; only a warning
    is logged for it.

    Args:
        document_ids: iterable of document ids accepted by ``get_doc_by_id``.
        work_dir: directory in which ``contracts_mongo.json`` is created.
    """
    exported = {}
    for k, doc_id in enumerate(document_ids):
        d = get_doc_by_id(doc_id)

        if 'auditId' not in d:
            logger.warning(f'error: doc {d["_id"]} is not linked to any audit')

        exported[str(d['_id'])] = d
        logger.debug(f"exporting JSON {k} {d['_id']}")

    with open(os.path.join(work_dir, 'contracts_mongo.json'),
              'w',
              encoding='utf-8') as outfile:
        json.dump(exported,
                  outfile,
                  indent=2,
                  ensure_ascii=False,
                  default=json_util.default)

    # Report the number of entries actually written. The zero-based loop
    # index used previously under-reported the total by one.
    logger.info(f'EXPORTED {len(exported)} docs')
Beispiel #2
0
def audit_phase_1(audit, kind=None):
    """Pre-process the new documents (and charters) attached to *audit*.

    Ids of the audit's documents in the ``DocumentState.New`` state are
    fetched (optionally restricted by *kind*), the ids listed under the
    audit's ``"charters"`` key are appended, and each document is handed to
    the processor registered for its ``documentType``.
    """
    logger.info(f'.....processing audit {audit["_id"]}')
    ctx = AuditContext(audit["subsidiary"]["name"])

    document_ids = get_docs_by_audit_id(audit["_id"],
                                        states=[DocumentState.New.value],
                                        kind=kind,
                                        id_only=True)
    document_ids.extend(audit.get("charters", []))

    for k, document_id in enumerate(document_ids):
        jdoc = DbJsonDoc(finalizer.get_doc_by_id(document_id))

        processor: BaseProcessor = document_processors.get(jdoc.documentType)
        if processor is None:
            # No processor registered for this type: report it and move on.
            logger.warning(
                f'unknown/unsupported doc type: {jdoc.documentType}, cannot process {document_id}'
            )
            continue

        logger.info(
            f'......pre-processing {k} of {len(document_ids)}  {jdoc.documentType}:{document_id}'
        )
        if not need_analysis(jdoc):
            continue
        if jdoc.isNew():
            processor.preprocess(jdoc=jdoc, context=ctx)
Beispiel #3
0
def audit_phase_2(audit, kind=None):
    """Process the pre-processed documents (and charters) attached to *audit*.

    Ids of the audit's documents in the ``DocumentState.Preprocessed`` or
    ``DocumentState.Error`` state are fetched (optionally restricted by
    *kind*), the ids listed under the audit's ``"charters"`` key are
    appended, and every document that needs analysis and reports
    ``isPreprocessed()`` is passed to the processor registered for its
    ``documentType``.

    After the loop the audit status is changed to ``"Finalizing"``
    unconditionally, whatever happened to the individual documents.
    """
    ctx = AuditContext(audit["subsidiary"]["name"])

    # Use the logger rather than print so the banner goes through the same
    # channel as every other message of this function.
    logger.info(f'.....processing audit {audit["_id"]}')

    document_ids = get_docs_by_audit_id(
        audit["_id"],
        states=[DocumentState.Preprocessed.value, DocumentState.Error.value],
        kind=kind,
        id_only=True)

    _charter_ids = audit.get("charters", [])
    document_ids.extend(_charter_ids)

    for k, document_id in enumerate(document_ids):
        _document = finalizer.get_doc_by_id(document_id)
        jdoc = DbJsonDoc(_document)

        processor: BaseProcessor = document_processors.get(jdoc.documentType)
        if processor is None:
            logger.warning(
                f'unknown/unsupported doc type: {jdoc.documentType}, cannot process {document_id}'
            )
        else:
            if need_analysis(jdoc) and jdoc.isPreprocessed():
                logger.info(
                    f'.....processing  {k} of {len(document_ids)}   {jdoc.documentType} {document_id}'
                )
                processor.process(jdoc, audit, ctx)

    change_audit_status(audit,
                        "Finalizing")  # TODO: check ALL docs in proper state
Beispiel #4
0
def _test_contract():
    """Convert one hard-coded contract's stored attributes to a tree and JSON.

    Returns the two values produced by ``to_json`` followed by the raw
    document that was loaded.
    """
    doc = get_doc_by_id(ObjectId('5f0bb4bd138e9184feef1fa8'))
    db_attributes = doc['analysis']['attributes']
    as_object, as_text = to_json(
        {"contract": convert_contract_db_attributes_to_tree(db_attributes)})
    return as_object, as_text, doc
Beispiel #5
0
def _test_charter():
    """Convert one hard-coded charter's stored attributes to a tree and JSON.

    Returns the two values produced by ``to_json`` followed by the raw
    document that was loaded.
    """
    doc = get_doc_by_id(ObjectId('5f64161009d100a445b7b0d6'))
    db_attributes = doc['analysis']['attributes']
    as_object, as_text = to_json(
        {"charter": convert_charter_db_attributes_to_tree(db_attributes)})
    return as_object, as_text, doc
    def import_recent_contracts(self):
        """Save a datapoint for every contract from ``get_updated_contracts``.

        The trainset metadata is (re)loaded into ``self.stats`` first; stats
        are saved after each individual datapoint.
        """
        self.stats: DataFrame = self.load_contract_trainset_meta()

        # Collect all ids up front, before any document is loaded.
        contract_ids = [
            contract["_id"] for contract in self.get_updated_contracts()
        ]

        for contract_id in contract_ids:
            self.save_contract_datapoint(DbJsonDoc(get_doc_by_id(contract_id)))
            self._save_stats()
Beispiel #7
0
def get_attributes_tree(id: str):
    """Return the ``charter`` branch of a document's stored attributes tree.

    Args:
        id: string form of the document's ObjectId. The name shadows the
            builtin but is kept so keyword callers keep working.

    Returns:
        ``dotdict(tree).charter``, where ``tree`` is the document's
        ``analysis['attributes_tree']`` value, or ``None`` when the document,
        its analysis or the attributes tree is missing.
    """
    doc = get_doc_by_id(ObjectId(id))
    if doc is None:
        # Unknown id: nothing to return instead of failing on ``None.get``.
        return None

    analysis = doc.get('analysis')
    if not analysis:
        return None

    tree = analysis.get('attributes_tree')
    if tree is None:
        # Analysis exists but no tree was stored yet.
        return None

    return dotdict(tree).charter
Beispiel #8
0
    def test_analyze_charter(self):
        """Run preprocess and process of the CHARTER processor on a stored doc.

        Raises RuntimeError when the hard-coded document cannot be loaded.
        """
        processor: BaseProcessor = document_processors[CHARTER]

        charter = get_doc_by_id(ObjectId('5e5de70d01c6c73c19eebd48'))
        if charter is None:
            raise RuntimeError("fix unit test please")
        audit = get_audit_by_id(charter['auditId'])

        jdoc = DbJsonDoc(charter)
        logger.info(f'......pre-processing {jdoc._id}')

        # A single context is shared by both phases.
        ctx = AuditContext()
        processor.preprocess(jdoc, context=ctx)
        processor.process(jdoc, audit, ctx)
Beispiel #9
0
    def test_analyze_contract(self):
        """Run preprocess and process of the CONTRACT processor on a stored doc.

        Raises RuntimeError when the hard-coded document cannot be loaded.
        """
        processor: BaseProcessor = document_processors[CONTRACT]

        contract = get_doc_by_id(ObjectId('5ded004e4ddc27bcf92dd47c'))
        if contract is None:
            raise RuntimeError("fix unit test please")
        audit = get_audit_by_id(contract['auditId'])

        jdoc = DbJsonDoc(contract)
        logger.info(f'......pre-processing {jdoc._id}')

        # A single context is shared by both phases.
        ctx = AuditContext()
        processor.preprocess(jdoc, context=ctx)
        processor.process(jdoc, audit, ctx)
Beispiel #10
0
 def _get_doc_from_db(self, kind):
     """Yield the first matching document of the earliest-created audit.

     The audits collection is sorted by ``createDate`` ascending and limited
     to one audit. For that audit, ids of documents of the given *kind* in
     state 15 are requested; if any exist, the first id is printed and the
     corresponding document is yielded.

     NOTE(review): the meaning of state 15 is not visible here - confirm
     against the DocumentState definition.
     """
     cursor = get_mongodb_connection()['audits'].find()
     cursor = cursor.sort([("createDate", pymongo.ASCENDING)]).limit(1)

     for audit in cursor:
         doc_ids = get_docs_by_audit_id(audit['_id'],
                                        kind=kind,
                                        states=[15],
                                        id_only=True)
         if len(doc_ids) == 0:
             continue

         first_id = doc_ids[0]
         print(first_id)
         yield finalizer.get_doc_by_id(first_id)
Beispiel #11
0
    def test_analyse_acontract(self):
        """Pre-process and process one stored contract, then print the result.

        The audit context is built from the subsidiary name of the audit the
        document belongs to.
        """
        doc = get_doc_by_id(ObjectId('5fdb213f542ce403c92b4530'))
        audit = get_audit_by_id(doc['auditId'])

        jdoc = DbJsonDoc(doc)
        logger.info(f'......pre-processing {jdoc._id}')

        ctx = AuditContext(audit["subsidiary"]["name"])
        processor: BaseProcessor = document_processors[CONTRACT]
        processor.preprocess(jdoc, context=ctx)
        processor.process(jdoc, audit, ctx)

        print(jdoc)
Beispiel #12
0
def _test_protocol():
    """Run ``convert_one`` on a single hard-coded protocol document."""
    db = get_mongodb_connection()
    protocol_id = ObjectId('5df7a66b200a3f4d0fad786f')
    convert_one(db, get_doc_by_id(protocol_id))