Beispiel #1
0
def process_workflow_files(request):
    """
    List, read or overwrite workflow files registered as resources.

    GET: returns every ``camunda_workflow`` record of RESOURCES_COLLECTION
    with the ``_id`` field removed.
    POST: the body is decoded as JSON. With ``file_path`` the content of
    that file is returned; otherwise with ``resource_id`` the file referenced
    by the matching resource record is overwritten with ``xml_string``.
    :param request: request object exposing ``method`` and ``body``
    :return: dict with a ``status`` of ``success`` or ``failure``
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        if request.method == "GET":
            result = MongoDbConn.find(RESOURCES_COLLECTION,
                                      dict(type="camunda_workflow"))
            workflow_files = list()
            for record in result:
                record.pop("_id", None)
                workflow_files.append(record)
            return {"data": workflow_files, "status": "success"}
        if request.method == "POST":
            payload = json.loads(request.body.decode())
            if "file_path" in payload:
                # NOTE(review): the path is taken straight from the request
                # body, so any file readable by this process can be returned.
                # Confirm callers are trusted or restrict to a known directory.
                with open(payload["file_path"]) as fp:
                    xml_string = fp.read()
                return {"status": "success", "xml_string": xml_string}
            if "resource_id" in payload:
                record = MongoDbConn.find_one(
                    RESOURCES_COLLECTION,
                    dict(type="camunda_workflow",
                         resource_id=payload["resource_id"]))
                if record is not None:
                    # "r+" keeps the requirement that the file already exists;
                    # writing from offset 0 and truncating replaces its content.
                    with open(record["file_path"], 'r+') as f:
                        f.write(payload["xml_string"])
                        f.truncate()
                    return {"status": "success",
                            "msg": "Workflow updated successfully"}
            return {"status": "failure", "msg": "Workflow update failed"}
        # Any other method used to fall through and return None.
        return {"status": "failure", "msg": "Unsupported request method"}
    # TODO raise specific exception
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
        return {"status": "failure", "msg": str(e)}
    finally:
        context.end_span()
Beispiel #2
0
 def find_ug_based_queues(self, user_groups, solution_id):
     """
     Return the non-deleted queues of a solution that reference at least
     one of the given user groups.
     :param user_groups: iterable of user group dicts, each with an 'id'
     :param solution_id: solution whose queues are searched
     :return: list of matching queue records; [] when an error was logged
     """
     try:
         wanted_ids = [group['id'] for group in user_groups]
         cursor = MongoDbConn.find(
             WORKFLOW_QUEUE_COLLECTION,
             {'is_deleted': False, 'solution_id': solution_id},
             projection={'_id': 0})
         # A queue qualifies as soon as one of its user groups is wanted.
         return [
             queue for queue in list(cursor)
             if 'user_groups' in queue
             and any(member['id'] in wanted_ids
                     for member in queue['user_groups'])
         ]
     except Exception as err:
         self.context.log(message=str(err),
                          obj={'tb': traceback.format_exc()})
         return []
Beispiel #3
0
def find_documents(request, collection, query, solution_id, projection_fields=None):
    """
    Fetch a page of documents and add review summary fields to each one.

    Sorting and paging values come from ``get_pagination_details`` (defaults:
    ``updated_ts`` descending, no skip, no limit). Every document is reduced
    with ``construct_json`` to DOCUMENT_SUMMARY_FIELDS and then extended with
    ``is_digital``, ``confidence_score`` (only when missing), ``is_failed``
    and ``review_text``.
    :param request: request passed on to ``get_pagination_details``
    :param collection: collection to query
    :param query: filter for the lookup
    :param solution_id: solution id used when computing a confidence score
    :param projection_fields: optional projection for the lookup
    :return: list of document summaries; None when an error was logged
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        cursor = MongoDbConn.find(collection, query, projection=projection_fields)
        sort_by, order_by_asc, skip, limit = get_pagination_details(request, sort_by='updated_ts', order_by_asc=-1,
                                                                    skip=0, limit=0)
        documents_list = cursor.sort(sort_by, order_by_asc).skip(skip).limit(limit)

        documents = []
        for document in documents_list:
            document.pop("_id", None)
            document = construct_json(document, DOCUMENT_SUMMARY_FIELDS)
            # Anything that is not an image is treated as a digital document.
            document["is_digital"] = get_doc_type(document['extn']) != "image"
            if "confidence_score" not in document:
                document["confidence_score"] = get_confidence_score(document, solution_id, document["is_digital"])
            # The three statements below used to be nested in the branch
            # above, which dropped every document that already had a
            # confidence score from the result.
            document["is_failed"] = document["doc_state"] == "failed"
            document["review_text"] = get_review_text(document["doc_state"], document)
            documents.append(document)
        return documents
    # TODO raise specific exception
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
        # Callers keep receiving None on failure, as before.
        return None
    finally:
        context.end_span()
Beispiel #4
0
def construct_data(selector, field, solution_id):
    """
    Count matching records for consecutive time windows, newest first.

    The number of windows and their size in days come from
    ``get_iters(selector)`` ('length' and 'step'). The first window ends
    today at 23:59:59; each following window ends where the previous began.
    :param selector: value handed to ``get_iters``
    :param field: dict with 'key_type', 'key', 'timestamp', 'collection'
        and, for string keys, 'name'
    :param solution_id: solution the counted records must belong to
    :return: list with one count per window
    """
    counts = []
    window_total = get_iters(selector)['length']
    window_days = get_iters(selector)['step']

    key_type = field['key_type']
    if key_type == 'string':
        # Match the name in lower case as well as in title case.
        query = {"$or": [
            {field['key']: str(field["name"]).lower()},
            {field['key']: str(field["name"]).title()},
        ]}
    elif key_type == 'bool':
        if "," in field['key']:
            # Several comma separated flags: any one being True is enough.
            query = {"$or": [{flag.strip(): True}
                             for flag in field['key'].split(",")]}
        else:
            query = {field['key']: True}
    else:
        query = {}

    # Common code to get count.
    window_end = datetime.now().replace(hour=23, minute=59, second=59)
    for _ in range(window_total):
        window_start = window_end - timedelta(days=window_days)
        query[field['timestamp']] = {'$lte': window_end, '$gte': window_start}
        query["solution_id"] = solution_id
        cursor = MongoDbConn.find(field['collection'], query).sort('_id', -1)
        window_end = window_start
        counts.append(cursor.count())
    return counts
Beispiel #5
0
def set_attributes(field, query):
    """
    Build one single-key dict per record found for ``query``.

    :param field: dict providing the 'collection' to read and the 'key'
        under which each review value is stored
    :param query: filter for the lookup, also passed to ``get_review_value``
    :return: list of ``{field['key']: review value}`` dicts
    """
    attributes = []
    for record in MongoDbConn.find(field['collection'], query):
        review_value = get_review_value(field, record, query)
        attributes.append({field['key']: review_value})
    return attributes
Beispiel #6
0
def get_solutions():
    """
    Return all active solutions with their ``_id`` converted to a string.

    :return: list of solution records; [] when the lookup yields None
    """
    cursor = MongoDbConn.find(SOLN['collection'], {'is_active': True})
    if cursor is None:
        return []
    solutions = []
    for solution in cursor:
        solution['_id'] = str(solution['_id'])
        solutions.append(solution)
    return solutions
Beispiel #7
0
def get_entities(solution_id, request):
    """
    Return the formatted domain objects of a solution, sorted by ``ts``
    in descending order.

    :param solution_id: solution whose domain objects are read
    :param request: not used by this function
    :return: result of ``format_entity_details`` for the found records
    """
    cursor = MongoDbConn.find(
        ENTITY_COLLECTION,
        {"solution_id": solution_id, "entity_type": "domain_object"},
        projection={"_id": 0, "entity_name": 1, "attributes": 1})
    domain_objects = list(cursor.sort("ts", -1))
    return format_entity_details(domain_objects)
Beispiel #8
0
def get_entity_definitions(solution_id):
    """
    Return the entity definitions of a solution without their ``_id``.

    :param solution_id: solution whose definitions are read
    :return: list of definition records; [] when the lookup yields None
    """
    cursor = MongoDbConn.find(ENTITY_DEFN_COLL,
                              query={'solution_id': solution_id})
    if cursor is None:
        return list()
    definitions = list()
    for definition in cursor:
        definition.pop("_id")
        definitions.append(definition)
    return definitions
Beispiel #9
0
def add_intent(atts, query):
    """
    Copy intent details from claim field records onto matching attributes.

    A record matches an attribute when the record name, with underscores
    turned into spaces and title-cased, equals the attribute's 'key'.
    :param atts: list of attribute dicts, updated in place
    :param query: filter for CLAIMS_FIELDS_COLLECTION
    :return: the same ``atts`` list
    """
    records = MongoDbConn.find(CLAIMS_FIELDS_COLLECTION, query)
    if records is None:
        return atts
    for record in records:
        # e.g. "first_name" -> "First Name"
        words = [part.strip()
                 for part in str(record["name"]).split("_") if part != "_"]
        display_key = " ".join(words).strip().title()
        for attribute in atts:
            if "key" not in attribute.keys():
                continue
            if attribute["key"] == display_key and "intent" in record.keys():
                attribute["intent"] = intent_json(record["intent"])
    return atts
Beispiel #10
0
 def get_insight_template_from_db(self):
     """
     Return the active insight templates of this instance's solution.

     :return: list of template records with ``_id`` as a string, or None
         when the lookup yields None
     """
     criteria = {'is_active': True, 'solution_id': self.solution_id}
     cursor = MongoDbConn.find(self.collection_name, criteria)
     if cursor is None:
         return None
     templates = list()
     for template in cursor:
         template['_id'] = str(template['_id'])
         templates.append(template)
     return templates
Beispiel #11
0
def get_all_resources(type, solution_id):
    """
    Return the resources of a solution, sorted by ``_id`` descending.

    :param type: resource type to filter on; None returns every type
    :param solution_id: solution whose resources are read
    :return: dict with ``status`` "success" and the records under ``data``
    """
    if type is None:
        criteria = {"solution_id": solution_id}
    else:
        criteria = {"type": type, "solution_id": solution_id}
    cursor = MongoDbConn.find(RESOURCES_COLLECTION, criteria).sort("_id", -1)
    resources = []
    for entry in cursor:
        entry.pop("_id")
        resources.append(entry)
    return {"status": "success", "data": resources}
Beispiel #12
0
def entity_get(solution_id, config, endpoint):
    """
    Return entity data for the given endpoint.

    The endpoints "", "domainmapping" and "domainobject" are answered from
    ENTITY_COLLECTION; any other endpoint is delegated to
    ``get_data_from_entity``.
    :param solution_id: solution whose entities are read
    :param config: configuration handed to ``get_data_from_entity``
    :param endpoint: selects the shape of the response
    :return: dict or list depending on the endpoint; None when an error
        was logged
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    # implementing for entities only
    try:
        if endpoint not in ("", "domainmapping", "domainobject"):
            fetched = get_data_from_entity(config, solution_id)
            return [] if fetched is None else fetched

        names_only = endpoint == "domainobject"
        query = {"solution_id": solution_id}
        projection = {"_id": 0}
        if names_only:
            query["entity_type"] = {"$ne": "entity"}
            projection["entity_name"] = 1
        cursor = MongoDbConn.find(ENTITY_COLLECTION,
                                  query,
                                  projection=projection)
        entities = list(cursor.sort("ts", -1))

        if names_only:
            return {
                "domain_object": [item["entity_name"] for item in entities],
                "status": "success",
            }
        hierarchy, _deleted, all_entities = convert_flat_to_heirarchial(
            entities)
        if endpoint == "":
            return {"domain_object": hierarchy, "entities": all_entities}
        # Remaining case: "domainmapping".
        return process_all_domains(hierarchy)
    # TODO raise specific exception
    except Exception as err:
        context.log(message=str(err), obj={"tb": traceback.format_exc()})
    finally:
        context.end_span()
Beispiel #13
0
def template_train_upload_get(request, template_id):
    """
    Return the training samples stored for a template of the session's
    solution.

    :param request: request used to resolve the solution id
    :param template_id: template whose samples are read
    :return: dict with ``status``, ``msg`` and either ``data`` (success) or
        ``traceback`` (failure)
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        criteria = {
            "solution_id": common.get_solution_from_session(request),
            "template_id": template_id,
        }
        samples = list(MongoDbConn.find(TEMPLATE_TRAIN_SAMPLES_COLLECTION,
                                        criteria, {"_id": 0}))
        return {"status": "success", "msg": "retrieved template samples", "data": samples}
    # TODO raise specific exception
    except Exception as err:
        trace = traceback.format_exc()
        context.log(message=str(err), obj={"tb": trace})
        return {"status": "failure", "msg": "unknown error, " + str(err), "traceback": str(trace)}
    finally:
        context.end_span()
Beispiel #14
0
 def get_solutions():
     """
     Return a summary of every non-deleted solution, most recently
     updated first.

     :return: dict with ``status`` and ``msg``; on success with results it
         also carries the summaries under ``data``
     """
     context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
     context.start_span(component=__name__)
     try:
         cursor = None
         try:
             cursor = MongoDbConn.find(
                 SOLUTION_COLLECTION,
                 {'is_deleted': False},
                 projection={'_id': 0}).sort('updated_ts', -1)
         # TODO raise specific exception
         except Exception as fetch_err:
             context.log(message=str(fetch_err),
                         obj={"tb": traceback.format_exc()})
             return {
                 'status': 'failure',
                 'msg': 'Error occurred while fetching solutions'
             }
         if not cursor:
             return {'status': 'success', 'msg': 'No solutions exists'}

         summaries = []
         for soln in list(cursor):
             summaries.append({
                 "solution_id": soln['solution_id'],
                 "solution_name": soln['solution_name'],
                 "solution_type": soln['solution_type'],
                 "description": soln['description'],
                 "timestamp": soln['updated_ts'],
                 'hocr_type': soln['hocr_type']
             })
         return {
             'status': 'success',
             'msg': 'Solutions list',
             "data": summaries
         }
     except Exception as err:
         context.log(message=str(err), obj={"tb": traceback.format_exc()})
         return {
             'status': 'failure',
             'msg': 'Error occurred while fetching solutions'
         }
     finally:
         context.end_span()
Beispiel #15
0
def get_document_details(request, doc_id, page_no):
    """
    Return the elements of one document page, enriched for display.

    Table elements get a ``tables`` entry built from their headings and
    columns. All other elements get a ``domain_mapping`` and, when rules were
    processed for the document's entity, a ``rules`` entry. Elements without
    a ``score`` get a score of 0.
    :param request: request used to resolve the solution id
    :param doc_id: id of the document
    :param page_no: page number; converted with ``int``
    :return: dict with ``status`` and, on success, ``data`` holding the
        element list; on failure ``msg`` and ``error``
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        solution_id = common.get_solution_from_session(request)
        query = {"doc_id": doc_id, "page_no": int(page_no)}
        projection = {"solution_id": 0, "updated_ts": 0, "created_ts": 0, "_id": 0, "doc_id": 0}
        elements = MongoDbConn.find(DOC_ELEMENTS_COLLECTION, query, projection=projection)
        document = MongoDbConn.find_one(DOCUMENTS_COLLECTION, {"doc_id": doc_id}, projection={"doc_id": 1, "entity": 1})
        processed_rules = {}
        if "entity" in document:
            processed_rules = get_all_rules_processed(document["entity"])

        mapping_data = get_doc_mapping_from_template(doc_id, solution_id)
        element_list = []
        for element in elements:
            domain_mapping = get_domain_mapping(mapping_data, element_id=element["element_id"],
                                                section_id=element["section_id"])
            if element["type"] == "table":
                table = dict()
                # Both keys are required. The previous test
                # `"headings" and "columns" in element` only checked "columns"
                # and raised KeyError for tables without headings.
                if "headings" in element and "columns" in element:
                    table["table"], table["headings"] = construct_table_data(element["headings"], element["columns"],
                                                                             domain_mapping)
                    element["tables"] = table
                    element = remove_items(element, ["headings", "columns"])
            else:
                element["domain_mapping"] = ""
                if domain_mapping and isinstance(domain_mapping, dict) and "domain_mapping" in domain_mapping:
                    element["domain_mapping"] = domain_mapping["domain_mapping"]
                    if processed_rules:
                        element["rules"] = get_rules_info(element["domain_mapping"], processed_rules, solution_id,
                                                          element["text"])
            if "score" not in element:
                element["score"] = 0
            element_list.append(element)
        data = {"elements": element_list, "entity": {}}
        return {"status": "success", "data": data}
    # TODO raise specific exception
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
        return {"status": "failure", "msg": "Error occured while processing", "error": str(e)}
    finally:
        context.end_span()
Beispiel #16
0
def get_count(request, count=None):
    """
    Count known-format templates, unknown-format templates and domain
    objects of the session's solution.

    :param request: get request
    :param count: optional dict to fill with the counts; a fresh dict is
        created for each call when omitted
    :return: response dict; on success ``data`` holds the counts, otherwise
        ``status`` is "failed" and ``error`` carries the message
    """
    # A ``{}`` default is created once and shared by every call, so counts
    # from one request would leak into the next.
    if count is None:
        count = {}
    solution_id = common.get_solution_from_session(request)
    context = tracer.get_context(request_id=str(uuid4()), log_level="ERROR")
    context.start_span(component=__name__)
    try:
        query = dict(solution_id=solution_id, is_deleted=False)
        templates = MongoDbConn.find(TEMPLATE_COLLECTION, query)
        # Each record stores the template itself as a JSON string.
        templates = [json.loads(a["template"]) for a in templates]
        count["known_format_count"] = len(
            [a for a in templates if a["template_type"] == "known"])
        count["unknown_format_count"] = len(
            [a for a in templates if a["template_type"] in UNKNOWN_TYPES])
        try:
            config = config_from_endpoint(ENTITY_CONFIG, "")
            entities = entity_get(solution_id, config, "")
            count["domain_objects"] = len(entities["domain_object"])
            return {
                "status": "success",
                "msg": "recived count Successfully",
                "data": count
            }
        except Exception as e:
            return {
                "status": "failed",
                "msg": "error in getting domain object count",
                "error": str(e)
            }
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
        return {
            "status": "failed",
            "msg": "error in getting count",
            "error": str(e)
        }
    finally:
        context.end_span()
Beispiel #17
0
def get_template(solution_id, template_type="", template_id="", payload=None):
    """
    Return one template (by id) or a filtered, optionally paged list of the
    solution's non-deleted templates.

    :param solution_id: solution whose templates are read
    :param template_type: "" for all, "allpublished" for non-drafts plus the
        unknown types, "unknown" for both unknown types, or a concrete type
    :param template_id: when not "", only this template is returned
    :param payload: optional dict with paging values ``page_no`` (1-based)
        and ``no_of_recs`` (page size)
    :return: dict with ``success``, ``msg`` and ``status``; on success also
        ``data`` and ``total_count``, on failure ``error`` with the traceback
    """
    # Avoid a mutable default argument; None stands for "no paging info".
    if payload is None:
        payload = {}
    try:
        unknown_types = ["unknown_known", "unknown_unknown"]
        query = dict(solution_id=solution_id, is_deleted=False)

        if template_id != "":
            query["template_id"] = template_id
            t = MongoDbConn.find_one(TEMPLATE_COLLECTION, query, {"_id": 0})

            # Constructing Response
            # A missing template leaves an empty dict here, so the lookups
            # below raise KeyError and the failure response is returned.
            templates = json.loads(t["template"]) if t else {}
            templates["template_id"] = templates["doc_id"]
            templates["no_of_pages"] = len(templates["pages"].keys())
            templates["elements"] = convert_elements_old(templates["elements"])
            count = 1
        else:
            t = MongoDbConn.find(TEMPLATE_COLLECTION, query, {"_id": 0})
            templates = list()
            for a in t:
                a = json.loads(a["template"])
                a["template_id"] = a["doc_id"]
                templates.append(a)

            if template_type != "":
                if template_type == "allpublished":
                    templates = [a for a in templates if not a["is_draft"] or a["template_type"] in unknown_types]
                else:
                    wanted_types = unknown_types if template_type == "unknown" else [template_type]
                    templates = [a for a in templates if a["template_type"] in wanted_types]

            count = len(templates)
            page_no = payload.get("page_no")
            limit = payload.get("no_of_recs")
            # Page only when a page size was supplied; comparing the count
            # with a missing (None) limit raised TypeError before.
            if page_no and limit is not None and count > limit:
                skip = (int(page_no) - 1) * limit
                templates = templates[skip:skip + limit]

        return dict(success=True, msg="Template data", data=templates, total_count=count, status="success")
    except Exception:
        return dict(success=False, error=traceback.format_exc(), msg="Failed to get template", status="failure")
Beispiel #18
0
def training_set_get(collection, query, solution_id, flag=False):
    """
    Return the records of a collection for a solution, sorted by ``_id``
    descending.

    :param collection: collection to read
    :param query: filter dict; ``solution_id`` is added to it in place
    :param solution_id: solution the records must belong to
    :param flag: when falsy, the content of "upload_resource_types.json" is
        added under ``resource_types``
    :return: dict with the records under ``data``, or a failure dict when an
        error was logged
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        query['solution_id'] = solution_id
        records = list()
        for record in MongoDbConn.find(collection, query).sort("_id", -1):
            record['_id'] = str(record['_id'])
            records.append(record)

        result = {'data': records}
        if not flag:
            result['resource_types'] = get_file_contents("upload_resource_types.json")
        return result
    # TODO raise specific exception
    except Exception as err:
        context.log(message=str(err), obj={"tb": traceback.format_exc()})
        return {'status': 'failure', 'msg': 'Error in get resource library', 'errorMsg': str(err)}
    finally:
        context.end_span()
Beispiel #19
0
 def get_all_documents(self, solution_id):
     """
     Fetch the newest non-test documents of a solution.

     Documents count as non-test when ``is_test`` is False or absent. The
     result is sorted by ``_id`` descending and capped at DOC_COUNT records.
     :param solution_id: Session solution Id
     :return: list of document records; [] when an error was logged
     """
     try:
         not_a_test = [{"is_test": False}, {"is_test": {"$exists": False}}]
         query = {"solution_id": solution_id, "$or": not_a_test}
         projection = {
             "doc_id": 1,
             "solution_id": 1,
             "metadata": 1,
             "life_cycle": 1,
             "doc_state": 1,
             "children": 1,
             "created_ts": 1,
             "page_groups": 1,
             '_id': 0,
             'root_id': 1
         }
         cursor = MongoDbConn.find(DOCUMENTS_COLLECTION, query,
                                   projection=projection)
         return list(cursor.sort('_id', -1).limit(int(DOC_COUNT)))
     except Exception as err:
         self.context.log(message=str(err),
                          obj={'tb': traceback.format_exc()})
         return []
Beispiel #20
0
 def fetch_sources(self, solution_id, source_id):
     """
     Fetch all sources of this instance's source type for a solution, or a
     single one when ``source_id`` is given, newest update first.

     :param solution_id: Session Solution Id
     :param source_id: Sources Id; a falsy value selects every source
     :return: response dict with ``status``, ``status_code`` and ``msg``;
         on success also ``data`` and ``total_recs``
     """
     try:
         criteria = {
             'solution_id': solution_id,
             'is_deleted': False,
             'source_type': self.source_type
         }
         if source_id:
             criteria['source_id'] = source_id
         cursor = MongoDbConn.find(SOURCES_COLLECTION,
                                   criteria,
                                   projection={'_id': 0})
         sources = sorted(cursor,
                          key=lambda source: source['updated_ts'],
                          reverse=True)
         return {
             'status': 'success',
             'status_code': STATUS_CODES['OK'],
             'msg': 'Source fetched successfully for this solution.',
             'data': sources,
             'total_recs': len(sources)
         }
     except Exception as err:
         self.context.log(message=str(err),
                          obj={"tb": traceback.format_exc()})
         return {
             'status': 'failure',
             'status_code': STATUS_CODES['INTERNAL_SERVER_ERROR'],
             'msg': 'Failed to fetch the source/s information.'
         }
Beispiel #21
0
def fetch_elements_data(solution_id, doc_id, section_id, need_review_count, extracted_count, template_data):
    """
    Load the elements of one document section and prepare them for review.

    Each element gets review flags (``is_corrected``, ``is_accept``,
    ``need_review``), its originally extracted text, the latest feedback
    applied, normalised coordinate lists and a ``temp_id``. Elements whose
    latest feedback is a delete are skipped.
    :param solution_id: solution the elements belong to
    :param doc_id: document the elements belong to
    :param section_id: section whose elements are read
    :param need_review_count: running counter, increased for every element
        that still needs review
    :param extracted_count: running counter, increased for every element
        that is kept
    :param template_data: optional dict; when it has ``thresholds`` the
        entity confidence score in it replaces the default threshold of 100
    :return: tuple ``(elements, need_review_count, extracted_count,
        heading)`` where ``heading`` is the text of the last heading element
        or 'section'
    """
    query = {"doc_id": doc_id, "section_id": section_id, "solution_id": solution_id, "is_deleted": False}

    projection = {"element_id": 1, "label": 1, "name": 1, "page_no": 1, "list_elements": 1,
                  "score": 1, "section_id": 1, "solution_id": 1, "text": 1,
                  "type": 1, "label_coordinates": 1, "headings": 1, "columns": 1,
                  "value_coordinates": 1, "_id": 0, 'groups': 1, "label_coordinates_list": 1,
                  'insight_id': 1, 'slice_path': 1, 'feedback': 1, "value_coordinates_list": 1}
    section_elements = MongoDbConn.find(DOC_ELEMENTS_COLLECTION,
                                        query, projection=projection)

    elements = []
    heading = 'section'

    # Elements scoring above this threshold are accepted without review.
    extraction_score = 100
    if template_data and "thresholds" in template_data:
        try:
            extraction_score = template_data['thresholds']['annotation']['entity_confidence_score']
        except Exception:
            # Incomplete threshold configuration: keep the default.
            extraction_score = 100

    for ele in section_elements:
        extracted_count += 1
        ele['is_corrected'] = False
        ele['is_accept'] = False
        ele['need_review'] = True
        ele['extracted_text'] = ''
        feedback_dict = dict()
        if 'score' not in ele:
            ele['score'] = 100

        if ele['score'] > extraction_score:
            ele['need_review'] = False
            ele['is_accept'] = True

        if 'text' in ele:
            ele['extracted_text'] = ele['text']
        if 'feedback' in ele and len(ele['feedback']) > 0:
            feedback_list = ele['feedback']
            latest_feedback = feedback_list[-1]
            if 'feedback_type' in latest_feedback and \
                    latest_feedback['feedback_type'] == 'delete':
                # Deleted elements are not part of the result.
                extracted_count -= 1
                continue
            # Any other feedback means somebody already looked at it.
            ele['need_review'] = False
            if 'text' in latest_feedback or 'groups' in latest_feedback:
                if 'text' in latest_feedback:
                    latest_feedback_text = latest_feedback['text']
                    ele['text'] = latest_feedback_text
                if 'feedback_type' in latest_feedback:
                    if latest_feedback['feedback_type'] == 'edit':
                        ele['is_corrected'] = True
                    elif latest_feedback['feedback_type'] == 'accept':
                        ele['is_accept'] = True
            # Feedback text per insight id; later entries override earlier ones.
            for feedback in feedback_list:
                if 'insight_id' in feedback:
                    if 'text' in feedback:
                        feedback_dict[feedback['insight_id']] = feedback['text']
                    else:
                        feedback_dict[feedback['insight_id']] = ''

        if ele['need_review']:
            need_review_count += 1
        ele = get_coordinates_list(ele, "value_coordinates", "value_coordinates_list")
        ele = get_coordinates_list(ele, "label_coordinates", "label_coordinates_list")
        temp_id = section_id + '_' + ele['element_id']
        ele['temp_id'] = temp_id

        if ele["type"] == "table":
            table = dict()
            if "columns" in ele:
                if "headings" not in ele:
                    ele["headings"] = None
                table["table"], table["headings"], avg_score, table["column_no_list"] = \
                    construct_table_data_new(ele["columns"], feedback_dict, temp_id)
                ele = remove_items(ele, ["headings", "columns"])
                # Only overwrite the score when one was computed. Doing this
                # for tables without columns used an unbound name or the
                # value left over from an earlier table.
                ele['score'] = avg_score
            ele["tables"] = table

            # NOTE(review): need_review_count was already updated above, so a
            # table flipped to accepted here is still counted - confirm intent.
            if ele['score'] > extraction_score:
                ele['need_review'] = False
                ele['is_accept'] = True
        if ele["type"] == "list" and "list_elements" in ele:
            ele["list_elements"] = [construct_json(item, ["text", "value_coordinates_list", "label_coordinates_list"])
                                    for item in ele["list_elements"]]
        elements.append(ele)

        if ele['type'] == 'heading':
            heading = ele['text']

    return elements, need_review_count, extracted_count, heading
Beispiel #22
0
    #                     "fn" : "len",
    #                     "rval" : "4",
    #                     "lval" : "metadata",
    #                     "op" : "eq"
    #                 },
    #                 {
    #                     "fn" : "trim",
    #                     "rval" : "76",
    #                     "lval" : "root_id",
    #                     "op" : "lte"
    #                 }
    #             ]
    #         },
    #     }
    query_test = {"solution_id": solution_id_test}
    projection_test = {'_id': 0}
    documents_test = MongoDbConn.find(DOCUMENTS_COLLECTION, query_test,
                                 projection=projection_test).\
        sort('updated_ts', -1).limit(int(DOC_COUNT))
    documents_list_test = [doc for doc in documents_test]
    qd_map = QueueDocMapper()
    a = qd_map.map_document_queues(solution_id_test, queues_test,
                                   documents_list_test)
    # self.display_to_d_state = {'Needs Classification': 'classified',
    #                             'Post Processing': 'post_processed',
    #                             'Processing': 'processing',
    #                             'Reviewed': 'reviewed',
    #                             'Extraction': 'extracted',
    #                             'Annotation & Entity Linking': 'processed',
    #                             'Error': 'failed'}