def process_workflow_files(request):
    """
    List the stored Camunda workflow files (GET) or read/update one (POST).

    GET returns every ``camunda_workflow`` record of RESOURCES_COLLECTION
    without its Mongo ``_id``. POST with ``file_path`` in the JSON body returns
    the content of that file; POST with ``resource_id`` overwrites the file
    registered for that resource with ``payload["xml_string"]``.

    :param request: HTTP request; its ``method`` and ``body`` are read
    :return: dict whose ``status`` is ``success`` or ``failure``; any request
        that is not handled above gets the generic failure response
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        if request.method == "GET":
            records = MongoDbConn.find(RESOURCES_COLLECTION,
                                       dict(type="camunda_workflow"))
            workflow_files = []
            for record in records:
                # ObjectId is not JSON serialisable, so it is dropped.
                record.pop("_id", None)
                workflow_files.append(record)
            return {"data": workflow_files, "status": "success"}

        if request.method == "POST":
            payload = json.loads(request.body.decode())
            if "file_path" in payload:
                # NOTE(review): the path is taken verbatim from the request
                # body, so any file readable by this process can be returned.
                # Consider restricting it to the workflow directory.
                with open(payload["file_path"]) as fp:
                    xml_string = fp.read()
                return {"status": "success", "xml_string": xml_string}
            if "resource_id" in payload:
                resource = MongoDbConn.find_one(
                    RESOURCES_COLLECTION,
                    dict(type="camunda_workflow",
                         resource_id=payload["resource_id"]))
                if resource is not None:
                    # 'r+' keeps the "file must already exist" requirement;
                    # writing starts at offset 0 and truncate() drops any
                    # leftover tail of the previous content.
                    with open(resource["file_path"], 'r+') as f:
                        f.write(payload["xml_string"])
                        f.truncate()
                    return {"status": "success",
                            "msg": "Workflow updated successfully"}

        # Unknown resource, payload without a usable key, or unsupported
        # HTTP method.
        return {"status": "failure", "msg": "Workflow update failed"}
    # TODO raise specific exception
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
        return {"status": "failure", "msg": str(e)}
    finally:
        context.end_span()
def find_ug_based_queues(self, user_groups, solution_id):
    """
    Collect the queues of a solution that are shared with any of the given
    user groups.

    :param user_groups: user group records the user is tagged with (each one
        is read through its ``id`` key)
    :param solution_id: Session solution id
    :return: list of non-deleted queues having at least one matching user
        group; an empty list when anything goes wrong
    """
    try:
        wanted_ids = [group['id'] for group in user_groups]
        criteria = {'is_deleted': False, 'solution_id': solution_id}
        cursor = MongoDbConn.find(WORKFLOW_QUEUE_COLLECTION, criteria,
                                  projection={'_id': 0})
        candidates = list(cursor)
        return [
            queue for queue in candidates
            if 'user_groups' in queue
            and any(member['id'] in wanted_ids
                    for member in queue['user_groups'])
        ]
    except Exception as e:
        self.context.log(message=str(e), obj={'tb': traceback.format_exc()})
        return []
def find_documents(request, collection, query, solution_id, projection_fields=None):
    """
    Fetch one page of documents and shape each one into a summary record.

    :param request: request the pagination details are read from
    :param collection: collection to query
    :param query: Mongo filter
    :param solution_id: solution id forwarded to the confidence score lookup
    :param projection_fields: optional Mongo projection
    :return: list of summary dicts; ``None`` when an error was logged
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        cursor = MongoDbConn.find(collection, query, projection=projection_fields)
        sort_by, order_by_asc, skip, limit = get_pagination_details(
            request, sort_by='updated_ts', order_by_asc=-1, skip=0, limit=0)
        page = cursor.sort(sort_by, order_by_asc).skip(skip).limit(limit)

        summaries = []
        for raw in page:
            raw.pop("_id", None)
            summary = construct_json(raw, DOCUMENT_SUMMARY_FIELDS)
            # Everything that is not an image counts as a digital document.
            summary["is_digital"] = get_doc_type(summary['extn']) != "image"
            if "confidence_score" not in summary:
                summary["confidence_score"] = get_confidence_score(
                    summary, solution_id, summary["is_digital"])
            state = summary["doc_state"]
            summary["is_failed"] = state == "failed"
            summary["review_text"] = get_review_text(state, summary)
            summaries.append(summary)
        return summaries
    # TODO raise specific exception
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
    finally:
        context.end_span()
def construct_data(selector, field, solution_id):
    """
    Count matching records for consecutive time windows, newest window first.

    The window count and the window size in days come from
    ``get_iters(selector)`` (``length`` and ``step``). The first window ends
    today at 23:59:59 and each following window ends where the previous one
    started.

    :param selector: value handed to ``get_iters`` to pick the windowing
    :param field: field definition; ``key_type``, ``key``, ``name``,
        ``timestamp`` and ``collection`` are read
    :param solution_id: solution the records must belong to
    :return: list with one count per window
    """
    # get_iters is looked up once instead of once per value.
    iters = get_iters(selector)
    iterlen = iters['length']
    iterstep = iters['step']

    query = dict()
    if field['key_type'] == 'string':
        # Match both the lower-case and the title-case spelling of the name.
        name = str(field["name"])
        query["$or"] = [{field['key']: name.lower()},
                        {field['key']: name.title()}]
    elif field['key_type'] == 'bool':
        if "," in field['key']:
            query["$or"] = [{key.strip(): True}
                            for key in field['key'].split(",")]
        else:
            query[field['key']] = True
    query["solution_id"] = solution_id

    # Common code to get count.
    data = list()
    end_day = datetime.now().replace(hour=23, minute=59, second=59)
    for _ in range(iterlen):
        start_day = end_day + timedelta(days=-iterstep)
        query[field['timestamp']] = {'$lte': end_day, '$gte': start_day}
        # Sorting has no influence on a count, so the cursor is counted as is.
        # NOTE(review): Cursor.count() was removed in PyMongo 4; if
        # MongoDbConn.find returns a raw PyMongo cursor this needs
        # count_documents() on upgrade - confirm against the wrapper.
        data.append(MongoDbConn.find(field['collection'], query).count())
        end_day = start_day
    return data
def set_attributes(field, query):
    """
    Build one single-key dict per record matching ``query``.

    :param field: field definition; ``collection`` and ``key`` are read
    :param query: Mongo filter, also forwarded to ``get_review_value``
    :return: list of ``{field['key']: review value}`` dicts
    """
    matches = MongoDbConn.find(field['collection'], query)
    return [{field['key']: get_review_value(field, match, query)}
            for match in matches]
def get_solutions():
    """
    Return every active solution with its ``_id`` converted to a string.

    :return: list of solution records (empty when the lookup yields ``None``)
    """
    cursor = MongoDbConn.find(SOLN['collection'], {'is_active': True})
    if cursor is None:
        return []
    active = []
    for solution in cursor:
        solution['_id'] = str(solution['_id'])
        active.append(solution)
    return active
def get_entities(solution_id, request):
    """
    Load the domain objects of a solution, newest first, and format them.

    :param solution_id: solution whose entities are fetched
    :param request: not used by this function
    :return: result of ``format_entity_details`` for the fetched records
    """
    cursor = MongoDbConn.find(
        ENTITY_COLLECTION,
        {"solution_id": solution_id, "entity_type": "domain_object"},
        projection={"_id": 0, "entity_name": 1, "attributes": 1},
    )
    domain_objects = list(cursor.sort("ts", -1))
    return format_entity_details(domain_objects)
def get_entity_definitions(solution_id):
    """
    Return the entity definitions of a solution without their Mongo ``_id``.

    :param solution_id: solution whose definitions are fetched
    :return: list of definition records (empty when the lookup yields ``None``)
    """
    cursor = MongoDbConn.find(ENTITY_DEFN_COLL, query={'solution_id': solution_id})
    definitions = []
    if cursor is None:
        return definitions
    for definition in cursor:
        definition.pop("_id")
        definitions.append(definition)
    return definitions
def add_intent(atts, query):
    """
    Attach the stored intent to every attribute whose key matches a claims
    field.

    A field name such as ``policy_number`` is compared as ``Policy Number``.

    :param atts: attribute dicts; updated in place
    :param query: Mongo filter selecting the claims fields
    :return: the same ``atts`` list
    """
    records = MongoDbConn.find(CLAIMS_FIELDS_COLLECTION, query)
    if records is None:
        return atts
    for rec in records:
        words = [part.strip() for part in str(rec["name"]).split("_") if part != "_"]
        display_key = " ".join(words).strip().title()
        for att in atts:
            if "key" in att.keys() and att["key"] == display_key and "intent" in rec.keys():
                att["intent"] = intent_json(rec["intent"])
    return atts
def get_insight_template_from_db(self):
    """
    Load the active insight templates of ``self.solution_id`` from
    ``self.collection_name``.

    :return: list of template records with ``_id`` as a string, or ``None``
        when the lookup yields ``None``
    """
    cursor = MongoDbConn.find(
        self.collection_name,
        {'is_active': True, 'solution_id': self.solution_id},
    )
    if cursor is None:
        return None
    templates = []
    for template in cursor:
        template['_id'] = str(template['_id'])
        templates.append(template)
    return templates
def get_all_resources(type, solution_id):
    """
    List the resources of a solution, newest first, optionally filtered by
    type.

    :param type: resource type to filter on, or ``None`` for every type
    :param solution_id: solution whose resources are listed
    :return: ``{"status": "success", "data": [...]}`` with ``_id`` removed
        from each record
    """
    if type is None:
        query = dict(solution_id=solution_id)
    else:
        query = dict(type=type, solution_id=solution_id)

    resources = []
    for record in MongoDbConn.find(RESOURCES_COLLECTION, query).sort("_id", -1):
        record.pop("_id")
        resources.append(record)
    return {"status": "success", "data": resources}
def entity_get(solution_id, config, endpoint):
    """
    Serve the entity GET endpoints.

    ``""``, ``"domainmapping"`` and ``"domainobject"`` are answered from
    ENTITY_COLLECTION; any other endpoint is delegated to
    ``get_data_from_entity``.

    :param solution_id: solution whose entities are requested
    :param config: configuration forwarded to ``get_data_from_entity``
    :param endpoint: endpoint name
    :return: endpoint specific payload, ``[]`` when there is no data, or
        ``None`` when an error was logged
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    # implementing for entities only
    try:
        endpoints = ["", "domainmapping", "domainobject"]
        served_from_db = endpoint in endpoints

        if served_from_db:
            query = {"solution_id": solution_id}
            projection = {"_id": 0}
            if endpoint == "domainobject":
                query["entity_type"] = {"$ne": "entity"}
                projection["entity_name"] = 1
            cursor = MongoDbConn.find(ENTITY_COLLECTION, query, projection=projection)
            response = list(cursor.sort("ts", -1))
        else:
            response = get_data_from_entity(config, solution_id)

        if response is None:
            return []
        if not served_from_db:
            return response
        if endpoint == "domainobject":
            return {
                "domain_object": [entity["entity_name"] for entity in response],
                "status": "success"
            }

        hierarchy, _deleted, all_entities = convert_flat_to_heirarchial(response)
        if endpoint == "":
            return {"domain_object": hierarchy, "entities": all_entities}
        return process_all_domains(hierarchy)
    # TODO raise specific exception
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
    finally:
        context.end_span()
def template_train_upload_get(request, template_id):
    """
    Return the training samples uploaded for a template of the session's
    solution.

    :param request: request the session solution is read from
    :param template_id: template whose samples are fetched
    :return: success dict carrying the samples in ``data``, or a failure dict
        with the error message and traceback
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        solution_id = common.get_solution_from_session(request)
        criteria = {"solution_id": solution_id, "template_id": template_id}
        samples = list(MongoDbConn.find(TEMPLATE_TRAIN_SAMPLES_COLLECTION,
                                        criteria, {"_id": 0}))
        return {"status": "success",
                "msg": "retrieved template samples",
                "data": samples}
    # TODO raise specific exception
    except Exception as e:
        tb = traceback.format_exc()
        context.log(message=str(e), obj={"tb": tb})
        return {"status": "failure",
                "msg": "unknown error, " + str(e),
                "traceback": str(tb)}
    finally:
        context.end_span()
def get_solutions():
    """
    List the non-deleted solutions, most recently updated first.

    :return: success dict with the solution summaries in ``data`` (or a
        'No solutions exists' message when the lookup result is falsy), or a
        failure dict when fetching or formatting raised
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        solns_data = None
        try:
            cursor = MongoDbConn.find(SOLUTION_COLLECTION,
                                      {'is_deleted': False},
                                      projection={'_id': 0})
            solns_data = cursor.sort('updated_ts', -1)
        # TODO raise specific exception
        except Exception as e:
            context.log(message=str(e), obj={"tb": traceback.format_exc()})
            return {
                'status': 'failure',
                'msg': 'Error occurred while fetching solutions'
            }

        if not solns_data:
            return {'status': 'success', 'msg': 'No solutions exists'}

        summaries = []
        for soln in list(solns_data):
            summaries.append({
                "solution_id": soln['solution_id'],
                "solution_name": soln['solution_name'],
                "solution_type": soln['solution_type'],
                "description": soln['description'],
                "timestamp": soln['updated_ts'],
                'hocr_type': soln['hocr_type']
            })
        return {'status': 'success', 'msg': 'Solutions list', "data": summaries}
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
        return {
            'status': 'failure',
            'msg': 'Error occurred while fetching solutions'
        }
    finally:
        context.end_span()
def get_document_details(request, doc_id, page_no):
    """
    Return the extracted elements of one page of a document, enriched with
    table data, domain mapping, rule information and a default score.

    :param request: request the session solution is read from
    :param doc_id: document id
    :param page_no: page number (converted with ``int``)
    :return: ``{"status": "success", "data": {"elements": [...], "entity": {}}}``
        or a failure dict carrying the error text
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        solution_id = common.get_solution_from_session(request)
        query = {"doc_id": doc_id, "page_no": int(page_no)}
        projection = {"solution_id": 0, "updated_ts": 0, "created_ts": 0,
                      "_id": 0, "doc_id": 0}
        elements = MongoDbConn.find(DOC_ELEMENTS_COLLECTION, query,
                                    projection=projection)
        document = MongoDbConn.find_one(DOCUMENTS_COLLECTION, {"doc_id": doc_id},
                                        projection={"doc_id": 1, "entity": 1})
        processed_rules = {}
        if "entity" in document:
            processed_rules = get_all_rules_processed(document["entity"])
        mapping_data = get_doc_mapping_from_template(doc_id, solution_id)

        element_list = []
        for element in elements:
            domain_mapping = get_domain_mapping(mapping_data,
                                                element_id=element["element_id"],
                                                section_id=element["section_id"])
            if element["type"] == "table":
                table = dict()
                # Both keys have to be present. The previous
                # `"headings" and "columns" in element` only tested "columns",
                # so a table without headings raised KeyError and failed the
                # whole request.
                if "headings" in element and "columns" in element:
                    table["table"], table["headings"] = construct_table_data(
                        element["headings"], element["columns"], domain_mapping)
                element["tables"] = table
                element = remove_items(element, ["headings", "columns"])
            else:
                element["domain_mapping"] = ""
                if domain_mapping and isinstance(domain_mapping, dict) \
                        and "domain_mapping" in domain_mapping:
                    element["domain_mapping"] = domain_mapping["domain_mapping"]
                if processed_rules:
                    element["rules"] = get_rules_info(element["domain_mapping"],
                                                      processed_rules,
                                                      solution_id,
                                                      element["text"])
            if "score" not in element:
                element["score"] = 0
            element_list.append(element)

        data = {"elements": element_list, "entity": {}}
        return {"status": "success", "data": data}
    # TODO raise specific exception
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
        return {"status": "failure", "msg": "Error occured while processing",
                "error": str(e)}
    finally:
        context.end_span()
def get_count(request, count=None):
    """
    Count the known-format templates, unknown-format templates and domain
    objects of the session's solution.

    :param request: get request
    :param count: optional dict the counts are written into; a fresh dict is
        created per call when omitted, so counts never leak between calls
    :return: success dict with the counts in ``data``, or a ``failed`` dict
        carrying the error text
    """
    if count is None:
        count = {}
    solution_id = common.get_solution_from_session(request)
    context = tracer.get_context(request_id=str(uuid4()), log_level="ERROR")
    context.start_span(component=__name__)
    try:
        query = dict(solution_id=solution_id, is_deleted=False)
        templates = MongoDbConn.find(TEMPLATE_COLLECTION, query)
        # Each record stores the template as a JSON string.
        templates = [json.loads(a["template"]) for a in templates]
        count["known_format_count"] = len(
            [a for a in templates if a["template_type"] == "known"])
        count["unknown_format_count"] = len(
            [a for a in templates if a["template_type"] in UNKNOWN_TYPES])
        try:
            config = config_from_endpoint(ENTITY_CONFIG, "")
            test = entity_get(solution_id, config, "")
            count["domain_objects"] = len(test["domain_object"])
            return {
                "status": "success",
                "msg": "recived count Successfully",
                "data": count
            }
        except Exception as e:
            # Logged as well as reported, so the failure can be traced.
            context.log(message=str(e), obj={"tb": traceback.format_exc()})
            return {
                "status": "failed",
                "msg": "error in getting domain object count",
                "error": str(e)
            }
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
        return {
            "status": "failed",
            "msg": "error in getting count",
            "error": str(e)
        }
    finally:
        context.end_span()
def get_template(solution_id, template_type="", template_id="", payload=None):
    """
    Fetch a single template by id, or a filtered and optionally paginated
    list of templates.

    :param solution_id: solution whose templates are fetched
    :param template_type: ``""`` for no filter, ``"allpublished"`` for every
        non-draft or unknown-type template, ``"unknown"`` for both unknown
        types, or an exact template type
    :param template_id: when not empty, only this template is returned
    :param payload: optional dict; ``page_no`` and ``no_of_recs`` drive
        pagination of the list result
    :return: dict with ``success``, ``msg``, ``status`` and, on success,
        ``data`` and ``total_count``; on failure ``error`` holds the traceback
    """
    if payload is None:
        payload = {}
    try:
        unknown_types = ["unknown_known", "unknown_unknown"]
        query = dict(solution_id=solution_id, is_deleted=False)
        if template_id != "":
            query["template_id"] = template_id
            t = MongoDbConn.find_one(TEMPLATE_COLLECTION, query, {"_id": 0})
            # Constructing Response
            templates = json.loads(t["template"]) if t else {}
            templates["template_id"] = templates["doc_id"]
            templates["no_of_pages"] = len(templates["pages"].keys())
            templates["elements"] = convert_elements_old(templates["elements"])
            count = 1
        else:
            t = MongoDbConn.find(TEMPLATE_COLLECTION, query, {"_id": 0})
            templates = list()
            for a in t:
                a = json.loads(a["template"])
                a["template_id"] = a["doc_id"]
                templates.append(a)
            if template_type != "":
                if template_type == "allpublished":
                    templates = [a for a in templates
                                 if not a["is_draft"]
                                 or a["template_type"] in unknown_types]
                else:
                    template_type = unknown_types if template_type == "unknown" \
                        else [template_type]
                    templates = [a for a in templates
                                 if a["template_type"] in template_type]
            count = len(templates)

        page_no = payload.get("page_no")
        limit = payload.get("no_of_recs")
        # Pagination needs both values; a page number without a page size used
        # to raise TypeError on `count > None` and turn into a failure reply.
        if page_no and limit is not None and count > limit:
            skip = (int(page_no) - 1) * limit
            templates = templates[skip:skip + limit]
        return dict(success=True, msg="Template data", data=templates,
                    total_count=count, status="success")
    except Exception:
        return dict(success=False, error=traceback.format_exc(),
                    msg="Failed to get template", status="failure")
def training_set_get(collection, query, solution_id, flag=False):
    """
    Fetch the records of a collection for a solution, newest first.

    ``solution_id`` is written into the supplied ``query`` dict.

    :param collection: collection to read
    :param query: Mongo filter (updated in place with ``solution_id``)
    :param solution_id: solution the records must belong to
    :param flag: when falsy, the content of ``upload_resource_types.json`` is
        added under ``resource_types``
    :return: dict with the records in ``data``, or a failure dict
    """
    context = tracer.get_context(request_id=str(uuid4()), log_level="INFO")
    context.start_span(component=__name__)
    try:
        query['solution_id'] = solution_id
        cursor = MongoDbConn.find(collection, query).sort("_id", -1)
        records = []
        for record in cursor:
            record['_id'] = str(record['_id'])
            records.append(record)
        final_data = {'data': records}
        if not flag:
            final_data['resource_types'] = get_file_contents("upload_resource_types.json")
        return final_data
    # TODO raise specific exception
    except Exception as e:
        context.log(message=str(e), obj={"tb": traceback.format_exc()})
        return {'status': 'failure',
                'msg': 'Error in get resource library',
                'errorMsg': str(e)}
    finally:
        context.end_span()
def get_all_documents(self, solution_id):
    """
    Fetch the newest non-test documents of a solution.

    Documents count as non-test when ``is_test`` is ``False`` or absent. At
    most ``DOC_COUNT`` records are returned, ordered by ``_id`` descending.

    :param solution_id: Session solution Id
    :return: list of document records; an empty list when anything goes wrong
    """
    try:
        not_test = [{"is_test": False}, {"is_test": {"$exists": False}}]
        query = {"solution_id": solution_id, "$or": not_test}
        projection = {
            "doc_id": 1,
            "solution_id": 1,
            "metadata": 1,
            "life_cycle": 1,
            "doc_state": 1,
            "children": 1,
            "created_ts": 1,
            "page_groups": 1,
            '_id': 0,
            'root_id': 1
        }
        cursor = MongoDbConn.find(DOCUMENTS_COLLECTION, query, projection=projection)
        newest = cursor.sort('_id', -1).limit(int(DOC_COUNT))
        return list(newest)
    except Exception as e:
        self.context.log(message=str(e), obj={'tb': traceback.format_exc()})
        return []
def fetch_sources(self, solution_id, source_id):
    """
    Fetch all sources of ``self.source_type`` for a solution, or just one of
    them, most recently updated first.

    :param solution_id: Session Solution Id
    :param source_id: Sources Id; a falsy value selects every source
    :return: response dict with ``status``, ``status_code`` and ``msg``, plus
        ``data`` and ``total_recs`` on success
    """
    try:
        criteria = {
            'solution_id': solution_id,
            'is_deleted': False,
            'source_type': self.source_type
        }
        if source_id:
            criteria['source_id'] = source_id
        cursor = MongoDbConn.find(SOURCES_COLLECTION, criteria,
                                  projection={'_id': 0})
        source_recs = sorted(cursor, key=lambda rec: rec['updated_ts'],
                             reverse=True)
        return {
            'status': 'success',
            'status_code': STATUS_CODES['OK'],
            'msg': 'Source fetched successfully for this solution.',
            'data': source_recs,
            'total_recs': len(source_recs)
        }
    except Exception as e:
        self.context.log(message=str(e), obj={"tb": traceback.format_exc()})
        return {
            'status': 'failure',
            'status_code': STATUS_CODES['INTERNAL_SERVER_ERROR'],
            'msg': 'Failed to fetch the source/s information.'
        }
def fetch_elements_data(solution_id, doc_id, section_id, need_review_count, extracted_count, template_data):
    """
    Load the elements of one document section and flag each one for review.

    :param solution_id: solution the elements belong to
    :param doc_id: document id
    :param section_id: section id; also used as prefix of each ``temp_id``
    :param need_review_count: running count, incremented per element that
        still needs review
    :param extracted_count: running count, incremented per element that is
        not removed by a ``delete`` feedback
    :param template_data: optional template dict; its
        ``thresholds.annotation.entity_confidence_score`` is the score above
        which an element is auto-accepted (100 when unavailable)
    :return: tuple ``(elements, need_review_count, extracted_count, heading)``
        where ``heading`` is the text of the last ``heading`` element, or
        ``'section'`` when there is none
    """
    query = {"doc_id": doc_id, "section_id": section_id, "solution_id": solution_id, "is_deleted": False}
    projection = {"element_id": 1, "label": 1, "name": 1, "page_no": 1, "list_elements": 1, "score": 1,
                  "section_id": 1, "solution_id": 1, "text": 1, "type": 1, "label_coordinates": 1,
                  "headings": 1, "columns": 1, "value_coordinates": 1, "_id": 0, 'groups': 1,
                  "label_coordinates_list": 1, 'insight_id': 1, 'slice_path': 1, 'feedback': 1,
                  "value_coordinates_list": 1}
    section_elements = MongoDbConn.find(DOC_ELEMENTS_COLLECTION, query, projection=projection)
    elements = []
    heading = 'section'
    extraction_score = 100
    if template_data and "thresholds" in template_data:
        try:
            extraction_score = template_data['thresholds']['annotation']['entity_confidence_score']
        except Exception as e:
            # Threshold missing or malformed: keep the default.
            extraction_score = 100
    for ele in section_elements:
        extracted_count += 1
        # Defaults: every element needs review until score or feedback says
        # otherwise.
        ele['is_corrected'] = False
        ele['is_accept'] = False
        ele['need_review'] = True
        ele['extracted_text'] = ''
        # insight_id -> feedback text, handed to the table builder below.
        feedback_dict = dict()
        if 'score' not in ele:
            ele['score'] = 100
        if ele['score'] > extraction_score:
            ele['need_review'] = False
            ele['is_accept'] = True
        if 'text' in ele:
            # Keep the originally extracted text; 'text' may be overwritten
            # by feedback below.
            ele['extracted_text'] = ele['text']
        if 'feedback' in ele and len(ele['feedback']) > 0:
            feedback_list = ele['feedback']
            latest_feedback = feedback_list[-1]
            if 'feedback_type' in latest_feedback and \
                    latest_feedback['feedback_type'] == 'delete':
                # Deleted by the reviewer: undo the count and drop the element.
                extracted_count -= 1
                continue
            # Any other feedback means the element has been looked at.
            ele['need_review'] = False
            if 'text' in latest_feedback or 'groups' in latest_feedback:
                if 'text' in latest_feedback:
                    latest_feedback_text = latest_feedback['text']
                    ele['text'] = latest_feedback_text
                if 'feedback_type' in latest_feedback:
                    if latest_feedback['feedback_type'] == 'edit':
                        ele['is_corrected'] = True
                    elif latest_feedback['feedback_type'] == 'accept':
                        ele['is_accept'] = True
            for feedback in feedback_list:
                if 'insight_id' in feedback:
                    if 'text' in feedback:
                        feedback_dict[feedback['insight_id']] = feedback['text']
                    else:
                        feedback_dict[feedback['insight_id']] = ''
        # NOTE(review): the counter is updated before the table re-scoring
        # below, so a table that gets auto-accepted there is still counted
        # here - confirm this is intended.
        if ele['need_review']:
            need_review_count += 1
        ele = get_coordinates_list(ele, "value_coordinates", "value_coordinates_list")
        ele = get_coordinates_list(ele, "label_coordinates", "label_coordinates_list")
        temp_id = section_id + '_' + ele['element_id']
        ele['temp_id'] = temp_id
        if ele["type"] == "table":
            table = dict()
            if "columns" in ele:
                if "headings" not in ele:
                    ele["headings"] = None
                table["table"], table["headings"], avg_score, table["column_no_list"] = \
                    construct_table_data_new(ele["columns"], feedback_dict, temp_id)
                ele = remove_items(ele, ["headings", "columns"])
                ele["tables"] = table
                # The table score is the average returned by the builder.
                ele['score'] = avg_score
                if ele['score'] > extraction_score:
                    ele['need_review'] = False
                    ele['is_accept'] = True
        if ele["type"] == "list" and "list_elements" in ele:
            # Reduce each list item to its text and coordinate lists.
            ele["list_elements"] = [construct_json(item, ["text", "value_coordinates_list",
                                                          "label_coordinates_list"])
                                    for item in ele["list_elements"]]
        elements.append(ele)
        if ele['type'] == 'heading':
            heading = ele['text']
    return elements, need_review_count, extracted_count, heading
# "fn" : "len", # "rval" : "4", # "lval" : "metadata", # "op" : "eq" # }, # { # "fn" : "trim", # "rval" : "76", # "lval" : "root_id", # "op" : "lte" # } # ] # }, # } query_test = {"solution_id": solution_id_test} projection_test = {'_id': 0} documents_test = MongoDbConn.find(DOCUMENTS_COLLECTION, query_test, projection=projection_test).\ sort('updated_ts', -1).limit(int(DOC_COUNT)) documents_list_test = [doc for doc in documents_test] qd_map = QueueDocMapper() a = qd_map.map_document_queues(solution_id_test, queues_test, documents_list_test) # self.display_to_d_state = {'Needs Classification': 'classified', # 'Post Processing': 'post_processed', # 'Processing': 'processing', # 'Reviewed': 'reviewed', # 'Extraction': 'extracted', # 'Annotation & Entity Linking': 'processed', # 'Error': 'failed'}