def delete_project(event, context=None):
    """
    End-point: Deletes a project. Also deletes the project's casebase index
    if it exists, and any ontology similarity indices the project created.

    :param event: Lambda-style event; project id is in pathParameters['id'].
    :param context: unused Lambda context.
    :return: dict response with statusCode 200 and the ES delete result.
    """
    pid = event['pathParameters']['id']  # project id
    es = getESConn()
    # delete casebase
    proj = utility.getByUniqueField(es, projects_db, "_id", pid)  # get project
    casebase = proj['casebase']
    es.indices.delete(index=casebase, ignore=[400, 404])  # delete index if it exists
    # delete any ontology indices that were created (if any).
    # Guard the 'options' lookup: other handlers in this file treat 'options'
    # as optional, so a plain attrib['options'] here could raise KeyError.
    for attrib in proj['attributes']:
        if attrib['type'] == "Ontology Concept":
            ontologyId = (attrib.get('options') or {}).get('id')
            if ontologyId is not None:
                retrieve.removeOntoIndex(ontologyId)
    # delete project
    res = es.delete(index=projects_db, id=pid)
    response = {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(res['result'])
    }
    return response
def cbr_retain(event, context=None):
    """
    End-point: Completes the Retain step of the CBR cycle.

    Note: If the new case have id of an existing case, the new case will
    replace the existing entry.
    """
    params = json.loads(event['body'])  # parameters in request body
    es = getESConn()

    # Resolve the project: either supplied inline or looked up by casebase name.
    proj = params.get('project')
    if proj is None:
        projId = params.get('projectId')  # name of casebase
        proj = utility.getByUniqueField(es, projects_db, "casebase", projId)

    # Prepare the new case: add vectors for Semantic USE fields, then attach a
    # content hash (computed over the key-ordered dict) for duplicate detection.
    new_case = retrieve.add_vector_fields(proj['attributes'], params['data'])
    serialised = json.dumps(OrderedDict(sorted(new_case.items()))).encode('utf-8')
    new_case['hash__'] = str(hashlib.md5(serialised).digest())

    is_duplicate = (not proj['retainDuplicateCases'] and
                    utility.indexHasDocWithFieldVal(es, index=proj['casebase'],
                                                    field='hash__',
                                                    value=new_case['hash__']))
    if is_duplicate:
        result = "The case already exists in the casebase"
        statusCode = 400
    else:
        result = es.index(index=proj['casebase'], body=new_case)
        statusCode = 201

    return {
        "statusCode": statusCode,
        "headers": headers,
        "body": json.dumps(result)
    }
def update_project(event, context=None):
    """
    End-point: Updates a project.

    Updates the stored project document with the request body, then — if the
    project already has a casebase — recomputes ontology similarity measures
    for any "Ontology Concept" attribute that changed since the previous
    version of the project. Returns the updated document (with 'id__' added
    back) and statusCode 201.
    """
    pid = event['pathParameters']['id']
    proj_old = utility.getByUniqueField(getESConn(), projects_db, "_id", pid)  # get previous version of project
    body = json.loads(event['body'])  # get to-update project from request body
    body.pop('id__', None)  # remove id__ (was added to dict to use a plain structure)
    source_to_update = {}
    source_to_update['doc'] = body  # parameters in request body
    # print(source_to_update)
    es = getESConn()
    res = es.update(index=projects_db, id=pid, body=source_to_update)
    # print(res)
    # create the ontology similarity if specified as part of project attributes
    # (can be a lengthy operation for mid to large ontologies!)
    # NOTE(review): body['hasCasebase'] is accessed without .get(); this assumes
    # the request body always carries that key — confirm with callers.
    if body['hasCasebase']:  # check that the casebase has been created since similarity is computed when the casebase is created
        for attrib in body['attributes']:  # for each project casebase attribute
            if attrib['type'] == "Ontology Concept" and attrib.get('options') is not None and \
                    attrib['options']:  # check that the attribute is ontology based
                # get the pre-project-update version of the attribute (None if newly added)
                old_onto_attrib = next((item for item in proj_old['attributes'] if item['name'] == attrib['name']),
                                       None)
                # update ontology similarity measures only if the attribute changed
                if old_onto_attrib is not None and attrib.get('similarityType') is not None and attrib != old_onto_attrib:
                    # 'san' = feature-based similarity, otherwise Wu-Palmer ('wup')
                    sim_method = 'san' if attrib['similarityType'] == 'Feature-based' else 'wup'
                    retrieve.setOntoSimilarity(attrib['options'].get('id'), attrib['options'].get('sources'),
                                               relation_type=attrib['options'].get('relation_type', None),
                                               root_node=attrib['options'].get('root'),
                                               similarity_method=sim_method)
    # re-attach the id so the client receives the full, addressable document
    source_to_update['doc']['id__'] = pid
    response = {
        "statusCode": 201,
        "headers": headers,
        "body": json.dumps(source_to_update['doc'])
    }
    return response
def save_case_list(event, context=None):
    """
    End-point: Saves list of case instances.
    Creates index for the casebase if one does not exist, bulk-indexes the
    cases, marks the project as having a casebase, and (re)computes ontology
    similarity measures for ontology-based attributes where missing.

    :param event: Lambda-style event; cases are in the JSON body, project id
        in pathParameters['id'].
    :param context: unused Lambda context.
    :return: dict response with statusCode 201 and the bulk-index result.
    """
    doc_list = json.loads(event['body'])  # parameters in request body
    es = getESConn()
    pid = event['pathParameters']['id']
    proj = utility.getByUniqueField(es, projects_db, "_id", pid)  # project
    # create index with mapping if it does not exist already
    project.indexMapping(es, proj)

    # Enrich each case and add a hash field for duplicate-checking.
    # Assign the transformed dict back into doc_list: the enrichment helpers
    # may return new dict objects, and rebinding the loop variable alone would
    # silently drop their additions before the bulk insert.
    for i, case in enumerate(doc_list):
        case = retrieve.add_vector_fields(proj['attributes'], case)  # add vectors to Semantic USE fields
        case = retrieve.add_lowercase_fields(proj['attributes'], case)  # lowercase values for EqualIgnoreCase fields
        # hash of the key-ordered dict for easy detection of duplicates
        case['hash__'] = str(
            hashlib.md5(json.dumps(OrderedDict(sorted(case.items()))).encode('utf-8')).digest())
        doc_list[i] = case

    resp = helpers.bulk(es, doc_list, index=proj['casebase'], doc_type="_doc")

    # Indicate that the project has a casebase
    proj['hasCasebase'] = True
    source_to_update = {'doc': proj}
    es.update(index=projects_db, id=pid, body=source_to_update)

    # create the ontology similarity if specified as part of project attributes
    # (can be a lengthy operation for mid to large ontologies!)
    for attrib in proj['attributes']:
        if attrib['type'] == "Ontology Concept" and attrib.get('similarityType') is not None \
                and attrib.get('options') is not None \
                and retrieve.checkOntoSimilarity(attrib['options'].get('id'))['statusCode'] != 200:
            # 'san' = feature-based similarity, otherwise Wu-Palmer ('wup')
            sim_method = 'san' if attrib['similarityType'] == 'Feature-based' else 'wup'
            retrieve.setOntoSimilarity(
                attrib['options'].get('id'),
                attrib['options'].get('sources'),
                relation_type=attrib['options'].get('relation_type'),
                root_node=attrib['options'].get('root'),
                similarity_method=sim_method)

    response = {
        "statusCode": 201,
        "headers": headers,
        "body": json.dumps(resp)
    }
    return response
def get_project(event, context=None):
    """
    End-point: Retrieves a project (details of a CBR application).
    """
    project_id = event['pathParameters']['id']
    # look up the project document by its unique id
    project_doc = utility.getByUniqueField(getESConn(), projects_db, "_id", project_id)
    return {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(project_doc)
    }
def create_project_index(event, context=None):
    """
    End-point: Creates the mapping for an index if it does not exist.

    Also flips the project's hasCasebase flag to True (the casebase exists,
    but is empty). Returns the ES update result with statusCode 201.
    """
    es = getESConn()
    pid = event['pathParameters']['id']
    proj = utility.getByUniqueField(es, projects_db, "_id", pid)  # project
    # create the casebase index mapping; its result is not reported back
    # (the original bound it to a local that was immediately overwritten)
    project.indexMapping(es, proj)
    # Indicate that the project now has an (empty) casebase
    proj['hasCasebase'] = True
    source_to_update = {'doc': proj}
    res = es.update(index=projects_db, id=pid, body=source_to_update)
    response = {"statusCode": 201, "headers": headers, "body": json.dumps(res)}
    return response
def delete_project(event, context=None):
    """
    End-point: Deletes a project. Also deletes the project's casebase index if it exists.

    NOTE(review): this module defines delete_project twice; this later
    definition (which does NOT remove ontology indices) shadows the earlier
    one at import time — confirm which version is intended.
    """
    project_id = event['pathParameters']['id']  # project id
    es = getESConn()
    # fetch the project so we know which casebase index to drop
    proj = utility.getByUniqueField(es, projects_db, "_id", project_id)
    # drop the casebase index; 400/404 are ignored so a missing index is fine
    es.indices.delete(index=proj['casebase'], ignore=[400, 404])
    # remove the project document itself
    res = es.delete(index=projects_db, id=project_id)
    return {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(res['result'])
    }
def save_case_list(event, context=None):
    """
    End-point: Saves list of case instances.
    Creates index for the casebase if one does not exist.

    NOTE(review): this module defines save_case_list twice; this later,
    simpler definition (no vector/lowercase enrichment, no ontology
    similarity setup) shadows the earlier one at import time — confirm
    which version is intended.
    """
    doc_list = json.loads(event['body'])  # cases from the request body
    es = getESConn()
    pid = event['pathParameters']['id']
    proj = utility.getByUniqueField(es, projects_db, "_id", pid)  # project
    # create index with mapping if it does not exist already
    project.indexMapping(es, proj)
    # Add a hash (of the key-ordered dict) to each case for duplicate detection
    for x in doc_list:
        x['hash__'] = str(
            hashlib.md5(json.dumps(OrderedDict(sorted(x.items()))).encode('utf-8')).digest())
    resp = helpers.bulk(es, doc_list, index=proj['casebase'], doc_type="_doc")
    # Indicate that the project has a casebase
    proj['hasCasebase'] = True
    source_to_update = {'doc': proj}
    es.update(index=projects_db, id=pid, body=source_to_update)
    response = {
        "statusCode": 201,
        "headers": headers,
        "body": json.dumps(resp)
    }
    return response
def cbr_retrieve(event, context=None):
    """
    End-point: Completes the Retrieve step of the CBR cycle.

    Builds an Elasticsearch bool/should query from the weighted query
    features, retrieves the top-k most similar cases, and composes a
    'recommended' case: known feature values are copied from the query,
    unknown ones are filled using the per-feature reuse strategy
    (Maximum/Minimum/Mean/Median/Mode) over the best-k results.
    Returns {'recommended', 'bestK', 'retrieveTime', 'esTime'}.
    """
    start = timer()  # start timer
    result = {'recommended': {}, 'bestK': []}
    es = getESConn()  # es connection
    # query["query"]["bool"]["should"].append(queryFnc)
    queryAdded = False
    params = json.loads(event['body'])  # parameters in request body
    # print(params)
    queryFeatures = params['data']
    proj = params.get('project', None)
    if proj is None:
        projId = params.get('projectId', None)  # name of casebase
        proj = utility.getByUniqueField(es, projects_db, "_id", projId)
    proj_attributes = proj['attributes']
    globalSim = params['globalSim']
    k = params['topk']
    query = {"query": {"bool": {"should": []}}}
    query["size"] = int(k)  # top k results
    # Build one scoring clause per usable query feature: it must have a
    # non-empty value, a positive weight, and a similarity other than "None".
    # NOTE(review): int(entry.get('weight', 0)) truncates fractional weights,
    # so a weight strictly between 0 and 1 is skipped — confirm intended.
    for entry in queryFeatures:
        if ('value' in entry) and entry['value'] is not None and "" != entry['value'] and int(
                entry.get('weight', 0)) > 0 and entry['similarity'] != "None":
            queryAdded = True
            field = entry['name']
            similarityType = entry['similarity']
            options = retrieve.get_attribute_by_name(proj['attributes'], field).get('options', None)
            # print(options)
            # fieldType = entry['type']
            # use lowercase when field is specified as case-insensitive
            value = entry['value'].lower() if similarityType == 'EqualIgnoreCase' else entry['value']
            weight = entry['weight']
            # isProblem = entry['unknown']
            # strategy = entry['strategy']
            qfnc = retrieve.getQueryFunction(field, value, weight, similarityType, options)
            query["query"]["bool"]["should"].append(qfnc)
    if not queryAdded:  # retrieval all (up to k) if not query was added
        query["query"]["bool"]["should"].append(retrieve.MatchAll())
    # perform retrieval
    counter = 0
    res = es.search(index=proj['casebase'], body=query)
    for hit in res['hits']['hits']:
        entry = hit['_source']
        entry.pop('hash__', None)  # remove hash field and value
        entry = retrieve.remove_vector_fields(proj_attributes, entry)  # remove vectors from Semantic USE fields
        if counter == 0:
            # best-scoring case seeds the recommendation (deep copy so later
            # per-feature overrides do not mutate the bestK entry)
            result['recommended'] = copy.deepcopy(entry)
        # entry['id__'] = hit['_id']  # using 'id__' to track this case (this is removed during an update operation)
        entry['score__'] = hit['_score']  # removed during an update operation
        result['bestK'].append(entry)
        counter += 1
    # Recommend: Get the recommended result using chosen reuse strategies for
    # unknown attribute values and keep known attribute values supplied
    if counter > 0:
        for entry in queryFeatures:
            if not entry['unknown'] and ('value' in entry) and entry['value'] is not None and "" != entry[
                    'value']:  # copy known values
                result['recommended'][entry['name']] = entry['value']
            # use reuse strategies for unknown fields ("Best Match" keeps the
            # value from the top-scoring case seeded above)
            if entry.get('similarity') is not None and entry['unknown'] and entry[
                    'strategy'] != "Best Match":
                if entry['strategy'] == "Maximum":
                    result['recommended'][entry['name']] = max(d[entry['name']] for d in result['bestK'])
                if entry['strategy'] == "Minimum":
                    result['recommended'][entry['name']] = min(d[entry['name']] for d in result['bestK'])
                if entry['strategy'] == "Mean":
                    result['recommended'][entry['name']] = np.mean([x[entry['name']] for x in result['bestK']])
                if entry['strategy'] == "Median":
                    result['recommended'][entry['name']] = np.median([x[entry['name']] for x in result['bestK']])
                if entry['strategy'] == "Mode":
                    result['recommended'][entry['name']] = statistics.mode([x[entry['name']] for x in result['bestK']])
    end = timer()  # end timer
    result['retrieveTime'] = end - start
    result['esTime'] = res['took']  # time reported by Elasticsearch itself
    response = {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(result)
    }
    return response