Example #1
0
def delete_project(event, context=None):
  """
  End-point: Deletes a project. Also deletes the project's casebase index if it exists.
  """
  project_id = event['pathParameters']['id']
  conn = getESConn()
  project = utility.getByUniqueField(conn, projects_db, "_id", project_id)
  # drop the casebase index first; ignore "bad request"/"not found" so a
  # missing index does not abort the delete
  conn.indices.delete(index=project['casebase'], ignore=[400, 404])
  # tear down any ontology indices referenced by "Ontology Concept" attributes
  for attribute in project['attributes']:
    if attribute['type'] != "Ontology Concept":
      continue
    onto_id = attribute['options'].get('id')
    if onto_id is not None:
      retrieve.removeOntoIndex(onto_id)
  # finally remove the project record itself
  outcome = conn.delete(index=projects_db, id=project_id)

  return {
    "statusCode": 200,
    "headers": headers,
    "body": json.dumps(outcome['result'])
  }
Example #2
0
def cbr_retain(event, context=None):
  """
  End-point: Completes the Retain step of the CBR cycle.
  Note: a new case whose id matches an existing case replaces that entry.
  """
  params = json.loads(event['body'])  # request-body parameters
  es = getESConn()
  project = params.get('project')
  if project is None:
    # fall back to resolving the project by its casebase name
    project = utility.getByUniqueField(es, projects_db, "casebase", params.get('projectId'))

  case = params['data']
  case = retrieve.add_vector_fields(project['attributes'], case)  # add vectors to Semantic USE fields
  # hash of the key-sorted case, used for duplicate detection
  case['hash__'] = str(hashlib.md5(json.dumps(OrderedDict(sorted(case.items()))).encode('utf-8')).digest())

  duplicate = not project['retainDuplicateCases'] and utility.indexHasDocWithFieldVal(
      es, index=project['casebase'], field='hash__', value=case['hash__'])
  if duplicate:
    outcome = "The case already exists in the casebase"
    status = 400
  else:
    outcome = es.index(index=project['casebase'], body=case)
    status = 201

  return {
    "statusCode": status,
    "headers": headers,
    "body": json.dumps(outcome)
  }
Example #3
0
def update_project(event, context=None):
  """
  End-point: Updates a project.
  """
  pid = event['pathParameters']['id']
  previous = utility.getByUniqueField(getESConn(), projects_db, "_id", pid)  # pre-update snapshot
  updated = json.loads(event['body'])  # to-update project from request body
  updated.pop('id__', None)  # id__ was only added client-side for a flat structure
  update_body = {'doc': updated}
  es = getESConn()
  es.update(index=projects_db, id=pid, body=update_body)

  # Recompute ontology similarity measures for changed ontology attributes.
  # Only relevant once the casebase exists, since similarity is computed at
  # casebase creation (can be lengthy for mid-to-large ontologies!).
  if updated['hasCasebase']:
    for attrib in updated['attributes']:
      is_onto_attrib = (attrib['type'] == "Ontology Concept"
                        and attrib.get('options') is not None and attrib['options'])
      if not is_onto_attrib:
        continue
      # locate the pre-update version of this attribute by name
      old_attrib = next((a for a in previous['attributes'] if a['name'] == attrib['name']), None)
      if old_attrib is not None and attrib.get('similarityType') is not None and attrib != old_attrib:
        method = 'san' if attrib['similarityType'] == 'Feature-based' else 'wup'
        retrieve.setOntoSimilarity(attrib['options'].get('id'), attrib['options'].get('sources'),
                                   relation_type=attrib['options'].get('relation_type', None),
                                   root_node=attrib['options'].get('root'),
                                   similarity_method=method)

  update_body['doc']['id__'] = pid
  return {
    "statusCode": 201,
    "headers": headers,
    "body": json.dumps(update_body['doc'])
  }
Example #4
0
def save_case_list(event, context=None):
    """
    End-point: Saves a list of case instances into the project's casebase.
    Creates the index for the casebase if one does not exist, bulk-indexes the
    cases, marks the project as having a casebase, and (re)builds ontology
    similarity measures for ontology-based attributes when missing.
    """
    # try:
    doc_list = json.loads(event['body'])  # parameters in request body
    es = getESConn()
    pid = event['pathParameters']['id']
    proj = utility.getByUniqueField(es, projects_db, "_id", pid)  # project
    # create index with mapping if it does not exist already
    project.indexMapping(es, proj)

    # Add documents to created index
    # Add a hash field to each case for duplicate-checking.
    # NOTE(review): `x` is reassigned from the helpers' return values before
    # hash__ is set; if those helpers return new dicts rather than mutating in
    # place, the vector/lowercase fields and hash would be set on copies, not on
    # the entries in doc_list that get bulk-indexed below — confirm the helpers
    # mutate their input.
    for x in doc_list:  # generate a hash after ordering dict by key
        x = retrieve.add_vector_fields(proj['attributes'],
                                       x)  # add vectors to Semantic USE fields
        x = retrieve.add_lowercase_fields(
            proj['attributes'],
            x)  # use lowercase values for EqualIgnoreCase fields
        x['hash__'] = str(
            hashlib.md5(
                json.dumps(OrderedDict(sorted(x.items()))).encode('utf-8')).
            digest())  # case hash for easy detection of duplicates
    # bulk-index the list of docs into the casebase index
    resp = helpers.bulk(es, doc_list, index=proj['casebase'], doc_type="_doc")

    # Indicate that the project has a casebase
    proj['hasCasebase'] = True
    source_to_update = {'doc': proj}
    # print(source_to_update)
    res = es.update(index=projects_db, id=pid, body=source_to_update)
    # print(res)

    # create the ontology similarity if specified as part of project attributes
    # (can be a lengthy operation for mid to large ontologies!); only built when
    # checkOntoSimilarity reports the measure does not already exist (non-200)
    for attrib in proj['attributes']:
        if attrib['type'] == "Ontology Concept" and attrib.get(
                'similarityType') is not None and attrib.get(
                    'options') is not None and retrieve.checkOntoSimilarity(
                        attrib['options'].get('id'))['statusCode'] != 200:
            sim_method = 'san' if attrib[
                'similarityType'] == 'Feature-based' else 'wup'
            retrieve.setOntoSimilarity(
                attrib['options'].get('id'),
                attrib['options'].get('sources'),
                relation_type=attrib['options'].get('relation_type'),
                root_node=attrib['options'].get('root'),
                similarity_method=sim_method)

    response = {
        "statusCode": 201,
        "headers": headers,
        "body": json.dumps(resp)
    }
    return response
Example #5
0
def get_project(event, context=None):
  """
  End-point: Retrieves a project (details of a CBR application).
  """
  project_id = event['pathParameters']['id']
  # look the project up by document id in the projects index
  project = utility.getByUniqueField(getESConn(), projects_db, "_id", project_id)

  return {
    "statusCode": 200,
    "headers": headers,
    "body": json.dumps(project)
  }
Example #6
0
def create_project_index(event, context=None):
    """
    End-point: Creates the mapping for an index if it does not exist.

    Also flags the project as having a (currently empty) casebase. The
    response body is the result of the project-record update.
    """
    es = getESConn()
    pid = event['pathParameters']['id']
    proj = utility.getByUniqueField(es, projects_db, "_id", pid)  # project
    # Create the casebase index with its mapping (no-op if it already exists).
    # The mapping result was previously stored in an unused local that was
    # immediately shadowed, so it is discarded explicitly here.
    project.indexMapping(es, proj)

    # Indicate that the project has a casebase (empty)
    proj['hasCasebase'] = True
    source_to_update = {'doc': proj}
    res = es.update(index=projects_db, id=pid, body=source_to_update)

    response = {"statusCode": 201, "headers": headers, "body": json.dumps(res)}
    return response
Example #7
0
def delete_project(event, context=None):
    """
    End-point: Deletes a project. Also deletes the project's casebase index if it exists.
    """
    project_id = event['pathParameters']['id']
    conn = getESConn()
    project = utility.getByUniqueField(conn, projects_db, "_id", project_id)
    # remove the casebase index, tolerating bad-request/not-found responses
    # so a missing index does not abort the project delete
    conn.indices.delete(index=project['casebase'], ignore=[400, 404])
    # then remove the project record itself
    outcome = conn.delete(index=projects_db, id=project_id)

    return {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(outcome['result'])
    }
Example #8
0
def save_case_list(event, context=None):
    """
    End-point: Saves a list of case instances into the project's casebase.
    Creates the index for the casebase (with mapping) if one does not exist,
    bulk-indexes the cases, and flags the project as having a casebase.

    Each case receives a 'hash__' field (MD5 of its key-sorted JSON form)
    used elsewhere for duplicate detection.
    """
    doc_list = json.loads(event['body'])  # list of cases from request body
    es = getESConn()
    pid = event['pathParameters']['id']
    proj = utility.getByUniqueField(es, projects_db, "_id", pid)  # project
    # create index with mapping if it does not exist already
    # (removed unused local `index_name` that was never referenced)
    project.indexMapping(es, proj)

    # Add a hash field to each case for duplicate-checking
    for x in doc_list:  # generate a hash after ordering dict by key
        x['hash__'] = str(
            hashlib.md5(
                json.dumps(OrderedDict(sorted(
                    x.items()))).encode('utf-8')).digest())
    # bulk-index the list of docs into the casebase index
    resp = helpers.bulk(es, doc_list, index=proj['casebase'], doc_type="_doc")

    # Indicate that the project has a casebase
    proj['hasCasebase'] = True
    source_to_update = {'doc': proj}
    res = es.update(index=projects_db, id=pid, body=source_to_update)

    response = {
        "statusCode": 201,
        "headers": headers,
        "body": json.dumps(resp)
    }
    return response
Example #9
0
def cbr_retrieve(event, context=None):
  """
  End-point: Completes the Retrieve step of the CBR cycle.

  Builds an Elasticsearch bool/should query from the weighted query features
  in the request body, retrieves the top-k most similar cases, and builds a
  'recommended' case by applying each feature's reuse strategy (Maximum,
  Minimum, Mean, Median, Mode) to unknown attributes of the best-k results.
  """
  start = timer()  # start timer
  result = {'recommended': {}, 'bestK': []}
  es = getESConn()  # es connection
  # query["query"]["bool"]["should"].append(queryFnc)
  queryAdded = False  # tracks whether any feature contributed a query clause
  params = json.loads(event['body'])  # parameters in request body
  # print(params)
  queryFeatures = params['data']
  proj = params.get('project', None)
  if proj is None:
    # project not supplied inline; look it up by id instead
    projId = params.get('projectId', None)  # name of casebase
    proj = utility.getByUniqueField(es, projects_db, "_id", projId)

  proj_attributes = proj['attributes']
  globalSim = params['globalSim']
  k = params['topk']
  query = {"query": {"bool": {"should": []}}}
  query["size"] = int(k)  # top k results
  # Add one scoring clause per usable feature: it must have a non-empty value,
  # a positive weight, and a similarity type other than "None".
  # NOTE(review): int(entry.get('weight', 0)) truncates fractional weights, so
  # e.g. weight 0.5 becomes 0 and the feature is skipped — confirm weights are
  # intended to be integers.
  for entry in queryFeatures:
    if ('value' in entry) and entry['value'] is not None and "" != entry['value'] and int(
            entry.get('weight', 0)) > 0 and entry['similarity'] != "None":
      queryAdded = True
      field = entry['name']
      similarityType = entry['similarity']
      options = retrieve.get_attribute_by_name(proj['attributes'], field).get('options', None)
      # print(options)
      # fieldType = entry['type']
      # use lowercase when field is specified as case-insensitive
      value = entry['value'].lower() if similarityType == 'EqualIgnoreCase' else entry['value']
      weight = entry['weight']
      # isProblem = entry['unknown']
      # strategy = entry['strategy']

      # build the similarity-specific ES query function for this feature
      qfnc = retrieve.getQueryFunction(field, value, weight, similarityType, options)
      query["query"]["bool"]["should"].append(qfnc)

  if not queryAdded:  # retrieve all (up to k) if no query clause was added
    query["query"]["bool"]["should"].append(retrieve.MatchAll())
  # perform retrieval
  counter = 0
  res = es.search(index=proj['casebase'], body=query)
  for hit in res['hits']['hits']:
    entry = hit['_source']
    entry.pop('hash__', None)  # remove hash field and value
    entry = retrieve.remove_vector_fields(proj_attributes, entry)  # remove vectors from Semantic USE fields
    if counter == 0:
      # the first (highest-scoring) hit seeds the recommended case; deep copy
      # so later per-feature overwrites don't alter the bestK entry
      result['recommended'] = copy.deepcopy(entry)
    # entry['id__'] = hit['_id'] # using 'id__' to track this case (this is removed during an update operation)
    entry['score__'] = hit['_score']  # removed during an update operation
    result['bestK'].append(entry)
    counter += 1

  # Recommend: Get the recommended result using chosen reuse strategies for unknown attribute values and keep known attribute values supplied
  if counter > 0:
    for entry in queryFeatures:
      if not entry['unknown'] and ('value' in entry) and entry['value'] is not None and "" != entry[
        'value']:  # copy known values
        result['recommended'][entry['name']] = entry['value']
      if entry.get('similarity') is not None and entry['unknown'] and entry[
        'strategy'] != "Best Match":  # use reuse strategies for unknown fields
        # aggregate this attribute across the bestK cases per the strategy
        if entry['strategy'] == "Maximum":
          result['recommended'][entry['name']] = max(d[entry['name']] for d in result['bestK'])
        if entry['strategy'] == "Minimum":
          result['recommended'][entry['name']] = min(d[entry['name']] for d in result['bestK'])
        if entry['strategy'] == "Mean":
          result['recommended'][entry['name']] = np.mean([x[entry['name']] for x in result['bestK']])
        if entry['strategy'] == "Median":
          result['recommended'][entry['name']] = np.median([x[entry['name']] for x in result['bestK']])
        if entry['strategy'] == "Mode":
          result['recommended'][entry['name']] = statistics.mode([x[entry['name']] for x in result['bestK']])

  end = timer()  # end timer
  result['retrieveTime'] = end - start  # wall-clock time for the whole step
  result['esTime'] = res['took']  # time reported by Elasticsearch itself (ms)
  response = {
    "statusCode": 200,
    "headers": headers,
    "body": json.dumps(result)
  }
  return response