Example #1
def crawlResourceForSubject(resourceName, pat_ids, collection, key, value, name, resource_val_path):
    # Dynamically load module for resource
    try:
        resource = getattr(importlib.import_module("fhirclient.models." + resourceName.lower()), resourceName)
    except Exception:
        logger.error("Resource " + resourceName + " does not exist", exc_info=1)
        raise

    # Perform search
    try:
        if resourceName == 'Patient':
            serverSearchParams = {"_id": pat_ids}
        else:
            serverSearchParams = {"patient": pat_ids, key: value}

        search = resource.where(serverSearchParams)
        ret = search.perform(server.server)
    except Exception:
        logger.error("Search failed", exc_info=1)
        raise

    # ret.entry is None when the Bundle contains no entries
    if not ret.entry:
        logger.info("No values found in search for patients " + pat_ids + " on resource " + resourceName)
        return

    insert_list = []
    next_page = True
    while next_page:

        for entry_elem in ret.entry:
            ret_element = entry_elem.resource
            element = resource.as_json(ret_element)
            element["_id"] = str(ObjectId())
            element["feature"] = value 
            element["name"] = name if name is not None else value

            if resourceName == "Patient":
                element["patient_id"] = pat_ids
            elif resourceName == "Condition":
                element["patient_id"] = ret_element.patient.reference.replace("Patient/", "")
            else:
                element["patient_id"] = ret_element.subject.reference.replace("Patient/", "")

            if resource_val_path is not None:
                element["resource_val_path"] = resource_val_path
            
            insert_list.append(element)

        if len(ret.link) < 2 or ret.link[1].relation != "next":
            next_page = False
            break
            
        res = server.server.request_json(ret.link[1].url)
        ret = bundle.Bundle(res)

    mongodbConnection.get_db()[collection].insert(list(insert_list))
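A minimal usage sketch for the crawler above, assuming the surrounding module imports (importlib, ObjectId, server, bundle, mongodbConnection, logger); the resource name, patient ids, collection name and value path below are illustrative placeholders, not values from the source:

# Hypothetical call: crawl Observation resources for two patients into a job collection.
crawlResourceForSubject(
    resourceName="Observation",
    pat_ids="123,456",                        # comma-separated FHIR Patient ids
    collection="crawler-job-42",              # MongoDB collection the elements are inserted into
    key="code", value="8867-4",               # FHIR search parameter and code (placeholders)
    name="heart_rate",                        # label stored with each element
    resource_val_path="valueQuantity/value",  # value path later used by the aggregator
)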
Example #2
def insert_resource_config(resource_name, resource_value_relative_path, sort_order):
    mongodbConnection.get_db().resourceConfig.find_one_and_delete({"_id" : resource_name})
    mongodbConnection.get_db().resourceConfig.insert_one(
        {"_id": resource_name, "resource_value_relative_path" : resource_value_relative_path, "sort_order": sort_order, "resource_name": resource_name}
    )

    ret = mongodbConnection.get_db().resourceConfig.find_one({"_id" : resource_name}, {"_id": False})
    resourceLoader.writeResource(ret)

    return ret
Example #3
    def setUp(self):
        # necessary because mongodbConnection uses flask functionality
        with api.app.app_context():
            # TODO: would be better to start MongoDB from here instead of relying on it already being started
            mongodbConnection.get_db()[self.aggregatorTestTable].delete_many({})

            with open(self.crawler_data_path, "r") as crawler_data:
                crawler_data_json = json.loads(crawler_data.read())
            mongodbConnection.get_db()[self.aggregatorTestTable].insert(
                crawler_data_json)

            with open(self.crawler_job_path, "r") as job_info:
                self.job_info_json = json.loads(job_info.read())
Example #4
    def get(self, crawler_id):
        args = parser.parse_args()
        aggregation_type = args["aggregation_type"]
        output_type = args["output_type"]

        crawlerJob = mongodbConnection.get_db().crawlerJobs.find_one(
            {"_id": crawler_id})
        if crawlerJob is None:
            return "Crawler Job not found", 404
        if crawlerJob["status"] != "finished":
            return "Crawler Job did not finish yet", 404

        if aggregation_type.lower() not in allowedAggregationTypes:
            return "Wrong aggregation type provided: " + aggregation_type, 400

        if aggregation_type.lower() == "all" and output_type == "csv":
            return "Time series not supported", 400

        aggregator = Aggregator(crawler_id, aggregation_type.lower(),
                                crawlerJob["feature_set"],
                                crawlerJob["resource_configs"])
        aggregator.aggregate()

        if output_type == "csv":
            return Response(aggregator.getCSVOfAggregated(),
                            mimetype='text/csv')
        elif output_type == "json":
            return aggregator.getRestructured()
        else:
            return aggregator.getAggregated()
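For orientation, a hedged client-side sketch of calling this endpoint after a crawler job has finished; the host, port and route below are assumptions (the real URL is built with api.url_for in Example #9 further down), only the query parameters mirror the ones parsed above:

import requests

crawler_id = "example-job-id"  # hypothetical id
resp = requests.get(
    "http://localhost:5000/aggregation/" + crawler_id,   # placeholder base URL and route
    params={"aggregation_type": "latest", "output_type": "json"},
)
if resp.status_code == 200:
    aggregated = resp.json()            # restructured JSON from getRestructured()
else:
    print(resp.status_code, resp.text)  # e.g. 404 while the job has not finished yet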
Example #5
def loadResources():
    searchDir = os.path.join(os.path.dirname(__file__),
                             '../fhir_resource_configs')

    for f in os.listdir(searchDir):
        path = os.path.join(searchDir, f)
        if not os.path.isfile(path):
            continue

        try:
            with open(path, 'r') as config_file:
                file_content = json.loads(config_file.read())

            if (file_content["resource_name"] is None
                    or file_content["resource_val_path"] is None):
                raise ValueError(
                    'Wrong format of file. Must contain fields "resource_name" and "resource_val_path".'
                )

            mongodbConnection.get_db().resourceConfig.find_one_and_delete(
                {"_id": file_content["resource_name"]})
            mongodbConnection.get_db().resourceConfig.insert_one({
                "_id": file_content["resource_name"],
                "resource_val_path": file_content["resource_val_path"],
                "sort_order": file_content["sort_order"],
                "resource_name": file_content["resource_name"],
                "key_path": file_content.get("key_path"),
                "key": file_content.get("key"),
            })

            logger.info("Added resource " + file_content["resource_name"] +
                        " of file " + path + " to db.")
        except Exception:
            logger.error("Reading resource file " + path + " failed. Skipping.",
                         exc_info=1)
            continue
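A hedged sketch of a file that loadResources() would accept, based only on the fields read above; the concrete values are invented. "resource_name", "resource_val_path" and "sort_order" are read directly, "key_path" and "key" are optional:

import json

sample_config = {
    "resource_name": "Observation",              # FHIR resource the config applies to
    "resource_val_path": "valueQuantity/value",  # path to the value inside the resource
    "sort_order": ["effectiveDateTime"],         # candidate sort paths, "/"-separated
    "key_path": "code/coding/code",              # optional
    "key": "code",                               # optional
}

# Writing it to the directory scanned by loadResources() (path is illustrative).
with open("fhir_resource_configs/observation.json", "w") as f:
    json.dump(sample_config, f, indent=4)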
Example #6
def crawlObservationForSubject(subject, collection, key, name):
    url_params = {"_pretty": "true", "subject": subject, "_format": "json", "_count": 100, key: name}

    next_page = configuration.HAPIFHIR_URL+"Observation"+'?'+urllib.parse.urlencode(url_params)

    all_entries = []
    while next_page is not None:

        try:
            request = requests.get(next_page)
        except Exception:
            # this avoids "connection refused" errors when too many variables are requested
            time.sleep(10)
            continue

        response_json = request.json()

        if "entry" not in response_json:
            return

        entries = response_json["entry"]

        if len(response_json["link"]) > 1 and response_json["link"][1]["relation"] == "next":
            next_page = response_json["link"][1]["url"]
        else:
            next_page = None

        all_entries += entries

    observations = []
    for entry in all_entries:
        reducer = ObservationReducer(entry["resource"])
        reduced = reducer.getReduced()
        #patient = reducer.getEntity()
        observations.append(reduced)

    mongodbConnection.get_db()[collection].find_one_and_update(
        { "_id": subject },
        {"$push": { "observations" : {"$each": observations}}},
        {"resource": "Observation"},
        upsert=True
    )
Example #7
def process_search_results(ret, resource_name, values, code_inf_map, resource_configs, key_path, collection):

    insert_list = []
    next_page = True

    while next_page:
        for entry_elem in ret.entry:
            if resource_name == 'Patient':
                process_patient_resource(insert_list, entry_elem, values, code_inf_map, resource_configs)
            else:
                process_resource(insert_list, entry_elem, resource_name, key_path, values, code_inf_map, resource_configs)

        if len(ret.link) < 2 or ret.link[1].relation != "next":
            next_page = False
            break
            
        res = server.server.request_json(ret.link[1].url)
        ret = bundle.Bundle(res)
    
    mongodbConnection.get_db()[collection].insert(list(insert_list))
Example #8
    def run(self):
        with self.app.app_context():
            while True:
                next_job = mongodbConnection.get_db().crawlerJobs.find_one(
                    {"status": "queued"})

                if (next_job is None):
                    time.sleep(self.interval)
                    continue

                logger.info("executing new job")
                crawler.executeCrawlerJob(next_job)
Example #9
def createCrawlerJob(crawler_id, crawler_status, patient_ids, feature_set, aggregation_type, resource_configs):
    from api import api

    if isinstance(patient_ids, str):
        patient_ids = [patient_ids]

    url_params = {"output_type": "csv", "aggregation_type": aggregation_type}
    url = "http://"+configuration.HOSTEXTERN+":"+str(configuration.WSPORT)+api.url_for(aggregationResource.Aggregation, crawler_id=crawler_id)+ "?" + urllib.parse.urlencode(url_params)

    crawlerJob =  {
        "_id": crawler_id,
        "patient_ids": patient_ids,
        "feature_set": feature_set,
        "resource_configs": resource_configs,
        "status": crawler_status,
        "finished": [],
        "queued_time": str(datetime.now()),
        "start_time": None,
        "url": url
    }

    mongodbConnection.get_db().crawlerJobs.insert_one(crawlerJob)
    return crawlerJob
Example #10
def executeCrawlerJob(crawlerJob):
    mongodbConnection.get_db().crawlerJobs.update({"_id": crawlerJob["_id"]}, {"$set": {"status": "running", "start_time": str(datetime.now())}})

    try:

        resource_map = create_resource_map(crawlerJob["feature_set"])
        pat_ids = ','.join(crawlerJob["patient_ids"])
        
        for resource_name, resource in resource_map.items():
            crawlResourceGroupsForSubjects(resource_name, pat_ids, crawlerJob["_id"], resource['feature_list'], resource['feature_maps'], crawlerJob['resource_configs'])
            #crawlResourceGroupsForSubjects(resource_name, pat_ids, crawlerJob["_id"], feature["value"], feature.get('name'), feature.get('resource_val_path'))

        mongodbConnection.get_db().crawlerJobs.update({"_id": crawlerJob["_id"]}, {"$set": {"status": "finished", "end_time": str(datetime.now())}})
    except Exception as e:
        print("error executing crawler", e, file=sys.stderr)
        traceback.print_exc()
        logger.error("Execution of Crawler " + crawlerJob["_id"] + " failed", exc_info=1)
        mongodbConnection.get_db().crawlerJobs.update({"_id": crawlerJob["_id"]}, {"$set": {"status": "error", "end_time": str(datetime.now())}})
        return "error"
Example #11
    def get(self):
        return list(mongodbConnection.get_db().resourceConfig.find({}, {"_id": False}))
Example #12
    def delete(self):
        ret = mongodbConnection.get_db().crawlerJobs.delete_many({})

        return ret.deleted_count
Example #13
def getResourceConfig(resource_name, resource_configs):
    # If resource config was not provided in crawler job -> read config from mongo db
    resource_configs = [] if resource_configs is None else resource_configs
    return next((c for c in resource_configs if c["resource_name"] == resource_name),
                mongodbConnection.get_db().resourceConfig.find_one({"_id": resource_name}))
Example #14
    def get(self, crawler_id):

        return mongodbConnection.get_db().crawlerJobs.find_one(
            {"_id": crawler_id})
Example #15
    def get(self):

        print(request.environ['REMOTE_ADDR'], file=sys.stderr)

        return list(mongodbConnection.get_db().crawlerJobs.find())
Example #16
def remove_resource_config(resource_name):
    resourceLoader.deleteResource(resource_name)
    return mongodbConnection.get_db().resourceConfig.find_one_and_delete({"_id" : resource_name}, {"_id": False})
Example #17
    def delete(self, crawler_id):
        ret = mongodbConnection.get_db().crawlerJobs.delete_many(
            {"_id": crawler_id})

        return ret.deleted_count
Example #18
    def aggregateObservations(self):
        mongorequest = [{
            "$unwind": "$observations"
        }, {
            "$group": {
                "_id": {
                    "attribute": "$observations.attribute",
                    "patient_id": "$_id"
                },
                "entry": {
                    "$push": "$$CURRENT.observations"
                }
            }
        }, {
            "$unwind": "$entry"
        }, {
            "$sort": {
                "entry.timestamp": 1
            }
        }]

        if self.aggregation_type == "" or self.aggregation_type == "all":
            mongorequest += [{
                "$group": {
                    "_id": "$_id",
                    "observations": {
                        "$push": "$entry"
                    }
                }
            }, {
                "$group": {
                    "_id": "$_id.patient_id",
                    "observations": {
                        "$push": "$$CURRENT.observations"
                    }
                }
            }]
        elif self.aggregation_type == "latest" or self.aggregation_type == "oldest":
            tmp = "first" if self.aggregation_type == "oldest" else "last"
            mongorequest += [{
                "$group": {
                    "_id": "$_id",
                    "observations": {
                        "$" + tmp: "$entry"
                    }
                }
            }, {
                "$group": {
                    "_id": "$_id.patient_id",
                    "observations": {
                        "$push": "$$CURRENT.observations"
                    }
                }
            }]
        elif self.aggregation_type == "avg":
            mongorequest += [{
                "$group": {
                    "_id": "$_id",
                    "attribute": {
                        "$first": "$_id.attribute"
                    },
                    "observations": {
                        "$avg": "$entry.value"
                    }
                }
            }, {
                "$group": {
                    "_id": "$_id.patient_id",
                    "observations": {
                        "$push": {
                            "avg": "$$CURRENT.observations",
                            "attribute": "$_id.attribute"
                        }
                    }
                }
            }]
        else:
            return None

        result = list(mongodbConnection.get_db()[self.crawler_id].aggregate(
            mongorequest))
        for res in result:
            res["resourceType"] = "Observation"

        self.aggregatedElements.extend(result)
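To make the pipeline above concrete, a hedged sketch of the per-patient document shape it assumes and what the "latest" branch would return; the values are invented, only the field names ("attribute", "timestamp", "value") come from the stages above:

# Hypothetical stored document (one per patient, observations pushed by the crawler):
stored = {
    "_id": "123",  # patient id
    "observations": [
        {"attribute": "heart_rate", "timestamp": "2020-01-01T10:00:00", "value": 70},
        {"attribute": "heart_rate", "timestamp": "2020-01-02T10:00:00", "value": 75},
    ],
}

# With aggregation_type == "latest" the entries are grouped per (attribute, patient),
# sorted by timestamp and reduced to the last one, so the result would roughly be:
expected = [
    {
        "_id": "123",
        "observations": [
            {"attribute": "heart_rate", "timestamp": "2020-01-02T10:00:00", "value": 75},
        ],
        "resourceType": "Observation",
    },
]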
Example #19
    def aggregateFeature(self, resource, selection):
        resource_config = self.getResourceConfig(resource)

        mongorequest = [{
            "$match": {
                "feature": selection,
                "resourceType": resource
            }
        }]

        foundSearchPath = False
        allElementsForFeature = list(mongodbConnection.get_db()[
            self.crawler_id].aggregate(mongorequest + [{
                "$group": {
                    "_id": None,
                    "count": {
                        "$sum": 1
                    }
                }
            }]))

        if len(allElementsForFeature) == 0:
            raise ValueError("Feature " + selection + " of resource " +
                             resource + " has no elements.")

        numAllElementsForFeature = allElementsForFeature[0]["count"]

        if resource_config["sort_order"] is not None and resource_config[
                "sort_order"] != "None":
            # Before actually sorting check if every single element has the attribute that should be sorted after -> throw error if they do not
            for sortPath in resource_config["sort_order"]:
                mongoSortPath = ".".join(
                    sortPath.split("/"))  # Change "/" to "."

                elementsWithPath = list(mongodbConnection.get_db()[
                    self.crawler_id].aggregate(mongorequest + [{
                        "$match": {
                            mongoSortPath: {
                                "$exists": True
                            }
                        }
                    }, {
                        "$group": {
                            "_id": None,
                            "count": {
                                "$sum": 1
                            }
                        }
                    }]))

                if len(elementsWithPath) == 0:
                    continue

                numElementsWithPath = elementsWithPath[0]["count"]

                # Check if every element has attribute search path
                if numAllElementsForFeature == numElementsWithPath:
                    mongorequest += [{"$sort": {mongoSortPath: 1}}]
                    foundSearchPath = True
                    break
        else:
            logger.warning("No sort order provided.")
            foundSearchPath = True

        if foundSearchPath:
            tmp = "first" if self.aggregation_type == "oldest" else "last"
            mongorequest += [{
                "$group": {
                    "_id": "$patient_id",
                    "elements": {
                        "$push": "$$CURRENT"
                    }
                }
            }, {
                "$group": {
                    "_id": "$_id",
                    "elements": {
                        "$first": "$elements"
                    }
                }
            }]

            sortedFeature = list(mongodbConnection.get_db()[
                self.crawler_id].aggregate(mongorequest))

            if self.aggregation_type == "" or self.aggregation_type == "all":
                self.aggregatedElements.extend(sortedFeature)
            elif self.aggregation_type == "latest":
                for res in sortedFeature:
                    res["resourceType"] = resource
                self.aggregatedElements.extend(sortedFeature)
                #self.aggregatedElements.append(sortedFeature[-1])
            elif self.aggregation_type == "oldest":
                self.aggregatedElements.append(sortedFeature[0])
        else:
            raise ValueError(
                "Elements have different fields to sort! Sorting not possible."
            )