def get_ground_truth_sections(username, section_collection):
    """
    Returns all of the routes associated with a username's ground truthed
    sections.

    Reads the first document in the groundClusters collection that has a
    "clusters" field (a mapping of cluster name -> list of section ids) and
    keeps the clusters whose name, up to the first underscore, equals
    ``username``.

    :param username: user whose ground truth clusters should be collected
    :param section_collection: collection queried with ``find_one`` to check
        that each ground truthed section id exists
    :return: dict mapping section ``_id`` to ``getRoute(_id)`` for every
        ground truthed section that was found; empty when there is no
        clusters document or nothing matches
    """
    # NOTE(review): a second definition of this function appears later in the
    # file and replaces this one when the module is loaded -- confirm which
    # copy is meant to stay.
    ground_cluster_collection = edb.get_groundClusters_db()
    clusters_doc = ground_cluster_collection.find_one(
        {"clusters": {"$exists": True}})
    if clusters_doc is None:
        # find_one returns None when nothing matches; without this guard the
        # subscript below would raise TypeError on a database with no ground
        # truth stored yet.
        return {}

    # NOTE(review): because only the text before the first "_" is compared,
    # a username that itself contains "_" can never match a cluster name.
    ground_truth_sections = []
    for cluster_name, section_ids in clusters_doc["clusters"].items():
        if cluster_name.split("_")[0] == username:
            ground_truth_sections.extend(section_ids)

    ground_truth_section_data = {}
    for section_id in ground_truth_sections:
        section_data = section_collection.find_one({'_id': section_id})
        if section_data is None:
            logging.debug("%s not found", section_id)
            continue
        ground_truth_section_data[section_data['_id']] = getRoute(
            section_data['_id'])
    return ground_truth_section_data
def update_db_with_clusters(user, infile_path):
    """
    Updates the groundClusters collection with the sections stored in the KML
    file path infile.

    kmls must be of format some_name_for_cluster_X.kml where X is number.
    This is checked in check_named_clusters.

    The section ids read from the file are stored under the key
    "<user>_<some_name_for_cluster>"; if that key already exists the new ids
    are appended to the existing list.

    Currently this is very inefficient. It replaces a dictionary of ground
    truth clusters each time the code is run rather than inserting new ground
    truth entries, but we may not even be using this.

    :param user: prefix used to build the cluster key
    :param infile_path: "/"-separated path to the KML file
    """
    # NOTE(review): a second definition of this function appears later in the
    # file and replaces this one when the module is loaded -- confirm which
    # copy is meant to stay.
    gc_db = edb.get_groundClusters_db()

    file_stem = infile_path.split("/")[-1].split(".")[0]
    # Strip the whole trailing "_X" suffix instead of a fixed two characters,
    # so multi-digit cluster numbers (e.g. "_12") are removed completely.
    cluster_name = "%s_%s" % (user, file_stem.rsplit("_", 1)[0])
    cluster_sids = etmu.get_kml_section_ids(infile_path)

    query = {"clusters": {"$exists": True}}
    existing = gc_db.find_one(query)
    # find_one returns None when no clusters document exists (empty
    # collection, or only unrelated documents); start from an empty mapping
    # in that case. The document is rewritten below, so no placeholder needs
    # to be inserted first.
    clusters = existing["clusters"] if existing is not None else {}
    clusters.setdefault(cluster_name, []).extend(cluster_sids)

    gc_db.remove(query)
    gc_db.insert({"clusters": clusters})
def update_db_with_clusters(user, infile_path):
    """
    Updates the groundClusters collection with the sections stored in the KML
    file path infile.

    kmls must be of format some_name_for_cluster_X.kml where X is number.
    This is checked in check_named_clusters.

    The section ids read from the file are stored under the key
    "<user>_<some_name_for_cluster>"; if that key already exists the new ids
    are appended to the existing list.

    Currently this is very inefficient. It replaces a dictionary of ground
    truth clusters each time the code is run rather than inserting new ground
    truth entries, but we may not even be using this.

    :param user: prefix used to build the cluster key
    :param infile_path: "/"-separated path to the KML file
    """
    # NOTE(review): an earlier definition with the same name exists in this
    # file; being later, this is the one in effect after import.
    gc_db = edb.get_groundClusters_db()

    file_stem = infile_path.split("/")[-1].split(".")[0]
    # Strip the whole trailing "_X" suffix instead of a fixed two characters,
    # so multi-digit cluster numbers (e.g. "_12") are removed completely.
    cluster_name = "%s_%s" % (user, file_stem.rsplit("_", 1)[0])
    cluster_sids = etmu.get_kml_section_ids(infile_path)

    query = {"clusters": {"$exists": True}}
    existing = gc_db.find_one(query)
    # find_one returns None when no clusters document exists (empty
    # collection, or only unrelated documents); start from an empty mapping
    # in that case. The document is rewritten below, so no placeholder needs
    # to be inserted first.
    clusters = existing["clusters"] if existing is not None else {}
    clusters.setdefault(cluster_name, []).extend(cluster_sids)

    gc_db.remove(query)
    gc_db.insert({"clusters": clusters})
def update_db_with_clusters_dict(user, clusters):
    """
    Updates the groundClusters collection with the sections represented in
    the clusters dict.

    Every key in ``clusters`` overwrites the entry of the same name in the
    stored mapping; entries stored under other names are kept.

    Currently this is very inefficient. It replaces a dictionary of ground
    truth clusters each time the code is run rather than inserting new ground
    truth entries, but we may not even be using this.

    :param user: not used by this function
        (NOTE(review): keys are stored exactly as given and are not prefixed
        with the user -- presumably callers pass already-prefixed names;
        confirm against callers)
    :param clusters: non-empty dict of cluster name -> section ids
    :raises AssertionError: if ``clusters`` is an empty dict
    """
    # NOTE(review): a second definition of this function appears later in the
    # file and replaces this one when the module is loaded -- confirm which
    # copy is meant to stay.
    gc_db = edb.get_groundClusters_db()
    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type is unchanged for callers.
    if clusters == {}:
        raise AssertionError("clusters must be nonempty")

    query = {"clusters": {"$exists": True}}
    existing = gc_db.find_one(query)
    # find_one returns None when no clusters document exists; start from an
    # empty mapping then. The document is rewritten below, so no placeholder
    # needs to be inserted first.
    merged = existing["clusters"] if existing is not None else {}
    merged.update(clusters)

    gc_db.remove(query)
    gc_db.insert({"clusters": merged})
def update_db_with_clusters_dict(user, clusters):
    """
    Updates the groundClusters collection with the sections represented in
    the clusters dict.

    Every key in ``clusters`` overwrites the entry of the same name in the
    stored mapping; entries stored under other names are kept.

    Currently this is very inefficient. It replaces a dictionary of ground
    truth clusters each time the code is run rather than inserting new ground
    truth entries, but we may not even be using this.

    :param user: not used by this function
        (NOTE(review): keys are stored exactly as given and are not prefixed
        with the user -- presumably callers pass already-prefixed names;
        confirm against callers)
    :param clusters: non-empty dict of cluster name -> section ids
    :raises AssertionError: if ``clusters`` is an empty dict
    """
    # NOTE(review): an earlier definition with the same name exists in this
    # file; being later, this is the one in effect after import.
    gc_db = edb.get_groundClusters_db()
    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type is unchanged for callers.
    if clusters == {}:
        raise AssertionError("clusters must be nonempty")

    query = {"clusters": {"$exists": True}}
    existing = gc_db.find_one(query)
    # find_one returns None when no clusters document exists; start from an
    # empty mapping then. The document is rewritten below, so no placeholder
    # needs to be inserted first.
    merged = existing["clusters"] if existing is not None else {}
    merged.update(clusters)

    gc_db.remove(query)
    gc_db.insert({"clusters": merged})
def get_ground_truth_sections(username, section_collection):
    """
    Returns all of the routes associated with a username's ground truthed
    sections.

    Reads the first document in the groundClusters collection that has a
    "clusters" field (a mapping of cluster name -> list of section ids) and
    keeps the clusters whose name, up to the first underscore, equals
    ``username``.

    :param username: user whose ground truth clusters should be collected
    :param section_collection: collection queried with ``find_one`` to check
        that each ground truthed section id exists
    :return: dict mapping section ``_id`` to ``getRoute(_id)`` for every
        ground truthed section that was found; empty when there is no
        clusters document or nothing matches
    """
    # NOTE(review): an earlier definition with the same name exists in this
    # file; being later, this is the one in effect after import.
    ground_cluster_collection = edb.get_groundClusters_db()
    clusters_doc = ground_cluster_collection.find_one(
        {"clusters": {"$exists": True}})
    if clusters_doc is None:
        # find_one returns None when nothing matches; without this guard the
        # subscript below would raise TypeError on a database with no ground
        # truth stored yet.
        return {}

    # NOTE(review): because only the text before the first "_" is compared,
    # a username that itself contains "_" can never match a cluster name.
    ground_truth_sections = []
    for cluster_name, section_ids in clusters_doc["clusters"].items():
        if cluster_name.split("_")[0] == username:
            ground_truth_sections.extend(section_ids)

    ground_truth_section_data = {}
    for section_id in ground_truth_sections:
        section_data = section_collection.find_one({'_id': section_id})
        if section_data is None:
            logging.debug("%s not found", section_id)
            continue
        ground_truth_section_data[section_data['_id']] = getRoute(
            section_data['_id'])
    return ground_truth_section_data