Beispiel #1
0
def get_ground_truth_sections(username, section_collection):
    """
    Returns all of the routes associated with a username's ground truthed sections

    A stored cluster is attributed to ``username`` when the part of its name
    before the first underscore equals ``username``.

    :param username: user whose ground truth clusters are looked up
    :param section_collection: collection queried with ``find_one`` on
        ``_id`` to check that each ground truthed section exists
    :return: dict mapping each found section's ``_id`` to the value of
        ``getRoute(_id)``; empty when no clusters document is stored or
        none of the user's sections are found
    """
    ground_cluster_collection = edb.get_groundClusters_db()
    clusters_doc = ground_cluster_collection.find_one(
        {"clusters": {"$exists": True}})
    if clusters_doc is None:
        # find_one returns None when nothing matches; subscripting it would
        # raise TypeError, so treat "nothing stored yet" as "no sections".
        logging.debug("no ground truth clusters found")
        return {}

    ground_truth_sections = []
    for cluster_name, section_ids in clusters_doc["clusters"].items():
        if cluster_name.split("_")[0] == username:
            ground_truth_sections.extend(section_ids)

    ground_truth_section_data = {}
    for section_id in ground_truth_sections:
        section_data = section_collection.find_one({'_id': section_id})
        if section_data is None:
            # Listed in a cluster but absent from the collection: skip it.
            logging.debug("%s not found", section_id)
            continue
        ground_truth_section_data[section_data['_id']] = getRoute(
            section_data['_id'])
    return ground_truth_section_data
Beispiel #2
0
def update_db_with_clusters(user, infile_path):
    """
    Updates the groundClusters collection with the sections
    stored in the KML file path

    infile kmls must be of format some_name_for_cluster_X.kml where X is number
    This is checked in check_named_clusters

    The sections are stored under the key "<user>_<some_name_for_cluster>":
    the file name is cut at its first "." and everything from the last
    underscore on (the "_X" suffix) is dropped, so files that differ only in
    X contribute to the same cluster.

    Currently this is very inefficient. It replaces a dictionary of
    ground truth clusters each time the code is run rather than
    inserting new ground truth entries, but we may not even be using this.

    :param user: prefix used to build the stored cluster name
    :param infile_path: "/"-separated path of the KML file; also passed to
        etmu.get_kml_section_ids to obtain the section ids
    """
    gc_db = edb.get_groundClusters_db()
    file_stem = infile_path.split("/")[-1].split(".")[0]
    # Strip from the last underscore instead of a fixed two characters so
    # that multi-digit cluster numbers (e.g. "_12") are removed completely.
    cluster_name = "%s_%s" % (user, file_stem.rsplit("_", 1)[0])
    cluster_sids = etmu.get_kml_section_ids(infile_path)
    if gc_db.count() == 0:
        # Seed an empty clusters document so the lookup below finds one.
        gc_db.insert({"clusters": {}})
    x = gc_db.find_one({"clusters": {"$exists": True}})["clusters"]
    if cluster_name in x:
        x[cluster_name] += cluster_sids
    else:
        x[cluster_name] = cluster_sids
    # Replace the stored document wholesale with the merged dictionary.
    gc_db.remove({"clusters": {"$exists": True}})
    gc_db.insert({"clusters": x})
def update_db_with_clusters(user, infile_path):
    """
    Updates the groundClusters collection with the sections
    stored in the KML file path

    infile kmls must be of format some_name_for_cluster_X.kml where X is number
    This is checked in check_named_clusters

    The stored cluster key is "<user>_<name>", where <name> is the file name
    up to its first "." with the trailing "_X" removed. Sections from files
    sharing the same <name> are appended to the same cluster.

    Currently this is very inefficient. It replaces a dictionary of
    ground truth clusters each time the code is run rather than
    inserting new ground truth entries, but we may not even be using this.

    :param user: prefix used to build the stored cluster name
    :param infile_path: "/"-separated path of the KML file; also handed to
        etmu.get_kml_section_ids to read the section ids
    """
    gc_db = edb.get_groundClusters_db()
    kml_name = infile_path.split("/")[-1].split(".")[0]
    # The "_X" suffix may be longer than two characters (X >= 10), so cut at
    # the last underscore rather than slicing off a fixed width.
    base_name = kml_name.rsplit("_", 1)[0]
    cluster_name = "%s_%s" % (user, base_name)
    cluster_sids = etmu.get_kml_section_ids(infile_path)
    if gc_db.count() == 0:
        # Make sure there is a clusters document for find_one to return.
        gc_db.insert({"clusters": {}})
    x = gc_db.find_one({"clusters": {"$exists": True}})["clusters"]
    if cluster_name in x:
        x[cluster_name] += cluster_sids
    else:
        x[cluster_name] = cluster_sids
    # Swap the old document for one holding the merged dictionary.
    gc_db.remove({"clusters": {"$exists": True}})
    gc_db.insert({"clusters": x})
Beispiel #4
0
def update_db_with_clusters_dict(user, clusters):
    """
    Merges the given clusters dict into the groundClusters collection.

    Every name in ``clusters`` overwrites the stored entry of the same name;
    stored entries with other names are kept.

    This is known to be inefficient: the whole stored dictionary of ground
    truth clusters is removed and re-inserted on every call instead of only
    the new entries being added (and this may not even be in use).

    :param user: not used by this function
    :param clusters: non-empty dict of cluster name -> sections
    :raises AssertionError: if ``clusters`` equals ``{}``
    """
    ground_clusters_db = edb.get_groundClusters_db()
    assert clusters != {}, "clusters must be nonempty"
    if not ground_clusters_db.count():
        # Seed an empty clusters document so the lookup below finds one.
        ground_clusters_db.insert({"clusters": {}})
    stored = ground_clusters_db.find_one(
        {"clusters": {"$exists": True}})["clusters"]
    stored.update(clusters.items())
    ground_clusters_db.remove({"clusters": {"$exists": True}})
    ground_clusters_db.insert({"clusters": stored})
def update_db_with_clusters_dict(user, clusters):
    """
    Writes the clusters in the given dict to the groundClusters collection.

    Entries are stored under their own names, replacing any stored entry
    with the same name and leaving the remaining stored entries in place.

    Note on efficiency: the complete dictionary of ground truth clusters is
    deleted and written back each time, rather than only the new entries
    being inserted. This code may not even be in use.

    :param user: not used by this function
    :param clusters: non-empty dict of cluster name -> sections
    :raises AssertionError: if ``clusters`` equals ``{}``
    """
    gc_db = edb.get_groundClusters_db()
    assert clusters != {}, "clusters must be nonempty"
    collection_is_empty = gc_db.count() == 0
    if collection_is_empty:
        # Guarantees that the find_one below has a document to return.
        gc_db.insert({"clusters": {}})
    stored_doc = gc_db.find_one({"clusters": {"$exists": True}})
    merged = stored_doc["clusters"]
    for cluster_name, cluster_sections in clusters.items():
        merged[cluster_name] = cluster_sections
    # Replace the stored document with the merged dictionary.
    gc_db.remove({"clusters": {"$exists": True}})
    gc_db.insert({"clusters": merged})
def get_ground_truth_sections(username, section_collection):
    """
    Returns all of the routes associated with a username's ground truthed sections

    A stored cluster counts as belonging to ``username`` when the text before
    the first underscore in its name equals ``username``.

    :param username: user whose ground truth clusters are looked up
    :param section_collection: collection queried with ``find_one`` on
        ``_id`` to check that each ground truthed section exists
    :return: dict mapping each found section's ``_id`` to the value of
        ``getRoute(_id)``; empty when no clusters document is stored or
        none of the user's sections are found
    """
    ground_cluster_collection = edb.get_groundClusters_db()
    stored = ground_cluster_collection.find_one({"clusters": {"$exists": True}})
    # find_one returns None when no document matches; fall back to an empty
    # mapping instead of failing with a TypeError on the subscript.
    clusters = {} if stored is None else stored["clusters"]

    ground_truth_sections = []
    for cluster_name, section_ids in clusters.items():
        owner = cluster_name.split("_")[0]
        if owner == username:
            ground_truth_sections.extend(section_ids)

    ground_truth_section_data = {}
    for section_id in ground_truth_sections:
        section_data = section_collection.find_one({'_id': section_id})
        if section_data is not None:
            found_id = section_data['_id']
            ground_truth_section_data[found_id] = getRoute(found_id)
        else:
            # Listed in a cluster but absent from the collection.
            logging.debug("%s not found", section_id)
    return ground_truth_section_data