def average_time_per_package():
    """Return the median time span per (package, annotator) pair.

    Despite the function's name, the value returned is the *median* of the
    per-pair spans, not their arithmetic mean.

    A pair's span is the largest ``end_time`` minus the smallest
    ``start_time`` over the pair's labelled documents.  Documents whose
    ``_id`` is returned by ``get_package_annotations()`` are excluded.

    Returns:
        The median span, in whatever unit ``start_time``/``end_time`` are
        stored in (presumably milliseconds -- confirm against the writer of
        these fields), or ``None`` when no pair has both bounds.
    """
    import statistics

    annotation_id_blacklist = get_package_annotations()

    # A single $group yields both bounds per pair, so the collection is
    # scanned once and both bounds come from the same snapshot of the data.
    spans = labelled_collection.aggregate(pipeline=[
        {
            "$match": {
                '_id': {'$nin': annotation_id_blacklist}
            }
        },
        {
            "$group": {
                "_id": {"pid": "$package_id", "uid": "$user_name"},
                "start_time": {"$min": "$start_time"},
                "end_time": {"$max": "$end_time"}
            }
        }
    ])

    # Skip pairs for which either bound is unavailable; subtracting None
    # would raise a TypeError.
    durations = [
        span['end_time'] - span['start_time']
        for span in spans
        if span['start_time'] is not None and span['end_time'] is not None
    ]

    # Without this guard an empty result set would raise instead of
    # reporting "no data".
    if not durations:
        return None

    return statistics.median(durations)
def median_time_per_annotation():
    """Return the median ``elapsed_time`` over all non-blacklisted annotations.

    Documents whose ``_id`` is returned by ``get_package_annotations()`` are
    excluded before the median is taken.

    Returns:
        The median of the ``elapsed_time`` values (same unit as stored), or
        ``None`` when no annotation qualifies.
    """
    import statistics

    annotation_id_blacklist = get_package_annotations()

    results = labelled_collection.aggregate(pipeline=[
        {
            "$match": {
                '_id': {'$nin': annotation_id_blacklist}
            }
        },
        {
            "$group": {
                "_id": "dummy",
                # $push keeps one entry per annotation.  $addToSet would
                # collapse equal durations into one and skew the median
                # towards rarely occurring values.
                "elapsed_times": {"$push": "$elapsed_time"}
            }
        }
    ])

    # The constant group key yields at most one result document; there is
    # none at all when every annotation was filtered out.
    grouped = next(iter(results), None)
    if grouped is None or not grouped['elapsed_times']:
        return None

    return statistics.median(grouped['elapsed_times'])
def number_of_annotators():
    """Return how many groups result from grouping all labelled documents
    by ``user_name`` (one group per distinct value)."""
    group_by_user = {
        "$group": {
            "_id": {"uid": "$user_name"},
            "convos": {"$sum": 1}
        }
    }
    per_user_groups = labelled_collection.aggregate(pipeline=[group_by_user])

    # Each result document stands for one annotator; only their number
    # matters, not the per-user counts.
    return sum(1 for _ in per_user_groups)
def number_of_annotators_filtered():
    """Return how many distinct ``user_name`` groups remain once the
    documents listed by ``get_package_annotations()`` are excluded."""
    excluded_ids = get_package_annotations()

    drop_blacklisted = {
        "$match": {
            '_id': {'$nin': excluded_ids}
        }
    }
    group_by_user = {
        "$group": {
            "_id": {"uid": "$user_name"},
            "convos": {"$sum": 1}
        }
    }
    per_user_groups = labelled_collection.aggregate(
        pipeline=[drop_blacklisted, group_by_user]
    )

    # One result document per remaining annotator; count them.
    return sum(1 for _ in per_user_groups)
def total_time():
    """Return the summed ``elapsed_time`` of all non-blacklisted annotations,
    converted to hours.

    Documents whose ``_id`` is returned by ``get_package_annotations()`` are
    excluded.  The conversion divides by 1000 * 60 * 60, i.e. it assumes
    ``elapsed_time`` is stored in milliseconds.

    Returns:
        Total time in hours; ``0.0`` when no annotation qualifies.
    """
    annotation_id_blacklist = get_package_annotations()

    results = labelled_collection.aggregate(pipeline=[
        {
            "$match": {
                '_id': {'$nin': annotation_id_blacklist}
            }
        },
        {
            "$group": {
                "_id": "dummy",
                "total_time": {"$sum": "$elapsed_time"}
            }
        }
    ])

    # The constant group key yields at most one result document; when every
    # annotation is filtered out there is none, which means zero time spent.
    summary = next(iter(results), None)
    if summary is None:
        return 0.0

    total_ms = summary['total_time']
    # convert to hrs
    return total_ms / 1000 / 60 / 60
# Example #6
# 0
def get_package_annotations():
    """Return the ``_id`` of every labelled document that belongs to an
    incompletely annotated (package, user) pair.

    A pair is considered incomplete when the set of ``convo_id`` values the
    user annotated for the package differs from the set of ``convo_id``
    values listed in that package's ``package`` field.  Pairs whose package
    id has no entry in ``package_collection`` are skipped, not blacklisted.

    Returns:
        A list of labelled-document ``_id`` values.
    """
    # Conversations each user actually annotated, per package.
    annotated_groups = labelled_collection.aggregate(pipeline=[{
        "$group": {
            "_id": {
                "pid": "$package_id",
                "uid": "$user_name"
            },
            "convos": {
                "$addToSet": "$convo_id"
            }
        }
    }])

    # Conversations each package is supposed to contain, keyed by the
    # stringified package _id.
    expected_by_package = {}
    for package in package_collection.find({}):
        package_key = str(package['_id'])
        expected_by_package[package_key] = {
            item['convo_id'] for item in package['package']
        }

    # First pass: collect the (pid, uid) keys whose coverage is off.
    mismatched_pairs = []
    for group in annotated_groups:
        pair_key = group['_id']
        seen_convos = set(group['convos'])
        if pair_key['pid'] not in expected_by_package:
            continue
        if seen_convos != expected_by_package[pair_key['pid']]:
            mismatched_pairs.append(pair_key)

    # Second pass: resolve each mismatched pair to its document ids.
    blacklisted_ids = []
    for pair_key in mismatched_pairs:
        matching_docs = labelled_collection.find({
            'package_id': pair_key['pid'],
            'user_name': pair_key['uid']
        })
        blacklisted_ids.extend(doc['_id'] for doc in matching_docs)

    return blacklisted_ids