def queryUnclassifiedSections(uuid):
    """Look up the recent, still-unconfirmed sections to show for a user.

    A section is selected when it has source "Shankari", belongs to `uuid`,
    has a `predicted_mode` field, has an empty `confirmed_mode`, is marked
    `retained`, is of type "move", ended within the last week, and also
    satisfies the filters returned by `getClientSpecificQueryFilter`.

    As a side effect, the fraction of the user's unconfirmed "move" sections
    that survive this filtering is stored through `stats.storeServerEntry`
    (0 is stored when the user has no unconfirmed sections at all).

    Returns the result of the `find` call for the selected sections.
    """
    oneWeekAgo = datetime.now() - timedelta(weeks=1)
    clientFilters = getClientSpecificQueryFilter(uuid)
    sectionDb = get_section_db()
    logging.debug("section.count = %s" % sectionDb.count())

    # In theory the 'predicted_mode' check is unnecessary, because values are
    # predicted right after the trips are read from moves. In practice there
    # is a small race window in which trips for other users are being read
    # and the classifier has not run yet. That window can only grow with the
    # number of users, and it is cheap to guard against, so we do.
    shownFilters = [
        {"source": "Shankari"},
        {"user_id": uuid},
        {"predicted_mode": {"$exists": True}},
        {"confirmed_mode": ""},
        {"retained": True},
        {"type": "move"},
        {"section_end_datetime": {"$gt": oneWeekAgo}},
    ] + clientFilters
    shownSections = sectionDb.find({"$and": shownFilters})

    # Debugging aid only: every unconfirmed section of the user, regardless of
    # age or filtering. Can be removed once this is known to work well.
    allUnconfirmedSections = sectionDb.find(
        {
            "$and": [
                {"source": "Shankari"},
                {"user_id": uuid},
                {"confirmed_mode": ""},
                {"type": "move"},
            ]
        }
    )

    shownCount = shownSections.count()
    allUnconfirmedCount = allUnconfirmedSections.count()
    logging.debug("Unsec.count = %s" % shownCount)
    logging.debug("Total Unsec.count = %s" % allUnconfirmedCount)

    # Record what share of the sections is kept after stripping. Sections can
    # be stripped out because:
    # - they are too old
    # - their confidence is above the magic threshold (90%) AND the client
    #   has requested stripping
    # - the filter label has already marked them as too short
    if allUnconfirmedCount == 0:
        shownFraction = 0
    else:
        shownFraction = float(shownCount) / allUnconfirmedCount
    stats.storeServerEntry(
        uuid,
        stats.STAT_TRIP_MGR_PCT_SHOWN,
        time.time(),
        shownFraction,
    )
    return shownSections
def getClassifiedRatio(uuid, start, end):
    """Return the share of a user's sections in a time window that are confirmed.

    Only sections with source 'Shankari', type 'move', an existing
    `predicted_mode` field and a `section_start_datetime` strictly between
    `start` and `end` are considered, further narrowed by the filters from
    `getClientSpecificQueryFilter`. A section counts as classified when its
    `confirmed_mode` is not the empty string.

    Returns classified / total as a float, or 0 when no section matches.
    Raises AssertionError if the classified and unclassified counts do not
    add up to the total count.
    """
    from emission.analysis.result.userclient import getClientSpecificQueryFilter

    def countMatching(filters):
        # Every count is its own query against a freshly fetched collection.
        return get_section_db().find({'$and': filters}).count()

    baseFilters = [
        {'source': 'Shankari'},
        {'user_id': uuid},
        {'predicted_mode': {'$exists': True}},
        {'type': 'move'},
        {'section_start_datetime': {'$gt': start, '$lt': end}},
    ] + getClientSpecificQueryFilter(uuid)
    logging.debug("completeQueryList = %s" % baseFilters)

    pendingCount = countMatching(baseFilters + [{'confirmed_mode': ''}])
    confirmedCount = countMatching(baseFilters + [{'confirmed_mode': {'$ne': ''}}])
    overallCount = countMatching(baseFilters)
    logging.info(
        "unclassifiedCount = %s, classifiedCount = %s, totalCount = %s"
        % (pendingCount, confirmedCount, overallCount))

    # Sanity check: the two partitions are expected to cover the whole set.
    # NOTE(review): the three counts come from separate queries; presumably
    # nothing modifies the matching sections in between - confirm.
    assert pendingCount + confirmedCount == overallCount

    return float(confirmedCount) / overallCount if overallCount > 0 else 0
def getClassifiedRatio(uuid, start, end):
    """Compute which fraction of a user's sections between two times is confirmed.

    The sections considered have source 'Shankari', type 'move', a
    `predicted_mode` field, and a `section_start_datetime` greater than
    `start` and less than `end`; the filters from
    `getClientSpecificQueryFilter` are applied on top. "Confirmed" means the
    `confirmed_mode` field differs from the empty string.

    Returns the confirmed count divided by the total count as a float, or 0
    if the total count is not positive. An AssertionError is raised when the
    confirmed and unconfirmed counts do not sum to the total.
    """
    from emission.analysis.result.userclient import getClientSpecificQueryFilter

    timeWindow = {'$gt': start, '$lt': end}
    commonFilters = [
        {'source': 'Shankari'},
        {'user_id': uuid},
        {'predicted_mode': {'$exists': True}},
        {'type': 'move'},
        {'section_start_datetime': timeWindow},
    ]
    commonFilters = commonFilters + getClientSpecificQueryFilter(uuid)
    logging.debug("completeQueryList = %s" % commonFilters)

    # Split the common selection on whether a mode has been confirmed.
    notConfirmedQuery = {'$and': commonFilters + [{'confirmed_mode': ''}]}
    isConfirmedQuery = {'$and': commonFilters + [{'confirmed_mode': {"$ne": ''}}]}
    everythingQuery = {'$and': commonFilters}

    numUnconfirmed = get_section_db().find(notConfirmedQuery).count()
    numConfirmed = get_section_db().find(isConfirmedQuery).count()
    numTotal = get_section_db().find(everythingQuery).count()
    logging.info("unclassifiedCount = %s, classifiedCount = %s, totalCount = %s" %
                 (numUnconfirmed, numConfirmed, numTotal))

    # The confirmed and unconfirmed subsets should partition the selection.
    assert(numUnconfirmed + numConfirmed == numTotal)

    if numTotal > 0:
        return float(numConfirmed) / numTotal
    return 0
def queryUnclassifiedSections(uuid):
    """Fetch the user's unconfirmed sections from the past week.

    The query keeps sections that have source 'Shankari', match the given
    `uuid`, carry a `predicted_mode`, have an empty `confirmed_mode`, are
    `retained`, are of type 'move' and whose `section_end_datetime` is later
    than one week before now. The filters supplied by
    `getClientSpecificQueryFilter` are appended to these.

    Before returning, the ratio between the number of selected sections and
    the number of all unconfirmed 'move' sections of the user is recorded via
    `stats.storeServerEntry`; the recorded value is 0 if the user has no
    unconfirmed sections.

    Returns whatever `find` yields for the selected sections.
    """
    weekBoundary = datetime.now() - timedelta(weeks=1)
    perClientFilters = getClientSpecificQueryFilter(uuid)
    sectionCollection = get_section_db()
    logging.debug('section.count = %s' % sectionCollection.count())

    # The 'predicted_mode' condition should be redundant in theory, since
    # predictions are made right after trips are read from moves. There is
    # nevertheless a small race window while trips of other users are being
    # read and the classifier has not yet run. The window can only grow as
    # users are added, and handling it is easy, so it is handled here.
    standardFilters = [
        {'source': 'Shankari'},
        {'user_id': uuid},
        {'predicted_mode': {'$exists': True}},
        {'confirmed_mode': ''},
        {'retained': True},
        {'type': 'move'},
        {'section_end_datetime': {"$gt": weekBoundary}},
    ]
    selected = sectionCollection.find({"$and": standardFilters + perClientFilters})

    # For debugging only; can go away once this is known to work well.
    unfilteredFilters = [
        {'source': 'Shankari'},
        {'user_id': uuid},
        {'confirmed_mode': ''},
        {'type': 'move'},
    ]
    unfiltered = sectionCollection.find({"$and": unfilteredFilters})

    selectedCount = selected.count()
    unfilteredCount = unfiltered.count()
    logging.debug('Unsec.count = %s' % selectedCount)
    logging.debug('Total Unsec.count = %s' % unfilteredCount)

    # Track the percentage of sections that is not stripped out. A section is
    # stripped out when:
    # - it is too old
    # - its confidence exceeds the magic threshold (90%) AND the client asked
    #   for such sections to be stripped
    # - the filter label already identified it as too short
    pctShown = 0
    if unfilteredCount != 0:
        pctShown = float(selectedCount) / unfilteredCount
    stats.storeServerEntry(uuid, stats.STAT_TRIP_MGR_PCT_SHOWN, time.time(), pctShown)

    return selected