예제 #1
0
    def test_recalculate_percentile(self):
        """Check that age, words spoken and percentile get recalculated.

        The adapter caches are pre-populated with test models and the
        snapshot contents lookup is stubbed out.
        """
        snapshot = copy.deepcopy(TEST_SNAPSHOT)

        cached = recalc_util.CachedMCDIAdapter()
        cached.max_word_counts['standard'] = 681
        cached.mcdi_models['standard'] = TEST_MCDI_MODEL
        cached.percentiles['typical-male'] = TEST_PERCENTILES_MODEL

        for stubbed_name in ('load_mcdi_model', 'load_snapshot_contents'):
            self.mox.StubOutWithMock(db_util, stubbed_name)

        # 31 entries with value 1, 22 with value 2 and 13 with value 3. The
        # assertion below expects 53 (31 + 22) words spoken, so value 3 is
        # presumably not counted as spoken by TEST_MCDI_MODEL.
        contents = []
        for value, repeat in ((1, 31), (2, 22), (3, 13)):
            contents.extend([models.SnapshotContent(0, '', value, 0)] * repeat)
        db_util.load_snapshot_contents(snapshot).AndReturn(contents)

        self.mox.ReplayAll()

        recalc_util.recalculate_age(snapshot)
        recalc_util.recalculate_percentile(snapshot, cached)

        age_error = abs(snapshot.age - 17.71)
        self.assertTrue(age_error < 0.01)
        self.assertEqual(snapshot.words_spoken, 53)
        self.assertEqual(snapshot.percentile, 14)
예제 #2
0
def summarize_snapshots(snapshot_metas):
    """Find the earliest session date at which each word counted as spoken.

    For every snapshot, the MCDI model named by the snapshot's mcdi_type
    supplies the values that count as "spoken" (its 'count_as_spoken'
    detail). Each MCDI model is loaded at most once per call.

    @param snapshot_metas: The snapshots to summarize. Each must expose
        mcdi_type and session_date.
    @type snapshot_metas: Iterable of models.SnapshotMetadata
    @return: Mapping from word to the smallest session_date of a snapshot in
        which the word's value counted as spoken, or None if the word was
        seen but never counted as spoken.
    @rtype: dict
    """
    spoken_values_by_mcdi = {}
    earliest_by_word = {}

    for meta in snapshot_metas:
        mcdi_name = meta.mcdi_type
        cdi_date = meta.session_date

        # Get the values that count as "spoken", loading each model once.
        if mcdi_name not in spoken_values_by_mcdi:
            mcdi_info = db_util.load_mcdi_model(mcdi_name)
            spoken_values_by_mcdi[mcdi_name] = (
                mcdi_info.details['count_as_spoken']
            )
        spoken_values = spoken_values_by_mcdi[mcdi_name]

        for word_info in db_util.load_snapshot_contents(meta):
            word = word_info.word

            if word_info.value in spoken_values:
                # A missing word and a word recorded as "not known" both
                # yield None here; either way this date should be entered.
                known_date = earliest_by_word.get(word)
                if known_date is None or known_date > cdi_date:
                    earliest_by_word[word] = cdi_date

            # Report not known if not already reported.
            elif word not in earliest_by_word:
                earliest_by_word[word] = None

    return earliest_by_word
예제 #3
0
def generate_study_report_rows(snapshots_from_study, presentation_format):
    """Serialize a set of snapshots to transposed report rows.

    The header and each serialized snapshot are built as columns and then
    transposed with zip, so every returned row holds one field: the header
    label first, followed by that field's value for each snapshot in order.
    The fixed metadata fields come first, followed by one row per distinct
    word found across all snapshots, in sorted order.

    @param snapshots_from_study: The snapshots to serialize. May be any
        iterable, including a one-shot generator.
    @type snapshots_from_study: Iterable of models.SnapshotMetadata
    @param presentation_format: The presentation format to use to render the
        string serialization.
    @type presentation_format: models.PresentationFormat
    @return: Rows of the report as produced by zip.
    @rtype: Result of zip over the header column and the snapshot columns.
    """
    # The snapshots are walked twice (word collection, then serialization),
    # so materialize them once to support one-shot iterables.
    snapshots = list(snapshots_from_study)

    word_listing_set = set()
    for snapshot in snapshots:
        snapshot_contents = db_util.load_snapshot_contents(snapshot)
        word_listing_set.update(
            entry.word.encode('utf-8', 'ignore')
            for entry in snapshot_contents
        )

    word_listing = sorted(word_listing_set)

    serialized_snapshots = [
        serialize_snapshot(snapshot, presentation_format, word_listing)
        for snapshot in snapshots
    ]

    header_col = [
        'database id',
        'child id',
        'study id',
        'study',
        'gender',
        'age',
        'birthday',
        'session date',
        'session num',
        'total num sessions',
        'words spoken',
        'items excluded',
        'percentile',
        'extra categories',
        'revision',
        'languages',
        'num languages',
        'mcdi type',
        'hard of hearing',
        'deleted'
    ]
    header_col.extend(word_listing)

    cols = [header_col]
    cols.extend(serialized_snapshots)

    # Transpose: one output row per field instead of one per snapshot.
    return zip(*cols)
예제 #4
0
def recalculate_percentile(snapshot, cached_adapter):
    """Refresh the words spoken count and percentile of a snapshot in place.

    The snapshot's contents are loaded through db_util, counted with
    get_words_spoken, and the resulting count is fed together with the
    snapshot's gender and age to recalculate_percentile_raw.

    @param snapshot: The snapshot whose words_spoken and percentile
        attributes are overwritten.
    @type snapshot: SnapshotMetadata
    @param cached_adapter: Adapter handed through to get_words_spoken and
        recalculate_percentile_raw.
    """
    kind = snapshot.mcdi_type
    child_gender = snapshot.gender
    contents = db_util.load_snapshot_contents(snapshot)

    snapshot.words_spoken = get_words_spoken(cached_adapter, kind, contents)

    # The count is read back from the snapshot rather than reused locally.
    raw_args = (
        cached_adapter,
        kind,
        child_gender,
        snapshot.words_spoken,
        snapshot.age,
    )
    snapshot.percentile = recalculate_percentile_raw(*raw_args)
예제 #5
0
def recalculate_percentile(snapshot, cached_adapter):
    """Recompute the words spoken count and percentile of a snapshot in place.

    The MCDI model for the snapshot's mcdi_type supplies both the values that
    count as spoken and the names of the percentile tables to use. If no
    model is found for that type, the 'fullenglishmcdi' model is used.

    @param snapshot: The snapshot whose words_spoken and percentile
        attributes are overwritten.
    @type snapshot: SnapshotMetadata
    @param cached_adapter: Object providing load_mcdi_model,
        load_percentile_model and get_max_mcdi_words.
    """
    mcdi_model = cached_adapter.load_mcdi_model(snapshot.mcdi_type)
    if mcdi_model is None:
        mcdi_model = cached_adapter.load_mcdi_model('fullenglishmcdi')

    meta_percentile_info = mcdi_model.details['percentiles']

    # OTHER_GENDER shares the male table; any other value uses the female one.
    if snapshot.gender in (constants.MALE, constants.OTHER_GENDER):
        percentiles_name = meta_percentile_info['male']
    else:
        percentiles_name = meta_percentile_info['female']

    percentiles = cached_adapter.load_percentile_model(percentiles_name)

    count_as_spoken_vals = mcdi_model.details['count_as_spoken']
    individual_words = db_util.load_snapshot_contents(snapshot)
    snapshot.words_spoken = sum(
        1 for word in individual_words if word.value in count_as_spoken_vals
    )

    # NOTE(review): the word maximum is looked up by snapshot.mcdi_type even
    # when the fallback model was used above -- confirm that
    # get_max_mcdi_words copes with an unknown MCDI type.
    snapshot.percentile = math_util.find_percentile(
        percentiles.details,
        snapshot.words_spoken,
        snapshot.age,
        cached_adapter.get_max_mcdi_words(snapshot.mcdi_type)
    )
예제 #6
0
    def test_summarize_snapshots(self):
        """Check that words map to their earliest spoken session date.

        Three snapshots are summarized: two share 'mcdi_type_1' (so its model
        is expected to be loaded only once) and one uses 'mcdi_type_2'.
        """
        def build_snapshot(mcdi_type, session_date):
            snap = TEST_SNAPSHOT.clone()
            snap.mcdi_type = mcdi_type
            snap.session_date = session_date
            return snap

        def build_contents(*word_value_pairs):
            return [
                models.SnapshotContent(0, word, value, 1)
                for word, value in word_value_pairs
            ]

        first = build_snapshot('mcdi_type_1', '2015/01/01')
        second = build_snapshot('mcdi_type_1', '2015/02/01')
        third = build_snapshot('mcdi_type_2', '2015/03/01')
        snapshots = [first, second, third]

        first_contents = build_contents(
            ('word1', 1), ('word2', 0), ('word3', 0)
        )
        second_contents = build_contents(
            ('word1', 1), ('word2', 2), ('word3', 0)
        )
        third_contents = build_contents(
            ('word1', 1), ('word2', 1), ('word3', 1), ('word4', 2)
        )

        self.mox.StubOutWithMock(db_util, 'load_mcdi_model')
        self.mox.StubOutWithMock(db_util, 'load_snapshot_contents')

        # Expected calls, in order. The second snapshot reuses the model
        # already loaded for 'mcdi_type_1', so no model load is recorded.
        type_1_model = models.MCDIFormat(
            '', '', '', {'count_as_spoken': [1, 2]}
        )
        db_util.load_mcdi_model('mcdi_type_1').AndReturn(type_1_model)
        db_util.load_snapshot_contents(snapshots[0]).AndReturn(first_contents)

        db_util.load_snapshot_contents(snapshots[1]).AndReturn(
            second_contents
        )

        type_2_model = models.MCDIFormat('', '', '', {'count_as_spoken': [1]})
        db_util.load_mcdi_model('mcdi_type_2').AndReturn(type_2_model)
        db_util.load_snapshot_contents(snapshots[2]).AndReturn(third_contents)

        self.mox.ReplayAll()

        summary = report_util.summarize_snapshots(snapshots)

        expected_dates = (
            ('word1', '2015/01/01'),
            ('word2', '2015/02/01'),
            ('word3', '2015/03/01'),
            ('word4', None),
        )
        for word, expected_date in expected_dates:
            self.assertEqual(summary[word], expected_date)
예제 #7
0
def serialize_snapshot(snapshot, presentation_format=None, word_listing=None,
    report_dict=False, include_words=True):
    """Serialize a snapshot to a flat list of values or to a dictionary.

    @param snapshot: The snapshot to serialize.
    @type snapshot: models.SnapshotMetadata
    @param presentation_format: The presentation format handed to
        interpret_word_value when rendering the gender, the extra categories
        and the word values.
    @type presentation_format: models.PresentationFormat
    @param word_listing: The words to report, in output order. Words are
        matched against the snapshot's contents ignoring case and '*'
        characters; a word without a matching entry is rendered from a
        NotFoundSnapshotContent placeholder. Defaults to no words.
    @type word_listing: List of str
    @param report_dict: If True, return a dict keyed by field name with the
        word values under 'words'. If False, return a flat list with the word
        values appended after the metadata fields.
    @type report_dict: bool
    @param include_words: If False, the snapshot's contents are not loaded
        and no word values are included in the result.
    @type include_words: bool
    @return: Serialized version of the snapshot.
    @rtype: dict if report_dict is True, list otherwise
    """
    def normalize(word):
        # Words are matched case-insensitively and without '*' markers.
        return word.lower().replace('*', '')

    word_values = None
    if include_words:
        contents_by_word = {}
        for entry in db_util.load_snapshot_contents(snapshot):
            contents_by_word[normalize(entry.word)] = entry

        not_found_entry = NotFoundSnapshotContent()
        word_values = [
            interpret_word_value(
                contents_by_word.get(normalize(word), not_found_entry).value,
                presentation_format
            )
            for word in (word_listing or [])
        ]

    gender = interpret_word_value(snapshot.gender, presentation_format)
    extra_categories = interpret_word_value(
        snapshot.extra_categories,
        presentation_format
    )

    # Single source of truth for both output shapes: the dict uses the names
    # as keys, the list uses the values in this order.
    fields = [
        ('database_id', snapshot.database_id),
        ('child_id', snapshot.child_id),
        ('study_id', snapshot.study_id),
        ('study', snapshot.study),
        ('gender', gender),
        ('age', snapshot.age),
        ('birthday', snapshot.birthday),
        ('session_date', snapshot.session_date),
        ('session_num', snapshot.session_num),
        ('total_num_sessions', snapshot.total_num_sessions),
        ('words_spoken', snapshot.words_spoken),
        ('items_excluded', snapshot.items_excluded),
        ('percentile', snapshot.percentile),
        ('extra_categories', extra_categories),
        ('revision', snapshot.revision),
        ('languages', snapshot.languages),
        ('num_languages', snapshot.num_languages),
        ('mcdi_type', snapshot.mcdi_type),
        ('hard_of_hearing', snapshot.hard_of_hearing),
        ('deleted', snapshot.deleted)
    ]

    if report_dict:
        return_dict = dict(fields)
        if include_words:
            return_dict['words'] = word_values
        return return_dict

    return_list = [value for _, value in fields]
    if include_words:
        return_list.extend(word_values)

    # NOTE(review): only str instances are encoded here; confirm this is the
    # intended type check for the Python version this module targets.
    return [
        x.encode('utf-8', 'ignore') if isinstance(x, str) else x
        for x in return_list
    ]