Beispiel #1
0
    def summarize_results(self, results):
        """Return a summary report from a list of analysis reports `results`.

        For every report in `results` one candidate row is built, holding
        eight Wilcoxon cells (one per plate area or plate area combination)
        followed by one Chi-squared cell. The row is only added to the
        summary if at least one of its cells is significant.

        Each cell is either ``None`` (no test data) or a string starting
        with a result code:

        * Wilcoxon cells: ``"pr; p=<P>"`` (significant, preference),
          ``"rj; p=<P>"`` (significant, rejection) or ``"ns; p=<P>"``
          (not significant). ``<P>`` is ``1 - major / repeats``, where
          `major` is the larger of the number of repeats that showed
          preference and the number that showed rejection.
        * Chi-squared cell: ``"s; ..."`` (significant) or ``"ns; ..."``
          (not significant), followed by the Chi-squared value and the
          P-value. A NaN P-value is treated as not significant.

        Creates a dictionary in the following format ::

            {
                'attr': {
                    'columns_over': ('..', 'Wilcoxon rank sum test', 'Chi-sq'),
                    'columns_over_spans': (2, 8, 1),
                    'columns': ['Species', 'n (plates)', 'A', 'B', 'C', 'D', 'A+B', 'C+D', 'A+B+C', 'B+C+D', 'A,B,C,D']
                },
                'results': [
                    ['Obelia dichotoma', 166, 'pr; p=0.0100', 'ns; p=0.6200', None, ...],
                    ...
                ]
            }

        If the first report has a non-empty `plate_areas_definition`
        attribute, the title of the last column is replaced by that
        definition (areas of one group joined with ``+``, groups sorted and
        joined with ``,``). The definition itself is left untouched.

        An empty `results` list yields a report with an empty result table.

        The summary is stored in a new report object under the statistics
        name ``plate_areas_summary`` and that report object is returned.
        """
        summary = {
            'attr': {
                'columns_over': ('..', 'Wilcoxon rank sum test', 'Chi-sq'),
                'columns_over_spans': (2, 8, 1),
                'columns': ['Species','n (plates)','A','B','C','D','A+B','C+D','A+B+C','B+C+D','A,B,C,D']
            },
            'results': []
        }

        # The plate areas are the same for every report, so define them once.
        areas = ('A','B','C','D','A+B','C+D','A+B+C','B+C+D')

        for result in results:
            species_selection = list(result.species_selections[0].values())
            species = species_selection[0]['name_latin']

            wilcoxon = None
            chi_squared = None
            if 'wilcoxon_areas_repeats' in result.statistics:
                wilcoxon = result.statistics['wilcoxon_areas_repeats'][0]
            if 'chi_squared_areas' in result.statistics:
                chi_squared = result.statistics['chi_squared_areas'][0]

            # Figure out for which plate areas the result was significant. A
            # result is considered significant if (confidence level)% of the
            # test repeats were significant.
            row = []
            for plate_area in areas:
                stats = wilcoxon['results'].get(plate_area) if wilcoxon else None
                if not stats:
                    # No data.
                    row.append(None)
                    continue

                # Calculate the P-value.
                # Preference and rejection should not be summed up
                # because they contradict. Only use the major value.
                preference = stats['n_preference'] > stats['n_rejection']
                major = max(stats['n_preference'], stats['n_rejection'])
                p = 1 - float(major) / wilcoxon['attr']['repeats']

                if not setlyze.std.is_significant(p, self.alpha_level):
                    code = 'ns'
                elif preference:
                    code = 'pr'
                else:
                    code = 'rj'
                row.append("%s; p=%.4f" % (code, p))

            # Add the results for the Chi squared test.
            if chi_squared:
                # Check if the result was significant. When all values are
                # 0 the p-value will be NaN. Function `is_significant` will
                # raise ValueError if the p-value is NaN.
                try:
                    significant = setlyze.std.is_significant(chi_squared['results']['p_value'], self.alpha_level)
                except ValueError:
                    significant = False

                code = 's' if significant else 'ns'
                row.append("%s; χ²=%.2f; p=%.4f" %
                    (code, chi_squared['results']['chi_squared'],
                    chi_squared['results']['p_value']))
            else:
                row.append(None)

            # Only add the row to the summary if one item in the row was
            # significant.
            if any(val and re.match('^(s|pr|rj);', val) for val in row):
                r = [species, result.get_option('Total plates')]
                r.extend(row)
                summary['results'].append(r)

        # Set the plate areas definition as the column name for the Chi-squared
        # test (last column). The title is built from a fresh list so that the
        # definition stored on the report is not modified; rewriting it in
        # place would corrupt it for every later use of that report. Using
        # sorted() on the values also works on both Python 2 and Python 3.
        definition = None
        if results:
            definition = getattr(results[0], 'plate_areas_definition', None)
        if definition:
            groups = sorted('+'.join(area) for area in definition.values())
            summary['attr']['columns'][10] = ','.join(groups)

        # Create a report object from the dictionary.
        report = setlyze.report.Report()
        report.set_statistics('plate_areas_summary', summary)
        return report
Beispiel #2
0
    def summarize_results(self, results):
        """Return a summary report from a list of analysis reports `results`.

        One candidate row is built per report: 24 Wilcoxon cells followed by
        24 Chi-squared cells, one cell per positive spots key. The first key
        (-24) lines up with the '2-24' column, the remaining keys are the
        numbers 2 up to and including 24. A row is only added to the summary
        if at least one of its cells is significant.

        Each cell is either ``None`` (no test data for that key) or a string
        starting with a result code:

        * ``at`` -- significant; for the Wilcoxon test the attraction count
          exceeds the repulsion count, for the Chi-squared test the observed
          mean is below the expected mean.
        * ``rp`` -- significant, the opposite case.
        * ``ns`` -- not significant.

        Wilcoxon cells look like ``"at; p=<P>"`` where ``<P>`` is
        ``1 - major / repeats`` and `major` is the larger of the attraction
        and repulsion counts. Chi-squared cells additionally carry the
        Chi-squared value. A Chi-squared P-value for which `is_significant`
        raises ValueError is treated as not significant.

        The resulting dictionary has the following format ::

            {
                'attr': {
                    'columns_over': ('..', 'Wilcoxon rank sum test', 'Chi-squared test'),
                    'columns_over_spans': (2, 24, 24),
                    'columns': ('Species', 'n (plates)', '2-24', '2', ..., '24', '2-24', '2', ..., '24')
                },
                'results': [
                    ['Obelia dichotoma', 143, 'rp; p=0.0100', 'ns; p=0.7400', None, ...],
                    ...
                ]
            }

        The dictionary is stored in a new report object under the statistics
        name ``positive_spots_summary`` and that report object is returned.
        """
        # Column titles for one test: the combined column plus one column
        # for each number of positive spots.
        spot_titles = ('2-24',) + tuple(str(number) for number in range(2, 25))
        summary = {
            'attr': {
                'columns_over': ('..', 'Wilcoxon rank sum test', 'Chi-squared test'),
                'columns_over_spans': (2, 24, 24),
                'columns': ('Species', 'n (plates)') + spot_titles + spot_titles
            },
            'results': []
        }

        # Keys under which the tests store their results, in column order.
        spot_keys = [-24] + list(range(2, 25))

        for report_in in results:
            selection = list(report_in.species_selections[0].values())
            latin_name = selection[0]['name_latin']

            wilcoxon = None
            if 'wilcoxon_spots_repeats' in report_in.statistics:
                wilcoxon = report_in.statistics['wilcoxon_spots_repeats'][0]
            chi_squared = None
            if 'chi_squared_spots' in report_in.statistics:
                chi_squared = report_in.statistics['chi_squared_spots'][0]

            cells = []

            # Wilcoxon cells. A result counts as significant if
            # (confidence level)% of the test repeats were significant.
            for key in spot_keys:
                entry = wilcoxon['results'].get(key) if wilcoxon else None
                if not entry:
                    cells.append(None)
                    continue

                # Attraction and repulsion contradict each other, so the
                # P-value is based on the larger of the two counts only.
                attracted = entry['n_attraction'] > entry['n_repulsion']
                major = entry['n_attraction'] if attracted else entry['n_repulsion']
                p = 1 - float(major) / wilcoxon['attr']['repeats']

                if not setlyze.std.is_significant(p, self.alpha_level):
                    code = 'ns'
                elif attracted:
                    code = 'at'
                else:
                    code = 'rp'
                cells.append("%s; p=%.4f" % (code,p))

            # Chi-squared cells.
            for key in spot_keys:
                entry = chi_squared['results'].get(key) if chi_squared else None
                if not entry:
                    # No data.
                    cells.append(None)
                    continue

                # The P-value is NaN when all values are 0, in which case
                # `is_significant` raises ValueError.
                try:
                    significant = setlyze.std.is_significant(entry['p_value'], self.alpha_level)
                except ValueError:
                    significant = False

                if not significant:
                    code = 'ns'
                elif entry['mean_observed'] < entry['mean_expected']:
                    code = 'at'
                else:
                    code = 'rp'
                cells.append("%s; χ²=%.2f; p=%.4f" %
                    (code, entry['chi_squared'], entry['p_value']))

            # Skip species without a single significant cell.
            if any(cell and re.match('^(s|at|rp);', cell) for cell in cells):
                summary['results'].append(
                    [latin_name, report_in.get_option('Total plates')] + cells)

        # Wrap the dictionary in a report object.
        report = setlyze.report.Report()
        report.set_statistics('positive_spots_summary', summary)
        return report