Beispiel #1
0
    def test1(self):
        """1 way anova"""

        R="""Anova: Single Factor on Measure

SUMMARY
Groups   Count   Sum    Average   Variance
==========================================
A           14    870    62.143    202.132
B           15    858    57.200    584.743
C           16   1510    94.375    339.583

ANOVA
Source of       SS       df      MS        F       P-value
Variation
===========================================================
Treatments   12656.047    2   6328.023   16.707   4.589e-06
Error        15907.864   42    378.759
===========================================================
Total        28563.911   44                                 """
        listOflists=[[42,52,55,59,75,40,79,79,44,56,68,77,75,69],
                     [29,36,29,31,97,88,27,57,54,77,54,52,58,91,78],
                     [91,79,73,75,99,66,114,120,102,68,114,79,115,104,107,104]]

        D=Anova1way()
        D.run(listOflists)
        self.assertEqual(str(D),R)
Beispiel #2
0
 def test0(self):
     """One-way ANOVA on five named conditions with ten observations each.

     Besides checking that run() accepts an explicit conditions_list, the
     test pins the core statistics to values worked out by hand from the
     data below (group means 30, 35, 38, 41, 46; grand mean 38).
     """
     # http://www.utdallas.edu/~herve/abdi-NewmanKeuls2010-pretty.pdf
     d = [[21.0, 20.0, 26.0, 46.0, 35.0, 13.0, 41.0, 30.0, 42.0, 26.0],
          [23.0, 30.0, 34.0, 51.0, 20.0, 38.0, 34.0, 44.0, 41.0, 35.0],
          [35.0, 35.0, 52.0, 29.0, 54.0, 32.0, 30.0, 42.0, 50.0, 21.0],
          [44.0, 40.0, 33.0, 45.0, 45.0, 30.0, 46.0, 34.0, 49.0, 44.0],
          [39.0, 44.0, 51.0, 47.0, 50.0, 45.0, 39.0, 51.0, 39.0, 55.0]]
     conditions_list = 'Contact Hit Bump Collide Smash'.split()
     D=Anova1way()
     D.run(d, conditions_list=conditions_list)

     # Between groups: SS = 10 * (64 + 9 + 0 + 9 + 64) = 1460, df = 5 - 1 = 4.
     # Within groups:  SS = 1048 + 778 + 1100 + 374 + 300 = 3600, df = 50 - 5 = 45.
     self.assertEqual(D['dfbn'], 4)
     self.assertEqual(D['dfwn'], 45)
     self.assertAlmostEqual(D['msbn'], 365.0, 6)
     self.assertAlmostEqual(D['mswn'], 80.0, 6)
     # F = MS_between / MS_within = 365 / 80
     self.assertAlmostEqual(D['f'], 4.5625, 6)
Beispiel #3
0
    def test0(self):
        """repr() of a one-way ANOVA lists every computed statistic in order."""
        # Same three-group fixture the string-rendering test uses; groups are
        # unnamed, and the expected repr below shows them as 'A', 'B', 'C'.
        samples = [
            [42,52,55,59,75,40,79,79,44,56,68,77,75,69],
            [29,36,29,31,97,88,27,57,54,77,54,52,58,91,78],
            [91,79,73,75,99,66,114,120,102,68,114,79,115,104,107,104],
        ]

        # Compared exactly, including the full float digits.
        expected = "Anova1way([('f', 16.70726997413529), ('p', 4.5885798225758395e-06), ('ns', [14, 15, 16]), ('mus', [62.142857142857146, 57.2, 94.375]), ('vars', [202.13186813186815, 584.7428571428571, 339.5833333333333]), ('ssbn', 12656.046825396828), ('sswn', 15907.864285714284), ('dfbn', 2), ('dfwn', 42), ('msbn', 6328.023412698414), ('mswn', 378.7586734693877)], conditions_list=['A', 'B', 'C'])"

        anova = Anova1way()
        anova.run(samples)
        self.assertEqual(repr(anova), expected)
Beispiel #4
0
# Lookup tables for the integer codes sent by the client.

# selectAll code -> (taxaDict key, Profile id column for that taxonomic level)
_SELECT_ALL_LEVELS = {
    1: ('Kingdom', 'kingdomid'),
    2: ('Phyla', 'phylaid'),
    3: ('Class', 'classid'),
    4: ('Order', 'orderid'),
    5: ('Family', 'familyid'),
    6: ('Genus', 'genusid'),
    7: ('Species', 'speciesid'),
}

# normalize code -> factor name handed to normalizeAlpha
_NORM_FACTORS = {
    1: 'none',
    2: 'min',
    3: '10th percentile',
    4: '25th percentile',
    5: 'median',
}

# button code -> (finalDF column analysed, human-readable label)
_DEPENDENT_VARS = {
    1: ('count', 'Sequence Reads'),
    2: ('rel_abund', 'Relative Abundance'),
    3: ('rich', 'Species Richness'),
    4: ('diversity', 'Shannon Diversity'),
}

_REPORT_RULE = '===============================================\n'


def _treatment_label(group_key):
    """Return the display label for one groupby key (unicode scalar or tuple)."""
    if isinstance(group_key, unicode):
        return group_key
    return ' & '.join(list(group_key))


def _anova_report(taxa_key, dep_label, ind_var, anova, failed):
    """Build the text-area block describing one taxon's ANOVA outcome."""
    parts = [
        _REPORT_RULE,
        'Taxa level: ' + str(taxa_key[0]) + '\n',
        'Taxa name: ' + str(taxa_key[1]) + '\n',
    ]
    # An unrecognised button code has no label; that line is simply omitted.
    if dep_label is not None:
        parts.append('Dependent Variable: ' + dep_label + '\n')
    parts.append('Independent Variable: ' + str(ind_var) + '\n')
    if failed:
        parts.append('Analysis cannot be performed...' + '\n')
    else:
        parts.append(str(anova) + '\n')
    parts.append(_REPORT_RULE)
    parts.append('\n\n\n\n')
    return ''.join(parts)


def getCatAlphaData(request):
    """Run a one-way ANOVA per selected taxon and return chart/report JSON.

    The sample selection is restored from ``request.session['selected_samples']``.
    For an AJAX request, ``request.GET['all']`` must hold a JSON object with:

    * ``button``    -- 1..4, dependent variable (reads, relative abundance,
      richness, diversity)
    * ``sig_only``  -- 1 to report only ANOVAs with p <= 0.05, 0 to report all
    * ``normalize`` -- 1..5, normalisation factor (anything else means 'none')
    * ``selectAll`` -- 1..7 to use every taxon at one level instead of ``taxa``
    * ``meta``      -- JSON string of the selected meta variables
    * ``taxa``      -- JSON string of the selected taxa

    Returns an ``HttpResponse`` whose JSON body has the keys ``series``,
    ``xAxis``, ``yAxis``, ``text``, ``empty`` and ``res_table``.  For a
    non-AJAX request nothing is returned by the code visible here.
    """
    ### get sample list from cookie
    samples = Sample.objects.all()
    # NOTE(security): pickle.loads executes arbitrary code if the pickled
    # bytes can be tampered with.  This is only safe while the session
    # backend keeps the payload out of the client's control -- verify.
    samples.query = pickle.loads(request.session['selected_samples'])
    selected = samples.values_list('sampleid')
    qs1 = Sample.objects.all().filter(sampleid__in=selected)

    if request.is_ajax():
        params = simplejson.loads(request.GET["all"])
        button = int(params["button"])
        sig_only = int(params["sig_only"])
        norm = int(params["normalize"])
        selectAll = int(params["selectAll"])

        ### merge values on common keys
        metaDict = simplejson.JSONDecoder(
            object_pairs_hook=multidict).decode(params["meta"])

        ### meta variable DataFrame (presumably one row per sample -- see
        ### catAlphaMetaDF)
        metaDF = catAlphaMetaDF(qs1, metaDict)
        mySet = list(set(metaDF['sampleid'].tolist()))

        ### taxa DataFrame for the de-duplicated sample ids
        taxaDF = taxaProfileDF(mySet)

        ### this taxaDict is from the dynatree (ajax call)
        taxaDict = simplejson.JSONDecoder(
            object_pairs_hook=multidict).decode(params["taxa"])

        # change dict if selectAll levels is on (avoids loading entire tree first)
        if selectAll in _SELECT_ALL_LEVELS:
            level_name, id_column = _SELECT_ALL_LEVELS[selectAll]
            taxaDict = {
                level_name: Profile.objects.all().filter(
                    sampleid__in=mySet).values_list(
                        id_column, flat=True).distinct(),
            }

        factor = _NORM_FACTORS.get(norm, 'none')

        ### normalize the number of sequence reads per sample
        normDF = normalizeAlpha(taxaDF, taxaDict, mySet, factor)

        finalDF = metaDF.merge(normDF, on='sampleid', how='outer')
        numeric_columns = ['count', 'rel_abund', 'rich', 'diversity']
        finalDF[numeric_columns] = finalDF[numeric_columns].astype(float)
        # Process-wide pandas display options (kept from the original code).
        pd.set_option('display.max_rows', finalDF.shape[0],
                      'display.max_columns', finalDF.shape[1], 'display.width',
                      1000)

        final_fieldList = list(metaDict)
        indVar = ' x '.join(final_fieldList)
        dep_column, dep_label = _DEPENDENT_VARS.get(button, (None, None))

        report_parts = []
        seriesList = []
        xAxisDict = {}
        yAxisDict = {}

        ### one ANOVA per selected taxon
        for name1, group1 in finalDF.groupby(['rank', 'taxa_name', 'taxa_id']):
            ### split this taxon's values by meta-variable combination
            trtList = []
            valList = []
            if dep_column is not None:
                for name2, group2 in group1.groupby(final_fieldList)[dep_column]:
                    trtList.append(_treatment_label(name2))
                    valList.append(list(group2))

            ### for taxa with only 1 species all richness values will be '1'
            ### and cause an anova error.  Evaluated per taxon so that one
            ### degenerate taxon does not disable the analysis of later ones.
            degenerate = (
                button == 3
                and group1['rich'].sum() == group1['rich'].count())

            ### One-way ANOVA with some error checking
            D = Anova1way()
            anova_failed = degenerate
            if not degenerate:
                try:
                    D.run(valList, conditions_list=trtList)
                except Exception:
                    # Best effort: a failed ANOVA is reported, not raised.
                    anova_failed = True
            if anova_failed:
                # p = 1 keeps failed analyses out of the significant-only view.
                D['p'] = 1

            ### sig_only == 1: significant ANOVAs only; sig_only == 0: all
            if sig_only == 1:
                include = D['p'] <= 0.05
            else:
                include = sig_only == 0
            if not include:
                continue

            report_parts.append(
                _anova_report(name1, dep_label, indVar, D, anova_failed))

            ### per-treatment means become this taxon's chart series
            if dep_column is None:
                dataList = []
            else:
                dataList = list(
                    group1.groupby(final_fieldList)[dep_column].mean())
            seriesList.append({'name': name1, 'data': dataList})

            xAxisDict['title'] = {'text': indVar}
            xAxisDict['categories'] = trtList

            yTitle = {}
            if dep_label is not None:
                yTitle['text'] = dep_label
            yAxisDict['title'] = yTitle

        finalDict = {
            'series': seriesList,
            'xAxis': xAxisDict,
            'yAxis': yAxisDict,
            'text': ''.join(report_parts),
            # Existing client contract: 0 when there are no series, 1 otherwise.
            'empty': 1 if seriesList else 0,
        }

        finalDF.reset_index(drop=True, inplace=True)
        res_table = finalDF.to_html(classes="table display")
        res_table = res_table.replace('border="1"', 'border="0"')
        finalDict['res_table'] = str(res_table)

        res = simplejson.dumps(finalDict)
        return HttpResponse(res, content_type='application/json')