def test1(self):
    """Compare the text report of a one-way ANOVA against a stored report."""
    # NOTE(review): the expected report below is reproduced exactly as it
    # appears in this view, where it sits on a single line. It looks like a
    # multi-line table whose line breaks were lost - confirm against the
    # real output of str(Anova1way) before relying on this assertion.
    expected = """Anova: Single Factor on Measure SUMMARY Groups Count Sum Average Variance ========================================== A 14 870 62.143 202.132 B 15 858 57.200 584.743 C 16 1510 94.375 339.583 ANOVA Source of SS df MS F P-value Variation =========================================================== Treatments 12656.047 2 6328.023 16.707 4.589e-06 Error 15907.864 42 378.759 =========================================================== Total 28563.911 44 """

    # Three groups of unequal size (14, 15 and 16 observations).
    groups = [
        [42, 52, 55, 59, 75, 40, 79, 79, 44, 56, 68, 77, 75, 69],
        [29, 36, 29, 31, 97, 88, 27, 57, 54, 77, 54, 52, 58, 91, 78],
        [91, 79, 73, 75, 99, 66, 114, 120, 102, 68, 114, 79, 115, 104,
         107, 104],
    ]

    anova = Anova1way()
    anova.run(groups)

    self.assertEqual(str(anova), expected)
def test0(self):
    """Run a one-way ANOVA on five labelled groups of ten observations.

    There is no assertion: the test passes as long as ``run`` completes
    without raising.
    """
    # Reference given alongside the original data:
    # http://www.utdallas.edu/~herve/abdi-NewmanKeuls2010-pretty.pdf
    labels = ['Contact', 'Hit', 'Bump', 'Collide', 'Smash']
    observations = [
        [21.0, 20.0, 26.0, 46.0, 35.0, 13.0, 41.0, 30.0, 42.0, 26.0],
        [23.0, 30.0, 34.0, 51.0, 20.0, 38.0, 34.0, 44.0, 41.0, 35.0],
        [35.0, 35.0, 52.0, 29.0, 54.0, 32.0, 30.0, 42.0, 50.0, 21.0],
        [44.0, 40.0, 33.0, 45.0, 45.0, 30.0, 46.0, 34.0, 49.0, 44.0],
        [39.0, 44.0, 51.0, 47.0, 50.0, 45.0, 39.0, 51.0, 39.0, 55.0],
    ]

    anova = Anova1way()
    anova.run(observations, conditions_list=labels)
def test0(self):
    """Compare ``repr`` of a one-way ANOVA result against a stored value."""
    # The expected repr is split across adjacent literals purely for
    # readability; the concatenated value is one continuous string.
    expected = (
        "Anova1way([('f', 16.70726997413529), "
        "('p', 4.5885798225758395e-06), "
        "('ns', [14, 15, 16]), "
        "('mus', [62.142857142857146, 57.2, 94.375]), "
        "('vars', [202.13186813186815, 584.7428571428571, 339.5833333333333]), "
        "('ssbn', 12656.046825396828), "
        "('sswn', 15907.864285714284), "
        "('dfbn', 2), "
        "('dfwn', 42), "
        "('msbn', 6328.023412698414), "
        "('mswn', 378.7586734693877)], "
        "conditions_list=['A', 'B', 'C'])"
    )

    # Three groups of unequal size (14, 15 and 16 observations).
    groups = [
        [42, 52, 55, 59, 75, 40, 79, 79, 44, 56, 68, 77, 75, 69],
        [29, 36, 29, 31, 97, 88, 27, 57, 54, 77, 54, 52, 58, 91, 78],
        [91, 79, 73, 75, 99, 66, 114, 120, 102, 68, 114, 79, 115, 104,
         107, 104],
    ]

    anova = Anova1way()
    anova.run(groups)

    self.assertEqual(repr(anova), expected)
# ``button`` code sent by the client -> (finalDF column that is analysed,
# label used for it in the text report and as the y-axis title).
_ALPHA_BUTTONS = {
    1: ('count', 'Sequence Reads'),
    2: ('rel_abund', 'Relative Abundance'),
    3: ('rich', 'Species Richness'),
    4: ('diversity', 'Shannon Diversity'),
}

# ``selectAll`` code -> (taxaDict key, Profile field holding that level's id).
_SELECT_ALL_LEVELS = {
    1: ('Kingdom', 'kingdomid'),
    2: ('Phyla', 'phylaid'),
    3: ('Class', 'classid'),
    4: ('Order', 'orderid'),
    5: ('Family', 'familyid'),
    6: ('Genus', 'genusid'),
    7: ('Species', 'speciesid'),
}

# ``normalize`` code -> factor name handed to normalizeAlpha().
_NORM_FACTORS = {
    1: 'none',
    2: 'min',
    3: '10th percentile',
    4: '25th percentile',
    5: 'median',
}

# Rule printed above and below every taxa section of the text report.
_REPORT_RULE = '==============================================='


def _treatment_label(group_key):
    """Return the display label for one groupby key of the meta variables."""
    # Several meta variables give a tuple key; a single one gives a scalar,
    # which must not be iterated (a plain string would be joined character
    # by character, a number would raise).
    if isinstance(group_key, tuple):
        return ' & '.join(group_key)
    return group_key


def _alpha_report_section(taxa_key, label, indVar, analysis_text):
    """Format the text-area section describing the ANOVA of one taxa group."""
    parts = [
        _REPORT_RULE + '\n',
        'Taxa level: ' + str(taxa_key[0]) + '\n',
        'Taxa name: ' + str(taxa_key[1]) + '\n',
    ]
    if label is not None:
        parts.append('Dependent Variable: ' + label + '\n')
    parts.append('Independent Variable: ' + str(indVar) + '\n')
    parts.append(analysis_text + '\n')
    parts.append(_REPORT_RULE + '\n')
    parts.append('\n\n\n\n')
    return ''.join(parts)


def getCatAlphaData(request):
    """Run a one-way ANOVA per selected taxon and return chart/report JSON.

    The sample selection is read from ``request.session['selected_samples']``.
    For AJAX requests, ``request.GET["all"]`` must hold a JSON object with:

    * ``button``    - 1..4, dependent variable (see ``_ALPHA_BUTTONS``)
    * ``sig_only``  - 1 to report only groups with p <= 0.05, 0 for all
    * ``normalize`` - 1..5, normalisation factor (see ``_NORM_FACTORS``)
    * ``selectAll`` - 1..7 to use every id of one taxa level found in the
      selected samples instead of the ``taxa`` selection
    * ``meta``      - JSON string of the selected meta variables
    * ``taxa``      - JSON string of the selected taxa

    Returns an ``HttpResponse`` whose JSON body has the keys ``series``,
    ``xAxis``, ``yAxis``, ``text``, ``empty`` (0 when no series was
    produced, 1 otherwise) and ``res_table`` (HTML of the merged data).
    """
    ### get sample list from cookie
    samples = Sample.objects.all()
    # NOTE(review): pickle.loads executes arbitrary code if the pickled
    # payload can be forged. This is only safe while the session store
    # cannot be written by the client - confirm the session backend.
    samples.query = pickle.loads(request.session['selected_samples'])
    selected = samples.values_list('sampleid')
    qs1 = Sample.objects.all().filter(sampleid__in=selected)

    if not request.is_ajax():
        # NOTE(review): no response is built for non-AJAX requests. The
        # original nesting of the final return could not be recovered from
        # the collapsed source - confirm this matches the intended flow.
        return None

    params = simplejson.loads(request.GET["all"])
    button = int(params["button"])
    sig_only = int(params["sig_only"])
    norm = int(params["normalize"])
    selectAll = int(params["selectAll"])
    metaString = params["meta"]

    ### function to merge values on common keys
    metaDict = simplejson.JSONDecoder(
        object_pairs_hook=multidict).decode(metaString)

    ### function to create a meta variable DataFrame
    metaDF = catAlphaMetaDF(qs1, metaDict)
    mySet = list(set(metaDF['sampleid'].tolist()))

    ### function to create a taxa DataFrame
    taxaDF = taxaProfileDF(mySet)

    ### this taxaDict is from the dynatree (ajax call)
    taxaString = params["taxa"]
    taxaDict = simplejson.JSONDecoder(
        object_pairs_hook=multidict).decode(taxaString)

    # change dict if selectAll levels is on (avoids loading entire tree first)
    if selectAll in _SELECT_ALL_LEVELS:
        level_name, id_field = _SELECT_ALL_LEVELS[selectAll]
        taxaDict = {
            level_name: Profile.objects.all().filter(
                sampleid__in=mySet).values_list(
                id_field, flat=True).distinct()
        }

    factor = _NORM_FACTORS.get(norm, 'none')

    ### function to normalize the number of sequence reads per sample
    normDF = normalizeAlpha(taxaDF, taxaDict, mySet, factor)

    finalDF = metaDF.merge(normDF, on='sampleid', how='outer')
    value_cols = ['count', 'rel_abund', 'rich', 'diversity']
    finalDF[value_cols] = finalDF[value_cols].astype(float)
    pd.set_option('display.max_rows', finalDF.shape[0],
                  'display.max_columns', finalDF.shape[1],
                  'display.width', 1000)

    final_fieldList = list(metaDict)
    indVar = ' x '.join(final_fieldList)

    # An unknown button code selects no column: nothing is grouped, the
    # ANOVA is attempted on empty input and no variable label is printed.
    column, label = _ALPHA_BUTTONS.get(button, (None, None))

    report_parts = []
    seriesList = []
    xAxisDict = {}
    yAxisDict = {}

    ### group DataFrame by each taxa level selected
    for name1, group1 in finalDF.groupby(['rank', 'taxa_name', 'taxa_id']):
        ### group DataFrame by each meta variable selected
        trtList = []
        valList = []
        if column is not None:
            for name2, group2 in group1.groupby(final_fieldList)[column]:
                trtList.append(_treatment_label(name2))
                valList.append(list(group2))

        ### for taxa with only 1 species all values will be '1' and cause
        ### an anova error
        # Evaluated per taxa group so that one degenerate group does not
        # disable the analysis of every group that follows it.
        equal_error = (
            button == 3
            and group1['rich'].sum() == group1['rich'].count())

        ### One-way ANOVA with some error checking
        D = Anova1way()
        anova_ok = False
        if not equal_error:
            try:
                D.run(valList, conditions_list=trtList)
            except Exception:
                # Best effort: a failed analysis is reported in the text
                # output instead of aborting the whole request.
                anova_ok = False
            else:
                anova_ok = True
        if not anova_ok:
            # p = 1 keeps failed analyses out of the "significant" set.
            D['p'] = 1

        ### select significant (sig_only == 1) or all (sig_only == 0)
        ### ANOVAs for output (graph & text area)
        if sig_only == 1:
            include = D['p'] <= 0.05
        else:
            include = sig_only == 0
        if not include:
            continue

        if anova_ok:
            analysis_text = str(D)
        else:
            analysis_text = 'Analysis cannot be performed...'
        report_parts.append(
            _alpha_report_section(name1, label, indVar, analysis_text))

        # Chart series: mean of the dependent variable per treatment.
        means = group1.groupby(final_fieldList).mean()
        dataList = list(means[column]) if column is not None else []
        seriesList.append({'name': name1, 'data': dataList})

        # The axis descriptions are overwritten on every pass, so the last
        # reported group determines the categories that are sent back.
        xAxisDict['title'] = {'text': indVar}
        xAxisDict['categories'] = trtList
        yTitle = {}
        if label is not None:
            yTitle['text'] = label
        yAxisDict['title'] = yTitle

    finalDict = {
        'series': seriesList,
        'xAxis': xAxisDict,
        'yAxis': yAxisDict,
        'text': ''.join(report_parts),
        'empty': 1 if seriesList else 0,
    }

    finalDF.reset_index(drop=True, inplace=True)
    res_table = finalDF.to_html(classes="table display")
    res_table = res_table.replace('border="1"', 'border="0"')
    finalDict['res_table'] = str(res_table)

    res = simplejson.dumps(finalDict)
    return HttpResponse(res, content_type='application/json')