Example #1
0
def areSimilarLst(A, B, level=0.05):
    """Compare two equal-length lists with a chi-squared goodness-of-fit test.

    ``A`` is passed to ``stats.lchisquare`` as the expected frequencies and
    ``B`` as the observed frequencies.  The statistic and p-value are also
    printed to stdout, as before.

    Args:
        A: expected values.
        B: observed values; must have the same length as ``A``.
        level: significance level for the test (default 0.05).

    Returns:
        ``None`` when the lists differ in length; otherwise the tuple
        ``(similar, chiSquared, p_val)``.  ``similar`` is True when
        ``p_val > level``, i.e. when the test finds no significant
        difference between the two lists.

    Note:
        Earlier versions returned True when ``p_val <= level``, which is the
        case where the test *rejects* similarity; the flag was inverted
        relative to the function's name and documentation.
    """
    # The comparison is only defined for lists of the same length.
    if len(A) != len(B):
        return None

    chiSquared, p_val = stats.lchisquare(B, A)

    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print("chiSquared, p_val:  %s %s" % (chiSquared, p_val))

    # A small p-value means the observed list deviates significantly from the
    # expected one, so the lists count as similar only above the threshold.
    return p_val > level, chiSquared, p_val
       
    
#    
#W1 = numpy.random.rand(50).astype(numpy.float32);
#W2 = numpy.random.rand(50).astype(numpy.float32);
#W3 = numpy.random.poisson(1.0,50);
#print areSimilarLst(W1.tolist(),W2.tolist(),0.05);
#print areSimilarLst(W1.tolist(),W3.tolist(),0.05);
Example #2
0
    def data_table(self):
        """Build the list of HTML fragments that make up the data summary.

        When ``self.sequence`` has more than two points the list holds an
        introductory note, a goodness-of-fit comment chosen from the
        chi-squared p-value, and a table with the histogram image, the units,
        the observed and expected frequencies, the chi-squared sum, the
        p-value and the skewness.  When ``self.sequence`` is missing or has
        no length, a "No data!" warning is returned instead.  With zero to
        two points nothing is added.

        Returns:
            list: ``None`` as the first element (unchanged from the original
            behaviour), followed by the HTML strings in output order.
        """
        fragments = [None]

        try:
            number_of_points = len(self.sequence)
        except (AttributeError, TypeError):
            # No sequence attribute, or it has no length: nothing to tabulate.
            fragments.append("""<br><div class="warning">No data!  """)
            fragments.append("""Enter data or select from sample tab.</div>""")
            return fragments

        if number_of_points <= 2:
            return fragments

        # Module-level global; element 3 is printed in every "Units" cell.
        ndrl = My_Global_NDRL

        # NOTE(review): rows and the label cells are never closed (no </TR>);
        # left as-is so the emitted markup is unchanged.
        row_open = "<TR>"
        cell_open = """<TD align="right">"""
        cell_close = "</TD>"
        wide_cell = """<TD colspan="%s" align="left" >"""

        # Lower p-value bounds, highest first; the first match wins.
        fit_comments = (
            (0.98, """ Your data is an excellent fit to a "log normal distribution" so you may be confident in the output of this and any other statistical analysis on this data. """),
            (0.80, """Your data is a reasonable fit to a "log normal distribution" but you should investigate for potential reasons for the discrepancy.  <br>\n"""
                   """<strong>Any local DRL or other statistic claculated from this data should be treated with some caution.</strong> """),
            (0.50, """<div class="warning">Your data is a very poor fit to a "log normal distribution" so you should investgate why, as any attempt to calculate a local DRL or other statistic will be invalid.</div> """),
            (0.20, """<div class="warning">Your data very random and not behaving like a set of radiation total doses.  You should investgate why, as any attempt to calculate a local DRL or other statistic will be invalid.  The data comprises of mixed procedures or mixed units.</div> """),
        )
        # Used when the p-value is below every bound above.
        comment = """<div class="warning">Your data is very, very random and not behaving like a set of radiation total doses so you should investgate why, as any attempt to calculate a local DRL or other statistic will be invalid.  Perhaps the data comprises of mixed procedures or mixed units.</div> """
        p_value = stats.lchisquare(self.freq_bin(), self.normal())[1]
        for lower_bound, text in fit_comments:
            if p_value >= lower_bound:
                comment = text
                break

        intro = (
            """ <div = "information"><p>This is a summary of your data that includes so statistical tests for validity. A good quality set of data will fit a "log normal distribution". When plotted correctly, a log normal distribution looks like a smooth bell shaped curve.  If your data does not fit such a curve, then you should seek a reason for the discrepancy.</p>\n"""
            """<p>You may cut and paste data from this page into a spreadsheet or other document. (Formatting is usually preserved better when pasting into a spreadsheet).</p>\n"""
            """</div>"""
        )
        fragments.append(intro)
        fragments.append(comment)
        fragments.append("""<table style="text-align: left; width: 100%;" border="0" cellpadding="1" cellspacing="2"> <tbody> """)

        # Histogram image, spanning one column per bin limit.
        fragments.append(row_open)
        chart = self.standard_normal_histo_chart()
        cols_span = len(self.bin_limits())
        fragments.append(wide_cell % cols_span)
        fragments.append("""\n<img alt="Plot of input data" src="%s"/> <br>\n\n""" % chart)

        # Units: one cell per bin (one fewer than the number of bin limits).
        fragments.extend((row_open, cell_open, "Units"))
        for _ in range(len(self.bin_limits()) - 1):
            fragments.extend((cell_open, "%s" % ndrl[3], cell_close))

        # Observed frequencies: first element of standard_normal_freq_bin().
        fragments.extend((row_open, cell_open, "Observed Frequency"))
        for observed in self.standard_normal_freq_bin()[0]:
            fragments.extend((cell_open, "%d" % observed, cell_close))

        # Expected frequencies.
        fragments.extend((row_open, cell_open, "Expected Frequency"))
        for expected in self.standard_z():
            fragments.extend((cell_open, "%d" % expected, cell_close))

        # Explanation of the standard normal histogram.
        fragments.append(row_open)
        cols_span = len(self.bin_limits())
        fragments.append(wide_cell % cols_span)
        fragments.append(
            """\n\n<p>The standard normal histogram plots the data as bell shaped curve of with a mean of 0 and a standard deviation of 1.  This view allows for easier interpretation of anomalies in the data set. </p><br>\n\n""")

        # NOTE(review): the test is recomputed here exactly as the original
        # code did; if freq_bin()/normal() have no side effects the first
        # result could be reused - confirm before changing.
        fit = stats.lchisquare(self.freq_bin(), self.normal())

        fragments.extend((row_open, cell_open, "Sum of Chi Squared  "))
        fragments.append(wide_cell % cols_span)
        fragments.append(
            """ (The closer this value is to zero the better fit your data is to a normal distribution ) =  %.2f """
            % fit[0])

        fragments.extend((row_open, cell_open, "P value "))
        fragments.append(wide_cell % cols_span)
        fragments.append(
            """ (The closer this is to 100%% higher the probability that you have a normal distribution) = %.2f %% """
            % (fit[1] * 100))

        fragments.extend((row_open, cell_open, "Skewness "))
        skewness = self.skewness()
        fragments.append(wide_cell % cols_span)
        fragments.append(""" left = %s, right = %s """ %
                         (skewness[0], skewness[1]))

        fragments.append("</tbody>\n\t</table>")
        return fragments
Example #3
0
	def data_table(self):
	    """Build the list of HTML fragments that make up the data summary.

	    When ``self.sequence`` has more than two points the list holds an
	    introductory note, a goodness-of-fit comment chosen from the
	    chi-squared p-value, and a table with the histogram image, the units,
	    the observed and expected frequencies, the chi-squared sum, the
	    p-value and the skewness.  When ``self.sequence`` is missing or has
	    no length, a "No data!" warning is returned instead.  With zero to
	    two points nothing is added.

	    Returns:
	        list: ``None`` as the first element (unchanged from the original
	        behaviour), followed by the HTML strings in output order.
	    """
	    fragments = [None]

	    try:
	        number_of_points = len(self.sequence)
	    except (AttributeError, TypeError):
	        # No sequence attribute, or it has no length: nothing to tabulate.
	        fragments.append("""<br><div class="warning">No data!  """)
	        fragments.append("""Enter data or select from sample tab.</div>""")
	        return fragments

	    if number_of_points <= 2:
	        return fragments

	    # Module-level global; element 3 is printed in every "Units" cell.
	    ndrl = My_Global_NDRL

	    # NOTE(review): rows and the label cells are never closed (no </TR>);
	    # left as-is so the emitted markup is unchanged.
	    row_open = "<TR>"
	    cell_open = """<TD align="right">"""
	    cell_close = "</TD>"
	    wide_cell = """<TD colspan="%s" align="left" >"""

	    # Lower p-value bounds, highest first; the first match wins.
	    fit_comments = (
	        (0.98, """ Your data is an excellent fit to a "log normal distribution" so you may be confident in the output of this and any other statistical analysis on this data. """),
	        (0.80, """Your data is a reasonable fit to a "log normal distribution" but you should investigate for potential reasons for the discrepancy.  <br>\n"""
	               """<strong>Any local DRL or other statistic claculated from this data should be treated with some caution.</strong> """),
	        (0.50, """<div class="warning">Your data is a very poor fit to a "log normal distribution" so you should investgate why, as any attempt to calculate a local DRL or other statistic will be invalid.</div> """),
	        (0.20, """<div class="warning">Your data very random and not behaving like a set of radiation total doses.  You should investgate why, as any attempt to calculate a local DRL or other statistic will be invalid.  The data comprises of mixed procedures or mixed units.</div> """),
	    )
	    # Used when the p-value is below every bound above.
	    comment = """<div class="warning">Your data is very, very random and not behaving like a set of radiation total doses so you should investgate why, as any attempt to calculate a local DRL or other statistic will be invalid.  Perhaps the data comprises of mixed procedures or mixed units.</div> """
	    p_value = stats.lchisquare(self.freq_bin(), self.normal())[1]
	    for lower_bound, text in fit_comments:
	        if p_value >= lower_bound:
	            comment = text
	            break

	    intro = (
	        """ <div = "information"><p>This is a summary of your data that includes so statistical tests for validity. A good quality set of data will fit a "log normal distribution". When plotted correctly, a log normal distribution looks like a smooth bell shaped curve.  If your data does not fit such a curve, then you should seek a reason for the discrepancy.</p>\n"""
	        """<p>You may cut and paste data from this page into a spreadsheet or other document. (Formatting is usually preserved better when pasting into a spreadsheet).</p>\n"""
	        """</div>"""
	    )
	    fragments.append(intro)
	    fragments.append(comment)
	    fragments.append("""<table style="text-align: left; width: 100%;" border="0" cellpadding="1" cellspacing="2"> <tbody> """)

	    # Histogram image, spanning one column per bin limit.
	    fragments.append(row_open)
	    chart = self.standard_normal_histo_chart()
	    cols_span = len(self.bin_limits())
	    fragments.append(wide_cell % cols_span)
	    fragments.append("""\n<img alt="Plot of input data" src="%s"/> <br>\n\n""" % chart)

	    # Units: one cell per bin (one fewer than the number of bin limits).
	    fragments.extend((row_open, cell_open, "Units"))
	    for _ in range(len(self.bin_limits()) - 1):
	        fragments.extend((cell_open, "%s" % ndrl[3], cell_close))

	    # Observed frequencies: first element of standard_normal_freq_bin().
	    fragments.extend((row_open, cell_open, "Observed Frequency"))
	    for observed in self.standard_normal_freq_bin()[0]:
	        fragments.extend((cell_open, "%d" % observed, cell_close))

	    # Expected frequencies.
	    fragments.extend((row_open, cell_open, "Expected Frequency"))
	    for expected in self.standard_z():
	        fragments.extend((cell_open, "%d" % expected, cell_close))

	    # Explanation of the standard normal histogram.
	    fragments.append(row_open)
	    cols_span = len(self.bin_limits())
	    fragments.append(wide_cell % cols_span)
	    fragments.append(
	        """\n\n<p>The standard normal histogram plots the data as bell shaped curve of with a mean of 0 and a standard deviation of 1.  This view allows for easier interpretation of anomalies in the data set. </p><br>\n\n""")

	    # NOTE(review): the test is recomputed here exactly as the original
	    # code did; if freq_bin()/normal() have no side effects the first
	    # result could be reused - confirm before changing.
	    fit = stats.lchisquare(self.freq_bin(), self.normal())

	    fragments.extend((row_open, cell_open, "Sum of Chi Squared  "))
	    fragments.append(wide_cell % cols_span)
	    fragments.append(
	        """ (The closer this value is to zero the better fit your data is to a normal distribution ) =  %.2f """
	        % fit[0])

	    fragments.extend((row_open, cell_open, "P value "))
	    fragments.append(wide_cell % cols_span)
	    fragments.append(
	        """ (The closer this is to 100%% higher the probability that you have a normal distribution) = %.2f %% """
	        % (fit[1] * 100))

	    fragments.extend((row_open, cell_open, "Skewness "))
	    skewness = self.skewness()
	    fragments.append(wide_cell % cols_span)
	    fragments.append(""" left = %s, right = %s """ %
	                     (skewness[0], skewness[1]))

	    fragments.append("</tbody>\n\t</table>")
	    return fragments