Ejemplo n.º 1
0
 def gen_ctx():
     """
     Build the sample contexts for the chi-square goodness-of-fit problems.

     Four Chi2GoodnessOfFitData objects are created:

     * the default context,
     * the default context with observations sampled from a non-uniform
       distribution (``o_dist``),
     * the Pass The Pigs game, whose story embeds a table of the claimed
       distribution,
     * the "which night do students do homework" survey.

     Notes:
     -----
     ``seed`` and ``fmt`` are not defined in this function; they are read
     from the enclosing scope. ``fmt == 'html'`` selects the HTML rendering
     of the embedded table (plus its CSS), anything else selects LaTeX.

     Returns:
     --------
     list
         ``[ctx, ctx1, ctx2, ctx3]``
     """
     ctx = Chi2GoodnessOfFitData(seed = seed)
 
     # Here we sample from a non-uniform distribution for the die!
     ctx1_args = {
         'o_dist':[1/5, 1/5, 1/5, 1/5, 1/10, 1/10],
         'alternative':"The die is not fair.",
         'note':"""
             For this problem the truth is that the die is not fair. \
             If you accepted H<sub>0</sub>, then this is a <strong>miss</strong>
             (Type II error).
             """
     }
     
     ctx1 = Chi2GoodnessOfFitData(seed = seed, **ctx1_args)
     
     ######################################
     # Pass the pigs game: Due to embedding the table in the story
     # this one is a little harder to set up. The problem is that this 
     # pollutes namespace for future contexts. 
     
     # Prefix with _ to avoid namespace pollution
     _outcomes =  ['Pink', 'Dot', 'Razorback', 'Trotter', 
                  'Snouter', 'Leaning Jowler']
     _t_dist = [.35, .30, .20, .10, .04, .01]
     tb = Table(_t_dist, col_headers = _outcomes, 
                row_headers = ['Position', 'Expected Frequency'])
     
     # The table is pre-rendered because it is interpolated into the story.
     if fmt == 'html':
         styles = Table.get_style()
         tbl = tb.html()
     else:
         styles = ""
         tbl = tb.latex()
     
     # Sample size is a multiple of 10 between 200 and 300.
     _s_size = random.randint(20, 30) * 10
     
     ctx2_args = {
         'outcome_type':'Position',
         'outcomes':_outcomes,
         't_dist':_t_dist,
         's_size':_s_size,
         'story':"""Pass The Pigs&reg; is a game from Milton-Bradley&#8482; 
             which is essentially a dice game, except that instead of dice 
             players toss small plastic pigs that can land in any of 6 
             positions. For example, you roll a 'trotter' if the pig falls 
             standing on all 4 legs. It is claimed that the distribution
             for the 6 positions are:
             
             {styles}
             {tbl}            
             
             To test this you toss a pig {s_size} times and get the observed 
             frequencies below:
             """.format(styles = styles, tbl = tbl, s_size = _s_size),
         'null':"The observed values of the positions of the pigs agrees \
         with the expected distribution.",
         'alternative':"The observed values of the positions of the pigs \
         differs from what should be the case if the expected distribution\
         was correct.",
         # The opening tag used to be <string>, which left the emphasis
         # markup unbalanced.
         'note':"""
             In this case the observed data was sampled from the given
             distribution. So if the null hypothesis is rejected, this
             is <strong>a Type-I error</strong> or <strong>a false 
             positive</strong>.
             """
     }
     
     ctx2 = Chi2GoodnessOfFitData(seed = seed, **ctx2_args)
     
     ###########################################
     ## 11.2 from text
     # Sample size is a multiple of 10 between 50 and 100.
     s_size = random.randint(5, 10) * 10
     ctx3_args = {
         'outcomes':['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday'],
         't_dist':np.ones(7) * 1/7,
         's_size':s_size,
         'a_level': random.choice([0.1,0.01,0.05]),
         'outcome_type':'Day of Week',
         'story':"""
               Teachers want to know which night each week their students are 
               doing most of their homework. Most teachers think that students 
               do homework equally throughout the week. Suppose a random sample 
               of %s students were asked on which night of the week they 
               did the most homework. The results were distributed as in  
               """ % s_size,
         'null':"Students are equally likely to do the majority of their \
         homework on any of the seven nights of the week.",
         'alternative':"Students are more likely to do the majority of their \
         homework on certain nights rather than others."
     }
         
     ctx3 = Chi2GoodnessOfFitData(seed = seed, **ctx3_args)
         
     return [ctx, ctx1, ctx2, ctx3]
Ejemplo n.º 2
0
 def explanation(self, path = 'explanations', 
             expanded = True, a_type = 'preview',                     
             force = False, xkcd = False, fmt = 'latex'):
     """
     Provides an explanation of the hyperbola.
 
     The text walks from the equation to the normal form (optionally via
     completing the squares), then lists the center, axes, vertices,
     co-vertices, asymptotes and foci, and finishes with a figure next to
     a summary table. As a side effect ``self.show`` is called to produce
     the image the figure refers to.
 
     Parameters: (Largely shared with show)
     -----------
     path      : str
         The output directory for image files.
     expanded  : bool
         If True start from the expanded equation and complete the
         squares step by step; otherwise start from the normal form.
     a_type    : String ('MC', 'preview')
         If 'preview', then set up for previewing.
     force     : bool
         Passed through to ``self.show``.
     xkcd      : bool
         Passed through to ``self.show``.
     fmt       : String ('html', 'latex')
         If 'html' the table CSS is prepended and the summary table is
         rendered with ``Table.html()``; if 'latex' ``Table.latex()`` is
         used.
 
     Returns:
     --------
     str
         The explanation markup.
     """
     
     if fmt == 'html':
         ex = Table.get_style()
     else:
         ex = ""
 
     file_name = path + "/" + self.url + ".png"
 
     if a_type == 'preview':
         # NOTE(review): presumably re-escapes the '%' of URL-encoded
         # characters for the preview renderer -- confirm against html_image.
         file_name = file_name.replace('%2','%252')
 
     # Multiplying by a^2 * b^2 clears the denominators of the normal form.
     const = self.a ** 2 * self.b **2
     x_part = const*(self(x, self.k).expand() - self(0, self.k))
     y_part = const*(self(self.h, y).expand() - self(self.h, 0))
     rhs = const - const*self(0, 0)
 
     # The following is an attempt to avoid lots of code duplication.
     # X is the variable of the positive (transverse) term, Y the other one;
     # (TX, TY) is the unit direction of the transverse axis.
     if self.trans == 'x':
         X = x
         Y = y
         X_part = x_part
         Y_part = y_part
         HH = self.h
         KK = self.k
         TX = 1
         TY = 0
     else:
         X = y
         Y = x
         X_part = y_part
         Y_part = x_part
         HH = self.k
         KK = self.h
         TX = 0
         TY = 1
     # To reduce typing
     A = self.a
     B = self.b
     H = self.h
     K = self.k
     C = self.c 
 
     # Symbols used for the generic (letter) form of each formula.
     k, h, a, b, c = sym.symbols('k h a b c')
 
     X_poly = X_part.collect(X)
     Y_poly = Y_part.collect(Y)
     gcd_X = sym.gcd(X_poly.coeff(X,1), X_poly.coeff(X,2))
     gcd_Y = sym.gcd(Y_poly.coeff(Y,1), Y_poly.coeff(Y,2))
     b_X = X_poly.coeff(X,1)/gcd_X
     b_Y = -Y_poly.coeff(Y,1)/gcd_Y        
 
     # A leading factor of 1 is not printed.
     lead_X = gcd_X if gcd_X != 1 else ""
     lead_Y = gcd_Y if gcd_Y != 1 else ""
 
     if expanded:
         ex += "The first step to finding the graph of $$%s = 0$$ is\
             to find the normal form of the equation." \
         % sym.latex((const*(self(x, y) - 1)).expand())
 
         ex += " To begin move the constant term to the right hand side. \
             This gives: $$(%s) + (%s) = %s$$" \
                 % (sym.latex(x_part), sym.latex(y_part), rhs)
 
         ex += "Next factor common factors from the $_x$_ and $_y$_ terms to get: \
             $$%s(%s) - %s(%s) = %s$$" \
         % (lead_X, sym.latex(X_part/gcd_X), 
            lead_Y, sym.latex(-Y_part/gcd_Y), rhs)
 
         ex += "Now complete the squares: \
             $$%s\\left(%s  %s\\right) \
             - %s\\left(%s  %s\\right) = \
             %s %s %s$$"\
              % (lead_X, sym.latex(X_part/gcd_X), 
                 "+ \\left(\\frac{%s}{2}\\right)^2" % b_X if b_X != 0 else "", 
                 lead_Y, sym.latex(-Y_part/gcd_Y), 
                 "+ \\left(\\frac{%s}{2}\\right)^2" % b_Y if b_Y != 0 else "", 
                 rhs, 
                 "+ %s\\left(\\frac{%s}{2}\\right)^2" % (gcd_X, b_X) if b_X != 0 else "", 
                 "- %s\\left(\\frac{%s}{2}\\right)^2" % (gcd_Y, b_Y) if b_Y != 0 else "")
 
         ex += "This simplifies to: $$%s\\left(%s\\right)^2 - %s\\left(%s\\right)^2 = %s$$" \
             % (lead_X,
                sym.latex(X + b_X/2),
                lead_Y,
                sym.latex(Y + b_Y/2),
                const)
 
         ex += "Lastly divide both sides by the right hand side to get: \
             $$%s\\left(%s\\right)^2 - %s\\left(%s\\right)^2 = 1$$" \
             % (sym.latex(gcd_X/const) if gcd_X/const != 1 else "",
                sym.latex(X + b_X/2),
                sym.latex(gcd_Y/const) if gcd_Y/const != 1 else "",
                sym.latex(Y + b_Y/2))
 
         ex += "This simplifies to the final normal form: \
             $$\\frac{(%s)^2}{%s^2} - \\frac{(%s)^2}{%s^2} = 1$$" \
             % (sym.latex(X-HH), A, sym.latex(Y-KK), B)
     else:
         ex += "The hyperbola is given in standard normal form: \
         $$\\frac{(%s)^2}{%s^2} - \\frac{(%s)^2}{%s^2} = 1$$" \
             % (sym.latex(X-HH), A, sym.latex(Y-KK), B)
 
     ex += "From this we can read off the center to be at $_(h,k) = (%s, %s)$_. "\
         % (H,K)
 
     # The transverse axis is the line through the center on which the
     # non-transverse variable is constant: Y = KK (y = k or x = h).
     ex += "The transverse (major) axis is along $_%s = %s$_ and has length $_2a = %s$_. "\
         % (sym.latex(Y), KK, 2*A)
 
     ex += "The vertices are $_(%s, %s) = (%s,%s)$_ and $_(%s, %s) = (%s, %s)$_. "\
         % (sym.latex(h - a * TX), sym.latex(k - a * TY), H - A * TX, K - A * TY,
            sym.latex(h + a * TX), sym.latex(k + a * TY), H + A * TX, K + A * TY)
 
     # The conjugate axis is perpendicular to it: X = HH (x = h or y = k).
     ex += "The conjugate (minor) axis is along $_%s = %s$_ and has length \
           $_2b = %s$_. " % (sym.latex(X), HH, 2*B)
 
     # The generic form uses the symbol b; the numeric form uses B.
     ex += "The co-vertices are $_(%s, %s) = (%s,%s)$_ and $_(%s, %s) = (%s, %s)$_. "\
         % (sym.latex(h - b * (1 - TX)), sym.latex(k - b * (1 - TY)), 
                 H - B * (1 - TX), K - B * (1 - TY), 
                 sym.latex(h + b * (1 - TX)), sym.latex(k + b * (1 - TY)), 
                 H + B * (1 - TX), K + B * (1 - TY))
 
     # With the normal form (X-HH)^2/a^2 - (Y-KK)^2/b^2 = 1 the slope of the
     # asymptotes (as functions of x) is b/a when the transverse axis is
     # horizontal and a/b when it is vertical.
     if TX:
         slope_sym, slope_num, slope_den = "\\frac{b}{a}", B, A
     else:
         slope_sym, slope_num, slope_den = "\\frac{a}{b}", A, B
     k_sign = "+" if K >= 0 else "-"
     k_abs = sym.Abs(K)
 
     ex += "The two asymptotes are $_y = \\pm%s(%s) %s %s \
           = \\pm\\frac{%s}{%s}(%s) %s %s $_. " \
             % (slope_sym, sym.latex(x - H), k_sign, k_abs, 
                 slope_num, slope_den, sym.latex(x - H), k_sign, k_abs)
 
     ex += "Finally, the focal length is $_c = \\sqrt{a^2+b^2}=%s$_ and the foci \
         are located at $_(%s, %s) = (%s,%s)$_ and $_(%s, %s) = (%s, %s)$_. "\
         % (sym.latex(C),
                 sym.latex(h - c * TX), sym.latex(k - c * TY), 
                 sym.latex(H - C * TX), sym.latex(K - C * TY), 
                 sym.latex(h + c * TX), sym.latex(k + c * TY), 
                 sym.latex(H + C * TX), sym.latex(K + C * TY))
 
     # Summary table shown next to the figure.
     data =  [
             ['center', '$_(%s, %s)$_' % (H, K)],
             ['vertices', '$_(%s, %s), (%s, %s)$_' \
                 % tuple(map(sym.latex, [H - A * TX, K - A * TY, 
                                         H + A * TX, K + A *TY]))],
             ['length of transverse axis', '$_%s$_' % sym.latex(2*A)],
             ['co-vertices', '$_(%s, %s), (%s, %s)$_' \
                 % tuple(map(sym.latex, [H - B * (1 - TX), K - B * (1 - TY), 
                                         H + B * (1 - TX), K + B * (1 - TY)]))],
             ['length of conjugate axis', '$_%s$_'% sym.latex(2*B)],
             ['foci', '$_(%s, %s), (%s, %s)$_' \
                 % tuple(map(sym.latex, [H - C * TX, K - C * TY, 
                                         H + C * TX, K + C * TY]))],
             # The sign is plain text, so it is not passed through sym.latex.
             ['asymptotes', '$_y = \\pm\\frac{%s}{%s}(%s) %s %s$_' \
                 % (sym.latex(slope_num), sym.latex(slope_den), 
                    sym.latex(x - H), k_sign, sym.latex(k_abs))] 
 
         ]
     
     tb = Table(data)
     
     if fmt == 'latex':
         tbl = tb.latex()
     else:
         tbl = tb.html()
 
     img = html_image(image_url = file_name, width = '300px', 
                      preview = (a_type == 'preview'))
 
     ex +="""      
     %s
     <div class='outer-container-rk'>
         <div class='centering-rk'>
             <div class='container-rk'>
                 <figure>
                     %s
                     <figcaption>$_%s = 1$_</figcaption>
                 </figure>
             </div>
 
             <div class='container-rk'>
                 %s
             </div>
         </div>
     </div>
     """ % (Table.get_style(), img, self.latex, tbl)
 
     # Produce the image referenced by file_name above.
     self.show(path = path, file_name = self.url, force = force, 
               xkcd = xkcd)
 
     return ex
Ejemplo n.º 3
0
    def stem(self, context = None, table = None, q_type = None,
             a_type = 'preview', force = False, fmt = 'html'):
        r"""
        This will generate a problem for $\chi^2$ goodness of fit.
        
        Parameters:
        ----------
        context : context object
            This describes the problem. A default context is used if this is 
            None.
        table   : string [None, 'hist', 'table'] 
            Display the observed data as a table (html/latex) or as a 
            histogram. If None randomly choose.
        q_type  : string [None, 'STAT', 'HT', 'PVAL'] 
            If None randomly choose. If 'STAT' just compute the chi2 statistic 
            and the degrees of freedom. If 'PVAL' find the degrees of freedom
            and the p-value for the data. If 'HT' compute the p-value for the  
            data and determine whether or not to reject the null hypothesis.
        a_type  : string
            This is either "MC" or "preview" for now.
        force   : bool
            Passed through to the context's plotting methods.
        fmt     : String ['html', 'latex']
            Use nice CSS/HTML (responsive) or plain LaTeX (static)
        
        Returns:
        -------
        For 'preview' the question, choices and explanation as one string.
        For 'MC' the result of tools.fully_formatted_question. None if the
        context is invalid (a warning is issued) or a_type is anything else.
        
        Notes:
        -----
        The default here is to simulate a roll of a die 30 times and record the
        frequency of values. The :math:`\chi^2` test should test whether the 
        die is fair at an :math:`\alpha` level of 5%.
        """
        
        if q_type is None:
            q_type = random.choice(['STAT', 'HT', 'PVAL'])

        if table is None:
            table = random.choice(['table', 'hist'])
        
        if context is None:
            context = Chi2GoodnessOfFitData()
            
        if not context.is_valid:
            warnings.warn("Context had invalid cell counts.")
            return       
        
        # Generate unique name
        q_name = hash(context)
        self.cache.add(q_name)
        self.hist = str(q_name) + "_hist.png"
        self.solution_plot = str(q_name) + "_plot.png"
        
        if a_type == 'preview':
            question_stem = "<h2>Question</h2><br>"
        else:
            question_stem = ""

        if fmt == 'html':
            question_stem += Table.get_style()
        
        question_stem += "<div class='par'>" + context.story + "</div>\n" 
        if table == 'table':
            if fmt == 'html':
                question_stem += context.observed.html()
            else:
                question_stem += context.observed.latex()
            
        elif table == 'hist':
            
            fname = context.hist(path = self.path, force = force)
                                  
            img = html_image(fname, width = '300px', 
                             preview = (a_type == 'preview'))
            
            # The figure containers rely on the CSS from Table.get_style().
            question_stem += Table.get_style()
            
            question_stem +="""      
            <div class='outer-container-rk'>
                <div class='centering-rk'>
                    <div class='container-rk'>
                        <figure>
                            %s
                            <figcaption>%s</figcaption>
                        </figure>
                    </div>
                </div>
            </div>
            """ % (img, "Observed Frequencies")                
        
        N = len(context.outcomes)     
        df =  N - 1        
        chi2eq = "$$\\chi^2_{%s}=\\sum_{i=1}^{%s}\\frac{(O_i-E_i)^2}{E_i}\
                = %.3g$$" % (df, N, context.chi2_stat)
                
        if q_type == 'STAT':
            question_stem += "Compute the $_\\chi^2$_-statistic and degrees \
                of freedom for the given observed values."

        elif q_type == 'PVAL':
           
            # df is deliberately rendered as the literal text 'df': finding
            # its value is part of the question.
            question_stem += """The $_\\chi^2$_ statistic is 
                {chi2eq}
                                 
                
            Use this information to find the degrees of freedom (df) and the 
            $_p\\text{{-value}} = P(\\chi^2_{{{df}}} > {chi2:.3g})$_.
            """.format(chi2eq=chi2eq, N=N, df = 'df', chi2=context.chi2_stat)
                       
        elif q_type == 'HT':
            
            question_stem += """The degrees of freedom are $_df = {N} - 1 = 
            {df}$_ and the $_\\chi^2$_ statistic is {chi2eq} 
            
            Use this information to conduct a hypothesis test with 
            $_\\alpha = {a_level}$_. Choose the answer that best captures
            the null hypothesis and conclusion.
            """.format(N = N, df = df, chi2eq = chi2eq,
                       a_level = context.a_level)
       
        if fmt == 'html':
            explanation = Table.get_style()
        else:
            explanation = ""
        
        if a_type == 'preview':
            explanation += "<br><h2>Explanation</h2><br>"
            
        tb1 = Table(context.t_dist, col_headers = context.outcomes,
                    row_headers = [context.outcome_type, 'Probabilities'])
                    
        if fmt == 'html':                    
            tbl1 = tb1.html()
            tbl2 = context.oe.html()       
        else:
            tbl1 = tb1.latex()
            tbl2 = context.oe.latex()   
        
        explanation += "<div class='par'>To find the expected counts multiply the total\
            number of observations by the expected probability for an outcome. \
            The probabilities for the expected outcomes are summarized in the \
            following table:"
        explanation += tbl1
        explanation += "and there are %s observations." % context.s_size
        explanation += " So the expected and observed counts are:<br>"
        explanation += tbl2 
        explanation += "</div>"
        
        explanation += "<div class='par'>The degrees of freedom are $_df = {N} - 1 = \
        {df}$_ and the $_\\chi^2$_-statistic is:{chi2eq}</div>"\
                .format(N=N,df=df,chi2eq=chi2eq)
        
        if q_type in ['HT','PVAL']:
            
            # The survival function is the upper tail P(chi2 > stat); it is
            # more accurate than 1 - cdf when the p-value is very small.
            rv = stats.chi2(df)
            p_val = rv.sf(context.chi2_stat)
            
            fname = context.show(path = self.path, force = force)
                    
            img = html_image(fname, width = '300px', 
                             preview = (a_type == 'preview'))
            
            caption = """
                Light shading (right of the red line) indicates the p-value.
                <br>
                The darker shading indicate the $_\\alpha = $_ {a_level:.0%} 
                level.<br>
                The background histogram is a bootstrap sampling distribution.
                """.format(a_level = context.a_level)
            explanation +="""
            The p-value for this data is:
                $$\\text{p-value} = P(\\chi^2_{%s} > %.3g) = %.4g%s$$
            <div class='outer-container-rk'>
                <div class='centering-rk'>
                    <div class='container-rk'>
                        <figure>
                            %s
                            <figcaption>%s</figcaption>
                        </figure>
                    </div>
                </div>
            </div>
            """ % (df, context.chi2_stat, p_val * 100, '\\%', img, caption)
        
        if q_type == 'HT':
            
            if p_val < context.a_level:
                
                explanation += """
                    <div class='par'>The p-value is less than the 
                    $_\\alpha$_-level so the null hypothesis is rejected. 
                    That is, we accept the alternative hypothesis:
                    <strong>H<sub>a</sub>: {alt} </strong>
                
                    More precisely, assuming the null hypothesis, there
                    is only a {p_val:.2%} probability due to 
                    random chance in sampling that
                    the difference in the expected and observed data is at 
                    least this large.</div>
                    """.format(alt=context.alternative, p_val=p_val)
            else:
                explanation += """
                    <div class='par'>The p-value is greater than the $_\\alpha$_-level so
                    the null hypothesis
                    is not rejected. Precisely, assuming the null hypothesis
                    <strong>H<sub>0</sub>: {null}</strong>,
                    there is a {p_val:.2%} probability due to 
                    random chance in sampling that
                    the difference in the expected and observed data is at 
                    least this large.</div>
                    """.format(null=context.null, p_val=p_val)
                    
            explanation += """
                <div class='par'>Note: {note}</div>                
                """.format(note=context.note)
        
        # The first entry is the answer, the rest are distractors.
        errors = self.gen_errors(q_type, context)
        
        if a_type == 'preview':
            errs = [[er] for er in errors]
            choices = "<br><h2>Choices</h2><br>"
            tb = Table(errs, row_headers = ['Answer'] + ['Distractor']*4)
         
            # Choices need the richer html structure in preview.
            choices += Table.get_style() + tb.html()   
                        
            if fmt == 'html':
                return question_stem + choices +  explanation
            else:
                return question_stem.replace("<div ","<p ")\
                        .replace("</div>","</p>") + choices + \
                        explanation.replace("<div ","<p ")\
                        .replace("</div>","</p>")
                        
        elif a_type == 'MC':
            distractors = list(errors)
            
            if fmt == 'latex':
                question_stem = question_stem.replace("<div ","<p ")\
                        .replace("</div>","</p>")
                explanation = explanation.replace("<div ","<p ")\
                        .replace("</div>","</p>")
                distractors = [err.replace("<div ","<p ")\
                        .replace("</div>","</p>") for err in distractors]
                        
            # Collapse all runs of whitespace. This has to start from the
            # (possibly converted) distractors, not from errors, or the
            # <div> -> <p> conversion above is thrown away.
            question_stem = ' '.join(question_stem.split())
            distractors = [' '.join(err.split()) for err in distractors]
            explanation = ' '.join(explanation.split()) + "\n"

            return tools.fully_formatted_question(question_stem, explanation, 
                                                  answer_choices=distractors)
        
        # 'Match' and any other answer type are not implemented yet.
        return None