Python extractField Examples, samples.extractField Python Examples

Example #1

0

Show file

File: simulations.py Project: Rhombus13/Calvin

def isLinearGrowth(fld, minRange):
    """
    This function looks at how linearly fld grows. The closer it can come to a straight line that
    goes through all the values of fld (assuming even growth along the other axis), the higher the
    confidence.

    fld      -- name of the sample field to examine
    minRange -- minimum required difference between the largest and smallest value of fld
                among the samples; below this the samples are reported as insufficiently
                distributed

    Returns a SimResult whose conclusion is "even distribution of sample property '<fld>'".
    """

    conclusion = "even distribution of sample property '" + fld + "'"

    if not samples.sampleList:
        #indexing [0] / [-1] below would raise IndexError on an empty list, so report
        #this the same way the 'fewer than 2 samples' branch further down does
        return SimResult(confidence.Confidence(confidence.Applic.df, confidence.Validity.prob),
                         conclusion, 'fewer than 2 samples', '')

    samples.sampleList.sort(key=lambda x: samples.extractField(x, fld))
    plot = __getPlot('id', fld)

    #the list is sorted by fld, so last minus first is the full range of values
    if (samples.sampleList[-1][fld] - samples.sampleList[0][fld]) < minRange:
        return SimResult(confidence.Confidence(confidence.Applic.cf, confidence.Validity.sound),
                         conclusion, 'insufficient distribution of samples', plot)

    fldList = samples.getAllFlds(fld)
    if len(fldList) < 3:
        #can't check for *even* distribution, but they are not right next to each other
        #if we got here at all, I think.
        if len(fldList) == 2:
            #app is the NEGATED near-equality of the two values, so when app holds the
            #samples are *not* about equal (assumes isTrue() means "applies" -- TODO confirm).
            #The old "x and '' or 'not '" form always produced 'not ' because '' is falsy.
            app = -observations.neareq(fldList[0], fldList[1])
            return SimResult(confidence.Confidence(app, confidence.Validity.plaus),
                             conclusion,
                             '2 samples ' + ('not ' if app.isTrue() else '') + 'about equal',
                             plot)
        else:
            return SimResult(confidence.Confidence(confidence.Applic.df, confidence.Validity.prob),
                             conclusion, 'fewer than 2 samples', '')

    #regress the values against their position in the list (even spacing on the other axis)
    line = stats.linregress(range(len(fldList)), fldList)

    #line[0] is slope
    #line[1] is intercept
    #line[2] is how well the line fits (compared against the thresholds below)
    #line[3] is the statistical significance, used to pick the quality

    qual = __getQuality(line[3])
    if len(fldList) < 5:
        #very few points: knock the quality down a step
        qual -= 1

    conf = __getConfidence((.8, .85, .9, .95, .99), line[2], qual)

    plot.plotLine(line[0], line[1])

    return SimResult(conf, conclusion,
                     "'" + fld + "' is " + ('not ' if line[2] < .9 else '') +
                     "evenly distributed among all samples", plot)

Example #2

0

Show file

File: simulations.py Project: Rhombus13/Calvin

def distantFromOthers(sample, field, spread):
    """
    Discover how different (based on spread) this sample is from the sample nearest to it.

    sample -- the sample under examination
    field  -- name of the field whose value is compared
    spread -- name of the field holding the spread of that value

    If the sample's value lies strictly between the values of other samples, a
    SimResult saying so is returned straight away. Otherwise the smallest distance
    to any other sample, divided by the two samples' summed spreads, is doubled and
    turned into a confidence via __getConfidence.
    """

    ownValue = sample[field]
    ownSpread = sample[spread]

    samples.sampleList.sort(key=lambda s: samples.extractField(s, field))
    plot = __getPlot('id', field)

    somethingAbove = any([other[field] > ownValue for other in samples.sampleList])
    if somethingAbove and any([other[field] < ownValue for other in samples.sampleList]):
        #values exist on both sides of this sample
        return SimResult(confidence.Confidence(confidence.Applic.cf, confidence.Validity.sound), 
                         str(sample) + " has a different " + field + " from any other sample",
                         str(sample) + "'s value for " + field + ' is between that of other samples', 
                         plot)

    #start from a cap of 50 and shrink it to the closest neighbour found
    closest = 50
    for other in samples.sampleList:
        if not (other == sample):
            combinedSpread = ownSpread + other[spread]
            if not (combinedSpread == 0):
                #pairs with zero combined spread are skipped (nothing to divide by)
                separation = abs(ownValue - other[field])
                closest = min(closest, separation / float(combinedSpread))

    #more samples means a more trustworthy result
    sampleCount = len(samples.sampleList)
    if sampleCount >= 6:
        qual = confidence.Validity.sound
    elif sampleCount >= 3:
        qual = confidence.Validity.prob
    else:
        qual = confidence.Validity.plaus

    scaled = closest * 2
    conf = __getConfidence((1, 2, 3, 4, 6), scaled, qual)

    #horizontal lines at the sample's value and at one spread either side of it
    for level in (ownValue, ownValue - ownSpread, ownValue + ownSpread):
        plot.plotLine(0, level)

    return SimResult(conf, str(sample) + " has a different " + field + " from any other sample",
                     str(sample) + "'s value for " + field + ' is ' + str(scaled) + 
                     ' times ' + spread + ' from any other sample', plot)

Example #3

0

Show file

File: simulations.py Project: Rhombus13/Calvin

def skewsField(sample, field):
    """
    Checks whether the value of field in the passed in sample is significantly different from the
    value of field for the rest of the samples under consideration.

    sample -- the sample under examination; must be present in samples.sampleList
              (list.remove raises ValueError otherwise)
    field  -- name of the field to compare

    Returns a SimResult whose confidence is based on how many standard deviations the
    sample's value lies from the mean of the other samples' values.
    """

    fullList = samples.sampleList

    #Work on a copy with the sample taken out. Previously the shared list itself was
    #mutated and samples.sampleList was then re-pointed at a copy, which left the
    #original list object permanently missing the sample for anyone else holding it.
    others = fullList[:]
    others.remove(sample)

    samples.sampleList = others
    try:
        #getAllFlds takes no list argument, so it presumably reads samples.sampleList;
        #that is why the reduced list is swapped in for the duration of this block
        flds = samples.getAllFlds(field)

        mean = stats.mean(flds)
        stddev = stats.std(flds)
        val = sample[field]

        if stddev == 0:
            #no variation among the other samples: report zero deviations
            devs = 0
        else:
            devs = abs(val - mean) / stddev

    finally:
        #we should be fixing the sample list even when I crash!
        samples.sampleList = fullList

    #quality is judged on the full sample count (the list has been restored by now)
    if len(samples.sampleList) < 3:
        qual = confidence.Validity.plaus
    elif len(samples.sampleList) < 6:
        qual = confidence.Validity.prob
    else:
        qual = confidence.Validity.sound

    conf = __getConfidence((.5, 1, 2, 3, 5), devs, qual)

    samples.sampleList.sort(key=lambda x: samples.extractField(x, field))

    #horizontal lines at the mean, one standard deviation either side, and the sample itself
    plot = __getPlot('id', field)
    plot.plotLine(0, mean)
    plot.plotLine(0, mean-stddev)
    plot.plotLine(0, mean+stddev)
    plot.plotLine(0, sample[field])

    return SimResult(conf, str(sample) + " has a different " + field + " from other samples",
                     str(sample) + "'s value for " + field + ' is ' + str(devs) + 
                     ' standard deviations from the mean', plot)

Example #4

0

Show file

File: simulations.py Project: Rhombus13/Calvin

def checkOverlap(anchor, spread):
    """
    Checks that every sample overlaps with every other sample at at least one point 
    in anchor/spread (or spread * 2)

    anchor -- name of the field holding each sample's central value
    spread -- name of the field holding each sample's spread around that value

    Returns a SimResult for the conclusion "sample <anchor> plus or minus <spread>
    overlaps for all samples". The confidence is negated when the samples do not
    overlap even within two spreads.
    """

    conclusion = 'sample ' + anchor + ' plus or minus ' + spread + ' overlaps for all samples'

    if not samples.sampleList:
        return SimResult(confidence.Confidence(confidence.Applic.cf, confidence.Validity.sound),
                         conclusion, 'No samples in set', "")

    samples.sampleList.sort(key=lambda x: samples.extractField(x, anchor))
    plot = __getPlot('id', anchor)

    #Common interval shared by all samples at 1x and 2x spread. Lower bounds start at 0
    #and are only ever raised; upper bounds start at the first sample's upper edge and
    #are only ever lowered. (These were lists named 'range', shadowing the builtin.)
    first = samples.sampleList[0]
    low1, high1 = 0, first[anchor] + first[spread]
    low2, high2 = 0, first[anchor] + 2 * first[spread]

    for sample in samples.sampleList:
        sAnch = sample[anchor]
        sSpre = sample[spread]

        low1 = max(low1, sAnch-sSpre)
        high1 = min(high1, sAnch+sSpre)
        low2 = max(low2, sAnch-2*sSpre)
        high2 = min(high2, sAnch+2*sSpre)

    if high1 > low1:
        #high1 > low1 >= 0, so the denominator is strictly positive here
        dif = abs(high1 - low1) / float(low1 + high1)
        qual = confidence.Validity.accept if dif >= .05 else confidence.Validity.sound
        desc = 'Samples overlap within 1 sigma'
        plot.plotLine(0, low1)
        plot.plotLine(0, high1)
        overlaps = True
    elif high2 > low2:
        #high2 > low2 >= 0, so the denominator is strictly positive here
        dif = abs(high2 - low2) / float(low2 + high2)
        qual = confidence.Validity.sound if dif >= .5 else confidence.Validity.prob
        desc = 'Samples overlap within 2 sigma'
        plot.plotLine(0, low2)
        plot.plotLine(0, high2)
        overlaps = True
    else:
        gap = abs(high2 - low2)
        total = float(low2 + high2)
        if total:
            dif = gap / total
        else:
            #the bounds sum to zero, which used to raise ZeroDivisionError; treat a
            #zero gap as no relative gap and any other gap as arbitrarily large
            dif = float('inf') if gap else 0.0
        desc = 'Samples do not overlap within 2 sigma'

        #the larger the relative gap, the more certain the non-overlap
        if dif > .2:
            qual = confidence.Validity.accept
        elif dif > .1:
            qual = confidence.Validity.sound
        elif dif > .02:
            qual = confidence.Validity.prob
        else:
            qual = confidence.Validity.plaus
        overlaps = False

    confid = confidence.Confidence(confidence.Applic.ft, qual)
    if not overlaps:
        confid = -confid

    return SimResult(confid, conclusion, desc, plot)