def features_to_exponential(header, row, limits):
    """Build a one-sided exponential model from a two-column feature row.

    Args:
        header: Column labels. Must contain exactly two entries, one of
            which is the string 'mean'.
        row: Values aligned with ``header``; the 'mean' entry must be
            convertible with ``float``.
        limits: Indexable pair ``(lower, upper)`` bounding the model.

    Returns:
        The ``rescale()``-d result of ``SplineModelConditional.make_single``
        over ``limits`` with two coefficients, or None when the header does
        not match or neither one-sided shape fits inside ``limits``.
    """
    if len(header) != 2 or 'mean' not in header:
        return None

    mean = float(row[header.index('mean')])
    lower = limits[0]
    upper = limits[1]

    # Is it one-sided?
    # Anchored at the lower limit: the mean sits one scale-length above it,
    # and three scale-lengths must still fit below the upper limit.
    if mean > lower and lower + (mean - lower) * 3 < upper:
        scale = mean - lower
        return SplineModelConditional.make_single(
            lower, upper, [lower / scale, -1 / scale]).rescale()

    # Anchored at the upper limit: mirror image of the test above.  Both
    # conditions are required; accepting either one alone lets through means
    # at or beyond the upper limit (division by zero when mean == upper) and
    # makes the three-scale-length check meaningless.
    if mean < upper and upper - (upper - mean) * 3 > lower:
        scale = upper - mean
        return SplineModelConditional.make_single(
            lower, upper, [-upper / scale, 1 / scale]).rescale()

    return None
def features_to_exponential(header, row, limits):
    """Interpret a ('mean' + one other column) row as an exponential model.

    Args:
        header: Column labels; exactly two entries, one equal to 'mean'.
        row: Values aligned with ``header``; the 'mean' value is read with
            ``float``.
        limits: Indexable pair ``(lower, upper)`` bounding the model.

    Returns:
        ``SplineModelConditional.make_single(...).rescale()`` for whichever
        one-sided shape fits, or None if the header does not match or the
        mean is not close enough to either limit.
    """
    if len(header) != 2:
        return None
    if 'mean' not in header:
        return None

    mean = float(row[header.index('mean')])

    # Is it one-sided?
    from_lower = mean - limits[0]
    if mean > limits[0] and limits[0] + from_lower * 3 < limits[1]:
        # positive exponential
        return SplineModelConditional.make_single(
            limits[0], limits[1],
            [limits[0] / from_lower, -1 / from_lower]).rescale()

    from_upper = limits[1] - mean
    # Require BOTH conditions, matching the branch above.  With `or`, any
    # mean below the upper limit was accepted regardless of the three-scale
    # check, and mean == limits[1] reached a division by zero.
    if mean < limits[1] and limits[1] - from_upper * 3 > limits[0]:
        # negative exponential
        return SplineModelConditional.make_single(
            limits[0], limits[1],
            [-limits[1] / from_upper, 1 / from_upper]).rescale()

    return None
def features_to_gaussian(header, row, limits):
    """Interpret a feature row as a Gaussian or skewed bell-shaped model.

    Two layouts are recognised, selected by ``len(header)``:

    * 3 entries: a center column ('mean', 'mode' or the float label .5)
      plus a 'var' or 'sdev' column.
    * 4 entries: the first column is dropped; the rest must hold a center
      column and two columns whose header labels are numeric probabilities.

    Args:
        header: List of column labels (strings and/or floats).
        row: List of values aligned with ``header``.
        limits: Indexable pair ``(lower, upper)`` bounding the model.

    Returns:
        A model built by ``SplineModelConditional`` or
        ``FeaturesInterpreter.skew_gaussian_construct``, or None when the
        layout is not recognised or the fitted curve does not fit within
        ``limits``.
    """
    # Does this look like a mean-variance feature file?
    if len(header) == 3:
        # Every label is checked, so when several are present the last
        # match ('mean' < 'mode' < .5) wins.
        mean = None
        for label in ('mean', 'mode', .5):
            if label in header:
                mean = float(row[header.index(label)])
        if mean is None:
            return None

        if 'var' in header:
            var = float(row[header.index('var')])
        elif 'sdev' in header:
            sdev = float(row[header.index('sdev')])
            var = sdev * sdev
        else:
            return None

        if np.isnan(var) or var == 0:
            # No usable spread: both bounds at the center, no coefficients.
            return SplineModelConditional.make_single(mean, mean, [])

        # This might be uniform
        # NOTE(review): the margin is 2 * var (a variance, not a standard
        # deviation) -- confirm this is intended.
        if mean - 2 * var < limits[0] or mean + 2 * var > limits[1]:
            return None

        return SplineModelConditional.make_gaussian(
            limits[0], limits[1], mean, var)

    if len(header) == 4:
        # Does this look like a mean and evenly spaced p-values?
        header = header[1:]  # Make a copy of the list
        row = row[1:]

        # First matching label wins; it is removed from both lists so that
        # only the two probability columns remain.
        for label in ('mean', 'mode', .5):
            if label in header:
                mean = float(row.pop(header.index(label)))
                header.remove(label)
                break
        else:
            return None

        # Check that the two other values are evenly spaced p-values.
        # A real list is required: `row` is indexed below, which a lazy
        # `map` object (Python 3) does not support.
        row = [float(value) for value in row[0:2]]
        if np.all(np.isnan(row)):
            return SplineModelConditional.make_single(mean, mean, [])

        symmetric = (
            header[1] == 1 - header[0]
            and abs(row[1] - mean - (mean - row[0]))
            < abs(row[1] - row[0]) / 1000.0)

        if symmetric:
            lowp = min(header)
            lowv = np.array(row)[np.array(header) == lowp][0]

            if lowv == mean:
                return SplineModelConditional.make_single(mean, mean, [])

            # Bracket for the root search on the standard deviation.
            lowerbound = 1e-4 * (mean - lowv)
            upperbound = np.sqrt((mean - lowv) / lowp)

            sdev = brentq(lambda sdev: norm.cdf(lowv, mean, sdev) - lowp,
                          lowerbound, upperbound)
            if (float(limits[0]) < mean - 3 * sdev
                    and float(limits[1]) > mean + 3 * sdev):
                return SplineModelConditional.make_gaussian(
                    limits[0], limits[1], mean, sdev * sdev)
            return None

        # Heuristic best curve: known tails, fit to mean
        lowp = min(header)
        lowv = np.array(row)[np.array(header) == lowp][0]

        lowerbound = 1e-4 * (mean - lowv)
        # NOTE(review): the symmetric branch uses np.sqrt for this bracket;
        # np.log here can be non-positive -- confirm intended.
        upperbound = np.log((mean - lowv) / lowp)

        low_sdev = brentq(lambda sdev: norm.cdf(lowv, mean, sdev) - lowp,
                          lowerbound, upperbound)
        if float(limits[0]) > mean - 3 * low_sdev:
            return None

        low_segment = SplineModelConditional.make_gaussian(
            float(limits[0]), lowv, mean, low_sdev * low_sdev)

        highp = max(header)
        highv = np.array(row)[np.array(header) == highp][0]

        lowerbound = 1e-4 * (highv - mean)
        upperbound = np.log((highv - mean) / (1 - highp))

        high_scale = brentq(
            lambda scale: .5 + expon.cdf(highv, mean, scale) / 2 - highp,
            lowerbound, upperbound)
        if float(limits[1]) < mean + 3 * high_scale:
            return None

        # Construct exponential, starting at mean, with full cdf of .5
        high_segment = SplineModelConditional.make_single(
            highv, float(limits[1]),
            [np.log(1 / high_scale) + np.log(.5) + mean / high_scale,
             -1 / high_scale])

        # Five knots: the two lowest and two highest of seven evenly spaced
        # points between the tail values, with the center in between.
        sevenys = np.linspace(lowv, highv, 7)
        ys = np.append(sevenys[0:2], [mean, sevenys[-2], sevenys[-1]])

        lps0 = norm.logpdf(ys[0:2], mean, low_sdev)
        lps1 = expon.logpdf([ys[-2], ys[-1]], mean, high_scale) + np.log(.5)

        # Only the log-density at the center is free; search for it with
        # Nelder-Mead starting from .5.
        result = minimize(
            lambda lpmean: FeaturesInterpreter.skew_gaussian_evaluate(
                ys, np.append(np.append(lps0, [lpmean]), lps1),
                low_segment, high_segment, mean, lowp, highp),
            .5, method='Nelder-Mead')

        lps = np.append(np.append(lps0, result.x), lps1)
        # Diagnostic output kept from the original; the parenthesised
        # single-argument form prints identically on Python 2 and 3.
        print(lps)

        return FeaturesInterpreter.skew_gaussian_construct(
            ys, lps, low_segment, high_segment)

    return None
def features_to_uniform(header, row, limits):
    """Build a single flat segment over ``limits`` for a one-column row.

    Args:
        header: Column labels; anything other than exactly one entry is
            rejected.
        row: Not read by this function; present so the signature matches
            the other ``features_to_*`` builders.
        limits: Indexable pair ``(lower, upper)`` bounding the model.

    Returns:
        ``SplineModelConditional.make_single(lower, upper, [1 / width])``
        where ``width = upper - lower``, or None when ``header`` does not
        have exactly one entry.
    """
    if len(header) != 1:
        return None

    # Use a float numerator so integer limits do not truncate the quotient
    # to 0 under Python 2 division semantics.
    # NOTE(review): the other builders in this file pass log-scale
    # coefficients to make_single -- confirm a raw 1/width is intended here.
    height = 1.0 / (limits[1] - limits[0])
    return SplineModelConditional.make_single(limits[0], limits[1], [height])
def features_to_uniform(header, row, limits):
    """Interpret a one-column feature row as a flat model over ``limits``.

    Args:
        header: Column labels; must contain exactly one entry.
        row: Not read by this function.
        limits: Indexable pair ``(lower, upper)`` bounding the model.

    Returns:
        The result of ``SplineModelConditional.make_single`` over ``limits``
        with the single coefficient ``1 / (upper - lower)``, or None when
        ``header`` does not have exactly one entry.
    """
    if len(header) != 1:
        return None

    # 1.0 forces true division; with integer limits a bare `1 / width`
    # evaluates to 0 on Python 2.
    return SplineModelConditional.make_single(
        limits[0], limits[1], [1.0 / (limits[1] - limits[0])])
def features_to_gaussian(header, row, limits):
    """Build a Gaussian-like model from a 3- or 4-column feature row.

    Layouts, selected by ``len(header)``:

    * 3 columns: a center ('mean', 'mode' or the float label .5) together
      with 'var' or 'sdev'.
    * 4 columns: the first column is skipped; the remaining three are a
      center and two columns labelled with numeric probabilities.

    Args:
        header: List of column labels (strings and/or floats).
        row: List of values aligned with ``header``.
        limits: Indexable pair ``(lower, upper)`` bounding the model.

    Returns:
        A model from ``SplineModelConditional.make_single`` /
        ``make_gaussian`` or ``FeaturesInterpreter.skew_gaussian_construct``;
        None when the layout is not recognised or the fitted curve does not
        fit within ``limits``.
    """
    # Does this look like a mean-variance feature file?
    if len(header) == 3:
        mean = None
        # Independent checks: a later label overrides an earlier one.
        if 'mean' in header:
            mean = float(row[header.index('mean')])
        if 'mode' in header:
            mean = float(row[header.index('mode')])
        if .5 in header:
            mean = float(row[header.index(.5)])
        if mean is None:
            return None

        if 'var' in header:
            var = float(row[header.index('var')])
        elif 'sdev' in header:
            var = float(row[header.index('sdev')]) * float(
                row[header.index('sdev')])
        else:
            return None

        if np.isnan(var) or var == 0:
            # Spread unavailable: both bounds at the center, no coefficients.
            return SplineModelConditional.make_single(mean, mean, [])

        # This might be uniform
        # NOTE(review): 2 * var uses the variance, not the standard
        # deviation, as the margin -- confirm intended.
        if mean - 2 * var < limits[0] or mean + 2 * var > limits[1]:
            return None

        return SplineModelConditional.make_gaussian(
            limits[0], limits[1], mean, var)

    elif len(header) == 4:
        # Does this look like a mean and evenly spaced p-values?
        header = header[1:]  # Make a copy of the list
        row = row[1:]

        # Exactly one center label is consumed (first match wins) and is
        # removed from both lists, leaving the two probability columns.
        if 'mean' in header:
            center_label = 'mean'
        elif 'mode' in header:
            center_label = 'mode'
        elif .5 in header:
            center_label = .5
        else:
            return None
        mean = float(row.pop(header.index(center_label)))
        header.remove(center_label)

        # Check that the two other values are evenly spaced p-values.
        # Materialise a list: `row` is indexed below, and `map` returns a
        # non-indexable iterator on Python 3.
        row = [float(value) for value in row[0:2]]
        if np.all(np.isnan(row)):
            return SplineModelConditional.make_single(mean, mean, [])

        if header[1] == 1 - header[0] and abs(row[1] - mean - (
                mean - row[0])) < abs(row[1] - row[0]) / 1000.0:
            lowp = min(header)
            lowv = np.array(row)[np.array(header) == lowp][0]

            if lowv == mean:
                return SplineModelConditional.make_single(mean, mean, [])

            # Bracket for the root search on the standard deviation.
            lowerbound = 1e-4 * (mean - lowv)
            upperbound = np.sqrt((mean - lowv) / lowp)

            sdev = brentq(lambda sdev: norm.cdf(lowv, mean, sdev) - lowp,
                          lowerbound, upperbound)
            if float(limits[0]) < mean - 3 * sdev and float(
                    limits[1]) > mean + 3 * sdev:
                return SplineModelConditional.make_gaussian(
                    limits[0], limits[1], mean, sdev * sdev)
            else:
                return None
        else:
            # Heuristic best curve: known tails, fit to mean
            lowp = min(header)
            lowv = np.array(row)[np.array(header) == lowp][0]

            lowerbound = 1e-4 * (mean - lowv)
            # NOTE(review): np.log here versus np.sqrt in the symmetric
            # branch; the log can be non-positive -- confirm intended.
            upperbound = np.log((mean - lowv) / lowp)

            low_sdev = brentq(
                lambda sdev: norm.cdf(lowv, mean, sdev) - lowp,
                lowerbound, upperbound)
            if float(limits[0]) > mean - 3 * low_sdev:
                return None

            low_segment = SplineModelConditional.make_gaussian(
                float(limits[0]), lowv, mean, low_sdev * low_sdev)

            highp = max(header)
            highv = np.array(row)[np.array(header) == highp][0]

            lowerbound = 1e-4 * (highv - mean)
            upperbound = np.log((highv - mean) / (1 - highp))

            high_scale = brentq(
                lambda scale: .5 + expon.cdf(highv, mean, scale) / 2 - highp,
                lowerbound, upperbound)
            if float(limits[1]) < mean + 3 * high_scale:
                return None

            # Construct exponential, starting at mean, with full cdf of .5
            high_segment = SplineModelConditional.make_single(
                highv, float(limits[1]), [
                    np.log(1 / high_scale) + np.log(.5) + mean / high_scale,
                    -1 / high_scale
                ])

            # Knots: two lowest and two highest of seven evenly spaced
            # points between the tail values, plus the center.
            sevenys = np.linspace(lowv, highv, 7)
            ys = np.append(sevenys[0:2], [mean, sevenys[-2], sevenys[-1]])

            lps0 = norm.logpdf(ys[0:2], mean, low_sdev)
            lps1 = expon.logpdf([ys[-2], ys[-1]], mean,
                                high_scale) + np.log(.5)

            # The center log-density is the only free value; fit it with
            # Nelder-Mead from an initial guess of .5.
            result = minimize(
                lambda lpmean: FeaturesInterpreter.skew_gaussian_evaluate(
                    ys, np.append(np.append(lps0, [lpmean]), lps1),
                    low_segment, high_segment, mean, lowp, highp),
                .5,
                method='Nelder-Mead')

            # Diagnostic output kept from the original; the call form with
            # one argument prints the same text on Python 2 and Python 3.
            print(np.append(np.append(lps0, result.x), lps1))

            return FeaturesInterpreter.skew_gaussian_construct(
                ys, np.append(np.append(lps0, result.x), lps1),
                low_segment, high_segment)

    return None