def compare(self, dist_1, dist_2):
    """
    Relate two distributions as the triple (1 - same - after, same, after).

    'same' is the integral of the point-wise geometric mean
    sqrt(pdf_1 * pdf_2), tabulated on the time steps of the two intervals
    built from self.bounds_of(...) and interpolated piecewise-linearly.
    The remainder (1 - same) is then divided between the first and the
    last entry of the result; the share given to 'after' depends on the
    difference of the means and on how different the two shapes are.
    """
    interval_1 = TimeInterval(*self.bounds_of(dist_1))
    interval_2 = TimeInterval(*self.bounds_of(dist_2))
    overlap_samples = {
        t: sqrt(dist_1.pdf(t) * dist_2.pdf(t))
        for t in interval_1 + interval_2
    }
    overlap = FunctionPiecewiseLinear(overlap_samples,
                                      function_undefined=FUNCTION_ZERO)
    same = integral(overlap, NEGATIVE_INFINITY, POSITIVE_INFINITY)

    # 'msk' asks for mean, skewness and kurtosis, in that order.
    mean_1, skew_1, kurtosis_1 = dist_1.stats(moments='msk')
    std_1 = dist_1.std()
    mean_2, skew_2, kurtosis_2 = dist_2.stats(moments='msk')
    std_2 = dist_2.std()

    # Shape dissimilarity: summed absolute differences of the standard
    # deviation, skewness and kurtosis.
    distance = (fabs(std_1 - std_2)
                + fabs(skew_1 - skew_2)
                + fabs(kurtosis_1 - kurtosis_2))
    delta = mean_1 - mean_2
    remainder = 1.0 - same

    if almost_equals(distance, 0):
        # Same shape: the whole remainder goes to one side, chosen by the
        # sign of the mean difference.
        if delta < 0:
            weight_after, weight_before = 0.0, 1.0
        else:
            weight_after, weight_before = 1.0, 0.0
    else:
        def standardized_pdf_1(x):
            return dist_1.pdf(std_1 * x + mean_1)

        def standardized_pdf_2(x):
            return dist_2.pdf(std_2 * x + mean_2)

        def scaled_overlap(p):
            # Geometric mean of the standardized densities, with the
            # argument divided by the shape distance.
            x = p / distance
            return sqrt(standardized_pdf_1(x) * standardized_pdf_2(x))

        length = max(self.duration_of(dist_1), self.duration_of(dist_2))
        scaled_samples = {
            p: scaled_overlap(p)
            for p in TimeInterval(-length / 2.0, length / 2.0)
        }
        scaled_function = FunctionPiecewiseLinear(
            scaled_samples, function_undefined=FUNCTION_ZERO)

        # Split the tabulated curve at the mean difference.
        weight_after = integral(scaled_function, NEGATIVE_INFINITY, delta)
        weight_before = integral(scaled_function, delta, POSITIVE_INFINITY)

    after = weight_after / (weight_after + weight_before) * remainder
    return 1.0 - same - after, same, after
def compare(self, dist_1, dist_2):
    """
    Return the triple (before, same, after) for two distributions.

    'before' and 'after' are the integrals of
    self.function_convolution(dist_1, dist_2) over (-inf, 0) and
    (0, +inf). 'same' is self.calculate_similarity(dist_1, dist_2)
    scaled by 1 - |before - after|.
    """
    convolved = self.function_convolution(dist_1, dist_2)
    mass_before = integral(convolved, NEGATIVE_INFINITY, 0)
    mass_after = integral(convolved, 0, POSITIVE_INFINITY)

    # The more lopsided the two masses are, the less weight 'same' keeps.
    likeness = self.calculate_similarity(dist_1, dist_2)
    balance = 1 - fabs(mass_before - mass_after)
    return mass_before, likeness * balance, mass_after
def compare(self, dist_1, dist_2):
    """
    Compute (1 - same - after, same, after) for two distributions.

    'same' integrates a piecewise-linear tabulation of
    sqrt(pdf_1(t) * pdf_2(t)). What is left over (1 - same) is shared
    between the first and the last entry: entirely on one side when the
    shapes match, otherwise in proportion to how a rescaled overlap curve
    is cut at the mean difference.
    """
    def tabulate(function, time_steps):
        # Sample 'function' on every step and interpolate linearly;
        # outside the sampled steps the result falls back to FUNCTION_ZERO.
        table = {}
        for step in time_steps:
            table[step] = function(step)
        return FunctionPiecewiseLinear(table,
                                       function_undefined=FUNCTION_ZERO)

    first = TimeInterval(*self.bounds_of(dist_1))
    second = TimeInterval(*self.bounds_of(dist_2))
    same = integral(
        tabulate(lambda t: sqrt(dist_1.pdf(t) * dist_2.pdf(t)),
                 first + second),
        NEGATIVE_INFINITY, POSITIVE_INFINITY)

    # Mean, skewness and kurtosis ('msk'), plus the standard deviation.
    m1, s1, k1 = dist_1.stats(moments='msk')
    sd1 = dist_1.std()
    m2, s2, k2 = dist_2.stats(moments='msk')
    sd2 = dist_2.std()

    # How different the two shapes are, ignoring where they are located.
    distance = fabs(sd1 - sd2) + fabs(s1 - s2)
    distance += fabs(k1 - k2)
    delta = m1 - m2
    left_over = 1.0 - same

    share_after, share_before = 1.0, 0.0
    if not almost_equals(distance, 0):
        def stretched_overlap(p):
            # Both densities are standardized (shifted by the mean, scaled
            # by the standard deviation) and evaluated at p / distance.
            x = p / distance
            return sqrt(dist_1.pdf(sd1 * x + m1) * dist_2.pdf(sd2 * x + m2))

        span = max(self.duration_of(dist_1), self.duration_of(dist_2))
        curve = tabulate(stretched_overlap,
                         TimeInterval(-span / 2.0, span / 2.0))
        share_after = integral(curve, NEGATIVE_INFINITY, delta)
        share_before = integral(curve, delta, POSITIVE_INFINITY)
    elif delta < 0:
        # Matching shapes and a smaller first mean: nothing goes to 'after'.
        share_after, share_before = 0.0, 1.0

    fraction_after = share_after / (share_after + share_before)
    after = fraction_after * left_over
    return left_over - after, same, after
def calculate_similarity(self, dist_1, dist_2):
    """
    Return a similarity score for two frozen distributions.

    When both underlying distributions are uniform the score has a closed
    form: the shorter duration divided by the geometric mean of the two
    durations (durations come from self.duration_of). Otherwise both
    densities are shifted so that their means sit at zero and the score is
    the integral of sqrt(pdf_1 * pdf_2) over the whole real line.

    dist_1, dist_2: objects exposing 'dist', 'mean()' and 'pdf(t)'.
    """
    # isinstance rather than an exact type comparison, so that subclasses
    # of uniform_gen also take the closed-form branch.
    if isinstance(dist_1.dist, uniform_gen) and isinstance(dist_2.dist, uniform_gen):
        length_dist_1 = self.duration_of(dist_1)
        length_dist_2 = self.duration_of(dist_2)
        return min(length_dist_1, length_dist_2) / sqrt(length_dist_1 * length_dist_2)

    dist_1_mean, dist_2_mean = dist_1.mean(), dist_2.mean()

    def geometric_mean(t):
        # Both densities are evaluated relative to their own mean.
        return sqrt(dist_1.pdf(t + dist_1_mean) * dist_2.pdf(t + dist_2_mean))

    return integral(geometric_mean, NEGATIVE_INFINITY, POSITIVE_INFINITY)
def degree(self, time_step=None, a=None, b=None, interval=None):
    """
    usage: provide 'time_step' or 'a' and 'b' or 'interval'

    With 'time_step' the membership function is evaluated at that point.
    Otherwise the membership function is integrated over 'interval', over
    TimeInterval(a, b), or - when none of them is given - over this
    object's own 'a' and 'b'.
    """
    # A point query wins over every interval argument.
    if time_step is not None:
        return self.membership_function(time_step)

    if interval is not None:
        check_is_time_interval(interval)
    elif (a, b) == (None, None):
        # Nothing supplied: integrate over the object's own bounds.
        interval = self
    else:
        interval = TimeInterval(a, b)

    return integral(self.membership_function, interval.a, interval.b)
def calculate_similarity(self, dist_1, dist_2):
    """
    Score how alike two frozen distributions are.

    Two uniform distributions get the closed-form value
    min(length_1, length_2) / sqrt(length_1 * length_2), with the lengths
    taken from self.duration_of. Any other pair is scored by integrating,
    over the whole real line, the geometric mean of the two densities
    after each has been shifted by its own mean.

    dist_1, dist_2: objects exposing 'dist', 'mean()' and 'pdf(t)'.
    """
    # Use isinstance instead of comparing exact types: a subclass of
    # uniform_gen should take the closed-form path as well.
    both_uniform = (isinstance(dist_1.dist, uniform_gen)
                    and isinstance(dist_2.dist, uniform_gen))
    if both_uniform:
        length_dist_1 = self.duration_of(dist_1)
        length_dist_2 = self.duration_of(dist_2)
        shorter = min(length_dist_1, length_dist_2)
        return shorter / sqrt(length_dist_1 * length_dist_2)

    dist_1_mean, dist_2_mean = dist_1.mean(), dist_2.mean()

    def dist_1_transformed(t):
        return dist_1.pdf(t + dist_1_mean)

    def dist_2_transformed(t):
        return dist_2.pdf(t + dist_2_mean)

    def geometric_mean(t):
        return sqrt(dist_1_transformed(t) * dist_2_transformed(t))

    return integral(geometric_mean, NEGATIVE_INFINITY, POSITIVE_INFINITY)
def after(self, dist_1, dist_2):
    """
    Integrate self.after_point(pdf_1(x), pdf_2(x)) over the bounds
    returned by self.after_integral_bounds(dist_1, dist_2).
    """
    def integrand(x):
        return self.after_point(dist_1.pdf(x), dist_2.pdf(x))

    bounds = self.after_integral_bounds(dist_1, dist_2)
    return integral(integrand, *bounds)
def same(self, dist_1, dist_2):
    """
    Integrate self.same_point(pdf_1(x), pdf_2(x)) over the bounds
    returned by self.same_integral_bounds(dist_1, dist_2).
    """
    def integrand(x):
        return self.same_point(dist_1.pdf(x), dist_2.pdf(x))

    bounds = self.same_integral_bounds(dist_1, dist_2)
    return integral(integrand, *bounds)
def after(self, dist_1, dist_2):
    """
    Return the integral of the point-wise 'after' value of both densities.

    The integrand combines the two densities at x through
    self.after_point; the integration limits come from
    self.after_integral_bounds.
    """
    pdf_1, pdf_2 = dist_1.pdf, dist_2.pdf
    point_value = lambda x: self.after_point(pdf_1(x), pdf_2(x))
    limits = self.after_integral_bounds(dist_1, dist_2)
    return integral(point_value, *limits)
def same(self, dist_1, dist_2):
    """
    Return the integral of the point-wise 'same' value of both densities.

    The integrand combines the two densities at x through
    self.same_point; the integration limits come from
    self.same_integral_bounds.
    """
    pdf_1, pdf_2 = dist_1.pdf, dist_2.pdf
    point_value = lambda x: self.same_point(pdf_1(x), pdf_2(x))
    limits = self.same_integral_bounds(dist_1, dist_2)
    return integral(point_value, *limits)
if __name__ == '__main__':
    # Manual demo: build two piecewise-linear temporal events, print a few
    # derived values and plot each event.
    from utility.functions import integral
    from scipy.stats import norm
    import matplotlib.pyplot as plt

    # event = TemporalInstance(datetime(2010, 1, 1), datetime(2011, 2, 1))
    # plt = event.plot()
    # plt.show()

    events = [
        # TemporalEvent(norm(loc=10, scale=2), norm(loc=30, scale=2), 100),
        # TemporalEvent(norm(loc=5, scale=2), norm(loc=15, scale=4), 100),
        TemporalEventPiecewiseLinear({1: 0, 2: 0.1, 3: 0.3, 4: 0.7, 5: 1},
                                     {6: 1, 7: 0.9, 8: 0.6, 9: 0.1, 10: 0}),
        TemporalEventPiecewiseLinear({1: 0, 2: 0.1, 3: 0.3, 4: 0.7, 5: 1},
                                     {3.5: 1, 4.5: 0.9, 8: 0.6, 9: 0.1, 10: 0})
    ]

    # Single-argument print(...) is valid and prints the same text on both
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print(type(events[0]))
    print(events[0] * events[1])

    for event in events:
        # NOTE: this rebinds 'plt' to whatever event.plot() returns; the
        # calls below rely on that object offering ylim() and show().
        plt = event.plot()
        print(integral(event.distribution_beginning.pdf, event.a, event.beginning))
        print(event.distribution_beginning.rvs(10))
        plt.ylim(ymax=1.1)
        # plt.figure()

    plt.show()