def sample_next(self, prev, incl_eos=True):
    """Samples a single word from context.

    Can be useful to debug the model: for example, if you have a bigram
    model and know that the probability of X-Y should be really high, you
    can run sample_next([Y]) to see how often X gets generated.

    incl_eos determines whether the space of words should include EOS or not.
    """
    wps = []
    tot = -np.inf  # this is the log of the total probability mass
    for w in self.lm.vocab():
        if not incl_eos and w == "<EOS>":
            continue
        lp = self.lm.cond_logprob(w, prev)  # log2 probability of w given prev
        # temp controls the weighting; temp == 1 is the untempered case
        wps.append([w, lp / self.temp])
        # accumulate the cumulative log probability: log2(2^(lp/temp) + 2^tot)
        tot = np.logaddexp2(lp / self.temp, tot)
    # draw a random number and find the corresponding interval
    p = self.rnd.random()
    word = self.rnd.choice(wps)[0]  # fallback in case of rounding error
    s = -np.inf  # running mass / accumulated (log) probability
    for w, lp in wps:
        s = np.logaddexp2(s, lp)
        if p < pow(2, s - tot):
            word = w
            break
    return word
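
# A self-contained sketch (not from any snippet above) of the same
# roulette-wheel idea: sampling an index from unnormalized log2-probabilities,
# accumulating the running mass with np.logaddexp2 so nothing ever leaves log
# space. The toy distribution, temperature default, and function name are
# illustrative assumptions, not part of the original code.
import numpy as np

def sample_index(log2_probs, temperature=1.0, rng=None):
    """Sample index i with probability proportional to 2**(log2_probs[i] / temperature)."""
    rng = rng or np.random.default_rng()
    scaled = np.asarray(log2_probs, dtype=float) / temperature
    total = np.logaddexp2.reduce(scaled)  # log2 of the total mass
    p = rng.random()
    running = -np.inf
    for i, lp in enumerate(scaled):
        running = np.logaddexp2(running, lp)
        if p < 2.0 ** (running - total):  # first interval containing p
            return i
    return len(scaled) - 1  # guard against floating-point rounding

# e.g. sample_index(np.log2([0.1, 0.2, 0.7])) returns 2 about 70% of the time.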
def test_nan(self):
    err = np.seterr(invalid="ignore")
    try:
        assert np.isnan(np.logaddexp2(np.nan, np.inf))
        assert np.isnan(np.logaddexp2(np.inf, np.nan))
        assert np.isnan(np.logaddexp2(np.nan, 0))
        assert np.isnan(np.logaddexp2(0, np.nan))
        assert np.isnan(np.logaddexp2(np.nan, np.nan))
    finally:
        np.seterr(**err)
def _nllx(xi, i):
    ll = 0.0
    zi[0] = sum(xi ** 2.0)
    zi[1:k + 1] = xi
    for j in range(l):
        gij = snp_matrix[i, j]
        if gij == geosnp.MISSING:
            continue
        qnf = zi.T.dot(Y[j])
        r = numpy.logaddexp2(-qnf, 0)
        r2 = numpy.logaddexp2(qnf, 0)
        ll -= (gij * r) + ((2.0 - gij) * r2)
    # return NLL in order to minimize
    return -ll
def _nlly(yj, j):
    ll = 0.0
    q, a, b = yj[0], yj[1:k + 1], yj[-1]
    for i in range(n):
        gij = snp_matrix[i, j]
        if gij == geosnp.MISSING:
            continue
        xi = X[i]
        qnf = (q * sum(xi ** 2.0)) + a.dot(xi) + b
        r = numpy.logaddexp2(-qnf, 0)
        r2 = numpy.logaddexp2(qnf, 0)
        ll -= (gij * r) + ((2.0 - gij) * r2)
    # return NLL in order to minimize
    return -ll
def test_exp_log():
    """Test exponents and logarithms"""
    x = np.linspace(-5, 5, 21)
    exp_x = np.exp(x)
    y = exp_x

    # Evaluate exp(x), log(y)
    _exp, _dexp = fl.exp(x)()
    _log, _dlog = fl.log(y)()
    # Known answers
    assert np.allclose(_exp, exp_x)
    assert np.allclose(_dexp, exp_x)
    assert np.allclose(_log, x)
    assert np.allclose(_dlog, 1.0 / y)

    # Log base 2 and 10; exp base 2
    _log2, _dlog2 = fl.log2(y)()
    _log10, _dlog10 = fl.log10(y)()
    _exp2, _dexp2 = fl.exp2(x)()
    # Known answers
    assert np.allclose(_log2, x / np.log(2.0))
    assert np.allclose(_dlog2, 1.0 / y / np.log(2.0))
    assert np.allclose(_log10, x / np.log(10.0))
    assert np.allclose(_dlog10, 1.0 / y / np.log(10.0))
    assert np.allclose(_exp2, 2.0 ** x)
    assert np.allclose(_dexp2, np.log(2.0) * (2.0 ** x))

    # Exponential minus 1, log of 1 plus
    _expm1, _dexpm1 = fl.expm1(x)()
    _log1p, _dlog1p = fl.log1p(y)()
    # Known answers
    assert np.allclose(_expm1, exp_x - 1.0)
    assert np.allclose(_dexpm1, exp_x)
    assert np.allclose(_log1p, np.log(1.0 + y))
    assert np.allclose(_dlog1p, 1.0 / (1.0 + y))

    # Log of the sum of exponentials, base e and base 2
    _logaddexp, _dlogaddexp = fl.logaddexp(x, x)()
    _logaddexp2, _dlogaddexp2 = fl.logaddexp2(x, x)()
    assert (str(fl.logaddexp(fl.Var('x'), fl.Var('y')))
            == "logaddexp(Var(x, None),Var(y, None))")
    # Known answers
    assert np.allclose(_logaddexp, np.logaddexp(x, x))
    assert np.allclose(_dlogaddexp, np.vstack([0 * y + 1 / 2, 0 * y + 1 / 2]).T)
    assert np.allclose(_logaddexp2, np.logaddexp2(x, x))
    assert np.allclose(_dlogaddexp2, np.vstack([0 * y + 1 / 2, 0 * y + 1 / 2]).T)

    # Forward mode
    f = fl.logaddexp(fl.Var('x'), fl.Var('y'))
    val, diff = f(0, 0)
    assert np.isclose(val, np.array([[0.69314718]]))
    assert diff.all() == np.array([[0.5, 0.5]]).all()
    report_success()
def _increment_by_frecency(self, frecency_added, multiplier=1.):
    """Increment this frecency by another frecency, with an optional multiplier.

    NOTE: No attempt is made here to handle differing timescales or
    other parameters.
    """
    log2_multiplier = log2(multiplier)
    log2_weight_added = frecency_added.log2_value + log2_multiplier
    self.log2_value = logaddexp2(self.log2_value, log2_weight_added)
def add(self, x, y):
    # Convert log_b probabilities to log_2 probabilities.
    x2 = x * np.log2(base)
    y2 = y * np.log2(base)
    z = np.logaddexp2(x2, y2)
    # Convert log_2 probabilities to log_b probabilities.
    z *= self.log(2)
    return z
def test_logaddexp2_1(self):
    prob1 = np.log2(1e-50)
    prob2 = np.log2(2.5e-50)
    a = np.logaddexp2(prob1, prob2)
    print(a)
    b = 2 ** a
    print(b)
def _baum_welch_step(self, sequence, model, symbol_to_number):
    N = len(model._states)
    M = len(model._symbols)
    T = len(sequence)

    # compute forward and backward probabilities
    alpha = model._forward_probability(sequence)
    beta = model._backward_probability(sequence)

    # find the log probability of the sequence
    lpk = logsumexp2(alpha[T - 1])

    A_numer = _ninf_array((N, N))
    B_numer = _ninf_array((N, M))
    A_denom = _ninf_array(N)
    B_denom = _ninf_array(N)

    transitions_logprob = model._transitions_matrix().T

    for t in range(T):
        symbol = sequence[t][_TEXT]  # not found? FIXME
        next_symbol = None
        if t < T - 1:
            next_symbol = sequence[t + 1][_TEXT]  # not found? FIXME
        xi = symbol_to_number[symbol]

        next_outputs_logprob = model._outputs_vector(next_symbol)
        alpha_plus_beta = alpha[t] + beta[t]

        if t < T - 1:
            numer_add = (
                transitions_logprob
                + next_outputs_logprob
                + beta[t + 1]
                + alpha[t].reshape(N, 1)
            )
            A_numer = np.logaddexp2(A_numer, numer_add)
            A_denom = np.logaddexp2(A_denom, alpha_plus_beta)
        else:
            B_denom = np.logaddexp2(A_denom, alpha_plus_beta)

        B_numer[:, xi] = np.logaddexp2(B_numer[:, xi], alpha_plus_beta)

    return lpk, A_numer, A_denom, B_numer, B_denom
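
# The Baum-Welch snippets here call a logsumexp2 helper that is not shown.
# A minimal sketch of what such a helper could look like, assuming it simply
# reduces an array with np.logaddexp2 (the actual NLTK implementation may
# differ, e.g. by subtracting the max first):
import numpy as np

def logsumexp2(arr):
    """log2(sum(2**arr)) computed stably by pairwise np.logaddexp2 reduction."""
    return np.logaddexp2.reduce(np.asarray(arr, dtype=float))

# Sanity check: log2(2**1 + 2**2 + 2**3) == log2(14)
assert np.isclose(logsumexp2([1.0, 2.0, 3.0]), np.log2(14.0))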
def test_logaddexp2_range(self):
    x = [1000000, -1000000, 1000200, -1000200]
    y = [1000200, -1000200, 1000000, -1000000]
    z = [1000200, -1000000, 1000200, -1000000]
    for dt in ["f", "d", "g"]:
        logxf = np.array(x, dtype=dt)
        logyf = np.array(y, dtype=dt)
        logzf = np.array(z, dtype=dt)
        assert_almost_equal(np.logaddexp2(logxf, logyf), logzf)
def misc_floating(mod, x):
    "miscellaneous"
    # y0 = math.erfc(x)
    y1 = math.atan2(x, x)
    y2 = np.arctan2(x, x)
    y3 = np.logaddexp(x, x)
    y4 = np.logaddexp2(x, x)
    return (y1, y2, y3, y4)  # (y0, y1)
def test_logaddexp2_values(self):
    x = [1, 2, 3, 4, 5]
    y = [5, 4, 3, 2, 1]
    z = [6, 6, 6, 6, 6]
    for dt, dec in zip(["f", "d", "g"], [6, 15, 15]):
        xf = np.log2(np.array(x, dtype=dt))
        yf = np.log2(np.array(y, dtype=dt))
        zf = np.log2(np.array(z, dtype=dt))
        assert_almost_equal(np.logaddexp2(xf, yf), zf, decimal=dec)
def test_inf(self):
    inf = np.inf
    x = [inf, -inf, inf, -inf, inf, 1, -inf, 1]
    y = [inf, inf, -inf, -inf, 1, inf, 1, -inf]
    z = [inf, inf, inf, -inf, inf, inf, 1, 1]
    for dt in ['f', 'd', 'g']:
        logxf = np.array(x, dtype=dt)
        logyf = np.array(y, dtype=dt)
        logzf = np.array(z, dtype=dt)
        assert_equal(np.logaddexp2(logxf, logyf), logzf)
def increment(self, value_added=1., event_time=None):
    """
    Increment frecency, with value_added weighted according to time of
    observation.

    * *value_added* is the number or weight of current events to add to
      the Frecency counter. (e.g., 1 for one view)
    * *event_time* can be used to set the time at which the new value
      was added; otherwise, the present time is used.
    """
    if not event_time:
        event_time = time.time()
    # All calculations are in log2 space to avoid overflow.
    log2_weight_added = (event_time - self.time0) / self.timescale + log2(value_added)
    self.log2_value = logaddexp2(self.log2_value, log2_weight_added)
def test_inf(self):
    inf = np.inf
    x = [inf, -inf, inf, -inf, inf, 1, -inf, 1]
    y = [inf, inf, -inf, -inf, 1, inf, 1, -inf]
    z = [inf, inf, inf, -inf, inf, inf, 1, 1]
    with np.errstate(invalid='ignore'):
        for dt in ['f', 'd', 'g']:
            logxf = np.array(x, dtype=dt)
            logyf = np.array(y, dtype=dt)
            logzf = np.array(z, dtype=dt)
            assert_equal(np.logaddexp2(logxf, logyf), logzf)
def test_inf(self):
    err = np.seterr(invalid='ignore')
    inf = np.inf
    x = [inf, -inf, inf, -inf, inf, 1, -inf, 1]
    y = [inf, inf, -inf, -inf, 1, inf, 1, -inf]
    z = [inf, inf, inf, -inf, inf, inf, 1, 1]
    try:
        for dt in ['f', 'd', 'g']:
            logxf = np.array(x, dtype=dt)
            logyf = np.array(y, dtype=dt)
            logzf = np.array(z, dtype=dt)
            assert_equal(np.logaddexp2(logxf, logyf), logzf)
    finally:
        np.seterr(**err)
def test_inf(self): err = np.seterr(invalid="ignore") inf = np.inf x = [inf, -inf, inf, -inf, inf, 1, -inf, 1] y = [inf, inf, -inf, -inf, 1, inf, 1, -inf] z = [inf, inf, inf, -inf, inf, inf, 1, 1] try: for dt in ["f", "d", "g"]: logxf = np.array(x, dtype=dt) logyf = np.array(y, dtype=dt) logzf = np.array(z, dtype=dt) assert_equal(np.logaddexp2(logxf, logyf), logzf) finally: np.seterr(**err)
def speak(self, meaning):
    '''
    Returns a signal for a given meaning, with some chance of error
    according to the noise parameter. Uses a roulette wheel to select
    a signal.
    '''
    random_prob = np.log2(np.random.random())
    summation = self._signal_probability(0, self.language, meaning)
    for signal in range(1, self._maxcats):
        if random_prob < summation:
            return signal - 1
        signal_prob = self._signal_probability(signal, self.language, meaning)
        summation = np.logaddexp2(summation, signal_prob)
    return signal
def lse2(seq: np.ndarray, axis=None) -> np.ndarray:
    """log-sum-exp2"""
    if axis is None:
        return lse2(seq.ravel(), 0)
    elif isinstance(axis, tuple):
        if len(axis) == 0:
            return seq
        elif len(axis) == 1:
            return lse2(seq, axis[0])
        elif len(axis) > 1:
            return lse2(lse2(seq, axis[-1]), axis[:-1])
    elif isinstance(axis, int):
        if seq.shape[axis] == 0:
            return np.full(tuple(filter(lambda d: d != 0, seq.shape)), -np.inf)
        elif seq.shape[axis] == 1:
            return seq.take(0, axis)
        elif seq.shape[axis] == 2:
            return np.logaddexp2(seq.take(0, axis), seq.take(1, axis))
        else:
            # fold np.logaddexp2 over the axis iteratively instead of recursing:
            # np.logaddexp2(seq.take(0, axis), lse2(seq.take(np.arange(1, seq.shape[axis]), axis), axis))
            res = seq.take(0, axis)
            for k in np.arange(1, seq.shape[axis]):
                res = np.logaddexp2(res, seq.take(k, axis))
            return res
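
# A quick usage check (illustrative, not part of the original source) that
# lse2 agrees with the direct computation log2(sum(2**seq, axis)) on a small
# array where exponentiating is still numerically safe:
import numpy as np

a = np.log2(np.arange(1.0, 13.0).reshape(3, 4))
assert np.allclose(lse2(a), np.log2(np.exp2(a).sum()))
assert np.allclose(lse2(a, axis=0), np.log2(np.exp2(a).sum(axis=0)))
assert np.allclose(lse2(a, axis=(0, 1)), np.log2(np.exp2(a).sum()))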
def invoke_binary_function(func, item1, item2):
    # dispatch to the matching NumPy binary function based on func
    if func == 'add':
        return np.add(item1, item2)
    elif func == 'subtract':
        return np.subtract(item1, item2)
    elif func == 'multiply':
        return np.multiply(item1, item2)
    elif func == 'divide':
        return np.divide(item1, item2)
    elif func == 'floor_divide':
        return np.floor_divide(item1, item2)
    elif func == 'power':
        return np.power(item1, item2)
    elif func == 'mod':
        return np.mod(item1, item2)
    elif func == 'logaddexp':
        return np.logaddexp(item1, item2)
    elif func == 'logaddexp2':
        return np.logaddexp2(item1, item2)
    elif func == 'gcd':
        return np.gcd(item1, item2)
    elif func == 'lcm':
        return np.lcm(item1, item2)
    elif func == 'arctan2':
        return np.arctan2(item1, item2)
    elif func == 'hypot':
        return np.hypot(item1, item2)
    elif func == 'bitwise_and':
        return np.bitwise_and(item1, item2)
    elif func == 'bitwise_or':
        return np.bitwise_or(item1, item2)
    elif func == 'bitwise_xor':
        return np.bitwise_xor(item1, item2)
    elif func == 'left_shift':
        return np.left_shift(item1, item2)
    elif func == 'right_shift':
        return np.right_shift(item1, item2)
    elif func == 'maximum':
        return np.maximum(item1, item2)
    elif func == 'minimum':
        return np.minimum(item1, item2)
    else:
        raise Exception("Unknown function {}".format(func))
ax.set_xlim([0, n.amax(index)])

dir = '/Users/Peter/Experiments/platform test/samples/workspace/sampling/0/'

# Plot the samples
data = n.genfromtxt(dir + 'single-sample.csv', delimiter=',')
avg = n.mean(data)
std = n.std(data)
lnEst = avg + 0.5 * std
regEst = float('-inf')
for v in data:
    regEst = n.logaddexp2(regEst, v)
regEst = regEst - n.log2(len(data))

f, (ax1, ax2) = p.subplots(2)
ax1.hist(data, 100, normed=1)
ax1.set_title('samples (mean estimate: ' + str(regEst) + ', ln est: ' + str(lnEst) + ')')
stats.probplot(data, plot=ax2)
f.savefig(dir + 'single-sample.png')

# Plot the convergence
data = n.genfromtxt(dir + 'convergence.csv', delimiter=',')
"gcd": F.pandas_udf(lambda s1, s2: np.gcd(s1, s2), DoubleType()), "heaviside": F.pandas_udf(lambda s1, s2: np.heaviside(s1, s2), DoubleType()), "hypot": F.hypot, "lcm": F.pandas_udf(lambda s1, s2: np.lcm(s1, s2), DoubleType()), "ldexp": F.pandas_udf(lambda s1, s2: np.ldexp(s1, s2), DoubleType()), "left_shift": F.pandas_udf(lambda s1, s2: np.left_shift(s1, s2), LongType()), "logaddexp": F.pandas_udf(lambda s1, s2: np.logaddexp(s1, s2), DoubleType()), "logaddexp2": F.pandas_udf(lambda s1, s2: np.logaddexp2(s1, s2), DoubleType()), "logical_and": lambda c1, c2: c1.cast(BooleanType()) & c2.cast(BooleanType()), "logical_or": lambda c1, c2: c1.cast(BooleanType()) | c2.cast(BooleanType()), "logical_xor": lambda c1, c2: ( # mimics xor by logical operators. (c1.cast(BooleanType()) | c2.cast(BooleanType())) & (~(c1.cast(BooleanType())) | ~(c2.cast(BooleanType())))), "maximum": F.greatest, "minimum": F.least, "modf": F.pandas_udf(lambda s1, s2: np.modf(s1, s2), DoubleType()),
def test_logaddexp2(self):
    import math
    import sys
    float_max, float_min = sys.float_info.max, sys.float_info.min
    from numpy import logaddexp2
    log2 = math.log(2)

    # From the numpy documentation
    prob1 = math.log(1e-50) / log2
    prob2 = math.log(2.5e-50) / log2
    prob12 = logaddexp2(prob1, prob2)
    assert math.fabs(-164.28904982231052 - prob12) < 0.000000000001

    assert logaddexp2(0, 0) == 1
    assert logaddexp2(float("-inf"), 0) == 0
    assert logaddexp2(float_max, float_max) == float_max
    assert logaddexp2(float_min, float_min) == 1.0

    assert math.isnan(logaddexp2(float("nan"), 1))
    assert math.isnan(logaddexp2(1, float("nan")))
    assert math.isnan(logaddexp2(float("nan"), float("inf")))
    assert math.isnan(logaddexp2(float("inf"), float("nan")))
    assert logaddexp2(float("-inf"), float("-inf")) == float("-inf")
    assert logaddexp2(float("-inf"), float("inf")) == float("inf")
    assert logaddexp2(float("inf"), float("-inf")) == float("inf")
    assert logaddexp2(float("inf"), float("inf")) == float("inf")
import numpy as np

prob1 = np.log2(1e-50)
prob2 = np.log2(2.5e-50)
prob12 = np.logaddexp2(prob1, prob2)
prob1, prob2, prob12
2 ** prob12
F.hypot, "lcm": pandas_udf( # type: ignore[call-overload] lambda s1, s2: np.lcm(s1, s2), DoubleType()), "ldexp": pandas_udf( # type: ignore[call-overload] lambda s1, s2: np.ldexp(s1, s2), DoubleType()), "left_shift": pandas_udf( # type: ignore[call-overload] lambda s1, s2: np.left_shift(s1, s2), LongType()), "logaddexp": pandas_udf( # type: ignore[call-overload] lambda s1, s2: np.logaddexp(s1, s2), DoubleType()), "logaddexp2": pandas_udf( # type: ignore[call-overload] lambda s1, s2: np.logaddexp2(s1, s2), DoubleType()), "logical_and": lambda c1, c2: c1.cast(BooleanType()) & c2.cast(BooleanType()), "logical_or": lambda c1, c2: c1.cast(BooleanType()) | c2.cast(BooleanType()), "logical_xor": lambda c1, c2: ( # mimics xor by logical operators. (c1.cast(BooleanType()) | c2.cast(BooleanType())) & (~(c1.cast(BooleanType())) | ~(c2.cast(BooleanType())))), "maximum": F.greatest, "minimum": F.least, "modf": pandas_udf( # type: ignore[call-overload]
def main():
    # choose which schemes to plot
    from NISTschemes import Kyber768, Saber, LAC192, LAC256, Kyber512, \
        Kyber1024, LightSaber, FireSaber, FrodoKEM976, LizardCat3
    from NISTschemesSEC import SCHEMES
    toplot = [Saber, Kyber768, FrodoKEM976]

    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt

    for i in toplot:
        if os.path.exists(i['name'] + "-entropy.pkl"):
            continue
        ecc = 1
        if i.get('errorCorrection', 0):
            ecc = i['errorCorrection'][1]['te'] + 1
        entropy = theoreticentropy(samples=int(2**14), failures=2**10 * ecc, **i)
        with open(i['name'] + "-entropy.pkl", "wb") as f:
            pickle.dump([entropy], f)

    for i in toplot:
        if os.path.exists(i['name'] + "-sec.pkl"):
            continue
        with open(i['name'] + "-entropy.pkl", "rb") as f:
            entropy, = pickle.load(f)
        scheme = SCHEMES[i['name']]
        primal, dual = security_failure(scheme, entropy, i['name'])
        with open(i['name'] + "-sec.pkl", "wb") as f:
            pickle.dump([primal, dual, entropy], f)

    # get colors
    fig, ax = plt.subplots()
    colors = ax._get_lines.prop_cycler

    for i in toplot:
        with open(i['name'] + "-sec.pkl", "rb") as f:
            primal, dual, entropy = pickle.load(f)
        color = next(colors)[u'color']
        plt.semilogx(primal[0:256], color=color, label=i['name'], basex=2)
        # plt.plot(dual, color=color, linestyle='.')

    plt.xlabel(r'positive failure vectors')
    plt.ylabel(r'security')
    plt.legend(loc='upper right')
    plt.tight_layout()
    plt.savefig('secreduction.pdf')
    plt.show()

    # get colors
    fig, ax = plt.subplots()
    colors = ax._get_lines.prop_cycler

    for i in toplot:
        with open(i['name'] + "-sec.pkl", "rb") as f:
            primal, dual, entropy = pickle.load(f)
        color = next(colors)[u'color']
        with open(i['name'] + "-2.pkl", "rb") as f:
            alpha, beta = pickle.load(f)
        workforonefailure = np.log2(
            [sqrt(a) * b**-1 for a, b in zip(alpha, beta)])
        minwork = min(workforonefailure)
        ecc = 1
        if i.get('errorCorrection', 0):
            ecc = i['errorCorrection'][1]['te'] + 1
        security = np.array(primal)
        samples = np.array(range(0, len(security)))
        idx = samples * ecc
        tmp = idx < security.shape[0]
        idx = idx[tmp]
        samples = samples[tmp]
        security = security[idx]
        security = np.logaddexp2(security, (np.log2(samples) + minwork))
        print(i['name'], minwork,
              np.log2(alpha[np.argmin(workforonefailure)]**-1))
        print(np.argmin(security),
              np.log2(np.argmin(security) * beta[np.argmin(workforonefailure)]**-1))
        print(min(security), security[0])
        plt.semilogx(samples, security, color=color, label=i['name'], basex=2)
        # plt.semilogx(primal[0:256], color=color, label=i['name'])
        # plt.plot(dual, color=color, linestyle='.')

    plt.xlabel(r'positive failure vectors')
    plt.ylabel(r'attack cost')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig('secreduction2.pdf')
    plt.show()

    # get colors
    fig, ax = plt.subplots()
    colors = ax._get_lines.prop_cycler

    for i in toplot:
        with open(i['name'] + "-sec.pkl", "rb") as f:
            primal, dual, entropy = pickle.load(f)
        color = next(colors)[u'color']
        plt.semilogx(entropy[0:256], color=color, label=i['name'],
                     linestyle='--', basex=2)
        # plt.plot(dual, color=color, linestyle='.')

    plt.xlabel(r'positive failure vectors')
    plt.ylabel(r'relative variance')
    plt.legend(loc='upper right')
    plt.tight_layout()
    plt.savefig('entropyreduction.pdf')
    plt.show()
def make_fig(figure_path):
    comp_weights = np.linspace(0, 3, 100)
    cost_weights = np.linspace(0, 100, 100)

    comp_stripe = np.array([-35.90911969399966 * i for i in comp_weights])
    comp_qudrnt = np.array([-39.25512476486815 * i for i in comp_weights])
    comp_sum = np.logaddexp2(comp_stripe, comp_qudrnt)
    p_comp_stripe = 2**(comp_stripe - comp_sum)
    p_comp_qudrnt = 2**(comp_qudrnt - comp_sum)

    cost_stripe = np.array([-4.294113843380405 * i for i in cost_weights])
    cost_qudrnt = np.array([-4.200174400715539 * i for i in cost_weights])
    cost_sum = np.logaddexp2(cost_stripe, cost_qudrnt)
    p_cost_stripe = 2**(cost_stripe - cost_sum)
    p_cost_qudrnt = 2**(cost_qudrnt - cost_sum)

    fig, axes = plt.subplots(1, 2, figsize=(5.5, 2.1), sharey=True)

    axes[0].plot(comp_weights, p_comp_stripe, c=colors.blue, linewidth=2, linestyle='-')
    axes[0].plot(comp_weights, p_comp_qudrnt, c=colors.blue, linewidth=2, linestyle=':')
    axes[0].set_ylabel('Prior probability')
    axes[0].set_xlabel('Weight (w)')
    axes[0].set_title('Simplicity prior (πsim)', fontsize=10)
    axes[0].set_xlim(0, comp_weights[-1])
    axes[0].text(comp_weights[-1] * 0.97, 0.9, 'stripes',
                 horizontalalignment='right', verticalalignment='center')
    axes[0].text(comp_weights[-1] * 0.97, 0.1, 'quadrants',
                 horizontalalignment='right', verticalalignment='center')

    axes[1].plot(cost_weights, p_cost_stripe, c=colors.red, linewidth=2, linestyle='-')
    axes[1].plot(cost_weights, p_cost_qudrnt, c=colors.red, linewidth=2, linestyle=':')
    axes[1].set_xlabel('Weight (w)')
    axes[1].set_title('Informativeness prior (πinf)', fontsize=10)
    axes[1].set_xlim(0, cost_weights[-1])
    axes[1].text(cost_weights[-1] * 0.97, 0.9, 'quadrants',
                 horizontalalignment='right', verticalalignment='center')
    axes[1].text(cost_weights[-1] * 0.97, 0.1, 'stripes',
                 horizontalalignment='right', verticalalignment='center')

    fig.tight_layout(pad=0.1, h_pad=0.5, w_pad=0.5)
    fig.savefig(figure_path, format='svg')
    tools.format_svg_labels(figure_path)
    if not figure_path.endswith('.svg'):
        tools.convert_svg(figure_path, figure_path)
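
# The normalization pattern above -- 2**(a - np.logaddexp2(a, b)) -- is a
# two-way softmax computed in log2 space. A small illustrative check (values
# are arbitrary, not from the figure) that the two probabilities sum to 1:
import numpy as np

a, b = -35.9, -39.3  # any pair of log2 scores
total = np.logaddexp2(a, b)
p_a, p_b = 2.0 ** (a - total), 2.0 ** (b - total)
assert np.isclose(p_a + p_b, 1.0)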
def test_logaddexp2_int_int_mat():
    expect(np.logaddexp2, [int_mat, int_mat], np.logaddexp2(int_mat, int_mat))
def test_nan(self):
    assert_(np.isnan(np.logaddexp2(np.nan, np.inf)))
    assert_(np.isnan(np.logaddexp2(np.inf, np.nan)))
    assert_(np.isnan(np.logaddexp2(np.nan, 0)))
    assert_(np.isnan(np.logaddexp2(0, np.nan)))
    assert_(np.isnan(np.logaddexp2(np.nan, np.nan)))
def logaddexp2_usecase(x, y, result):
    np.logaddexp2(x, y, result)
def test_ufunc_logaddexp2_ff(A: dace.float32[10], B: dace.float32[10]):
    return np.logaddexp2(A, B)
def test_logaddexp2_bool_bool_vec():
    expect(np.logaddexp2, [bool_vec, bool_vec], np.logaddexp2(bool_vec, bool_vec))
def __call__(self, a, b):
    self.variables = (a, b)
    out = np.logaddexp2(a.data, b.data)
    return out
def logaddexp2(x, y):
    return np.logaddexp2(x, y)
"fmod": pandas_udf(lambda s1, s2: np.fmod(s1, s2), DoubleType(), PandasUDFType.SCALAR), "gcd": pandas_udf(lambda s1, s2: np.gcd(s1, s2), DoubleType(), PandasUDFType.SCALAR), "heaviside": pandas_udf( lambda s1, s2: np.heaviside(s1, s2), DoubleType(), PandasUDFType.SCALAR ), "hypot": F.hypot, "lcm": pandas_udf(lambda s1, s2: np.lcm(s1, s2), DoubleType(), PandasUDFType.SCALAR), "ldexp": pandas_udf(lambda s1, s2: np.ldexp(s1, s2), DoubleType(), PandasUDFType.SCALAR), "left_shift": pandas_udf( lambda s1, s2: np.left_shift(s1, s2), LongType(), PandasUDFType.SCALAR ), "logaddexp": pandas_udf( lambda s1, s2: np.logaddexp(s1, s2), DoubleType(), PandasUDFType.SCALAR ), "logaddexp2": pandas_udf( lambda s1, s2: np.logaddexp2(s1, s2), DoubleType(), PandasUDFType.SCALAR ), "logical_and": lambda c1, c2: c1.cast(BooleanType()) & c2.cast(BooleanType()), "logical_or": lambda c1, c2: c1.cast(BooleanType()) | c2.cast(BooleanType()), "logical_xor": lambda c1, c2: ( # mimics xor by logical operators. (c1.cast(BooleanType()) | c2.cast(BooleanType())) & (~(c1.cast(BooleanType())) | ~(c2.cast(BooleanType()))) ), "maximum": F.greatest, "minimum": F.least, "modf": pandas_udf(lambda s1, s2: np.modf(s1, s2), DoubleType(), PandasUDFType.SCALAR), "nextafter": pandas_udf( lambda s1, s2: np.nextafter(s1, s2), DoubleType(), PandasUDFType.SCALAR ), "right_shift": pandas_udf(
def logaddexp2(x, **kwargs):
    _dimless_warn('numpy.logaddexp2', x)
    return Qty(mag=np.logaddexp2(x.mag, **kwargs))
def test_logaddexp2():
    assert isnear(numpy.logaddexp2(7, 8), logaddexp2(7, 8))
def test_logaddexp2_int_float64_vec():
    expect(np.logaddexp2, [int_vec, float64_vec], np.logaddexp2(int_vec, float64_vec))
def add_inplace(self, x, y):
    # Convert log_b probabilities to log_2 probabilities, writing into x.
    x *= np.log2(base)
    y2 = y * np.log2(base)
    np.logaddexp2(x, y2, x)
    # Convert log_2 probabilities back to log_b probabilities.
    x *= self.log(2)
    return x
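
# An illustrative check (standalone, not from the class above) of the
# base-change trick used by add/add_inplace: scaling by log2(base) moves
# log_base values into log2 space, where np.logaddexp2 applies, and
# multiplying by log_base(2) moves the result back. Here base = e, so the
# result must match np.logaddexp.
import numpy as np

base = np.e
x, y = np.log(0.2), np.log(0.3)  # natural-log probabilities
z2 = np.logaddexp2(x * np.log2(base), y * np.log2(base))
z = z2 * np.log(2) / np.log(base)  # log_base(2) == log(2) / log(base)
assert np.isclose(z, np.logaddexp(x, y))
assert np.isclose(np.exp(z), 0.5)  # 0.2 + 0.3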
def train_unsupervised(self, unlabeled_sequences, update_outputs=True, **kwargs):
    """
    Trains the HMM using the Baum-Welch algorithm to maximise the
    probability of the data sequence. This is a variant of the EM
    algorithm, and is unsupervised in that it doesn't need the state
    sequences for the symbols. The code is based on 'A Tutorial on Hidden
    Markov Models and Selected Applications in Speech Recognition',
    Lawrence Rabiner, IEEE, 1989.

    :return: the trained model
    :rtype: HiddenMarkovModelTagger
    :param unlabeled_sequences: the training data, a set of
        sequences of observations
    :type unlabeled_sequences: list

    kwargs may include following parameters:

    :param model: a HiddenMarkovModelTagger instance used to begin
        the Baum-Welch algorithm
    :param max_iterations: the maximum number of EM iterations
    :param convergence_logprob: the maximum change in log probability to
        allow convergence
    """

    # create a uniform HMM, which will be iteratively refined, unless
    # given an existing model
    model = kwargs.get('model')
    if not model:
        priors = RandomProbDist(self._states)
        transitions = DictionaryConditionalProbDist(
            dict((state, RandomProbDist(self._states))
                 for state in self._states))
        outputs = DictionaryConditionalProbDist(
            dict((state, RandomProbDist(self._symbols))
                 for state in self._states))
        model = HiddenMarkovModelTagger(self._symbols, self._states,
                                        transitions, outputs, priors)

    self._states = model._states
    self._symbols = model._symbols
    N = len(self._states)
    M = len(self._symbols)
    symbol_numbers = dict((sym, i) for i, sym in enumerate(self._symbols))

    # update model prob dists so that they can be modified
    # model._priors = MutableProbDist(model._priors, self._states)
    model._transitions = DictionaryConditionalProbDist(
        dict((s, MutableProbDist(model._transitions[s], self._states))
             for s in self._states))
    if update_outputs:
        model._outputs = DictionaryConditionalProbDist(
            dict((s, MutableProbDist(model._outputs[s], self._symbols))
                 for s in self._states))

    model.reset_cache()

    # iterate until convergence
    converged = False
    last_logprob = None
    iteration = 0
    max_iterations = kwargs.get('max_iterations', 1000)
    epsilon = kwargs.get('convergence_logprob', 1e-6)

    while not converged and iteration < max_iterations:
        A_numer = _ninf_array((N, N))
        B_numer = _ninf_array((N, M))
        A_denom = _ninf_array(N)
        B_denom = _ninf_array(N)

        logprob = 0
        for sequence in unlabeled_sequences:
            sequence = list(sequence)
            if not sequence:
                continue

            (lpk, seq_A_numer, seq_A_denom,
             seq_B_numer, seq_B_denom) = self._baum_welch_step(
                sequence, model, symbol_numbers)

            # add these sums to the global A and B values
            for i in range(N):
                A_numer[i] = np.logaddexp2(A_numer[i], seq_A_numer[i] - lpk)
                B_numer[i] = np.logaddexp2(B_numer[i], seq_B_numer[i] - lpk)

            A_denom = np.logaddexp2(A_denom, seq_A_denom - lpk)
            B_denom = np.logaddexp2(B_denom, seq_B_denom - lpk)

            logprob += lpk

        # use the calculated values to update the transition and output
        # probability values
        for i in range(N):
            logprob_Ai = A_numer[i] - A_denom[i]
            logprob_Bi = B_numer[i] - B_denom[i]

            # We should normalize all probabilities (see p.391 Huang et al)
            # Let sum(P) be K.
            # We can divide each Pi by K to make sum(P) == 1.
            #   Pi' = Pi / K
            #   log2(Pi') = log2(Pi) - log2(K)
            logprob_Ai -= logsumexp2(logprob_Ai)
            logprob_Bi -= logsumexp2(logprob_Bi)

            # update output and transition probabilities
            si = self._states[i]

            for j in range(N):
                sj = self._states[j]
                model._transitions[si].update(sj, logprob_Ai[j])

            if update_outputs:
                for k in range(M):
                    ok = self._symbols[k]
                    model._outputs[si].update(ok, logprob_Bi[k])

            # Rabiner says the priors don't need to be updated. I don't
            # believe him. FIXME

        # test for convergence
        if iteration > 0 and abs(logprob - last_logprob) < epsilon:
            converged = True

        print('iteration', iteration, 'logprob', logprob)
        iteration += 1
        last_logprob = logprob

    return model
def test_logaddexp2_int_float64_mat():
    expect(np.logaddexp2, [int_mat, float64_mat], np.logaddexp2(int_mat, float64_mat))
def test_logaddexp2_float32_float64_vec():
    expect(np.logaddexp2, [float32_vec, float64_vec], np.logaddexp2(float32_vec, float64_vec))

def test_logaddexp2_float32_float64_mat():
    expect(np.logaddexp2, [float32_mat, float64_mat], np.logaddexp2(float32_mat, float64_mat))

def test_logaddexp2_bool_bool_mat():
    expect(np.logaddexp2, [bool_mat, bool_mat], np.logaddexp2(bool_mat, bool_mat))
import numpy as np
import matplotlib.pyplot as plt

prob1 = np.log2(1e-50)
prob2 = np.log2(2.5e-50)
prob12 = np.logaddexp2(prob1, prob2)
print("prob12: ", prob12)
print(np.exp2(prob12))
def test_logaddexp2_int_int_vec():
    expect(np.logaddexp2, [int_vec, int_vec], np.logaddexp2(int_vec, int_vec))