def pdf(self, data):
    # Note: the multinomial coefficient is omitted in this implementation.
    # The result is proportional to the true log density, which is
    # sufficient for EM.
    # GSL computes the true density, including the multinomial-coefficient
    # normalizing constant, and is therefore less efficient than the
    # implementation below.
    if isinstance(data, DataSet):
        x = data.internalData
    elif hasattr(data, "__iter__"):
        x = data
    else:
        raise TypeError, "Unknown/Invalid input type."

    # switch to log scale for the density computation
    log_phi = np.log(self.phi)

    # compute the un-normalized log density
    res = np.zeros(len(x), dtype='float64')
    for j in range(len(x)):
        for i in range(self.M):
            res[j] += log_phi[i] * x[j, i]

    # vectorized equivalent of the double loop above, used as a cross-check
    res2 = np.sum(x * log_phi, axis=1)
    assertAlmostEqual(res, res2)
    return res
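# A minimal standalone sketch of the same un-normalized multinomial log
# density as above; the parameters `phi` and the toy count matrix are
# made-up values, and numpy's allclose stands in for assertAlmostEqual.
import numpy as np

phi = np.array([0.2, 0.5, 0.3])                  # assumed event probabilities
counts = np.array([[3, 1, 0],
                   [0, 2, 2]], dtype='float64')  # toy observations

log_phi = np.log(phi)
loop = np.array([sum(log_phi[i] * row[i] for i in range(len(phi)))
                 for row in counts])
vectorized = np.sum(counts * log_phi, axis=1)
assert np.allclose(loop, vectorized)  # both paths agree; coefficient omitted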
def linear_pdf(self, x):
    # compute the Gaussian density (not the log likelihood) via scipy
    res = stats.norm.pdf(x, loc=self.mean, scale=math.sqrt(self.variance))

    # closed-form evaluation of the same density, used as a cross-check
    expo = math.exp(-1 * sqr(self.mean - x) / (2 * self.variance))
    res2 = expo / math.sqrt(2 * math.pi * self.variance)

    assertAlmostEqual(res, res2, places=10)
    return res
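# Standalone sketch of the cross-check above: scipy's density versus the
# closed form exp(-(x - mu)^2 / (2 sigma^2)) / sqrt(2 pi sigma^2). The
# mean/variance values are arbitrary; sqr() above is assumed to square its
# argument, written out explicitly here.
import math
from scipy import stats

mean, variance, x = 1.0, 4.0, 2.5
res = stats.norm.pdf(x, loc=mean, scale=math.sqrt(variance))
res2 = math.exp(-(mean - x) ** 2 / (2 * variance)) / math.sqrt(2 * math.pi * variance)
assert abs(res - res2) < 1e-10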
def __eq__(self, other):
    Log.warning("expensive")
    from pyLibrary.testing.fuzzytestcase import assertAlmostEqual
    try:
        assertAlmostEqual(convert.json2value(self.json), other)
        return True
    except Exception:
        return False
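# The __eq__ above turns an assertion (raises on mismatch) into a boolean
# equality test. A minimal sketch of that pattern, with a hypothetical
# fuzzy_assert helper standing in for pyLibrary's structure-aware
# assertAlmostEqual:
def fuzzy_assert(a, b, places=7):
    # raises AssertionError when a and b differ beyond `places` decimals
    if round(abs(a - b), places) != 0:
        raise AssertionError("%r != %r within %d places" % (a, b, places))

class Approx(object):
    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        try:
            fuzzy_assert(self.value, other)
            return True
        except Exception:
            return False

print Approx(0.1 + 0.2) == 0.3  # True despite floating-point error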
def compare_to_expected(query, result, expect):
    query = wrap(query)
    expect = wrap(expect)

    if result.meta.format == "table":
        assertAlmostEqual(set(result.header), set(expect.header))

        # MAP FROM expected COLUMN TO result COLUMN
        mapping = zip(*zip(*filter(
            lambda v: v[0][1] == v[1][1],
            itertools.product(enumerate(expect.header), enumerate(result.header))
        ))[1])[0]
        result.header = [result.header[m] for m in mapping]

        if result.data:
            columns = zip(*unwrap(result.data))
            result.data = zip(*[columns[m] for m in mapping])

        if not query.sort:
            sort_table(result)
            sort_table(expect)
    elif result.meta.format == "list":
        if query["from"].startswith("meta."):
            pass
        else:
            query = QueryOp.wrap(query)

        if not query.sort:
            try:
                # result.data MAY BE A LIST OF VALUES, NOT OBJECTS
                data_columns = jx.sort(
                    set(jx.get_columns(result.data, leaves=True)) |
                    set(jx.get_columns(expect.data, leaves=True)),
                    "name"
                )
            except Exception:
                data_columns = [{"name": "."}]

            sort_order = listwrap(coalesce(query.edges, query.groupby)) + data_columns

            if isinstance(expect.data, list):
                try:
                    expect.data = jx.sort(expect.data, sort_order.name)
                except Exception, _:
                    pass

            if isinstance(result.data, list):
                try:
                    result.data = jx.sort(result.data, sort_order.name)
                except Exception, _:
                    pass
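# The mapping expression above is dense; this standalone sketch shows what
# it computes on toy headers (Python 2, where zip() returns a list):
import itertools

expect_header = ["a", "b", "c"]
result_header = ["b", "c", "a"]

pairs = filter(
    lambda v: v[0][1] == v[1][1],
    itertools.product(enumerate(expect_header), enumerate(result_header))
)
# pairs == [((0, 'a'), (2, 'a')), ((1, 'b'), (0, 'b')), ((2, 'c'), (1, 'c'))]
mapping = zip(*zip(*pairs)[1])[0]
# mapping == (2, 0, 1): for each expected column, its index in result_header
assert [result_header[m] for m in mapping] == expect_header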
def get_container(settings):
    if isinstance(settings, (MultiDayIndex, aws.s3.Bucket)):
        return settings

    if settings == None:
        return DummySink()
    elif settings.type == "redshift":
        for e in sinks:
            try:
                fuzzytestcase.assertAlmostEqual(e[0], settings)
                return e[1]
            except Exception, _:
                pass
        sink = Json2Redshift(settings=settings)
        # sink = Threaded(sink)
        sinks.append((settings, sink))
        return sink
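# get_container() reuses a sink when its settings fuzzily match an earlier
# request. A minimal sketch of that cache pattern, with a hypothetical
# fuzzy_match() predicate standing in for fuzzytestcase.assertAlmostEqual:
_cache = []

def fuzzy_match(a, b):
    # assumption: settings are plain dicts; match when all of b's items appear in a
    return all(a.get(k) == v for k, v in b.items())

def get_or_create(settings, factory):
    for cached_settings, cached_sink in _cache:
        if fuzzy_match(cached_settings, settings):
            return cached_sink          # reuse the existing sink
    sink = factory(settings)
    _cache.append((settings, sink))     # register for future lookups
    return sink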
def linear_pdf(self, x):
    # quadratic form (x - mean)' * Sigma^-1 * (x - mean), via explicit loops
    ay = 0
    for i in range(self.dimension):
        tempv = 0
        for j in range(self.dimension):
            # sigmainv == transpose(sigmainv), so the i, j mixup has no effect
            tempv += (x[j] - self.mean[j]) * self.variance_inv[j][i]
        ay += tempv * (x[i] - self.mean[i])
    res2 = math.exp(-0.5 * ay) / math.sqrt(pow(2 * math.pi, self.dimension) * self.variance_det)
    # --------------------------------------------------
    # vectorized equivalent, used as a cross-check
    ff = math.pow(2 * math.pi, -self.dimension / 2.0) * math.pow(self.variance_det, -0.5)
    centered = x - self.mean
    res = ff * np.exp(-0.5 * np.sum(centered * centered.dot(self.variance_inv)))
    assertAlmostEqual(res2, res, places=12)
    return res
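# Standalone sketch of the multivariate-normal cross-check above, on a
# made-up 2-d example; numpy's dot products stand in for the explicit
# double loop:
import math
import numpy as np

mean = np.array([0.0, 1.0])
cov = np.array([[2.0, 0.3], [0.3, 1.0]])    # assumed covariance
cov_inv = np.linalg.inv(cov)
cov_det = np.linalg.det(cov)
x = np.array([0.5, 0.5])

centered = x - mean
quad = centered.dot(cov_inv).dot(centered)  # (x-mu)' Sigma^-1 (x-mu)
pdf = math.exp(-0.5 * quad) / math.sqrt((2 * math.pi) ** len(mean) * cov_det)

# same value via the normalizing-factor form used in linear_pdf()
ff = (2 * math.pi) ** (-len(mean) / 2.0) * cov_det ** -0.5
pdf2 = ff * np.exp(-0.5 * np.sum(centered * centered.dot(cov_inv)))
assert abs(pdf - pdf2) < 1e-12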
def parse_sql(sql):
    query = wrap(moz_sql_parser.parse(sql))

    # PULL OUT THE AGGREGATES
    for s in listwrap(query.select):
        val = s.value

        # LOOK FOR GROUPBY COLUMN IN SELECT CLAUSE, REMOVE DUPLICATION
        for g in listwrap(query.groupby):
            try:
                assertAlmostEqual(g.value, val, "")
                g.name = s.name
                s.value = None  # MARK FOR REMOVAL
                break
            except Exception, e:
                pass

        if isinstance(val, Mapping):
            for a in KNOWN_SQL_AGGREGATES:
                if val[a]:
                    s.aggregate = a
                    s.value = val[a]
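# A standalone sketch of the aggregate extraction above, applied to a parse
# tree shaped the way moz_sql_parser approximately emits it (the exact
# structure here is an assumption, not the library's documented output):
KNOWN_SQL_AGGREGATES = {"count", "sum", "avg", "min", "max"}

parsed = {
    "select": [{"value": {"count": "a"}, "name": "n"}, {"value": "b"}],
    "from": "t",
    "groupby": [{"value": "b"}],
}

for s in parsed["select"]:
    val = s["value"]
    if isinstance(val, dict):
        for a in KNOWN_SQL_AGGREGATES:
            if a in val:
                s["aggregate"] = a   # e.g. "count"
                s["value"] = val[a]  # e.g. "a"
# parsed["select"][0] now carries {"aggregate": "count", "value": "a", "name": "n"}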
def __init__(self, name, work_queue, workers, resources, please_stop, wait_forever=False, settings=None):
    # FIND THE WORKERS METHODS
    settings.workers = []
    for w in workers:
        w = deepcopy(w)

        for existing_worker in settings.workers:
            try:
                fuzzytestcase.assertAlmostEqual(existing_worker.source, w.source)
                fuzzytestcase.assertAlmostEqual(existing_worker.transformer, w.transformer)
                # SAME SOURCE AND TRANSFORMER, MERGE THE destinations
            except Exception, e:
                continue
            destination = get_container(w.destination)
            existing_worker._destination = Split(existing_worker._destination, destination)
            break
        else:
            t_name = w.transformer
            w._transformer = dot.get_attr(sys.modules, t_name)
            if not w._transformer:
                Log.error(
                    "Can not find {{path}} to transformer (are you sure you are pointing to a function?)",
                    path=t_name
                )
            w._source = get_container(w.source)
            w._destination = get_container(w.destination)
            settings.workers.append(w)

        w._notify = []
        for notify in listwrap(w.notify):
            w._notify.append(aws.Queue(notify))
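# The for/else above is load-bearing: the else block runs only when the
# loop finished without break, i.e. no existing worker matched. A minimal
# standalone sketch of that merge-or-append pattern (names here are
# illustrative, not from the source):
def merge_or_append(items, new, same):
    for existing in items:
        if same(existing, new):
            existing.setdefault("merged", []).append(new)
            break
    else:
        items.append(new)  # no match found: register as a new entry

workers = []
same_source = lambda a, b: a.get("source") == b.get("source")
merge_or_append(workers, {"source": "s1"}, same_source)
merge_or_append(workers, {"source": "s1"}, same_source)
assert len(workers) == 1 and len(workers[0]["merged"]) == 1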
elif result.meta.format == "cube" and len( result.edges ) == 1 and result.edges[0].name == "rownum" and not query.sort: header = list(result.data.keys()) result.data = cube2list(result.data) result.data = jx.sort(result.data, header) result.data = list2cube(result.data, header) expect.data = cube2list(expect.data) expect.data = jx.sort(expect.data, header) expect.data = list2cube(expect.data, header) # CONFIRM MATCH assertAlmostEqual(result, expect, places=6) def cube2list(c): rows = zip(*[[(k, v) for v in a] for k, a in c.items()]) rows = [dict(r) for r in rows] return rows def list2cube(rows, header): return {h: [r[h] for r in rows] for h in header} def sort_table(result): """ SORT ROWS IN TABLE, EVEN IF ELEMENTS ARE JSON
        for e in sinks:
            try:
                fuzzytestcase.assertAlmostEqual(e[0], settings)
                return e[1]
            except Exception, _:
                pass
        sink = Json2Redshift(settings=settings)
        # sink = Threaded(sink)
        sinks.append((settings, sink))
        return sink
    elif coalesce(settings.aws_access_key_id, settings.aws_access_key_id, settings.region):
        # ASSUME BUCKET NAME
        with sinks_locker:
            for e in sinks:
                try:
                    fuzzytestcase.assertAlmostEqual(e[0], settings)
                    return e[1]
                except Exception, _:
                    pass
            output = S3Bucket(settings)
            sinks.append((settings, output))
            return output
    else:
        with sinks_locker:
            for e in sinks:
                try:
                    fuzzytestcase.assertAlmostEqual(e[0], settings)
                    return e[1]
                except Exception, _:
                    pass
            output = elasticsearch.Cluster(settings).get_or_create_index(settings)
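# The later branches guard the cache scan with sinks_locker. A minimal
# sketch of the same thread-safe lookup, with threading.Lock as an assumed
# stand-in for pyLibrary's locker and `match` for the fuzzy comparison:
import threading

_cache = []
_cache_lock = threading.Lock()

def get_or_create_locked(settings, factory, match):
    with _cache_lock:  # one thread at a time scans and mutates the cache
        for cached_settings, cached_sink in _cache:
            if match(cached_settings, settings):
                return cached_sink
        sink = factory(settings)
        _cache.append((settings, sink))
        return sink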
def allclose(a, b):
    try:
        assertAlmostEqual(a, b)
        return True
    except Exception, e:
        return False