Code example #1
0
File: multinomial.py  Project: klahnakoski/pymix
    def pdf(self, data):
        """Return the un-normalized log density of each sample under the
        multinomial parameters ``self.phi``.

        The multinomial coefficient is deliberately omitted: the result is
        only proportional to the true log density, which is sufficient for
        the EM algorithm.  (gsl computes the full density, including the
        normalizing constant, and is therefore less efficient.)
        """
        if isinstance(data, DataSet):
            samples = data.internalData
        elif hasattr(data, "__iter__"):
            samples = data
        else:
            raise TypeError("Unknown/Invalid input type.")

        # switch to log scale for the density computation
        log_phi = np.log(self.phi)

        # hand-rolled accumulation of sum_i log(phi_i) * samples[row, i]
        res = np.zeros(len(samples), dtype='Float64')
        for row in range(len(samples)):
            for comp in range(self.M):
                res[row] += log_phi[comp] * samples[row, comp]

        # vectorized evaluation of the same quantity, used as a cross-check
        vectorized = np.sum(samples * log_phi, axis=1)
        assertAlmostEqual(res, vectorized)

        return res
Code example #2
0
File: normal.py  Project: klahnakoski/pymix
    def linear_pdf(self, x):
        """Return the normal density N(x; mean, variance) on the linear scale.

        The value is computed twice -- once via scipy.stats and once from
        the closed-form Gaussian formula -- and the two are cross-checked
        to 10 decimal places before the scipy result is returned.
        """
        sigma = math.sqrt(self.variance)
        density = stats.norm.pdf(x, loc=self.mean, scale=sigma)

        # closed-form reference: exp(-(x-mu)^2 / (2 s^2)) / sqrt(2 pi s^2)
        numerator = math.exp(-1 * sqr(self.mean - x) / (2 * self.variance))
        reference = numerator / math.sqrt(2 * math.pi * self.variance)

        assertAlmostEqual(density, reference, places=10)

        return density
Code example #3
0
File: expressions.py  Project: klahnakoski/MoDevETL
    def __eq__(self, other):
        """Structural equality: fuzzily compare the parsed JSON of this
        expression against ``other``.

        NOTE: re-parses ``self.json`` on every call, hence the warning.
        Any failure (parse or mismatch) is treated as "not equal".
        """
        Log.warning("expensive")

        from pyLibrary.testing.fuzzytestcase import assertAlmostEqual

        try:
            assertAlmostEqual(convert.json2value(self.json), other)
        except Exception:
            return False
        return True
Code example #4
0
def compare_to_expected(query, result, expect):
    """Compare a query ``result`` against the ``expect``-ed outcome.

    Normalizes both sides (column order, and row order when the query did
    not request a sort) before comparison, so tests are insensitive to
    orderings the query itself did not ask for.

    NOTE(review): Python 2 only -- ``zip(...)`` is subscripted below and
    ``except Exception, _`` is Py2-only syntax.  This copy also appears
    truncated; the comparison chain continues beyond the visible lines.
    """
    query = wrap(query)
    expect = wrap(expect)

    if result.meta.format == "table":
        # headers must agree as sets; their order is normalized below
        assertAlmostEqual(set(result.header), set(expect.header))

        # MAP FROM expected COLUMN TO result COLUMN
        mapping = zip(*zip(*filter(
            lambda v: v[0][1] == v[1][1],
            itertools.product(enumerate(expect.header), enumerate(
                result.header))))[1])[0]
        result.header = [result.header[m] for m in mapping]

        if result.data:
            # reorder every data column to follow the expected header order
            columns = zip(*unwrap(result.data))
            result.data = zip(*[columns[m] for m in mapping])

        if not query.sort:
            # no explicit sort requested: impose a canonical row order
            sort_table(result)
            sort_table(expect)
    elif result.meta.format == "list":
        if query["from"].startswith("meta."):
            pass
        else:
            query = QueryOp.wrap(query)

        if not query.sort:
            try:
                #result.data MAY BE A LIST OF VALUES, NOT OBJECTS
                data_columns = jx.sort(
                    set(jx.get_columns(result.data, leaves=True))
                    | set(jx.get_columns(expect.data, leaves=True)), "name")
            except Exception:
                data_columns = [{"name": "."}]

            # sort first by the grouping edges, then by every data column
            sort_order = listwrap(coalesce(query.edges,
                                           query.groupby)) + data_columns

            if isinstance(expect.data, list):
                try:
                    expect.data = jx.sort(expect.data, sort_order.name)
                except Exception, _:
                    pass

            if isinstance(result.data, list):
                try:
                    result.data = jx.sort(result.data, sort_order.name)
                except Exception, _:
                    pass
Code example #5
0
File: etl.py  Project: klahnakoski/Activedata-ETL
def get_container(settings):
    """Resolve ``settings`` into a sink/container instance, reusing a
    cached sink when an existing entry's settings fuzzily match.

    NOTE(review): this copy appears truncated -- the elif chain presumably
    continues with more sink types beyond the visible lines.  Python 2
    syntax (``except Exception, _``).
    """
    if isinstance(settings, (MultiDayIndex, aws.s3.Bucket)):
        # already a live container; pass it straight through
        return settings

    if settings == None:
        return DummySink()
    elif settings.type == "redshift":
        # fuzzy-match failure raises, which here simply means "no match"
        for e in sinks:
            try:
                fuzzytestcase.assertAlmostEqual(e[0], settings)
                return e[1]
            except Exception, _:
                pass
        sink = Json2Redshift(settings=settings)
        # sink = Threaded(sink)
        sinks.append((settings, sink))
        return sink
Code example #6
0
def get_container(settings):
    """Resolve ``settings`` into a sink/container instance, reusing a
    cached sink when an existing entry's settings fuzzily match.

    NOTE(review): duplicate of an earlier snippet in this file, and
    likewise appears truncated (more elif branches exist elsewhere).
    Python 2 syntax (``except Exception, _``).
    """
    if isinstance(settings, (MultiDayIndex, aws.s3.Bucket)):
        # already a live container; pass it straight through
        return settings

    if settings == None:
        return DummySink()
    elif settings.type == "redshift":
        # fuzzy-match failure raises, which here simply means "no match"
        for e in sinks:
            try:
                fuzzytestcase.assertAlmostEqual(e[0], settings)
                return e[1]
            except Exception, _:
                pass
        sink = Json2Redshift(settings=settings)
        # sink = Threaded(sink)
        sinks.append((settings, sink))
        return sink
Code example #7
0
File: multinormal.py  Project: klahnakoski/pymix
    def linear_pdf(self, x):
        """Return the multivariate normal density N(x; mean, variance) on
        the linear scale.

        The density is evaluated twice -- once with explicit loops over
        the quadratic form, once vectorized with numpy -- and the two are
        cross-checked to 12 decimal places; the vectorized value is
        returned.
        """
        # quadratic form (x - mu)^T Sigma^{-1} (x - mu), accumulated by hand
        quad = 0
        for i in range(self.dimension):
            partial = 0
            # Sigma^{-1} is symmetric, so the i/j orientation is harmless
            for j in range(self.dimension):
                partial += (x[j] - self.mean[j]) * self.variance_inv[j][i]

            quad += partial * (x[i] - self.mean[i])

        looped = math.exp(-0.5 * quad) / math.sqrt(pow(2 * math.pi, self.dimension) * self.variance_det)

        # vectorized evaluation of the same density
        norm_const = math.pow(2 * math.pi, -self.dimension / 2.0) * math.pow(self.variance_det, -0.5)
        centered = x - self.mean
        density = norm_const * np.exp(-0.5 * np.sum(centered * centered.dot(self.variance_inv)))

        assertAlmostEqual(looped, density, places=12)
        return density
Code example #8
0
File: query.py  Project: davehunt/ActiveData
def parse_sql(sql):
    """Parse ``sql`` into a query structure and normalize its aggregates.

    Two normalizations are applied to the select clause:

    * a select column duplicating a groupby column donates its name to
      the groupby edge and is marked for removal (value set to None)
    * a select value of the form ``{aggregate: operand}`` is split into
      explicit ``aggregate`` / ``value`` properties
    """
    query = wrap(moz_sql_parser.parse(sql))
    # PULL OUT THE AGGREGATES
    for sel in listwrap(query.select):
        val = sel.value
        # LOOK FOR GROUPBY COLUMN IN SELECT CLAUSE, REMOVE DUPLICATION
        for edge in listwrap(query.groupby):
            try:
                # fuzzy-compare failure raises, meaning "not a duplicate"
                assertAlmostEqual(edge.value, val, "")
                edge.name = sel.name
                sel.value = None  # MARK FOR REMOVAL
                break
            except Exception:
                pass

        if isinstance(val, Mapping):
            for agg in KNOWN_SQL_AGGREGATES:
                if val[agg]:
                    sel.aggregate = agg
                    sel.value = val[agg]
Code example #9
0
    def __init__(self,
                 name,
                 work_queue,
                 workers,
                 resources,
                 please_stop,
                 wait_forever=False,
                 settings=None):
        """Wire up the ETL workers described by ``workers``.

        Workers sharing the same source and transformer are merged into a
        single registered worker whose destination becomes a Split over
        both destinations.

        NOTE(review): mutates ``settings.workers`` in place; Python 2
        syntax (``except Exception, e``).
        """
        # FIND THE WORKERS METHODS
        settings.workers = []
        for w in workers:
            w = deepcopy(w)

            # scan already-registered workers for the same source +
            # transformer; a fuzzy-compare exception means "different",
            # so we keep scanning
            for existing_worker in settings.workers:
                try:
                    fuzzytestcase.assertAlmostEqual(existing_worker.source,
                                                    w.source)
                    fuzzytestcase.assertAlmostEqual(
                        existing_worker.transformer, w.transformer)
                    # SAME SOURCE AND TRANSFORMER, MERGE THE destinations
                except Exception, e:
                    continue
                destination = get_container(w.destination)
                existing_worker._destination = Split(
                    existing_worker._destination, destination)
                break
            else:
                # no duplicate found: resolve the transformer function and
                # the source/destination containers, then register
                t_name = w.transformer
                w._transformer = dot.get_attr(sys.modules, t_name)
                if not w._transformer:
                    Log.error(
                        "Can not find {{path}} to transformer (are you sure you are pointing to a function?)",
                        path=t_name)
                w._source = get_container(w.source)
                w._destination = get_container(w.destination)
                settings.workers.append(w)

            # every worker (new or merged) gets its notification queues
            w._notify = []
            for notify in listwrap(w.notify):
                w._notify.append(aws.Queue(notify))
Code example #10
0
File: etl.py  Project: klahnakoski/Activedata-ETL
    def __init__(
        self,
        name,
        work_queue,
        workers,
        resources,
        please_stop,
        wait_forever=False,
        settings=None
    ):
        """Wire up the ETL workers described by ``workers``.

        Workers sharing the same source and transformer are merged into a
        single registered worker whose destination becomes a Split over
        both destinations.

        NOTE(review): mutates ``settings.workers`` in place; Python 2
        syntax (``except Exception, e``).
        """
        # FIND THE WORKERS METHODS
        settings.workers = []
        for w in workers:
            w = deepcopy(w)

            # scan already-registered workers for the same source +
            # transformer; a fuzzy-compare exception means "different",
            # so we keep scanning
            for existing_worker in settings.workers:
                try:
                    fuzzytestcase.assertAlmostEqual(existing_worker.source, w.source)
                    fuzzytestcase.assertAlmostEqual(existing_worker.transformer, w.transformer)
                    # SAME SOURCE AND TRANSFORMER, MERGE THE destinations
                except Exception, e:
                    continue
                destination = get_container(w.destination)
                existing_worker._destination = Split(existing_worker._destination, destination)
                break
            else:
                # no duplicate found: resolve the transformer function and
                # the source/destination containers, then register
                t_name = w.transformer
                w._transformer = dot.get_attr(sys.modules, t_name)
                if not w._transformer:
                    Log.error("Can not find {{path}} to transformer (are you sure you are pointing to a function?)", path=t_name)
                w._source = get_container(w.source)
                w._destination = get_container(w.destination)
                settings.workers.append(w)

            # every worker (new or merged) gets its notification queues
            w._notify = []
            for notify in listwrap(w.notify):
                w._notify.append(aws.Queue(notify))
Code example #11
0
    # single-edge "rownum" cube with no requested sort: impose a canonical
    # row order on both sides before the final comparison
    elif result.meta.format == "cube" and len(
            result.edges
    ) == 1 and result.edges[0].name == "rownum" and not query.sort:
        header = list(result.data.keys())

        # round-trip through row form to sort, then back to column form
        result.data = cube2list(result.data)
        result.data = jx.sort(result.data, header)
        result.data = list2cube(result.data, header)

        expect.data = cube2list(expect.data)
        expect.data = jx.sort(expect.data, header)
        expect.data = list2cube(expect.data, header)

    # CONFIRM MATCH
    assertAlmostEqual(result, expect, places=6)


def cube2list(c):
    """Convert a column-oriented cube ``{name: [values]}`` into a list of
    row dicts, one dict per position (truncating at the shortest column).
    """
    names = list(c.keys())
    return [dict(zip(names, values)) for values in zip(*c.values())]


def list2cube(rows, header):
    """Convert a list of row dicts back into a column-oriented cube,
    emitting one value list per column named in ``header``.
    """
    cube = {}
    for column in header:
        cube[column] = [record[column] for record in rows]
    return cube


def sort_table(result):
    """
    SORT ROWS IN TABLE, EVEN IF ELEMENTS ARE JSON
Code example #12
0
File: etl.py  Project: klahnakoski/Activedata-ETL
     for e in sinks:
         try:
             fuzzytestcase.assertAlmostEqual(e[0], settings)
             return e[1]
         except Exception, _:
             pass
     sink = Json2Redshift(settings=settings)
     # sink = Threaded(sink)
     sinks.append((settings, sink))
     return sink
 elif coalesce(settings.aws_access_key_id, settings.aws_access_key_id, settings.region):
     # ASSUME BUCKET NAME
     with sinks_locker:
         for e in sinks:
             try:
                 fuzzytestcase.assertAlmostEqual(e[0], settings)
                 return e[1]
             except Exception, _:
                 pass
         output =  S3Bucket(settings)
         sinks.append((settings, output))
         return output
 else:
     with sinks_locker:
         for e in sinks:
             try:
                 fuzzytestcase.assertAlmostEqual(e[0], settings)
                 return e[1]
             except Exception, _:
                 pass
         output = elasticsearch.Cluster(settings).get_or_create_index(settings)
Code example #13
0
File: array.py  Project: klahnakoski/Activedata-ETL
def allclose(a, b):
    """Return True when ``a`` and ``b`` compare as (fuzzily) equal,
    treating any comparison failure as inequality.
    """
    try:
        assertAlmostEqual(a, b)
    except Exception:
        return False
    return True
Code example #14
0
         try:
             fuzzytestcase.assertAlmostEqual(e[0], settings)
             return e[1]
         except Exception, _:
             pass
     sink = Json2Redshift(settings=settings)
     # sink = Threaded(sink)
     sinks.append((settings, sink))
     return sink
 elif coalesce(settings.aws_access_key_id, settings.aws_access_key_id,
               settings.region):
     # ASSUME BUCKET NAME
     with sinks_locker:
         for e in sinks:
             try:
                 fuzzytestcase.assertAlmostEqual(e[0], settings)
                 return e[1]
             except Exception, _:
                 pass
         output = S3Bucket(settings)
         sinks.append((settings, output))
         return output
 else:
     with sinks_locker:
         for e in sinks:
             try:
                 fuzzytestcase.assertAlmostEqual(e[0], settings)
                 return e[1]
             except Exception, _:
                 pass
         output = elasticsearch.Cluster(settings).get_or_create_index(