Esempio n. 1
0
def generate_model(feature_count, feature_type, hyper_prior=None):
    '''
    Build a single-kind CrossCat message and, optionally, hyper prior grids.

    feature_count is the number of features added to the one kind; they
    get feature ids 0 .. feature_count - 1.

    feature_type is a key of FEATURE_TYPES and also the name of the field
    of kind.product_model that receives the features.

    hyper_prior is either None or a pair (hp_name, grid).  For 'topology'
    and 'clustering' the grid is an iterable of points handed to
    PitmanYor.to_protobuf; for any other hp_name the grid is itself a
    pair (param_name, values).

    Returns (cross_cat, fixed_models).  fixed_models is empty when
    hyper_prior is None.
    '''
    module = FEATURE_TYPES[feature_type]
    shared = module.Shared.from_dict(module.EXAMPLES[0]['shared'])
    shared.realize()
    cross_cat = loom.schema_pb2.CrossCat()
    kind = cross_cat.kinds.add()
    CLUSTERING.protobuf_dump(kind.product_model.clustering)
    features = getattr(kind.product_model, feature_type)
    for featureid in xrange(feature_count):
        shared.protobuf_dump(features.add())
        kind.featureids.append(featureid)
    CLUSTERING.protobuf_dump(cross_cat.topology)

    fixed_models = []
    if hyper_prior is not None:
        fixed_models = _generate_fixed_models(
            cross_cat, shared, feature_count, hyper_prior)
    return cross_cat, fixed_models


def _copy_cross_cat(model):
    '''Return a fresh CrossCat message filled from model via MergeFrom.'''
    duplicate = loom.schema_pb2.CrossCat()
    duplicate.MergeFrom(model)
    return duplicate


def _generate_fixed_models(cross_cat, shared, feature_count, hyper_prior):
    '''
    Append the hyper prior grid to cross_cat (in place) and return the
    list of fixed models derived from the grid-free copy of cross_cat.
    '''
    hp_name, grid_in = hyper_prior
    if hp_name in ('topology', 'clustering'):
        def get_grid_out(model):
            return getattr(model.hyper_prior, hp_name)

        def extend(grid_out, point):
            PitmanYor.to_protobuf(point, grid_out.add())
    else:
        # Feature hyper priors carry one more level: (param_name, values).
        param_name, grid_in = grid_in

        def get_grid_out(model):
            return getattr(getattr(model.hyper_prior, hp_name), param_name)

        def extend(grid_out, point):
            grid_out.extend([point])

    # Snapshot taken before any grid point is added, so that every fixed
    # model starts out without the full grid.
    cross_cat_base = _copy_cross_cat(cross_cat)
    fixed_models = []
    for point in grid_in:
        extend(get_grid_out(cross_cat), point)
        if hp_name != 'dd':
            # One fixed model per grid point, holding only that point.
            fixed_model = _copy_cross_cat(cross_cat_base)
            extend(get_grid_out(fixed_model), point)
            fixed_models.append(fixed_model)

    if hp_name == 'dd':
        fixed_models.extend(
            _dd_fixed_models(cross_cat_base, shared, feature_count, grid_in))
    return fixed_models


def _dd_fixed_models(cross_cat_base, shared, feature_count, grid_in):
    '''
    Build the fixed models for the 'dd' hyper prior.

    Each tuple yielded by product(*[grid_in] * dim), where dim is the
    number of alphas in shared.dump(), gives one copy of cross_cat_base
    whose first dd feature has its alphas overwritten by that tuple.

    Raises SkipTest when dim > 4.
    '''
    assert feature_count == 1
    dim = len(shared.dump()['alphas'])
    if dim > 4:
        raise SkipTest('FIXME test runs out of memory')
    fixed_models = []
    for grid in product(*[grid_in] * dim):
        fixed_model = _copy_cross_cat(cross_cat_base)
        alphas = fixed_model.kinds[0].product_model.dd[0].alphas
        assert len(alphas) == len(grid)
        for i, alpha in enumerate(grid):
            alphas[i] = alpha
        fixed_models.append(fixed_model)
    return fixed_models
Esempio n. 2
0
def generate_model(feature_count, feature_type, hyper_prior=None):
    """
    Build a single-kind CrossCat message and, optionally, hyper prior grids.

    feature_count is the number of features added to the one kind; they
    get feature ids 0 .. feature_count - 1.

    feature_type is a key of FEATURE_TYPES and also the name of the field
    of kind.product_model that receives the features.

    hyper_prior is either None or a pair (hp_name, grid).  For "topology"
    and "clustering" the grid is an iterable of points handed to
    PitmanYor.to_protobuf; for any other hp_name the grid is itself a
    pair (param_name, values).

    Returns (cross_cat, fixed_models).  fixed_models is empty when
    hyper_prior is None.
    """
    module = FEATURE_TYPES[feature_type]
    shared = module.Shared.from_dict(module.EXAMPLES[0]["shared"])
    shared.realize()
    cross_cat = loom.schema_pb2.CrossCat()
    kind = cross_cat.kinds.add()
    CLUSTERING.protobuf_dump(kind.product_model.clustering)
    features = getattr(kind.product_model, feature_type)
    for featureid in xrange(feature_count):
        shared.protobuf_dump(features.add())
        kind.featureids.append(featureid)
    CLUSTERING.protobuf_dump(cross_cat.topology)

    fixed_models = []
    if hyper_prior is not None:
        fixed_models = _generate_fixed_models(cross_cat, shared, feature_count, hyper_prior)
    return cross_cat, fixed_models


def _copy_cross_cat(model):
    """Return a fresh CrossCat message filled from model via MergeFrom."""
    duplicate = loom.schema_pb2.CrossCat()
    duplicate.MergeFrom(model)
    return duplicate


def _generate_fixed_models(cross_cat, shared, feature_count, hyper_prior):
    """
    Append the hyper prior grid to cross_cat (in place) and return the
    list of fixed models derived from the grid-free copy of cross_cat.
    """
    hp_name, grid_in = hyper_prior
    if hp_name in ("topology", "clustering"):

        def get_grid_out(model):
            return getattr(model.hyper_prior, hp_name)

        def extend(grid_out, point):
            PitmanYor.to_protobuf(point, grid_out.add())

    else:
        # Feature hyper priors carry one more level: (param_name, values).
        param_name, grid_in = grid_in

        def get_grid_out(model):
            return getattr(getattr(model.hyper_prior, hp_name), param_name)

        def extend(grid_out, point):
            grid_out.extend([point])

    # Snapshot taken before any grid point is added, so that every fixed
    # model starts out without the full grid.
    cross_cat_base = _copy_cross_cat(cross_cat)
    fixed_models = []
    for point in grid_in:
        extend(get_grid_out(cross_cat), point)
        if hp_name != "dd":
            # One fixed model per grid point, holding only that point.
            fixed_model = _copy_cross_cat(cross_cat_base)
            extend(get_grid_out(fixed_model), point)
            fixed_models.append(fixed_model)

    if hp_name == "dd":
        fixed_models.extend(_dd_fixed_models(cross_cat_base, shared, feature_count, grid_in))
    return fixed_models


def _dd_fixed_models(cross_cat_base, shared, feature_count, grid_in):
    """
    Build the fixed models for the "dd" hyper prior.

    Each tuple yielded by product(*[grid_in] * dim), where dim is the
    number of alphas in shared.dump(), gives one copy of cross_cat_base
    whose first dd feature has its alphas overwritten by that tuple.

    Raises SkipTest when dim > 4.
    """
    assert feature_count == 1
    dim = len(shared.dump()["alphas"])
    if dim > 4:
        raise SkipTest("FIXME test runs out of memory")
    fixed_models = []
    for grid in product(*[grid_in] * dim):
        fixed_model = _copy_cross_cat(cross_cat_base)
        alphas = fixed_model.kinds[0].product_model.dd[0].alphas
        assert len(alphas) == len(grid)
        for i, alpha in enumerate(grid):
            alphas[i] = alpha
        fixed_models.append(fixed_model)
    return fixed_models
Esempio n. 3
0
 def __init__(self):
     '''Create the clustering and feature objects from fixed parameters.'''
     clustering_params = {'alpha': 100.0, 'd': 0.1}
     feature_params = {
         'mu': 0.0,
         'kappa': 0.1,
         'sigmasq': 0.01,
         'nu': 1.0,
     }
     self.clustering = PitmanYor.from_dict(clustering_params)
     self.feature = nich.Shared.from_dict(feature_params)
Esempio n. 4
0
 def __init__(self):
     '''Create the clustering and feature objects from fixed parameters.'''
     clustering_params = {'alpha': 100.0, 'd': 0.1}
     feature_params = {
         'mu': 0.0,
         'kappa': 0.1,
         'sigmasq': 0.01,
         'nu': 1.0,
     }
     self.clustering = PitmanYor.from_dict(clustering_params)
     self.feature = nich.Shared.from_dict(feature_params)
Esempio n. 5
0
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import random
import parsable
from distributions.lp.clustering import PitmanYor
from loom.util import tempdir
from distributions.io.stream import open_compressed
import loom.schema
import loom.hyperprior
import loom.config
import loom.runner
# From here on `parsable` names a Parsable instance, not the module.
parsable = parsable.Parsable()

# Module-level Pitman-Yor clustering parameters.
CLUSTERING = PitmanYor.from_dict(dict(alpha=2.0, d=0.1))


def generate_kinds(feature_count):
    '''
    Assign feature_count features to kinds whose sizes double from one
    kind to the next (1, 2, 4, 8, ...), e.g.,
    [o|oo|oooo|oooooooo|oooooooooooooooo|oooooooooooooooooooooooooooooooo]

    Returns a randomly shuffled list whose entries are kind ids, one
    entry per feature.
    '''
    assignments = []
    kindid = 0
    # Keep opening kinds until every feature has somewhere to go.
    while kindid < feature_count and len(assignments) < feature_count:
        assignments += [kindid] * (2 ** kindid)
        kindid += 1
    # The last kind usually overshoots; drop the surplus entries.
    del assignments[feature_count:]
    random.shuffle(assignments)
    return assignments
Esempio n. 6
0
        'alpha': [.5, 1.5],
        'gamma': [.5, 1.5],
    },
    'gp': {
        'alpha': [.5, 1.5],
        'inv_beta': [.5, 1.5],
    },
    'nich': {
        'kappa': [.5, 1.5],
        'mu': [-1., 1.],
        'nu': [.5, 1.5],
        'sigmasq': [.5, 1.5],
    }
}

# Module-level Pitman-Yor clustering parameters.
CLUSTERING = PitmanYor.from_dict(dict(alpha=2.0, d=0.1))

# Map plain log prefixes to ANSI-colored ones, but only when this module is
# run as a script with stdout attached to a terminal; otherwise the mapping
# is empty and prefixes are left as they are.
if __name__ == '__main__' and sys.stdout.isatty():
    colorize = {
        'Info': '\x1b[34mInfo\x1b[0m',  # blue
        'Warn': '\x1b[33mWarn\x1b[0m',  # yellow
        'Fail': '\x1b[31mFail\x1b[0m',  # red
        # Was '******', which hid the label; follow the same pattern as the
        # other entries so that 'Pass' stays readable.
        'Pass': '\x1b[32mPass\x1b[0m',  # green
    }
else:
    colorize = {}


def LOG(prefix, casename, comment=''):
    '''
    Format a log line from a prefix, a case name and an optional comment.

    NOTE(review): only the formatting step is visible here; what happens
    to `message` afterwards is not shown -- confirm against the full file.
    '''
    # Use the colorized form of the prefix when one is registered.
    prefix = colorize.get(prefix, prefix)
    # Left-align the prefix to 4 columns and the case name to 18 columns.
    message = '{: <4} {: <18} {}'.format(prefix, casename, comment)
Esempio n. 7
0
# NOTE(review): the consumers of these two limits are outside this excerpt.
KIND_MAX_SIZE = 205
GRID_SIZE = 2

# Two Pitman-Yor grid points that differ only in alpha.
PITMAN_YOR_GRID = [
    dict(alpha=2.0, d=0.1),
    dict(alpha=10.0, d=0.1),
]

# Grid values keyed by hyper prior name.  "topology" and "clustering" share
# the same Pitman-Yor grid object; every other entry maps a parameter name
# to its list of grid values.
HYPER_PRIOR = dict(
    topology=PITMAN_YOR_GRID,
    clustering=PITMAN_YOR_GRID,
    bb=dict(alpha=[0.5, 2.0], beta=[0.5, 2.0]),
    dd=dict(alpha=[0.5, 1.5]),
    dpd=dict(alpha=[0.5, 1.5], gamma=[0.5, 1.5]),
    gp=dict(alpha=[0.5, 1.5], inv_beta=[0.5, 1.5]),
    nich=dict(
        kappa=[0.5, 1.5],
        mu=[-1.0, 1.0],
        nu=[0.5, 1.5],
        sigmasq=[0.5, 1.5],
    ),
)

# Module-level Pitman-Yor clustering parameters.
CLUSTERING = PitmanYor.from_dict(dict(alpha=2.0, d=0.1))

# Map plain log prefixes to ANSI-colored ones, but only when this module is
# run as a script with stdout attached to a terminal; otherwise the mapping
# is empty and prefixes are left as they are.
if __name__ == "__main__" and sys.stdout.isatty():
    colorize = {
        "Info": "\x1b[34mInfo\x1b[0m",  # blue
        "Warn": "\x1b[33mWarn\x1b[0m",  # yellow
        "Fail": "\x1b[31mFail\x1b[0m",  # red
        # Was "******", which hid the label; follow the same pattern as the
        # other entries so that "Pass" stays readable.
        "Pass": "\x1b[32mPass\x1b[0m",  # green
    }
else:
    colorize = {}


def LOG(prefix, casename, comment=""):
    """
    Format a log line from a prefix, a case name and an optional comment.

    NOTE(review): only the formatting step is visible here; what happens
    to `message` afterwards is not shown -- confirm against the full file.
    """
    # Use the colorized form of the prefix when one is registered.
    prefix = colorize.get(prefix, prefix)
    # Left-align the prefix to 4 columns and the case name to 18 columns.
    message = "{: <4} {: <18} {}".format(prefix, casename, comment)
Esempio n. 8
0
 def __init__(self):
     '''Create empty mixtures for clustering and both features, plus an id tracker.'''
     self.clustering = PitmanYor.Mixture()
     # Each feature gets its own, separately constructed mixture.
     self.feature_x, self.feature_y = nich.Mixture(), nich.Mixture()
     self.id_tracker = MixtureIdTracker()