def generate_model(feature_count, feature_type, hyper_prior=None):
    '''
    Build a single-kind CrossCat message, plus fixed variants of it when a
    hyper-prior grid is given.

    Arguments:
        feature_count: number of features added to the single kind; their
            ids are 0 .. feature_count - 1.
        feature_type: key into FEATURE_TYPES, and also the name of the
            repeated field on kind.product_model that receives the features.
        hyper_prior: None, or a pair (hp_name, grid_in). For 'topology' and
            'clustering', grid_in is iterated directly and every point is
            written with PitmanYor.to_protobuf. For any other hp_name,
            grid_in is itself a pair (param_name, grid).

    Returns:
        A pair (cross_cat, fixed_models). Every grid point is appended to
        the selected hyper-prior grid of cross_cat. fixed_models stays empty
        when hyper_prior is None.

    Raises:
        SkipTest: for hp_name 'dd' when the shared example has more than
            4 alphas.
    '''
    feature_module = FEATURE_TYPES[feature_type]
    example = feature_module.EXAMPLES[0]
    shared = feature_module.Shared.from_dict(example['shared'])
    shared.realize()

    cross_cat = loom.schema_pb2.CrossCat()
    kind = cross_cat.kinds.add()
    CLUSTERING.protobuf_dump(kind.product_model.clustering)
    feature_field = getattr(kind.product_model, feature_type)
    for featureid in xrange(feature_count):
        shared.protobuf_dump(feature_field.add())
        kind.featureids.append(featureid)
    CLUSTERING.protobuf_dump(cross_cat.topology)

    # FIXME(jglidden) this belongs in a separate function
    fixed_models = []
    if hyper_prior is None:
        return cross_cat, fixed_models

    def append_pitman_yor(grid_out, point):
        return PitmanYor.to_protobuf(point, grid_out.add())

    hp_name, grid_in = hyper_prior
    if hp_name == 'topology':
        def get_grid_out(model):
            return model.hyper_prior.topology

        extend = append_pitman_yor
    elif hp_name == 'clustering':
        def get_grid_out(model):
            return model.hyper_prior.clustering

        extend = append_pitman_yor
    else:
        # Feature hyper-priors carry the parameter name next to its grid.
        param_name, grid_in = grid_in

        def get_grid_out(model):
            return getattr(getattr(model.hyper_prior, hp_name), param_name)

        def extend(grid_out, point):
            return grid_out.extend([point])

    # Snapshot taken before any grid point is added, so that every fixed
    # model starts from an empty grid.
    cross_cat_base = loom.schema_pb2.CrossCat()
    cross_cat_base.MergeFrom(cross_cat)

    def clone_base():
        clone = loom.schema_pb2.CrossCat()
        clone.MergeFrom(cross_cat_base)
        return clone

    is_dd = (hp_name == 'dd')
    for point in grid_in:
        extend(get_grid_out(cross_cat), point)
        if is_dd:
            continue
        fixed_model = clone_base()
        extend(get_grid_out(fixed_model), point)
        fixed_models.append(fixed_model)

    if is_dd:
        # For 'dd' the fixed models enumerate every combination of grid
        # values over the alphas instead of pinning one grid point each.
        assert feature_count == 1
        dim = len(shared.dump()['alphas'])
        if dim > 4:
            raise SkipTest('FIXME test runs out of memory')
        for grid in product(*[grid_in] * dim):
            fixed_model = clone_base()
            alphas = fixed_model.kinds[0].product_model.dd[0].alphas
            assert len(alphas) == len(grid)
            for index, alpha in enumerate(grid):
                alphas[index] = alpha
            fixed_models.append(fixed_model)

    return cross_cat, fixed_models
def generate_model(feature_count, feature_type, hyper_prior=None):
    """Create a one-kind CrossCat message and, optionally, fixed variants.

    Args:
        feature_count: how many features to add to the single kind; feature
            ids run from 0 to feature_count - 1.
        feature_type: key into FEATURE_TYPES; also the attribute of
            kind.product_model that the features are added to.
        hyper_prior: None, or a pair (hp_name, grid_in). When hp_name is
            "topology" or "clustering", grid_in is the grid itself and its
            points are written via PitmanYor.to_protobuf. Otherwise grid_in
            is a pair (param_name, grid).

    Returns:
        (cross_cat, fixed_models). cross_cat receives every grid point in
        the selected hyper-prior grid; fixed_models is an empty list when
        hyper_prior is None.

    Raises:
        SkipTest: when hp_name is "dd" and the shared example has more than
            4 alphas.
    """
    module = FEATURE_TYPES[feature_type]
    shared_dict = module.EXAMPLES[0]["shared"]
    shared = module.Shared.from_dict(shared_dict)
    shared.realize()

    cross_cat = loom.schema_pb2.CrossCat()
    kind = cross_cat.kinds.add()
    product_model = kind.product_model
    CLUSTERING.protobuf_dump(product_model.clustering)
    features = getattr(product_model, feature_type)
    for featureid in xrange(feature_count):
        shared.protobuf_dump(features.add())
        kind.featureids.append(featureid)
    CLUSTERING.protobuf_dump(cross_cat.topology)

    # FIXME(jglidden) this belongs in a separate function
    fixed_models = []
    if hyper_prior is not None:
        hp_name, grid_in = hyper_prior

        if hp_name in ("topology", "clustering"):
            # Both Pitman-Yor grids live directly on hyper_prior and take
            # their points through PitmanYor.to_protobuf.
            def get_grid_out(model):
                return getattr(model.hyper_prior, hp_name)

            def extend(grid_out, point):
                return PitmanYor.to_protobuf(point, grid_out.add())

        else:
            param_name, grid_in = grid_in

            def get_grid_out(model):
                feature_prior = getattr(model.hyper_prior, hp_name)
                return getattr(feature_prior, param_name)

            def extend(grid_out, point):
                return grid_out.extend([point])

        # Copy made before the grid is filled in; fixed models start here.
        cross_cat_base = loom.schema_pb2.CrossCat()
        cross_cat_base.MergeFrom(cross_cat)

        if hp_name == "dd":
            for point in grid_in:
                extend(get_grid_out(cross_cat), point)

            # One fixed model per combination of grid values over alphas.
            assert feature_count == 1
            dim = len(shared.dump()["alphas"])
            if dim > 4:
                raise SkipTest("FIXME test runs out of memory")
            for grid in product(*[grid_in] * dim):
                fixed_model = loom.schema_pb2.CrossCat()
                fixed_model.MergeFrom(cross_cat_base)
                alphas = fixed_model.kinds[0].product_model.dd[0].alphas
                assert len(alphas) == len(grid)
                for position, value in enumerate(grid):
                    alphas[position] = value
                fixed_models.append(fixed_model)
        else:
            # One fixed model per grid point, holding only that point.
            for point in grid_in:
                extend(get_grid_out(cross_cat), point)
                fixed_model = loom.schema_pb2.CrossCat()
                fixed_model.MergeFrom(cross_cat_base)
                extend(get_grid_out(fixed_model), point)
                fixed_models.append(fixed_model)

    return cross_cat, fixed_models
def __init__(self):
    '''
    Set up the clustering and feature attributes from hard-coded parameters.
    '''
    clustering_params = {
        'alpha': 100.0,
        'd': 0.1,
    }
    feature_params = {
        'mu': 0.0,
        'kappa': 0.1,
        'sigmasq': 0.01,
        'nu': 1.0,
    }
    self.clustering = PitmanYor.from_dict(clustering_params)
    self.feature = nich.Shared.from_dict(feature_params)
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import random
import parsable
from distributions.lp.clustering import PitmanYor
from loom.util import tempdir
from distributions.io.stream import open_compressed
import loom.schema
import loom.hyperprior
import loom.config
import loom.runner

parsable = parsable.Parsable()

CLUSTERING = PitmanYor.from_dict({'alpha': 2.0, 'd': 0.1})


def generate_kinds(feature_count):
    '''
    Assign features to kinds of exponentially growing size, in random order.

    Kind k is offered 2 ** k slots, e.g.,
    [o|oo|oooo|oooooooo|oooooooooooooooo|oooooooooooooooooooooooooooooooo]
    Slots beyond feature_count are dropped and the result is shuffled in
    place with the random module.

    Returns a list of kind ids; for non-negative feature_count it has
    feature_count entries.
    '''
    kindids = []
    for kindid in xrange(feature_count):
        # Stop opening new kinds as soon as every feature has a slot.
        if len(kindids) >= feature_count:
            break
        kindids += [kindid] * (2 ** kindid)
    # The last kind may overshoot; trim it back to the requested size.
    kindids = kindids[:feature_count]
    random.shuffle(kindids)
    return kindids
'alpha': [.5, 1.5], 'gamma': [.5, 1.5], }, 'gp': { 'alpha': [.5, 1.5], 'inv_beta': [.5, 1.5], }, 'nich': { 'kappa': [.5, 1.5], 'mu': [-1., 1.], 'nu': [.5, 1.5], 'sigmasq': [.5, 1.5], } } CLUSTERING = PitmanYor.from_dict({'alpha': 2.0, 'd': 0.1}) if __name__ == '__main__' and sys.stdout.isatty(): colorize = { 'Info': '\x1b[34mInfo\x1b[0m', 'Warn': '\x1b[33mWarn\x1b[0m', 'Fail': '\x1b[31mFail\x1b[0m', 'Pass': '******', } else: colorize = {} def LOG(prefix, casename, comment=''): prefix = colorize.get(prefix, prefix) message = '{: <4} {: <18} {}'.format(prefix, casename, comment)
# NOTE(review): KIND_MAX_SIZE and GRID_SIZE are not referenced in the visible
# excerpt. GRID_SIZE presumably matches the two points per grid below; confirm.
KIND_MAX_SIZE = 205
GRID_SIZE = 2

# Two Pitman-Yor settings that differ only in alpha.
PITMAN_YOR_GRID = [{"alpha": 2.0, "d": 0.1}, {"alpha": 10.0, "d": 0.1}]

# Hyper-prior grids by name. "topology" and "clustering" map straight to a
# list of Pitman-Yor parameter dicts; every other entry maps a parameter name
# to a list of candidate values.
HYPER_PRIOR = {
    "topology": PITMAN_YOR_GRID,
    "clustering": PITMAN_YOR_GRID,
    "bb": {"alpha": [0.5, 2.0], "beta": [0.5, 2.0]},
    "dd": {"alpha": [0.5, 1.5]},
    "dpd": {"alpha": [0.5, 1.5], "gamma": [0.5, 1.5]},
    "gp": {"alpha": [0.5, 1.5], "inv_beta": [0.5, 1.5]},
    "nich": {"kappa": [0.5, 1.5], "mu": [-1.0, 1.0], "nu": [0.5, 1.5], "sigmasq": [0.5, 1.5]},
}

CLUSTERING = PitmanYor.from_dict({"alpha": 2.0, "d": 0.1})

# Log prefixes are wrapped in ANSI color escapes only when this module runs as
# a script with stdout attached to a terminal. Otherwise the table is empty
# and LOG falls back to the plain prefix.
if __name__ == "__main__" and sys.stdout.isatty():
    colorize = {
        "Info": "\x1b[34mInfo\x1b[0m",  # blue
        "Warn": "\x1b[33mWarn\x1b[0m",  # yellow
        "Fail": "\x1b[31mFail\x1b[0m",  # red
        # Every entry must keep its own label text so that a colored log
        # line still says which status it reports.
        "Pass": "\x1b[32mPass\x1b[0m",  # green
    }
else:
    colorize = {}


def LOG(prefix, casename, comment=""):
    # Swap the prefix for its colored form when one is registered.
    prefix = colorize.get(prefix, prefix)
    # Prefix is left-aligned to at least 4 columns, casename to at least 18.
    message = "{: <4} {: <18} {}".format(prefix, casename, comment)
    # NOTE(review): `message` is not used in the visible part of this
    # function; the body presumably continues past this excerpt. Confirm.
def __init__(self):
    """Attach freshly constructed mixtures and an id tracker to the instance.

    Sets `clustering` (a PitmanYor.Mixture), `feature_x` and `feature_y`
    (two separate nich.Mixture objects), and `id_tracker`
    (a MixtureIdTracker).
    """
    self.clustering = PitmanYor.Mixture()
    # Two distinct mixture objects, one per feature.
    self.feature_x, self.feature_y = nich.Mixture(), nich.Mixture()
    self.id_tracker = MixtureIdTracker()