def get_example_requests(model):
    """Build a list of example sample requests for a CrossCat model.

    The CrossCat message is parsed from the compressed file ``model`` and
    its total feature count is used to generate a family of boolean masks;
    one ``Query.Request`` is created per mask, with the mask stored in
    ``request.sample.to_sample``.

    Args:
        model: Passed to ``open_compressed``; presumably the filename of a
            serialized CrossCat model.

    Returns:
        A list of ``Query.Request`` objects with ids "example-0",
        "example-1", ... in mask order.
    """
    cross_cat = CrossCat()
    with open_compressed(model) as model_file:
        cross_cat.ParseFromString(model_file.read())
    feature_count = sum(len(kind.featureids) for kind in cross_cat.kinds)

    all_observed = [True] * feature_count
    none_observed = [False] * feature_count

    # Mask families, in order: everything, leave-one-out, random,
    # exactly-one, nothing.
    masks = [all_observed]
    masks.extend(
        [index != hidden for index in xrange(feature_count)]
        for hidden in xrange(feature_count)
    )
    # One random mask per feature; each entry is an independent
    # sample_bernoulli(0.5) call.
    for _round in xrange(feature_count):
        masks.append([sample_bernoulli(0.5) for _ in xrange(feature_count)])
    masks.extend(
        [index == shown for index in xrange(feature_count)]
        for shown in xrange(feature_count)
    )
    masks.append(none_observed)

    requests = []
    for index, to_sample in enumerate(masks):
        request = Query.Request()
        request.id = "example-{}".format(index)
        # The conditioning data is always fully unobserved; only the set of
        # features to sample varies between requests.
        request.sample.data.observed[:] = none_observed
        request.sample.to_sample[:] = to_sample
        request.sample.sample_count = 1
        requests.append(request)
    return requests
def get_example_requests(model, rows, query_type='mixed'):
    """Build a list of example ``Query.Request`` messages.

    The CrossCat message is parsed from the compressed file ``model``, a
    family of boolean feature masks is derived from it, and one request is
    emitted per mask.  Up to 20 further requests are then built from the
    rows returned by ``load_rows(rows)``.

    Args:
        model: Passed to ``open_compressed(model, 'rb')``; presumably the
            filename of a serialized CrossCat model.
        rows: Passed to ``load_rows``; only the first 20 rows are used.
        query_type: 'sample', 'score' or 'mixed'.  Selects which request
            sub-messages are filled in; 'mixed' fills in both.

    Returns:
        A list of ``Query.Request`` objects with ids "example-0",
        "example-1", ... in creation order.

    Raises:
        AssertionError: If ``query_type`` is not one of the accepted values.
    """
    assert query_type in ['sample', 'score', 'mixed'], query_type
    # Loop-invariant: decide once which sub-messages every request gets.
    want_sample = query_type in ['sample', 'mixed']
    want_score = query_type in ['score', 'mixed']

    cross_cat = CrossCat()
    with open_compressed(model, 'rb') as model_file:
        cross_cat.ParseFromString(model_file.read())
    feature_count = sum(len(kind.featureids) for kind in cross_cat.kinds)
    featureids = range(feature_count)

    # A feature is treated as trivial when its 'dd' shared message has no
    # alphas or its 'dpd' shared message has no betas; trivial features are
    # never marked for sampling below.
    # NOTE(review): this walk assumes kind.featureids is ordered the same
    # way as iterating loom.schema.MODELS and then each model's shared list,
    # and that feature ids index into [0, feature_count) -- confirm.
    nontrivials = [True] * feature_count
    for kind in cross_cat.kinds:
        kind_featureids = iter(kind.featureids)
        for model_name in loom.schema.MODELS.iterkeys():
            for shared in getattr(kind.product_model, model_name):
                featureid = next(kind_featureids)
                if model_name == 'dd':
                    if len(shared.alphas) == 0:
                        nontrivials[featureid] = False
                elif model_name == 'dpd':
                    if len(shared.betas) == 0:
                        nontrivials[featureid] = False
    all_observed = nontrivials[:]
    none_observed = [False] * feature_count

    # Mask families, in order: all nontrivial features, leave-one-out,
    # random, exactly-one, nothing.
    observeds = [all_observed]
    for featureid, nontrivial in izip(featureids, nontrivials):
        if nontrivial:
            observed = all_observed[:]
            observed[featureid] = False
            observeds.append(observed)
    for _ in featureids:
        # `and` short-circuits, so sample_bernoulli is only called for
        # nontrivial features.
        observed = [
            nontrivial and sample_bernoulli(0.5)
            for nontrivial in nontrivials
        ]
        observeds.append(observed)
    for featureid, nontrivial in izip(featureids, nontrivials):
        if nontrivial:
            observed = none_observed[:]
            observed[featureid] = True
            observeds.append(observed)
    observeds.append(none_observed)

    requests = []
    for observed in observeds:
        request = Query.Request()
        # len(requests) is the running request index; using it keeps ids
        # contiguous across both loops without relying on a leaked loop
        # variable.
        request.id = "example-{}".format(len(requests))
        if want_sample:
            set_diff(request.sample.data, none_observed)
            request.sample.to_sample.sparsity = DENSE
            request.sample.to_sample.dense[:] = observed
            request.sample.sample_count = 1
        if want_score:
            set_diff(request.score.data, none_observed)
        requests.append(request)

    for row in load_rows(rows)[:20]:
        request = Query.Request()
        request.id = "example-{}".format(len(requests))
        if want_sample:
            request.sample.sample_count = 1
            request.sample.data.MergeFrom(row.diff)
            request.sample.to_sample.sparsity = DENSE
            # Sample exactly the nontrivial features this row leaves
            # unobserved.
            conditions = izip(nontrivials, row.diff.pos.observed.dense)
            to_sample = [
                nontrivial and not is_observed
                for nontrivial, is_observed in conditions
            ]
            set_observed(request.sample.to_sample, to_sample)
        if want_score:
            request.score.data.MergeFrom(row.diff)
        requests.append(request)
    return requests
def eval(self, shared):
    """Return the result of ``sample_bernoulli`` called with ``self.p``.

    ``shared`` is accepted but not used by this method.
    """
    probability = self.p
    return sample_bernoulli(probability)