Example #1
0
def get_example_requests(model):
    cross_cat = CrossCat()
    with open_compressed(model) as f:
        cross_cat.ParseFromString(f.read())
    feature_count = sum(len(kind.featureids) for kind in cross_cat.kinds)

    all_observed = [True] * feature_count
    none_observed = [False] * feature_count
    observeds = []
    observeds.append(all_observed)
    for f in xrange(feature_count):
        observed = all_observed[:]
        observed[f] = False
        observeds.append(observed)
    for f in xrange(feature_count):
        observed = [sample_bernoulli(0.5) for _ in xrange(feature_count)]
        observeds.append(observed)
    for f in xrange(feature_count):
        observed = none_observed[:]
        observed[f] = True
        observeds.append(observed)
    observeds.append(none_observed)

    requests = []
    for i, observed in enumerate(observeds):
        request = Query.Request()
        request.id = "example-{}".format(i)
        request.sample.data.observed[:] = none_observed
        request.sample.to_sample[:] = observed
        request.sample.sample_count = 1
        requests.append(request)

    return requests
Example #2
0
def get_example_requests(model, rows, query_type='mixed'):
    assert query_type in ['sample', 'score', 'mixed']
    cross_cat = CrossCat()
    with open_compressed(model, 'rb') as f:
        cross_cat.ParseFromString(f.read())
    feature_count = sum(len(kind.featureids) for kind in cross_cat.kinds)
    featureids = range(feature_count)

    nontrivials = [True] * feature_count
    for kind in cross_cat.kinds:
        fs = iter(kind.featureids)
        for model in loom.schema.MODELS.iterkeys():
            for shared in getattr(kind.product_model, model):
                f = fs.next()
                if model == 'dd':
                    if len(shared.alphas) == 0:
                        nontrivials[f] = False
                elif model == 'dpd':
                    if len(shared.betas) == 0:
                        nontrivials[f] = False
    all_observed = nontrivials[:]
    none_observed = [False] * feature_count

    observeds = []
    observeds.append(all_observed)
    for f, nontrivial in izip(featureids, nontrivials):
        if nontrivial:
            observed = all_observed[:]
            observed[f] = False
            observeds.append(observed)
    for f in featureids:
        observed = [
            nontrivial and sample_bernoulli(0.5)
            for nontrivial in nontrivials
        ]
        observeds.append(observed)
    for f, nontrivial in izip(featureids, nontrivials):
        if nontrivial:
            observed = none_observed[:]
            observed[f] = True
            observeds.append(observed)
    observeds.append(none_observed)

    requests = []
    for i, observed in enumerate(observeds):
        request = Query.Request()
        request.id = "example-{}".format(i)
        if query_type in ['sample', 'mixed']:
            set_diff(request.sample.data, none_observed)
            request.sample.to_sample.sparsity = DENSE
            request.sample.to_sample.dense[:] = observed
            request.sample.sample_count = 1
        if query_type in ['score', 'mixed']:
            set_diff(request.score.data, none_observed)
        requests.append(request)
    for row in load_rows(rows)[:20]:
        i += 1
        request = Query.Request()
        request.id = "example-{}".format(i)
        if query_type in ['sample', 'mixed']:
            request.sample.sample_count = 1
            request.sample.data.MergeFrom(row.diff)
            request.sample.to_sample.sparsity = DENSE
            conditions = izip(nontrivials, row.diff.pos.observed.dense)
            to_sample = [
                nontrivial and not is_observed
                for nontrivial, is_observed in conditions
            ]
            set_observed(request.sample.to_sample, to_sample)
        if query_type in ['score', 'mixed']:
            request.score.data.MergeFrom(row.diff)
        requests.append(request)
    return requests
Example #3
0
 def eval(self, shared):
     return sample_bernoulli(self.p)
Example #4
0
 def eval(self, shared):
     return sample_bernoulli(self.p)
Example #5
0
def get_example_requests(model, rows, query_type='mixed'):
    assert query_type in ['sample', 'score', 'mixed']
    cross_cat = CrossCat()
    with open_compressed(model, 'rb') as f:
        cross_cat.ParseFromString(f.read())
    feature_count = sum(len(kind.featureids) for kind in cross_cat.kinds)
    featureids = range(feature_count)

    nontrivials = [True] * feature_count
    for kind in cross_cat.kinds:
        fs = iter(kind.featureids)
        for model in loom.schema.MODELS.iterkeys():
            for shared in getattr(kind.product_model, model):
                f = fs.next()
                if model == 'dd':
                    if len(shared.alphas) == 0:
                        nontrivials[f] = False
                elif model == 'dpd':
                    if len(shared.betas) == 0:
                        nontrivials[f] = False
    all_observed = nontrivials[:]
    none_observed = [False] * feature_count

    observeds = []
    observeds.append(all_observed)
    for f, nontrivial in izip(featureids, nontrivials):
        if nontrivial:
            observed = all_observed[:]
            observed[f] = False
            observeds.append(observed)
    for f in featureids:
        observed = [
            nontrivial and sample_bernoulli(0.5)
            for nontrivial in nontrivials
        ]
        observeds.append(observed)
    for f, nontrivial in izip(featureids, nontrivials):
        if nontrivial:
            observed = none_observed[:]
            observed[f] = True
            observeds.append(observed)
    observeds.append(none_observed)

    requests = []
    for i, observed in enumerate(observeds):
        request = Query.Request()
        request.id = "example-{}".format(i)
        if query_type in ['sample', 'mixed']:
            set_diff(request.sample.data, none_observed)
            request.sample.to_sample.sparsity = DENSE
            request.sample.to_sample.dense[:] = observed
            request.sample.sample_count = 1
        if query_type in ['score', 'mixed']:
            set_diff(request.score.data, none_observed)
        requests.append(request)
    for row in load_rows(rows)[:20]:
        i += 1
        request = Query.Request()
        request.id = "example-{}".format(i)
        if query_type in ['sample', 'mixed']:
            request.sample.sample_count = 1
            request.sample.data.MergeFrom(row.diff)
            request.sample.to_sample.sparsity = DENSE
            conditions = izip(nontrivials, row.diff.pos.observed.dense)
            to_sample = [
                nontrivial and not is_observed
                for nontrivial, is_observed in conditions
            ]
            set_observed(request.sample.to_sample, to_sample)
        if query_type in ['score', 'mixed']:
            request.score.data.MergeFrom(row.diff)
        requests.append(request)
    return requests