def composite_dataset(dataset="objectome24", threshold=12000, mongo_reload=False):
    """Load and stack the trials of a named dataset, then split them by worker.

    Every collection belonging to ``dataset`` is loaded through
    ``obj.psychophysDatasetObject`` and the resulting ``trials`` tables are
    row-stacked into one table. The stacked table is then segregated on its
    ``"WorkerID"`` column.

    Args:
        dataset: Name of the dataset to assemble; ``"objectome24"`` or
            ``"hvm10"``.
        threshold: Minimum number of trials a worker needs to be kept as an
            individual entry. Workers with fewer trials are merged into the
            pool.
        mongo_reload: Forwarded unchanged to ``obj.psychophysDatasetObject``.

    Returns:
        dict with the keys
          * ``"all"``  -- the stacked trials of every collection,
          * ``"pool"`` -- the stacked trials of all workers with fewer than
            ``threshold`` trials,
          * one key per remaining worker id -- that worker's trials.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    if dataset == "objectome24":
        collections = ["objectome64", "objectome_imglvl", "ko_obj24_basic_2ways", "monkobjectome"]
        meta = obj.objectome24_meta()
    elif dataset == "hvm10":
        # 'hvm10_basic_2ways_newobj' and 'hvm10-finegrain' are deliberately
        # left out of the composite.
        collections = [
            "hvm10_basic_2ways",
            "hvm10_allvar_basic_2ways",
        ]
        meta = obj.hvm10_meta()
    else:
        # Fail up front with a clear message instead of an UnboundLocalError
        # on `collections` further down.
        raise ValueError(
            "unknown dataset {!r}; expected 'objectome24' or 'hvm10'".format(dataset)
        )

    col_data = tuple(
        obj.psychophysDatasetObject(col, {}, meta, mongo_reload=mongo_reload).trials
        for col in collections
    )
    trials = tb.rowstack(col_data)

    # Segregate into pool and individuals.
    workers = np.asarray(trials["WorkerID"])
    col_data_seg = {"all": trials}
    pooled = []
    for uw in np.unique(workers):
        # Vectorised mask: row indices of the trials done by this worker.
        tw = np.nonzero(workers == uw)[0]
        if len(tw) < threshold:
            pooled.append(trials[tw])
        else:
            col_data_seg[uw] = trials[tw]
    # NOTE(review): if no worker falls below `threshold`, this stacks an empty
    # tuple, exactly as before -- confirm tb.rowstack tolerates that.
    col_data_seg["pool"] = tb.rowstack(tuple(pooled))

    return col_data_seg
def testFeatures(all_features, all_metas, features_oi, objects_oi):
    if type(objects_oi) is dict:
        objs_oi = objects_oi['objs']
        tasks_oi = objects_oi['tasks']
    else:
        tasks_oi = np.array(objects_oi)
        objs_oi = np.array(objects_oi)

    subsample = 1000
    noise_model = None
    nsamples_noisemodel = 10
    nsplits = 100
    result = {'objs_oi':objs_oi}

    task_trials = ()
    rec = {}

    for feat in features_oi:
        if feat not in all_features.keys():
            continue
        features = all_features[feat]
        meta = fix_meta(all_metas[feat])
        tasks = getBinaryTasks(meta, tasks_oi)
        print 'Running machine_objectome : ' + str(feat) + ': ' + str(features.shape)
        for isample in range(nsamples_noisemodel):
            features_sample = sampleFeatures(features, noise_model, subsample)
            for task in tasks:
                trials = testFeatures_base(features_sample, meta, task, objs_oi, nsplits=nsplits)
                task_trials = task_trials + (trials,)
        task_trials = tb.rowstack(task_trials)
        task_trials['WorkerID'] = feat
        rec[feat] = task_trials
    return task_trials