Beispiel #1
0
def do_makesamples(args):
    """Generate sample indexes for a job and write them out as text.

    A job is assembled from the schema at ``args.schema`` and the raw
    input file ``args.infile`` (read with ``limit=1``), using the 'f'
    statistic together with the sample count, block variables and
    condition variables taken from ``args``. The indexes returned by
    ``an.new_sample_indexes`` are then written one row per line, after
    two header lines showing the job's block and condition layouts. Each
    row is followed by `` # `` and the value the ``GroupSymbols`` object
    returns for that row.

    :param args:
      Object providing ``num_samples``, ``block``, ``condition``,
      ``schema``, ``infile`` and ``output``. If ``output`` is None the
      text is written to ``sys.stdout``; otherwise ``output`` must
      offer a ``write`` method.

    """
    job_settings = Settings(
        num_samples=args.num_samples,
        block_variables=args.block,
        condition_variables=args.condition,
        stat='f')

    schema = load_schema(args.schema)
    raw_input = Input.from_raw_file(args.infile, schema, limit=1)
    job = Job(input=raw_input, settings=job_settings, schema=schema)

    sample_indexes = an.new_sample_indexes(job)

    # Fall back to standard output when no destination was given.
    out = sys.stdout if args.output is None else args.output

    block_header = "# Block layout:     " + str(job.block_layout) + "\n"
    out.write(block_header)
    condition_header = "# Condition layout: " + str(job.condition_layout) + "\n"
    out.write(condition_header)

    to_symbols = GroupSymbols(job.condition_layout)

    for indexes in sample_indexes:
        # Every index is right-aligned in a three-character column.
        for index in indexes:
            out.write(' {:3d}'.format(index))
        symbols = to_symbols(np.array(indexes))
        out.write(" # " + symbols + "\n")
Beispiel #2
0
def copy_input(path, input_path, schema, settings, job_id):
    """Load a raw input file and save it, with settings and schema, to HDF5.

    The input is read with ``Input.from_raw_file`` and written to a new
    HDF5 file opened in ``'w'`` mode. The data table and feature ids are
    stored as datasets, the settings as file attributes (plus the
    ``tuning_params`` and, when present, ``equalize_means_ids``
    datasets), and the schema as a string attribute.

    :param path:
      Location of the HDF5 file to create.

    :param input_path:
      Location of the raw input file to load.

    :param schema:
      Schema used to parse the input; it must provide ``save(fileobj)``.

    :param settings:
      Settings object whose fields are copied into the file.

    :param job_id:
      Identifier stored in the ``job_id`` attribute.

    """
    logging.info("Loading input for job from %s", input_path)
    raw_input = Input.from_raw_file(input_path, schema)
    logging.info("Saving input, settings, and schema to %s", path)

    with h5py.File(path, 'w') as db:

        # Save the input object
        ids = raw_input.feature_ids
        db.create_dataset("table", data=raw_input.table)

        # Feature ids are strings, so the dataset needs a variable-length
        # string dtype; it is created empty and filled one id at a time.
        dt = h5py.special_dtype(vlen=str)
        feature_ids = db.create_dataset("feature_ids", (len(ids),), dt)
        for i, fid in enumerate(ids):
            feature_ids[i] = fid

        logging.debug("Block variables are %s", settings.block_variables)

        # Save the settings object
        db.create_dataset("tuning_params", data=settings.tuning_params)
        attrs = db.attrs
        attrs['job_id'] = job_id
        # NOTE(review): 'stat' and 'glm_family' were written with a
        # trailing comma, i.e. as one-element tuples. That layout is kept
        # so existing readers keep working -- confirm whether it was
        # intended.
        attrs['stat'] = (settings.stat,)
        attrs['glm_family'] = (settings.glm_family,)
        attrs['num_bins'] = settings.num_bins
        attrs['num_samples'] = settings.num_samples
        attrs['sample_from_residuals'] = settings.sample_from_residuals
        attrs['sample_with_replacement'] = settings.sample_with_replacement
        # Build a real list: on Python 3 ``map`` returns a lazy iterator,
        # which cannot be stored as an attribute value.
        attrs['condition_variables'] = [
            str(var) for var in settings.condition_variables]
        # The attribute is simply omitted when there are no block variables.
        if settings.block_variables:
            attrs['block_variables'] = settings.block_variables
        attrs['summary_min_conf'] = settings.summary_min_conf
        attrs['summary_step_size'] = settings.summary_step_size
        attrs['equalize_means'] = settings.equalize_means
        attrs['shrink'] = settings.shrink

        # Save the schema object, serialized through an in-memory buffer
        schema_str = StringIO()
        schema.save(schema_str)
        attrs['schema'] = str(schema_str.getvalue())

        if settings.equalize_means_ids is not None:
            db['equalize_means_ids'] = settings.equalize_means_ids
Beispiel #3
0
def copy_input(path, input_path, schema, settings, job_id):
    """Load a raw input file and save it, with settings and schema, to HDF5.

    The input is read with ``Input.from_raw_file`` and written to a new
    HDF5 file opened in ``'w'`` mode. The data table and feature ids are
    stored as datasets, the settings as file attributes (plus the
    ``tuning_params`` and, when present, ``equalize_means_ids``
    datasets), and the schema as a string attribute.

    :param path:
      Location of the HDF5 file to create.

    :param input_path:
      Location of the raw input file to load.

    :param schema:
      Schema used to parse the input; it must provide ``save(fileobj)``.

    :param settings:
      Settings object whose fields are copied into the file.

    :param job_id:
      Identifier stored in the ``job_id`` attribute.

    """
    logging.info("Loading input for job from %s", input_path)
    raw_input = Input.from_raw_file(input_path, schema)
    logging.info("Saving input, settings, and schema to %s", path)

    with h5py.File(path, 'w') as db:

        # Save the input object
        ids = raw_input.feature_ids
        db.create_dataset("table", data=raw_input.table)

        # Feature ids are strings, so the dataset needs a variable-length
        # string dtype; it is created empty and filled one id at a time.
        dt = h5py.special_dtype(vlen=str)
        feature_ids = db.create_dataset("feature_ids", (len(ids),), dt)
        for i, fid in enumerate(ids):
            feature_ids[i] = fid

        logging.debug("Block variables are %s", settings.block_variables)

        # Save the settings object
        db.create_dataset("tuning_params", data=settings.tuning_params)
        attrs = db.attrs
        attrs['job_id'] = job_id
        # NOTE(review): 'stat' and 'glm_family' were written with a
        # trailing comma, i.e. as one-element tuples. That layout is kept
        # so existing readers keep working -- confirm whether it was
        # intended.
        attrs['stat'] = (settings.stat,)
        attrs['glm_family'] = (settings.glm_family,)
        attrs['num_bins'] = settings.num_bins
        attrs['num_samples'] = settings.num_samples
        attrs['sample_from_residuals'] = settings.sample_from_residuals
        attrs['sample_with_replacement'] = settings.sample_with_replacement
        # Build a real list: on Python 3 ``map`` returns a lazy iterator,
        # which cannot be stored as an attribute value.
        attrs['condition_variables'] = [
            str(var) for var in settings.condition_variables]
        # The attribute is simply omitted when there are no block variables.
        if settings.block_variables:
            attrs['block_variables'] = settings.block_variables
        attrs['summary_min_conf'] = settings.summary_min_conf
        attrs['summary_step_size'] = settings.summary_step_size
        attrs['equalize_means'] = settings.equalize_means
        attrs['shrink'] = settings.shrink

        # Save the schema object, serialized through an in-memory buffer
        schema_str = StringIO()
        schema.save(schema_str)
        attrs['schema'] = str(schema_str.getvalue())

        if settings.equalize_means_ids is not None:
            db['equalize_means_ids'] = settings.equalize_means_ids
Beispiel #4
0
def load_input(db):
    """Rebuild an ``Input`` from the datasets stored in ``db``.

    :param db:
      Mapping-like object (for example an open HDF5 file) holding the
      ``'table'`` and ``'feature_ids'`` datasets.

    :return:
      An ``Input`` built from the full contents of both datasets.

    """
    # Indexing with ``[...]`` selects the entire contents of each dataset.
    table = db['table'][...]
    feature_ids = db['feature_ids'][...]
    return Input(table, feature_ids)