Example #1
0
def _make_encoder_builders_file(args):
    '''
    Build encoder builders for the schema features found in one csv file.

    A builder is created for every header column that is named in the
    schema, and each builder is fed the non-empty, whitespace-stripped
    cells of its column.

    Args:
        args: a single (schema_in, rows_in) pair. schema_in is handed to
            json_load and rows_in is the path of the csv file to scan.

    Returns:
        The builders in header order, omitting columns that are not in
        the schema.

    Raises:
        LoomError: if a schema column is repeated in the header, if no
            header column is in the schema, or if a schema feature is
            missing from the header.
    '''
    # Unpack in the body rather than in the signature: tuple parameters are
    # a Python-2-only construct, and the sibling workers take `args` too.
    schema_in, rows_in = args
    assert os.path.isfile(rows_in)
    schema = json_load(schema_in)
    with csv_reader(rows_in) as reader:
        header = next(reader)
        builders = []
        seen = set()
        for name in header:
            if name not in schema:
                # Keep a placeholder so builders stay aligned with row cells.
                builders.append(None)
                continue
            if name in seen:
                raise LoomError('Repeated column {} in csv file {}'.format(
                    name, rows_in))
            seen.add(name)
            model = schema[name]
            builders.append(ENCODER_BUILDERS[model](name, model))
        # `seen` is empty exactly when every entry of builders is None.
        if not seen:
            raise LoomError(
                'Csv file has no known features; '
                'try adding a header to {}'.format(rows_in))
        missing_features = sorted(set(schema) - seen)
        if missing_features:
            raise LoomError('\n  '.join(
                ['Csv file is missing features:'] + missing_features))
        for row in reader:
            for value, builder in izip(row, builders):
                if builder is None:
                    continue
                value = value.strip()
                if value:
                    # Empty cells are treated as unobserved and skipped.
                    builder.add_value(value)
    return [b for b in builders if b is not None]
Example #2
0
def _make_encoder_builders_file(args):
    '''
    Build encoder builders for the schema features found in one csv file.

    A builder is created for every header column that is named in the
    schema, and each builder is fed the non-empty, whitespace-stripped
    cells of its column.

    Args:
        args: a single (schema_in, rows_in) pair. schema_in is handed to
            json_load and rows_in is the path of the csv file to scan.

    Returns:
        The builders in header order, omitting columns that are not in
        the schema.

    Raises:
        LoomError: if a schema column is repeated in the header, if no
            header column is in the schema, or if a schema feature is
            missing from the header.
    '''
    # Unpack in the body rather than in the signature: tuple parameters are
    # a Python-2-only construct, and the sibling workers take `args` too.
    schema_in, rows_in = args
    assert os.path.isfile(rows_in)
    schema = json_load(schema_in)
    with csv_reader(rows_in) as reader:
        header = next(reader)
        builders = []
        seen = set()
        for name in header:
            if name not in schema:
                # Keep a placeholder so builders stay aligned with row cells.
                builders.append(None)
                continue
            if name in seen:
                raise LoomError('Repeated column {} in csv file {}'.format(
                    name, rows_in))
            seen.add(name)
            model = schema[name]
            builders.append(ENCODER_BUILDERS[model](name, model))
        # `seen` is empty exactly when every entry of builders is None.
        if not seen:
            raise LoomError('Csv file has no known features; '
                            'try adding a header to {}'.format(rows_in))
        missing_features = sorted(set(schema) - seen)
        if missing_features:
            raise LoomError('\n  '.join(['Csv file is missing features:'] +
                                        missing_features))
        for row in reader:
            for value, builder in izip(row, builders):
                if builder is None:
                    continue
                value = value.strip()
                if value:
                    # Empty cells are treated as unobserved and skipped.
                    builder.add_value(value)
    return [b for b in builders if b is not None]
Example #3
0
File: main.py Project: fritzo/loom
def synthesize_clusters(name, sample_count, cluster_count, pixel_count):
    '''
    Paint clustered sample points onto a 3-channel copy of IMAGE.

    Args:
        name: dataset name, looked up with loom.store.get_paths.
        sample_count: number of rows drawn at random as seed rows.
        cluster_count: forwarded to server.cluster.
        pixel_count: number of rows drawn at random to be clustered and
            painted.

    Returns:
        IMAGE repeated over three channels, with one Set1 color per
        cluster label written at each clustered sample's image
        coordinates. If the server returns no labelled samples the
        unpainted image is returned.
    '''
    with csv_reader(SAMPLES) as reader:
        next(reader)  # skip the first (header) row
        # random.sample needs a real sequence, so build a list explicitly
        # instead of relying on map() returning one.
        samples = [tuple(row) for row in reader]
    pts = random.sample(samples, sample_count)
    samples = random.sample(samples, pixel_count)

    root = loom.store.get_paths(name)['root']
    with loom.preql.get_server(root) as server:
        sample_labels = server.cluster(
            rows_to_cluster=samples,
            seed_rows=pts,
            cluster_count=cluster_count)

    shape = IMAGE.shape
    image = IMAGE.reshape(shape[0], shape[1], 1).repeat(3, 2)

    # Read the labels directly rather than transposing every
    # (label, sample) pair just to take the first column.
    labels = [label for label, _ in sample_labels]
    if not labels:
        # Nothing was clustered, so there is nothing to paint.
        return image
    label_count = max(labels) + 1

    colors = pyplot.cm.Set1(numpy.linspace(0, 1, label_count))
    # Keep the first three channels and scale to 8-bit values.
    colors = (255 * colors[:, :3]).astype(numpy.uint8)
    for label, sample in sample_labels:
        x, y = to_image_coordinates(float(sample[0]), float(sample[1]))
        image[x, y] = colors[label]
    return image
Example #4
0
def synthesize_clusters(name, sample_count, cluster_count, pixel_count):
    '''
    Paint clustered sample points onto a 3-channel copy of IMAGE.

    Args:
        name: dataset name, looked up with loom.store.get_paths.
        sample_count: number of rows drawn at random as seed rows.
        cluster_count: forwarded to server.cluster.
        pixel_count: number of rows drawn at random to be clustered and
            painted.

    Returns:
        IMAGE repeated over three channels, with one Set1 color per
        cluster label written at each clustered sample's image
        coordinates. If the server returns no labelled samples the
        unpainted image is returned.
    '''
    with csv_reader(SAMPLES) as reader:
        next(reader)  # skip the first (header) row
        # random.sample needs a real sequence, so build a list explicitly
        # instead of relying on map() returning one.
        samples = [tuple(row) for row in reader]
    pts = random.sample(samples, sample_count)
    samples = random.sample(samples, pixel_count)

    root = loom.store.get_paths(name)['root']
    with loom.preql.get_server(root) as server:
        sample_labels = server.cluster(
            rows_to_cluster=samples,
            seed_rows=pts,
            cluster_count=cluster_count)

    shape = IMAGE.shape
    image = IMAGE.reshape(shape[0], shape[1], 1).repeat(3, 2)

    # Read the labels directly rather than transposing every
    # (label, sample) pair just to take the first column.
    labels = [label for label, _ in sample_labels]
    if not labels:
        # Nothing was clustered, so there is nothing to paint.
        return image
    label_count = max(labels) + 1

    colors = pyplot.cm.Set1(numpy.linspace(0, 1, label_count))
    # Keep the first three channels and scale to 8-bit values.
    colors = (255 * colors[:, :3]).astype(numpy.uint8)
    for label, sample in sample_labels:
        x, y = to_image_coordinates(float(sample[0]), float(sample[1]))
        image[x, y] = colors[label]
    return image
Example #5
0
def load_rows_csv(dirname):
    '''
    Load every csv file in a directory into a single list of rows.

    Args:
        dirname: directory whose entries are all opened with csv_reader.

    Returns:
        A list whose first element is a header row and whose remaining
        elements are the data rows of all files, in os.listdir order.
        The header kept is the one from the last file read; headers are
        not checked against each other.

    Raises:
        ValueError: if the directory has no entries, so no header exists.
    '''
    rows = [None]  # slot 0 is reserved for the header
    header = None
    for basename in os.listdir(dirname):
        path = os.path.join(dirname, basename)
        with csv_reader(path) as reader:
            header = next(reader)
            rows.extend(reader)
    if header is None:
        # Previously this fell through to an UnboundLocalError.
        raise ValueError('no csv files found in {}'.format(dirname))
    rows[0] = header
    return rows
Example #6
0
def load_rows_csv(dirname):
    '''
    Load every csv file in a directory into a single list of rows.

    Args:
        dirname: directory whose entries are all opened with csv_reader.

    Returns:
        A list whose first element is a header row and whose remaining
        elements are the data rows of all files, in os.listdir order.
        The header kept is the one from the last file read; headers are
        not checked against each other.

    Raises:
        ValueError: if the directory has no entries, so no header exists.
    '''
    rows = [None]  # slot 0 is reserved for the header
    header = None
    for basename in os.listdir(dirname):
        path = os.path.join(dirname, basename)
        with csv_reader(path) as reader:
            header = next(reader)
            rows.extend(reader)
    if header is None:
        # Previously this fell through to an UnboundLocalError.
        raise ValueError('no csv files found in {}'.format(dirname))
    rows[0] = header
    return rows
Example #7
0
def _import_rowids_file(args):
    '''
    Write an (internal id, external row id) csv for one csv file of rows.

    Args:
        args: a 5-tuple (rows_csv_in, rowids_out, id_offset, id_stride,
            id_field). When id_field is None the external id is
            '<basename of rows_csv_in>:<row index>'; otherwise it is the
            cell in the header column named id_field.

    The internal id of the i-th data row is id_offset + id_stride * i.
    '''
    rows_csv_in, rowids_out, id_offset, id_stride, id_field = args
    assert os.path.isfile(rows_csv_in)
    with csv_reader(rows_csv_in) as reader:
        header = next(reader)
        if id_field is not None:
            # Resolve the id column before the output file is opened.
            id_column = header.index(id_field)
        else:
            id_column = None
            basename = os.path.basename(rows_csv_in)
        with csv_writer(rowids_out) as writer:
            for index, row in enumerate(reader):
                if id_column is None:
                    rowid = '{}:{}'.format(basename, index)
                else:
                    rowid = row[id_column]
                writer.writerow((id_offset + id_stride * index, rowid))
Example #8
0
def _import_rowids_file(args):
    '''
    Write an (internal id, external row id) csv for one csv file of rows.

    Args:
        args: a 5-tuple (rows_csv_in, rowids_out, id_offset, id_stride,
            id_field). When id_field is None the external id is
            '<basename of rows_csv_in>:<row index>'; otherwise it is the
            cell in the header column named id_field.

    The internal id of the i-th data row is id_offset + id_stride * i.
    '''
    rows_csv_in, rowids_out, id_offset, id_stride, id_field = args
    assert os.path.isfile(rows_csv_in)
    with csv_reader(rows_csv_in) as reader:
        header = next(reader)
        if id_field is not None:
            # Resolve the id column before the output file is opened.
            id_column = header.index(id_field)
        else:
            id_column = None
            basename = os.path.basename(rows_csv_in)
        with csv_writer(rowids_out) as writer:
            for index, row in enumerate(reader):
                if id_column is None:
                    rowid = '{}:{}'.format(basename, index)
                else:
                    rowid = row[id_column]
                writer.writerow((id_offset + id_stride * index, rowid))
Example #9
0
File: main.py Project: fritzo/loom
def cluster(cluster_count=5, sample_count=1000, pixel_count=None):
    '''
    Draw a fox map: cluster sampled points and save cluster.png in RESULTS.

    All three arguments are coerced with int(). When pixel_count is None
    it defaults to the number of rows in SAMPLES minus one.
    '''
    cluster_count = int(cluster_count)
    sample_count = int(sample_count)
    if pixel_count is not None:
        pixel_count = int(pixel_count)
    else:
        with csv_reader(SAMPLES) as reader:
            # Count all rows, then discount the first one.
            pixel_count = sum(1 for _ in reader) - 1
    assert loom.store.get_paths(NAME)['samples'], 'first compress image'

    image = synthesize_clusters(NAME, sample_count, cluster_count, pixel_count)
    out_path = os.path.join(RESULTS, 'cluster.png')
    scipy.misc.imsave(out_path, image)
Example #10
0
File: main.py Project: fritzo/loom
def create_dataset(row_count=ROW_COUNT):
    '''
    Extract dataset from image.

    Saves IMAGE as original.png, writes row_count sampled points to
    SAMPLES under an 'x', 'y' header, then reads them back and saves
    their visualization as samples.png.
    '''
    original_path = os.path.join(RESULTS, 'original.png')
    scipy.misc.imsave(original_path, IMAGE)
    print('sampling {} points from image'.format(row_count))
    with open_compressed(SAMPLES, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['x', 'y'])
        writer.writerows(sample_from_image(IMAGE, row_count))
    with csv_reader(SAMPLES) as reader:
        next(reader)  # skip the 'x', 'y' header written above
        # Lazy on purpose: rows are parsed while the reader is still open.
        points = ([float(cell) for cell in row] for row in reader)
        image = visualize_dataset(points)
    samples_path = os.path.join(RESULTS, 'samples.png')
    scipy.misc.imsave(samples_path, image)
Example #11
0
def cluster(cluster_count=5, sample_count=1000, pixel_count=None):
    '''
    Draw a fox map: cluster sampled points and save cluster.png in RESULTS.

    All three arguments are coerced with int(). When pixel_count is None
    it defaults to the number of rows in SAMPLES minus one.
    '''
    cluster_count = int(cluster_count)
    sample_count = int(sample_count)
    if pixel_count is not None:
        pixel_count = int(pixel_count)
    else:
        with csv_reader(SAMPLES) as reader:
            # Count all rows, then discount the first one.
            pixel_count = sum(1 for _ in reader) - 1
    assert loom.store.get_paths(NAME)['samples'], 'first compress image'

    image = synthesize_clusters(NAME, sample_count, cluster_count, pixel_count)
    out_path = os.path.join(RESULTS, 'cluster.png')
    scipy.misc.imsave(out_path, image)
Example #12
0
def create_dataset(row_count=ROW_COUNT):
    '''
    Extract dataset from image.

    Saves IMAGE as original.png, writes row_count sampled points to
    SAMPLES under an 'x', 'y' header, then reads them back and saves
    their visualization as samples.png.
    '''
    original_path = os.path.join(RESULTS, 'original.png')
    scipy.misc.imsave(original_path, IMAGE)
    print('sampling {} points from image'.format(row_count))
    with open_compressed(SAMPLES, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['x', 'y'])
        writer.writerows(sample_from_image(IMAGE, row_count))
    with csv_reader(SAMPLES) as reader:
        next(reader)  # skip the 'x', 'y' header written above
        # Lazy on purpose: rows are parsed while the reader is still open.
        points = ([float(cell) for cell in row] for row in reader)
        image = visualize_dataset(points)
    samples_path = os.path.join(RESULTS, 'samples.png')
    scipy.misc.imsave(samples_path, image)
Example #13
0
File: main.py Project: fritzo/loom
def synthesize_search(name, image_pos):
    '''
    Paint the search results for one image position onto a copy of IMAGE.

    The query position is marked [0, 255, 0]. Each returned row is drawn
    as [255 * (1 - 1 / score), 0, 0] at its sample's image coordinates,
    where score is exp() of the returned value. Painting stops at the
    first result whose score is below 1.

    Args:
        name: dataset name, looked up with loom.store.get_paths.
        image_pos: index into the image; also unpacked as the arguments
            of to_loom_coordinates.

    Returns:
        The painted 3-channel image.
    '''
    shape = IMAGE.shape
    image = IMAGE.reshape(shape[0], shape[1], 1).repeat(3, 2)
    image[image_pos] = [0, 255, 0]
    with csv_reader(SAMPLES) as reader:
        # Drop the first (header) row and parse every cell as a float.
        rows = [[float(cell) for cell in r] for r in list(reader)[1:]]
    root = loom.store.get_paths(name)['root']
    with loom.preql.get_server(root) as server:
        loom_x, loom_y = to_loom_coordinates(*image_pos)
        response = server.search((str(loom_x), str(loom_y)))
    results = csv.reader(StringIO(response))
    next(results)  # skip the first row of the response
    for row_id, raw_score in results:
        score = numpy.exp(float(raw_score))
        if score < 1.:
            break
        # Row ids look like '<prefix>:<index>'; the index selects the sample.
        index = int(row_id.split(':')[1])
        sample_x, sample_y = rows[index]
        px, py = to_image_coordinates(sample_x, sample_y)
        image[px, py] = [255 * (1 - 1 / score), 0, 0]
    return image
Example #14
0
def synthesize_search(name, image_pos):
    '''
    Paint the search results for one image position onto a copy of IMAGE.

    The query position is marked [0, 255, 0]. Each returned row is drawn
    as [255 * (1 - 1/score), 0, 0] at its sample's image coordinates,
    where score is exp() of the returned value. Painting stops at the
    first result whose score is below 1.

    Args:
        name: dataset name, looked up with loom.store.get_paths.
        image_pos: index into the image; also unpacked as the arguments
            of to_loom_coordinates.

    Returns:
        The painted 3-channel image.
    '''
    shape = IMAGE.shape
    image = IMAGE.reshape(shape[0], shape[1], 1).repeat(3, 2)
    image[image_pos] = [0, 255, 0]
    with csv_reader(SAMPLES) as reader:
        # Drop the first (header) row and parse every cell as a float.
        rows = [[float(cell) for cell in r] for r in list(reader)[1:]]
    root = loom.store.get_paths(name)['root']
    with loom.preql.get_server(root) as server:
        loom_x, loom_y = to_loom_coordinates(*image_pos)
        response = server.search((str(loom_x), str(loom_y)))
    results = csv.reader(StringIO(response))
    next(results)  # skip the first row of the response
    for row_id, raw_score in results:
        score = numpy.exp(float(raw_score))
        if score < 1.:
            break
        # Row ids look like '<prefix>:<index>'; the index selects the sample.
        index = int(row_id.split(':')[1])
        sample_x, sample_y = rows[index]
        px, py = to_image_coordinates(sample_x, sample_y)
        image[px, py] = [255 * (1 - 1/score), 0, 0]
    return image
Example #15
0
def _import_rows_file(args):
    '''
    Encode one csv file of rows and dump it with row_stream_dump.

    Args:
        args: a 5-tuple (rows_csv_in, rows_out, id_offset, id_stride,
            encoding_in). encoding_in is handed to json_load and yields
            the encoder specs; the i-th data row gets the id
            id_offset + id_stride * i.

    Raises:
        LoomError: if a data row's length differs from the header's.
    '''
    rows_csv_in, rows_out, id_offset, id_stride, encoding_in = args
    assert os.path.isfile(rows_csv_in)
    encoders = json_load(encoding_in)
    message = loom.cFormat.Row()
    add_field = {
        'booleans': message.add_booleans,
        'counts': message.add_counts,
        'reals': message.add_reals,
    }
    with csv_reader(rows_csv_in) as reader:
        feature_names = list(next(reader))
        header_length = len(feature_names)
        name_to_pos = dict(
            (name, column) for column, name in enumerate(feature_names))
        # One (column or None, adder, encoder) triple per encoder spec;
        # the column is None when the feature is absent from this file.
        schema = [
            (
                name_to_pos.get(encoder['name']),
                add_field[loom.schema.MODEL_TO_DATATYPE[encoder['model']]],
                load_encoder(encoder),
            )
            for encoder in encoders
        ]

        def rows():
            for index, cells in enumerate(reader):
                if len(cells) != header_length:
                    raise LoomError('row {} has wrong length {}:\n{}'.format(
                        index, len(cells), cells))
                message.id = id_offset + id_stride * index
                for pos, add, encode in schema:
                    value = cells[pos].strip() if pos is not None else None
                    if value:
                        message.add_observed(True)
                        add(encode(value))
                    else:
                        # Missing column or blank cell: mark as unobserved.
                        message.add_observed(False)
                # The same message object is reused for every row, so it
                # is cleared only after the consumer has taken it.
                yield message
                message.Clear()

        loom.cFormat.row_stream_dump(rows(), rows_out)
Example #16
0
def _import_rows_file(args):
    '''
    Encode one csv file of rows and dump it with row_stream_dump.

    Args:
        args: a 5-tuple (rows_csv_in, rows_out, id_offset, id_stride,
            encoding_in). encoding_in is handed to json_load and yields
            the encoder specs; the i-th data row gets the id
            id_offset + id_stride * i.

    Raises:
        LoomError: if a data row's length differs from the header's.
    '''
    rows_csv_in, rows_out, id_offset, id_stride, encoding_in = args
    assert os.path.isfile(rows_csv_in)
    encoders = json_load(encoding_in)
    message = loom.cFormat.Row()
    add_field = {
        'booleans': message.add_booleans,
        'counts': message.add_counts,
        'reals': message.add_reals,
    }
    with csv_reader(rows_csv_in) as reader:
        feature_names = list(next(reader))
        header_length = len(feature_names)
        name_to_pos = dict(
            (name, column) for column, name in enumerate(feature_names))
        # One (column or None, adder, encoder) triple per encoder spec;
        # the column is None when the feature is absent from this file.
        schema = [
            (
                name_to_pos.get(encoder['name']),
                add_field[loom.schema.MODEL_TO_DATATYPE[encoder['model']]],
                load_encoder(encoder),
            )
            for encoder in encoders
        ]

        def rows():
            for index, cells in enumerate(reader):
                if len(cells) != header_length:
                    raise LoomError('row {} has wrong length {}:\n{}'.format(
                        index, len(cells), cells))
                message.id = id_offset + id_stride * index
                for pos, add, encode in schema:
                    value = cells[pos].strip() if pos is not None else None
                    if value:
                        message.add_observed(True)
                        add(encode(value))
                    else:
                        # Missing column or blank cell: mark as unobserved.
                        message.add_observed(False)
                # The same message object is reused for every row, so it
                # is cleared only after the consumer has taken it.
                yield message
                message.Clear()

        loom.cFormat.row_stream_dump(rows(), rows_out)