Example #1
import errno
import os
import sys


def run(logger, args, extra_argv=None):
    # Read image planes from the input Avro file, compute features for each
    # plane and write one output Avro record per feature vector.
    try:
        os.makedirs(args.out_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            sys.exit('Cannot create output dir: %s' % e)
    tag, ext = os.path.splitext(os.path.basename(args.in_fn))
    out_fn = os.path.join(args.out_dir, '%s_features%s' % (tag, ext))
    logger.info('writing to %s', out_fn)
    with open(out_fn, 'wb') as fout:
        writer = AvroFileWriter(fout, out_schema)
        with open(args.in_fn, 'rb') as fin:
            reader = AvroFileReader(fin)
            for r in reader:
                p = BioImgPlane(r)
                pixels = p.get_xy()
                logger.info('processing %r', [p.z, p.c, p.t])
                kw = {
                    'long': args.long,
                    'w': args.width,
                    'h': args.height,
                    'dx': args.delta_x,
                    'dy': args.delta_y,
                    'ox': args.offset_x,
                    'oy': args.offset_y,
                }
                for fv in calc_features(pixels, p.name, **kw):
                    out_rec = to_avro(fv)
                    for name in 'img_path', 'series', 'z', 'c', 't':
                        out_rec[name] = getattr(p, name)
                    writer.write(out_rec)
        writer.close()
    return 0
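The try/except around os.makedirs in the example above is the pre-Python-3.2 idiom for tolerating an already-existing output directory; on current Python the same effect is available directly (the directory name below is made up):

import os

# Equivalent of the try/except errno.EEXIST block above on Python >= 3.2;
# "out" is a hypothetical directory name.
os.makedirs("out", exist_ok=True)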
Example #2

import os


def run(logger, args, extra_argv=None):
    # Summarize a feature Avro file: collect the distinct values of the
    # string and integer metadata keys and write them to a plain-text summary.
    if not args.out_fn:
        tag = os.path.splitext(os.path.basename(args.in_fn))[0]
        args.out_fn = "%s.summary" % tag
    str_keys = ["name", "img_path"]
    int_keys = ["series", "z", "c", "t", "w", "h", "x", "y"]
    d = {"n_features": set()}
    with open(args.in_fn, "rb") as f:
        reader = AvroFileReader(f)
        for r in reader:
            d["n_features"].add(
                sum(len(v) for k, v in r.items() if type(v) is list)
            )
            for k in str_keys:
                d.setdefault(k, set()).add(r[k])
            for k in int_keys:
                d.setdefault(k, set()).add(int(r[k]))
    logger.info("writing to %s", args.out_fn)
    with open(args.out_fn, "w") as fo:
        for k in str_keys:
            fo.write("%s: %s\n" % (k, ", ".join(sorted(d[k]))))
        for k in int_keys:
            v = sorted(d[k])
            # Collapse runs of consecutive integers into a "min-max" range.
            if len(v) > 2 and v == list(range(v[0], v[-1] + 1)):
                fo.write("%s: %d-%d\n" % (k, v[0], v[-1]))
            else:
                fo.write("%s: %s\n" % (k, ", ".join(map(str, v))))
Example #3
def iter_records(f, logger, num_records=None):
    reader = AvroFileReader(f)
    for i, r in enumerate(reader):
        logger.debug("record #%d", i)
        if num_records is not None and i >= num_records:
            # PEP 479: return to stop the generator instead of raising
            # StopIteration, which becomes a RuntimeError on Python 3.7+.
            return
        yield r
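A minimal usage sketch for the generator above, assuming an Avro container file is available (the file name and the printed fields are hypothetical):

import logging

logging.basicConfig(level=logging.DEBUG)
# Read at most 10 records, then let the generator stop cleanly.
with open("planes.avro", "rb") as f:           # hypothetical input file
    for rec in iter_records(f, logging.getLogger(__name__), num_records=10):
        print(sorted(rec))                     # record field names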
Example #4
import json
import re


def convert_avro(f, omero_ids, id_fields, expected_features, repattern,
                 rematch):
    """
    f: File handle to the input file
    omero_ids: Maps of plate-series to OMERO IDs
    id_fields: A list of ID columns: [(NameID, Type)]
    expected_features: The list of expected features, if empty this will be
    populated by the first avro record
    repattern, rematch: Match map screen name against a regular expression
    """
    cols = []
    a = AvroFileReader(f)

    for i, r in enumerate(a):
        all_fields = r.keys()
        feature_fields = sorted(
            set(all_fields).difference(
                metadata_fields.keys()).difference(exclude_fields))
        if expected_features:
            assert (expected_features == feature_fields), 'Mismatched features'
        else:
            expected_features = feature_fields
        assert len(all_fields) == (len(feature_fields) + len(metadata_fields) +
                                   len(exclude_fields)), 'Unexpected fields'

        if not cols:
            for mk, mv in id_fields.items():
                c = column_type(mv, mk)
                c.description = json.dumps({'_metadata': True})
                cols.append(c)

            for mk, mv in metadata_fields.items():
                c = column_type(mv, mk)
                c.description = json.dumps({'_metadata': True})
                cols.append(c)

            for fk in feature_fields:
                size = len(r[fk])
                if size > 0:
                    c = column_type('DoubleArray %d' % size, fk)
                    cols.append(c)

        for c in cols:
            if c.name.endswith('ID'):
                if rematch:
                    rname = re.sub(repattern, rematch, r['name'])
                else:
                    rname = re.search(repattern, r['name']).group(0)
                oid = omero_ids[c.name][rname]
                c.values.append(oid)
            else:
                c.values.append(r[c.name])

    return cols
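The repattern/rematch pair controls how a record's name becomes the key for the OMERO ID lookup: with rematch set, re.sub rewrites the name; without it, the first match of repattern is taken as-is. A self-contained sketch with made-up names and patterns:

import re

name = "plate1_A01_series3"                      # hypothetical record name

# With a replacement: rewrite the whole name via re.sub.
repattern, rematch = r"^(plate\d+)_.*$", r"\1"   # made-up pattern/replacement
print(re.sub(repattern, rematch, name))          # -> plate1

# Without a replacement: take the first match of the pattern.
print(re.search(r"plate\d+", name).group(0))     # -> plate1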
Example #5
import os
import pickle
import shelve
from contextlib import closing


def iter_records(fn):
    # Dispatch on file extension: shelve DB, pickled list or Avro container.
    ext = os.path.splitext(fn)[-1]
    if ext == ".db":
        with closing(shelve.open(fn, flag="r")) as shelf:
            for r in shelf.values():
                yield r
    elif ext == ".pickle":
        with open(fn, "rb") as f:
            for r in pickle.load(f):
                yield r
    else:
        with open(fn, "rb") as f:
            reader = AvroFileReader(f)
            for r in reader:
                yield r
Example #6
def get_avro_reader(fp):
    # Prefer the fast C-based pyavroc reader; fall back to the pure-Python
    # avro package if pyavroc is not installed.
    try:
        from pyavroc import AvroFileReader
        return AvroFileReader(fp, False)
    except ImportError:
        from avro.datafile import DataFileReader
        from avro.io import DatumReader
        return DataFileReader(fp, DatumReader())
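Whichever backend get_avro_reader returns, the result behaves as an iterable of decoded records, so the calling code does not need to know which library was picked; a hedged usage sketch (the file name is made up):

# Hypothetical usage; "features.avro" is a made-up file name.
with open("features.avro", "rb") as fp:
    for record in get_avro_reader(fp):
        print(sorted(record))                  # field names of each record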
Example #7
def get_image_size(fin):
    # Map each dimension letter in 'dimension_order' to the corresponding
    # entry of the pixel array shape and return the (Z, C, T) sizes.
    reader = AvroFileReader(fin)
    r = next(reader)
    size_map = dict(zip(r['dimension_order'], r['pixel_data']['shape']))
    return tuple(size_map[_] for _ in 'ZCT')
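The zip of dimension_order against the pixel shape is easiest to see with concrete numbers; the record below is made up but follows the same layout the function expects:

# Hypothetical record: a 5-D image in XYZCT order.
r = {"dimension_order": "XYZCT",
     "pixel_data": {"shape": [512, 512, 25, 3, 100]}}
size_map = dict(zip(r["dimension_order"], r["pixel_data"]["shape"]))
print(tuple(size_map[_] for _ in "ZCT"))   # (25, 3, 100)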
Example #8
def iterplanes(avro_file):
    # Lazily yield one BioImgPlane per record in the Avro container file.
    with open(avro_file, 'rb') as f:
        reader = AvroFileReader(f)
        for r in reader:
            yield BioImgPlane(r)
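Because iterplanes is a generator, planes are decoded one record at a time rather than loading the whole file; a usage sketch (the file name is hypothetical, and get_xy / z / c / t follow the attributes used in Example #1):

# Hypothetical usage: stream planes from a made-up Avro file.
for plane in iterplanes("planes.avro"):
    pixels = plane.get_xy()
    print(plane.z, plane.c, plane.t)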