def run(logger, args, extra_argv=None):
    """Compute features for every plane of an Avro file and write them out.

    The output file is created in ``args.out_dir`` (which is created if
    missing) and is named after the input file with ``_features`` inserted
    before the extension.  Each input record is wrapped in a ``BioImgPlane``;
    every feature vector produced by ``calc_features`` for that plane is
    converted with ``to_avro``, tagged with the plane's ``img_path``,
    ``series``, ``z``, ``c`` and ``t`` attributes, and appended to the output.

    Args:
        logger: logger used for progress messages.
        args: parsed options; reads ``in_fn``, ``out_dir``, ``long``,
            ``width``, ``height``, ``delta_x``, ``delta_y``, ``offset_x``
            and ``offset_y``.
        extra_argv: unused.

    Returns:
        0 on success.  Exits the process via ``sys.exit`` if the output
        directory cannot be created.
    """
    try:
        os.makedirs(args.out_dir)
    except OSError as e:
        # An already existing output directory is fine; anything else is fatal.
        if e.errno != errno.EEXIST:
            sys.exit('Cannot create output dir: %s' % e)
    tag, ext = os.path.splitext(os.path.basename(args.in_fn))
    out_fn = os.path.join(args.out_dir, '%s_features%s' % (tag, ext))
    logger.info('writing to %s', out_fn)
    # These options do not depend on the record being processed, so the
    # keyword dictionary is built once instead of once per plane.
    kw = {
        'long': args.long,
        'w': args.width,
        'h': args.height,
        'dx': args.delta_x,
        'dy': args.delta_y,
        'ox': args.offset_x,
        'oy': args.offset_y,
    }
    # Avro container files are binary: open both ends in binary mode so no
    # newline translation or text decoding is applied to the data.
    with open(out_fn, 'wb') as fout:
        writer = AvroFileWriter(fout, out_schema)
        try:
            with open(args.in_fn, 'rb') as fin:
                for r in AvroFileReader(fin):
                    p = BioImgPlane(r)
                    pixels = p.get_xy()
                    logger.info('processing %r', [p.z, p.c, p.t])
                    for fv in calc_features(pixels, p.name, **kw):
                        out_rec = to_avro(fv)
                        # Carry the plane identification over to each
                        # output record.
                        for name in 'img_path', 'series', 'z', 'c', 't':
                            out_rec[name] = getattr(p, name)
                        writer.write(out_rec)
        finally:
            # Close the writer even if processing fails, while the
            # underlying file object is still open.
            writer.close()
    return 0
def run(logger, args, extra_argv=None):
    """Write a plain-text summary of the values found in an Avro file.

    For each of the string keys (``name``, ``img_path``) and integer keys
    (``series``, ``z``, ``c``, ``t``, ``w``, ``h``, ``x``, ``y``) the set of
    distinct values across all records is collected and written, one key per
    line, as ``key: v1, v2, ...``.  Integer keys whose values form a
    contiguous run of more than two numbers are written as ``key: first-last``.

    If ``args.out_fn`` is not set, it is set to the input file's base name
    (without extension) followed by ``.summary``.

    Args:
        logger: logger used for progress messages.
        args: parsed options; reads ``in_fn`` and reads/sets ``out_fn``.
        extra_argv: unused.
    """
    if not args.out_fn:
        tag = os.path.splitext(os.path.basename(args.in_fn))[0]
        args.out_fn = "%s.summary" % tag
    str_keys = ["name", "img_path"]
    int_keys = ["series", "z", "c", "t", "w", "h", "x", "y"]
    # Pre-create every bucket so that an input file with no records yields
    # an (empty) summary instead of a KeyError when writing.
    d = dict((k, set()) for k in ["n_features"] + str_keys + int_keys)
    # Avro container files are binary, so read them in binary mode.
    with open(args.in_fn, "rb") as f:
        for r in AvroFileReader(f):
            # Total length of all list-valued fields in the record.
            # NOTE: this is collected but not written out by this function.
            d["n_features"].add(
                sum(len(v) for v in r.values() if isinstance(v, list))
            )
            for k in str_keys:
                d[k].add(r[k])
            for k in int_keys:
                d[k].add(int(r[k]))
    logger.info("writing to %s", args.out_fn)
    with open(args.out_fn, "w") as fo:
        for k in str_keys:
            fo.write("%s: %s\n" % (k, ", ".join(sorted(d[k]))))
        for k in int_keys:
            v = sorted(d[k])
            # v holds distinct sorted integers, so it is a contiguous run
            # exactly when its span equals its length.  Comparing against
            # range() directly is always False on Python 3, where range()
            # is not a list.
            if len(v) > 2 and v[-1] - v[0] == len(v) - 1:
                fo.write("%s: %d-%d\n" % (k, v[0], v[-1]))
            else:
                fo.write("%s: %s\n" % (k, ", ".join(map(str, v))))
def iter_records(f, logger, num_records=None):
    """Yield records read from an Avro file object, optionally limited.

    Args:
        f: open file object passed to ``AvroFileReader``.
        logger: logger; a debug message is emitted for each record read.
        num_records: if not None, stop after yielding this many records.

    Yields:
        The records produced by the reader, in order.
    """
    reader = AvroFileReader(f)
    for i, r in enumerate(reader):
        logger.debug("record #%d", i)
        if num_records is not None and i >= num_records:
            # End the generator with a plain return: under PEP 479 an
            # explicit "raise StopIteration" inside a generator is turned
            # into a RuntimeError.
            return
        yield r
def convert_avro(f, omero_ids, id_fields, expected_features, repattern,
                 rematch):
    """
    Read an Avro file and collect its records into a list of columns.

    f: File handle to the input file
    omero_ids: Nested mapping, indexed first by ID column name and then by
      the key derived from the record's 'name' field, giving the OMERO ID
    id_fields: Mapping of ID column name to column type (iterated as
      name/type pairs)
    expected_features: The list of expected features; if empty, the
      features found in the first avro record are used as the reference
      for all following records (the caller's object is not modified)
    repattern, rematch: Regular expression applied to the record's 'name'
      field. If rematch is set, the key is re.sub(repattern, rematch, name);
      otherwise it is the text matched by re.search(repattern, name)

    Returns the list of columns. Columns are created from the first record:
    one per ID field, one per metadata field, and one 'DoubleArray' column
    per non-empty feature field. One value is appended to every column for
    each record.

    Raises AssertionError if a record's fields do not match the expected
    ones, and ValueError if repattern does not match a record's name when
    no rematch is given.
    """
    cols = []
    for r in AvroFileReader(f):
        all_fields = r.keys()
        feature_fields = sorted(
            set(all_fields).difference(
                metadata_fields.keys()).difference(exclude_fields))
        if expected_features:
            assert (expected_features == feature_fields), \
                'Mismatched features'
        else:
            expected_features = feature_fields
        assert len(all_fields) == (
            len(feature_fields) + len(metadata_fields) +
            len(exclude_fields)), 'Unexpected fields'

        if not cols:
            # .items() works on both Python 2 and Python 3 mappings.
            for mk, mv in id_fields.items():
                c = column_type(mv, mk)
                c.description = json.dumps({'_metadata': True})
                cols.append(c)
            for mk, mv in metadata_fields.items():
                c = column_type(mv, mk)
                c.description = json.dumps({'_metadata': True})
                cols.append(c)
            for fk in feature_fields:
                size = len(r[fk])
                # Feature fields that are empty in this record get no column.
                if size > 0:
                    c = column_type('DoubleArray %d' % size, fk)
                    cols.append(c)

        for c in cols:
            if c.name.endswith('ID'):
                # ID columns are filled from omero_ids, keyed by a string
                # derived from the record name.
                if rematch:
                    rname = re.sub(repattern, rematch, r['name'])
                else:
                    m = re.search(repattern, r['name'])
                    if m is None:
                        # Fail with a clear message instead of an
                        # AttributeError on None.
                        raise ValueError(
                            'Pattern %r does not match record name %r' %
                            (repattern, r['name']))
                    rname = m.group(0)
                oid = omero_ids[c.name][rname]
                c.values.append(oid)
            else:
                c.values.append(r[c.name])

    return cols
def iter_records(fn):
    """Yield the records stored in ``fn``, choosing a reader by extension.

    * ``.db``: opened read-only with ``shelve``; yields the shelf's values.
    * ``.pickle``: unpickled with ``cPickle``; yields the items of the
      loaded object.
    * anything else: read with ``AvroFileReader``.

    Args:
        fn: path of the file to read.

    Yields:
        One record at a time.
    """
    ext = os.path.splitext(fn)[-1]
    if ext == ".db":
        with closing(shelve.open(fn, flag="r")) as shelf:
            for r in shelf.itervalues():
                yield r
    elif ext == ".pickle":
        # SECURITY: unpickling can execute arbitrary code; only use this on
        # files from a trusted source.
        # NOTE(review): the file is opened in text mode; binary pickle
        # protocols need "rb" -- confirm how these files are written.
        with open(fn) as f:
            for r in cPickle.load(f):
                yield r
    else:
        # Avro container files are binary, so read them in binary mode.
        with open(fn, "rb") as f:
            for r in AvroFileReader(f):
                yield r
def get_avro_reader(fp):
    """Return an Avro reader for the open file object ``fp``.

    ``pyavroc.AvroFileReader`` is used when pyavroc can be imported;
    otherwise the function falls back to ``DataFileReader`` with a
    ``DatumReader``.
    """
    try:
        from pyavroc import AvroFileReader
        # Second positional argument is passed as False (presumably
        # pyavroc's "types" flag -- confirm against the pyavroc API).
        avro_reader = AvroFileReader(fp, False)
    except ImportError:
        datum_reader = DatumReader()
        avro_reader = DataFileReader(fp, datum_reader)
    return avro_reader
def get_image_size(fin):
    """Return the (Z, C, T) sizes taken from the first record of ``fin``.

    The first record's ``dimension_order`` entries are paired positionally
    with the entries of its ``pixel_data['shape']``; the sizes associated
    with 'Z', 'C' and 'T' are returned in that order.

    Args:
        fin: open file object passed to ``AvroFileReader``.

    Returns:
        A 3-tuple with the sizes for 'Z', 'C' and 'T'.

    Raises:
        StopIteration: if the file contains no records.
        KeyError: if 'Z', 'C' or 'T' is missing from ``dimension_order``.
    """
    reader = AvroFileReader(fin)
    # The next() builtin works on both Python 2 and Python 3 iterators,
    # unlike the Python 2 only .next() method.
    first = next(reader)
    size_map = dict(zip(first['dimension_order'],
                        first['pixel_data']['shape']))
    return tuple(size_map[axis] for axis in 'ZCT')
def iterplanes(avro_file):
    """Yield a ``BioImgPlane`` for each record of the Avro file at ``avro_file``.

    The file is opened in binary mode and stays open for as long as the
    generator is being consumed.
    """
    with open(avro_file, 'rb') as handle:
        for record in AvroFileReader(handle):
            plane = BioImgPlane(record)
            yield plane