def run(logger, args, extra_argv=None):
    """Compute features for every plane of an Avro file and write them out.

    Every record read from ``args.in_fn`` is wrapped in a ``BioImgPlane``;
    ``calc_features`` is run on its pixel data and one output record is
    written for each feature vector it produces.  The output file is
    ``<args.out_dir>/<tag>_features<ext>``, where ``<tag><ext>`` is the base
    name of the input file.

    Args:
        logger: logger used for progress messages.
        args: parsed options; the attributes read are ``in_fn``, ``out_dir``,
            ``long``, ``width``, ``height``, ``delta_x``, ``delta_y``,
            ``offset_x`` and ``offset_y``.
        extra_argv: not used by this function.

    Returns:
        0 once all records have been processed.

    Raises:
        SystemExit: if the output directory cannot be created (an already
            existing directory is not an error).
    """
    try:
        os.makedirs(args.out_dir)
    except OSError as e:
        # An already existing output directory is fine.
        if e.errno != errno.EEXIST:
            sys.exit('Cannot create output dir: %s' % e)
    tag, ext = os.path.splitext(os.path.basename(args.in_fn))
    out_fn = os.path.join(args.out_dir, '%s_features%s' % (tag, ext))
    logger.info('writing to %s', out_fn)
    # The feature options do not depend on the record: build them once.
    kw = {
        'long': args.long,
        'w': args.width,
        'h': args.height,
        'dx': args.delta_x,
        'dy': args.delta_y,
        'ox': args.offset_x,
        'oy': args.offset_y,
    }
    # Avro container files are binary: open both ends in binary mode.
    with open(out_fn, 'wb') as fout:
        writer = AvroFileWriter(fout, out_schema)
        try:
            with open(args.in_fn, 'rb') as fin:
                for r in AvroFileReader(fin):
                    p = BioImgPlane(r)
                    pixels = p.get_xy()
                    logger.info('processing %r', [p.z, p.c, p.t])
                    for fv in calc_features(pixels, p.name, **kw):
                        out_rec = to_avro(fv)
                        # Copy the plane identification into the record.
                        for name in 'img_path', 'series', 'z', 'c', 't':
                            out_rec[name] = getattr(p, name)
                        writer.write(out_rec)
        finally:
            # Close the writer even on failure, while fout is still open.
            writer.close()
    return 0
def test_tiling(self):
    """Check feature records computed on 3x4 tiles of a random image.

    Six tiles are expected.  Each resulting record must be serializable
    with the ``Signatures`` schema, carry the feature set version, the
    test name and the feature values at the positions given by
    ``FEATURE_NAMES``, and report the expected tile geometry.
    """
    a = make_random_data()
    w, h = 3, 4
    s = list(calc_features(a, self.name, w=w, h=h))
    self.assertEqual(len(s), 6)
    r = [to_avro(_) for _ in s]
    for rec in r:
        for k in 'img_path', 'series', 'z', 'c', 't':
            rec[k] = getattr(self, k)
    try:
        for rec in r:
            pyavroc_emu.AvroSerializer(Signatures).serialize(rec)
    except AvroException as e:
        self.fail("Could not serialize record: %s" % e)
    for rec, sigs in zip(r, s):
        self.assertEqual(rec["version"], sigs.feature_set_version)
        self.assertEqual(rec["name"], self.name)
        fmap = dict(izip(sigs.feature_names, sigs.values))
        for fname, (vname, idx) in FEATURE_NAMES.iteritems():
            v = fmap.get(fname)
            if v is None:
                # Features that were not computed leave their vector empty.
                self.assertEqual(len(rec[vname]), 0)
            else:
                self.assertEqual(rec[vname][idx], v)
    # Expected (x, y, w, h) of each tile, in the order they are produced.
    expected_tiles = [
        (0, 0, 3, 4),
        (3, 0, 3, 4),
        (6, 0, 2, 4),
        (0, 4, 3, 2),
        (3, 4, 3, 2),
        (6, 4, 2, 2),
    ]
    for rec, tile in zip(r, expected_tiles):
        self.assertEqual(
            (rec["x"], rec["y"], rec["w"], rec["h"]), tile)
def test_no_tiling(self):
    """Check the single feature record computed on a whole random image.

    Run for both values of the ``long`` option.  Without tiling exactly
    one feature set is expected; its record must be serializable with the
    ``Signatures`` schema, start at (0, 0), span the whole array and hold
    the feature values at the positions given by ``FEATURE_NAMES``.
    """
    a = make_random_data()
    # Named use_long so the Python 2 builtin ``long`` is not shadowed.
    for use_long in False, True:
        all_sigs = list(calc_features(a, self.name, long=use_long))
        self.assertEqual(len(all_sigs), 1)
        sigs = all_sigs[0]
        rec = to_avro(sigs)
        for k in 'img_path', 'series', 'z', 'c', 't':
            rec[k] = getattr(self, k)
        try:
            pyavroc_emu.AvroSerializer(Signatures).serialize(rec)
        except AvroException as e:
            self.fail("Could not serialize record: %s" % e)
        self.assertEqual(rec["version"], sigs.feature_set_version)
        self.assertEqual(rec["name"], self.name)
        self.assertEqual((rec["x"], rec["y"]), (0, 0))
        self.assertEqual((rec["h"], rec["w"]), a.shape)
        fmap = dict(izip(sigs.feature_names, sigs.values))
        for fname, (vname, idx) in FEATURE_NAMES.iteritems():
            v = fmap.get(fname)
            if v is None:
                # A feature may only be missing when the long set was not
                # requested (assertFalse is not stripped under -O).
                self.assertFalse(use_long)
                self.assertEqual(len(rec[vname]), 0)
            else:
                self.assertEqual(rec[vname][idx], v)
def map(self, ctx):
    """Emit the feature record computed from the plane in ``ctx.value``.

    The input value is wrapped in a ``BioImgPlane``, features are computed
    on its pixel data, and the resulting record, tagged with the plane's
    ``img_path``, ``series``, ``z``, ``c`` and ``t``, is emitted with a
    ``None`` key.
    """
    plane = BioImgPlane(ctx.value)
    xy_data = plane.get_xy()
    # TODO: support tiling
    # NOTE(review): the result of calc_features goes straight to to_avro,
    # i.e. it is assumed to be a single feature set -- confirm.
    features = calc_features(xy_data, plane.name)
    record = to_avro(features)
    for field in ('img_path', 'series', 'z', 'c', 't'):
        record[field] = getattr(plane, field)
    ctx.emit(None, record)
def run(logger, args, extra_argv=None):
    """Compute features for selected planes of an Avro file and write them.

    Every record read from ``args.in_fn`` is wrapped in a ``BioImgPlane``.
    Planes whose ``z``, ``c`` or ``t`` is not in the corresponding subset
    returned by ``get_subsets(args)`` are skipped.  For the others,
    ``calc_features`` is run on the pixel data and one output record is
    written for each feature vector it produces.  The output file is
    ``<args.out_dir>/<tag>_features<ext>``, where ``<tag><ext>`` is the base
    name of the input file.

    Args:
        logger: logger used for progress messages.
        args: parsed options; the attributes read here are ``in_fn``,
            ``out_dir``, ``long``, ``width``, ``height``, ``delta_x``,
            ``delta_y``, ``offset_x`` and ``offset_y``.  ``args`` is also
            passed to ``get_subsets``.
        extra_argv: not used by this function.

    Returns:
        0 once all records have been processed.

    Raises:
        SystemExit: if the output directory cannot be created (an already
            existing directory is not an error).
    """
    try:
        os.makedirs(args.out_dir)
    except OSError as e:
        # An already existing output directory is fine.
        if e.errno != errno.EEXIST:
            sys.exit('Cannot create output dir: %s' % e)
    tag, ext = os.path.splitext(os.path.basename(args.in_fn))
    out_fn = os.path.join(args.out_dir, '%s_features%s' % (tag, ext))
    logger.info('writing to %s', out_fn)
    zsubset, csubset, tsubset = get_subsets(args)
    # The feature options do not depend on the record: build them once.
    kw = {
        'long': args.long,
        'w': args.width,
        'h': args.height,
        'dx': args.delta_x,
        'dy': args.delta_y,
        'ox': args.offset_x,
        'oy': args.offset_y,
    }
    # Avro container files are binary: open both ends in binary mode.
    with open(out_fn, 'wb') as fout:
        writer = AvroFileWriter(fout, out_schema)
        try:
            with open(args.in_fn, 'rb') as fin:
                for r in AvroFileReader(fin):
                    p = BioImgPlane(r)
                    # A false (e.g. empty) subset disables filtering on
                    # that coordinate.
                    if zsubset and p.z not in zsubset:
                        continue
                    if csubset and p.c not in csubset:
                        continue
                    if tsubset and p.t not in tsubset:
                        continue
                    pixels = p.get_xy()
                    logger.info('processing %r', [p.z, p.c, p.t])
                    for fv in calc_features(pixels, p.name, **kw):
                        out_rec = to_avro(fv)
                        # Copy the plane identification into the record.
                        for name in 'img_path', 'series', 'z', 'c', 't':
                            out_rec[name] = getattr(p, name)
                        writer.write(out_rec)
        finally:
            # Close the writer even on failure, while fout is still open.
            writer.close()
    return 0