def test_fasterrcnn_preprocessor(self):
    """Send the zipped test images through the preprocessor service.

    Every returned document must carry at least one chunk, and every chunk
    blob must decode to an array of shape ``(224, 224, 3)``.
    """
    args = set_preprocessor_parser().parse_args(
        ['--yaml_path', self.fasterrcnn_yaml])
    # The client is wired with the service ports swapped.
    c_args = _set_client_parser().parse_args(
        ['--port_in', str(args.port_out), '--port_out', str(args.port_in)])

    # Read every archive member up front and close the archive right away,
    # instead of leaking the ZipFile and one handle per member.
    with zipfile.ZipFile(self.data_path) as all_zips:
        all_bytes = [all_zips.read(v) for v in all_zips.namelist()]

    with ServiceManager(PreprocessorService, args), ZmqClient(c_args) as client:
        for req in RequestGenerator.index(all_bytes):
            msg = gnes_pb2.Message()
            msg.request.index.CopyFrom(req.index)
            client.send_message(msg)
            r = client.recv_message()
            for d in r.request.index.docs:
                self.assertGreater(len(d.chunks), 0)
                for chunk in d.chunks:
                    # Decode each blob once and check the whole shape.
                    shape = blob2array(chunk.blob).shape
                    self.assertEqual(len(shape), 3)
                    self.assertEqual(shape[-1], 3)
                    self.assertEqual(shape[0], 224)
                    self.assertEqual(shape[1], 224)
                # Report the dtype of the first chunk once per document.
                print(blob2array(d.chunks[0].blob).dtype)
def test_singleton_preprocessor_service_realdata(self):
    """Run the singleton image preprocessor service on ``imgs/test.zip``.

    Each response must be routed through
    ``PreprocessorService:BaseSingletonPreprocessor`` and every document must
    hold exactly one chunk whose blob decodes to shape ``(224, 224, 3)``.
    """
    args = set_preprocessor_service_parser().parse_args(
        ['--yaml_path', self.singleton_img_pre_yaml])
    # The client is wired with the service ports swapped.
    c_args = _set_client_parser().parse_args(
        ['--port_in', str(args.port_out), '--port_out', str(args.port_in)])

    # Load all members and close the archive immediately (no leaked handles).
    with zipfile.ZipFile(os.path.join(self.dirname, 'imgs/test.zip')) as all_zips:
        all_bytes = [all_zips.read(v) for v in all_zips.namelist()]

    with PreprocessorService(args), ZmqClient(c_args) as client:
        for req in RequestGenerator.index(all_bytes):
            msg = gnes_pb2.Message()
            msg.request.index.CopyFrom(req.index)
            client.send_message(msg)
            r = client.recv_message()
            self.assertEqual(
                r.envelope.routes[0].service,
                'PreprocessorService:BaseSingletonPreprocessor')
            for d in r.request.index.docs:
                self.assertEqual(len(d.chunks), 1)
                # Decode the single chunk once and reuse the array.
                decoded = blob2array(d.chunks[0].blob)
                self.assertEqual(len(decoded.shape), 3)
                self.assertEqual(decoded.shape[-1], 3)
                self.assertEqual(decoded.shape[0], 224)
                self.assertEqual(decoded.shape[1], 224)
                print(decoded.dtype)
def test_get_frames(self):
    """With ``sframes=3`` the first chunk keeps 2 frames, all others keep 3."""
    doc = copy.deepcopy(self.doc)
    FrameSelectPreprocessor(sframes=3).apply(doc)
    for position, chunk in enumerate(doc.chunks):
        expected_frames = 2 if position == 0 else 3
        self.assertEqual(blob2array(chunk.blob).shape[0], expected_frames)
def test_benchmark5(self):
    """Time a send->recv round trip with ``squeeze_pb=True`` and verify it.

    Each message is sent by ``c1`` and received by ``c2``; the embeddings and
    blobs of the received chunks must match those of an untouched deep copy
    of the message that was sent.
    """
    all_msgs = self.build_msgs2()
    # Reference copies taken before sending; the comparison below is made
    # against these rather than against the message object handed to send.
    all_msgs_bak = copy.deepcopy(all_msgs)

    with ZmqClient(self.c1_args) as c1, ZmqClient(self.c2_args) as c2:
        with TimeContext('send->recv, squeeze_pb=True'):
            for m, m1 in zip(all_msgs, all_msgs_bak):
                c1.send_message(m, squeeze_pb=True)
                r_m = c2.recv_message()
                for d, r_d in zip(m1.request.index.docs, r_m.request.index.docs):
                    for c, r_c in zip(d.chunks, r_d.chunks):
                        # np.allclose only returns a bool; it must be
                        # asserted or the comparison checks nothing.
                        self.assertTrue(
                            np.allclose(blob2array(c.embedding),
                                        blob2array(r_c.embedding)))
                        self.assertTrue(
                            np.allclose(blob2array(c.blob),
                                        blob2array(r_c.blob)))
def apply(self, docs: 'gnes_pb2.Document') -> None:
    """
    write the meta info and every chunk of a document into disk
    folder structure: /data_path/doc_id/.meta plus one file per chunk:
    ``<i>.<file_suffix>`` for chunks whose content is ``raw`` and a
    ``numpy.save`` dump named ``<i>`` for chunks whose content is ``blob``

    :param docs: the document to dump
    :raises NotImplementedError: if a chunk's content is neither ``raw`` nor ``blob``
    """
    doc_id = str(docs.doc_id)
    dirs = os.path.join(self.data_path, doc_id)
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(dirs, exist_ok=True)

    # keep doc meta in .meta file
    with open(os.path.join(dirs, '.meta'), 'wb') as f:
        f.write(docs.meta_info or b'')
    self.logger.info("successfully write meta info for: %s" % doc_id)
    self.logger.info("%s has total %d chunks." % (doc_id, len(docs.chunks)))

    for i, chunk in enumerate(docs.chunks):
        data_type = chunk.WhichOneof('content')
        if data_type == 'raw':
            with open(os.path.join(dirs, '%d.%s' % (i, self.file_suffix)), 'wb') as f:
                f.write(chunk.raw)
        elif data_type == 'blob':
            np.save(os.path.join(dirs, '%d' % i), blob2array(chunk.blob))
            self.logger.info("successfully write blob %d for: %s" % (i, doc_id))
        else:
            self.logger.info("data_type is : %s" % str(data_type))
            raise NotImplementedError(
                'unsupported chunk content type: %s' % str(data_type))
def apply(self, doc: 'gnes_pb2.Document') -> None:
    """
    select frames from every chunk blob and resize the selected ones

    - ``sframes == 1``: keep only the middle frame
    - ``sframes > 0`` and the chunk has more than ``sframes`` frames: take
      every ``ceil(n / sframes)``-th frame when ``n >= 2 * sframes``,
      otherwise ``sframes`` randomly chosen frames in their original order
    - any other chunk is left untouched (and is not resized)

    :param doc: the document whose chunk blobs are rewritten in place
    """
    super().apply(doc)

    if len(doc.chunks) == 0:
        self.logger.error(
            'bad document: "doc.chunks" is empty!')
        return

    def _resized(img):
        # cast to uint8, resize to (target_width, target_height), back to array
        pil_img = Image.fromarray(img.astype('uint8'))
        return np.array(pil_img.resize((self.target_width, self.target_height)))

    for chunk in doc.chunks:
        images = blob2array(chunk.blob)
        total = len(images)

        if total == 0:
            self.logger.warning("this chunk has no frame!")
            continue

        if self.sframes == 1:
            middle = int(total / 2)
            single = np.expand_dims(_resized(images[middle]), axis=0)
            chunk.blob.CopyFrom(array2blob(single))
            continue

        if not (self.sframes > 0 and total > self.sframes):
            continue

        if total >= 2 * self.sframes:
            stride = math.ceil(total / self.sframes)
            picked = images[::stride]
        else:
            order = np.sort(np.random.choice(total, self.sframes, replace=False))
            picked = images[order]

        resized = np.array([_resized(img) for img in picked])
        chunk.blob.CopyFrom(array2blob(resized))
def apply(self, doc: 'gnes_pb2.Document') -> None:
    """
    encode frames with ``self._encode``

    When the document has chunks, every chunk's blob is encoded into
    ``chunk.raw``.  Otherwise, if ``raw_data`` is set and holds an
    ``NdArray``, ``doc.raw_video`` is encoded into ``doc.raw_bytes``.
    Anything else is only logged as an error.

    :param doc: the document to encode in place
    """
    super().apply(doc)

    if len(doc.chunks) > 0:
        for chunk in doc.chunks:
            chunk.raw = self._encode(blob2array(chunk.blob))
        return

    active_field = doc.WhichOneof('raw_data')
    if not active_field:
        self.logger.error(
            'bad document: "doc.chunks" and "doc.raw_video" is empty!')
        return

    if type(getattr(doc, active_field)) == gnes_pb2.NdArray:
        frames = blob2array(doc.raw_video)
        doc.raw_bytes = self._encode(frames)
    else:
        self.logger.error('bad document: "doc.raw_video" is empty!')
def img_process_for_test(dirname):
    """Preprocess the images in ``<dirname>/imgs/test.zip`` with two pipelines.

    The first pipeline is ``UnaryPreprocessor`` (image doc type) followed by
    ``ResizeChunkPreprocessor``; the second is ``VanillaSlidingPreprocessor``
    followed by ``ResizeChunkPreprocessor``.

    :param dirname: directory that contains ``imgs/test.zip``
    :return: a list with one entry per pipeline, each a flat list of the
        decoded chunk blobs of all images
    """
    # Read every member and close the archive instead of leaking the handle.
    with zipfile.ZipFile(os.path.join(dirname, 'imgs/test.zip')) as zipfile_:
        all_bytes = [zipfile_.read(v) for v in zipfile_.namelist()]

    test_img = []
    for raw_bytes in all_bytes:
        d = gnes_pb2.Document()
        d.raw_bytes = raw_bytes
        test_img.append(d)

    pipeline_prep1 = PipelinePreprocessor()
    pipeline_prep1.components = lambda: [
        UnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
        ResizeChunkPreprocessor(),
    ]

    pipeline_prep2 = PipelinePreprocessor()
    pipeline_prep2.components = lambda: [
        VanillaSlidingPreprocessor(),
        ResizeChunkPreprocessor(),
    ]

    test_img_all_preprocessor = []
    for preprocessor in [pipeline_prep1, pipeline_prep2]:
        # Each pipeline works on its own copy because apply() mutates the docs.
        test_img_copy = copy.deepcopy(test_img)
        for img in test_img_copy:
            preprocessor.apply(img)
        test_img_all_preprocessor.append([
            blob2array(chunk.blob)
            for img in test_img_copy
            for chunk in img.chunks
        ])
    return test_img_all_preprocessor
def test_video_cut_by_frame(self):
    """All chunks but the last are (30, 168, 192, 3); the last has <= 30 frames."""
    args = set_preprocessor_parser().parse_args(
        ['--yaml_path', self.yml_path_2])
    c_args = _set_client_parser().parse_args(
        ['--port_in', str(args.port_out), '--port_out', str(args.port_in)])

    with PreprocessorService(args), ZmqClient(c_args) as client:
        for req in RequestGenerator.index(self.video_bytes):
            request_msg = gnes_pb2.Message()
            request_msg.request.index.CopyFrom(req.index)
            client.send_message(request_msg)
            response = client.recv_message()

            for d in response.request.index.docs:
                self.assertGreater(len(d.chunks), 0)
                chunks = list(d.chunks)
                # every chunk except the trailing one must be a full cut
                for full_chunk in chunks[:-1]:
                    shape = blob2array(full_chunk.blob).shape
                    self.assertEqual(shape, (30, 168, 192, 3))
                # the trailing chunk may hold fewer frames
                tail_shape = blob2array(chunks[-1].blob).shape
                self.assertLessEqual(tail_shape[0], 30)
def apply(self, doc: 'gnes_pb2.Document') -> None:
    """
    split a video document into shots and store each shot as a chunk

    Frames come from ``video.capture_frames`` when ``doc.raw_bytes`` is set,
    or from ``doc.raw_video`` (truncated to the first ``vframes`` frames when
    ``vframes > 0``) when the raw data is an ``NdArray``.  Each detected shot
    becomes one chunk with ``offset`` = shot index and ``weight`` = shot
    length / total frames.  Shots longer than ``sframes`` are sub-sampled:
    by a fixed stride when at least twice as long, otherwise by a sorted
    random choice of ``sframes`` frames.

    :param doc: the document to fill with chunks in place
    """
    super().apply(doc)

    video_frames = []
    active_field = doc.WhichOneof('raw_data')

    if active_field:
        raw_type = type(getattr(doc, active_field))
        if doc.raw_bytes:
            video_frames = video.capture_frames(
                input_data=doc.raw_bytes,
                scale=self.frame_size,
                fps=self.frame_rate,
                vframes=self.vframes)
        elif raw_type == gnes_pb2.NdArray:
            video_frames = blob2array(doc.raw_video)
            if self.vframes > 0:
                video_frames = video_frames[0:self.vframes, :].copy()

        num_frames = len(video_frames)
        if num_frames > 0:
            for ci, frames in enumerate(self.detect_shots(video_frames)):
                shot_len = len(frames)

                c = doc.chunks.add()
                c.doc_id = doc.doc_id
                c.offset = ci
                c.weight = shot_len / num_frames

                if self.sframes > 0 and shot_len > self.sframes:
                    if shot_len >= 2 * self.sframes:
                        frames = frames[::math.ceil(shot_len / self.sframes)]
                    else:
                        keep = np.sort(
                            np.random.choice(shot_len, self.sframes, replace=False))
                        frames = [frames[pos] for pos in keep]

                c.blob.CopyFrom(array2blob(np.array(frames)))
        else:
            self.logger.error(
                'bad document: "raw_bytes" or "raw_video" is empty!')
    else:
        self.logger.error('bad document: "raw_data" is empty!')

    if self.drop_raw_data:
        self.logger.info("document raw data will be cleaned!")
        doc.ClearField('raw_data')
def img_process_for_test(dirname):
    """Preprocess the images in ``<dirname>/imgs/test.zip`` with two preprocessors.

    The images are run through ``BaseSingletonPreprocessor`` (image doc type)
    and through ``VanillaSlidingPreprocessor``.

    :param dirname: directory that contains ``imgs/test.zip``
    :return: a list with one entry per preprocessor, each a flat list of the
        decoded chunk blobs of all images
    """
    # Read every member and close the archive instead of leaking the handle.
    with zipfile.ZipFile(os.path.join(dirname, 'imgs/test.zip'), "r") as zipfile_:
        all_bytes = [zipfile_.read(v) for v in zipfile_.namelist()]

    test_img = []
    for raw_bytes in all_bytes:
        d = gnes_pb2.Document()
        d.raw_bytes = raw_bytes
        test_img.append(d)

    preprocessors = [
        BaseSingletonPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
        VanillaSlidingPreprocessor(),
    ]

    test_img_all_preprocessor = []
    for preprocessor in preprocessors:
        # Each preprocessor works on its own copy because apply() mutates the docs.
        test_img_copy = copy.deepcopy(test_img)
        for img in test_img_copy:
            preprocessor.apply(img)
        test_img_all_preprocessor.append([
            blob2array(chunk.blob)
            for img in test_img_copy
            for chunk in img.chunks
        ])
    return test_img_all_preprocessor
def test_video_decode_preprocessor(self):
    """Decode every file in ``self.video_path`` through the preprocessor service.

    Every returned document must carry at least one chunk, and each chunk
    blob must have shape ``(n, 299, 299, 3)``.
    """
    args = set_preprocessor_parser().parse_args(['--yaml_path', self.yml_path])
    # The client is wired with the service ports swapped.
    c_args = _set_client_parser().parse_args([
        '--port_in', str(args.port_out),
        '--port_out', str(args.port_in)])

    # Read each video inside a `with` so no file handle is left open.
    video_bytes = []
    for file_name in os.listdir(self.video_path):
        with open(os.path.join(self.video_path, file_name), 'rb') as fp:
            video_bytes.append(fp.read())

    with ServiceManager(PreprocessorService, args), ZmqClient(c_args) as client:
        for req in RequestGenerator.index(video_bytes):
            msg = gnes_pb2.Message()
            msg.request.index.CopyFrom(req.index)
            client.send_message(msg)
            r = client.recv_message()
            for d in r.request.index.docs:
                self.assertGreater(len(d.chunks), 0)
                for chunk in d.chunks:
                    shape = blob2array(chunk.blob).shape
                    self.assertEqual(shape[1:], (299, 299, 3))
def apply(self, doc: 'gnes_pb2.Document') -> None:
    """
    select frames from every chunk blob

    - ``sframes == 1``: keep only the middle frame (leading axis preserved)
    - ``sframes > 0`` and the chunk has more than ``sframes`` frames: take
      every ``ceil(n / sframes)``-th frame when ``n >= 2 * sframes``,
      otherwise ``sframes`` randomly chosen frames in their original order
    - any other chunk is left untouched

    :param doc: the document whose chunk blobs are rewritten in place
    """
    super().apply(doc)

    if len(doc.chunks) == 0:
        self.logger.error('bad document: "doc.chunks" is empty!')
        return

    for chunk in doc.chunks:
        images = blob2array(chunk.blob)
        total = len(images)

        if total == 0:
            self.logger.warning("this chunk has no frame!")
            continue

        if self.sframes == 1:
            # index with a one-element list so the frame axis is kept
            selected = images[[int(total / 2)]]
        elif self.sframes > 0 and total > self.sframes:
            if total >= 2 * self.sframes:
                selected = images[::math.ceil(total / self.sframes)]
            else:
                order = np.sort(
                    np.random.choice(total, self.sframes, replace=False))
                selected = images[order]
        else:
            continue

        chunk.blob.CopyFrom(array2blob(selected))
def apply(self, doc: 'gnes_pb2.Document') -> None:
    """
    split a video document into shots and store each shot as a chunk

    Frames come from ``video.capture_frames`` when ``doc.raw_bytes`` is set,
    or from ``doc.raw_video`` when the raw data is an ``NdArray``; in the
    latter case, if ``frame_num > 0``, frames are sub-sampled with an integer
    stride of ``len(frames) // frame_num`` (at least 1).  Each detected shot
    becomes one chunk with ``offset`` = shot index and ``weight`` = shot
    length / total frames.

    :param doc: the document to fill with chunks in place
    """
    super().apply(doc)

    video_frames = []
    if doc.WhichOneof('raw_data'):
        raw_type = type(getattr(doc, doc.WhichOneof('raw_data')))
        if doc.raw_bytes:
            video_frames = video.capture_frames(input_data=doc.raw_bytes,
                                                scale=self.frame_size,
                                                fps=self.frame_rate,
                                                vframes=self.frame_num)
        elif raw_type == gnes_pb2.NdArray:
            video_frames = blob2array(doc.raw_video)
            if self.frame_num > 0:
                # A slice step must be a non-zero integer: true division
                # yields a float (TypeError), and floor division alone gives
                # 0 when there are fewer frames than frame_num (ValueError).
                stepwise = max(1, len(video_frames) // self.frame_num)
                video_frames = video_frames[0::stepwise, :]

        num_frames = len(video_frames)
        if num_frames > 0:
            shots = self.detect_shots(video_frames)
            for ci, frames in enumerate(shots):
                c = doc.chunks.add()
                c.doc_id = doc.doc_id
                chunk_data = np.array(frames)
                c.blob.CopyFrom(array2blob(chunk_data))
                c.offset = ci
                c.weight = len(frames) / num_frames
        else:
            self.logger.error(
                'bad document: "raw_bytes" or "raw_video" is empty!')
    else:
        self.logger.error('bad document: "raw_data" is empty!')

    if self.drop_raw_data:
        self.logger.info("document raw data will be cleaned!")
        doc.ClearField('raw_data')
def add(self, keys: List[int], docs: List['gnes_pb2.Document'], *args, **kwargs):
    """
    write every document into its own folder under ``data_path``
    folder structure: /data_path/<key>/.meta plus, depending on ``file_format``:

    - ``png`` / ``webp`` / ``jpeg``: /data_path/<key>/<chunk_idx>/<frame_idx>.<file_format>,
      one image per frame of the chunk blob
    - ``mp4`` / ``gif``: /data_path/<key>/<chunk_idx>.<file_format> holding ``chunk.raw``

    :param keys: one key per document, used as the folder name
    :param docs: the documents to store; paired with ``keys`` by position
    :raises NotImplementedError: if a chunk is met while ``file_format`` is unsupported
    """
    for k, d in zip(keys, docs):
        dirs = os.path.join(self.data_path, str(k))
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs(dirs, exist_ok=True)

        # keep doc meta in .meta file
        with open(os.path.join(dirs, '.meta'), 'wb') as f:
            f.write(d.meta_info or b'')

        for i, chunk in enumerate(d.chunks):
            if self.file_format in ['png', 'webp', 'jpeg']:
                chunk_dir = os.path.join(dirs, '%d' % i)
                os.makedirs(chunk_dir, exist_ok=True)

                frames = blob2array(chunk.blob)
                for j, x in enumerate(frames):
                    frame_path = os.path.join(
                        chunk_dir, '%d.%s' % (j, self.file_format))
                    img = Image.fromarray(x, 'RGB')
                    img.save(frame_path, self.file_format, quality=self.quality)
            elif self.file_format in ['mp4', 'gif']:
                with open(
                        os.path.join(dirs, '%d.%s' % (i, self.file_format)),
                        'wb') as f:
                    f.write(chunk.raw)
            else:
                self.logger.error(
                    "the file format %s has not been supported!" % self.file_format)
                raise NotImplementedError(
                    'unsupported file format: %s' % self.file_format)
def test_get_one_frame(self):
    """With ``sframes=1`` every chunk is reduced to exactly one frame."""
    doc = copy.deepcopy(self.doc)
    FrameSelectPreprocessor(sframes=1).apply(doc)
    for chunk in doc.chunks:
        frame_count = blob2array(chunk.blob).shape[0]
        self.assertEqual(frame_count, 1)
def test_array_proto(self):
    """A random 5x4 array must survive an array2blob -> blob2array round trip."""
    original = np.random.random([5, 4])
    restored = blob2array(array2blob(original))
    assert_array_equal(original, restored)