def get_coco_stream(tags, ann_file, data_dir, threshold=0.1):
    """Build an mPyPl stream of COCO images containing the requested categories.

    Each record that leaves the pipeline carries the fields ``width``,
    ``height``, ``url``, ``filename``, ``ground_truth`` and ``class_id``;
    the intermediate ``meta``, ``anns_ids`` and ``anns`` fields are removed
    at the end.

    Args:
        tags: Category names passed to ``coco.getCatIds(catNms=...)``.
        ann_file: Path to the COCO annotation file handed to ``COCO(...)``.
        data_dir: Target directory passed to ``coco.download`` for each image.
        threshold: Currently unused by this function; kept so that existing
            callers passing it keep working.

    Returns:
        The mPyPl stream built from ``coco.loadImgs`` and the stages below.
    """
    coco = COCO(ann_file)
    cat_ids = coco.getCatIds(catNms=tags)

    # Merge the per-category image ids, keeping first-seen order. An image
    # that contains several of the requested categories is listed once per
    # category by the COCO API; without de-duplication it would be emitted
    # (and downloaded) multiple times with identical content.
    seen = set()
    img_ids = []
    for cat_id in cat_ids:
        for img_id in coco.getImgIds(catIds=cat_id):
            if img_id not in seen:
                seen.add(img_id)
                img_ids.append(img_id)

    def to_ground_truth(anns):
        # One bbox_to_dict(...) entry per annotation, labelled with the
        # category name looked up in coco.cats.
        return (anns
                | mp.select(lambda m: bbox_to_dict(
                    m['bbox'], coco.cats[m['category_id']]['name']))
                | mp.as_list)

    def to_class_id(ground_truth):
        # The record's class is the most frequent 'tag' among its boxes.
        return most_common(
            ground_truth | mp.select(lambda m: m['tag']) | mp.as_list)

    stream = (
        coco.loadImgs(img_ids)
        | mp.as_field('meta')
        # Copy selected entries of the COCO image record into their own fields.
        | mp.apply('meta', 'width', lambda x: x['width'])
        | mp.apply('meta', 'height', lambda x: x['height'])
        | mp.apply('meta', 'url', lambda x: x['coco_url'])
        | mp.apply('meta', 'filename', lambda x: x['file_name'])
        # Only annotations belonging to the requested categories are loaded.
        | mp.apply(
            'meta', 'anns_ids',
            lambda x: coco.getAnnIds(
                imgIds=x['id'], catIds=cat_ids, iscrowd=None))
        | mp.apply('anns_ids', 'anns', lambda x: coco.loadAnns(x))
        | mp.apply('anns', 'ground_truth', to_ground_truth)
        | mp.apply('ground_truth', 'class_id', to_class_id)
        # Calls coco.download for the record's image id into data_dir.
        | mp.iter('meta', lambda x: coco.download(data_dir, [x['id']]))
        | mp.delfield(['meta', 'anns_ids', 'anns'])
    )
    return stream
# mPyPl - Monadic Pipeline Library for Python
# http://github.com/shwars/mPyPl

# Simple samples

# `sys` is needed for the path tweak below and was not imported before use.
import sys

# Make a local mPyPl checkout importable. The raw string yields the same
# path value as before while avoiding the invalid `\m` escape sequence.
sys.path.append(r'z:\GitWork\mPyPl')

import mPyPl as mp
from pipe import *
from mPyPl.utils.pipeutils import *

# Sample 1: add field 'n5' (n modulo 5) and pass the stream to
# mp.dict_group_by('n5'). The result is not stored; presumably this sample
# is meant to be evaluated interactively.
range(100) | mp.as_field('n') | mp.apply(
    'n', 'n5', lambda x: x % 5) | mp.dict_group_by('n5')

# Sample 2: label each number with class_id = n % 5, run mp.datasplit with
# split_value=0.2, then separate the stream into Tr and Te.
data = range(100) | mp.as_field('n') | mp.apply(
    'n', 'class_id', lambda x: x % 5) | mp.datasplit(split_value=0.2)
Tr, Te = data | mp.make_train_test_split()
# Sizes of the two parts (values are discarded when run as a script).
len(Tr | as_list)
len(Te | as_list)

# Sample 3: same labelled data piped through pshuffle, then
# mp.sample_classes on 'class_id' with classes=range(15). Note that only
# class ids 0..4 occur in the data.
data = range(100) | mp.as_field('n') | mp.apply('n', 'class_id',
                                                lambda x: x % 5) | pshuffle
data | mp.sample_classes('class_id', 1, classes=range(15)) | as_list

# Sample 4: open an XML stream over a local directory.
x = mp.get_xmlstream_fromdir('e:\\data\\babylon\\')
print("Face Dataset Generator")
print(" + Loading descriptions from {}".format(dir))


def loadjs(fn):
    """Parse the JSON file at path *fn* and return the decoded content."""
    with open(fn) as handle:
        content = json.load(handle)
    return content


# Lower bound for the horizontal pupil distance; 0 when small faces are kept.
if args.ignore_small:
    min_size = size / 3
else:
    min_size = 0

# Exclusive upper bound on the number of faces described in one file.
if args.ignore_multiface:
    max_faces_no = 2
else:
    max_faces_no = 99999


def _face_count_ok(descr):
    """Accept descriptions with at least one and fewer than max_faces_no faces."""
    return 0 < len(descr) < max_faces_no


def _pose_ok(face):
    """Accept faces whose yaw and pitch are both below 15 in absolute value."""
    pose = face['faceAttributes']['headPose']
    return abs(pose['yaw']) < 15 and abs(pose['pitch']) < 15


def _eyes_far_enough(face):
    """Accept faces whose right-minus-left pupil x distance exceeds min_size."""
    landmarks = face['faceLandmarks']
    return landmarks['pupilRight']['x'] - landmarks['pupilLeft']['x'] > min_size


# Load every .json description, filter by face count, unroll 'descr' into
# one record per entry, then filter by head pose and pupil distance.
data = (
    mp.get_files(dir, ext='.json')
    | mp.as_field('filename')
    | mp.apply('filename', 'descr', loadjs)
    | mp.filter('descr', _face_count_ok)
    | mp.unroll('descr')
    | mp.filter('descr', _pose_ok)
    | mp.filter('descr', _eyes_far_enough)
    | mp.as_list
)

print(" + Found {} faces".format(len(data)))
print(" + Storing dataset...")