def comment_list(comments_html):
    gen = \
        pipe(lambda html: html.find_all('div', class_='comment_line'),
             cmap(lambda tag: tag.find('span')),
             cfilter(lambda x: x is not None),
             cmap(lambda span: span.text))
    return list(gen(comments_html))
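# The snippets in this section all lean on the project's `fp` combinators.
# A minimal sketch of how `pipe`, `cmap`, and `cfilter` are assumed to behave
# (curried, lazy, left-to-right composition); the real `fp` module may differ.
from functools import reduce

def pipe(*fns):
    # pipe(f, g)(x) == g(f(x)): apply the functions left to right.
    return lambda x: reduce(lambda acc, fn: fn(acc), fns, x)

def cmap(fn):
    # Curried, lazy map: cmap(f)(xs) == map(f, xs).
    return lambda xs: map(fn, xs)

def cfilter(pred):
    # Curried, lazy filter: cfilter(p)(xs) == filter(p, xs).
    return lambda xs: filter(pred, xs)

# Under these assumptions, comment_list builds one lazy pipeline and only
# materializes it with the final list(...) call.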
def article_no_list(board_name, page_no, category):
    return base.article_no_list(
        INSTIZ_LIST_STEM,
        {'id': board_name, 'page': str(page_no), 'category': str(category)},
        pipe(lambda html: html.find_all('td', class_='listno'),
             cmap(lambda tag: tag.find('a')),
             cfilter(lambda x: x is not None),
             cmap(lambda a: a.text),
             list)
    )
def article_id_list(url_stem, page_no):
    all_tr = lambda html: html.find_all('tr', class_='table_body')
    is_not_notice = lambda tag: 'notice' not in tag['class']
    get_id = lambda tag: tag.find('td', class_='id').text.strip()
    return base.article_no_list(
        url_stem,
        {'page': str(page_no)},
        pipe(all_tr, cfilter(is_not_notice), cmap(get_id), list))
def article_no_list(gall_id, page_no):
    html2no_list = \
        pipe(lambda html: html.find_all('td', class_='gall_num'),
             cfilter(lambda tag: tag.text != '공지'),  # skip pinned rows ('공지' = notice)
             cmap(lambda tag: tag.text),
             list)
    return base.article_no_list(DC_LIST_STEM,
                                {'id': gall_id, 'page': str(page_no)},
                                html2no_list)
def article_url_list(board_name, page_no):
    html2urls = \
        pipe(lambda html: html.find('table', class_='tbl_type01').find_all('tr'),
             cfilter(lambda tag: tag.find('td').text.isdigit()),
             cmap(lambda tag: tag.find('a')['href']),
             list)
    # The list shows 30 articles per page, so page 1 -> p=1, page 2 -> p=31, ...
    return base.article_no_list(MLB_LIST_STEM,
                                {'p': (int(page_no) - 1) * 30 + 1,
                                 'b': board_name,
                                 'm': 'list'},
                                html2urls)
def new_job_records(rootpath):
    '''
    rootpath is the root of a directory structure like below:

        rootpath
            dirname1
                filepath1
                filepath2
                ...
            dirname2
                filepath1
                ...

    Returns (now_index, list<dirname, list<filepath>>, selected).
    now_index is the last worked index; selected starts as an empty
    list where selected img paths will be saved.
    '''
    dirnames = os.listdir(rootpath)
    filepaths = \
        pipe(cmap(lambda dirname: os.path.join(rootpath, dirname)),
             cmap(utils.file_paths),
             cmap(list))
    return 0, list(zip(dirnames, filepaths(dirnames))), []
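# A usage sketch of the record returned above, assuming a hypothetical ./jobs
# directory with two subdirectories (the exact path strings depend on
# utils.file_paths):
#
#   now_index, records, selected = new_job_records('./jobs')
#   # now_index == 0, selected == []
#   # records   == [('cats', ['./jobs/cats/0.png', './jobs/cats/1.png']),
#   #               ('dogs', ['./jobs/dogs/0.png'])]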
def article_no_list(board_name, page_no, category=''):
    html2no_list = \
        pipe(lambda html: html.find_all('td', class_='eng list_vspace'),
             cfilter(lambda tag: tag.get('nowrap') is None),
             cfilter(lambda tag: tag.text.isdigit()),
             cmap(lambda tag: tag.text),
             list)
    return base.article_no_list(
        PPOM_LIST_STEM,
        {'id': board_name, 'page': str(page_no), 'category': str(category)},
        html2no_list
    )
    bgr_img[:, :, 1] = g_minus_b
    #cv2.imshow('intersection removed', bgr_img); cv2.waitKey(0)
    return bgr_img


if __name__ == '__main__':
    mask_dir = sys.argv[1]
    mask_paths = list(utils.file_paths(mask_dir))

    # If all three channels of the first mask are identical, treat the whole
    # set as grayscale.
    check_img = cv2.imread(mask_paths[0])
    if (np.array_equal(check_img[:, :, 0], check_img[:, :, 1]) and
            np.array_equal(check_img[:, :, 1], check_img[:, :, 2])):
        img_type = cv2.IMREAD_GRAYSCALE
    else:
        img_type = cv2.IMREAD_COLOR

    leaving_channels = sys.argv[2] if len(sys.argv) == 3 else 'rgb'
    # NOTE: the zip below assumes every path is readable; if cfilter drops an
    # unreadable image, paths and masks would fall out of sync.
    masks \
        = pipe(cmap(lambda path: cv2.imread(path, img_type)),
               cfilter(lambda img: img is not None),
               cmap(binarization),
               cmap(dilation),
               cmap(remove_intersection),
               cmap(lambda img: grayscale(img, leaving_channels)))
    for path, mask in zip(mask_paths, masks(mask_paths)):
        cv2.imwrite(path, mask)
    print('Now all masks in %s are clean!' % mask_dir)
        shutil.copyfile(file_path, os.path.join(eval_result_dirpath, file_name))
        print("file '%s' is copied into '%s'" % (file_name, eval_result_dirpath))

import sys, pathlib
from fp import pipe, cmap, cfilter

if __name__ == '__main__':
    '''
    python evaluator.py segnet.h5 imgs_dir output_dir
    '''
    segnet_model_path = sys.argv[1]
    imgs_dir = sys.argv[2]
    output_dir = sys.argv[3]

    utils.safe_copytree(imgs_dir, output_dir, ['*.*'])
    segnet = model.unet(segnet_model_path, (None, None, 1))

    f = pipe(utils.file_paths,
             cmap(lambda path: (cv2.imread(path, 0), path)),
             cfilter(lambda img_path: img_path[0] is not None),
             cmap(lambda img_path: (utils.bgr_float32(img_path[0]), img_path[1])),
             cmap(lambda im_p: (im_p[0].reshape((1,) + im_p[0].shape), im_p[1])),
             cmap(lambda im_p: (inference(segnet, im_p[0]), im_p[1])))

    old_parent_dir = pathlib.Path(imgs_dir).parts[-1]
    for segmap_list, img_path in f(imgs_dir):
        new_path = utils.make_dstpath(img_path, old_parent_dir, output_dir)
        segmap = segmap_list[0]
        segmap = (segmap.reshape(segmap.shape[:2]) * 255).astype(np.uint8)
        #cv2.imshow('segmap', segmap); cv2.waitKey(0)
        cv2.imwrite(new_path, segmap)
def view_comments(comment_list):
    gen = \
        pipe(enumerate,
             cmap(lambda s: '[%d] %s \n' % s))
    print(*gen(comment_list))
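# Usage sketch for view_comments with a hypothetical comment list: each
# comment is printed on its own line, prefixed with its index.
#
#   view_comments(['first comment', 'second comment'])
#   # [0] first comment
#   # [1] second comment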
    create a new argv[2] directory (tree structure preserved), separate the
    images in argv[1], and move the rgb imgs to the new directory.

    ex)
    python separator.py ./data/examples/ ./data/rgb
                        ^~~~~~~~~~~~~~~~ ^~~~~~~~~~
                        origin img dir   new directory for rgb imgs
    ''')

is_grayscale = (lambda img: np.all(img[:, :, 0] == img[:, :, 1]) and
                            np.all(img[:, :, 1] == img[:, :, 2]))

if __name__ == '__main__':
    mixed_imgs_path = sys.argv[1]
    rgb_imgs_path = sys.argv[2]

    utils.safe_copytree(mixed_imgs_path, rgb_imgs_path,
                        ('*.jpg', '*.jpeg', '*.png'))

    f = pipe(utils.file_paths,
             cmap(lambda path: (cv2.imread(path), path)),
             cfilter(lambda img_path: img_path[0] is not None),
             cfilter(lambda img_path: not is_grayscale(img_path[0])))

    old_parent_dir = pathlib.Path(mixed_imgs_path).parts[-1]
    timer = utils.ElapsedTimer('moving in')
    for img, img_path in f(mixed_imgs_path):
        new_path = utils.make_dstpath(img_path, old_parent_dir, rgb_imgs_path)
        #print(img_path, old_parent_dir, new_path)
        shutil.move(img_path, new_path)
    timer.elapsed_time()
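# A quick sanity check of the is_grayscale predicate on synthetic arrays
# (NumPy only, no image files needed); the names here are illustrative.
import numpy as np

_gray3 = np.stack([np.full((2, 2), 7, np.uint8)] * 3, axis=-1)  # identical channels
_color = _gray3.copy()
_color[0, 0, 0] = 0                                             # perturb one channel

assert is_grayscale(_gray3)
assert not is_grayscale(_color)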
def main(src_imgs_path, dataset_name, num_crop, crop_size, chk_size):
    if num_crop != 0:
        rand_sqr_crop = img2rand_sqr_crops(crop_size)
        gen \
            = pipe(utils.file_paths,
                   cmap(lambda path: cv2.imread(path)),
                   cfilter(lambda img: img is not None),
                   cfilter(lambda img: is_cuttable(img, crop_size)),
                   cmap(utils.slice1channel),
                   cflatMap(crepeat(num_crop)),
                   cmap(lambda img: rand_sqr_crop(img)),
                   cmap(lambda img: (img / 255).astype(np.float32)),
                   lambda imgs: split_every(chk_size, imgs))
    else:
        print('!')
        num_crop = 100  # big enough value..
        gen \
            = pipe(utils.file_paths,
                   cmap(lambda path: cv2.imread(path)),
                   cfilter(lambda img: img is not None),
                   cfilter(lambda img: is_cuttable(img, crop_size)),
                   cmap(utils.slice1channel),
                   cflatMap(lambda img: img2sqr_crops(img, crop_size)),
                   cmap(lambda img: (img / 255).astype(np.float32)),
                   lambda imgs: split_every(chk_size, imgs))

    print(src_imgs_path)
    expected_num_imgs = len(list(utils.file_paths(src_imgs_path))) * num_crop
    print('-------------- SUMMARY --------------')
    print(' dataset name      = ', dataset_name)
    print(' size of crop      = ', crop_size)
    print(' num crops per img = ', num_crop)
    print(' expected num imgs = ', expected_num_imgs)
    print(' chunk size        = ', chk_size)

    f = h5py.File(dataset_name, 'w')
    timer = utils.ElapsedTimer()
    #-------------------------------------------------------------
    f.create_dataset('images',
                     (expected_num_imgs, crop_size, crop_size, 1),
                     maxshape=(None, crop_size, crop_size, 1),
                     chunks=(chk_size, crop_size, crop_size, 1))

    mean = 0
    num_img_elems = (crop_size ** 2)
    for chk_no, chunk in tqdm(enumerate(gen(src_imgs_path)),
                              total=expected_num_imgs // chk_size):
        beg_idx = chk_no * chk_size
        f['images'][beg_idx:beg_idx + len(chunk)] = chunk
        mean = iter_mean(mean, beg_idx * num_img_elems,
                         np.sum(chunk), len(chunk) * num_img_elems)
    f.create_dataset('mean_pixel_value', data=mean)

    last_chunk_size = len(chunk)
    actual_num_img = chk_no * chk_size + last_chunk_size
    if actual_num_img != expected_num_imgs:
        print(expected_num_imgs, ' != ', actual_num_img)
        print('dataset resized!')
        f['images'].resize((actual_num_img, crop_size, crop_size, 1))

    # [mean test code]
    #li = list(flatten(gen(src_imgs_path)))
    #real_mean = np.mean(li)
    #print('real MEAN:', real_mean)
    #print(len(li))
    #print('saved MEAN:', f['mean_pixel_value'][()])
    #-------------------------------------------------------------
    f.close()

    print('------------------------------------')
    print('dataset generated successfully.')
    msg = timer.elapsed_time()
    '''
    import mailing
    mailing.send_mail_to_kur('Dataset generated successfully.', msg)
    '''

    # [load test code]
    f = h5py.File(dataset_name, 'r')
    #-------------------------------------------------------------
    print('f', f['images'].shape)
    print('loaded MEAN:', f['mean_pixel_value'][()])
    #for i in range(f['images'].shape[0]):
    #    cv2.imshow('img', f['images'][i]); cv2.waitKey(0)
    cv2.imshow('img', f['images'][-1])
    cv2.waitKey(0)
    #-------------------------------------------------------------
    f.close()
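# The chunk loop above folds a running mean with iter_mean. A minimal sketch
# of the incremental formula it is assumed to implement (the project's actual
# helper may differ): combine the old mean and the new chunk's sum, weighted
# by their element counts.
def iter_mean(prev_mean, prev_count, new_sum, new_count):
    total = prev_count + new_count
    if total == 0:
        return prev_mean
    # prev_mean * prev_count recovers the old sum; add the new sum and renormalize.
    return (prev_mean * prev_count + new_sum) / total

# e.g. iter_mean(2.0, 4, 12, 2) == (2.0 * 4 + 12) / 6 == 20 / 6 ≈ 3.33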
    num_crop = args.num_crop    # 3
    crop_size = args.crop_size  # 128
    chk_size = args.chk_size    # 100

    print(src_imgs_path)
    expected_num_imgs = len(list(utils.file_paths(src_imgs_path))) * num_crop
    print('-------------- SUMMARY --------------')
    print(' dataset name      = ', dataset_name)
    print(' size of crop      = ', crop_size)
    print(' num crops per img = ', num_crop)
    print(' expected num imgs = ', expected_num_imgs)
    print(' chunk size        = ', chk_size)

    img2_128x128crop = img2sqr_crop(crop_size)
    gen = pipe(utils.file_paths,
               cmap(lambda path: cv2.imread(path)),
               cfilter(lambda img: img is not None),
               cfilter(lambda img: is_cuttable(img, crop_size)),
               cmap(slice1channel),
               cflatMap(crepeat(num_crop)),
               cmap(lambda img: img2_128x128crop(img)),
               cmap(lambda img: (img / 255).astype(np.float32)),
               lambda imgs: split_every(chk_size, imgs))

    f = h5py.File(dataset_name, 'w')
    timer = utils.ElapsedTimer()
    #-------------------------------------------------------------
    f.create_dataset('images',
                     (expected_num_imgs, crop_size, crop_size, 1),
                     maxshape=(None, crop_size, crop_size, 1),
                     chunks=(chk_size, crop_size, crop_size, 1))
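# Both dataset builders lean on split_every, crepeat, and cflatMap. A minimal
# sketch of how they are assumed to behave (the real fp/utility code may differ):
from itertools import islice, repeat, chain

def split_every(n, iterable):
    # Yield successive lists of at most n items from an iterable.
    it = iter(iterable)
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk

def crepeat(n):
    # Curried: turn one item into n copies of it (flattened by cflatMap below).
    return lambda x: repeat(x, n)

def cflatMap(fn):
    # Curried flatMap: map fn over the iterable, then chain the results.
    return lambda xs: chain.from_iterable(map(fn, xs))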
if __name__ == '__main__':
    def path2path_img(path):
        return (path, cv2.imread(path))

    crop_size = int(sys.argv[1])
    imgs_dir = sys.argv[2]
    pieces_dir = sys.argv[3]

    utils.safe_copytree(imgs_dir, pieces_dir, ['*.jpg', '*.jpeg', '*.png'])

    timer = utils.ElapsedTimer('Total Cutting')
    #-------------------------------------------------------------
    pieces \
        = pipe(utils.file_paths,
               cmap(path2path_img),
               cfilter(lambda path_img: path_img[1] is not None),
               cmap(lambda pair: path_img2path_pieces(pair, crop_size,
                                                      imgs_dir, pieces_dir)),
               flatten)(imgs_dir)
    for path, img in pieces:
        cv2.imwrite(path, img)
    #-------------------------------------------------------------
    timer.elapsed_time()

import unittest
import os, sys, cv2
import utils
from fp import pipe, cmap, cfilter, flatten

img_dir = sys.argv[1]
# Images are read as grayscale, so each shape is (height, width); flatten then
# collects every dimension, and sizes[0] is the smallest single dimension
# found across all images.
f = \
    pipe(utils.file_paths,
         cmap(lambda path: cv2.imread(path, 0)),
         cfilter(lambda img: img is not None),
         cmap(lambda img: img.shape),
         flatten,
         sorted,
         list)
sizes = f(img_dir)
print(sizes)
print('smallest img size =', sizes[0])