コード例 #1
0
ファイル: instiz_core.py プロジェクト: KUR-creative/crawlers
def comment_list(comments_html):
    """Collect the text of the ``span`` found in each ``div.comment_line``.

    ``comments_html`` must expose ``find_all`` (presumably a BeautifulSoup
    document -- confirm against the caller).  Comment divs in which no
    ``span`` is found are skipped.

    Returns a list with one text entry per remaining comment.
    """
    def comment_divs(html):
        return html.find_all('div', class_='comment_line')

    def span_of(div):
        return div.find('span')

    def was_found(node):
        return node is not None

    def text_of(span):
        return span.text

    extract = pipe(comment_divs,
                   cmap(span_of),
                   cfilter(was_found),
                   cmap(text_of))
    return list(extract(comments_html))
コード例 #2
0
ファイル: instiz_core.py プロジェクト: KUR-creative/crawlers
def article_no_list(board_name, page_no, category):
    """Fetch the article numbers listed on one board page.

    The request parameters and the HTML-to-list extractor are handed to
    ``base.article_no_list`` together with ``INSTIZ_LIST_STEM``; whatever
    that helper returns is returned unchanged.

    The extractor takes every ``td.listno`` cell, keeps the ones in which
    an ``a`` element is found, and yields the text of that element.
    """
    query = {
        'id': board_name,
        'page': str(page_no),
        'category': str(category),
    }
    html2no_list = pipe(
        lambda html: html.find_all('td', class_='listno'),
        cmap(lambda cell: cell.find('a')),
        cfilter(lambda link: link is not None),
        cmap(lambda link: link.text),
        list,
    )
    return base.article_no_list(INSTIZ_LIST_STEM, query, html2no_list)
コード例 #3
0
ファイル: ruli_core.py プロジェクト: KUR-creative/crawlers
def article_id_list(url_stem, page_no):
    """Fetch the article ids listed on page ``page_no`` under ``url_stem``.

    Delegates the request to ``base.article_no_list`` and supplies an
    extractor that walks every ``tr.table_body`` row, drops rows whose
    ``class`` attribute contains ``'notice'``, and returns the stripped
    text of each remaining row's ``td.id`` cell as a list.
    """
    def table_rows(html):
        return html.find_all('tr', class_='table_body')

    def not_a_notice(row):
        return 'notice' not in row['class']

    def row_id(row):
        return row.find('td', class_='id').text.strip()

    html2id_list = pipe(table_rows,
                        cfilter(not_a_notice),
                        cmap(row_id),
                        list)
    query = {'page': str(page_no)}
    return base.article_no_list(url_stem, query, html2id_list)
コード例 #4
0
ファイル: dc_core.py プロジェクト: KUR-creative/crawlers
def article_no_list(gall_id, page_no):
    """Fetch the article numbers listed on one gallery page.

    Delegates the request to ``base.article_no_list`` with
    ``DC_LIST_STEM``.  The extractor reads the text of every
    ``td.gall_num`` cell and skips the cells whose text is '공지'
    (Korean for "notice").
    """
    def number_cells(html):
        return html.find_all('td', class_='gall_num')

    def is_regular_post(cell):
        return cell.text != '공지'

    def cell_text(cell):
        return cell.text

    query = {'id': gall_id, 'page': str(page_no)}
    extractor = pipe(number_cells,
                     cfilter(is_regular_post),
                     cmap(cell_text),
                     list)
    return base.article_no_list(DC_LIST_STEM, query, extractor)
コード例 #5
0
def article_url_list(board_name, page_no):
    """Fetch the article URLs listed on one board page.

    Delegates the request to ``base.article_no_list`` with
    ``MLB_LIST_STEM``.  The ``'p'`` parameter is computed as
    ``(page_no - 1) * 30 + 1`` (NOTE(review): looks like a 1-based item
    offset with 30 items per page -- confirm).

    The extractor takes the rows of ``table.tbl_type01``, keeps the rows
    whose first ``td`` holds only digits, and returns the ``href`` of the
    ``a`` element found in each kept row.
    """
    def table_rows(html):
        table = html.find('table', class_='tbl_type01')
        return table.find_all('tr')

    def is_numbered(row):
        return row.find('td').text.isdigit()

    def link_target(row):
        return row.find('a')['href']

    query = {
        'p': (int(page_no) - 1) * 30 + 1,
        'b': board_name,
        'm': 'list',
    }
    extractor = pipe(table_rows,
                     cfilter(is_numbered),
                     cmap(link_target),
                     list)
    return base.article_no_list(MLB_LIST_STEM, query, extractor)
コード例 #6
0
def new_job_records(rootpath):
    """Create a fresh job record for the directory tree under ``rootpath``.

    Expected layout::

        rootpath/
          dirname1/
            file1
            file2
            ...
          dirname2/
            file1
          ...

    Returns a tuple ``(now_index, records, selected)``:

    * ``now_index`` -- index of the last worked entry; always 0 here.
    * ``records``   -- list of ``(dirname, [filepath, ...])`` pairs, one per
      entry of ``os.listdir(rootpath)``; the file paths come from
      ``utils.file_paths`` applied to the joined entry path.
    * ``selected``  -- new empty list, meant to receive image paths later.
    """
    dirnames = os.listdir(rootpath)
    list_files = pipe(
        cmap(lambda name: os.path.join(rootpath, name)),
        cmap(utils.file_paths),
        cmap(list),
    )
    records = list(zip(dirnames, list_files(dirnames)))
    return 0, records, []
コード例 #7
0
ファイル: ppom_core.py プロジェクト: KUR-creative/crawlers
def article_no_list(board_name, page_no, category=''):
    """Fetch the article numbers listed on one board page.

    Delegates the request to ``base.article_no_list`` with
    ``PPOM_LIST_STEM`` and the ``id`` / ``page`` / ``category`` query
    parameters (page and category are sent as strings).

    The extractor looks at every ``td`` with class ``'eng list_vspace'``,
    keeps the cells that have no ``nowrap`` attribute and whose text is
    all digits, and returns their text as a list.
    """
    # The original built an unused, incomplete extractor under this name and
    # then repeated the real pipeline inline; only the real one is kept.
    html2no_list = pipe(
        lambda html: html.find_all('td', class_='eng list_vspace'),
        cfilter(lambda tag: tag.get('nowrap') is None),
        cfilter(lambda tag: tag.text.isdigit()),
        cmap(lambda tag: tag.text),
        list)
    return base.article_no_list(
        PPOM_LIST_STEM,
        {'id': board_name, 'page': str(page_no), 'category': str(category)},
        html2no_list)
コード例 #8
0
ファイル: clean_masks.py プロジェクト: KUR-creative/bioseg
        bgr_img[:, :, 1] = g_minus_b
    #cv2.imshow('intersection removed',bgr_img); cv2.waitKey(0)
    return bgr_img


if __name__ == '__main__':
    # argv[1]: directory of mask images, cleaned in place.
    # argv[2]: optional channel spec handed to grayscale(); 'rgb' is used
    #          unless exactly two arguments are given.
    mask_dir = sys.argv[1]
    mask_paths = list(utils.file_paths(mask_dir))

    # Pick the read mode from the first mask: if its three channels are
    # identical it is treated as grayscale, otherwise as colour.
    check_img = cv2.imread(mask_paths[0])
    if (np.array_equal(check_img[:, :, 0], check_img[:, :, 1])
            and np.array_equal(check_img[:, :, 1], check_img[:, :, 2])):
        img_type = cv2.IMREAD_GRAYSCALE
    else:
        img_type = cv2.IMREAD_COLOR

    leaving_channels = sys.argv[2] if len(sys.argv) == 3 else 'rgb'

    def clean_mask(img):
        """Apply the cleaning steps to one mask, in the original order."""
        img = binarization(img)
        img = dilation(img)
        img = remove_intersection(img)
        return grayscale(img, leaving_channels)

    # Keep each path together with its image.  Previously unreadable images
    # were filtered out of the mask stream only, and the stream was then
    # zipped with the unfiltered path list, so every mask after a failed
    # read was written to the wrong file.
    path_masks \
    = pipe(cmap(lambda path: (path, cv2.imread(path, img_type))),
           cfilter(lambda path_img: path_img[1] is not None),
           cmap(lambda path_img: (path_img[0], clean_mask(path_img[1]))))

    for path, mask in path_masks(mask_paths):
        cv2.imwrite(path, mask)

    print('Now all masks in %s are clean!' % mask_dir)
コード例 #9
0
ファイル: evaluator.py プロジェクト: KUR-creative/unet
        shutil.copyfile(file_path, os.path.join(eval_result_dirpath, file_name))
        print("file '%s' is copyed into '%s'" % (file_name,eval_result_dirpath))

import sys,pathlib
from fp import pipe,cmap,cfilter
if __name__ == '__main__':
    # Usage:
    #   python evaluator.py segnet.h5 imgs_dir output_dir
    segnet_model_path = sys.argv[1]
    imgs_dir = sys.argv[2]
    output_dir = sys.argv[3]
    utils.safe_copytree(imgs_dir, output_dir, ['*.*'])

    segnet = model.unet(segnet_model_path, (None, None, 1))

    # Every pipeline stage works on an (image, path) pair so that the source
    # path stays attached to its image all the way to the output loop.
    def load_with_path(path):
        # Flag 0 is passed to cv2.imread exactly as before.
        return (cv2.imread(path, 0), path)

    def was_read(img_path):
        return img_path[0] is not None

    def to_float32(img_path):
        img, path = img_path[0], img_path[1]
        return (utils.bgr_float32(img), path)

    def add_leading_axis(im_p):
        img, path = im_p[0], im_p[1]
        return (img.reshape((1,) + img.shape), path)

    def run_inference(im_p):
        return (inference(segnet, im_p[0]), im_p[1])

    f = pipe(utils.file_paths,
             cmap(load_with_path),
             cfilter(was_read),
             cmap(to_float32),
             cmap(add_leading_axis),
             cmap(run_inference))
    old_parent_dir = pathlib.Path(imgs_dir).parts[-1]

    for segmap_list, img_path in f(imgs_dir):
        new_path = utils.make_dstpath(img_path, old_parent_dir, output_dir)
        first_map = segmap_list[0]
        # Keep the first two axes, scale by 255 and store as uint8.
        plane_shape = first_map.shape[:2]
        segmap = (first_map.reshape(plane_shape) * 255).astype(np.uint8)
        cv2.imwrite(new_path, segmap)
コード例 #10
0
ファイル: instiz_core.py プロジェクト: KUR-creative/crawlers
def view_comments(comment_list):
    """Print all comments, each prefixed with its zero-based index.

    Every entry is formatted as ``'[<index>] <comment> \\n'`` and all
    entries are emitted by a single ``print`` call.
    """
    def numbered(index_and_comment):
        # index_and_comment is the (index, comment) pair made by enumerate.
        return '[%d] %s \n' % index_and_comment

    format_all = pipe(enumerate, cmap(numbered))
    print(*format_all(comment_list))
コード例 #11
0
create new argv[2] directory(tree structure preserved), 
separate images in argv[1], and move rgb imgs to new directory.

ex)
python separator.py ./data/examples/ ./data/rgb
                    ^~~~~~~~~~~~~~~~ ^~~~~~~~~~  
                    origin img dir   new directory for rgb imgs.
''')

def is_grayscale(img):
    """Tell whether channels 0, 1 and 2 of ``img`` hold identical values.

    ``img`` is indexed as ``img[:, :, c]``, so it needs at least three axes.
    Channel 2 is only inspected when channels 0 and 1 already match.
    """
    first_two_match = np.all(img[:, :, 0] == img[:, :, 1])
    if not first_two_match:
        return first_two_match
    return np.all(img[:, :, 1] == img[:, :, 2])

if __name__ == '__main__':
    # argv[1]: directory holding the mixed images.
    # argv[2]: destination directory that receives the non-grayscale images.
    mixed_imgs_path = sys.argv[1]
    rgb_imgs_path = sys.argv[2]

    utils.safe_copytree(mixed_imgs_path, rgb_imgs_path,
                        ('*.jpg', '*.jpeg', '*.png'))

    def load_with_path(path):
        return (cv2.imread(path), path)

    def was_read(img_path):
        return img_path[0] is not None

    def is_not_gray(img_path):
        return not is_grayscale(img_path[0])

    # Yields (image, path) pairs for every readable, non-grayscale image.
    f = pipe(utils.file_paths,
             cmap(load_with_path),
             cfilter(was_read),
             cfilter(is_not_gray))
    old_parent_dir = pathlib.Path(mixed_imgs_path).parts[-1]

    timer = utils.ElapsedTimer('moving in')
    for img, img_path in f(mixed_imgs_path):
        # Only the path is needed here; the image was used for filtering.
        destination = utils.make_dstpath(img_path, old_parent_dir,
                                         rgb_imgs_path)
        shutil.move(img_path, destination)
    timer.elapsed_time()
コード例 #12
0
def main(src_imgs_path, dataset_name, num_crop, crop_size, chk_size):
    """Build an HDF5 dataset of square crops from the images in a directory.

    Args:
        src_imgs_path: handed to ``utils.file_paths`` to enumerate the
            source images.
        dataset_name: path of the HDF5 file to create (opened with 'w').
        num_crop: number of random crops taken per image.  0 switches to
            ``img2sqr_crops`` instead; 100 crops per image is then assumed
            when pre-allocating the dataset.
        crop_size: side length of every crop.
        chk_size: number of crops written per chunk (also the HDF5 chunk
            length along the first axis).

    The file receives an ``images`` dataset of shape
    ``(n, crop_size, crop_size, 1)`` and a ``mean_pixel_value`` dataset.
    Afterwards the file is reopened read-only as a load test, which prints
    the stored shape and mean and shows the last image in a cv2 window.
    """
    # Only the cropping stage differs between the two modes.
    if num_crop != 0:
        rand_sqr_crop = img2rand_sqr_crops(crop_size)
        crop_stages = [cflatMap(crepeat(num_crop)),
                       cmap(lambda img: rand_sqr_crop(img))]
    else:
        print('!')
        num_crop = 100  # big enough value..
        crop_stages = [cflatMap(lambda img: img2sqr_crops(img, crop_size))]

    stages = [utils.file_paths,
              cmap(lambda path: cv2.imread(path)),
              cfilter(lambda img: img is not None),
              cfilter(lambda img: is_cuttable(img, crop_size)),
              cmap(utils.slice1channel)]
    stages += crop_stages
    stages += [cmap(lambda img: (img / 255).astype(np.float32)),
               lambda imgs: split_every(chk_size, imgs)]
    gen = pipe(*stages)

    print(src_imgs_path)
    expected_num_imgs = len(list(utils.file_paths(src_imgs_path))) * num_crop
    print('-------------- SUMARY --------------')
    print('      dataset name = ', dataset_name)
    print('      size of crop = ', crop_size)
    print(' num crops per img = ', num_crop)
    print(' expected num imgs = ', expected_num_imgs)
    print('        chunk size = ', chk_size)

    # The context manager closes the file even if writing fails part-way.
    with h5py.File(dataset_name, 'w') as f:
        timer = utils.ElapsedTimer()
        images = f.create_dataset(
            'images', (expected_num_imgs, crop_size, crop_size, 1),
            maxshape=(None, crop_size, crop_size, 1),
            chunks=(chk_size, crop_size, crop_size, 1))

        mean = 0
        num_img_elems = (crop_size**2)
        # Running count of written crops.  It doubles as the write offset and
        # stays defined (0) when the pipeline yields no chunk at all, where
        # the loop variables used previously would have been unbound.
        actual_num_img = 0
        for chunk in tqdm(gen(src_imgs_path),
                          total=expected_num_imgs // chk_size):
            beg_idx = actual_num_img
            images[beg_idx:beg_idx + len(chunk)] = chunk
            mean = iter_mean(mean, beg_idx * num_img_elems, np.sum(chunk),
                             len(chunk) * num_img_elems)
            actual_num_img += len(chunk)
        f.create_dataset('mean_pixel_value', data=mean)

        if actual_num_img != expected_num_imgs:
            print(expected_num_imgs, ' != ', actual_num_img)
            print('dataset resized!')
            images.resize((actual_num_img, crop_size, crop_size, 1))

    print('------------------------------------')
    print('dataset generated successfully.')
    # The returned message was meant for an optional mail notification
    # (mailing.send_mail_to_kur), which is currently disabled.
    msg = timer.elapsed_time()

    # [load test code]
    with h5py.File(dataset_name, 'r') as f:
        print('f', f['images'].shape)
        print('loaded MEAN:', f['mean_pixel_value'][()])
        # Indexing [-1] on an empty dataset would fail, so only show the last
        # image when there is one.
        if f['images'].shape[0] > 0:
            cv2.imshow('img', f['images'][-1])
            cv2.waitKey(0)
    '''
    '''
    '''
コード例 #13
0
    num_crop = args.num_crop# 3
    crop_size = args.crop_size#128
    chk_size = args.chk_size#100 #00 

    print(src_imgs_path)
    expected_num_imgs = len(list(utils.file_paths(src_imgs_path))) * num_crop
    print('-------------- SUMARY --------------')
    print('      dataset name = ', dataset_name)
    print('      size of crop = ', crop_size)
    print(' num crops per img = ', num_crop)
    print(' expected num imgs = ', expected_num_imgs)
    print('        chunk size = ', chk_size)

    img2_128x128crop = img2sqr_crop(crop_size)
    gen = pipe(utils.file_paths,
               cmap(lambda path: cv2.imread(path)),
               cfilter(lambda img: img is not None),
               cfilter(lambda img: is_cuttable(img, crop_size)),
               cmap(slice1channel),
               cflatMap(crepeat(num_crop)),
               cmap(lambda img: img2_128x128crop(img)),
               cmap(lambda img: (img / 255).astype(np.float32)),
               lambda imgs: split_every(chk_size, imgs))

    f = h5py.File(dataset_name,'w')
    timer = utils.ElapsedTimer()
    #-------------------------------------------------------------
    f.create_dataset('images', 
                     (expected_num_imgs,crop_size,crop_size,1),
                       maxshape = (None,crop_size,crop_size,1),
                     chunks = (chk_size,crop_size,crop_size,1))
コード例 #14
0
ファイル: img_cutter.py プロジェクト: KUR-creative/bioseg
if __name__ == '__main__':

    def path2path_img(path):
        """Pair ``path`` with the image cv2 reads from it."""
        img = cv2.imread(path)
        return (path, img)

    def was_read(path_img):
        return path_img[1] is not None

    def cut_into_pieces(path_img):
        return path_img2path_pieces(path_img, crop_size, imgs_dir, pieces_dir)

    # argv: crop size, source image directory, output directory for pieces.
    crop_size = int(sys.argv[1])
    imgs_dir = sys.argv[2]
    pieces_dir = sys.argv[3]

    utils.safe_copytree(imgs_dir, pieces_dir, ['*.jpg', '*.jpeg', '*.png'])

    timer = utils.ElapsedTimer('Total Cutting')
    #-------------------------------------------------------------
    cut_all = pipe(utils.file_paths,
                   cmap(path2path_img),
                   cfilter(was_read),
                   cmap(cut_into_pieces),
                   flatten)
    pieces = cut_all(imgs_dir)

    # Each element is unpacked as a (destination path, piece image) pair.
    for path, img in pieces:
        cv2.imwrite(path, img)
    #-------------------------------------------------------------
    timer.elapsed_time()

import unittest
コード例 #15
0
import os, sys, cv2
import utils
from fp import pipe, cmap, cfilter, flatten

# argv[1]: directory whose images are measured.
img_dir = sys.argv[1]

# Read every image with flag 0, drop unreadable files, take each image's
# shape and sort all shape entries in ascending order.  flatten presumably
# merges the shape tuples into one stream of numbers -- confirm in fp.
# sorted() already returns a list, so no extra list() stage is needed.
f = \
pipe(utils.file_paths,
     cmap(lambda path: cv2.imread(path,0)),
     cfilter(lambda img: img is not None),
     cmap(lambda img: img.shape),
     flatten,
     sorted)

sizes = f(img_dir)
print(sizes)
# sizes is empty when no file could be read; indexing it would raise.
if sizes:
    print('smallest img size =', sizes[0])
else:
    print('no readable images in', img_dir)