Example #1
def detect_duplicate_images(imgpath_list):
    # imread, np (numpy), and helpers are module-level names in the source project
    import sys
    global DUPLICATE_HASH_PRECISION
    nImg = len(imgpath_list)
    lbl = 'checking duplicate'
    duplicates = {}
    mark_progress, end_progress = helpers.progress_func(nImg, lbl=lbl)
    for count, gpath in enumerate(imgpath_list):
        mark_progress(count)
        img = imread(gpath)
        img_hash = helpers.hashstr(img, DUPLICATE_HASH_PRECISION)
        if img_hash not in duplicates:
            duplicates[img_hash] = []
        duplicates[img_hash].append(gpath)

    if '--strict' in sys.argv:
        # Be very safe: Check for collisions
        for img_hash, gpath_list in duplicates.items():
            img1 = imread(gpath_list[0])
            for gpath in gpath_list:
                img2 = imread(gpath)
                if not np.all(img1 == img2):
                    DUPLICATE_HASH_PRECISION += 8
                    # the caller is expected to retry with the increased precision
                    raise Exception('hash collision. try again')
    end_progress()
    return duplicates
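
A minimal usage sketch (hypothetical, not from the source project): the exception signals a hash collision after DUPLICATE_HASH_PRECISION has already been raised, so a caller can simply retry with the new precision.

# Hypothetical driver; image_paths and the retry bound are illustrative only.
image_paths = ['img/a.png', 'img/b.png', 'img/a_copy.png']
duplicates = {}
for attempt in range(3):  # precision grows on each collision
    try:
        duplicates = detect_duplicate_images(image_paths)
        break
    except Exception:
        pass  # DUPLICATE_HASH_PRECISION was already increased
for img_hash, paths in duplicates.items():
    if len(paths) > 1:
        print('duplicate group %s -> %r' % (img_hash, paths))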
Example #2
def _gen(fpath_list):
    # EXIF generator; read_exif and kwargs come from the enclosing function's scope
    nGname = len(fpath_list)
    lbl = '[io] Load Image EXIF'
    mark_progress, end_progress = helpers.progress_func(nGname, lbl, 16)
    for count, fpath in enumerate(fpath_list):
        mark_progress(count)
        yield read_exif(fpath, **kwargs)
    end_progress()
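
In the source, _gen is nested inside a loader that supplies read_exif and kwargs; a self-contained sketch of that enclosing pattern (names assumed, not from the source) could look like this:

# Hypothetical enclosing function; only the closure pattern is taken from the source.
def load_exif_list(fpath_list, **kwargs):
    def _gen(fpath_list):
        for fpath in fpath_list:
            yield read_exif(fpath, **kwargs)  # kwargs closed over from load_exif_list
    return list(_gen(fpath_list))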
Example #3
def _compute_in_serial(task_list, task_lbl='', verbose=True):
    # Serialize Tasks
    result_list = []
    nTasks = len(task_list)
    if verbose:
        mark_progress, end_prog = helpers.progress_func(nTasks, lbl=task_lbl)
        # Compute each task
        for count, (fn, args) in enumerate(task_list):
            mark_progress(count)
            result = fn(*args)
            result_list.append(result)
        end_prog()
    else:
        # Compute each task
        for fn, args in task_list:
            result = fn(*args)
            result_list.append(result)
        print('[parallel]  ... done')
    return result_list
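
Usage is straightforward since tasks are (function, args) pairs; a hypothetical call with verbose=False, which sidesteps the progress helper entirely:

# Hypothetical usage sketch.
def add(a, b):
    return a + b

task_list = [(add, (1, 2)), (add, (3, 4))]
results = _compute_in_serial(task_list, verbose=False)
print(results)  # [3, 7]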
Example #4
def _compute_in_serial(task_list, task_lbl='', verbose=True):
    # Serialize Tasks
    result_list = []
    nTasks = len(task_list)
    if verbose:
        mark_progress, end_prog = util.progress_func(nTasks, lbl=task_lbl)
        # Compute each task
        for count, (fn, args) in enumerate(task_list):
            mark_progress(count)
            result = fn(*args)
            result_list.append(result)
        end_prog()
    else:
        # Compute each task
        for fn, args in task_list:
            result = fn(*args)
            result_list.append(result)
        print('[parallel]  ... done')
    return result_list
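
Both serial variants (and the parallel ones below) assume a progress_func(total, lbl, spacing) helper that returns a (mark_progress, end_progress) pair. A minimal stand-in, with the signature inferred from the call sites, might be:

import sys

# Minimal stand-in; the real helpers.progress_func / util.progress_func may differ.
def progress_func(total, lbl='', spacing=1):
    def mark_progress(count):
        if count % spacing == 0:
            sys.stdout.write('\r%s %d/%d' % (lbl, count + 1, total))
            sys.stdout.flush()
    def end_progress():
        sys.stdout.write('\r%s %d/%d ... done\n' % (lbl, total, total))
    return mark_progress, end_progress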
Example #5
def _compute_in_parallel(task_list, num_procs, task_lbl='', verbose=True):
    '''
    Input: task_list: [ (fn, args), ... ]
    '''
    task_queue = multiprocessing.Queue()
    done_queue = multiprocessing.Queue()
    nTasks = len(task_list)
    # queue tasks
    for task in task_list:
        task_queue.put(task)
    # start processes
    proc_list = []
    for i in range(num_procs):
        printDBG('[parallel] creating process %r' % (i,))
        proc = multiprocessing.Process(target=_worker,
                                       args=(task_queue, done_queue))
        proc.start()
        proc_list.append(proc)
    # wait for results
    printDBG('[parallel] waiting for results')
    sys.stdout.flush()
    result_list = []
    if verbose:
        mark_progress, end_prog = helpers.progress_func(nTasks,
                                                        lbl=task_lbl,
                                                        spacing=num_procs)
        for count in range(nTasks):
            mark_progress(count)
            printDBG('[parallel] done_queue.get()')
            result = done_queue.get()
            result_list.append(result)
        end_prog()
    else:
        for i in range(nTasks):
            result_list.append(done_queue.get())
        print('[parallel]  ... done')
    printDBG('[parallel] stopping children')
    # stop children processes
    for i in range(num_procs):
        task_queue.put('STOP')
    return result_list
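
_worker is not shown in either of the two parallel examples. Given that tasks are (fn, args) pairs and children are stopped with a 'STOP' sentinel, a worker consistent with this queue protocol might look like:

# Hypothetical worker; only the 'STOP' sentinel protocol is taken from the source.
def _worker(task_queue, done_queue):
    for fn, args in iter(task_queue.get, 'STOP'):
        done_queue.put(fn(*args))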
Example #6
def _compute_in_parallel(task_list, num_procs, task_lbl='', verbose=True):
    '''
    Input: task_list: [ (fn, args), ... ]
    '''
    task_queue = multiprocessing.Queue()
    done_queue = multiprocessing.Queue()
    nTasks = len(task_list)
    # queue tasks
    for task in task_list:
        task_queue.put(task)
    # start processes
    proc_list = []
    for i in range(num_procs):
        printDBG('[parallel] creating process %r' % (i,))
        proc = multiprocessing.Process(target=_worker, args=(task_queue, done_queue))
        proc.daemon = True
        proc.start()
        proc_list.append(proc)
    # wait for results
    printDBG('[parallel] waiting for results')
    sys.stdout.flush()
    result_list = []
    if verbose:
        mark_progress, end_prog = util.progress_func(nTasks, lbl=task_lbl, spacing=num_procs)
        for count in range(nTasks):
            mark_progress(count)
            printDBG('[parallel] done_queue.get()')
            result = done_queue.get()
            result_list.append(result)
        end_prog()
    else:
        for i in range(nTasks):
            result_list.append(done_queue.get())
        print('[parallel]  ... done')
    printDBG('[parallel] stopping children')
    # stop children processes
    for i in range(num_procs):
        task_queue.put('STOP')
    for proc in proc_list:
        proc.join()
    return result_list
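
A hypothetical driver: task functions must be defined at module level so child processes can import them, and results arrive in completion order, not submission order.

# Hypothetical usage sketch.
def square(x):
    return x * x

if __name__ == '__main__':
    task_list = [(square, (i,)) for i in range(8)]
    results = _compute_in_parallel(task_list, num_procs=2, verbose=False)
    print(sorted(results))  # [0, 1, 4, 9, 16, 25, 36, 49]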