# Example 1
def training_patches(imnames,
                     npatches,
                     psize,
                     maxdim=None,
                     colour=False,
                     verbose=False):
    """ Extract patches from images for dictionary training

    Arguments:
        imnames: A list of image names from which to extract training patches.
        npatches: The number (int) of patches to extract from the images.
        psize: An int side length of the square patches to extract.
        maxdim: The maximum dimension of the image in pixels. The image is
            rescaled if it is larger than this. By default there is no scaling.
        colour: bool, keep the colour channels (True) or convert colour
            images to grayscale before extracting patches (False, default).
        verbose: bool, print progress bar.

    Returns:
        An np.array (npatches, psize**2*3) for RGB or (npatches, psize**2) for
        grey of flattened image patches. NOTE, the actual npatches found may be
        less than that requested.

    Raises:
        ZeroDivisionError: if imnames is empty.
    """

    nimg = len(imnames)
    # Target number of patches per image (rounded split of the request).
    ppeimg = int(round(float(npatches) / nimg))
    plist = []

    # Set up progress updates
    progbar = Progress(nimg, title='Extracting patches', verbose=verbose)

    for i, ims in enumerate(imnames):
        img = imread_resize(ims, maxdim)  # read in and resize the image

        # Grid spacing that yields roughly ppeimg patches from this image;
        # clamped to 1 so grid_patches always gets a positive stride.
        spacing = max(int(round(img.shape[1] * ppeimg ** (-0.5))), 1)

        # Map to grayscale first if colour was not requested
        if not colour and img.ndim == 3:
            img = rgb2gray(img)

        plist.append(grid_patches(img, psize, spacing)[0])

        progbar.update(i)

    progbar.finished()
    patches = np.concatenate(plist, axis=0)
    # Flatten each patch to a single row vector.
    return np.reshape(patches, (patches.shape[0], np.prod(patches.shape[1:])))
# Example 2
def training_patches(imnames, npatches, psize, maxdim=None, colour=False,
                     verbose=False):
    """Extract flattened training patches from a list of images.

    Arguments:
        imnames: list of image file names to draw training patches from.
        npatches: int, total number of patches requested across all images.
        psize: int, side length of the square patches to extract.
        maxdim: optional int, maximum image dimension in pixels; larger
            images are rescaled down. No scaling by default.
        colour: bool, keep RGB channels instead of converting to grayscale.
        verbose: bool, show a progress bar while extracting.

    Returns:
        np.array of shape (npatches, psize**2*3) for RGB input, or
        (npatches, psize**2) for grayscale, of flattened patches. The number
        of patches actually found may be smaller than requested.
    """

    n_images = len(imnames)
    # Split the requested patch budget evenly across the images.
    per_image = int(round(float(npatches) / n_images))
    collected = []

    # Progress reporting
    progbar = Progress(n_images, title='Extracting patches', verbose=verbose)

    for idx, fname in enumerate(imnames):
        image = imread_resize(fname, maxdim)

        # Sampling stride chosen to yield ~per_image patches, at least 1.
        stride = max(int(round(image.shape[1] * per_image ** (-0.5))), 1)

        # Convert to grayscale unless colour output was requested.
        if (colour == False) and (image.ndim == 3):
            source_img = rgb2gray(image)
        else:
            source_img = image

        collected.append(grid_patches(source_img, psize, stride)[0])
        progbar.update(idx)

    progbar.finished()

    stacked = np.concatenate(collected, axis=0)
    return np.reshape(stacked, (stacked.shape[0], np.prod(stacked.shape[1:])))
# Example 3
    # NOTE(review): this is the body of an enclosing scope whose header is not
    # visible here — fd, n, g, progress, source and wrongs are bound above.
    # NOTE(review): escapechar='' is suspicious; csv normally requires a
    # 1-character string or None for escapechar — TODO confirm this runs.
    data = csv.DictReader(fd, delimiter="\t", quotechar='"', escapechar='')
    for r in data:
        raw_id = r['raw_id']

        # Check if valid with regex: optional repeated 'tt' prefix, then a
        # 7-10 digit id captured as named group 'id' (which is group 2).
        match = re.match(r"^(tt)*(?P<id>\d{7,10}).*", raw_id)
        if not match:
            # Malformed id: still count it toward progress, record it, skip.
            progress.count()
            wrongs.append(raw_id)
            continue

        imdb_id = match.group(2)  # the digits; same as match.group('id')
        film_node = n['Movie/tt' + imdb_id]

        # Create a node for dbpedia
        uri = r['uri']
        wiki_node = URIRef(uri)
        # Link the film node to the external resource via a
        # 'has<source>Node' predicate in namespace n.
        g.add((film_node, n['has' + source + 'Node'], wiki_node))

        progress.count()
        # Stop early once the progress tracker reports completion.
        if progress.finished():
            break

# Persist the accumulated RDF graph in Turtle format and print a summary.
# NOTE(review): g, outfile, wrongs, progress and start are defined above the
# visible region of this file.
g.serialize(destination=outfile, format='turtle')
end = time.time()

print('Wrong formatted IMDB IDs found: ', len(wrongs))
print(wrongs)
print("Total Items Processed: ", progress.total)
print("Total Time: ", end - start)
# Release the graph's backing store.
g.close()