Ejemplo n.º 1
0
def consolidate_canopy_clusters(output, cluster_pkl):
    """Gather canopy-cluster job output and pickle it to a local file.

    Reads every record matched by ``output + '/p*'`` through ``hadoopy.cat``,
    orders the records by the first element of their key, and writes the
    tuple ``(clusters, canopy_clusters)`` to ``cluster_pkl`` with pickle
    protocol 2.

    Args:
        output: Directory holding the job output; its ``p*`` entries are read.
        cluster_pkl: Path of the pickle file to create.
    """
    records = list(hadoopy.cat(output + '/p*'))
    # A key function gives the same ordering as the old cmp-style comparator
    # and also works on interpreters where list.sort() no longer accepts cmp.
    records.sort(key=lambda record: record[0][0])
    # Pair each record's sorted position with the second element of its key.
    cluster_canopies = [(index, record[0][1])
                        for index, record in enumerate(records)]
    canopy_clusters = _map_cluster_canopies(cluster_canopies)
    # Each record value is decoded as raw float32 data, one row per record.
    clusters = np.array([np.fromstring(record[1], dtype=np.float32)
                         for record in records])
    # Protocol 2 is a binary pickle format, so the file must be opened in
    # binary mode; text mode can corrupt the stream on some platforms.
    with open(cluster_pkl, 'wb') as fp:
        pickle.dump((clusters, canopy_clusters), fp, 2)
Ejemplo n.º 2
0
 def _run(self, fn):
     """Upload ``fn``, run the frozen word-count job on it, and check counts.

     The local file ``fn`` is copied to ``self.data_path + fn`` with
     ``hadoop fs -put``; the job output goes to
     ``self.data_path + 'out-' + fn`` and is read back to verify the counts
     for the words 'the' and 'Alice'.

     Args:
         fn: Name of the local input file to upload.

     Raises:
         subprocess.CalledProcessError: If the ``hadoop fs -put`` call fails.
     """
     in_path = self.data_path + fn
     out_path = self.data_path + 'out-' + fn
     # Build argv as a list rather than formatting a string and splitting it,
     # so paths that contain whitespace stay a single argument.
     subprocess.check_call(['hadoop', 'fs', '-put', fn, in_path])
     # Don't let the file split, CDH3 has a bug and will try to split gz's
     hadoopy.launch_frozen(in_path, out_path, 'wc.py', jobconfs='mapred.min.split.size=100000000')
     wc = dict(hadoopy.cat(out_path))
     self.assertEqual(wc['the'], 1664)
     self.assertEqual(wc['Alice'], 221)
Ejemplo n.º 3
0
        cv.CvtColor(cv_im, cv_im_cvt, cv.CV_RGB2BGR)
        cv_im = cv_im_cvt
    return cv_im

def str2pil(image_data):
    """Open encoded image data held in a string as a PIL image.

    Args:
        image_data: The encoded image file contents.

    Returns:
        The object returned by ``Image.open`` for an in-memory buffer
        wrapping ``image_data``.
    """
    buf = StringIO.StringIO(image_data)
    return Image.open(buf)

def cvcrop(cv_image, x, y, w, h):
    """Return a copy of a rectangular region of ``cv_image``.

    Args:
        cv_image: Source image (legacy ``cv`` image exposing ``channels``).
        x: Left edge of the region; truncated to int.
        y: Top edge of the region; truncated to int.
        w: Region width; truncated to int.
        h: Region height; truncated to int.

    Returns:
        A newly created image of size ``(w, h)`` with depth 8 and the same
        channel count as ``cv_image``, holding a copy of the region.
    """
    x, y, w, h = int(x), int(y), int(w), int(h)
    cropped = cv.CreateImage((w, h), 8, cv_image.channels)
    # Read from the argument. The previous code referenced a global named
    # `image`, which only worked when the caller happened to define one.
    src_region = cv.GetSubRect(cv_image, (x, y, w, h))
    cv.Copy(src_region, cropped)
    return cropped

# Records read from the face-finder output below:
#   key:   image name
#   value: (image, faces) where image is the input value and faces is
#          a list of ((x, y, w, h), n)
run_time = '1306607174.041919'
out_path = '/mnt/nfsdrives/shared/facefinder/run-%s/' % run_time
chip_out_path = '/mnt/nfsdrives/shared/facefinder/run-%s/chips' % run_time
# run_time is fixed, so a rerun finds the directory already in place; only
# propagate the error when the directory really could not be created.
try:
    os.makedirs(chip_out_path)
except OSError:
    if not os.path.isdir(chip_out_path):
        raise
for image_name, (image, faces) in hadoopy.cat('/user/brandyn/tp/facefinder/run-%s' % run_time):
    image = pil2cv(str2pil(image))
    # Save one cropped chip per face first, before any rectangle is drawn on
    # the image, so the chips do not contain the annotation.
    for num, ((x, y, w, h), n) in enumerate(faces):
        cv.SaveImage('%s/%s-%d.jpg' % (chip_out_path, image_name, num), cvcrop(image, x, y, w, h))
    # Then outline every face in red on the full image and save it.
    for (x, y, w, h), n in faces:
        pt1 = (int(x), int(y))
        pt2 = (int((x + w)), int((y + h)))
        cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)
    cv.SaveImage('%s/%s.jpg' % (out_path, image_name), image)
Ejemplo n.º 4
0
def fetch_output():
    """Read all records under 'faces/face_output' into ``faces_output``.

    The records yielded by ``hadoopy.cat`` are materialized as a list and
    stored in the module-level global ``faces_output``.
    """
    global faces_output
    faces_output = list(hadoopy.cat('faces/face_output'))
Ejemplo n.º 5
0
 def test_err(self):
     """Check that ``ls`` and ``cat`` raise IOError for ``self.nonsense_path``."""
     self.assertRaises(IOError, hadoopy.ls, self.nonsense_path)
     # cat() is consumed lazily, so the error is expected on the first fetch.
     # The builtin next() is used instead of the iterator's `.next` attribute,
     # which only exists on Python 2.
     self.assertRaises(IOError, next, hadoopy.cat(self.nonsense_path))
Ejemplo n.º 6
0
 def test_cat(self):
     """Check that a known record is among those read from ``self.file_path``."""
     expected = (331, "Title: Alice's Adventures in Wonderland")
     records = list(hadoopy.cat(self.file_path))
     self.assertTrue(expected in records)
Ejemplo n.º 7
0
import hadoopy
import os
import time

# Every run writes under a fresh directory named after the current timestamp.
out_path = '/mnt/nfsdrives/shared/tp/cluster/%f/' % time.time()

# Each record is (group, (image_name, image_data)); the image data is written
# to <out_path>/<group>/<image_name>.jpg.
for group, (image_name, image_data) in hadoopy.cat('/user/brandyn/tp/image_cluster/run-15//samples'):
    group_path = '%s/%d/' % (out_path, int(group))
    try:
        os.makedirs(group_path)
    except OSError:
        # Expected once a group's directory has been created by an earlier
        # record; anything else (e.g. permissions) is a real failure.
        if not os.path.isdir(group_path):
            raise
    image_path = group_path + '%s.jpg' % image_name
    print(image_path)
    # The payload is written out as a .jpg file, so open in binary mode to
    # keep the bytes untouched.
    with open(image_path, 'wb') as fp:
        fp.write(image_data)
import glob
import base64
import os

import Image
import hadoopy

# Input file read with hadoopy.cat and the local directory for extracted frames.
FILE = '/tmp/bwhite/output/pets2006.video_frame_data.b/0.903472866947'
OUTPUT = 'out'

try:
    os.mkdir(OUTPUT)
except OSError:
    # An existing output directory is fine; any other failure is not.
    if not os.path.isdir(OUTPUT):
        raise

# Scan all (name, data) records and export only the frame named '1-1-2241'.
for name, data in hadoopy.cat(FILE):
    if name == '1-1-2241':
        print(name)
        # The raw data is decoded as a 720x576 image in PIL mode 'L'.
        Image.fromstring('L', (720, 576),
                         data).save(OUTPUT + '/' + name + '.jpg')
Ejemplo n.º 9
0
def consolidate_clusters(output, cluster_pkl):
    """Gather cluster job output and pickle it to a local file.

    Reads every record matched by ``output + '/p*'`` through ``hadoopy.cat``,
    orders the records by key, decodes each value as float32 data, and writes
    the resulting array to ``cluster_pkl`` with pickle protocol 2.

    Args:
        output: Directory holding the job output; its ``p*`` entries are read.
        cluster_pkl: Path of the pickle file to create.
    """
    records = list(hadoopy.cat(output + '/p*'))
    # A key function gives the same ordering as the old cmp-style comparator
    # and also works on interpreters where list.sort() no longer accepts cmp.
    records.sort(key=lambda record: record[0])
    # Each record value is decoded as raw float32 data, one row per record.
    clusters = np.array([np.fromstring(record[1], dtype=np.float32)
                         for record in records])
    # Protocol 2 is a binary pickle format, so the file must be opened in
    # binary mode; text mode can corrupt the stream on some platforms.
    with open(cluster_pkl, 'wb') as fp:
        pickle.dump(clusters, fp, 2)