Example 1
 def _masks_to_ilp(self, input_table, input_column, output_column, **kw):
     cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
                'HBASE_TABLE': input_table,
                'HBASE_OUTPUT_COLUMN': output_column}
     hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'masks_to_ilp.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[cmdenvs['HBASE_INPUT_COLUMN']],
                          cmdenvs=cmdenvs, **kw)
Example 2
 def _image_to_superpixels(self, input_table, input_column, output_table, output_column):
     cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
                'HBASE_TABLE': output_table,
                'HBASE_OUTPUT_COLUMN': output_column}
     hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'image_to_superpixels.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[cmdenvs['HBASE_INPUT_COLUMN']],
                          cmdenvs=cmdenvs, jobconfs={'mapred.task.timeout': '6000000'})
Example 3
 def takeout_chain_job(self, model, input_column, output_column, **kw):
     model_fp = picarus.api.model_tofile(model)
     cmdenvs = {'HBASE_TABLE': self.images_table,
                'HBASE_OUTPUT_COLUMN': base64.b64encode(output_column),
                'MODEL_FN': os.path.basename(model_fp.name)}
     hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/takeout_chain_job.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, files=[model_fp.name], columns=[input_column], single_value=True,
                          jobconfs={'mapred.task.timeout': '6000000'}, cmdenvs=cmdenvs, dummy_fp=model_fp, **kw)
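The launches in these examples pass job configuration to the mapper through cmdenvs (which become environment variables inside each task) and ship the serialized model with files=. Below is a minimal sketch, not taken from the source, of how such a mapper script might read that configuration; the pickle format, the Mapper class name, and the model's process() method are assumptions, and how results are written back to HBase depends on the job's output format, which is not shown here.

# Hypothetical mapper sketch: reads cmdenvs as environment variables and loads
# the model file shipped via files=. Not the actual takeout_chain_job.py.
import os
import base64
import pickle  # assumption: the shipped model file is a pickle; the real format may differ

import hadoopy


class Mapper(object):

    def __init__(self):
        # The launcher base64-encoded the output column before placing it in cmdenvs
        self.output_column = base64.b64decode(os.environ['HBASE_OUTPUT_COLUMN'])
        # MODEL_FN names the shipped file, available in the task's working directory
        with open(os.environ['MODEL_FN'], 'rb') as fp:
            self.model = pickle.load(fp)

    def map(self, row, value):
        # With single_value=True the mapper receives the raw cell value of the
        # single requested column rather than a {column: value} dict.
        yield row, self.model.process(value)  # process() is a placeholder name

if __name__ == '__main__':
    hadoopy.run(Mapper)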
Example 4
 def image_resize(self):
     cmdenvs = {'HBASE_INPUT_COLUMN': self.image_orig_column,
                'HBASE_TABLE': self.images_table,
                'HBASE_OUTPUT_COLUMN': self.image_column,
                'MAX_SIDE': 320}
     hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'image_resize.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[cmdenvs['HBASE_INPUT_COLUMN']],
                          cmdenvs=cmdenvs)
Example 5
 def image_thumbnail(self):
     cmdenvs = {'HBASE_INPUT_COLUMN': self.image_orig_column,
                'HBASE_TABLE': self.images_table,
                'HBASE_OUTPUT_COLUMN': self.thumbnails_column,
                'SIZE': 75}
     hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'image_thumbnail.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[cmdenvs['HBASE_INPUT_COLUMN']],
                          cmdenvs=cmdenvs)
Example 6
 def image_preprocessor(self, model_key, **kw):
     model, columns = self.key_to_model(model_key)
     model_fp = picarus.api.model_tofile(model)
     cmdenvs = {'HBASE_TABLE': self.images_table,
                'HBASE_OUTPUT_COLUMN': base64.b64encode(model_key),
                'MODEL_FN': os.path.basename(model_fp.name)}
     hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/image_preprocess.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, files=[model_fp.name], columns=[base64.urlsafe_b64decode(columns['input'])], single_value=True,
                          cmdenvs=cmdenvs, dummy_fp=model_fp, check_script=False, make_executable=False, **kw)
Example 7
 def feature_to_hash(self, model_key, **kw):
     input_dict, hasher, _ = self.key_to_input_model_param(model_key)
     hasher_fp = picarus.api.model_tofile(hasher)
     cmdenvs = {'HBASE_TABLE': self.images_table,
                'HBASE_OUTPUT_COLUMN': base64.b64encode(model_key),
                'HASHER_FN': os.path.basename(hasher_fp.name)}
     hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/feature_to_hash.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[input_dict['feature']], files=[hasher_fp.name], single_value=True,
                          cmdenvs=cmdenvs, dummy_fp=hasher_fp, **kw)
Example 8
 def _feature_to_hash(self, hasher, input_table, input_column, output_table, output_column, **kw):
     hasher_fp = picarus.api.model_tofile(hasher)
     cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
                'HBASE_TABLE': input_table,
                'HBASE_OUTPUT_COLUMN': output_column,
                'HASHER_FN': os.path.basename(hasher_fp.name)}
     hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'feature_to_hash.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[cmdenvs['HBASE_INPUT_COLUMN']], files=[hasher_fp.name],
                          cmdenvs=cmdenvs, dummy_fp=hasher_fp, **kw)
Example 9
 def image_to_feature(self, model_key, **kw):
     model, columns = self.key_to_model(model_key)
     model_fp = picarus.api.model_tofile(model)
     cmdenvs = {'HBASE_TABLE': self.images_table,
                'HBASE_OUTPUT_COLUMN': base64.b64encode(model_key),
                'MODEL_FN': os.path.basename(model_fp.name)}
     hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/image_to_feature.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, files=[model_fp.name], columns=[base64.urlsafe_b64decode(columns['input'])], single_value=True,
                          jobconfs={'mapred.task.timeout': '6000000'}, cmdenvs=cmdenvs, dummy_fp=model_fp, **kw)
Example 10
 def _image_to_feature(self, feature, input_table, input_column, output_table, output_column):
     feature_fp = picarus.api.model_tofile(feature)
     cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
                'HBASE_TABLE': output_table,
                'HBASE_OUTPUT_COLUMN': output_column,
                'FEATURE_FN': os.path.basename(feature_fp.name)}
     hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'image_to_feature.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[cmdenvs['HBASE_INPUT_COLUMN']],
                          cmdenvs=cmdenvs, files=[feature_fp.name],
                          jobconfs={'mapred.task.timeout': '6000000'}, dummy_fp=feature_fp)
Example 11
 def feature_to_prediction(self, model_key, **kw):
     input_dict, classifier, param, out = self.key_to_input_model_param_output(model_key)
     classifier_fp = picarus.api.model_tofile(classifier)
     classifier_type = 'sklearn_decision_func' if out == 'binary_class_confidence' else 'class_distance_list'
     cmdenvs = {'HBASE_TABLE': self.images_table,
                'HBASE_OUTPUT_COLUMN': base64.b64encode(model_key),
                'CLASSIFIER_FN': os.path.basename(classifier_fp.name),
                'CLASSIFIER_TYPE': classifier_type}
     hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/feature_to_prediction.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[input_dict['feature']], files=[classifier_fp.name], single_value=True,
                          cmdenvs=cmdenvs, dummy_fp=classifier_fp, **kw)
Example 12
 def _feature_to_prediction(self, classifier, input_table, input_column, output_table, output_column, **kw):
     classifier_fp = tempfile.NamedTemporaryFile()
     classifier_fp.write(classifier)
     classifier_fp.flush()
     cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
                'HBASE_TABLE': input_table,
                'HBASE_OUTPUT_COLUMN': output_column,
                'CLASSIFIER_FN': os.path.basename(classifier_fp.name)}
     hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'feature_to_prediction.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[cmdenvs['HBASE_INPUT_COLUMN']], files=[classifier_fp.name],
                          cmdenvs=cmdenvs, dummy_fp=classifier_fp, **kw)
Example 13
 def takeout_chain_job(self, table, model, input_column, output_column,
                       start_row, stop_row, job_row):
     output_hdfs = 'picarus_temp/%f/' % time.time()
     model_fp = model_tofile(model)
     cmdenvs = {
         'HBASE_TABLE': table,
         'HBASE_OUTPUT_COLUMN': base64.b64encode(output_column),
         'MODEL_FN': os.path.basename(model_fp.name)
     }
     hadoop_wait_till_started(
         hadoopy_hbase.launch(table,
                              output_hdfs + str(random.random()),
                              'hadoop/takeout_chain_job.py',
                              libjars=['hadoopy_hbase.jar'],
                              num_mappers=self.num_mappers,
                              files=[model_fp.name],
                              columns=[input_column],
                              single_value=True,
                              jobconfs={
                                  'mapred.task.timeout': '6000000',
                                  'picarus.job.row': job_row
                              },
                              cmdenvs=cmdenvs,
                              dummy_fp=model_fp,
                              check_script=False,
                              make_executable=False,
                              start_row=start_row,
                              stop_row=stop_row,
                              name=job_row,
                              wait=False))
Example 14
 def exif_job(self, start_row, stop_row, job_row):
     cmdenvs = {
         'HBASE_TABLE': 'images',
         'HBASE_OUTPUT_COLUMN': base64.b64encode('meta:exif')
     }
     output_hdfs = 'picarus_temp/%f/' % time.time()
     hadoop_wait_till_started(
         hadoopy_hbase.launch('images',
                              output_hdfs + str(random.random()),
                              'hadoop/image_exif.py',
                              libjars=['hadoopy_hbase.jar'],
                              num_mappers=self.num_mappers,
                              columns=['data:image'],
                              single_value=True,
                              jobconfs={
                                  'mapred.task.timeout': '6000000',
                                  'picarus.job.row': job_row
                              },
                              cmdenvs=cmdenvs,
                              check_script=False,
                              make_executable=False,
                              start_row=start_row,
                              stop_row=stop_row,
                              name=job_row,
                              wait=False))
Example 15
import hadoopy
import hadoopy_hbase
import time
import logging
logging.basicConfig(level=logging.DEBUG)

st = time.time()

# NOTE(brandyn): If launch fails, you may need to use launch_frozen; see hadoopy.com for details

out = 'out-%f/0' % st
jobconfs = [
    'mapred.map.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec',
    'mapred.compress.map.output=true', 'mapred.output.compression.type=BLOCK'
]
hadoopy_hbase.launch('flickr',
                     out,
                     'identity_hbase_job.py',
                     libjars=['hadoopy_hbase.jar'],
                     num_mappers=8,
                     columns=['metadata:'],
                     jobconfs=jobconfs)
#results = dict(hadoopy.readtb(out))
#print(results)
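If the launch above fails because the worker nodes lack the required Python environment, the NOTE points at launch_frozen. The following is a minimal sketch that assumes hadoopy_hbase exposes launch_frozen with the same arguments as launch (mirroring hadoopy.launch_frozen); verify this against the installed hadoopy_hbase before relying on it.

# Hedged sketch: the same job as above, frozen so the workers do not need a
# matching Python install. Assumes hadoopy_hbase.launch_frozen exists with the
# same signature as launch.
out_frozen = 'out-%f/frozen' % st
hadoopy_hbase.launch_frozen('flickr', out_frozen, 'identity_hbase_job.py',
                            libjars=['hadoopy_hbase.jar'],
                            num_mappers=8, columns=['metadata:'],
                            jobconfs=jobconfs)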
Example 16
 def image_to_superpixels(self, input_table, input_column, output_column, **kw):
     cmdenvs = {'HBASE_TABLE': input_table,
                'HBASE_OUTPUT_COLUMN': base64.b64encode(output_column)}
     hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'hadoop/image_to_superpixels.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=[input_column], single_value=True,
                          cmdenvs=cmdenvs, jobconfs={'mapred.task.timeout': '6000000'}, **kw)
Example 18
import hadoopy
import hadoopy_hbase
import time
import logging
logging.basicConfig(level=logging.DEBUG)

st = time.time()

# NOTE(brandyn): If launch fails, you may need to use launch_frozen; see hadoopy.com for details
out = 'out-%f/3' % st
hadoopy_hbase.launch('testtable', out, 'hbase_test_job.py', columns=['colfam1:'], libjars=['hadoopy_hbase.jar'], start_row='5', stop_row='52')
results = hadoopy.readtb(out)
print(list(results)[:10])

out = 'out-%f/1' % st
hadoopy_hbase.launch('testtable', out, 'hbase_test_job.py', columns=['colfam1:'], libjars=['hadoopy_hbase.jar'], jobconfs={'hbase.mapred.rowfilter': '.*3'})
results = hadoopy.readtb(out)
print(list(results)[:10])

out = 'out-%f/0' % st
hadoopy_hbase.launch('testtable', out, 'hbase_test_job.py', columns=['colfam1:'], libjars=['hadoopy_hbase.jar'])
results = hadoopy.readtb(out)
print(list(results)[:10])

out = 'out-%f/2' % st
hadoopy_hbase.launch('testtable', out, 'hbase_test_job2.py', columns=['colfam1:'], libjars=['hadoopy_hbase.jar'])
results = hadoopy.readtb(out)
print(list(results)[:10])
Example 19
import hadoopy
import hadoopy_hbase
import time
import logging

logging.basicConfig(level=logging.DEBUG)

st = time.time()

# NOTE(brandyn): If launch fails, you may need to use launch_frozen; see hadoopy.com for details
out = 'out-%f/3' % st
hadoopy_hbase.launch('testtable',
                     out,
                     'hbase_test_job.py',
                     columns=['colfam1:'],
                     libjars=['hadoopy_hbase.jar'],
                     start_row='5',
                     stop_row='52')
results = hadoopy.readtb(out)
print(list(results)[:10])

out = 'out-%f/1' % st
hadoopy_hbase.launch('testtable',
                     out,
                     'hbase_test_job.py',
                     columns=['colfam1:'],
                     libjars=['hadoopy_hbase.jar'],
                     jobconfs={'hbase.mapred.rowfilter': '.*3'})
results = hadoopy.readtb(out)
print(list(results)[:10])
Example 20
 def image_exif(self, **kw):
     cmdenvs = {'HBASE_TABLE': self.images_table,
                'HBASE_OUTPUT_COLUMN': base64.b64encode('meta:exif')}
     hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/image_exif.py', libjars=['hadoopy_hbase.jar'],
                          num_mappers=self.num_mappers, columns=['data:image'], single_value=True,
                          cmdenvs=cmdenvs, **kw)
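All of the launches in these examples hand the per-row work to a separate mapper script (masks_to_ilp.py, identity_hbase_job.py, and so on) whose contents are not shown. As a hedged illustration of that script side, a minimal identity mapper might look like the sketch below; it assumes, as the examples suggest, that hadoopy_hbase feeds each mapper the row key plus a dict of the requested columns, and it is not the actual identity_hbase_job.py.

# Hypothetical minimal HBase mapper script (not the real identity_hbase_job.py).
import hadoopy


def mapper(row, columns):
    # row: HBase row key; columns: assumed {'family:qualifier': cell_value} dict
    # covering the column families requested in the launch call.
    yield row, columns

if __name__ == '__main__':
    hadoopy.run(mapper)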