def _masks_to_ilp(self, input_table, input_column, output_column, **kw):
    # output_hdfs (an HDFS temp path prefix) and self.num_mappers are presumably defined
    # elsewhere in the surrounding class/module (compare takeout_chain_job below, which
    # builds the prefix from time.time()); each launch writes to a fresh randomized subdir.
    cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
               'HBASE_TABLE': input_table,
               'HBASE_OUTPUT_COLUMN': output_column}
    hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'masks_to_ilp.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[cmdenvs['HBASE_INPUT_COLUMN']], cmdenvs=cmdenvs, **kw)
def _image_to_superpixels(self, input_table, input_column, output_table, output_column):
    cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
               'HBASE_TABLE': output_table,
               'HBASE_OUTPUT_COLUMN': output_column}
    hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'image_to_superpixels.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[cmdenvs['HBASE_INPUT_COLUMN']], cmdenvs=cmdenvs,
                         jobconfs={'mapred.task.timeout': '6000000'})
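The HBASE_* cmdenvs above are the only channel these launchers use to pass parameters to the mapper scripts. A minimal sketch of how a mapper would read them back, assuming the usual Hadoop streaming behavior of exposing each cmdenv as an environment variable (the variable names below just follow the convention in these snippets; the real mapper scripts ship with the project):

import os

# Hadoop streaming exports each cmdenv as an environment variable in the task.
hbase_table = os.environ['HBASE_TABLE']
input_column = os.environ.get('HBASE_INPUT_COLUMN')
output_column = os.environ['HBASE_OUTPUT_COLUMN']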
def takeout_chain_job(self, model, input_column, output_column, **kw):
    model_fp = picarus.api.model_tofile(model)
    cmdenvs = {'HBASE_TABLE': self.images_table,
               'HBASE_OUTPUT_COLUMN': base64.b64encode(output_column),
               'MODEL_FN': os.path.basename(model_fp.name)}
    hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/takeout_chain_job.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         files=[model_fp.name], columns=[input_column], single_value=True,
                         jobconfs={'mapred.task.timeout': '6000000'}, cmdenvs=cmdenvs,
                         dummy_fp=model_fp, **kw)
def image_resize(self):
    cmdenvs = {'HBASE_INPUT_COLUMN': self.image_orig_column,
               'HBASE_TABLE': self.images_table,
               'HBASE_OUTPUT_COLUMN': self.image_column,
               'MAX_SIDE': 320}
    hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'image_resize.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[cmdenvs['HBASE_INPUT_COLUMN']], cmdenvs=cmdenvs)
def image_thumbnail(self):
    cmdenvs = {'HBASE_INPUT_COLUMN': self.image_orig_column,
               'HBASE_TABLE': self.images_table,
               'HBASE_OUTPUT_COLUMN': self.thumbnails_column,
               'SIZE': 75}
    hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'image_thumbnail.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[cmdenvs['HBASE_INPUT_COLUMN']], cmdenvs=cmdenvs)
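For illustration, a hypothetical shape for a mapper like image_resize.py: it reads MAX_SIDE from the cmdenvs, scales each image so its longest side fits, and emits (row, resized bytes). This is only a sketch under assumptions (PIL decoding, JPEG output, results emitted as key/value pairs rather than written back over Thrift); the actual script ships with the project.

import os
import StringIO

import Image  # PIL
import hadoopy


class Mapper(object):

    def __init__(self):
        # MAX_SIDE arrives as a cmdenv string (set to 320 in image_resize above)
        self.max_side = int(os.environ['MAX_SIDE'])

    def map(self, row, image_binary):
        img = Image.open(StringIO.StringIO(image_binary))
        w, h = img.size
        scale = self.max_side / float(max(w, h))
        if scale < 1:
            img = img.resize((int(w * scale), int(h * scale)), Image.ANTIALIAS)
        out = StringIO.StringIO()
        img.save(out, 'JPEG')
        yield row, out.getvalue()


if __name__ == '__main__':
    hadoopy.run(Mapper)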
def image_preprocessor(self, model_key, **kw):
    model, columns = self.key_to_model(model_key)
    model_fp = picarus.api.model_tofile(model)
    cmdenvs = {'HBASE_TABLE': self.images_table,
               'HBASE_OUTPUT_COLUMN': base64.b64encode(model_key),
               'MODEL_FN': os.path.basename(model_fp.name)}
    hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/image_preprocess.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         files=[model_fp.name], columns=[base64.urlsafe_b64decode(columns['input'])],
                         single_value=True, cmdenvs=cmdenvs, dummy_fp=model_fp,
                         check_script=False, make_executable=False, **kw)
def feature_to_hash(self, model_key, **kw):
    input_dict, hasher, _ = self.key_to_input_model_param(model_key)
    hasher_fp = picarus.api.model_tofile(hasher)
    cmdenvs = {'HBASE_TABLE': self.images_table,
               'HBASE_OUTPUT_COLUMN': base64.b64encode(model_key),
               'HASHER_FN': os.path.basename(hasher_fp.name)}
    hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/feature_to_hash.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[input_dict['feature']], files=[hasher_fp.name],
                         single_value=True, cmdenvs=cmdenvs, dummy_fp=hasher_fp, **kw)
def _feature_to_hash(self, hasher, input_table, input_column, output_table, output_column, **kw):
    hasher_fp = picarus.api.model_tofile(hasher)
    cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
               'HBASE_TABLE': input_table,
               'HBASE_OUTPUT_COLUMN': output_column,
               'HASHER_FN': os.path.basename(hasher_fp.name)}
    hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'feature_to_hash.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[cmdenvs['HBASE_INPUT_COLUMN']], files=[hasher_fp.name],
                         cmdenvs=cmdenvs, dummy_fp=hasher_fp, **kw)
def image_to_feature(self, model_key, **kw):
    model, columns = self.key_to_model(model_key)
    model_fp = picarus.api.model_tofile(model)
    cmdenvs = {'HBASE_TABLE': self.images_table,
               'HBASE_OUTPUT_COLUMN': base64.b64encode(model_key),
               'MODEL_FN': os.path.basename(model_fp.name)}
    hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/image_to_feature.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         files=[model_fp.name], columns=[base64.urlsafe_b64decode(columns['input'])],
                         single_value=True, jobconfs={'mapred.task.timeout': '6000000'},
                         cmdenvs=cmdenvs, dummy_fp=model_fp, **kw)
def _image_to_feature(self, feature, input_table, input_column, output_table, output_column):
    feature_fp = picarus.api.model_tofile(feature)
    cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
               'HBASE_TABLE': output_table,
               'HBASE_OUTPUT_COLUMN': output_column,
               'FEATURE_FN': os.path.basename(feature_fp.name)}
    hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'image_to_feature.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[cmdenvs['HBASE_INPUT_COLUMN']], cmdenvs=cmdenvs,
                         files=[feature_fp.name], jobconfs={'mapred.task.timeout': '6000000'},
                         dummy_fp=feature_fp)
def feature_to_prediction(self, model_key, **kw):
    input_dict, classifier, param, out = self.key_to_input_model_param_output(model_key)
    classifier_fp = picarus.api.model_tofile(classifier)
    classifier_type = 'sklearn_decision_func' if out == 'binary_class_confidence' else 'class_distance_list'
    cmdenvs = {'HBASE_TABLE': self.images_table,
               'HBASE_OUTPUT_COLUMN': base64.b64encode(model_key),
               'CLASSIFIER_FN': os.path.basename(classifier_fp.name),
               'CLASSIFIER_TYPE': classifier_type}
    hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/feature_to_prediction.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[input_dict['feature']], files=[classifier_fp.name],
                         single_value=True, cmdenvs=cmdenvs, dummy_fp=classifier_fp, **kw)
def _feature_to_prediction(self, classifier, input_table, input_column, output_table, output_column, **kw):
    classifier_fp = tempfile.NamedTemporaryFile()
    classifier_fp.write(classifier)
    classifier_fp.flush()
    cmdenvs = {'HBASE_INPUT_COLUMN': input_column,
               'HBASE_TABLE': input_table,
               'HBASE_OUTPUT_COLUMN': output_column,
               'CLASSIFIER_FN': os.path.basename(classifier_fp.name)}
    hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'feature_to_prediction.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[cmdenvs['HBASE_INPUT_COLUMN']], files=[classifier_fp.name],
                         cmdenvs=cmdenvs, dummy_fp=classifier_fp, **kw)
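The model-driven launchers above all rely on the same temp-file pattern that _feature_to_prediction spells out inline: serialize the model into a NamedTemporaryFile, ship it to the mappers via files=[...], pass its basename through a cmdenv, and keep the file object alive by threading it through as dummy_fp so it is not garbage collected (and its file deleted) before the job is submitted. A sketch of what picarus.api.model_tofile presumably does, under that assumption (the actual serialization is picarus-specific):

import tempfile


def model_tofile(model_binary):
    # Hypothetical stand-in for picarus.api.model_tofile: write the serialized
    # model to a named temp file and return the open file object so the caller
    # can reference fp.name in files=[...] and keep it alive via dummy_fp.
    fp = tempfile.NamedTemporaryFile()
    fp.write(model_binary)
    fp.flush()
    return fp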
def takeout_chain_job(self, table, model, input_column, output_column, start_row, stop_row, job_row):
    output_hdfs = 'picarus_temp/%f/' % time.time()
    model_fp = model_tofile(model)
    cmdenvs = {'HBASE_TABLE': table,
               'HBASE_OUTPUT_COLUMN': base64.b64encode(output_column),
               'MODEL_FN': os.path.basename(model_fp.name)}
    hadoop_wait_till_started(
        hadoopy_hbase.launch(table, output_hdfs + str(random.random()), 'hadoop/takeout_chain_job.py',
                             libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                             files=[model_fp.name], columns=[input_column], single_value=True,
                             jobconfs={'mapred.task.timeout': '6000000',
                                       'picarus.job.row': job_row},
                             cmdenvs=cmdenvs, dummy_fp=model_fp,
                             check_script=False, make_executable=False,
                             start_row=start_row, stop_row=stop_row,
                             name=job_row, wait=False))
def exif_job(self, start_row, stop_row, job_row):
    cmdenvs = {'HBASE_TABLE': 'images',
               'HBASE_OUTPUT_COLUMN': base64.b64encode('meta:exif')}
    output_hdfs = 'picarus_temp/%f/' % time.time()
    hadoop_wait_till_started(
        hadoopy_hbase.launch('images', output_hdfs + str(random.random()), 'hadoop/image_exif.py',
                             libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                             columns=['data:image'], single_value=True,
                             jobconfs={'mapred.task.timeout': '6000000',
                                       'picarus.job.row': job_row},
                             cmdenvs=cmdenvs, check_script=False, make_executable=False,
                             start_row=start_row, stop_row=stop_row,
                             name=job_row, wait=False))
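exif_job (and image_exif further below) only specify which column the EXIF data should land in; the extraction itself happens in hadoop/image_exif.py, which is not shown here. A hypothetical sketch of such a mapper, assuming PIL is used to pull the tags and the result is emitted as a JSON string keyed by row (the real script's output handling may differ):

import json
import StringIO

import Image  # PIL
import ExifTags  # PIL's tag-id -> tag-name table
import hadoopy


def mapper(row, image_binary):
    # single_value=True means the value is the raw cell contents of 'data:image'
    try:
        img = Image.open(StringIO.StringIO(image_binary))
        exif = img._getexif() or {}
    except (IOError, AttributeError):  # unreadable image or format without EXIF
        return
    tags = dict((ExifTags.TAGS.get(k, str(k)), str(v)) for k, v in exif.items())
    yield row, json.dumps(tags)


if __name__ == '__main__':
    hadoopy.run(mapper)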
import hadoopy
import hadoopy_hbase
import time
import logging
logging.basicConfig(level=logging.DEBUG)

st = time.time()
# NOTE(brandyn): If launch fails, you may need to use launch_frozen see hadoopy.com for details
out = 'out-%f/0' % st
jobconfs = ['mapred.map.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec',
            'mapred.compress.map.output=true',
            'mapred.output.compression.type=BLOCK']
hadoopy_hbase.launch('flickr', out, 'identity_hbase_job.py', libjars=['hadoopy_hbase.jar'],
                     num_mappers=8, columns=['metadata:'], jobconfs=jobconfs)
#results = dict(hadoopy.readtb(out))
#print(results)
def image_to_superpixels(self, input_table, input_column, output_column, **kw):
    cmdenvs = {'HBASE_TABLE': input_table,
               'HBASE_OUTPUT_COLUMN': base64.b64encode(output_column)}
    hadoopy_hbase.launch(input_table, output_hdfs + str(random.random()), 'hadoop/image_to_superpixels.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=[input_column], single_value=True, cmdenvs=cmdenvs,
                         jobconfs={'mapred.task.timeout': '6000000'}, **kw)
import hadoopy
import hadoopy_hbase
import time
import logging
logging.basicConfig(level=logging.DEBUG)

st = time.time()
# NOTE(brandyn): If launch fails, you may need to use launch_frozen see hadoopy.com for details
out = 'out-%f/3' % st
hadoopy_hbase.launch('testtable', out, 'hbase_test_job.py', columns=['colfam1:'],
                     libjars=['hadoopy_hbase.jar'], start_row='5', stop_row='52')
results = hadoopy.readtb(out)
print list(results)[:10]

out = 'out-%f/1' % st
hadoopy_hbase.launch('testtable', out, 'hbase_test_job.py', columns=['colfam1:'],
                     libjars=['hadoopy_hbase.jar'], jobconfs={'hbase.mapred.rowfilter': '.*3'})
results = hadoopy.readtb(out)
print list(results)[:10]

out = 'out-%f/0' % st
hadoopy_hbase.launch('testtable', out, 'hbase_test_job.py', columns=['colfam1:'],
                     libjars=['hadoopy_hbase.jar'])
results = hadoopy.readtb(out)
print list(results)[:10]

out = 'out-%f/2' % st
hadoopy_hbase.launch('testtable', out, 'hbase_test_job2.py', columns=['colfam1:'],
                     libjars=['hadoopy_hbase.jar'])
results = hadoopy.readtb(out)
print list(results)[:10]
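hbase_test_job.py and identity_hbase_job.py are referenced but not shown. A plausible minimal shape for such an identity job, under the assumption that it simply passes through the records handed in by the HBase input format:

import hadoopy


def mapper(row, columns):
    # Pass through whatever the HBase input format provides:
    # row key -> {column name: cell value} (or just the single cell value
    # when the job is launched with single_value=True).
    yield row, columns


if __name__ == '__main__':
    hadoopy.run(mapper)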
def image_exif(self, **kw):
    cmdenvs = {'HBASE_TABLE': self.images_table,
               'HBASE_OUTPUT_COLUMN': base64.b64encode('meta:exif')}
    hadoopy_hbase.launch(self.images_table, output_hdfs + str(random.random()), 'hadoop/image_exif.py',
                         libjars=['hadoopy_hbase.jar'], num_mappers=self.num_mappers,
                         columns=['data:image'], single_value=True, cmdenvs=cmdenvs, **kw)