def run_classifier_labels(hdfs_input_pos, hdfs_input_neg, hdfs_output, classifier_name, classifier_extra, local_labels, classifier, **kw):
    """
    TODO Finish docstring
    Args:
        hdfs_output: Path to hdfs temporary output or None if execution should be performed locally using hadoopy.launch_local.
    """
    labels = {}
    try:
        labels = file_parse.load(local_labels)
    except IOError:
        pass
    if hdfs_output is None:
        j = hadoopy.launch_local(hdfs_input_pos, None, _lf('collect_keys.py'))
        pos_keys = sum((x[1] for x in j['output']), [])
        j = hadoopy.launch_local(hdfs_input_neg, None, _lf('collect_keys.py'))
        neg_keys = sum((x[1] for x in j['output']), [])
    else:
        hdfs_output_pos = hdfs_output + '/pos'
        hdfs_output_neg = hdfs_output + '/neg'
        picarus._launch_frozen(hdfs_input_pos, hdfs_output_pos, _lf('collect_keys.py'))
        picarus._launch_frozen(hdfs_input_neg, hdfs_output_neg, _lf('collect_keys.py'))
        pos_keys = sum((x[1] for x in hadoopy.readtb(hdfs_output_pos)), [])
        neg_keys = sum((x[1] for x in hadoopy.readtb(hdfs_output_neg)), [])

    labels[classifier_name] = {'labels': {'1': pos_keys, '-1': neg_keys},
                               'classifier': classifier,
                               'classifier_extra': classifier_extra}
    file_parse.dump(labels, local_labels)
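# Hedged usage sketch, not part of the original project: the paths, classifier
# name, and settings below are hypothetical placeholders showing how the labels
# for one classifier could be collected and written to a local labels file.
def _example_run_classifier_labels():
    run_classifier_labels(hdfs_input_pos='/user/picarus/pos_feat',
                          hdfs_input_neg='/user/picarus/neg_feat',
                          hdfs_output='/user/picarus/tmp/collect_keys',
                          classifier_name='example_classifier',
                          classifier_extra='',
                          local_labels='labels.js',
                          classifier='svmlinear')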
def run_predict_classifier(hdfs_input, hdfs_classifier_input, hdfs_output, classes=None, image_hashes=None, **kw):
    import classipy
    # NOTE: Ship classipy's bundled library files with the frozen job
    files = glob.glob(classipy.__path__[0] + "/lib/*")
    fp = tempfile.NamedTemporaryFile(suffix='.pkl.gz')
    file_parse.dump([x for x in hadoopy.readtb(hdfs_classifier_input)
                     if classes is None or x[0] in classes], fp.name)
    files.append(fp.name)
    picarus._launch_frozen(hdfs_input, hdfs_output, _lf('predict_classifier.py'),
                           files=files, reducer=None,
                           cmdenvs=['CLASSIFIERS_FN=%s' % os.path.basename(fp.name)],
                           image_hashes=image_hashes,
                           dummy_arg=fp)  # Keeps the temporary file alive until the job is launched
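# Hedged usage sketch with hypothetical paths: run the stored classifiers over
# an HDFS input, keeping only the classifiers named in `classes`.
def _example_run_predict_classifier():
    run_predict_classifier(hdfs_input='/user/picarus/test_feat',
                           hdfs_classifier_input='/user/picarus/classifiers',
                           hdfs_output='/user/picarus/predictions',
                           classes=['example_classifier'],
                           image_hashes=None)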
def run_predict_windows(hdfs_input, hdfs_classifier_input, feature, hdfs_output, image_height, image_width, **kw):
    import classipy
    # NOTE: Ship classipy's bundled library files with the frozen job
    files = glob.glob(classipy.__path__[0] + "/lib/*")
    fp = tempfile.NamedTemporaryFile(suffix='.pkl.gz')
    file_parse.dump(list(hadoopy.readtb(hdfs_classifier_input)), fp.name)
    files.append(fp.name)
    files.append(_lf('data/haarcascade_frontalface_default.xml'))
    cmdenvs = ['CLASSIFIERS_FN=%s' % os.path.basename(fp.name)]
    cmdenvs += ['IMAGE_HEIGHT=%d' % image_height,
                'IMAGE_WIDTH=%d' % image_width,
                'FEATURE=%s' % feature]
    picarus._launch_frozen(hdfs_input, hdfs_output, _lf('predict_windows.py'),
                           cmdenvs=cmdenvs,
                           files=files,
                           dummy_arg=fp)  # Keeps the temporary file alive until the job is launched
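# Hedged usage sketch with hypothetical paths, feature name, and frame size:
# run the window predictor over 640x480 images using the stored classifiers.
def _example_run_predict_windows():
    run_predict_windows(hdfs_input='/user/picarus/frames',
                        hdfs_classifier_input='/user/picarus/classifiers',
                        feature='hist',
                        hdfs_output='/user/picarus/window_predictions',
                        image_height=480,
                        image_width=640)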
def _launch_frozen(in_path, out_path, script_path, jobconfs_default=(), *args, **kw):
    import hadoopy
    import os
    kw = dict(kw)  # Make a copy as we will be mutating it
    kw['frozen_tar_path'] = _freeze_script(script_path)['frozen_tar_path']
    if 'reducer' not in kw and 'num_reducers' not in kw:
        kw['num_reducers'] = 1
    # Append the global jobconfs to any caller-supplied jobconfs
    kw['jobconfs'] = list(kw.get('jobconfs', ())) + list(GLOBAL_JOBCONFS)
    if jobconfs_default:
        jobconfs_dict = dict(x.split('=', 1) for x in kw['jobconfs'])
        jobconfs_default_dict = dict(x.split('=', 1) for x in jobconfs_default)
        for jobconf_name, jobconf_value in jobconfs_default_dict.items():
            if jobconf_name not in jobconfs_dict:
                jobconfs_dict[jobconf_name] = jobconf_value
        kw['jobconfs'] = ['%s=%s' % x for x in jobconfs_dict.items()]
    if 'image_hashes' in kw and kw['image_hashes'] is not None:
        import tempfile
        fp = tempfile.NamedTemporaryFile(suffix='.pkl.gz')
        file_parse.dump(kw['image_hashes'], fp.name)
        try:
            kw['files'].append(fp.name)
        except KeyError:
            kw['files'] = [fp.name]
        try:
            kw['cmdenvs'].append('PICARUS_VALID_IMAGE_HASHES=%s' % os.path.basename(fp.name))
        except KeyError:
            kw['cmdenvs'] = ['PICARUS_VALID_IMAGE_HASHES=%s' % os.path.basename(fp.name)]
        kw['_internal_dummy_arg'] = fp  # Keep the object alive
        del kw['image_hashes']
        
    return hadoopy.launch_frozen(in_path, out_path, script_path, *args, **kw)
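# Hedged usage sketch with hypothetical paths and jobconf values, illustrating
# the jobconfs_default merge above: caller-supplied jobconfs (and GLOBAL_JOBCONFS)
# take precedence, so the default timeout below only applies if nothing else sets it.
def _example_launch_frozen():
    return _launch_frozen('/user/picarus/input', '/user/picarus/output',
                          _lf('predict_classifier.py'),
                          jobconfs_default=('mapred.task.timeout=6000000',),
                          jobconfs=['mapred.task.timeout=12000000'])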
def save_classifiers(hdfs_classifier_path, classes, name):
    if classes:
        classes = set(classes.split())
    file_parse.dump([x for x in hadoopy.readtb(hdfs_classifier_path)
                     if classes is None or x[0] in classes], name)
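# Hedged usage sketch (hypothetical paths and names): `classes` is a
# whitespace-separated string of classifier names, or None to keep every
# classifier stored on HDFS.
def _example_save_classifiers():
    save_classifiers('/user/picarus/classifiers',
                     'example_classifier other_classifier',
                     'classifiers.pkl.gz')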
def _report_clusters(hdfs_input, local_json_output, category, make_faces,
                     **kw):
    report = report_clusters(hdfs_input, category, make_faces, **kw)
    file_parse.dump(report, local_json_output)
def _report_video_keyframe(hdfs_input, local_json_output, **kw):
    report = report_video_keyframe(hdfs_input)
    file_parse.dump(report, local_json_output)