Example #1
def memoize(fn, paths):
    # Cache the result of fn() in an HDF5 file. paths is a list of
    # directory names ending with the cache filename; hdf5 and jsdict
    # are project-local helpers.
    cwd = os.getcwd()

    def change_to_target_dir():
        for d in paths[:-1]:
            try:
                os.mkdir(d)
            except OSError:
                pass  # directory already exists
            os.chdir(d)

    change_to_target_dir()
    filename = paths[-1]
    if os.path.exists(filename):
        # cache hit: load the stored result
        data = hdf5.read(filename)
        os.chdir(cwd)
        return jsdict(data)  # wrap like the compute path below

    # cache miss: compute from the original working directory
    os.chdir(cwd)
    data = fn()
    change_to_target_dir()
    # write to a pid-suffixed temp file and rename it into place, so a
    # concurrent process never reads a partially written cache file
    tmp = '%s.pid.%d.tmp' % (filename, os.getpid())
    hdf5.write(tmp, data)
    os.rename(tmp, filename)
    os.chdir(cwd)

    return jsdict(data)
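
The caching contract is easiest to see in use. A minimal sketch, assuming hdf5 is the project's read/write helper and jsdict a dict subclass that allows attribute access; compute_features and the path components below are hypothetical:

import numpy as np

def compute_features():
    # hypothetical expensive computation worth caching
    return {'X': np.random.rand(100, 16), 'y': np.random.randint(0, 2, 100)}

# First call runs compute_features() and writes data-cache/features/train.hdf5;
# subsequent calls (from any process, thanks to the tmp-then-rename write)
# load the cached file instead of recomputing.
data = memoize(compute_features, ['data-cache', 'features', 'train.hdf5'])
print(data.X.shape)  # attribute access via jsdict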
Example #2
def process_and_merge_segments(target, data_type, out_dir, metadata, N_jobs):
    # Pool comes from multiprocessing, np is numpy; settings,
    # process_data_sub_job, collect_metadata and accumulate_data are
    # module-level helpers from the surrounding project.
    filename_out = os.path.join(out_dir, '%s_%s.hdf5' % (target, data_type))

    if os.path.exists(filename_out):
        return 0  # merged output already exists, nothing to do

    print('Processing %s ...' % filename_out)

    # the escaped %% survives this substitution, leaving a literal
    # %.4d / %d placeholder for the per-segment index
    filename_in_fmt = '%s_%s_segment_%%.4d' % (target, data_type)
    filename_out_fmt = '%s/%s_%s_segment_%%d.hdf5' % (out_dir, target, data_type)

    # single-process equivalent (job 0 of 1):
    # process_data_sub_job(settings, filename_in_fmt, filename_out_fmt, 0, 1)
    pool = Pool(N_jobs)
    results = [pool.apply_async(process_data_sub_job,
                                [settings, filename_in_fmt, filename_out_fmt, job_id, N_jobs])
               for job_id in range(N_jobs)]
    pool.close()
    pool.join()

    # each sub-job reports how many segments it processed
    num_processed = np.sum([r.get() for r in results])
    for i in range(num_processed):
        data = hdf5.read(filename_out_fmt % i)
        collect_metadata(data, metadata)

    _, accum_meta = accumulate_data(settings, target, data_type, tag=None,
        output_to_original_data_dir=True, quiet=True)

    return accum_meta.num_segments
Example #3
def process_and_merge_segments(target, data_type, out_dir, metadata, N_jobs):
    filename_out = os.path.join(out_dir, '%s_%s.hdf5' % (target, data_type))

    if os.path.exists(filename_out):
        return 0

    print('Processing %s ...' % filename_out)

    filename_in_fmt = '%s_%s_segment_%%.4d' % (target, data_type)
    filename_out_fmt = '%s/%s_%s_segment_%%d.hdf5' % (out_dir, target,
                                                      data_type)

    # process_data_sub_job(settings, filename_in_fmt, filename_out_fmt, 0, 1)
    pool = Pool(N_jobs)
    results = [
        pool.apply_async(
            process_data_sub_job,
            [settings, filename_in_fmt, filename_out_fmt, job_id, N_jobs])
        for job_id in range(N_jobs)
    ]
    pool.close()
    pool.join()

    num_processed = np.sum([r.get() for r in results])
    for i in range(num_processed):
        data = hdf5.read(filename_out_fmt % i)
        collect_metadata(data, metadata)

    _, accum_meta = accumulate_data(settings,
                                    target,
                                    data_type,
                                    tag=None,
                                    output_to_original_data_dir=True,
                                    quiet=True)

    return accum_meta.num_segments
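
Examples #2 and #3 fan the work out with multiprocessing.Pool.apply_async, presumably with each sub-job taking every N_jobs-th segment. A self-contained sketch of that fan-out pattern with a toy worker (all names hypothetical):

from multiprocessing import Pool

def sub_job(job_id, n_jobs, n_items=10):
    # each worker handles items job_id, job_id + n_jobs, job_id + 2*n_jobs, ...
    return len(range(job_id, n_items, n_jobs))

if __name__ == '__main__':
    n_jobs = 4
    pool = Pool(n_jobs)
    results = [pool.apply_async(sub_job, [job_id, n_jobs])
               for job_id in range(n_jobs)]
    pool.close()
    pool.join()  # wait for all sub-jobs before collecting results
    print(sum(r.get() for r in results))  # 10: each item handled exactly once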
Example #4
# features in FeatureConcatPipeline.
def memoize(fn, paths):
    cwd = os.getcwd()

    def change_to_target_dir():
        for d in paths[:-1]:
            try:
                os.mkdir(d)
            except OSError:
                pass  # directory already exists
            os.chdir(d)

    change_to_target_dir()
    filename = paths[-1]
    if os.path.exists(filename):
        data = hdf5.read(filename)
        os.chdir(cwd)
        return jsdict(data)  # wrap like the compute path below

    os.chdir(cwd)
    data = fn()
    change_to_target_dir()
    tmp = '%s.pid.%d.tmp' % (filename, os.getpid())
    hdf5.write(tmp, data)
    os.rename(tmp, filename)
    os.chdir(cwd)

    return jsdict(data)


# Fast process-if-not-yet-processed method for training data
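
Both memoize examples publish the cache file by writing to a pid-suffixed temp file and renaming it into place. That pattern generalizes to any serialization format; a minimal sketch with json standing in for the project's hdf5 helper:

import json
import os

def atomic_write_json(path, obj):
    # pid-suffixed temp name, mirroring the memoize examples above
    tmp = '%s.pid.%d.tmp' % (path, os.getpid())
    with open(tmp, 'w') as f:
        json.dump(obj, f)
    # os.replace is atomic on POSIX and overwrites on Windows too, so a
    # concurrent reader sees either the old file or the complete new one
    os.replace(tmp, path)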