Example #1
    def _write_sample_results(self, results, full_mask, roi_indices):
        """Write the sample results to a .npy file.

        If a given sample file does not exist, or if the existing file is not large enough, a new file is created
        with enough storage to hold all the samples for the given total number of voxels.
        When storing, roi_indices lists the indices of the voxels being written.

        Args:
            results (dict): the samples to write
            full_mask (ndarray): the complete mask for the entire brain
            roi_indices (ndarray): the roi indices of the voxels we computed
        """
        total_nmr_voxels = np.count_nonzero(full_mask)

        if not os.path.exists(self._output_dir):
            os.makedirs(self._output_dir)

        for map_name, samples in results.items():
            samples_path = os.path.join(self._output_dir, map_name + '.samples.npy')
            mode = 'w+'
            if os.path.isfile(samples_path):
                mode = 'r+'
                current_results = open_memmap(samples_path, mode='r')
                if current_results.shape[1] != samples.shape[1]:
                    mode = 'w+'
                del current_results # closes the memmap

            saved = open_memmap(samples_path, mode=mode, dtype=samples.dtype,
                                shape=(total_nmr_voxels, samples.shape[1]))
            saved[roi_indices, :] = samples
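
A minimal read-back sketch for the files written above (the 'S0' map name and the variable names are illustrative, not from the original code); only the rows listed in roi_indices hold written values:

samples = open_memmap(os.path.join(output_dir, 'S0.samples.npy'), mode='r')
roi_samples = samples[roi_indices, :]  # rows never written keep the file's initial (typically zero-filled) contents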
Example #2
    def _write_volumes(self, roi_indices, results, tmp_dir):
        """Write the result arrays to the temporary storage

        Args:
            roi_indices (ndarray): the indices of the voxels we computed
            results (dict): the dictionary with the results to save
            tmp_dir (str): the directory to save the intermediate results to
        """
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)

        volume_indices = self._volume_indices[roi_indices, :]

        for param_name, result_array in results.items():
            storage_path = os.path.join(tmp_dir, param_name + '.npy')

            map_4d_dim_len = 1
            if len(result_array.shape) > 1:
                map_4d_dim_len = result_array.shape[1]
            else:
                result_array = np.reshape(result_array, (-1, 1))

            mode = 'w+'
            if os.path.isfile(storage_path):
                mode = 'r+'
            tmp_matrix = open_memmap(storage_path, mode=mode, dtype=result_array.dtype,
                                     shape=self._problem_data.mask.shape[0:3] + (map_4d_dim_len,))
            tmp_matrix[volume_indices[:, 0], volume_indices[:, 1], volume_indices[:, 2]] = result_array

        mask_path = os.path.join(tmp_dir, '{}.npy'.format(self._used_mask_name))
        mode = 'w+'
        if os.path.isfile(mask_path):
            mode = 'r+'
        tmp_mask = open_memmap(mask_path, mode=mode, dtype=bool, shape=self._problem_data.mask.shape)
        tmp_mask[volume_indices[:, 0], volume_indices[:, 1], volume_indices[:, 2]] = True
Example #3
def test_version_2_0_memmap(tmpdir):
    # a dtype whose header does not fit in the 2-byte header-length field of format version 1.0
    dt = [(("%d" % i) * 100, float) for i in range(500)]
    d = np.ones(1000, dtype=dt)
    tf1 = os.path.join(tmpdir, f'version2_01.npy')
    tf2 = os.path.join(tmpdir, f'version2_02.npy')

    # 1.0 requested but data cannot be saved this way
    assert_raises(ValueError, format.open_memmap, tf1, mode='w+', dtype=d.dtype,
                            shape=d.shape, version=(1, 0))

    ma = format.open_memmap(tf1, mode='w+', dtype=d.dtype,
                            shape=d.shape, version=(2, 0))
    ma[...] = d
    ma.flush()
    ma = format.open_memmap(tf1, mode='r')
    assert_array_equal(ma, d)

    with warnings.catch_warnings(record=True) as w:
        warnings.filterwarnings('always', '', UserWarning)
        ma = format.open_memmap(tf2, mode='w+', dtype=d.dtype,
                                shape=d.shape, version=None)
        assert_(w[0].category is UserWarning)
        ma[...] = d
        ma.flush()

    ma = format.open_memmap(tf2, mode='r')
    assert_array_equal(ma, d)
Example #4
def evaluate_model(model, global_params, output_dir):
    testing_input = open_memmap(global_params['testing-x'])
    testing_output = open_memmap(global_params['testing-y'])

    xfrm_params = eval(open(global_params['transform-y']).read())

    predmtx = model.predict(testing_input, global_params['batchsize_test'])
    expected_mtx = np.array([
        (testing_output[:, 0] * xfrm_params['scale_std']) +
        xfrm_params['scale_mean'],
        (testing_output[:, 1] * xfrm_params['shift_std']) +
        xfrm_params['shift_mean']
    ]).T
    predmtx = np.array([
        (predmtx[:, 0] * xfrm_params['scale_std']) + xfrm_params['scale_mean'],
        (predmtx[:, 1] * xfrm_params['shift_std']) + xfrm_params['shift_mean']
    ]).T

    dt = np.hstack([expected_mtx, predmtx])

    print("\t\tscale\tshift")
    print("Pearson r\t{:.5f}\t{:.5f}".format(
        pearsonr(dt[:, 0], dt[:, 2])[0],
        pearsonr(dt[:, 1], dt[:, 3])[0]))
    print("RMSD\t\t{:.5f}\t{:.5f}".format(
        ((dt[:, 0] - dt[:, 2])**2).mean()**0.5,
        ((dt[:, 1] - dt[:, 3])**2).mean()**0.5))

    np.save(os.path.join(output_dir, 'test-output.npy'), dt)
Example #5
    def _store_sample(self, optimization_results, roi_indices, sample_ind):
        """Store the optimization results as a next sample."""
        if not os.path.exists(self._output_dir):
            os.makedirs(self._output_dir)

        if self._sample_storage is None:
            self._sample_storage = {}
            for key, value in optimization_results.items():
                samples_path = os.path.join(self._output_dir,
                                            key + '.samples.npy')
                mode = 'w+'

                if os.path.isfile(samples_path):
                    mode = 'r+'
                    current_results = open_memmap(samples_path, mode='r')
                    if current_results.shape[1] != self._nmr_samples:
                        mode = 'w+'  # opening the memmap with w+ creates a new one
                    del current_results  # closes the memmap

                shape = [self._total_nmr_voxels, self._nmr_samples]
                if value.ndim > 1:
                    shape.extend(value.shape[1:])
                self._sample_storage[key] = open_memmap(samples_path,
                                                        mode=mode,
                                                        dtype=value.dtype,
                                                        shape=tuple(shape))

        for key, value in optimization_results.items():
            self._sample_storage[key][roi_indices, sample_ind] = value
Example #6
    def test_alloc(self):
        with tempfile.TemporaryDirectory() as tdir:
            fname = os.path.join(tdir, "vdat")
            np.save(fname, self.data)
            dmap = open_memmap(fname + ".npy")

            # illegal type
            with pytest.raises(SPYTypeError):
                VirtualData({})

            # 2darray expected
            d3 = np.ones((2, 3, 4))
            np.save(fname + "3", d3)
            d3map = open_memmap(fname + "3.npy")
            with pytest.raises(SPYValueError):
                VirtualData([d3map])

            # rows/cols don't match up
            with pytest.raises(SPYValueError):
                VirtualData([dmap, dmap.T])

            # check consistency of VirtualData object
            for vk in range(2, 6):
                vdata = VirtualData([dmap] * vk)
                assert vdata.dtype == dmap.dtype
                assert vdata.M == dmap.shape[0]
                assert vdata.N == vk * dmap.shape[1]

            # Delete all open references to file objects before closing tmp dir
            del dmap, vdata, d3map
Example #7
def test_memmap_roundtrip():
    # XXX: test crashes nose on windows. Fix this
    if not (sys.platform == "win32" or sys.platform == "cygwin"):
        for arr in basic_arrays + record_arrays:
            if arr.dtype.hasobject:
                # Skip these since they can't be mmap'ed.
                continue
            # Write it out normally and through mmap.
            nfn = os.path.join(tempdir, "normal.npy")
            mfn = os.path.join(tempdir, "memmap.npy")
            fp = open(nfn, "wb")
            try:
                format.write_array(fp, arr)
            finally:
                fp.close()

            fortran_order = arr.flags.f_contiguous and not arr.flags.c_contiguous
            ma = format.open_memmap(mfn, mode="w+", dtype=arr.dtype, shape=arr.shape, fortran_order=fortran_order)
            ma[...] = arr
            del ma

            # Check that both of these files' contents are the same.
            fp = open(nfn, "rb")
            normal_bytes = fp.read()
            fp.close()
            fp = open(mfn, "rb")
            memmap_bytes = fp.read()
            fp.close()
            yield assert_equal, normal_bytes, memmap_bytes

            # Check that reading the file using memmap works.
            ma = format.open_memmap(nfn, mode="r")
            # yield assert_array_equal, ma, arr
            del ma
Example #8
def test_memmap_roundtrip(tmpdir):
    for i, arr in enumerate(basic_arrays + record_arrays):
        if arr.dtype.hasobject:
            # Skip these since they can't be mmap'ed.
            continue
        # Write it out normally and through mmap.
        nfn = os.path.join(tmpdir, f'normal{i}.npy')
        mfn = os.path.join(tmpdir, f'memmap{i}.npy')
        with open(nfn, 'wb') as fp:
            format.write_array(fp, arr)

        fortran_order = (arr.flags.f_contiguous and not arr.flags.c_contiguous)
        ma = format.open_memmap(mfn,
                                mode='w+',
                                dtype=arr.dtype,
                                shape=arr.shape,
                                fortran_order=fortran_order)
        ma[...] = arr
        ma.flush()

        # Check that both of these files' contents are the same.
        with open(nfn, 'rb') as fp:
            normal_bytes = fp.read()
        with open(mfn, 'rb') as fp:
            memmap_bytes = fp.read()
        assert_equal_(normal_bytes, memmap_bytes)

        # Check that reading the file using memmap works.
        ma = format.open_memmap(nfn, mode='r')
        ma.flush()
Example #9
def gen_bone_data(arg):
    """Generate bone data from joint data for NTU skeleton dataset"""
    if arg.data_path:
        data = np.load(arg.data_path)
    else:
        data = np.load(
            r'C:\Users\chuaz\Unofficial-DGNN-PyTorch\data\test_data_joint.npy')
    N, C, T, V, M = data.shape
    if arg.data_path:
        fp_sp = open_memmap(arg.data_path,
                            dtype='float32',
                            mode='w+',
                            shape=(N, 2, T, V, M))
    else:
        fp_sp = open_memmap(
            r'C:\Users\chuaz\Unofficial-DGNN-PyTorch\data\test_data_bone.npy',
            dtype='float32',
            mode='w+',
            shape=(N, 2, T, V, M))

    # Copy the joints data to bone placeholder tensor
    fp_sp[:, :C, :, :, :] = data
    for v1, v2 in tqdm(paris['xview']):
        # NTU joint indices are 1-based; shift them to 0-based
        v1 -= 1
        v2 -= 1
        # Assign bones to be joint1 - joint2, the pairs are pre-determined and hardcoded
        # There also happens to be 25 bones
        fp_sp[:, :, :, v1, :] = data[:, :, :, v1, :] - data[:, :, :, v2, :]
Example #10
    def __init__(
        self,
        transient_path,
        parameter_path,
        noise_multiplier,
        noise_path=None,
    ):
        super(CustomTransientDataset, self).__init__()

        self.transient_mmap = open_memmap(transient_path, mode="r")
        self.parameter_mmap = open_memmap(parameter_path, mode="r")
        self.noise_multiplier = noise_multiplier
        if noise_path is not None:
            self.noise_mmap = open_memmap(noise_path, mode="r")
            self.noise_len = len(self.noise_mmap)
        else:
            self.noise_mmap = None

        self.shape = self.transient_mmap.shape[-2:]

        self.length = len(self.transient_mmap)

        print(f"Transient shapes: {self.transient_mmap.shape}")
        if self.noise_mmap is not None:
            print(f"Noise shapes: {self.noise_mmap.shape}")
Example #11
def test_memmap_roundtrip():
    # Fixme: test crashes nose on windows.
    if not (sys.platform == 'win32' or sys.platform == 'cygwin'):
        for arr in basic_arrays + record_arrays:
            if arr.dtype.hasobject:
                # Skip these since they can't be mmap'ed.
                continue
            # Write it out normally and through mmap.
            nfn = os.path.join(tempdir, 'normal.npy')
            mfn = os.path.join(tempdir, 'memmap.npy')
            fp = open(nfn, 'wb')
            try:
                format.write_array(fp, arr)
            finally:
                fp.close()

            fortran_order = (
                arr.flags.f_contiguous and not arr.flags.c_contiguous)
            ma = format.open_memmap(mfn, mode='w+', dtype=arr.dtype,
                                    shape=arr.shape, fortran_order=fortran_order)
            ma[...] = arr
            del ma

            # Check that both of these files' contents are the same.
            fp = open(nfn, 'rb')
            normal_bytes = fp.read()
            fp.close()
            fp = open(mfn, 'rb')
            memmap_bytes = fp.read()
            fp.close()
            yield assert_equal_, normal_bytes, memmap_bytes

            # Check that reading the file using memmap works.
            ma = format.open_memmap(nfn, mode='r')
            del ma
Example #12
def test_version_2_0_memmap():
    # a dtype whose header does not fit in the 2-byte header-length field of format version 1.0
    dt = [(("%d" % i) * 100, float) for i in range(500)]
    d = np.ones(1000, dtype=dt)
    tf = tempfile.mktemp('', 'mmap', dir=tempdir)

    # 1.0 requested but data cannot be saved this way
    assert_raises(ValueError, format.open_memmap, tf, mode='w+', dtype=d.dtype,
                            shape=d.shape, version=(1, 0))

    ma = format.open_memmap(tf, mode='w+', dtype=d.dtype,
                            shape=d.shape, version=(2, 0))
    ma[...] = d
    del ma

    with warnings.catch_warnings(record=True) as w:
        warnings.filterwarnings('always', '', UserWarning)
        ma = format.open_memmap(tf, mode='w+', dtype=d.dtype,
                                shape=d.shape, version=None)
        assert_(w[0].category is UserWarning)
        ma[...] = d
        del ma

    ma = format.open_memmap(tf, mode='r')
    assert_array_equal(ma, d)
Example #13
def save_data(part, out_path, sample_label, sample_name, sample_data,
              valid_frame_num):

    with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f:
        pickle.dump((sample_name, list(sample_label)), f)

    # The model will be pre-trained on NTU RGB-D, which has up to 2 skeletons per sample;
    # CAD has only 1 body at a time, but we want to keep the same data dimensions.
    fp = open_memmap('{}/{}_data.npy'.format(out_path, part),
                     dtype='float32',
                     mode='w+',
                     shape=(len(sample_label), 3, _window_size, _num_joint, 2))

    # num of frames of every sample stored here, every sample has equal length now
    fl = open_memmap('{}/{}_num_frame.npy'.format(out_path, part),
                     dtype='int',
                     mode='w+',
                     shape=(len(sample_label), ))

    for i, s in enumerate(sample_name):
        print_toolbar(
            i * 1.0 / len(sample_label),
            '({:>5}/{:<5}) Processing {:<5} data: '.format(
                i + 1, len(sample_name), part))
        fp[i, :, :, :, 0] = sample_data[i, :, :, :]

        fl[i] = _cut_frames  # num_frame
    end_toolbar()
Example #14
def main(global_params, output_dir):
    training_input = open_memmap(global_params['training-x'])
    training_output = open_memmap(global_params['training-y'])

    model = create_training_model(global_params, training_input.shape[1:],
                                  training_output.shape[1])

    if os.path.isdir(output_dir):
        if any((not f.startswith('.')) for f in os.listdir(output_dir)):
            print('Clearing {}...'.format(output_dir))
            shutil.rmtree(output_dir)
            os.mkdir(output_dir)
    else:
        os.makedirs(output_dir)

    train_model(model, global_params, training_input, training_output,
                output_dir)

    print('Adopting the weights to a new model for CPU')
    cpu_model = \
        convert_model_to_noncudnn(model, global_params, training_input,
                                  training_output, output_dir)

    del training_input, training_output

    print('Evaluation of the CUDA model')
    evaluate_model(model, global_params, output_dir)

    print('Evaluation of the CPU model')
    evaluate_model(cpu_model, global_params, output_dir)
Example #15
def generate_data(data_path,
                  out_path,
                  ignore_sample_path=None,
                  benchmark='cv',
                  dataset='test'):
    if ignore_sample_path != None:
        with open(ignore_sample_path, 'r') as f:
            ignore_samples = [line.strip() + '.skeleton' for line in f.readlines()]
    else:
        ignore_samples = []
    sample_name = []
    sample_label = []
    for filename in os.listdir(data_path):
        if filename in ignore_samples:
            continue
        action_class = int(filename[filename.find('A') + 1:filename.find('A') + 4])
        subject_id = int(filename[filename.find('P') + 1:filename.find('P') + 4])
        camera_id = int(filename[filename.find('C') + 1:filename.find('C') + 4])
        if benchmark == 'cv':
            training = (camera_id in training_cameras)
        elif benchmark == 'cs':
            training = (subject_id in training_subjects)
        else:
            raise ValueError()
        if dataset == 'train':
            training = training
        elif dataset == 'test':
            training = not training
        else:
            raise ValueError()
        if training:
            sample_name.append(filename)
            sample_label.append(action_class - 1)
    if dataset == 'train':
        sample_name, val_name, sample_label, val_label = train_test_split(sample_name, sample_label, test_size=0.05,
                                                                          random_state=10000)
        with open('{}/val_label.pkl'.format(out_path), 'wb') as f:
            pickle.dump((val_name, list(val_label)), f)
        f_data = open_memmap('{}/val_data.npy'.format(out_path),
                             dtype='float32',
                             mode='w+',
                             shape=(len(val_label), 3, max_frame, num_joint, max_body))
        for idx, s in enumerate(val_name):
            print_output(idx * 1.0 / len(val_label), '({:>5}/{:<5}) Processing {:>5}-{:<5} data: '
                         .format(idx + 1, len(val_name), benchmark, 'val'))
            data = read_xyz(os.path.join(data_path, s), max_body=max_body, num_joint=num_joint)
            f_data[idx, :, 0:data.shape[1], :, :] = data
    with open('{}/{}_label.pkl'.format(out_path, dataset), 'wb') as f:
        pickle.dump((sample_name, list(sample_label)), f)
    f_data = open_memmap('{}/{}_data.npy'.format(out_path, dataset),
                         dtype='float32',
                         mode='w+',
                         shape=(len(sample_label), 3, max_frame, num_joint, max_body))
    for idx, s in enumerate(sample_name):
        print_output(idx * 1.0 / len(sample_label), '({:>5}/{:<5}) Processing {:>5}-{:<5} data: '
                     .format(idx + 1, len(sample_name), benchmark, dataset))
        data = read_xyz(os.path.join(data_path, s), max_body=max_body, num_joint=num_joint)
        f_data[idx, :, 0:data.shape[1], :, :] = data
    sys.stdout.write('\n')
Example #16
def csv_to_npy(input_folder, input_filename, output_folder, astro_cols, photo_cols, bestindex_col,
               header=False):
    '''
    Convert a .csv file representation of a photometric catalogue into the
    appropriate .npy binary files used in the cross-matching process.

    Parameters
    ----------
    input_folder : string
        Folder on disk where the catalogue .csv file is stored.
    input_filename : string
        Name of the .csv file, without the extension, to convert to binary files.
    output_folder : string
        Folder on disk of where to save the .npy versions of the catalogue.
    astro_cols : list or numpy.array of integers
        List of zero-indexed columns in the input catalogue representing the
        three required astrometric parameters, two orthogonal sky axis
        coordinates and a single, circular astrometric precision.
    photo_cols : list or numpy.array of integers
        List of zero-indexed columns in the input catalogue representing the
        magnitudes of each photometric source to be used in the cross-matching.
    bestindex_col : integer
        Zero-indexed column of the flag indicating which of the available
        photometric brightnesses (represented by ``photo_cols``) is the
        preferred choice -- usually the most precise and highest quality
        detection.
    header : boolean, optional
        Flag indicating whether the .csv file has a first line with the names
        of the columns in it, or whether the first line of the file is the first
        line of the dataset.
    '''
    astro_cols, photo_cols = np.array(astro_cols), np.array(photo_cols)
    with open('{}/{}.csv'.format(input_folder, input_filename)) as fp:
        n_rows = 0 if not header else -1
        for _ in fp:
            n_rows += 1

    astro = open_memmap('{}/con_cat_astro.npy'.format(output_folder), mode='w+', dtype=float,
                        shape=(n_rows, 3))
    photo = open_memmap('{}/con_cat_photo.npy'.format(output_folder), mode='w+', dtype=float,
                        shape=(n_rows, len(photo_cols)))
    best_index = open_memmap('{}/magref.npy'.format(output_folder), mode='w+', dtype=int,
                             shape=(n_rows,))

    used_cols = np.concatenate((astro_cols, photo_cols, [bestindex_col]))
    new_astro_cols = np.array([np.where(used_cols == a)[0][0] for a in astro_cols])
    new_photo_cols = np.array([np.where(used_cols == a)[0][0] for a in photo_cols])
    new_bestindex_col = np.where(used_cols == bestindex_col)[0][0]
    n = 0
    for chunk in pd.read_csv('{}/{}.csv'.format(input_folder, input_filename), chunksize=100000,
                             usecols=used_cols, header=None if not header else 0):
        astro[n:n+chunk.shape[0]] = chunk.values[:, new_astro_cols]
        photo[n:n+chunk.shape[0]] = chunk.values[:, new_photo_cols]
        best_index[n:n+chunk.shape[0]] = chunk.values[:, new_bestindex_col]
        n += chunk.shape[0]

    return
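
A hypothetical invocation of csv_to_npy above; the folder names, column indices, and presence of a header row are assumptions for illustration only:

csv_to_npy('input_csvs', 'my_catalogue', 'binary_cat',
           astro_cols=[0, 1, 2], photo_cols=[3, 4], bestindex_col=5, header=True)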
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("datadir", help="Path to data")
    parser.add_argument("cachedir", help="Metadata storage path")
    args = parser.parse_args()

    print("NOTE: The annotations files should be converted to UTF8 beforehand.")

    print("Reading sequence durations")

    filepaths = []
    durations = []
    for dirpath, _, filenames in os.walk(args.datadir):
        for f in filenames:
            print("\r" + f, end='', flush=True)
            filepath = os.path.join(dirpath, f)
            filepaths.append(filepath)
            signer, label, sess, date = Recording.parse_archive_name(filepath)
            recording = Recording(args.datadir, signer, label, sess, date)
            durations.append(recording.duration)

    durations = np.array(durations, dtype=np.int64)
    file_offsets = np.cumsum(durations) - durations
    subsequences = np.stack([file_offsets, file_offsets + durations], axis=1)

    metadata_dtype = [('signer', 'u1'), ('sess', 'u1'), ('date', 'U8'),
                      ('label', 'i4'), ('duration', 'u4'), ('skel_data_off', 'u8')]
    info = np.empty((len(durations),), dtype=metadata_dtype)

    dump_file = os.path.join(args.cachedir, 'poses_2d.npy')
    storage = open_memmap(dump_file, 'w+', dtype=np.int16,
                          shape=(durations.sum(), 20, 2))
    poses2d = split_seq(storage, subsequences)

    dump_file = os.path.join(args.cachedir, 'poses_3d.npy')
    storage = open_memmap(dump_file, 'w+', dtype=np.float32,
                          shape=(durations.sum(), 20, 3))
    poses3d = split_seq(storage, subsequences)

    for i in range(len(durations)):
        print("\r{} / {}".format(i, len(durations)), end='', flush=True)
        signer, label, sess, date = Recording.parse_archive_name(filepaths[i])
        r = Recording(args.datadir, signer, label, sess, date)
        info[i]['signer'] = signer
        info[i]['sess'] = sess
        info[i]['date'] = date
        info[i]['label'] = label
        info[i]['duration'] = r.duration
        info[i]['skel_data_off'] = subsequences[i, 0]
        poses3d[i][...], poses2d[i][...] = r.poses()

    np.save(os.path.join(args.cachedir, 'rec_info.npy'), info)
Example #18
 def split(self, split=0.20, seed=None):
     dtype = self.images.dtype
     if split > 0.0:
         # Split with stratify
         train_idx, test_idx = train_test_split(range(len(self.images)),
                                                test_size=split,
                                                random_state=seed,
                                                shuffle=True,
                                                stratify=self.cls)
         self.random_idx = train_idx + test_idx
     else:
         train_idx = np.random.permutation(range(len(self.images)))
         test_idx = []
         self.random_idx = train_idx
     print("@ Split mapping...")
     img_size = self.images.shape[1:]
     # Memmap splitting
     if self.use_mmap:
         print("@ Split mapping - deleting old memmap files")
         train_filename = os.path.join(self.mmap_directory, "train.npy")
         test_filename = os.path.join(self.mmap_directory, "test.npy")
         self.delete_memmap_files(del_split=True, del_source=False)
         print("@ Split mapping - creating new memmap files")
         self.train_images = open_memmap(train_filename,
                                         dtype=dtype,
                                         mode='w+',
                                         shape=(len(train_idx), ) +
                                         img_size)
         self.test_images = open_memmap(test_filename,
                                        dtype=dtype,
                                        mode='w+',
                                        shape=(len(test_idx), ) + img_size)
         print("@ Split mapping - copying train images")
         for i in range(len(train_idx)):
             self.train_images[i] = self.images[train_idx[i]]
         print("@ Split mapping - copying test images")
         for i in range(len(test_idx)):
             self.test_images[i] = self.images[test_idx[i]]
     # Normal splitting
     else:
         self.train_images = self.images[train_idx]
         self.test_images = self.images[test_idx]
     # Remainder
     self.train_cls = self.cls[train_idx]
     self.test_cls = self.cls[test_idx]
     self.train_onehots = self.onehots[train_idx]
     self.test_onehots = self.onehots[test_idx]
     self.train_df = self.data_df.iloc[train_idx, :]
     self.test_df = self.data_df.iloc[test_idx, :]
     print("@ Split mapping - done")
Example #19
def gendata(dataset_path, out_path, benchmark, part='eval'):
    dataset = NTUMotionProcessor(
        '{}/{}_data.npy'.format(os.path.join(dataset_path, benchmark), part),
        '{}/{}_label.pkl'.format(os.path.join(dataset_path, benchmark), part),
        data_type='relative',
        t_length=max_frame,
        y_rotation=True,
        sampling='resize',
        displacement=1,
        mmap=True)

    data_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=False,
        pin_memory=True,
        num_workers=1,
        drop_last=False)

    f_position = open_memmap(
        '{}/{}_position.npy'.format(out_path, part),
        dtype='float32',
        mode='w+',
        shape=(dataset.N, 3, max_frame, num_joint, max_body))

    f_motion = open_memmap(
        '{}/{}_motion.npy'.format(out_path, part),
        dtype='float32',
        mode='w+',
        shape=(dataset.N, 3, max_frame, num_joint, max_body))

    f_label = open_memmap(
        '{}/{}_label.npy'.format(out_path, part),
        dtype='int64',
        mode='w+',
        shape=(dataset.N, 1))

    index = 0
    for i, (data, motion, label) in enumerate(data_loader):
        print_toolbar(i * 1.0 / len(data_loader),
                      '({:>5}/{:<5}) Processing {:>5}-{:<5} data: '.format(
                          i + 1, len(data_loader), benchmark, part))
        length = label.shape[0]
        if i * batch_size != index:
            print(i, index)
        f_position[index:(index+length), :, :, :, :] = data.numpy()
        f_motion[index:(index+length), :, :, :, :] = motion.numpy()
        f_label[index:(index+length), :] = label.numpy().reshape(-1, 1)
        index += length
    end_toolbar()
Example #20
    def setUp(self):
        self.data = sp.arange(80).reshape((2, 8, 5))

        self.memmap_data = npfor.open_memmap('temp.npy', mode='w+',
                                             shape=(2, 8, 5))

        self.memmap_data[:, :, :] = sp.arange(80).reshape(2, 8, 5)
Example #21
def gendata(
        data_path,
        label_path,
        data_out_path,
        label_out_path,
        num_person_in=5,  #observe the first 5 persons 
        num_person_out=2,  #then choose 2 persons with the highest score 
        max_frame=300):

    feeder = Feeder_kinetics(
        data_path=data_path,
        label_path=label_path,
        num_person_in=num_person_in,
        num_person_out=num_person_out,
        window_size=max_frame)

    sample_name = feeder.sample_name
    sample_label = []

    fp = open_memmap(
        data_out_path,
        dtype='float32',
        mode='w+',
        shape=(len(sample_name), 3, max_frame, 18, num_person_out))

    for i, s in enumerate(sample_name):
        data, label = feeder[i]
        print_toolbar(i * 1.0 / len(sample_name),
                      '({:>5}/{:<5}) Processing data: '.format(
                          i + 1, len(sample_name)))
        fp[i, :, 0:data.shape[1], :, :] = data
        sample_label.append(label)

    with open(label_out_path, 'wb') as f:
        pickle.dump((sample_name, list(sample_label)), f)
Example #22
def gendata(data_path,
            out_path,
            ignored_sample_path=None,
            benchmark='xview',
            part='eval'):
    if ignored_sample_path != None:
        with open(ignored_sample_path, 'r') as f:
            ignored_samples = [
                line.strip() + '.skeleton' for line in f.readlines()
            ]
    else:
        ignored_samples = []
    sample_name = []
    sample_label = []
    for filename in os.listdir(data_path):
        if filename in ignored_samples:
            continue
        action_class = int(
            filename[filename.find('A') + 1:filename.find('A') + 4])
        subject_id = int(
            filename[filename.find('P') + 1:filename.find('P') + 4])
        camera_id = int(
            filename[filename.find('C') + 1:filename.find('C') + 4])

        if benchmark == 'xview':
            istraining = (camera_id in training_cameras)
        elif benchmark == 'xsub':
            istraining = (subject_id in training_subjects)
        else:
            raise ValueError()

        if part == 'train':
            issample = istraining
        elif part == 'val':
            issample = not (istraining)
        else:
            raise ValueError()

        if issample:
            sample_name.append(filename)
            sample_label.append(action_class - 1)

    with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f:
        pickle.dump((sample_name, list(sample_label)), f)
    # np.save('{}/{}_label.npy'.format(out_path, part), sample_label)

    fp = open_memmap(
        '{}/{}_data.npy'.format(out_path, part),
        dtype='float32',
        mode='w+',
        shape=(len(sample_label), 3, max_frame, num_joint, max_body))

    for i, s in enumerate(sample_name):
        print_toolbar(i * 1.0 / len(sample_label),
                      '({:>5}/{:<5}) Processing {:>5}-{:<5} data: '.format(
                          i + 1, len(sample_name), benchmark, part))
        data = read_xyz(
            os.path.join(data_path, s), max_body=max_body, num_joint=num_joint)
        fp[i, :, 0:data.shape[1], :, :] = data
    end_toolbar()
Example #23
    def dump_electrode_data_circus(self, filename, chunks=1e9):
        self.load_mcs_data()
        itemsize = np.array([0.0], dtype=np.float32).nbytes
        data = self.electrodes_data
        n = len(next(iter(data.values())))  # num samples per channel
        n_items = int(chunks // itemsize)  # num chunked samples per chan
        total_n = sum(len(value) for value in data.values())  # num bytes total
        pbar = tqdm(
            total=total_n * itemsize, file=sys.stdout, unit_scale=1,
            unit='bytes')

        mmap_array = open_memmap(
            filename, mode='w+', dtype=np.float32, shape=(n, len(data)))

        names = sorted(data.keys(), key=lambda x: (x[0], int(x[1:])))
        for k, name in enumerate(names):
            value = data[name]
            offset, scale = self.get_electrode_offset_scale(name)
            i = 0
            n = len(value)

            while i * n_items < n:
                items = np.array(
                    value[i * n_items:min((i + 1) * n_items, n)])
                mmap_array[i * n_items:i * n_items + len(items), k] = \
                    (items - offset) * scale
                pbar.update(len(items) * itemsize)
                i += 1
        pbar.close()
        print('Channel order in "{}" is: {}'.format(filename, names))
Example #24
def gen_neighbor_data():
    """Generate bone data from joint data for NTU skeleton dataset"""
    for dataset in datasets:
        for set in sets:
            print(dataset, set)
            data = np.load(
                '/home/hhe/hhe_first_file/data_set/NTU-RGB-D-CV/{}/{}_data.npy'
                .format(dataset, set))
            N, C, T, V, M = data.shape
            fp_sp = open_memmap(
                '/home/hhe/hhe_first_file/data_set/NTU-RGB-D-CV/{}/{}_neighbor.npy'
                .format(dataset, set),
                dtype='float32',
                mode='w+',
                shape=(N, 3, T, V, M))

            ori_data = np.zeros(data.shape, dtype=data.dtype)

            # Copy the joints data to bone placeholder tensor

            fp_sp[:, :C, :, :, :] = ori_data
            for v1, v2 in tqdm(paris[dataset]):
                # NTU joint indices are 1-based; shift them to 0-based (kinetics is already 0-based)
                if dataset != 'kinetics':
                    v1 -= 1
                    v2 -= 1
                # Assign bones to be joint1 - joint2, the pairs are pre-determined and hardcoded
                # There also happens to be 25 bones
                fp_sp[:, :, :,
                      v1, :] += data[:, :, :, v1, :] - data[:, :, :, v2, :]
Example #25
    def _create_schema(self, *, remote_operation: bool = False):
        """stores the shape and dtype as the schema of a arrayset.

        Parameters
        ----------
        remote_operation : optional, kwarg only, bool
            if this schema is being created from a remote fetch operation, then do not
            place the file symlink in the staging directory. Instead symlink it
            to a special remote staging directory. (default is False, which places the
            symlink in the stage data directory.)
        """
        uid = random_string()
        file_path = pjoin(self.DATADIR, f'{uid}.npy')
        m = open_memmap(file_path,
                        mode='w+',
                        dtype=self.schema_dtype,
                        shape=(COLLECTION_SIZE, *self.schema_shape))
        self.wFp[uid] = m
        self.w_uid = uid
        self.hIdx = 0

        if remote_operation:
            symlink_file_path = pjoin(self.REMOTEDIR, f'{uid}.npy')
        else:
            symlink_file_path = pjoin(self.STAGEDIR, f'{uid}.npy')
        symlink_rel(file_path, symlink_file_path)
Example #27
def eval(model, data_loader, output_device=0, dstype='train'):

    part = 'train' if dstype == "train" else 'test'
    out_path = './data/{}/features'.format(dataset)

    fp = open_memmap('{}/{}_data.npy'.format(out_path, part),
                     dtype='float32',
                     mode='w+',
                     shape=(len(data_loader['test']), 1, 256, 1))
    label_fp = open('{}/{}_label.txt'.format(out_path, part), '+w')
    for i, (data, label, sample_name) in enumerate(data_loader['test']):

        data = Variable(data.float().cuda(output_device),
                        requires_grad=False,
                        volatile=True)
        label = Variable(label.long().cuda(output_device),
                         requires_grad=False,
                         volatile=True)
        label_int = int(label.data.cpu().numpy())

        label_fp.write(sample_name[0] + ", " + str(label_int) + '\n')
        ddata = data.data.cpu().numpy()
        label_fp.write(
            " ".join(list(map(lambda x: str(x),
                              ddata.flatten()[:10]))) + '\n')
        output = model(data)
        np_output = output.data.cpu().numpy()
        label_fp.write(
            " ".join(list(map(lambda x: str(x),
                              np_output.flatten()[:10]))) + '\n')

        fp[i, :, :, :] = np_output

    label_fp.close()
Example #28
    def __init__(self, split, model_group_name):
        self.codebook = json.load(open('{}/data.json'.format(cfg.DATA_DIR)))

        data = h5py.File('{}/data.h5'.format(
            cfg.DATA_DIR))['/{}'.format(split)]
        self.img_pos = data['img_pos'].value
        self.que = data['que'].value
        self.que_id = data['que_id'].value
        if 'ans' in data:
            self.ans = data['ans'].value
            if cfg.SOFT_LOSS:
                self.ans = self.ans.astype(np.float32)

        # load image features
        self.splits = cfg[split.upper()].SPLITS
        self.img_feas = []
        for data_split in self.splits:
            if data_split == 'vg':
                continue
            fea_fname = get_feature_path(data_split, 'feature')
            if cfg.LOAD_ALL_DATA:
                img_fea = np.load(fea_fname)
            else:
                img_fea = open_memmap(fea_fname, dtype='float32')
            self.img_feas.append(img_fea)
        self.img_cnts = list(map(len, self.img_feas))

        self.model_group_name = None
        self.reload_obj(model_group_name)
Example #29
    def _write_volume(self, data, volume_indices, filename):
        """Write the result of one map to the specified file.

        This is meant to save map data to a temporary .npy file.

        Args:
            data (ndarray): the voxel data to store
            volume_indices (ndarray): the volume indices of the computed data points
            filename (str): the file to write the results to. If the file already exists it is opened
                in update mode ('r+') and the new voxels are written into it.
        """
        extra_dims = (1, )
        if len(data.shape) == 2:
            extra_dims = (data.shape[1], )
        elif len(data.shape) > 2:
            extra_dims = data.shape[1:]
        else:
            data = np.reshape(data, (-1, 1))

        mode = 'w+'
        if os.path.isfile(filename):
            mode = 'r+'

        tmp_matrix = open_memmap(filename,
                                 mode=mode,
                                 dtype=data.dtype,
                                 shape=self._mask.shape[0:3] + extra_dims)
        tmp_matrix[volume_indices[:, 0], volume_indices[:, 1],
                   volume_indices[:, 2]] = data
Example #30
    def _init_mem(self, ex, name, shape, header_name=None):
        #
        header, offset = ex.get_header(name, shape, self.headers[0])

        # create shared memory for output frames / info
        n_frames, n_ch, *_ = shape
        self.logger.info(
            'Extracting %i frames from %i amplifier channel%s to '
            '%r', n_frames, n_ch, 's' * (n_ch > 1), str(name))

        # check free hd space
        req_bytes_head = self.head_dtype.itemsize * n_frames * n_ch
        req_bytes_data = (ex.image_size_bytes * n_frames * n_ch) + offset
        self.check_free_space(req_bytes_data, req_bytes_head)

        # create memory map for extraction (4D)
        data = np.memmap(name, ex.dtype, 'w+', offset, shape)
        # FIXME: w+ will always overwrite, r+ fails on create

        # header info data
        header_data = None
        if header_name:
            # read the extracted keys to structured memory map
            header_data = open_memmap(str(header_name), 'w+', self.head_dtype,
                                      (n_frames, n_ch))
        return data, header, header_data
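
The FIXME above notes that 'w+' always overwrites while 'r+' fails when the file does not yet exist. One possible workaround, a sketch following the create-or-reuse pattern used in several other examples on this page, is:

mode = 'r+' if os.path.isfile(name) else 'w+'          # reuse an existing file, create otherwise
data = np.memmap(name, ex.dtype, mode, offset, shape)  # same positional arguments as above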
Example #31
    def dump_electrode_data_circus(self, filename, chunks=1e9):
        self.load_mcs_data()
        itemsize = np.array([0.0], dtype=np.float32).nbytes
        data = self.electrodes_data
        n = len(next(iter(data.values())))  # num samples per channel
        n_items = int(chunks // itemsize)  # num chunked samples per chan
        total_n = sum(len(value) for value in data.values())  # num bytes total
        pbar = tqdm(total=total_n * itemsize,
                    file=sys.stdout,
                    unit_scale=1,
                    unit='bytes')

        mmap_array = open_memmap(filename,
                                 mode='w+',
                                 dtype=np.float32,
                                 shape=(n, len(data)))

        names = sorted(data.keys(), key=lambda x: (x[0], int(x[1:])))
        for k, name in enumerate(names):
            value = data[name]
            offset, scale = self.get_electrode_offset_scale(name)
            i = 0
            n = len(value)

            while i * n_items < n:
                items = np.array(value[i * n_items:min((i + 1) * n_items, n)])
                mmap_array[i * n_items:i * n_items + len(items), k] = \
                    (items - offset) * scale
                pbar.update(len(items) * itemsize)
                i += 1
        pbar.close()
        print('Channel order in "{}" is: {}'.format(filename, names))
Example #32
    def export(self, file_name, table_name="aequilibrae_table"):
        """
        Exports the dataset to another format. Supports CSV and SQLite

        Args:
            *file_name* (:obj:`str`): File name with PATH and extension (csv, or sqlite3, sqlite or db)

            *table_name* (:obj:`str`): It only applies if you are saving to an SQLite table. Otherwise ignored

        ::

            dataset = AequilibraeData()
            dataset.load("D:/datasets/vectors.aed")
            dataset.export("D:/datasets/vectors.csv")
        """

        file_type = os.path.splitext(file_name)[1]
        headers = ["index"]
        headers.extend(self.fields)

        if file_type.lower() == ".aed":
            dtype = [("index", self.aeq_index_type)]
            dtype.extend([(self.fields[i], self.data_types[i]) for i in range(self.num_fields)])
            data = open_memmap(file_name, mode="w+", dtype=dtype, shape=(self.entries,))
            for field in data.dtype.names:
                data[field] = self.data[field]
            data.flush()
            del data

        elif file_type.lower() == ".csv":
            fmt = "%d"
            for dt in self.data_types:
                if np.issubdtype(dt, np.floating):
                    fmt += ",%f"
                elif np.issubdtype(dt, np.integer):
                    fmt += ",%d"
            data = np.array(self.data, copy=True)
            for nm in self.data.dtype.names:
                np.nan_to_num(data[nm], copy=False)

            np.savetxt(file_name, data[np.newaxis, :][0], delimiter=",", fmt=fmt, header=",".join(headers), comments="")

        elif file_type.lower() in [".sqlite", ".sqlite3", ".db"]:
            # Connecting to the database file
            conn = sqlite3.connect(file_name)
            c = conn.cursor()
            # Creating the table, but before deletes if the table exists
            c.execute("""DROP TABLE IF EXISTS """ + table_name)
            fi = ""
            qm = "?"
            for f in headers[1:]:
                fi += ", " + f + " REAL"
                qm += ", ?"

            c.execute("""CREATE TABLE """ + table_name + """ (link_id INTEGER PRIMARY KEY""" + fi + ")" "")
            c.execute("BEGIN TRANSACTION")
            c.executemany("INSERT INTO " + table_name + " VALUES (" + qm + ")", self.data)
            c.execute("END TRANSACTION")
            conn.commit()
            conn.close()
Example #33
def gendata(
        data_path,
        label_path,
        data_out_path,
        label_out_path,
        num_person_in=1,  # observe the first num_person_in persons
        num_person_out=1,  # then choose num_person_out persons with the highest score
        max_frame=300):

    feeder = Feeder_kinetics(data_path=data_path,
                             label_path=label_path,
                             num_person_in=num_person_in,
                             num_person_out=num_person_out,
                             window_size=max_frame)

    sample_name = feeder.sample_name
    sample_label = []

    fp = open_memmap(data_out_path,
                     dtype='float32',
                     mode='w+',
                     shape=(len(sample_name), 3, max_frame, 18,
                            num_person_out))

    for i, s in enumerate(sample_name):
        data, label = feeder[i]
        print_toolbar(
            i * 1.0 / len(sample_name),
            '({:>5}/{:<5}) Processing data: '.format(i + 1, len(sample_name)))
        fp[i, :, 0:data.shape[1], :, :] = data
        sample_label.append(label)

    with open(label_out_path, 'wb') as f:
        pickle.dump((sample_name, list(sample_label)), f)
Example #34
def create_and_save_depth(inference_fn, video_data,
                          depth_estimation_model_path, dnn_depth_map_path,
                          logger, batch_size):
    try:
        depth_maps = open_memmap(filename=dnn_depth_map_path,
                                 dtype=np.float32,
                                 mode='w+',
                                 shape=(video_data.num_frames, 1,
                                        *video_data.shape))

        depth_map_generator = inference_fn(video_data,
                                           depth_estimation_model_path,
                                           logger,
                                           batch_size=batch_size)

        for batch_i, depth_map in enumerate(depth_map_generator):
            batch_start_idx = batch_size * batch_i
            # Sometimes the last batch is a different size to the rest, so we need to use the actual batch size rather
            # than the specified one.
            current_batch_size = depth_map.shape[0]
            batch_end_idx = batch_start_idx + current_batch_size
            depth_maps[batch_start_idx:batch_end_idx] = depth_map

        depth_maps.flush()

        logger.log("Saved DNN depth maps to {}.".format(dnn_depth_map_path))

        return depth_maps
    except Exception:
        logger.log(
            "\nError occurred during creation of depth maps - deleting {}.".
            format(dnn_depth_map_path))
        os.remove(dnn_depth_map_path)
        raise
Example #35
 def _get_numpy_binary_array(self, name):
     """Return the an memmap object as represented by the .npy file"""
     filename = self._array_files.get(name)
     if filename is not None:
         return open_memmap(filename)
     else:
         return None
Example #36
    def read_data(self, hashVal: NUMPY_10_DataHashSpec) -> np.ndarray:
        """Read data from disk written in the numpy_00 fmtBackend

        Parameters
        ----------
        hashVal : NUMPY_10_DataHashSpec
            record specification stored in the db

        Returns
        -------
        np.ndarray
            tensor data stored at the provided hashVal specification.

        Raises
        ------
        RuntimeError
            If the recorded checksum does not match the received checksum.

        Notes
        -----

        TO AVOID DATA LOSS / CORRUPTION:

        * On a read operation, we copy memmap subarray tensor data to a new
          `np.ndarray` instance so as to prevent writes on a raw memmap result
          slice (a `np.memmap` instance) from propagating to data on disk.

        * This is an issue for reads from a write-enabled checkout where data
          was just written, since the np flags "WRITEABLE" and "OWNDATA" will be
          true, and writes to the returned array would overwrite that data
          slice on disk.

        * For read-only checkouts, modifications to the resultant array would
          perform a "copy on write"-like operation which would be propagated to
          all future reads of the subarray from that process, but which would
          not be persisted to disk.
        """
        srcSlc = (self.slcExpr[hashVal.collection_idx],
                  *(self.slcExpr[0:x] for x in hashVal.shape))
        try:
            res = self.Fp[hashVal.uid][srcSlc]
        except TypeError:
            self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
            res = self.Fp[hashVal.uid][srcSlc]
        except KeyError:
            process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
            file_pth = pjoin(process_dir, f'{hashVal.uid}.npy')
            if os.path.islink(file_pth):
                self.rFp[hashVal.uid] = open_memmap(file_pth, 'r')
                res = self.Fp[hashVal.uid][srcSlc]
            else:
                raise

        out = np.array(res, dtype=res.dtype, order='C')
        cksum = adler32(out)
        if cksum != int(hashVal.checksum):
            raise RuntimeError(
                f'DATA CORRUPTION ERROR: Checksum {cksum} != recorded for {hashVal}'
            )
        return out
Example #37
 def crop(self, item, focus, mode='loose', fixed=None, return_data=True):
     """Faster version of precomputed(item).crop(...)"""
     memmap = open_memmap(self.get_path(item), mode='r')
     swf = SlidingWindowFeature(memmap, self.sliding_window_)
     result = swf.crop(focus, mode=mode, fixed=fixed,
                       return_data=return_data)
     del memmap
     return result
Example #38
def open_memmap(filename, mode='r+', dtype=None, shape=None,
                fortran_order=False, version=(1, 0), metafile=None):
    """Open a file and memory map it to an InfoMemmap object.

    This is similar to the numpy.lib.format.open_memmap() function but also
    deals with the meta data dictionary, which is read from and written to a
    meta data file.

    The only extra argument over the numpy version is the meta data file name
    `metafile`.

    Parameters
    ----------
    metafile: str
        File name from which the `info` attribute of the returned InfoMemmap
        is read and to which it is written. Default is None, in which case it
        is assumed to be `filename` + ".meta".

    Returns
    -------
    marray: InfoMemmap
        The `info` is initialized as an empty dictionary if `mode` is 'w' or if
        the file corresponding to `metafile` does not exist.  The `metafile`
        attribute of marray is set to the `metafile` parameter unless `mode` is
        'r' or 'c' in which case it is set to None.
    """

    # Restrict to version (1,0) because we've only written write_header for
    # this version.
    if version != (1, 0):
        raise ValueError("Only version (1,0) is safe from this function.")

    # Memory map the data part.
    marray = npfor.open_memmap(filename, mode, dtype, shape, fortran_order,
                               version)

    # Get the file name for the meta data.
    if metafile is None:
        metafile = filename + '.meta'

    # Read the meta data if need be.
    if ('r' in mode or mode == 'c') and os.path.isfile(metafile):
        info_fid = open(metafile, 'r')
        try:
            infostring = info_fid.readline()
        finally:
            info_fid.close()
        info = safe_eval(infostring)
    else:
        info = {}

    # In read mode don't pass a metafile to protect the meta data.
    if mode == 'r' or mode == 'c':
        metafile = None

    marray = info_header.InfoMemmap(marray, info, metafile)

    return marray
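
A brief usage sketch of the wrapper defined above (file names are illustrative); with mode 'w+' the returned InfoMemmap starts with an empty info dictionary and is associated with the accompanying meta data file:

marray = open_memmap('map.npy', mode='w+', dtype=np.float64, shape=(4, 3),
                     metafile='map.npy.meta')
marray.info['units'] = 'K'  # metadata lives in the info dict tied to the metafile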
Example #39
    def create_empty(self, file_path=None, entries=1, field_names=None, data_types=None, memory_mode=False):
        """
        :param file_path: Optional. Full path for the output data file. If *memory_mode* is False and the path is
        missing, then the file is created in the temp folder
        :param entries: Number of records in the dataset. Default is 1
        :param field_names: List of field names for this dataset. If no list is provided, the field 'data' will be created
        :param data_types: List of data types for the dataset. Types need to be NumPy data types (e.g. np.int16,
        np.float64). If no list of types are provided, type will be *np.float64*
        :param memory_mode: If true, the dataset will be kept in memory. If false, the dataset will be a memory-mapped numpy array
        :return: Nothing. Associates a dataset with the AequilibraEData object
        """

        if file_path is not None or memory_mode:
            if field_names is None:
                field_names = ['data']

            if data_types is None:
                data_types = [np.float64] * len(field_names)

            self.file_path = file_path
            self.entries = entries
            self.fields = field_names
            self.data_types = data_types
            self.aeq_index_type = np.uint64

            if memory_mode:
                self.memory_mode = MEMORY
            else:
                self.memory_mode = DISK
                if self.file_path is None:
                    self.file_path = self.random_name()

            # Consistency checks
            if not isinstance(self.fields, list):
                raise ValueError('Titles for fields, "field_names", needs to be a list')

            if not isinstance(self.data_types, list):
                raise ValueError('Data types, "data_types", needs to be a list')
            # The check below is not working properly with the QGIS importer
            # else:
            #     for dt in self.data_types:
            #         if not isinstance(dt, type):
            #             raise ValueError('Data types need to be Python or Numpy data types')

            for field in self.fields:
                if field in object.__dict__:
                    raise Exception(field + ' is a reserved name. You cannot use it as a field name')

            self.num_fields = len(self.fields)

            dtype = [('index', self.aeq_index_type)]
            dtype.extend([(self.fields[i], self.data_types[i]) for i in range(self.num_fields)])

            # the file
            if self.memory_mode:
                self.data = np.recarray((self.entries,), dtype=dtype)
            else:
                self.data = open_memmap(self.file_path, mode='w+', dtype=dtype, shape=(self.entries,))
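A short usage sketch for create_empty() is given below. The class and import path are assumptions for illustration; only the method signature comes from the example above.

# A minimal sketch, assuming an AequilibraEData class exposing create_empty().
import numpy as np
from aequilibrae.matrix import AequilibraEData  # assumed import path

dataset = AequilibraEData()
dataset.create_empty(file_path='/tmp/skims.aed',   # illustrative path
                     entries=100,
                     field_names=['distance', 'time'],
                     data_types=[np.float64, np.float32])

dataset.data['index'][:] = np.arange(100)   # the 'index' field is always part of the dtype
dataset.data['distance'][:] = 0.0
dataset.data['time'][:] = 0.0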
Example #40
0
 def setUp(self) :
     data = sp.arange(20)
     data.shape = (5,4)
     self.mat_arr = algebra.make_mat(data.copy(), axis_names=('ra', 'dec'))
     self.vect_arr = algebra.make_vect(data.copy(), axis_names=('ra', 'dec'))
     mem = npfor.open_memmap('temp.npy', mode='w+', shape=(5, 4))
     mem[:] = data
     self.vect_mem = algebra.make_vect(mem)
     self.arr = data.copy()
Example #41
0
 def test_from_memmap(self) :
     # Works if constructed from array.
     data = npfor.open_memmap('temp.npy', mode='w+', shape=(4,3,3))
     data[:] = 5.0
     Mat = algebra.info_memmap(data, {'a': 'b'})
     Mat.flush()
     self.assertEqual(Mat.shape, (4, 3, 3))
     self.assertEqual(Mat.info['a'], 'b')
     self.assertTrue(sp.allclose(Mat, 5.0))
     self.assertTrue(isinstance(Mat,  sp.memmap))
     del Mat
     os.remove('temp.npy')
Example #42
0
    def load(self, file_path):
        """
        :param file_path: Full file path to the AequilibraEDataset to be loaded
        :return: Loads the dataset into the AequilibraEData instance
        """
        f = open(file_path)
        self.file_path = os.path.realpath(f.name)
        f.close()

        # Map in memory and load data names plus dimensions
        self.data = open_memmap(self.file_path, mode='r+')

        self.entries = self.data.shape[0]
        self.fields = [x for x in self.data.dtype.fields if x != 'index']
        self.num_fields = len(self.fields)
        self.data_types = [self.data[x].dtype.type for x in self.fields]
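The load() method re-attaches an existing dataset file in read/write mode; a brief sketch, continuing the hypothetical example above:

# Re-open a previously created dataset (path and import are illustrative).
from aequilibrae.matrix import AequilibraEData  # assumed import path

dataset = AequilibraEData()
dataset.load('/tmp/skims.aed')
print(dataset.entries, dataset.fields, dataset.data_types)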
Example #43
0
 def test_assert_info(self) :
     """Test the assert_info function."""
     # info_memaps should pass.
     data = npfor.open_memmap('temp.npy', mode='w+', shape=(4,3,3))
     data[:] = 5.0
     Mat = algebra.info_memmap(data)
     algebra.assert_info(Mat)
     del Mat
     os.remove('temp.npy')
     # info_arrays should pass.
     data = sp.empty((5, 6, 6))
     data[:] = 4.0
     Mat = algebra.info_array(data)
     algebra.assert_info(Mat)
     # arrays should fail.
     self.assertRaises(TypeError, algebra.assert_info, data)
Example #44
0
    def load_data_matrix(self):
        
        memmap_path = os.path.join(self.bin_dir,self.memmap_name)
        if os.path.exists(memmap_path):
            
            print 'loading in '+self.memmap_name
            
            self.raw_data_list = npf.open_memmap(memmap_path,mode='r',dtype='float32')
            #self.raw_data_list = np.load(memmap_path)

            print 'shape of loaded memmap:'
            print self.raw_data_list.shape
            
            self.loaded_warm_start = True
            return True
            
        else:
            print 'no file of name '+self.memmap_name+' to load.'
            print 'aborting memmap load'
            
            return False
Example #45
0
from numpy.lib.format import open_memmap

n0 = open_memmap('launch-000000.npy')
n1 = open_memmap('launch-000001.npy')
n2 = open_memmap('launch-000002.npy')

n0_cond_ss = n0.reshape((-1, 32, 401, 192))[:, :, 200:].reshape((-1, 32*201, 192))
n1_cond_ss = n1.reshape((-1, 32, 401, 192))[:, :, 200:].reshape((-1, 32*201, 192))


# triple f here
figure(figsize=(15, 12))
ws = l9['dataset'].weights
ds = l9['dataset'].distances
idx = 32*10 + 21
cond = n9.reshape((-1, 32, 401, 96, 2))[idx, :, :]
ts = r_[0 : cond.shape[1]*2.5 : 1j*cond.shape[1]]
cond -= cond.reshape((-1, 192)).mean(axis=0).reshape((1, 1, 96, 2))
trial_svds = [svd(trial[:, :, 0], full_matrices=0) for trial in cond]
cond_svd = svd(cond[:, :, :, 0].reshape((-1, 96)), full_matrices=0)
for i, svdi, trial in zip(range(32), trial_svds, cond):
    subplot(335)
    x, y, z = svdi[1][:3][:, newaxis]*dot(svdi[2][:3], trial[:, :, 0].T)
    plot(x+z/3, y+z/3, 'k-', alpha=0.2)
    subplot(336)
    x, y, z = svdi[1][:3][:, newaxis]*dot(cond_svd[2][:3], trial[:, :, 0].T)
    plot(x+z/3, y+z/3, 'k-', alpha=0.3)
subplot(6,3,13)
hist(concatenate([abs(dot(svd1[2][:3], svd2[2][:3].T)).flat for i, svd1 in enumerate(trial_svds) for j, svd2 in enumerate(trial_svds) if not j==i]), 50)
xlim([0, 1.0])
subplot(3, 3, 4)
Example #46
0
    def create_data_matrix(self,save_memmap=True,nuke=True):
        
        raw_path = os.path.join(self.bin_dir,self.memmap_name)
        
        if nuke and os.path.exists(raw_path):
            os.remove(raw_path)
            
        if save_memmap:            
            # We need to determine how many nifti files there are in total to
            # determine the shape of the memmap:
            
            brainshape = []

            for subject in self.reg_subjects:
                sub_path = os.path.join(self.top_dir,subject)
                for nifti_name in self.reg_nifti_name:
                    nifti_path = os.path.join(sub_path,nifti_name)
                    if os.path.exists(nifti_path):
                        self.total_nifti_files += 1
                        if not brainshape:
                            [tempdata,tempaffine,brainshape] = self.__load_nifti(nifti_path)
                
            # Allocate the .npy memmap according to its size:
            
            memmap_shape = (self.total_nifti_files,brainshape[0],brainshape[1],brainshape[2],
                            brainshape[3])
            
            print 'Determined memmap shape:'
            print memmap_shape
            print 'Allocating the memmap...'
            
            self.raw_data_list = npf.open_memmap(raw_path,mode='w+',dtype='float32',
                                                 shape=memmap_shape)
            
            print 'Successfully allocated memmap... memmap shape:'
            pprint(self.raw_data_list.shape)
            
        
        nifti_iter = 0
        for subject in self.reg_subjects:
            sub_path = os.path.join(self.top_dir,subject)
            print sub_path
            print subject
            print os.getcwd()
            
            for nifti_name in self.reg_nifti_name:
                nifti_path = os.path.join(sub_path,nifti_name)
                pprint(nifti_name)
                if os.path.exists(nifti_path):
                    [idata,affine,ishape] = self.__load_nifti(nifti_path)
                    pprint(ishape)
                    
                    if save_memmap:
                        print 'Appending idata to memmap at: %s' % str(nifti_iter)
                        self.raw_data_list[nifti_iter] = np.array(idata)
                        self.subject_trial_indices[nifti_iter] = []
                        nifti_iter += 1
                    
                    if self.reg_experiment_trs == False:
                        self.reg_experiment_trs = len(idata[3])
                        
                    if self.reg_total_trials == False:
                        if self.reg_trial_trs:
                            self.reg_total_trials = self.reg_experiment_trs/self.reg_trial_trs

                    if self.raw_affine == []:
                        self.raw_affine = affine
                        
                    if self.raw_data_shape == []:
                        self.raw_data_shape = ishape
                        pprint(ishape)
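The method above follows a common two-pass pattern: scan the inputs once to determine the final shape, allocate the .npy memmap with mode='w+', then fill it incrementally. A self-contained sketch of that pattern (with stand-in data instead of NIfTI files):

# Generic sketch of the "size first, then fill" memmap pattern used above.
import numpy as np
from numpy.lib.format import open_memmap

chunks = [np.random.rand(10, 4).astype('float32') for _ in range(5)]  # stand-in inputs
total_rows = sum(c.shape[0] for c in chunks)        # pass 1: determine the shape

out = open_memmap('combined.npy', mode='w+', dtype='float32', shape=(total_rows, 4))
row = 0
for c in chunks:                                    # pass 2: write each block in place
    out[row:row + c.shape[0]] = c
    row += c.shape[0]
out.flush()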
Example #47
0
def main():

    def parse_args():
        parser = argparse.ArgumentParser(
            description=("Compares sequential write and random read times for "
                         "HDF5 vs memmap datasets."))

        parser.add_argument("--output-dir",
                            default="/tmp/",
                            help=("The directory to output to. Handy for "
                                  "comparing HDD vs SDD performance."))

        parser.add_argument("--no-memmap",
                            action='store_true',
                            default=False,
                            help="Don't test memmaps.")

        parser.add_argument("--no-h5",
                            action='store_true',
                            default=False,
                            help="Don't test HDF5.")

        parser.add_argument("--batch-size",
                            default=128,
                            help="Number of images per batch")

        parser.add_argument("--dtype",
                            type=numpy.dtype,
                            default='uint8',
                            help="Data dtype.")

        parser.add_argument("--image-dim",
                            default=108,  # from big NORB dataset
                            help=("Size of one side of the random square "
                                  "images."))

        parser.add_argument("--num-gb",
                            type=float,
                            default=1.0,
                            help="File size, in GB")

    args = parse_args()

    # modeled after big NORB's test set images
    example_shape = (args.image_dim, args.image_dim)
    example_size = numpy.prod(example_shape) * args.dtype.itemsize
    num_examples = int(numpy.floor(args.num_gb * (1024 ** 3) / example_size))

    shape = (num_examples, args.image_dim, args.image_dim)
    dtype_max = numpy.iinfo(args.dtype).max

    # batch_size = 128
    num_batches = int(numpy.ceil(shape[0] / float(args.batch_size)))

    path_prefix = os.path.join(args.output_dir,
                               'benchmark_random_access_to_hdf5_and_memmap')
    h5_path = path_prefix + '.h5'
    mm_path = path_prefix + '.npy'

    def get_expected_values(start_row, end_row=None):
        if end_row is None:
            assert_is_instance(start_row, numpy.ndarray)
            values = start_row
        else:
            assert_integer(start_row)
            assert_integer(end_row)
            values = numpy.arange(start_row, end_row)

        values = values % dtype_max
        values = values.reshape((values.shape[0], ) +
                                ((1, ) * (len(shape) - 1)))

        return numpy.tile(values, shape[1:])

    def fill_tensor(tensor):
        '''
        Fill each row with its batch index.
        '''

        row_index = 0

        while row_index < shape[0]:
            print("writing {} of {} rows".format(row_index, shape[0]),
                  end='\r')

            next_row_index = min(shape[0], row_index + args.batch_size)
            values = get_expected_values(row_index, next_row_index)

            tensor[row_index:next_row_index, ...] = values
            row_index = next_row_index

    memory_size = human_readable_memory_size(numpy.prod(shape))

    if not args.no_h5:
        start_time = default_timer()
        with h5py.File(h5_path, mode='w') as h5_file:
            print("Allocating %s HDF5 tensor to %s." % (memory_size, h5_path))
            h5_tensor = h5_file.create_dataset('tensor', shape, args.dtype)
            print("Filling HDF5 tensor.")
            fill_tensor(h5_tensor)

        duration = default_timer() - start_time
        print("HDF5 sequential write time: " + human_readable_duration(duration))
        print("{:.2g} secs per {}-sized batch".format(duration / num_batches,
                                                      args.batch_size))

    if not args.no_memmap:
        print("Allocating %s memmap tensor to %s." % (memory_size, mm_path))
        start_time = default_timer()
        fill_tensor(open_memmap(mm_path, 'w+', args.dtype, shape))
        duration = default_timer() - start_time
        print('Memmap sequential write time: %s' %
              human_readable_duration(duration))
        print("{:.2g} secs per {}-sized batch".format(duration / num_batches,
                                                      args.batch_size))

    rng = numpy.random.RandomState(1413)

    shuffled_indices = rng.choice(shape[0], size=shape[0], replace=False)

    def random_reads(tensor):
        row_index = 0

        is_hdf5 = isinstance(tensor, h5py.Dataset)

        while row_index < shape[0]:
            print("read {} of {} rows".format(row_index, shape[0]), end='\r')

            next_row_index = min(shape[0], row_index + args.batch_size)
            indices = shuffled_indices[row_index:next_row_index]
            if is_hdf5:
                indices = numpy.sort(indices)

            expected_values = get_expected_values(indices)
            assert_true((tensor[indices, ...] == expected_values).all())

            row_index = next_row_index

    if not args.no_h5:
        print("Randomly reading from " + h5_path)
        start_time = default_timer()
        with h5py.File(h5_path, mode='r') as h5_file:
            h5_tensor = h5_file['tensor']
            random_reads(h5_tensor)

        duration = default_timer() - start_time
        print('HDF5 random read time: ' + human_readable_duration(duration))
        print("{:.2g} secs per {}-sized batch".format(duration / num_batches,
                                                      args.batch_size))

    if not args.no_memmap:
        print("Randomly reading from " + mm_path)
        start_time = default_timer()
        random_reads(open_memmap(mm_path, 'r', args.dtype, shape))
        duration = default_timer() - start_time
        print('Memmap random read time: ' + human_readable_duration(duration))
        print("{:.2g} secs per {}-sized batch".format(duration / num_batches,
                                                      args.batch_size))
Example #48
0
def build_dataset(path='/srv/data/apnea'):
    nights = ['302-adjust', '302-nopap', '303-adjust', '303-nopap', '304-adjust', '304-nopap', '305-adjust', '305-nopap', '306-adjust', '306-nopap', '307-adjust', '307-nopap', '309-adjust', '309-nopap', '310-adjust', '310-nopap', '311-adjust', '311-nopap', '312-adjust', '312-nopap', '313-adjust', '313-nopap', '314-adjust', '314-nopap', '315-adjust', '316-adjust', '316-nopap', '317-adjust', '317-nopap']
    labeled_nights = ['302-adjust', '302-nopap', '303-adjust', '303-nopap', '304-nopap', '309-adjust', '310-adjust', '310-nopap', '311-adjust', '312-adjust', '312-nopap', '316-nopap', '317-adjust', '317-nopap']
    # These are the "nominal" start and end times for each WAV file.
    # In fact, however, each WAV file is padded out with unlabeled data,
    # at the beginning (!), to an exact multiple of two minutes in length.
    nominal_times = {
            '302-adjust': ('2011-07-07 22:42:50', '2011-07-08 06:04:04'),
            '302-nopap': ('2011-07-11 22:46:36', '2011-07-12 06:51:09'),
            '303-adjust': ('2011-07-06 00:16:58', '2011-07-06 06:38:52'),
            '303-nopap': ('2011-07-27 22:20:45', '2011-07-28 06:26:15'),
            '304-adjust': ('2011-07-19 21:41:07', '2011-07-20 06:20:42'),
            '304-nopap': ('2011-07-26 22:49:09', '2011-07-27 06:00:30'),
            '305-adjust': ('2011-08-03 23:22:44', '2011-08-04 06:39:41'),
            '305-nopap': ('2011-08-04 23:48:46', '2011-08-05 07:09:23'),
            '306-adjust': ('2011-08-18 22:45:03', '2011-08-19 06:34:35'),
            '306-nopap': ('2011-08-19 22:13:58', '2011-08-20 06:47:19'),
            '307-adjust': ('2011-08-23 22:32:23', '2011-08-24 05:48:02'),
            '307-nopap': ('2011-08-30 22:16:11', '2011-08-31 06:04:09'),
            '309-adjust': ('2011-11-13 22:22:38', '2011-11-14 05:57:54'),
            '309-nopap': ('2011-11-14 22:30:07', '2011-11-15 05:14:17'),
            '310-adjust': ('2011-11-22 23:59:34', '2011-11-23 06:44:19'),
            '310-nopap': ('2011-11-29 00:47:19', '2011-11-29 06:51:23'),
            '311-adjust': ('2011-11-09 22:59:20', '2011-11-10 06:17:11'),
            '311-nopap': ('2011-11-17 22:37:49', '2011-11-18 06:30:28'),
            '312-adjust': ('2011-12-09 23:16:22', '2011-12-10 06:20:20'),
            '312-nopap': ('2011-12-11 22:28:14', '2011-12-12 05:23:21'),
            '313-adjust': ('2011-12-05 22:37:40', '2011-12-06 06:19:57'),
            '313-nopap': ('2011-12-06 21:58:02', '2011-12-07 05:53:00'),
            '314-adjust': ('2012-02-12 23:18:14', '2012-02-13 05:21:12'),
            '314-nopap': ('2012-02-19 22:47:06', '2012-02-20 05:50:21'),
            '315-adjust': ('2012-04-20 23:13:48', '2012-04-21 06:50:22'),
            '316-adjust': ('2012-03-21 23:44:59', '2012-03-22 08:36:15'),
            '316-nopap': ('2012-03-22 22:58:45', '2012-03-23 07:43:39'),
            '317-adjust': ('2012-04-16 00:23:34', '2012-04-16 08:29:43'),
            '317-nopap': ('2012-04-30 00:06:28', '2012-04-30 07:56:16')
            }

    def parse_time(s): return datetime.strptime(s,'%Y-%m-%d %H:%M:%S')
    end_times = {x: parse_time(nominal_times[x][1]) for x in nominal_times}

    # TODO: use unlabeled nights, too
    window_shape = None
    total_examples = 0
    X_names = []
    y_names = []
    for night in labeled_nights:

        basename = path+'/'+night
        wav_name = basename+'.wav'
        X_name = basename+'-X.npy'
        y_name = basename+'-y.npy'
        X_names.append(X_name)
        y_names.append(y_name)

        print >> sys.stderr, "Reading samples from %s" % wav_name
        rate, samples = wavfile.read(wav_name)
        assert rate == sample_rate, "File has wrong sample rate: %s (is %d, should be %d)" % (wav_name,rate,sample_rate)
        assert samples.ndim == 1, "Expected mono audio only: %s" % wav_name
        assert samples.dtype == numpy.dtype('int16'), "Expected 16-bit samples: %s" % wav_name

        mat = loadmat(basename+'.mat')

        actual_length      = timedelta(seconds=len(samples)/float(sample_rate))
        nominal_start_time = parse_time(nominal_times[night][0])
        end_time           = parse_time(nominal_times[night][1])
        actual_start_time  = end_time - actual_length
        assert actual_start_time < nominal_start_time

        # elements here are offsets in seconds from the beginning of the wav file.
        def to_seconds(t): return (parse_time(t) - actual_start_time).total_seconds()
        times = {signal: map(to_seconds, numpy.hstack(mat[signal].flatten()))
                         if len(mat[signal]) > 0 else []
                    for signal in classes}
        
        X, y = compute_windows(samples, times)
        del samples

        assert window_shape is None or window_shape == X.shape[1:]
        window_shape = X.shape[1:]

        numpy.save(X_name, X)
        numpy.save(y_name, y)
        total_examples += X.shape[0]
        del X, y
        # end for night

    print >> sys.stderr, "Gathering all examples..."
    X = open_memmap(path+'/X.npy', mode='w+', dtype='float32', shape=(total_examples,)+window_shape)
    ys = []

    i = 0
    for X_name,y_name in zip(X_names,y_names):
        print >> sys.stderr, X_name
        x1 = numpy.load(X_name, mmap_mode='r')
        ys.append(numpy.load(y_name))
        X[i:i+x1.shape[0]] = x1
        i += x1.shape[0]
    print >> sys.stderr, 'OK'
    y = numpy.concatenate(ys)
    numpy.save(path+'/y.npy', y)
    assert i == total_examples
    return (X,y)
Example #49
0
 def shape(self, item):
     """Faster version of precomputed(item).data.shape"""
     memmap = open_memmap(self.get_path(item), mode='r')
     shape = memmap.shape
     del memmap
     return shape
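Opening a memmap just to read .shape works, but the same information can be taken from the .npy header alone. A minimal sketch, assuming a version (1, 0) file:

# Read only the header of a .npy file to get its shape (no data is mapped).
from numpy.lib import format as npformat

def npy_shape(path):
    with open(path, 'rb') as fp:
        npformat.read_magic(fp)                                   # (major, minor)
        shape, fortran_order, dtype = npformat.read_array_header_1_0(fp)
    return shape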
Example #50
0
    ncores = 8

    savename = 'launch-%06d.npy' % (idx,)
    outname = 'reduced-%06d.pickle' % (idx,)

    msg = msgr()

    try:
        os.stat(outname)
        msg('found result file!')
    except:
        msg('no result found, proceeding to do reduction')
        msg('loading dataset %s' % savename)

        import cPickle as cp
        from numpy import *
        from numpy.linalg import svd
        from numpy.lib.format import open_memmap
        from multiprocessing import Pool

        npy = open_memmap(savename)
        npy_ = npy.reshape((-1, 32*npy.shape[1], 192))
        pool = Pool(ncores)

        svds = pool.map(reducer, range(npy_.shape[0]))

        msg('writing data')
        with open(outname, 'w') as fd:
            cp.dump(svds, fd)
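The reduction above maps a helper over blocks of a large memmap with a multiprocessing Pool. A self-contained sketch of that pattern follows; the worker body and file name are illustrative, since `reducer` is not shown in the excerpt. Each worker re-opens the file read-only by path, so large arrays never have to be pickled between processes.

# Minimal sketch of reducing a memmapped .npy file with a process pool.
import numpy as np
from numpy.lib.format import open_memmap
from numpy.linalg import svd
from multiprocessing import Pool

SAVENAME = 'launch-demo.npy'   # stand-in for 'launch-%06d.npy'

def reducer(i):
    data = open_memmap(SAVENAME, mode='r')        # cheap: maps the file, no copy
    return svd(data[i], full_matrices=False)[1]   # keep only the singular values

if __name__ == '__main__':
    demo = open_memmap(SAVENAME, mode='w+', dtype='float64', shape=(4, 32, 192))
    demo[:] = np.random.rand(*demo.shape)
    demo.flush()
    del demo

    with Pool(2) as pool:
        svds = pool.map(reducer, range(4))
    print([s.shape for s in svds])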