# Shared imports assumed by the examples below (none were included in the
# original snippets).
import datetime
import os
import pickle

import numpy as np
import scipy.linalg as la
import tensorflow as tf
import tqdm

import parse  # project-local module providing scene/triplet helpers


def create_np_dataset(self, out_dir):
    """Create npz files to store the dataset."""
    mode = 'np_test'
    fname = 'np_test-{:04d}.npz'
    outfile = lambda idx: os.path.join(out_dir, fname.format(idx))
    print('Writing dataset to {}'.format(out_dir))
    # Select tuples to generate.
    bundle_tuples = self.get_bundle_tuples(mode)
    pbar = tqdm.tqdm(total=self.dataset_params.sizes[mode])
    index = 0
    for bundle_file, tuples in bundle_tuples.items():
        scene_name = self.scene_fname(bundle_file)
        # Seed per scene for reproducibility (note: str hashes are salted in
        # Python 3, so this is only deterministic if PYTHONHASHSEED is fixed).
        np.random.seed(hash(scene_name) % 2**32)
        scene = parse.load_scene(scene_name)
        for tupl in tuples:
            features = self.gen_sample_from_tuple(scene, tupl)
            np_features = {}
            for k, v in self.features.items():
                np_features.update(v.npz_value(features[k]))
            np.savez(outfile(index), **np_features)
            index += 1
            pbar.update()
    pbar.close()
    # And save out a file with the creation time for versioning.
    timestamp_file = 'np_test_timestamp.txt'
    with open(os.path.join(out_dir, timestamp_file), 'w') as date_file:
        date_file.write('Numpy dataset created {}'.format(
            datetime.datetime.now()))
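
For reference, here is a minimal sketch of reading one of these shards back. It assumes only the np_test-{:04d}.npz naming used above; the load_np_sample helper is ours, not part of the source.

def load_np_sample(out_dir, idx):
    """Load one sample written by create_np_dataset (illustrative helper)."""
    path = os.path.join(out_dir, 'np_test-{:04d}.npz'.format(idx))
    with np.load(path) as data:
        # Materialize the lazily-loaded arrays into a plain dict.
        return {key: data[key] for key in data.files}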
Example #2
def generate_samples_fixed_knn(opts, bundle_file):
    scene = parse.load_scene(opts.load)
    cam_pt = lambda i: set([f.point for f in scene.cams[i].features])
    print("Loading triplets...")
    triplets_fname = parse.triplets_name(opts.load, lite=opts.triplets_lite)
    with open(triplets_fname, 'rb') as f:
        triplets = np.array(pickle.load(f))
    print('{} triplets loaded'.format(len(triplets)))
    print("Building similarity matrices...")
    k = opts.knn
    n = opts.points[-1]
    v = opts.views[-1]
    dtype = np.float32  # output precision; the original referenced an
                        # undefined self.dtype in this module-level function
    # Mask that zeroes the same-view diagonal blocks of the (v*n) x (v*n)
    # adjacency, keeping only cross-view connections.
    mask = np.kron(np.ones((v, v)) - np.eye(v), np.ones((n, n)))
    for triplet in triplets:
        # Points visible in all three cameras of the triplet.
        point_set = cam_pt(triplet[0]) & cam_pt(triplet[1]) & cam_pt(
            triplet[2])
        feat_perm = np.random.permutation(len(point_set))[:n]
        # For each camera, collect its feature for every shared point, sort by
        # feature id, and subsample. The sorted list must be wrapped in an
        # array before fancy-indexing with feat_perm (a plain list raises
        # TypeError, as the original did).
        features = [
            np.array(sorted([([f for f in p.features if f.cam.id == camid])[0]
                             for p in point_set],
                            key=lambda x: x.id))[feat_perm]
            for camid in triplet
        ]
        descs_ = [np.array([f.desc for f in feats]) for feats in features]
        # Randomly permute the features within each view.
        rids = [np.random.permutation(len(ff)) for ff in descs_]
        perm_mats = [np.eye(len(perm))[perm] for perm in rids]
        perm = la.block_diag(*perm_mats)
        descs = np.dot(perm, np.concatenate(descs_))
        # L2-normalize the rows. The original divided by the squared norm,
        # which does not produce unit vectors; the sqrt makes Dinit a true
        # cosine-similarity matrix.
        desc_norms = np.sqrt(np.sum(descs**2, 1)).reshape(-1, 1)
        ndescs = descs / desc_norms
        Dinit = np.dot(ndescs, ndescs.T)
        # Rescale similarities to [0, 1].
        Dmin = Dinit.min()
        Dmax = Dinit.max()
        D = (Dinit - Dmin) / (Dmax - Dmin)
        # Keep only each row's k largest similarities, then symmetrize.
        L = np.copy(D)
        for i in range(L.shape[0]):
            L[i, L[i].argsort()[:-k]] = 0
        LLT = np.maximum(L, L.T)

        # Build dataset outputs
        InitEmbeddings = ndescs
        AdjMat = LLT * mask  # drop same-view edges
        Degrees = np.diag(np.sum(AdjMat, 0))
        TrueEmbedding = np.concatenate(perm_mats, axis=0)
        # Renormalized adjacency Dhat^{-1/2} (A + I) Dhat^{-1/2}, stored under
        # the name 'Laplacian'.
        Ahat = AdjMat + np.eye(*AdjMat.shape)
        Dhat_invsqrt = np.diag(1 / np.sqrt(np.sum(Ahat, 0)))
        Laplacian = np.dot(Dhat_invsqrt, np.dot(Ahat, Dhat_invsqrt))

        yield {
            'InitEmbeddings': InitEmbeddings.astype(dtype),
            'AdjMat': AdjMat.astype(dtype),
            'Degrees': Degrees.astype(dtype),
            'Laplacian': Laplacian.astype(dtype),
            'TrueEmbedding': TrueEmbedding.astype(dtype),
            'NumViews': v,
            'NumPoints': n,
        }
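
The graph construction in the loop above is worth isolating: cosine similarities are sparsified to each row's k largest entries, symmetrized with an elementwise max, and renormalized as Dhat^{-1/2} (A + I) Dhat^{-1/2} (the GCN-style propagation matrix, stored under the name 'Laplacian'). Below is a self-contained sketch of just that construction on random descriptors; the cross-view mask is omitted for brevity, and all names are illustrative rather than from the source.

rng = np.random.default_rng(0)
n_nodes, dim, k = 12, 8, 4
descs = rng.normal(size=(n_nodes, dim))
ndescs = descs / np.linalg.norm(descs, axis=1, keepdims=True)

# Cosine similarities rescaled to [0, 1], as in generate_samples_fixed_knn.
D = ndescs @ ndescs.T
D = (D - D.min()) / (D.max() - D.min())

# Zero all but each row's k largest entries, then symmetrize with max.
L = D.copy()
for i in range(n_nodes):
    L[i, L[i].argsort()[:-k]] = 0
A = np.maximum(L, L.T)

# GCN-style renormalization: Dhat^{-1/2} (A + I) Dhat^{-1/2}.
Ahat = A + np.eye(n_nodes)
Dhat_invsqrt = np.diag(1.0 / np.sqrt(Ahat.sum(axis=0)))
S = Dhat_invsqrt @ Ahat @ Dhat_invsqrt

assert np.allclose(S, S.T)  # symmetric by construction
print('max degree:', (A > 0).sum(axis=1).max())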
Example #3

def convert_dataset(self, out_dir, mode):
    """Writes the dataset for one mode to sharded TFRecord files."""
    params = self.dataset_params
    fname = '{}-{:02d}.tfrecords'
    outfile = lambda idx: os.path.join(out_dir, fname.format(mode, idx))
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # Select tuples to generate
    bundle_tuples = self.get_bundle_tuples(mode)
    # Begin generation
    print('Writing dataset to {}/{}'.format(out_dir, mode))
    writer = None
    record_idx = 0
    # Start past MAX_IDX so a new shard is opened on the first sample.
    file_idx = self.MAX_IDX + 1
    pbar = tqdm.tqdm(total=params.sizes[mode])
    for bundle_file, tuples in bundle_tuples.items():
        scene_name = self.scene_fname(bundle_file)
        # Seed per scene for reproducibility (see the note in the first
        # example above).
        np.random.seed(hash(scene_name) % 2**32)
        scene = parse.load_scene(scene_name)
        for tupl in tuples:
            # Roll over to a new shard every MAX_IDX + 1 samples.
            if file_idx > self.MAX_IDX:
                file_idx = 0
                if writer:
                    writer.close()
                writer = tf.python_io.TFRecordWriter(outfile(record_idx))
                record_idx += 1
            loaded_features = self.gen_sample_from_tuple(scene, tupl)
            features = self.process_features(loaded_features)
            example = tf.train.Example(features=tf.train.Features(
                feature=features))
            writer.write(example.SerializeToString())
            file_idx += 1
            pbar.update()
    pbar.close()
    if writer:
        writer.close()
    # And save out a file with the creation time for versioning
    timestamp_file = '{}_timestamp.txt'.format(mode)
    with open(os.path.join(out_dir, timestamp_file), 'w') as date_file:
        date_file.write('TFRecord created {}'.format(
            datetime.datetime.now()))
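
To sanity-check the output, here is a hedged read-back sketch using the same TF1 tf.python_io API family as the writer above. The feature keys depend on process_features, which is not shown here, so this only prints what was written; the inspect_tfrecords helper is ours, not part of the source.

def inspect_tfrecords(path):
    """Print the feature keys and record count of one shard (illustrative)."""
    count = 0
    for serialized in tf.python_io.tf_record_iterator(path):
        example = tf.train.Example()
        example.ParseFromString(serialized)
        if count == 0:
            print('Feature keys:', sorted(example.features.feature.keys()))
        count += 1
    print('Total records:', count)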
Example #4

def create_np_dataset(self, out_dir, num_entries):
    """Create npz files to store the test dataset."""
    del num_entries  # unused, kept for interface compatibility
    fname = 'np_test-{:04d}.npz'
    outfile = lambda idx: os.path.join(out_dir, fname.format(idx))
    print('Writing dataset to {}'.format(out_dir))
    pbar = tqdm.tqdm(total=self.dataset_params.sizes['test'])
    index = 0
    for bundle_file in parse.bundle_file_info['test']:
        scene_name = self.scene_fname(bundle_file)
        # Seed per scene for reproducibility (see the note in the first
        # example above).
        np.random.seed(hash(scene_name) % 2**32)
        scene = parse.load_scene(scene_name)
        for tupl in self.get_tuples(bundle_file):
            features = self.gen_sample_from_tuple(scene, tupl)
            np.savez(outfile(index), **features)
            index += 1
            pbar.update()
    pbar.close()  # the original never closed this progress bar

    # And save out a file with the creation time for versioning
    timestamp_file = 'np_test_timestamp.txt'
    with open(os.path.join(out_dir, timestamp_file), 'w') as date_file:
        date_file.write('Numpy dataset created {}'.format(
            datetime.datetime.now()))
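
Finally, these writers are methods of a dataset class that the snippets do not show. A purely hypothetical driver, with MyDataset and the output path as assumptions, might look like:

dataset = MyDataset()     # assumption: stands in for the class these methods belong to
out_dir = '/tmp/np_test'  # illustrative output path
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)
dataset.create_np_dataset(out_dir, num_entries=None)  # num_entries is ignored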