def create_np_dataset(self, out_dir):
    """Create npz files to store the 'np_test' dataset.

    One ``np_test-XXXX.npz`` file is written per generated sample, followed
    by ``np_test_timestamp.txt`` recording the creation time for versioning.

    Args:
      out_dir: Directory that receives the files. It is created if missing.
    """
    mode = 'np_test'
    fname = 'np_test-{:04d}.npz'
    outfile = lambda idx: os.path.join(out_dir, fname.format(idx))
    # np.savez does not create missing directories, so make sure the
    # destination exists before the first sample is written.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    print('Writing dataset to {}'.format(out_dir))
    # Select tuples to generate
    bundle_tuples = self.get_bundle_tuples(mode)
    pbar = tqdm.tqdm(total=self.dataset_params.sizes[mode])
    index = 0
    try:
        for bundle_file, tuples in bundle_tuples.items():
            scene_name = self.scene_fname(bundle_file)
            # Seed NumPy per scene so sampling depends only on the scene name.
            # NOTE(review): on Python 3, str hashes are salted per process
            # unless PYTHONHASHSEED is fixed, so this seed may differ between
            # runs -- confirm whether run-to-run reproducibility is required.
            np.random.seed(hash(scene_name) % 2**32)
            scene = parse.load_scene(scene_name)
            for tupl in tuples:
                features = self.gen_sample_from_tuple(scene, tupl)
                # Flatten every feature into the arrays stored in the npz.
                np_features = {}
                for k, v in self.features.items():
                    np_features.update(v.npz_value(features[k]))
                np.savez(outfile(index), **np_features)
                index += 1
                pbar.update()
    finally:
        # Release the progress bar even if a sample fails to generate.
        pbar.close()
    # And save out a file with the creation time for versioning
    timestamp_file = 'np_test_timestamp.txt'
    with open(os.path.join(out_dir, timestamp_file), 'w') as date_file:
        date_file.write('Numpy Dataset created {}'.format(
            str(datetime.datetime.now())))
def generate_samples_fixed_knn(opts, bundle_file, dtype=np.float32):
    """Yield k-nearest-neighbour graph samples built from camera triplets.

    For every camera triplet of the scene at ``opts.load``, the points seen
    by all three cameras are sub-sampled, their descriptors are shuffled
    independently per camera, and a masked kNN similarity graph is built
    from the descriptors.

    Args:
      opts: Options object. Reads ``load``, ``triplets_lite``, ``knn``,
        ``points[-1]`` (points per view) and ``views[-1]`` (number of views).
      bundle_file: Unused; kept so existing callers keep working.
      dtype: NumPy dtype the yielded arrays are cast to. The original code
        read ``self.dtype`` although this function has no ``self``; the
        float32 default is an assumption -- TODO confirm against callers.

    Yields:
      dict with keys ``InitEmbeddings``, ``AdjMat``, ``Degrees``,
      ``Laplacian``, ``TrueEmbedding`` (arrays cast to ``dtype``) plus the
      integers ``NumViews`` and ``NumPoints``.
    """
    scene = parse.load_scene(opts.load)

    def cam_pt(i):
        # Set of points observed by camera ``i``.
        return set([f.point for f in scene.cams[i].features])

    print("Loading triples...")
    triplets_fname = parse.triplets_name(opts.load, lite=opts.triplets_lite)
    # NOTE: pickle.load can execute arbitrary code from the file; only load
    # triplet files that come from a trusted source.
    with open(triplets_fname, 'rb') as f:
        triplets = np.array(pickle.load(f))
    print(len(triplets))
    print("Building similiarty matrices...")
    k = opts.knn
    n = opts.points[-1]
    v = opts.views[-1]
    # Zero on the n-by-n diagonal blocks and one elsewhere: multiplying by it
    # drops edges whose two endpoints fall in the same block.
    # NOTE(review): this presumes every sample has v views of exactly n
    # features each; a triplet with fewer than n shared points would make
    # the shapes below mismatch -- confirm upstream filtering.
    mask = np.kron(np.ones((v, v)) - np.eye(v), np.ones((n, n)))
    for triplet in triplets:
        point_set = (cam_pt(triplet[0]) & cam_pt(triplet[1])
                     & cam_pt(triplet[2]))
        feat_perm = np.random.permutation(len(point_set))[:n]
        features = []
        for camid in triplet:
            cam_feats = sorted(
                [([f for f in p.features if f.cam.id == camid])[0]
                 for p in point_set],
                key=lambda x: x.id)
            # A Python list cannot be indexed with an index array, so pick
            # the sampled features one by one.
            features.append([cam_feats[i] for i in feat_perm])
        descs_ = [np.array([f.desc for f in feats]) for feats in features]
        # Independent random shuffle of the features within each view.
        rids = [np.random.permutation(len(ff)) for ff in descs_]
        perm_mats = [np.eye(len(rid))[rid] for rid in rids]
        perm = la.block_diag(*perm_mats)
        descs = np.dot(perm, np.concatenate(descs_))
        # NOTE(review): this is the sum of squares (squared L2 norm), so the
        # rows of ndescs are scaled by 1/||d||^2 rather than to unit length
        # -- confirm this is intended.
        desc_norms = np.sum(descs**2, 1).reshape(-1, 1)
        ndescs = descs / desc_norms
        # Pairwise similarities rescaled to [0, 1].
        Dinit = np.dot(ndescs, ndescs.T)
        Dmin = Dinit.min()
        Dmax = Dinit.max()
        D = (Dinit - Dmin) / (Dmax - Dmin)
        # Keep only the k largest similarities in each row, then symmetrise.
        L = np.copy(D)
        for i in range(L.shape[0]):
            L[i, L[i].argsort()[:-k]] = 0
        LLT = np.maximum(L, L.T)
        # Build dataset options
        InitEmbeddings = ndescs
        AdjMat = LLT * mask
        Degrees = np.diag(np.sum(AdjMat, 0))
        TrueEmbedding = np.concatenate(perm_mats, axis=0)
        # Adjacency with self-loops, symmetrically normalised by its degrees.
        Ahat = AdjMat + np.eye(*AdjMat.shape)
        Dhat_invsqrt = np.diag(1 / np.sqrt(np.sum(Ahat, 0)))
        Laplacian = np.dot(Dhat_invsqrt, np.dot(Ahat, Dhat_invsqrt))
        yield {
            'InitEmbeddings': InitEmbeddings.astype(dtype),
            'AdjMat': AdjMat.astype(dtype),
            'Degrees': Degrees.astype(dtype),
            'Laplacian': Laplacian.astype(dtype),
            'TrueEmbedding': TrueEmbedding.astype(dtype),
            'NumViews': v,
            'NumPoints': n,
        }
def convert_dataset(self, out_dir, mode):
    """Write the generated samples for `mode` to sharded TFRecord files.

    Samples are serialized as ``tf.train.Example`` protos into files named
    ``<mode>-XX.tfrecords`` inside ``out_dir``. Each file holds
    ``self.MAX_IDX + 1`` examples; the last one may hold fewer. A
    ``<mode>_timestamp.txt`` file is written afterwards for versioning.

    Args:
      out_dir: Destination directory. It is created if missing.
      mode: Dataset split name; selects the tuples and names the files.
    """
    params = self.dataset_params
    fname = '{}-{:02d}.tfrecords'
    outfile = lambda idx: os.path.join(out_dir, fname.format(mode, idx))
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # Select tuples to generate
    bundle_tuples = self.get_bundle_tuples(mode)
    # Begin generation
    print('Writing dataset to {}/{}'.format(out_dir, mode))
    writer = None
    record_idx = 0
    # Start past the limit so the first sample opens the first shard.
    file_idx = self.MAX_IDX + 1
    pbar = tqdm.tqdm(total=params.sizes[mode])
    try:
        for bundle_file, tuples in bundle_tuples.items():
            scene_name = self.scene_fname(bundle_file)
            # NOTE(review): on Python 3, str hashes are salted per process
            # unless PYTHONHASHSEED is fixed, so this seed may differ between
            # runs -- confirm whether run-to-run reproducibility is required.
            np.random.seed(hash(scene_name) % 2**32)
            scene = parse.load_scene(scene_name)
            for tupl in tuples:
                if file_idx > self.MAX_IDX:
                    # Current shard is full (or none is open): roll over.
                    file_idx = 0
                    if writer:
                        writer.close()
                    writer = tf.python_io.TFRecordWriter(outfile(record_idx))
                    record_idx += 1
                loaded_features = self.gen_sample_from_tuple(scene, tupl)
                features = self.process_features(loaded_features)
                example = tf.train.Example(features=tf.train.Features(
                    feature=features))
                writer.write(example.SerializeToString())
                file_idx += 1
                pbar.update()
    finally:
        # Close the progress bar and the open shard even when generation
        # fails, so already-written records are flushed to disk.
        pbar.close()
        if writer:
            writer.close()
    # And save out a file with the creation time for versioning
    timestamp_file = '{}_timestamp.txt'.format(mode)
    with open(os.path.join(out_dir, timestamp_file), 'w') as date_file:
        date_file.write('TFrecord created {}'.format(
            str(datetime.datetime.now())))
def create_np_dataset(self, out_dir, num_entries):
    """Create npz files for every tuple of the 'test' bundle files.

    One ``np_test-XXXX.npz`` file is written per generated sample, followed
    by ``np_test_timestamp.txt`` recording the creation time for versioning.

    Args:
      out_dir: Directory that receives the files. It is created if missing.
      num_entries: Ignored; the number of samples is determined by the
        tuples of the test bundle files.
    """
    del num_entries
    fname = 'np_test-{:04d}.npz'
    outfile = lambda idx: os.path.join(out_dir, fname.format(idx))
    # np.savez does not create missing directories, so make sure the
    # destination exists before the first sample is written.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    print('Writing dataset to {}'.format(out_dir))
    pbar = tqdm.tqdm(total=self.dataset_params.sizes['test'])
    index = 0
    try:
        for bundle_file in parse.bundle_file_info['test']:
            scene_name = self.scene_fname(bundle_file)
            # NOTE(review): on Python 3, str hashes are salted per process
            # unless PYTHONHASHSEED is fixed, so this seed may differ between
            # runs -- confirm whether run-to-run reproducibility is required.
            np.random.seed(hash(scene_name) % 2**32)
            scene = parse.load_scene(scene_name)
            for tupl in self.get_tuples(bundle_file):
                features = self.gen_sample_from_tuple(scene, tupl)
                np.savez(outfile(index), **features)
                index += 1
                pbar.update()
    finally:
        # The progress bar was previously never closed; release it even if
        # a sample fails to generate.
        pbar.close()
    # And save out a file with the creation time for versioning
    timestamp_file = 'np_test_timestamp.txt'
    with open(os.path.join(out_dir, timestamp_file), 'w') as date_file:
        date_file.write('Numpy Dataset created {}'.format(
            str(datetime.datetime.now())))