def main(argv):
  """Rejection-samples the graphs in FLAGS.in_file down to a uniform sample.

  Reads the stats record of the input file to obtain the maximum final
  importance, streams every graph through molecule_sampler.RejectToUniform,
  and writes the accepted graphs followed by a stats record to FLAGS.out_file.

  Args:
    argv: command-line arguments; only the program name is allowed (all
      configuration comes from FLAGS).

  Raises:
    RuntimeError: if any positional arguments are passed.
  """
  if len(argv) > 1:
    raise RuntimeError(f'Unexpected arguments: {argv[1:]}')

  input_stats = graph_io.get_stats(FLAGS.in_file)
  max_importance = input_stats['max_final_importance']
  with open(FLAGS.in_file) as input_file:
    rejector = molecule_sampler.RejectToUniform(
        base_iter=graph_io.graph_reader(input_file),
        max_importance=max_importance,
        rng_seed=FLAGS.seed)
    with open(FLAGS.out_file, 'w') as output_file:
      for graph in rejector:
        graph_io.write_graph(graph, output_file)
        # Progress report every 10000 accepted graphs. Inside the loop
        # num_processed >= num_accepted >= 1, so this division is safe.
        if rejector.num_accepted % 10000 == 0:
          acc = rejector.num_accepted
          proc = rejector.num_processed
          print(f'Accepted {acc}/{proc}: {acc / proc * 100:.2f}%')
      output_stats = dict(
          num_samples=rejector.num_accepted,
          estimated_num_graphs=input_stats['estimated_num_graphs'],
          rng_seed=rejector.rng_seed)
      graph_io.write_stats(output_stats, output_file)

  acc = rejector.num_accepted
  proc = rejector.num_processed
  # Guard against an input file containing no graphs: num_processed stays 0
  # and the percentage would otherwise raise ZeroDivisionError.
  pct = acc / proc * 100 if proc else 0.0
  print(f'Done rejecting to uniform! Accepted {acc}/{proc}: '
        f'{pct:.2f}%')
def test_invalid_graph_file(self):
  """get_stats must reject a file whose last record is not a stats record."""
  filename = tempfile.NamedTemporaryFile(delete=False).name
  with open(filename, 'w') as f:
    for graph in self.graphs:
      graph_io.write_graph(graph, f)
  # No trailing stats record was written, so get_stats should fail.
  with self.assertRaisesRegex(AssertionError, 'is not a stats graph'):
    graph_io.get_stats(filename)
  # delete=False means nothing else cleans this up; remove it explicitly
  # (matching test_write_read_graphs).
  os.remove(filename)
def main(argv):
  """Samples molecule graphs for a stoichiometry and writes them to a file.

  Reads a stoichiometry from FLAGS.stoich_file, runs
  molecule_sampler.MoleculeSampler over it, and writes each sampled graph
  plus a final stats record (including elapsed wall time) to FLAGS.out_file,
  printing periodic progress reports.

  Args:
    argv: command-line arguments; only the program name is allowed (all
      configuration comes from FLAGS).

  Raises:
    RuntimeError: if any positional arguments are passed.
  """
  if len(argv) > 1:
    raise RuntimeError(f'Unexpected arguments: {argv[1:]}')
  print(f'Reading stoich from: {FLAGS.stoich_file}')
  with open(FLAGS.stoich_file) as f:
    stoich = stoichiometry.read(f)
  mol_sampler = molecule_sampler.MoleculeSampler(
      stoich,
      min_samples=FLAGS.min_samples,
      min_uniform_proportion=FLAGS.min_uniform_proportion,
      relative_precision=FLAGS.relative_precision,
      rng_seed=FLAGS.seed)
  start_time = timeit.default_timer()
  # Number of valid graphs written so far; read by print_progress below.
  num = 0

  def print_progress():
    # Closure over `num`, `start_time`, and `mol_sampler` from main.
    stats = mol_sampler.stats()
    # Relative standard error of the graph-count estimate.
    std_err_frac = stats['num_graphs_std_err'] / stats[
        'estimated_num_graphs']
    est_proportion = (stats['num_after_rejection'] /
                      stats['estimated_num_graphs'])
    print(f'Sampled {stats["num_samples"]} ({num} valid), '
          f'{timeit.default_timer() - start_time:.03f} sec, '
          f'{stats["estimated_num_graphs"]:.3E} graphs '
          f'(std err={100 * std_err_frac:.3f}%), '
          f'proportion after rejection={est_proportion:.3E}')
    # Flush so progress is visible even when stdout is redirected to a file.
    sys.stdout.flush()

  with open(FLAGS.out_file, 'w') as out:
    for graph in mol_sampler:
      graph_io.write_graph(graph, out)
      num += 1
      # Progress report every 10000 valid graphs.
      if num % 10000 == 0:
        print_progress()
    stats = mol_sampler.stats()
    stats['elapsed time'] = timeit.default_timer() - start_time
    graph_io.write_stats(stats, out)
  print('Done generating molecules!')
  # Emit a final progress line unless the loop just printed one.
  if num % 10000 != 0:
    print_progress()
def main(argv):
  """Aggregates uniform samples from several graph files into one file.

  Each positional argument names an input graph file. Their stats records
  supply per-bucket sizes and sample counts, which drive
  molecule_sampler.AggregateUniformSamples; accepted graphs and a summary
  stats record are written to FLAGS.output.

  Args:
    argv: program name followed by the input graph file names.
  """
  input_fnames = argv[1:]
  bucket_sizes = []
  sample_sizes = []
  for fname in input_fnames:
    file_stats = graph_io.get_stats(fname)
    bucket_sizes.append(file_stats['estimated_num_graphs'])
    sample_sizes.append(file_stats['num_samples'])

  def read_graphs(fname):
    # Lazily stream the graphs of a single input file.
    with open(fname) as graph_file:
      yield from graph_io.graph_reader(graph_file)

  aggregator = molecule_sampler.AggregateUniformSamples(
      bucket_sizes=bucket_sizes,
      sample_sizes=sample_sizes,
      base_iters=(read_graphs(fname) for fname in input_fnames),
      target_num_samples=FLAGS.target_samples,
      rng_seed=FLAGS.seed)

  with open(FLAGS.output, 'w') as output_file:
    for graph in aggregator:
      graph_io.write_graph(graph, output_file)
      # Progress report every 10000 accepted graphs.
      if aggregator.num_accepted % 10000 == 0:
        print(
            f'Working on file {aggregator.num_iters_started}/'
            f'{len(input_fnames)}. Accepted {aggregator.num_accepted}/'
            f'{aggregator.num_proccessed} so far.')
    stats = dict(target_num_samples=aggregator.target_num_samples,
                 num_samples=aggregator.num_accepted,
                 rng_seed=aggregator.rng_seed,
                 estimated_total_num_graphs=sum(bucket_sizes))
    graph_io.write_stats(stats, output_file)

  acc = aggregator.num_accepted
  proc = aggregator.num_proccessed
  print(f'Done aggregating uniform samples! Accepted {acc}/{proc}: '
        f'{acc / proc * 100:.2f}%')
def test_write_read_graphs(self):
  """Round-trips graphs plus a trailing stats record through a temp file."""
  path = tempfile.NamedTemporaryFile(delete=False).name

  # Write every graph, then the stats record at the end of the file.
  stats = dict(summary='Some summary', quality=100.0)
  with open(path, 'w') as out:
    for graph in self.graphs:
      graph_io.write_graph(graph, out)
    graph_io.write_stats(stats, out)

  # Read everything back.
  recovered_stats = graph_io.get_stats(path)
  with open(path, 'r') as inp:
    recovered_graphs = list(graph_io.graph_reader(inp))

  # The recovered data must match what was written.
  self.assertEqual(stats, recovered_stats)
  self.assertEqual(len(self.graphs), len(recovered_graphs))
  for written, recovered in zip(self.graphs, recovered_graphs):
    self.assertTrue(molecule_sampler.is_isomorphic(written, recovered))
  os.remove(path)