import os
import random
import shutil
import tempfile

import apache_beam as beam
from apache_beam.transforms import combiners
import tensorflow as tf
from tensorflow.python.lib.io import file_io
from tensorflow.python.lib.io import tf_record

# Project-local modules; the exact import paths are assumed and may need
# adjusting to this repository's layout.
import generation
import image_noise
import pipeline_flags
import staffline_patches_dofn

FLAGS = tf.app.flags.FLAGS  # Flag definitions (DEFINE_*) are not shown here.


def main(_):
    """Renders score pages and writes positive/negative patch examples."""
    with pipeline_flags.create_pipeline() as pipeline:
        # Ceiling division: enough pages to cover the requested number of
        # positive examples, then enough batches to cover those pages.
        num_pages = (FLAGS.num_positive_examples +
                     generation.POSITIVE_EXAMPLES_PER_IMAGE -
                     1) // generation.POSITIVE_EXAMPLES_PER_IMAGE
        num_batches = (num_pages + FLAGS.num_pages_per_batch -
                       1) // FLAGS.num_pages_per_batch
        batch_nums = pipeline | beam.transforms.Create(list(
            range(num_batches)))
        pages = batch_nums | beam.ParDo(
            generation.PageGenerationDoFn(
                num_pages_per_batch=FLAGS.num_pages_per_batch,
                vexflow_generator_command=FLAGS.vexflow_generator_command,
                svg_to_png_command=FLAGS.svg_to_png_command))

        def noise_fn(image):
            """Distorts a rendered page with a random rotation plus noise."""
            # TODO(ringw): Add better noise, maybe using generative adversarial
            # networks trained on real scores from IMSLP.
            return image_noise.gaussian_noise(
                image_noise.random_rotation(image))

        examples = pages | beam.ParDo(
            generation.PatchExampleDoFn(
                negative_example_distance=FLAGS.negative_example_distance,
                patch_width=FLAGS.patch_width,
                negative_to_positive_example_ratio=(
                    FLAGS.negative_to_positive_example_ratio),
                noise_fn=noise_fn))
        examples |= beam.io.WriteToTFRecord(
            FLAGS.examples_path,
            coder=beam.coders.ProtoCoder(tf.train.Example),
            num_shards=FLAGS.num_shards)
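

# noise_fn above relies on two helpers from the project's image_noise
# module, which are not shown in this section. The sketches below are
# hypothetical stand-ins (assuming 2-D float images in [0, 1]), not the
# project's actual implementations.
import numpy as np
from scipy import ndimage


def _random_rotation_sketch(image, max_degrees=2.0):
    # Rotate by a small random angle to mimic a skewed scan.
    angle = np.random.uniform(-max_degrees, max_degrees)
    return ndimage.rotate(image, angle, reshape=False, mode='nearest')


def _gaussian_noise_sketch(image, stddev=0.05):
    # Add zero-mean Gaussian noise and clip back to the valid range.
    noisy = image + np.random.normal(0.0, stddev, image.shape)
    return np.clip(noisy, 0.0, 1.0)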


# A second entry point, apparently from a separate script: it extracts
# staffline patches and clusters them with k-means. Note that it would
# shadow the main() above if both were kept in one module.
def main(_):
    """Extracts staffline patches, runs k-means, and writes the centroids."""
    tf.logging.info('Building the pipeline...')
    records_dir = tempfile.mkdtemp(prefix='staffline_kmeans')
    try:
        patch_file_prefix = os.path.join(records_dir, 'patches')
        with pipeline_flags.create_pipeline() as pipeline:
            filenames = file_io.get_matching_files(FLAGS.music_pattern)
            assert filenames, 'music_pattern must match at least one file'
            # Optionally subsample a fixed number of pages.
            if 0 < FLAGS.num_pages < len(filenames):
                filenames = random.sample(filenames, FLAGS.num_pages)
            filenames = pipeline | beam.transforms.Create(filenames)
            patches = filenames | beam.ParDo(
                staffline_patches_dofn.StafflinePatchesDoFn(
                    patch_height=FLAGS.patch_height,
                    patch_width=FLAGS.patch_width,
                    num_stafflines=FLAGS.num_stafflines,
                    timeout_ms=FLAGS.timeout_ms,
                    max_patches_per_page=FLAGS.max_patches_per_page))
            if FLAGS.num_outputs:
                # FixedSizeGlobally emits a single element holding the list of
                # sampled patches; flatten it back into individual examples
                # before writing.
                patches |= combiners.Sample.FixedSizeGlobally(
                    FLAGS.num_outputs)
                patches |= beam.FlatMap(lambda sampled: sampled)
            patches |= beam.io.WriteToTFRecord(
                patch_file_prefix, beam.coders.ProtoCoder(tf.train.Example))
            # The pipeline actually executes when the `with` block exits.
            tf.logging.info('Running the pipeline...')
        tf.logging.info('Running k-means...')
        patch_files = file_io.get_matching_files(patch_file_prefix + '*')
        clusters = train_kmeans(patch_files, FLAGS.kmeans_num_clusters,
                                FLAGS.kmeans_batch_size,
                                FLAGS.kmeans_num_steps)
        tf.logging.info('Writing the centroids...')
        # Serialize each centroid as a tf.train.Example holding the flattened
        # patch ('features') and its dimensions ('height', 'width').
        with tf_record.TFRecordWriter(FLAGS.output_path) as writer:
            for cluster in clusters:
                example = tf.train.Example()
                example.features.feature['features'].float_list.value.extend(
                    cluster)
                example.features.feature['height'].int64_list.value.append(
                    FLAGS.patch_height)
                example.features.feature['width'].int64_list.value.append(
                    FLAGS.patch_width)
                writer.write(example.SerializeToString())
        tf.logging.info('Done!')
    finally:
        # Always remove the temporary patch records, even on failure.
        shutil.rmtree(records_dir)
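

# train_kmeans is called above but not defined in this section. Below is a
# minimal sketch under assumptions: each patch Example stores its flattened
# pixels under the 'features' key (matching the centroid Examples written
# above), and scikit-learn's MiniBatchKMeans stands in for whatever
# clusterer the real helper uses.
def _train_kmeans_sketch(patch_files, num_clusters, batch_size, num_steps):
    from sklearn.cluster import MiniBatchKMeans

    kmeans = MiniBatchKMeans(n_clusters=num_clusters, batch_size=batch_size)
    batch, steps = [], 0
    for path in patch_files:
        for record in tf_record.tf_record_iterator(path):
            example = tf.train.Example.FromString(record)
            batch.append(
                list(example.features.feature['features'].float_list.value))
            if len(batch) == batch_size:
                kmeans.partial_fit(np.asarray(batch, dtype=np.float32))
                batch, steps = [], steps + 1
                if steps == num_steps:
                    return kmeans.cluster_centers_
    if batch:
        # Fit any leftover partial batch before returning the centroids.
        kmeans.partial_fit(np.asarray(batch, dtype=np.float32))
    return kmeans.cluster_centers_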
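

# Optional sanity check (hypothetical helper, not part of the pipeline):
# read the centroids back and reshape each into a (height, width) patch,
# using the same feature keys the writer above produces.
def _read_centroids(path):
    for record in tf_record.tf_record_iterator(path):
        example = tf.train.Example.FromString(record)
        height = example.features.feature['height'].int64_list.value[0]
        width = example.features.feature['width'].int64_list.value[0]
        values = example.features.feature['features'].float_list.value
        yield np.asarray(values, dtype=np.float32).reshape(height, width)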