def testOutputConnectedToDict(self):

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'xy': Type1, 'z': Type2})

    def transform(self, input_object):
      return {'xy': [Type1(x=input_object.x, y=input_object.y)],
              'z': [Type2(z=input_object.z)]}

  q = UnitQ()
  dag = {q: dag_pipeline.DagInput(q.input_type),
         dag_pipeline.DagOutput(): q}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  self.assertEqual(dag_pipe_obj.output_type, {'xy': Type1, 'z': Type2})
  x, y, z = -3, 0, 8
  output_dict = dag_pipe_obj.transform(Type0(x, y, z))
  self.assertEqual(output_dict, {'xy': [Type1(x, y)], 'z': [Type2(z)]})

  dag = {q: dag_pipeline.DagInput(q.input_type),
         dag_pipeline.DagOutput(): {'xy': q['xy'], 'z': q['z']}}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  self.assertEqual(dag_pipe_obj.output_type, {'xy': Type1, 'z': Type2})
  x, y, z = -3, 0, 8
  output_dict = dag_pipe_obj.transform(Type0(x, y, z))
  self.assertEqual(output_dict, {'xy': [Type1(x, y)], 'z': [Type2(z)]})
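# The tests in this section reference fixture types (Type0 through Type5) and
# fixture pipelines (UnitA through UnitD) defined at the top of the original
# test module, which is not shown here. Below is a minimal sketch of the
# types, assuming they are plain namedtuples with fields inferred from how the
# tests construct and compare values (Type3's field names are a guess, since
# no test in this section constructs one directly).
import collections

Type0 = collections.namedtuple('Type0', ['x', 'y', 'z'])
Type1 = collections.namedtuple('Type1', ['x', 'y'])
Type2 = collections.namedtuple('Type2', ['z'])
Type3 = collections.namedtuple('Type3', ['s', 't'])
Type4 = collections.namedtuple('Type4', ['x', 'y', 'z'])
Type5 = collections.namedtuple('Type5', ['a', 'b', 'c', 'd', 'z'])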
def get_pipeline(config, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: A DrumsRnnConfig object.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  quantizer = pipelines_common.Quantizer(steps_per_quarter=4)
  drums_extractor_train = drum_pipelines.DrumsExtractor(
      min_bars=7, max_steps=512, gap_bars=1.0, name='DrumsExtractorTrain')
  drums_extractor_eval = drum_pipelines.DrumsExtractor(
      min_bars=7, max_steps=512, gap_bars=1.0, name='DrumsExtractorEval')
  encoder_pipeline_train = EncoderPipeline(config, name='EncoderPipelineTrain')
  encoder_pipeline_eval = EncoderPipeline(config, name='EncoderPipelineEval')
  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_drum_tracks', 'training_drum_tracks'],
      [eval_ratio])

  dag = {
      quantizer: dag_pipeline.DagInput(music_pb2.NoteSequence),
      partitioner: quantizer,
      drums_extractor_train: partitioner['training_drum_tracks'],
      drums_extractor_eval: partitioner['eval_drum_tracks'],
      encoder_pipeline_train: drums_extractor_train,
      encoder_pipeline_eval: drums_extractor_eval,
      dag_pipeline.DagOutput('training_drum_tracks'): encoder_pipeline_train,
      dag_pipeline.DagOutput('eval_drum_tracks'): encoder_pipeline_eval
  }
  return dag_pipeline.DAGPipeline(dag)
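# A minimal usage sketch for the get_pipeline() above, assuming Magenta's
# generic pipeline runner utilities in magenta.pipelines.pipeline. The
# function name, config value, and file paths below are placeholders, not
# part of the original code.
from magenta.pipelines import pipeline


def build_dataset(config, input_tfrecord, output_dir):
  # Build the DAG, then stream NoteSequence protos from a TFRecord file
  # through it, writing one output TFRecord per DagOutput name.
  dataset_pipeline = get_pipeline(config, eval_ratio=0.1)
  pipeline.run_pipeline_serial(
      dataset_pipeline,
      pipeline.tf_record_iterator(input_tfrecord,
                                  dataset_pipeline.input_type),
      output_dir)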
def testInvalidDictionaryOutputError(self):
  b = UnitB()
  dag = {b: dag_pipeline.DagInput(b.input_type),
         dag_pipeline.DagOutput(): b}
  with self.assertRaises(dag_pipeline.InvalidDictionaryOutputError):
    dag_pipeline.DAGPipeline(dag)

  a = UnitA()
  dag = {a: dag_pipeline.DagInput(b.input_type),
         dag_pipeline.DagOutput('output'): a}
  with self.assertRaises(dag_pipeline.InvalidDictionaryOutputError):
    dag_pipeline.DAGPipeline(dag)

  a2 = UnitA()
  dag = {a: dag_pipeline.DagInput(a.input_type),
         a2: dag_pipeline.DagInput(a2.input_type),
         dag_pipeline.DagOutput('output'): {'t1': a['t1'], 't2': a2['t2']}}
  with self.assertRaises(dag_pipeline.InvalidDictionaryOutputError):
    dag_pipeline.DAGPipeline(dag)
def testInvalidStatisticsError(self):

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, str, str)

    def transform(self, input_object):
      # Invalid: the stats list contains a non-Statistic object.
      self._set_stats([statistics.Counter('stat_1', 5), 1234])
      return [input_object]

  class UnitR(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, str, str)

    def transform(self, input_object):
      # Invalid: a single Statistic is given instead of a list.
      self._set_stats(statistics.Counter('stat_1', 5))
      return [input_object]

  q = UnitQ()
  dag = {q: dag_pipeline.DagInput(q.input_type),
         dag_pipeline.DagOutput('output'): q}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  with self.assertRaises(pipeline.InvalidStatisticsError):
    dag_pipe_obj.transform('hello world')

  r = UnitR()
  dag = {r: dag_pipeline.DagInput(q.input_type),
         dag_pipeline.DagOutput('output'): r}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  with self.assertRaises(pipeline.InvalidStatisticsError):
    dag_pipe_obj.transform('hello world')
def testDAGPipelineInputAndOutputType(self):
  # Tests that the DAGPipeline has the correct `input_type` and
  # `output_type` values based on the DAG given to it.
  a, b, c, d = UnitA(), UnitB(), UnitC(), UnitD()

  dag = {a: dag_pipeline.DagInput(Type0),
         b: a['t1'],
         c: {'A_data': a['t2'], 'B_data': b},
         d: {'0': c['regular_data'], '1': b, '2': c['special_data']},
         dag_pipeline.DagOutput('abcdz'): d}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  self.assertEqual(dag_pipe_obj.input_type, Type0)
  self.assertEqual(dag_pipe_obj.output_type, {'abcdz': Type5})

  dag = {a: dag_pipeline.DagInput(Type0),
         dag_pipeline.DagOutput('t1'): a['t1'],
         dag_pipeline.DagOutput('t2'): a['t2']}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  self.assertEqual(dag_pipe_obj.input_type, Type0)
  self.assertEqual(dag_pipe_obj.output_type, {'t1': Type1, 't2': Type2})
def testDependencyLoops(self):

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, Type1)

    def transform(self, input_object):
      pass

  class UnitR(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type1, Type0)

    def transform(self, input_object):
      pass

  class UnitS(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, {'a': Type1, 'b': Type0}, Type1)

    def transform(self, input_object):
      pass

  class UnitT(pipeline.Pipeline):

    def __init__(self, name='UnitT'):
      pipeline.Pipeline.__init__(self, Type0, Type0, name)

    def transform(self, input_object):
      pass

  q, r, s, t = UnitQ(), UnitR(), UnitS(), UnitT()

  dag = {q: dag_pipeline.DagInput(q.input_type),
         s: {'a': q, 'b': r},
         r: s,
         dag_pipeline.DagOutput('output'): r,
         dag_pipeline.DagOutput('output_2'): s}
  with self.assertRaises(dag_pipeline.BadTopologyError):
    dag_pipeline.DAGPipeline(dag)

  dag = {s: {'a': dag_pipeline.DagInput(Type1), 'b': r},
         r: s,
         dag_pipeline.DagOutput('output'): r}
  with self.assertRaises(dag_pipeline.BadTopologyError):
    dag_pipeline.DAGPipeline(dag)

  dag = {dag_pipeline.DagOutput('output'): dag_pipeline.DagInput(Type0),
         t: t}
  with self.assertRaises(dag_pipeline.BadTopologyError):
    dag_pipeline.DAGPipeline(dag)

  t2 = UnitT('UnitT2')
  dag = {dag_pipeline.DagOutput('output'): dag_pipeline.DagInput(Type0),
         t2: t,
         t: t2}
  with self.assertRaises(dag_pipeline.BadTopologyError):
    dag_pipeline.DAGPipeline(dag)
def testIntermediateUnequalOutputCounts(self):
  # Tests that intermediate output lists which are not the same length are
  # handled correctly.

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'xy': Type1, 'z': Type2})

    def transform(self, input_object):
      return {'xy': [Type1(x=input_object.x + i, y=input_object.y + i)
                     for i in range(input_object.z)],
              'z': [Type2(z=i) for i in [-input_object.z, input_object.z]]}

  class Partitioner(pipeline.Pipeline):

    def __init__(self, input_type, training_set_name, test_set_name):
      self.training_set_name = training_set_name
      self.test_set_name = test_set_name
      pipeline.Pipeline.__init__(
          self, input_type,
          {training_set_name: Type0, test_set_name: Type0})

    def transform(self, input_object):
      input_dict = input_object
      input_object = Type0(input_dict['xy'].x,
                           input_dict['xy'].y,
                           input_dict['z'].z)
      if input_object.x < 0:
        return {self.training_set_name: [],
                self.test_set_name: [input_object]}
      return {self.training_set_name: [input_object],
              self.test_set_name: []}

  q = UnitQ()
  partition = Partitioner(q.output_type, 'training_set', 'test_set')

  dag = {q: dag_pipeline.DagInput(q.input_type),
         partition: {'xy': q['xy'], 'z': q['z']},
         dag_pipeline.DagOutput('training_set'): partition['training_set'],
         dag_pipeline.DagOutput('test_set'): partition['test_set']}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)

  x, y, z = -3, 0, 8
  output_dict = dag_pipe_obj.transform(Type0(x, y, z))

  self.assertEqual(set(output_dict.keys()), set(['training_set', 'test_set']))
  training_results = output_dict['training_set']
  test_results = output_dict['test_set']

  all_expected_results = [Type0(x + i, y + i, zed)
                          for i in range(0, z)
                          for zed in [-z, z]]
  expected_training_results = [sample for sample in all_expected_results
                               if sample.x >= 0]
  expected_test_results = [sample for sample in all_expected_results
                           if sample.x < 0]
  self.assertEqual(set(training_results), set(expected_training_results))
  self.assertEqual(set(test_results), set(expected_test_results))
def testDisjointGraph(self):

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, Type1)

    def transform(self, input_object):
      pass

  class UnitR(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type1, {'a': Type2, 'b': Type3})

    def transform(self, input_object):
      pass

  q, r = UnitQ(), UnitR()
  dag = {q: dag_pipeline.DagInput(q.input_type),
         dag_pipeline.DagOutput(): r}
  with self.assertRaises(dag_pipeline.NotConnectedError):
    dag_pipeline.DAGPipeline(dag)

  q, r = UnitQ(), UnitR()
  dag = {q: dag_pipeline.DagInput(q.input_type),
         dag_pipeline.DagOutput(): {'a': q, 'b': r['b']}}
  with self.assertRaises(dag_pipeline.NotConnectedError):
    dag_pipeline.DAGPipeline(dag)

  # Pipelines that do not output to anywhere are not allowed.
  dag = {dag_pipeline.DagOutput('output'):
             dag_pipeline.DagInput(q.input_type),
         q: dag_pipeline.DagInput(q.input_type),
         r: q}
  with self.assertRaises(dag_pipeline.NotConnectedError):
    dag_pipeline.DAGPipeline(dag)

  # Pipelines which need to be executed but don't have inputs are not
  # allowed.
  dag = {dag_pipeline.DagOutput('output'):
             dag_pipeline.DagInput(q.input_type),
         r: q,
         dag_pipeline.DagOutput(): r}
  with self.assertRaises(dag_pipeline.NotConnectedError):
    dag_pipeline.DAGPipeline(dag)
def testUnequalOutputCounts(self):
  # Tests dictionary output type where each output list has a different size.

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, Type1)

    def transform(self, input_object):
      return [Type1(x=input_object.x + i, y=input_object.y + i)
              for i in range(input_object.z)]

  class Partitioner(pipeline.Pipeline):

    def __init__(self, input_type, training_set_name, test_set_name):
      self.training_set_name = training_set_name
      self.test_set_name = test_set_name
      pipeline.Pipeline.__init__(
          self, input_type,
          {training_set_name: input_type, test_set_name: input_type})

    def transform(self, input_object):
      if input_object.x < 0:
        return {self.training_set_name: [],
                self.test_set_name: [input_object]}
      return {self.training_set_name: [input_object],
              self.test_set_name: []}

  q = UnitQ()
  partition = Partitioner(q.output_type, 'training_set', 'test_set')

  dag = {q: dag_pipeline.DagInput(q.input_type),
         partition: q,
         dag_pipeline.DagOutput('training_set'): partition['training_set'],
         dag_pipeline.DagOutput('test_set'): partition['test_set']}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)

  x, y, z = -3, 0, 8
  output_dict = dag_pipe_obj.transform(Type0(x, y, z))

  self.assertEqual(set(output_dict.keys()), set(['training_set', 'test_set']))
  training_results = output_dict['training_set']
  test_results = output_dict['test_set']
  expected_training_results = [Type1(x + i, y + i) for i in range(-x, z)]
  expected_test_results = [Type1(x + i, y + i) for i in range(0, -x)]
  self.assertEqual(set(training_results), set(expected_training_results))
  self.assertEqual(set(test_results), set(expected_test_results))
def testNoOutputs(self):
  # Test that empty lists or dicts as intermediate or final outputs don't
  # break anything.

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'xy': Type1, 'z': Type2})

    def transform(self, input_object):
      return {'xy': [], 'z': []}

  class UnitR(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, {'xy': Type1, 'z': Type2}, Type4)

    def transform(self, input_object):
      input_dict = input_object
      return [Type4(input_dict['xy'].x, input_dict['xy'].y,
                    input_dict['z'].z)]

  class UnitS(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, Type1)

    def transform(self, unused_input_dict):
      return []

  q, r, s = UnitQ(), UnitR(), UnitS()

  dag = {q: dag_pipeline.DagInput(Type0),
         r: q,
         dag_pipeline.DagOutput('output'): r}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  self.assertEqual(dag_pipe_obj.transform(Type0(1, 2, 3)), {'output': []})

  dag = {q: dag_pipeline.DagInput(Type0),
         s: dag_pipeline.DagInput(Type0),
         r: {'xy': s, 'z': q['z']},
         dag_pipeline.DagOutput('output'): r}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  self.assertEqual(dag_pipe_obj.transform(Type0(1, 2, 3)), {'output': []})

  dag = {s: dag_pipeline.DagInput(Type0),
         dag_pipeline.DagOutput('output'): s}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  self.assertEqual(dag_pipe_obj.transform(Type0(1, 2, 3)), {'output': []})

  dag = {q: dag_pipeline.DagInput(Type0),
         dag_pipeline.DagOutput(): q}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
  self.assertEqual(dag_pipe_obj.transform(Type0(1, 2, 3)),
                   {'xy': [], 'z': []})
def get_pipeline(config, steps_per_quarter, min_steps, max_steps, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: An EventSequenceRnnConfig.
    steps_per_quarter: How many steps per quarter to use when quantizing.
    min_steps: Minimum number of steps for an extracted sequence.
    max_steps: Maximum number of steps for an extracted sequence.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  quantizer = pipelines_common.Quantizer(steps_per_quarter=steps_per_quarter)
  # Transpose up to a major third in either direction.
  # Because our current dataset is Bach chorales, transposing more than a
  # major third in either direction probably doesn't make sense (e.g., because
  # it is likely to exceed normal singing range).
  transposition_range = range(-4, 5)
  transposition_pipeline_train = sequences_lib.TranspositionPipeline(
      transposition_range, name='TranspositionPipelineTrain')
  transposition_pipeline_eval = sequences_lib.TranspositionPipeline(
      transposition_range, name='TranspositionPipelineEval')
  poly_extractor_train = PolyphonicSequenceExtractor(
      min_steps=min_steps, max_steps=max_steps, name='PolyExtractorTrain')
  poly_extractor_eval = PolyphonicSequenceExtractor(
      min_steps=min_steps, max_steps=max_steps, name='PolyExtractorEval')
  encoder_pipeline_train = encoder_decoder.EncoderPipeline(
      polyphony_lib.PolyphonicSequence, config.encoder_decoder,
      name='EncoderPipelineTrain')
  encoder_pipeline_eval = encoder_decoder.EncoderPipeline(
      polyphony_lib.PolyphonicSequence, config.encoder_decoder,
      name='EncoderPipelineEval')
  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_poly_tracks', 'training_poly_tracks'],
      [eval_ratio])

  dag = {
      quantizer: dag_pipeline.DagInput(music_pb2.NoteSequence),
      partitioner: quantizer,
      transposition_pipeline_train: partitioner['training_poly_tracks'],
      transposition_pipeline_eval: partitioner['eval_poly_tracks'],
      poly_extractor_train: transposition_pipeline_train,
      poly_extractor_eval: transposition_pipeline_eval,
      encoder_pipeline_train: poly_extractor_train,
      encoder_pipeline_eval: poly_extractor_eval,
      dag_pipeline.DagOutput('training_poly_tracks'): encoder_pipeline_train,
      dag_pipeline.DagOutput('eval_poly_tracks'): encoder_pipeline_eval
  }
  return dag_pipeline.DAGPipeline(dag)
def testBadInputOrOutputError(self):

  class UnitQ(pipeline.Pipeline):

    def __init__(self, name='UnitQ'):
      pipeline.Pipeline.__init__(self, Type0, Type1, name)

    def transform(self, input_object):
      pass

  class UnitR(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type1, Type0)

    def transform(self, input_object):
      pass

  # Missing Input.
  q, r = UnitQ(), UnitR()
  dag = {r: q,
         dag_pipeline.DagOutput('output'): r}
  with self.assertRaises(dag_pipeline.BadInputOrOutputError):
    dag_pipeline.DAGPipeline(dag)

  # Missing Output.
  dag = {q: dag_pipeline.DagInput(Type0),
         r: q}
  with self.assertRaises(dag_pipeline.BadInputOrOutputError):
    dag_pipeline.DAGPipeline(dag)

  # Multiple instances of Input with the same type IS allowed.
  q2 = UnitQ('UnitQ2')
  dag = {q: dag_pipeline.DagInput(Type0),
         q2: dag_pipeline.DagInput(Type0),
         dag_pipeline.DagOutput(): {'q': q, 'q2': q2}}
  _ = dag_pipeline.DAGPipeline(dag)

  # Multiple instances with different types is not allowed.
  dag = {q: dag_pipeline.DagInput(Type0),
         r: dag_pipeline.DagInput(Type1),
         dag_pipeline.DagOutput(): {'q': q, 'r': r}}
  with self.assertRaises(dag_pipeline.BadInputOrOutputError):
    dag_pipeline.DAGPipeline(dag)
def get_pipeline(config, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: A MelodyRnnConfig object.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A (pipeline.Pipeline, pipelines_common.IDPipeline) tuple.
  """
  quantizer = pipelines_common.Quantizer(steps_per_quarter=4)
  melody_extractor = melody_pipelines.MelodyExtractor(
      min_bars=7, max_steps=512, min_unique_pitches=5, gap_bars=1.0,
      ignore_polyphonic_notes=False)
  id_pipeline = pipelines_common.IDPipeline()
  encoder_pipeline = EncoderPipeline(config)
  partitioner = pipelines_common.RandomPartition(
      tf.train.SequenceExample,
      ['eval_melodies', 'training_melodies'],
      [eval_ratio])

  dag = {
      quantizer: dag_pipeline.DagInput(music_pb2.NoteSequence),
      melody_extractor: quantizer,
      id_pipeline: melody_extractor,
      encoder_pipeline: id_pipeline,
      partitioner: encoder_pipeline,
      dag_pipeline.DagOutput(): partitioner
  }
  return dag_pipeline.DAGPipeline(dag), id_pipeline
def testDirectConnection(self):
  # Tests a direct dict to dict connection in the DAG.

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'xy': Type1, 'z': Type2})

    def transform(self, input_object):
      return {'xy': [Type1(x=input_object.x, y=input_object.y)],
              'z': [Type2(z=input_object.z)]}

  class UnitR(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, {'xy': Type1, 'z': Type2}, Type4)

    def transform(self, input_dict):
      return [Type4(input_dict['xy'].x, input_dict['xy'].y,
                    input_dict['z'].z)]

  q, r = UnitQ(), UnitR()
  dag = {q: dag_pipeline.DagInput(q.input_type),
         r: q,
         dag_pipeline.DagOutput('output'): r}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)

  x, y, z = -3, 0, 8
  output_dict = dag_pipe_obj.transform(Type0(x, y, z))
  self.assertEqual(output_dict, {'output': [Type4(x, y, z)]})
def testMultiOutput(self):
  # Tests a pipeline.Pipeline that maps a single input to multiple outputs.

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'t1': Type1, 't2': Type2})

    def transform(self, input_object):
      t1 = [Type1(x=input_object.x + i, y=input_object.y + i)
            for i in range(input_object.z)]
      t2 = [Type2(z=input_object.z)]
      return {'t1': t1, 't2': t2}

  q, b, c = UnitQ(), UnitB(), UnitC()
  dag = {q: dag_pipeline.DagInput(Type0),
         b: q['t1'],
         c: {'A_data': q['t2'], 'B_data': b},
         dag_pipeline.DagOutput('outputs'): c['regular_data']}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)

  x, y, z = 1, 2, 3
  output_dict = dag_pipe_obj.transform(Type0(x, y, z))

  self.assertEqual(list(output_dict.keys()), ['outputs'])
  results = output_dict['outputs']
  self.assertEqual(len(results), 3)

  expected_results = [Type4((x + i) * 1000, (y + i) - 100, 0)
                      for i in range(z)]
  self.assertEqual(set(results), set(expected_results))
def testSingleOutputs(self):
  # Tests single object and dictionaries in the DAG.
  a, b, c, d = UnitA(), UnitB(), UnitC(), UnitD()
  dag = {a: dag_pipeline.DagInput(Type0),
         b: a['t1'],
         c: {'A_data': a['t2'], 'B_data': b},
         d: {'0': c['regular_data'], '1': b, '2': c['special_data']},
         dag_pipeline.DagOutput('abcdz'): d}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag)

  inputs = [Type0(1, 2, 3), Type0(-1, -2, -3), Type0(3, -3, 2)]
  for input_object in inputs:
    x, y, z = input_object.x, input_object.y, input_object.z
    output_dict = dag_pipe_obj.transform(input_object)

    self.assertEqual(list(output_dict.keys()), ['abcdz'])
    results = output_dict['abcdz']
    self.assertEqual(len(results), 1)
    result = results[0]

    # The following outputs are the result of passing the values in
    # `input_object` through the transform functions of UnitA, UnitB, UnitC,
    # and UnitD (all defined at the top of this file), connected in the way
    # defined by `dag`.
    self.assertEqual(result.a, x * 1000)
    self.assertEqual(result.b, y - 100)
    self.assertEqual(result.c, x * 1000 + z * 100)
    self.assertEqual(result.d, y - 100 - z * 100)
    self.assertEqual(result.z, z)
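# For reference, one possible reconstruction of the fixture pipelines
# UnitA-UnitD that is consistent with the arithmetic asserted above and in
# testMultiOutput. The actual definitions live at the top of the original
# test module and may differ in detail; treat this as a sketch.
class UnitA(pipeline.Pipeline):

  def __init__(self):
    pipeline.Pipeline.__init__(self, Type0, {'t1': Type1, 't2': Type2})

  def transform(self, input_object):
    # Split a Type0 into its (x, y) pair and its z value.
    return {'t1': [Type1(x=input_object.x, y=input_object.y)],
            't2': [Type2(z=input_object.z)]}


class UnitB(pipeline.Pipeline):

  def __init__(self):
    pipeline.Pipeline.__init__(self, Type1, Type3)

  def transform(self, input_object):
    # Scale and shift: s = x * 1000, t = y - 100.
    return [Type3(s=input_object.x * 1000, t=input_object.y - 100)]


class UnitC(pipeline.Pipeline):

  def __init__(self):
    pipeline.Pipeline.__init__(
        self,
        {'A_data': Type2, 'B_data': Type3},
        {'regular_data': Type4, 'special_data': Type4})

  def transform(self, input_object):
    s = input_object['B_data'].s
    t = input_object['B_data'].t
    z = input_object['A_data'].z
    # 'regular_data' passes (s, t) through; 'special_data' mixes in z.
    return {'regular_data': [Type4(x=s, y=t, z=0)],
            'special_data': [Type4(x=s + z * 100, y=t - z * 100, z=z)]}


class UnitD(pipeline.Pipeline):

  def __init__(self):
    pipeline.Pipeline.__init__(
        self, {'0': Type4, '1': Type3, '2': Type4}, Type5)

  def transform(self, input_object):
    # Combine the regular and special Type4 values into a single Type5.
    return [Type5(a=input_object['0'].x, b=input_object['0'].y,
                  c=input_object['2'].x, d=input_object['2'].y,
                  z=input_object['2'].z)]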
def get_pipeline(config, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: An ImprovRnnConfig object.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  all_transpositions = config.transpose_to_key is None
  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_lead_sheets', 'training_lead_sheets'],
      [eval_ratio])
  dag = {partitioner: dag_pipeline.DagInput(music_pb2.NoteSequence)}

  for mode in ['eval', 'training']:
    time_change_splitter = note_sequence_pipelines.TimeChangeSplitter(
        name='TimeChangeSplitter_' + mode)
    quantizer = note_sequence_pipelines.Quantizer(
        steps_per_quarter=config.steps_per_quarter, name='Quantizer_' + mode)
    lead_sheet_extractor = lead_sheet_pipelines.LeadSheetExtractor(
        min_bars=7, max_steps=512, min_unique_pitches=3, gap_bars=1.0,
        ignore_polyphonic_notes=False, all_transpositions=all_transpositions,
        name='LeadSheetExtractor_' + mode)
    encoder_pipeline = EncoderPipeline(config, name='EncoderPipeline_' + mode)

    dag[time_change_splitter] = partitioner[mode + '_lead_sheets']
    dag[quantizer] = time_change_splitter
    dag[lead_sheet_extractor] = quantizer
    dag[encoder_pipeline] = lead_sheet_extractor
    dag[dag_pipeline.DagOutput(mode + '_lead_sheets')] = encoder_pipeline

  return dag_pipeline.DAGPipeline(dag)
def get_pipeline(config, min_events, max_events, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: A PerformanceRnnConfig.
    min_events: Minimum number of events for an extracted sequence.
    max_events: Maximum number of events for an extracted sequence.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  # Stretch by -5%, -2.5%, 0%, 2.5%, and 5%.
  stretch_factors = [0.95, 0.975, 1.0, 1.025, 1.05]

  # Transpose no more than a major third.
  transposition_range = range(-3, 4)

  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_performances', 'training_performances'],
      [eval_ratio])
  dag = {partitioner: dag_pipeline.DagInput(music_pb2.NoteSequence)}

  for mode in ['eval', 'training']:
    sustain_pipeline = note_sequence_pipelines.SustainPipeline(
        name='SustainPipeline_' + mode)
    stretch_pipeline = note_sequence_pipelines.StretchPipeline(
        stretch_factors, name='StretchPipeline_' + mode)
    splitter = note_sequence_pipelines.Splitter(
        hop_size_seconds=30.0, name='Splitter_' + mode)
    quantizer = note_sequence_pipelines.Quantizer(
        steps_per_second=config.steps_per_second, name='Quantizer_' + mode)
    transposition_pipeline = note_sequence_pipelines.TranspositionPipeline(
        transposition_range, name='TranspositionPipeline_' + mode)
    perf_extractor = PerformanceExtractor(
        min_events=min_events, max_events=max_events,
        num_velocity_bins=config.num_velocity_bins,
        name='PerformanceExtractor_' + mode)
    encoder_pipeline = EncoderPipeline(config, name='EncoderPipeline_' + mode)

    dag[sustain_pipeline] = partitioner[mode + '_performances']
    if mode == 'eval':
      # No stretching in eval.
      dag[splitter] = sustain_pipeline
    else:
      dag[stretch_pipeline] = sustain_pipeline
      dag[splitter] = stretch_pipeline
    dag[quantizer] = splitter
    if mode == 'eval':
      # No transposition in eval.
      dag[perf_extractor] = quantizer
    else:
      dag[transposition_pipeline] = quantizer
      dag[perf_extractor] = transposition_pipeline
    dag[encoder_pipeline] = perf_extractor
    dag[dag_pipeline.DagOutput(mode + '_performances')] = encoder_pipeline

  return dag_pipeline.DAGPipeline(dag)
def get_pipeline(config, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: A MelodyRnnConfig object.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_melodies', 'training_melodies'],
      [eval_ratio])
  dag = {partitioner: dag_pipeline.DagInput(music_pb2.NoteSequence)}

  for mode in ['eval', 'training']:
    time_change_splitter = note_sequence_pipelines.TimeChangeSplitter(
        name='TimeChangeSplitter_' + mode)
    quantizer = note_sequence_pipelines.Quantizer(
        steps_per_quarter=config.steps_per_quarter, name='Quantizer_' + mode)
    melody_extractor = melody_pipelines.MelodyExtractor(
        min_bars=7, max_steps=512, min_unique_pitches=5, gap_bars=1.0,
        ignore_polyphonic_notes=True, name='MelodyExtractor_' + mode)
    encoder_pipeline = EncoderPipeline(config, name='EncoderPipeline_' + mode)

    dag[time_change_splitter] = partitioner[mode + '_melodies']
    dag[quantizer] = time_change_splitter
    dag[melody_extractor] = quantizer
    dag[encoder_pipeline] = melody_extractor
    dag[dag_pipeline.DagOutput(mode + '_melodies')] = encoder_pipeline

  return dag_pipeline.DAGPipeline(dag)
def get_pipeline(config, eval_ratio=0.0):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: A config object with a `steps_per_quarter` attribute (e.g. a
        MelodyRnnConfig).
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_melodies', 'training_melodies'],
      [eval_ratio])
  dag = {partitioner: dag_pipeline.DagInput(music_pb2.NoteSequence)}

  for mode in ['eval', 'training']:
    time_change_splitter = note_sequence_pipelines.TimeChangeSplitter(
        name='TimeChangeSplitter_' + mode)
    repeat_sequence = RepeatSequence(
        min_duration=16, name='RepeatSequence_' + mode)
    transposition_pipeline = note_sequence_pipelines.TranspositionPipeline(
        (0,), name='TranspositionPipeline_' + mode)
    quantizer = note_sequence_pipelines.Quantizer(
        steps_per_quarter=config.steps_per_quarter, name='Quantizer_' + mode)
    melody_extractor = melody_pipelines.MelodyExtractor(
        min_bars=7, max_steps=512, min_unique_pitches=5, gap_bars=1.0,
        ignore_polyphonic_notes=True, name='MelodyExtractor_' + mode)
    encoder_pipeline = EncoderPipeline(config, name='EncoderPipeline_' + mode)

    dag[time_change_splitter] = partitioner[mode + '_melodies']
    dag[repeat_sequence] = time_change_splitter
    dag[quantizer] = repeat_sequence
    dag[transposition_pipeline] = quantizer
    dag[melody_extractor] = transposition_pipeline
    dag[encoder_pipeline] = melody_extractor
    dag[dag_pipeline.DagOutput(mode + '_melodies')] = encoder_pipeline

  return dag_pipeline.DAGPipeline(dag)
def get_pipeline(config, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: A DrumsRnnConfig object.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_drum_tracks', 'training_drum_tracks'],
      [eval_ratio])
  dag = {partitioner: dag_pipeline.DagInput(music_pb2.NoteSequence)}

  for mode in ['eval', 'training']:
    time_change_splitter = note_sequence_pipelines.TimeChangeSplitter(
        name='TimeChangeSplitter_' + mode)
    quantizer = note_sequence_pipelines.Quantizer(
        steps_per_quarter=config.steps_per_quarter, name='Quantizer_' + mode)
    drums_extractor = drum_pipelines.DrumsExtractor(
        min_bars=7, max_steps=512, gap_bars=1.0,
        name='DrumsExtractor_' + mode)
    encoder_pipeline = event_sequence_pipeline.EncoderPipeline(
        magenta.music.DrumTrack, config.encoder_decoder,
        name='EncoderPipeline_' + mode)

    dag[time_change_splitter] = partitioner[mode + '_drum_tracks']
    dag[quantizer] = time_change_splitter
    dag[drums_extractor] = quantizer
    dag[encoder_pipeline] = drums_extractor
    dag[dag_pipeline.DagOutput(mode + '_drum_tracks')] = encoder_pipeline

  return dag_pipeline.DAGPipeline(dag)
def get_pipeline(config, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: An ImprovRnnConfig object.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  all_transpositions = config.transpose_to_key is None
  quantizer = pipelines_common.Quantizer(steps_per_quarter=4)
  lead_sheet_extractor_train = lead_sheet_pipelines.LeadSheetExtractor(
      min_bars=7, max_steps=512, min_unique_pitches=3, gap_bars=1.0,
      ignore_polyphonic_notes=False, all_transpositions=all_transpositions,
      name='LeadSheetExtractorTrain')
  lead_sheet_extractor_eval = lead_sheet_pipelines.LeadSheetExtractor(
      min_bars=7, max_steps=512, min_unique_pitches=3, gap_bars=1.0,
      ignore_polyphonic_notes=False, all_transpositions=all_transpositions,
      name='LeadSheetExtractorEval')
  encoder_pipeline_train = EncoderPipeline(config, name='EncoderPipelineTrain')
  encoder_pipeline_eval = EncoderPipeline(config, name='EncoderPipelineEval')
  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_lead_sheets', 'training_lead_sheets'],
      [eval_ratio])

  dag = {
      quantizer: dag_pipeline.DagInput(music_pb2.NoteSequence),
      partitioner: quantizer,
      lead_sheet_extractor_train: partitioner['training_lead_sheets'],
      lead_sheet_extractor_eval: partitioner['eval_lead_sheets'],
      encoder_pipeline_train: lead_sheet_extractor_train,
      encoder_pipeline_eval: lead_sheet_extractor_eval,
      dag_pipeline.DagOutput('training_lead_sheets'): encoder_pipeline_train,
      dag_pipeline.DagOutput('eval_lead_sheets'): encoder_pipeline_eval
  }
  return dag_pipeline.DAGPipeline(dag)
def get_pipeline(config, steps_per_quarter, min_steps, max_steps, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: An EventSequenceRnnConfig.
    steps_per_quarter: How many steps per quarter to use when quantizing.
    min_steps: Minimum number of steps for an extracted sequence.
    max_steps: Maximum number of steps for an extracted sequence.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  quantizer = pipelines_common.Quantizer(steps_per_quarter=steps_per_quarter)
  poly_extractor_train = PolyphonicSequenceExtractor(
      min_steps=min_steps, max_steps=max_steps, name='PolyExtractorTrain')
  poly_extractor_eval = PolyphonicSequenceExtractor(
      min_steps=min_steps, max_steps=max_steps, name='PolyExtractorEval')
  encoder_pipeline_train = encoder_decoder.EncoderPipeline(
      polyphony_lib.PolyphonicSequence, config.encoder_decoder,
      name='EncoderPipelineTrain')
  encoder_pipeline_eval = encoder_decoder.EncoderPipeline(
      polyphony_lib.PolyphonicSequence, config.encoder_decoder,
      name='EncoderPipelineEval')
  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_poly_tracks', 'training_poly_tracks'],
      [eval_ratio])

  dag = {
      quantizer: dag_pipeline.DagInput(music_pb2.NoteSequence),
      partitioner: quantizer,
      poly_extractor_train: partitioner['training_poly_tracks'],
      poly_extractor_eval: partitioner['eval_poly_tracks'],
      encoder_pipeline_train: poly_extractor_train,
      encoder_pipeline_eval: poly_extractor_eval,
      dag_pipeline.DagOutput('training_poly_tracks'): encoder_pipeline_train,
      dag_pipeline.DagOutput('eval_poly_tracks'): encoder_pipeline_eval
  }
  return dag_pipeline.DAGPipeline(dag)
def get_pipeline(config, min_steps, max_steps, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: An EventSequenceRnnConfig.
    min_steps: Minimum number of steps for an extracted sequence.
    max_steps: Maximum number of steps for an extracted sequence.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  # Transpose up to a major third in either direction.
  # Because our current dataset is Bach chorales, transposing more than a
  # major third in either direction probably doesn't make sense (e.g., because
  # it is likely to exceed normal singing range).
  transposition_range = range(-4, 5)

  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_poly_tracks', 'training_poly_tracks'],
      [eval_ratio])
  dag = {partitioner: dag_pipeline.DagInput(music_pb2.NoteSequence)}

  for mode in ['eval', 'training']:
    time_change_splitter = note_sequence_pipelines.TimeChangeSplitter(
        name='TimeChangeSplitter_' + mode)
    quantizer = note_sequence_pipelines.Quantizer(
        steps_per_quarter=config.steps_per_quarter, name='Quantizer_' + mode)
    transposition_pipeline = note_sequence_pipelines.TranspositionPipeline(
        transposition_range, name='TranspositionPipeline_' + mode)
    poly_extractor = PolyphonicSequenceExtractor(
        min_steps=min_steps, max_steps=max_steps,
        name='PolyExtractor_' + mode)
    encoder_pipeline = event_sequence_pipeline.EncoderPipeline(
        polyphony_lib.PolyphonicSequence, config.encoder_decoder,
        name='EncoderPipeline_' + mode)

    dag[time_change_splitter] = partitioner[mode + '_poly_tracks']
    dag[quantizer] = time_change_splitter
    dag[transposition_pipeline] = quantizer
    dag[poly_extractor] = transposition_pipeline
    dag[encoder_pipeline] = poly_extractor
    dag[dag_pipeline.DagOutput(mode + '_poly_tracks')] = encoder_pipeline

  return dag_pipeline.DAGPipeline(dag)
def testStatistics(self):

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, Type1)
      self.stats = []

    def transform(self, input_object):
      self._set_stats([statistics.Counter('output_count', input_object.z)])
      return [Type1(x=input_object.x + i, y=input_object.y + i)
              for i in range(input_object.z)]

  class UnitR(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type1, Type1)

    def transform(self, input_object):
      self._set_stats([statistics.Counter('input_count', 1)])
      return [input_object]

  q, r = UnitQ(), UnitR()
  dag = {q: dag_pipeline.DagInput(q.input_type),
         r: q,
         dag_pipeline.DagOutput('output'): r}
  dag_pipe_obj = dag_pipeline.DAGPipeline(dag, 'DAGPipelineName')

  for x, y, z in [(-3, 0, 8), (1, 2, 3), (5, -5, 5)]:
    dag_pipe_obj.transform(Type0(x, y, z))
    stats_1 = dag_pipe_obj.get_stats()
    stats_2 = dag_pipe_obj.get_stats()
    self.assertEqual(stats_1, stats_2)

    for stat in stats_1:
      self.assertTrue(isinstance(stat, statistics.Counter))

    names = sorted([stat.name for stat in stats_1])
    self.assertEqual(names,
                     (['DAGPipelineName_UnitQ_output_count'] +
                      ['DAGPipelineName_UnitR_input_count'] * z))

    for stat in stats_1:
      if stat.name == 'DAGPipelineName_UnitQ_output_count':
        self.assertEqual(stat.count, z)
      else:
        self.assertEqual(stat.count, 1)
def testDuplicateNameError(self):

  class UnitQ(pipeline.Pipeline):

    def __init__(self, name='UnitQ'):
      pipeline.Pipeline.__init__(self, Type0, Type1, name)

    def transform(self, input_object):
      pass

  q, q2 = UnitQ(), UnitQ()
  dag = {q: dag_pipeline.DagInput(Type0),
         q2: dag_pipeline.DagInput(Type0),
         dag_pipeline.DagOutput(): {'q': q, 'q2': q2}}
  with self.assertRaises(dag_pipeline.DuplicateNameError):
    dag_pipeline.DAGPipeline(dag)
def get_pipeline(config, min_steps, max_steps, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: An EventSequenceRnnConfig.
    min_steps: Minimum number of steps for an extracted sequence.
    max_steps: Maximum number of steps for an extracted sequence.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  # Transpose up to a major third in either direction.
  transposition_range = list(range(-4, 5))

  partitioner = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_pianoroll_tracks', 'training_pianoroll_tracks'],
      [eval_ratio])
  dag = {partitioner: dag_pipeline.DagInput(music_pb2.NoteSequence)}

  for mode in ['eval', 'training']:
    time_change_splitter = note_sequence_pipelines.TimeChangeSplitter(
        name='TimeChangeSplitter_' + mode)
    quantizer = note_sequence_pipelines.Quantizer(
        steps_per_quarter=config.steps_per_quarter, name='Quantizer_' + mode)
    transposition_pipeline = note_sequence_pipelines.TranspositionPipeline(
        transposition_range, name='TranspositionPipeline_' + mode)
    pianoroll_extractor = PianorollSequenceExtractor(
        min_steps=min_steps, max_steps=max_steps,
        name='PianorollExtractor_' + mode)
    encoder_pipeline = event_sequence_pipeline.EncoderPipeline(
        mm.PianorollSequence, config.encoder_decoder,
        name='EncoderPipeline_' + mode)

    dag[time_change_splitter] = partitioner[mode + '_pianoroll_tracks']
    dag[quantizer] = time_change_splitter
    dag[transposition_pipeline] = quantizer
    dag[pianoroll_extractor] = transposition_pipeline
    dag[encoder_pipeline] = pianoroll_extractor
    dag[dag_pipeline.DagOutput(mode + '_pianoroll_tracks')] = encoder_pipeline

  return dag_pipeline.DAGPipeline(dag)
def _note_seq_to_text_seq(self, note_seq):
  # ### PIPELINE MAP ###
  #
  # Converts NoteSequence to TextSequence.
  #
  # DagInput > Quantizer > PerformanceExtractor > 'MetricPerformance'
  # DagInput > MetadataExtractor > 'metadata'
  # {'MetricPerformance', 'metadata'} > ParserToText > DagOutput
  key = 'live'
  quantizer = Quantizer(steps_per_quarter=STEPS_PER_QUARTER,
                        name='Quantizer_' + key)
  perf_extractor = PerformanceExtractor(
      min_events=MIN_EVENTS, max_events=MAX_EVENTS, num_velocity_bins=0,
      name='PerformanceExtractor_' + key)
  meta_extractor = MetadataExtractor(name='MetadataExtractor_' + key)
  parser = ParserToText(name='ParserToText_' + key)

  dag = {}
  dag[quantizer] = dag_pipeline.DagInput(music_pb2.NoteSequence)
  dag[perf_extractor] = quantizer
  dag[meta_extractor] = dag_pipeline.DagInput(music_pb2.NoteSequence)
  dag[parser] = {'MetricPerformance': perf_extractor,
                 'metadata': meta_extractor}
  dag[dag_pipeline.DagOutput(key)] = parser

  # NoteSequence -> TextSequence.
  text_seq = None
  dag_pipe = dag_pipeline.DAGPipeline(dag)
  output_names = dag_pipe.output_type_as_dict.keys()
  for name, outputs in _guarantee_dict(dag_pipe.transform(note_seq),
                                       list(output_names)[0]).items():
    for output in outputs:
      text_seq = output
  return text_seq
def testInvalidTransformOutputError(self):
  # This happens when the output of a pipeline's `transform` method does not
  # match the type signature given by the pipeline's `output_type`.

  class UnitQ1(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, Type1)

    def transform(self, input_object):
      return [Type2(1)]

  class UnitQ2(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, Type1)

    def transform(self, input_object):
      return [Type1(1, 2), Type2(1)]

  class UnitQ3(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, Type1)

    def transform(self, input_object):
      return Type1(1, 2)

  class UnitR1(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'xy': Type1, 'z': Type2})

    def transform(self, input_object):
      return {'xy': [Type1(1, 2)], 'z': [Type1(1, 2)]}

  class UnitR2(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'xy': Type1, 'z': Type2})

    def transform(self, input_object):
      return {'xy': [Type1(1, 2)]}

  class UnitR3(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'xy': Type1, 'z': Type2})

    def transform(self, input_object):
      return [{'xy': [Type1(1, 2)], 'z': Type2(1)}]

  class UnitR4(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'xy': Type1, 'z': Type2})

    def transform(self, input_object):
      return [{'xy': [Type1(1, 2), Type2(1)], 'z': [Type2(1)]}]

  class UnitR5(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, {'xy': Type1, 'z': Type2})

    def transform(self, input_object):
      return [{'xy': [Type1(1, 2), Type1(1, 3)], 'z': [Type2(1)], 'q': []}]

  for pipeline_class in [UnitQ1, UnitQ2, UnitQ3,
                         UnitR1, UnitR2, UnitR3, UnitR4, UnitR5]:
    pipe = pipeline_class()
    if pipeline_class.__name__.startswith('UnitR'):
      output = dag_pipeline.DagOutput()
    else:
      output = dag_pipeline.DagOutput('output')
    dag = {pipe: dag_pipeline.DagInput(pipe.input_type),
           output: pipe}
    dag_pipe_obj = dag_pipeline.DAGPipeline(dag)
    with self.assertRaises(dag_pipeline.InvalidTransformOutputError):
      dag_pipe_obj.transform(Type0(1, 2, 3))
def testTypeMismatchError(self):

  class UnitQ(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type0, Type1)

    def transform(self, input_object):
      pass

  class UnitR(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, Type1, {'a': Type2, 'b': Type3})

    def transform(self, input_object):
      pass

  class UnitS(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, {'x': Type2, 'y': Type3}, Type4)

    def transform(self, input_object):
      pass

  class UnitT(pipeline.Pipeline):

    def __init__(self):
      pipeline.Pipeline.__init__(self, {'x': Type2, 'y': Type5}, Type4)

    def transform(self, input_object):
      pass

  q, r, s, t = UnitQ(), UnitR(), UnitS(), UnitT()

  # The DagInput's type (Type1) does not match UnitQ's input type (Type0).
  dag = {q: dag_pipeline.DagInput(Type1),
         r: q,
         s: r,
         dag_pipeline.DagOutput('output'): s}
  with self.assertRaises(dag_pipeline.TypeMismatchError):
    dag_pipeline.DAGPipeline(dag)

  # UnitQ's output type (Type1) does not match q2's input type (Type0).
  q2 = UnitQ()
  dag = {q: dag_pipeline.DagInput(Type0),
         q2: q,
         dag_pipeline.DagOutput('output'): q2}
  with self.assertRaises(dag_pipeline.TypeMismatchError):
    dag_pipeline.DAGPipeline(dag)

  # The 'x' and 'y' connections are swapped relative to UnitS's input types.
  dag = {q: dag_pipeline.DagInput(Type0),
         r: q,
         s: {'x': r['b'], 'y': r['a']},
         dag_pipeline.DagOutput('output'): s}
  with self.assertRaises(dag_pipeline.TypeMismatchError):
    dag_pipeline.DAGPipeline(dag)

  # UnitR's dict output ({'a': Type2, 'b': Type3}) does not match UnitT's
  # expected input ({'x': Type2, 'y': Type5}).
  dag = {q: dag_pipeline.DagInput(Type0),
         r: q,
         t: r,
         dag_pipeline.DagOutput('output'): t}
  with self.assertRaises(dag_pipeline.TypeMismatchError):
    dag_pipeline.DAGPipeline(dag)