def test_back_conversion(self):
    """
    Creates a tonal space path, converts it to state labels and converts 
    it back again. This should produce the original path if all goes well.
    
    Note that the result of the back conversion will always have the path 
    shifted so it starts as close as possible to the origin. This is 
    correct behaviour: the state labels don't encode the enharmonic block 
    that the path starts in and it is merely by convention that we assume 
    the start point. Each path-chord sequence pair also gives the expected 
    output, which may differ from the original path only in this respect.
    
    @todo: update this test
    
    """
    # Just return for now: I've not had a chance to update this
    # lf_chords_to_states no longer exists
    return
    self.longMessage = True
    # Run the test on a whole set of paths
    for (coords, chords, output) in self.PATHS:
        # Build a CoordinateList for the path
        ens = [EnharmonicCoordinate.from_harmonic_coord((x, y)) 
                    for (x, y, fun) in coords]
        pcs = [PathCoordinate.from_enharmonic_coord(en) for en in ens]
        time = 0
        for pc, (__, __, fun) in zip(pcs, coords):
            pc.function = fun
            pc.duration = 1
            pc.time = time
            time += 1
        path = Semantics(CoordinateList(items=pcs))
        
        # Build the list of chords
        chords = [Chord.from_name(crd).to_db_mirror() for crd in chords]
        for chord in chords:
            chord.duration = 1
        
        # Try converting it to states
        states = lf_chords_to_states(path, chords)
        # Now try converting it back
        back = states_chords_to_lf(zip(states, chords))
        
        # Check that we got the same coordinates out
        in_coords = [(x, y) for (x, y, fun) in output]
        in_funs = [fun for (x, y, fun) in output]
        out_coords = [point.harmonic_coord for point in back.lf]
        out_funs = [point.function for point in back.lf]
        self.assertEqual(in_coords, out_coords, 
            msg="coordinates converted to states and back produced "
                "something different.\nState labels:\n%s" % (states))
        self.assertEqual(in_funs, out_funs, 
            msg="coordinates converted to states and back produced "
                "different functions.\nState labels:\n%s" % (states))
def test_nearest(self):
    """
    This is a particularly difficult bit of EnharmonicCoordinate's 
    behaviour to get right, so it's worth testing a good few examples 
    to make sure it's behaving right.
    
    """
    self.longMessage = False
    # Define some test pairs and the expected result
    TESTS = [
        # Tuples: base coord, coord to be shifted, expected result
        # Some that shouldn't be shifted
        ((0, 0), (0, 0), (0, 0)),
        ((0, 0), (2, 0), (2, 0)),
        ((0, 0), (-2, 0), (-2, 0)),
        ((0, 0), (1, 1), (1, 1)),
        # Some that should
        ((0, 0), (2, 2), (-2, 0)),
        ((0, 0), (-2, -2), (2, 0)),
    ]
    for base, candidate, correct in TESTS:
        # Build enharmonic coords
        base_crd = EnharmonicCoordinate.from_harmonic_coord(base)
        candidate_crd = EnharmonicCoordinate.from_harmonic_coord(candidate)
        # Try running nearest on these
        result_crd = base_crd.nearest(candidate_crd)
        result = result_crd.harmonic_coord
        # Check it came out right
        self.assertEqual(result, correct, 
            msg="nearest instance of %s [%s] to %s should have been %s, "
                "got %s" % (candidate, candidate_crd.zero_coord, base, 
                            correct, result))
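# Illustrative sketch (not part of the test suite): the expectations above
# rest on enharmonic equivalence in the tonal space, where a harmonic
# coordinate (x, y) is x steps along the fifths axis and y steps along the
# major-thirds axis. Assuming the standard pitch-class mapping
# pc = (7*x + 4*y) mod 12, nearest() may only ever substitute an
# enharmonically equivalent point (same pitch class) that lies closer to
# the base. The helper below is hypothetical and only checks that the
# shifted test cases really are respellings of the original points.
def _pitch_class(coord):
    # Hypothetical helper, not from the codebase: pitch class of a
    # harmonic coordinate under the assumed mapping
    x, y = coord
    return (7 * x + 4 * y) % 12

# e.g. (2, 2) and (-2, 0) both map to pitch class 10, so (-2, 0) is a
# legitimate respelling of (2, 2) that happens to lie nearer the origin
assert _pitch_class((2, 2)) == _pitch_class((-2, 0)) == 10
assert _pitch_class((-2, -2)) == _pitch_class((2, 0)) == 2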
def vector(p0, p1):
    """
    Vector from p0 to p1, where the ps are points represented as they are 
    internally in the model: (X,Y,x,y). 
    
    (x,y) defines the point in the local (enharmonic) space and is that 
    closest to the previous point when (X,Y) = (0,0). (X,Y) defines a 
    shift of enharmonic space.
    
    """
    # We don't care about X0 and Y0
    X0, Y0, x0, y0 = p0
    X1, Y1, x1, y1 = p1
    # Get the basic vector, assuming (X1,Y1) = (0,0)
    nearest = EnharmonicCoordinate((x0, y0)).nearest((x1, y1))
    # Shift this according to X1 and Y1
    nearest.X += X1
    nearest.Y += Y1
    newx, newy = nearest.harmonic_coord
    return (newx - x0, newy - y0)
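# Hedged usage sketch: with both block shifts left at zero, vector() just
# measures the step from p0 to the nearest enharmonic instance of p1's local
# point. The expected results below follow from the `nearest` cases in
# test_nearest above (the nearest instance of (2,0) to the origin is (2,0);
# of (2,2) it is (-2,0)), and assume the local (x,y) values given here are
# handled the same way as the harmonic coordinates used in that test.
print vector((0, 0, 0, 0), (0, 0, 2, 0))   # expected: (2, 0)
print vector((0, 0, 0, 0), (0, 0, 2, 2))   # expected: (-2, 0)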
def train(data, estimator, grammar, cutoff=0, logger=None, chord_map=None, 
        order=2, backoff_orders=0, backoff_kwargs={}):
    """
    Initializes and trains an HMM in a supervised fashion using the given 
    training data. Training data should be chord sequence data (input 
    type C{bulk-db} or C{bulk-db-annotated}).
    
    """
    # Prepare a dummy logger if none was given
    if logger is None:
        logger = create_dummy_logger()
    logger.info(">>> Beginning training of ngram backoff model")
    
    training_data = []
    # Generate the gold standard data by parsing the annotations
    for dbinput in data:
        # Get a gold standard tonal space sequence
        try:
            parses = parse_sequence_with_annotations(dbinput, grammar, 
                                                     allow_subparses=False)
        except ParseError, err:
            # Just skip this sequence
            logger.error('Could not get a GS parse of %s: %s' % (dbinput, err))
            continue
        # There should only be one of these now
        parse = parses[0]
        if parse is None:
            logger.error('Could not get a GS parse of %s' % (dbinput))
            continue
        
        # Get the form of the analysis we need for the training
        if chord_map is None:
            chords = [(c.root, c.type) for c in dbinput.chords]
        else:
            chords = [(c.root, chord_map[c.type]) for c in dbinput.chords]
        
        points, times = zip(*grammar.formalism.semantics_to_coordinates(
                                                            parse.semantics))
        # Run through the sequence, transforming absolute points into 
        # the condensed relative representation
        ec0 = EnharmonicCoordinate.from_harmonic_coord(points[0])
        # The first point is relative to the origin and always in the 
        # (0,0) enharmonic space
        rel_points = [(0, 0, ec0.x, ec0.y)]
        for point in points[1:]:
            ec1 = EnharmonicCoordinate.from_harmonic_coord(point)
            # Find the nearest enharmonic instance of this point to the last
            nearest = ec0.nearest((ec1.x, ec1.y))
            # Work out how much we have to shift this by to get the point
            dX = ec1.X - nearest.X
            dY = ec1.Y - nearest.Y
            rel_points.append((dX, dY, ec1.x, ec1.y))
            ec0 = ec1
        
        funs, times = zip(*grammar.formalism.semantics_to_functions(
                                                            parse.semantics))
        
        ### Synchronize the chords with the points and functions 
        # We may need to repeat chords to match up with analysis 
        # points that span multiple chords
        analysis = iter(zip(rel_points, funs, times))
        rel_point, fun, __ = analysis.next()
        next_rel_point, next_fun, next_anal_time = analysis.next()
        # Keep track of how much time has elapsed
        time = 0
        training_seq = []
        reached_end = False
        for crd_pair, chord in zip(chords, dbinput.chords):
            if time >= next_anal_time and not reached_end:
                # Move on to the next analysis point
                rel_point, fun = next_rel_point, next_fun
                try:
                    next_rel_point, next_fun, next_anal_time = analysis.next()
                except StopIteration:
                    # No more points: keep using the same to the end
                    reached_end = True
            training_seq.append((crd_pair, (rel_point, fun)))
            time += chord.duration
        training_data.append(training_seq)
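# Hedged sketch (an assumed helper, not from the codebase): decode a sequence
# of condensed relative points (dX, dY, x, y), as built in train() above, back
# into absolute harmonic coordinates. It simply mirrors the pattern used in
# vector() and in the training loop: take the nearest enharmonic instance of
# each local (x, y) to the previous point, then apply the stored block shift.
# The first point is only recoverable up to its enharmonic block, so it is
# placed in the (0,0) block by convention, as in train().
def _decode_relative_points(rel_points):
    abs_points = []
    prev = None
    for dX, dY, x, y in rel_points:
        if prev is None:
            # First point: expressed in the (0,0) enharmonic block
            ec = EnharmonicCoordinate.from_harmonic_coord((x, y))
        else:
            # Nearest enharmonic instance of the local point to the last point
            ec = prev.nearest((x, y))
            # Apply the recorded shift of enharmonic space
            ec.X += dX
            ec.Y += dY
        abs_points.append(ec.harmonic_coord)
        prev = ec
    return abs_points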