コード例 #1
0
	def chunk_estimate(self, pitch_track, mode_names=None, mode_name='', mode_dir='./',
		                 est_tonic=True, est_mode=True, distance_method="euclidean",
		                 metric='pcd', ref_freq=440, min_cnt=3, equalSamplePerMode = False):
		"""-------------------------------------------------------------------------
		Finds the min_cnt nearest training chunks of a single pitch track chunk.

		The chunk is converted to a pitch (class) distribution and compared with
		the chunk distributions loaded through self.load_collection(). The docstring
		of the original code states that this is called by the wrapper estimate()
		function, which combines the neighbors of all chunks into one estimation.
		----------------------------------------------------------------------------
		pitch_track     : Pitch track chunk of the input recording whose tonic and/or
		                  mode is to be estimated. This is only a 1-D list of frequency
		                  values.
		mode_names      : Names of the candidate modes. These are used when loading
		                  the mode models. If the mode isn't estimated, this parameter
		                  isn't used and can be ignored. None is treated as an empty
		                  list.
		mode_name       : Annotated mode of the recording. If it's not known and to be
		                  estimated, this parameter isn't used and can be ignored.
		mode_dir        : The directory where the mode models are stored. This is to
		                  load the annotated mode or the candidate mode.
		est_tonic       : Whether tonic is to be estimated or not. If this flag is
		                  False, ref_freq is treated as the annotated tonic.
		est_mode        : Whether mode is to be estimated or not. If this flag is
		                  False, mode_name is treated as the annotated mode.
		distance_method : The choice of distance methods. See distance() in
		                  ModeFunctions for more information.
		metric          : 'pcd' for the octave wrapped Pitch Class Distribution,
		                  'pd' for the Pitch Distribution.
		ref_freq        : Annotated tonic of the recording. If it's unknown, we use
		                  an arbitrary value, so this can be ignored.
		min_cnt         : The number of nearest neighbors of the current chunk to be
		                  returned.
		equalSamplePerMode : If True, every candidate mode collection is randomly
		                  subsampled to the size of the smallest collection.
		----------------------------------------------------------------------------
		Returns
		joint estimation : [[mode_list, tonic_list], distances]
		tonic estimation : [tonic_list, distances], tonic_list holding
		                   (tonic in Hz, source[:-6] of the neighbor) tuples
		mode estimation  : [mode_list, distances]
		neither flag set : 0
		mode_list entries are (mode name, source[:-6] of the neighbor) tuples and
		distances is a plain list of length min_cnt.

		Raises
		ValueError : if the tonic is to be estimated with a metric other than
		             'pcd'/'pd', or if only the tonic is to be estimated and
		             mode_name is empty.
		-------------------------------------------------------------------------"""
		# Avoid sharing one mutable default list between calls.
		mode_names = [] if mode_names is None else mode_names

		# Preliminaries before the estimations
		# Hz-to-cent conversion is done and the pitch distribution is generated
		cent_track = mf.hz_to_cent(pitch_track, ref_freq)
		dist = mf.generate_pd(cent_track, ref_freq=ref_freq,
			                  smooth_factor=self.smooth_factor, step_size=self.step_size)
		if metric == 'pcd':
			dist = mf.generate_pcd(dist)

		# The chunk distributions of every candidate mode are loaded.
		mode_collections = [self.load_collection(mode, dist_dir=mode_dir) for mode in mode_names]

		# Optionally balance the collections. The emptiness check keeps min() from
		# failing when there are no candidate modes (tonic-only estimation).
		if equalSamplePerMode and mode_collections:
			min_samp = min(len(col) for col in mode_collections)
			mode_collections = [random.sample(col, min_samp) for col in mode_collections]

		# cum_lens (cumulative lengths) keeps track of the number of chunks retrieved
		# from each mode, so a flat chunk index can be mapped back to its mode.
		cum_lens = np.cumsum([len(col) for col in mode_collections])

		# Flat list of all candidate chunk distributions, and the collection of the
		# annotated mode (if any).
		mode_dists = [d for col in mode_collections for d in col]
		mode_dist = self.load_collection(mode_name, dist_dir=mode_dir) if (mode_name != '') else None

		# Initializations of possible output parameters
		tonic_list = [0] * min_cnt
		mode_list = [''] * min_cnt
		min_distance_list = np.zeros(min_cnt)

		# If tonic will be estimated, there are certain common preliminary steps,
		# regardless of the process being a joint estimation or a tonic estimation.
		if est_tonic:
			if metric == 'pcd':
				# If there happens to be a peak at the last (and first, due to the
				# circular nature of PCD) sample, it is considered as two peaks.
				# To prevent this, the global minimum of the distribution is shifted
				# to the beginning, i.e. it becomes the new reference frequency.
				shift_factor = dist.vals.tolist().index(min(dist.vals))
				dist = dist.shift(shift_factor)
				# anti_freq is the new reference frequency after the shift.
				anti_freq = mf.cent_to_hz([dist.bins[shift_factor]], ref_freq=ref_freq)[0]
				# Peaks of the distribution are the tonic candidates.
				peak_idxs, _ = dist.detect_peaks()
			elif metric == 'pd':
				# Since PD isn't circular, the precaution in PCD is unnecessary here.
				peak_idxs, _ = dist.detect_peaks()
				# The number of samples to be shifted is [peak indices - zero bin];
				# origin is the bin with value zero.
				origin = np.where(dist.bins == 0)[0][0]
				shift_idxs = [(idx - origin) for idx in peak_idxs]
			else:
				raise ValueError("metric must be 'pcd' or 'pd' when the tonic is estimated")

		# Here the actual estimation steps begin

		# Joint Estimation
		### TODO: The first steps of joint estimation are very similar for both Bozkurt and
		### Chordia. We might squeeze them into a single function in ModeFunctions.
		if est_tonic and est_mode:
			if metric == 'pcd':
				# Rows are tonic candidates and columns are candidate chunks.
				dist_mat = mf.generate_distance_matrix(dist, peak_idxs, mode_dists, method=distance_method)
			else:
				# tonic_estimate() of ModeFunctions handles a single column, so the
				# columns of the matrix are generated one by one.
				dist_mat = np.zeros((len(shift_idxs), len(mode_dists)))
				for m, candidate in enumerate(mode_dists):
					dist_mat[:, m] = mf.tonic_estimate(dist, shift_idxs, candidate,
						                               distance_method=distance_method,
						                               metric=metric, step_size=self.step_size)

			# One neighbor is reported per iteration, from closest to furthest.
			# The reported entry is then overwritten with a value above the
			# maximum so the next iteration finds the next nearest neighbor.
			for r in range(min_cnt):
				rows, cols = np.where(dist_mat == np.amin(dist_mat))
				min_row, min_col = rows[0], cols[0]
				# The PCD reference frequency was changed by the precaution step,
				# so it is converted back to Hz w.r.t. anti_freq.
				if metric == 'pcd':
					tonic_list[r] = mf.cent_to_hz([dist.bins[peak_idxs[min_row]]],
						                          anti_freq)[0]
				else:
					tonic_list[r] = mf.cent_to_hz([shift_idxs[min_row] * self.step_size],
						                          ref_freq)[0]
				# First mode whose cumulative length exceeds the chunk index.
				mode_idx = np.where(cum_lens > min_col)[0][0]
				mode_list[r] = (mode_names[mode_idx], mode_dists[min_col].source[:-6])
				# Distances are reported only for evaluation purposes.
				min_distance_list[r] = dist_mat[min_row][min_col]
				dist_mat[min_row][min_col] = (np.amax(dist_mat) + 1)
			return [[mode_list, tonic_list], min_distance_list.tolist()]

		# Tonic Estimation
		if est_tonic:
			if mode_dist is None:
				raise ValueError("mode_name must be given when only the tonic is estimated")
			# tonic_estimate() expects the shift indices for PD and the peak
			# indices for PCD.
			candidates = shift_idxs if metric == 'pd' else peak_idxs

			# Each row is a chunk distribution of the annotated mode and each
			# column is a tonic candidate.
			dist_mat = np.array([mf.tonic_estimate(dist, candidates, d,
				                                   distance_method=distance_method,
				                                   metric=metric, step_size=self.step_size)
				                 for d in mode_dist])

			for r in range(min_cnt):
				rows, cols = np.where(dist_mat == np.amin(dist_mat))
				min_row, min_col = rows[0], cols[0]
				if metric == 'pcd':
					tonic_hz = mf.cent_to_hz([dist.bins[peak_idxs[min_col]]], anti_freq)[0]
				else:
					# The shift index is an offset from the zero bin, not a bin
					# index; convert it to cents the same way joint estimation does.
					tonic_hz = mf.cent_to_hz([shift_idxs[min_col] * self.step_size],
						                     ref_freq)[0]
				tonic_list[r] = (tonic_hz, mode_dist[min_row].source[:-6])
				min_distance_list[r] = dist_mat[min_row][min_col]
				dist_mat[min_row][min_col] = (np.amax(dist_mat) + 1)
			return [tonic_list, min_distance_list.tolist()]

		# Mode estimation
		if est_mode:
			# The tonic is annotated, so the distribution isn't shifted; it is
			# compared to each chunk distribution of each candidate mode, which
			# yields a distance vector instead of a matrix.
			distance_vector = mf.mode_estimate(dist, mode_dists,
				                               distance_method=distance_method,
				                               metric=metric, step_size=self.step_size)

			for r in range(min_cnt):
				idx = np.argmin(distance_vector)
				# First mode whose cumulative length exceeds the chunk index.
				mode_idx = np.where(cum_lens > idx)[0][0]
				mode_list[r] = (mode_names[mode_idx], mode_dists[idx].source[:-6])
				min_distance_list[r] = distance_vector[idx]
				distance_vector[idx] = (np.amax(distance_vector) + 1)
			return [mode_list, min_distance_list.tolist()]

		# Nothing is expected to be estimated.
		return 0
コード例 #2
0
	def estimate(self, pitch_file, mode_in='./', tonic_freq=None, rank=1,
	             distance_method="bhat", metric='pcd'):
		"""-------------------------------------------------------------------------
		Estimates the tonic and/or the mode of a recording from its pitch track.

		Which estimation is made is derived from the inputs:

		1) Joint Estimation: tonic_freq is not given and mode_in holds several
		candidate models. Both the tonic and the mode are estimated.

		2) Tonic Estimation: tonic_freq is not given and mode_in holds a single
		model. The mode is treated as known and only the tonic is estimated.

		3) Mode Estimation: tonic_freq is given and mode_in holds several candidate
		models. Only the mode is estimated.
		----------------------------------------------------------------------------
		pitch_file      : File holding the pitch track of the input recording;
						it is read with np.loadtxt. If it has more than one
						column, the second column is taken as the pitch.
		mode_in         : The mode input. A single filename or a single
						PitchDistribution object means the mode is known. A
						list of filenames (mode names are the filenames without
						extension) or a dictionary of PitchDistribution objects
						keyed by mode name means the mode will be estimated.
						NOTE(review): directory input, e.g. the default './',
						is not handled by this method and raises ValueError.
		tonic_freq      : Annotated tonic of the recording. If it's unknown, a
						dummy value of 440 is used for the cent conversion.
		rank            : The number of estimations expected from the system.
		distance_method : The choice of distance methods. See distance() in
						ModeFunctions for more information.
		metric          : 'pcd' for the octave wrapped Pitch Class Distribution,
						'pD' for the Pitch Distribution.
						NOTE(review): the spelling 'pD' is what this method
						compares against; confirm it matches ModeFunctions.
		----------------------------------------------------------------------------
		Returns
		joint estimation : (mode_ranked, tonic_ranked)
		tonic estimation : tonic_ranked
		mode estimation  : mode_ranked
		Both are lists of length rank holding (estimate, distance) tuples; slots
		for which no candidate exists keep the placeholder ('', 0).

		Raises
		ValueError : if mode_in cannot be interpreted, if both the tonic and the
		             mode are already known, or if the tonic is to be estimated
		             with a metric other than 'pcd'/'pD'.
		-------------------------------------------------------------------------"""

		# load pitch track
		pitch_track = np.loadtxt(pitch_file)

		# assume the first col is time, the second is pitch and the rest is labels etc.
		pitch_track = pitch_track[:, 1] if pitch_track.ndim > 1 else pitch_track

		# parse mode input; est_mode stays None until the input is recognized
		est_mode = None
		model = None
		models = []
		mode_names = []
		try:
			if all(os.path.isfile(m) for m in mode_in):
				# list of json files per mode
				est_mode = True  # do mode estimation
				mode_names = [os.path.splitext(m)[0] for m in mode_in]
				models = [pD.load(m) for m in mode_in]
			elif os.path.isfile(mode_in):
				# single json file
				est_mode = False  # mode already known
				model = pD.load(mode_in)
		except TypeError:
			# mode_in is neither a path nor an iterable of paths; try the
			# already loaded model object(s)
			if isinstance(mode_in, pD.PitchDistribution):
				est_mode = False  # mode already known
				model = mode_in
			elif (hasattr(mode_in, 'values') and
			      all(isinstance(m, pD.PitchDistribution) for m in mode_in.values())):
				est_mode = True  # do mode estimation
				# a list, so that the names can be indexed on Python 3 too
				mode_names = list(mode_in.keys())
				models = [mode_in[m] for m in mode_names]

		# an empty candidate set is as unusable as an unrecognized input
		if est_mode is None or (est_mode and not models):
			raise ValueError("Unknown mode input!")

		# parse tonic input
		if tonic_freq:  # tonic is already known
			est_tonic = False
		else:
			est_tonic = True
			# A4 is taken as the dummy frequency value for cent conversion
			tonic_freq = 440

		if not (est_tonic or est_mode):
			raise ValueError("Both tonic and mode are known!")

		# slice the pitch track if specified
		if self.chunk_size > 0:
			time_track = np.arange(0, self.frame_rate * len(pitch_track), self.frame_rate)
			pitch_track, segs = mF.slice(time_track, pitch_track, '', self.chunk_size)

		# normalize pitch track according to the given tonic frequency
		cent_track = mF.hz_to_cent(pitch_track, ref_freq=tonic_freq)

		# Pitch distribution of the input recording is generated
		distrib = mF.generate_pd(cent_track, ref_freq=tonic_freq, smooth_factor=self.smooth_factor,
		                         step_size=self.step_size)

		# convert to PCD, if specified
		if metric == 'pcd':
			distrib = mF.generate_pcd(distrib)

		# Output variables are initiated with placeholders
		tonic_ranked = [('', 0) for x in range(rank)]
		mode_ranked = [('', 0) for x in range(rank)]

		# Preliminary steps for tonic identification
		if est_tonic:
			if metric == 'pcd':
				# If there happens to be a peak at the last (and first due to the circular
				# nature of PCD) sample, it is considered as two peaks, one at the end and
				# one at the beginning. To prevent this, we find the global minima (as it
				# is easy to compute) of the distribution and make it the new reference
				# frequency, i.e. shift it to the beginning.
				shift_factor = distrib.vals.tolist().index(min(distrib.vals))
				distrib = distrib.shift(shift_factor)

				# update to the new reference frequency after shift
				tonic_freq = mF.cent_to_hz([distrib.bins[shift_factor]], ref_freq=tonic_freq)[0]

				# Find the peaks of the distribution. These are the tonic candidates.
				peak_idxs, _ = distrib.detect_peaks()
			elif metric == 'pD':
				# Find the peaks of the distribution. These are the tonic candidates
				peak_idxs, _ = distrib.detect_peaks()

				# The number of samples to be shifted is the list [peak indices - zero bin]
				# origin is the bin with value zero and the shifting is done w.r.t. it.
				origin = np.where(distrib.bins == 0)[0][0]
				shift_idxs = [(idx - origin) for idx in peak_idxs]
			else:
				raise ValueError("metric must be 'pcd' or 'pD' when the tonic is estimated")

		# Joint Estimation
		if est_tonic and est_mode:
			if metric == 'pD':
				# Since PD lengths aren't equal, we zero-pad the distributions for comparison
				# tonic_estimate() of ModeFunctions just does that. It can handle only
				# a single column, so the columns of the matrix are iteratively generated
				dist_mat = np.zeros((len(shift_idxs), len(models)))
				for m, candidate in enumerate(models):
					dist_mat[:, m] = mF.tonic_estimate(distrib, shift_idxs, candidate,
					                                   distance_method=distance_method,
					                                   metric=metric, step_size=self.step_size)
			else:
				# PCD doesn't require any preliminary steps. Generate the distance matrix.
				# The rows are tonic candidates and columns are mode candidates.
				dist_mat = mF.generate_distance_matrix(distrib, peak_idxs, models, method=distance_method)

			# For each rank the current minimum of the distance matrix gives the
			# tonic/mode pair. It is then overwritten with a value above the
			# maximum, so the next iteration yields the next best pair.
			for r in range(min(rank, len(peak_idxs))):
				rows, cols = np.where(dist_mat == np.amin(dist_mat))
				min_row, min_col = rows[0], cols[0]
				best = dist_mat[min_row][min_col]
				# Due to the precaution step of PCD, the reference frequency is
				# changed. That's why it's treated differently than PD. Here,
				# the cent value of the tonic estimate is converted back to Hz.
				if metric == 'pcd':
					tonic_cent = distrib.bins[peak_idxs[min_row]]
				else:
					tonic_cent = shift_idxs[min_row] * self.step_size
				tonic_ranked[r] = (mF.cent_to_hz([tonic_cent], tonic_freq)[0], best)
				# Current mode estimate is recorded.
				mode_ranked[r] = (mode_names[min_col], best)
				dist_mat[min_row][min_col] = (np.amax(dist_mat) + 1)
			return mode_ranked, tonic_ranked

		# Tonic Estimation
		if est_tonic:
			# tonic_estimate() expects the shift indices for PD and the peak
			# indices for PCD.
			candidates = shift_idxs if (metric == 'pD') else peak_idxs

			# The mode is already known, so there is only one model to be
			# compared. Each entry corresponds to one tonic candidate.
			distance_vector = mF.tonic_estimate(distrib, candidates, model, distance_method=distance_method,
			                                    metric=metric, step_size=self.step_size)

			# For each rank the current minimum is taken and then overwritten
			# with a value above the maximum.
			for r in range(min(rank, len(candidates))):
				idx = np.argmin(distance_vector)
				# Due to the changed reference frequency in PCD's precaution step,
				# PCD and PD are treated differently here.
				if metric == 'pcd':
					tonic_cent = distrib.bins[peak_idxs[idx]]
				else:
					tonic_cent = shift_idxs[idx] * self.step_size
				tonic_ranked[r] = (mF.cent_to_hz([tonic_cent], tonic_freq)[0], distance_vector[idx])
				distance_vector[idx] = (np.amax(distance_vector) + 1)
			return tonic_ranked

		# Mode Estimation (the only remaining case: tonic known, mode unknown)
		# Since tonic is known, the distribution isn't shifted and is only
		# compared to the candidate mode models.
		distance_vector = mF.mode_estimate(distrib, models, distance_method=distance_method, metric=metric,
		                                   step_size=self.step_size)

		for r in range(min(rank, len(mode_names))):
			idx = np.argmin(distance_vector)
			mode_ranked[r] = (mode_names[idx], distance_vector[idx])
			# Current minima is replaced with a value larger than maxima,
			# so that we won't return the same estimate twice.
			distance_vector[idx] = (np.amax(distance_vector) + 1)
		return mode_ranked