def _closest(self, row):
    """Return the training label of the stored sample nearest to ``row``.

    Every entry of ``self.features_train`` is compared to ``row`` with
    ``euclidean_distance``.  When several samples are equally close, the one
    with the lowest index wins.  The first sample is looked up
    unconditionally, so an empty training set fails on that lookup.
    """
    samples = self.features_train
    winner = 0
    shortest = euclidean_distance(row, samples[0])
    # Index 0 seeded the search above, so scanning can start at index 1.
    for candidate in range(1, len(samples)):
        gap = euclidean_distance(row, samples[candidate])
        if gap < shortest:
            winner, shortest = candidate, gap
    return self.labels_train[winner]
def filterSongOptionsByThemeDistance(self, song_options, master_song):
    """Keep the candidate songs whose theme is closest to the target theme.

    The target theme is a weighted sum of ``self.theme_centroid``, the
    master song's theme descriptor and ``self.prev_song_theme_descriptor``
    (weights ``THEME_WEIGHT``, ``CURRENT_SONG_WEIGHT`` and
    ``PREV_SONG_WEIGHT``).  Each candidate is opened, its descriptor is
    compared to that target with ``euclidean_distance``, and it is closed
    again.  The ``NUM_SONGS_ONSETS`` nearest candidates are logged at debug
    level and returned.

    Args:
        song_options: candidate songs.  The final selection indexes this
            object with an integer index array, so it presumably is a NumPy
            array of songs -- TODO confirm against the callers.
        master_song: song whose ``song_theme_descriptor`` contributes to the
            target theme.

    Returns:
        ``song_options`` indexed by the positions of the
        ``NUM_SONGS_ONSETS`` smallest distances, nearest first.
    """
    # Use this weighted average of the master song and goal song to select
    # the next song.
    cur_theme_centroid = (
        THEME_WEIGHT * self.theme_centroid
        + CURRENT_SONG_WEIGHT * master_song.song_theme_descriptor
        + PREV_SONG_WEIGHT * self.prev_song_theme_descriptor)

    song_options_distance_to_centroid = []
    for song in song_options:
        song.open()
        try:
            dist_to_centroid = euclidean_distance(
                cur_theme_centroid, song.song_theme_descriptor)
        finally:
            # Close the song even if the distance computation fails.
            song.close()
        song_options_distance_to_centroid.append(dist_to_centroid)

    song_options_closest_to_centroid = np.argsort(
        song_options_distance_to_centroid)[:NUM_SONGS_ONSETS]

    # Log messages
    logger.debug('Selected songs, ordered by theme similarity:')
    for i in song_options_closest_to_centroid:
        song = song_options[i]
        song.open()
        try:
            title = song.title
            key = song.key
            scale = song.scale
        finally:
            song.close()
        logger.debug('>> Theme difference {:20s} : {:.2f} ({} {})'.format(
            title[:20], song_options_distance_to_centroid[i], key, scale))

    return song_options[song_options_closest_to_centroid]
def distance_length_tuple(option):
    """Build the sort key for one flattened map index.

    ``option`` is split into a column (``option % 128``) and a row
    (``option // 128``) of vc_map.  The returned tuple holds, in order:

    1. the ``euclidean_distance`` between ``self.map(column, row)`` and
       ``(original_x, original_y)``,
    2. the Euclidean length of ``(column, row)``,
    3. the ``(column, row)`` pair itself.

    ``self``, ``original_x`` and ``original_y`` come from the enclosing
    scope.
    """
    row, column = divmod(option, 128)
    mapped = self.map(column, row)
    error = euclidean_distance(mapped, (original_x, original_y))
    length = np.sqrt(column**2 + row**2)
    return (error, length, (column, row))
def chooseNewTheme(self, firstSong):
    """Initialise ``self.theme_centroid`` from the songs most like ``firstSong``.

    Steps:
      1. Measure the distance of every song in ``self.songsUnplayed`` to
         ``firstSong`` (``euclidean_distance`` between theme descriptors).
      2. Keep the closest quarter of those songs, but at least one.
      3. Store the average theme descriptor of the kept songs in
         ``self.theme_centroid``.

    Args:
        firstSong: song whose ``song_theme_descriptor`` is the reference
            point.
    """
    songs_distance_to_first_song = []
    songs_themes = []
    for song in self.songsUnplayed:
        song.open()
        try:
            theme = song.song_theme_descriptor
            # Collect this song's own theme.  Collecting firstSong's theme
            # here would make the centroid equal to firstSong's descriptor.
            songs_themes.append(theme)
            songs_distance_to_first_song.append(
                euclidean_distance(theme, firstSong.song_theme_descriptor))
        finally:
            # Close the song even if reading or comparing its theme fails.
            song.close()
    songs_sorted = np.argsort(songs_distance_to_first_song)

    # Floor division keeps the slice bound an integer on Python 3.  Keeping
    # at least one song avoids averaging an empty selection when fewer than
    # four songs are available.
    num_selected = max(1, len(songs_sorted) // 4)
    indices_sorted = songs_sorted[:num_selected]

    self.theme_centroid = np.average(
        np.array(songs_themes)[indices_sorted], axis=0)
# Collect the theme descriptor and the title of every annotated song.
songs = []
for song in sc.get_annotated():
    song.open()
    pool.add('song.themes', song.song_theme_descriptor.tolist()[0])
    songs.append(song.title)
    song.close()

# --------------------- Make nice plots ---------------------------
Y = pool['song.themes']         # All songs in "/music" and "/moremusic" libraries
X = pool['theme_descriptors']   # Evolution of mix theme descriptors

from scipy.spatial.distance import euclidean as euclidean_distance

# For each step of the mix (consecutive rows of X), print the step length and
# the number of library songs that were closer to the step's starting point
# than the point the mix actually moved to.
for P1, P2 in zip(X[:-1, :], X[1:, :]):
    distance_path = euclidean_distance(P1, P2)
    num_songs_closer = sum(
        1 for P in Y if euclidean_distance(P1, P) < distance_path)
    # Single %-formatted argument: prints the same text on Python 2 and 3.
    print("%s %s" % (distance_path, num_songs_closer))

from mpl_toolkits.mplot3d import Axes3D
#~ fig = plt.figure()
#~ ax = fig.add_subplot(111, projection='3d')
#~ ax.scatter(X[:,0],X[:,1],X[:,2], marker='x')
#~ ax.scatter(Y[:,0],Y[:,1],Y[:,2], marker='o')
#~ plt.show()
# 01
def compute_projection_fidelity(self, psd=None):
    """
    Compute the reconstruction fidelity as a function of nPCs, for each
    waveform in the catalogue.  Optionally computes waveform match.

    match / euclidean_distance arrays are organised as
    number_of_test_waves x num_train_waves, where num_train_waves = number
    of PCs used to reconstruct the test waveform.

    Parameters
    ----------
    psd : optional
        Forwarded to ``compute_match``; only used when
        ``self.do_si_projection`` is true.

    Returns
    -------
    ``(euclidean_distances, projections)``, or
    ``(euclidean_distances, projections, matches)`` when
    ``self.do_si_projection`` is true.  Each item is a local class used as
    a namespace for ``(self.ntest, self.ntrain)`` arrays:
    ``euclidean_distances`` has ``hplus``, ``amplitude`` and ``phase``;
    ``projections`` and ``matches`` have ``hplus`` and ``ampphase``.
    """
    print("Evaluating reconstruction fidelity")

    # Pre-allocate
    class euclidean_distances(object):
        hplus = np.zeros(shape=(self.ntest, self.ntrain))
        amplitude = np.zeros(shape=(self.ntest, self.ntrain))
        phase = np.zeros(shape=(self.ntest, self.ntrain))

    class projections(object):
        hplus = np.zeros(shape=(self.ntest, self.ntrain))
        ampphase = np.zeros(shape=(self.ntest, self.ntrain))

    if self.do_si_projection:
        class matches(object):
            hplus = np.zeros(shape=(self.ntest, self.ntrain))
            ampphase = np.zeros(shape=(self.ntest, self.ntrain))

    for w in range(self.ntest):

        # retrieve projection coefficients
        hplus_NR_betas = self.test_catalogue_data[w]['NRhplusTimeSeriesBetas']
        amp_NR_betas = self.test_catalogue_data[w]['NRAmpTimeSeriesBetas']
        phase_NR_betas = self.test_catalogue_data[w]['NRPhaseTimeSeriesBetas']

        # --- The target waveforms
        # Rebuild the targets from all of their betas with reconstruct()
        # rather than PCA.inverse_transform().
        # XXX: can only use inverse transform to go back to training data!
        target_NR_hplus = reconstruct(self.NRhplusTimeSeriesPCA,
                                      hplus_NR_betas, len(hplus_NR_betas))
        target_NR_amp = reconstruct(self.NRAmpTimeSeriesPCA,
                                    amp_NR_betas, len(amp_NR_betas))
        target_NR_phase = reconstruct(self.NRPhaseTimeSeriesPCA,
                                      phase_NR_betas, len(phase_NR_betas))

        target_NR_ampphase = target_NR_amp * np.exp(1j * target_NR_phase)

        if self.do_si_projection:
            hplus_SI_betas = self.test_catalogue_data[w]['SIhplusTimeSeriesBetas']
            amp_SI_betas = self.test_catalogue_data[w]['SIAmpTimeSeriesBetas']
            phase_SI_betas = self.test_catalogue_data[w]['SIPhaseTimeSeriesBetas']

            # The target waveform
            target_SI_hplus = reconstruct(self.SIhplusTimeSeriesPCA,
                                          hplus_SI_betas, len(hplus_SI_betas))
            target_SI_amp = reconstruct(self.SIAmpTimeSeriesPCA,
                                        amp_SI_betas, len(amp_SI_betas))
            target_SI_phase = reconstruct(self.SIPhaseTimeSeriesPCA,
                                          phase_SI_betas, len(phase_SI_betas))

            target_SI_ampphase = target_SI_amp * np.exp(1j * target_SI_phase)

        for n in range(self.ntrain):
            # Column n holds the result of a reconstruction with n+1 PCs.
            npcs = n + 1

            reconstructed_NR_hplus = \
                reconstruct(self.NRhplusTimeSeriesPCA, hplus_NR_betas, npcs)
            reconstructed_NR_amp = \
                reconstruct(self.NRAmpTimeSeriesPCA, amp_NR_betas, npcs)
            reconstructed_NR_phase = \
                reconstruct(self.NRPhaseTimeSeriesPCA, phase_NR_betas, npcs)

            if self.do_si_projection:
                reconstructed_SI_hplus = \
                    reconstruct(self.SIhplusTimeSeriesPCA, hplus_SI_betas, npcs)
                reconstructed_SI_amp = \
                    reconstruct(self.SIAmpTimeSeriesPCA, amp_SI_betas, npcs)
                reconstructed_SI_phase = \
                    reconstruct(self.SIPhaseTimeSeriesPCA, phase_SI_betas, npcs)

            reconstructed_NR_ampphase = \
                reconstructed_NR_amp * np.exp(1j * reconstructed_NR_phase)

            euclidean_distances.hplus[w, n] = euclidean_distance(
                reconstructed_NR_hplus, target_NR_hplus)
            euclidean_distances.amplitude[w, n] = euclidean_distance(
                reconstructed_NR_amp, target_NR_amp)
            # The phase distance must compare the phase reconstruction with
            # the phase target, not the amplitude pair.
            euclidean_distances.phase[w, n] = euclidean_distance(
                reconstructed_NR_phase, target_NR_phase)

            projections.hplus[w, n] = np.dot(
                reconstructed_NR_hplus / np.linalg.norm(reconstructed_NR_hplus),
                target_NR_hplus / np.linalg.norm(target_NR_hplus))

            # NOTE(review): np.vdot of complex inputs is complex, while
            # projections.ampphase was allocated as a real array, so only
            # the real part appears to be kept -- confirm this is intended.
            projections.ampphase[w, n] = np.vdot(
                reconstructed_NR_ampphase / np.linalg.norm(reconstructed_NR_ampphase),
                target_NR_ampphase / np.linalg.norm(target_NR_ampphase))

            if self.do_si_projection:
                reconstructed_SI_ampphase = \
                    reconstructed_SI_amp * np.exp(1j * reconstructed_SI_phase)

                matches.hplus[w, n] = \
                    compute_match(reconstructed_SI_hplus, target_SI_hplus,
                                  delta_t=self.SI_deltaT, psd=psd,
                                  low_frequency_cutoff=self.fmin)

                matches.ampphase[w, n] = \
                    compute_match(np.real(reconstructed_SI_ampphase),
                                  np.real(target_SI_ampphase),
                                  delta_t=self.SI_deltaT, psd=psd,
                                  low_frequency_cutoff=self.fmin)

    if self.do_si_projection:
        return euclidean_distances, projections, matches
    else:
        return euclidean_distances, projections
def __init__(self, catalog, delta_t=1./1024):
    """Run the PCA on ``catalog`` and tabulate its reconstruction quality.

    For every simulation ``w`` and every number of principal components
    ``n + 1`` this fills ``(nsims, nsims)`` tables, where
    ``nsims = catalog.simulations.nsimulations``:

    * ``amplitude_betas`` / ``phase_betas`` -- row ``w`` is the projection
      returned by ``self.project_waveform``.
    * ``amplitude_euclidean_distance`` / ``phase_euclidean_distance`` --
      distance between the reconstruction and the catalogue row.
    * ``matches`` -- complex: ``plus_match + 1j * cross_match`` from
      ``pycbc.filter.match`` on the real and imaginary parts of
      ``amplitude * exp(1j * phase)``.

    Parameters
    ----------
    catalog :
        Object providing ``amplitude_matrix``, ``phase_matrix`` and
        ``simulations.nsimulations``.
    delta_t : float
        Sample spacing handed to ``pycbc.types.TimeSeries``.
    """
    self.delta_t = delta_t

    #
    # --- Perform the PCA decomposition
    #
    print("Performing PCA")
    self.pca = perform_pca(catalog.amplitude_matrix, catalog.phase_matrix)

    nsims = catalog.simulations.nsimulations

    #
    # --- Compute nominal projection coefficients and matches
    #
    self.amplitude_betas = np.zeros(shape=(nsims, nsims))
    self.phase_betas = np.zeros(shape=(nsims, nsims))
    self.amplitude_euclidean_distance = np.zeros(shape=(nsims, nsims))
    self.phase_euclidean_distance = np.zeros(shape=(nsims, nsims))
    self.matches = np.zeros(shape=(nsims, nsims), dtype=complex)

    for w in range(nsims):
        amplitude = catalog.amplitude_matrix[w, :]
        phase = catalog.phase_matrix[w, :]

        projection = self.project_waveform(amplitude, phase)

        self.amplitude_betas[w, :] = np.copy(projection['amplitude_betas'])
        self.phase_betas[w, :] = np.copy(projection['phase_betas'])

        # The target polarisations do not depend on the number of PCs, so
        # build them once per waveform instead of once per (w, n) pair.
        target = amplitude * np.exp(1j * phase)
        hplus = pycbc.types.TimeSeries(np.real(target),
                                       delta_t=self.delta_t)
        hcross = pycbc.types.TimeSeries(np.imag(target),
                                        delta_t=self.delta_t)

        for n in range(nsims):

            # Reconstruct for each number of PCs
            recamp, recphase = self.reconstruct_ampphase(
                amplitude, phase, npcs=n + 1)

            self.amplitude_euclidean_distance[w, n] = \
                euclidean_distance(recamp, amplitude)
            self.phase_euclidean_distance[w, n] = \
                euclidean_distance(recphase, phase)

            reconstruction = recamp * np.exp(1j * recphase)

            # Compute match with hplus
            hplus_rec = pycbc.types.TimeSeries(np.real(reconstruction),
                                               delta_t=self.delta_t)
            plus_match, _ = pycbc.filter.match(
                hplus, hplus_rec, low_frequency_cutoff=30.0, psd=None)

            # Compute match with hcross
            hcross_rec = pycbc.types.TimeSeries(np.imag(reconstruction),
                                                delta_t=self.delta_t)
            cross_match, _ = pycbc.filter.match(
                hcross, hcross_rec, low_frequency_cutoff=30.0, psd=None)

            self.matches[w, n] = plus_match + 1j * cross_match
def reconstruct_tfmap(self, tfmap, npcs=1, this_fpeak=None, wfnum=None):
    """
    Reconstruct the given timefrequency map tfmap by projecting onto the
    current instance's PCs.

    Parameters
    ----------
    tfmap : dict
        Must provide the keys 'map' (2-D array), 'times', 'frequencies',
        'scales', 'mother_wavelet' and 'image_shape'.
    npcs : int
        Number of principal components summed into the reconstruction.
    this_fpeak :
        Peak frequency passed to ``self.project_tfmap`` and
        ``dealign_cwt``.  Required: if it is None a message is written to
        stderr and the process exits through ``sys.exit()``.
    wfnum :
        Not used by this method.

    Returns
    -------
    dict with keys
        'orig_map'            -- de-aligned reconstruction (dict like tfmap)
        'align_map'           -- aligned reconstruction (dict like tfmap)
        'tfmap_euclidean_raw' -- distance between the aligned
                                 reconstruction and the aligned projection
        'tfmap_euclidean'     -- distance between the de-aligned
                                 reconstruction and the input map
    """
    if this_fpeak is None:
        sys.stderr.write("require desired fpeak\n")
        sys.exit()

    #
    # Compute projection of this map onto the PCs
    #
    tf_projection = self.project_tfmap(tfmap, this_fpeak=this_fpeak)

    #
    # Reconstruct the waveform
    #
    h, w = tfmap['map'].shape

    recmap_align = dict()
    recmap_align['map'] = np.zeros(h * w)

    for i in range(npcs):
        recmap_align['map'] += tf_projection['timefreq_betas'][i] * \
            self.pca['timefreq_pca'].components_[i, :]

    #
    # De-center and realign the reconstruction
    #
    recmap_align['map'] += self.pca['timefreq_mean']
    # Clip negative values of the reconstruction to zero.
    recmap_align['map'][recmap_align['map'] < 0] = 0.0

    # Reshape
    recmap_align['map'] = recmap_align['map'].reshape(h, w)

    recmap_align['times'] = np.copy(tfmap['times'])
    recmap_align['frequencies'] = np.copy(tfmap['frequencies'])
    recmap_align['scales'] = np.copy(tfmap['scales'])
    recmap_align['mother_wavelet'] = tfmap['mother_wavelet']
    recmap_align['image_shape'] = tfmap['image_shape']

    # realign
    recmap = recmap_align.copy()
    recmap['map'] = dealign_cwt(recmap, this_fpeak)

    #
    # Populate the output dictionary
    #
    tf_reconstruction = dict()
    tf_reconstruction['orig_map'] = recmap.copy()
    tf_reconstruction['align_map'] = recmap_align.copy()

    tf_reconstruction['tfmap_euclidean_raw'] = euclidean_distance(
        recmap_align['map'].reshape(h * w),
        tf_projection['tfmap_align'].reshape(h * w))

    tf_reconstruction['tfmap_euclidean'] = euclidean_distance(
        recmap['map'].reshape(h * w),
        tfmap['map'].reshape(h * w))

    return tf_reconstruction
def reconstruct_freqseries(self, freqseries, npcs=1, this_fpeak=None, wfnum=None):
    """
    Reconstruct the waveform in freqseries using <npcs> principal components
    from the catalogue.

    Procedure:
    1) Reconstruct the centered spectra (phase and mag) from the
       beta-weighted PCs
    2) Un-center the spectra (add the mean back on)
    3) Shift the spectra with ``shift_vec`` and compare them to the
       original

    Parameters
    ----------
    freqseries :
        Complex frequency-domain waveform sampled on
        ``self.sample_frequencies``.
    npcs : int
        Number of principal components summed into the reconstruction.
    this_fpeak :
        Peak frequency used for the shift.  If None it is taken as the
        frequency of the largest ``abs(freqseries)`` at or above 2000.
    wfnum :
        Not used by this method.

    Returns
    -------
    dict with keys 'fd_projection', 'original_spectrum',
    'sample_frequencies', 'magnitude_euclidean_raw', 'phase_euclidean_raw',
    'recon_mag', 'recon_phi', 'recon_spectrum', 'recon_timeseries',
    'magnitude_euclidean', 'phase_euclidean', 'match_aligo' and
    'match_noweight'.
    """
    if this_fpeak is None:
        # Locate fpeak
        # Note: we'll assume the peak we're aligning to is >2kHz.  This
        # avoids any low frequency stuff.
        high_idx = self.sample_frequencies >= 2000
        high_freq = self.sample_frequencies[high_idx]
        high_spec = freqseries[high_idx]
        this_fpeak = high_freq[np.argmax(abs(high_spec))]

    # Get projection:
    fd_projection = self.project_freqseries(freqseries)

    fd_reconstruction = dict()
    fd_reconstruction['fd_projection'] = fd_projection

    #
    # Original Waveforms
    #
    orimag = abs(freqseries)
    oriphi = phase_of(freqseries)
    orispec = orimag * np.exp(1j * oriphi)

    fd_reconstruction['original_spectrum'] = unit_hrss(
        orispec, delta=self.delta_f, domain='frequency')

    fd_reconstruction['sample_frequencies'] = np.copy(
        self.sample_frequencies)

    #
    # Magnitude and phase reconstructions
    #

    # Initialise reconstructions
    recmag = np.zeros(shape=np.shape(orimag))
    recphi = np.zeros(shape=np.shape(oriphi))

    # Sum contributions from PCs
    for i in range(npcs):
        recmag += \
            fd_projection['magnitude_betas'][i] * \
            self.pca['magnitude_pca'].components_[i, :]

        recphi += \
            fd_projection['phase_betas'][i] * \
            self.pca['phase_pca'].components_[i, :]

    #
    # De-center the reconstruction
    #
    recmag += self.pca['magnitude_pca'].mean_
    recphi += self.pca['phase_pca'].mean_

    # --- Raw reconstruction quality
    # Only compare bins above the low-frequency cutoff whose original
    # magnitude exceeds 1% of its maximum.
    idx = (self.sample_frequencies > self.low_frequency_cutoff) \
        * (orimag > 0.01 * max(orimag))

    fd_reconstruction['magnitude_euclidean_raw'] = \
        euclidean_distance(recmag[idx], fd_projection['magnitude_cent'][idx])

    fd_reconstruction['phase_euclidean_raw'] = \
        euclidean_distance(recphi[idx], fd_projection['phase_cent'][idx])

    #
    # Move the spectrum back to where it should be
    #
    recmag = shift_vec(recmag, self.sample_frequencies,
                       fcenter=this_fpeak, fpeak=self.fcenter).real
    # XXX: phase_align
    recphi = shift_vec(recphi, self.sample_frequencies,
                       fcenter=this_fpeak, fpeak=self.fcenter).real

    fd_reconstruction['recon_mag'] = np.copy(recmag)
    fd_reconstruction['recon_phi'] = np.copy(recphi)

    #
    # Fourier spectrum reconstructions
    #
    recon_spectrum = recmag * np.exp(1j * recphi)

    # --- Unit norm reconstruction
    fd_reconstruction['recon_spectrum'] = unit_hrss(
        recon_spectrum, delta=self.delta_f, domain='frequency')

    fd_reconstruction['recon_timeseries'] = \
        fd_reconstruction['recon_spectrum'].to_timeseries()

    # --- Match calculations for full reconstructions
    idx = (self.sample_frequencies > self.low_frequency_cutoff) \
        * (orimag > 0.01 * max(orimag))

    fd_reconstruction['magnitude_euclidean'] = \
        euclidean_distance(recmag[idx], orimag[idx])

    fd_reconstruction['phase_euclidean'] = \
        euclidean_distance(recphi[idx], oriphi[idx])

    # make psd
    flen = len(self.sample_frequencies)
    psd = aLIGOZeroDetHighPower(
        flen, self.delta_f, low_freq_cutoff=self.low_frequency_cutoff)

    fd_reconstruction['match_aligo'] = \
        pycbc.filter.match(fd_reconstruction['recon_spectrum'],
                           fd_reconstruction['original_spectrum'],
                           psd=psd,
                           low_frequency_cutoff=self.low_frequency_cutoff)[0]

    fd_reconstruction['match_noweight'] = \
        pycbc.filter.match(fd_reconstruction['recon_spectrum'],
                           fd_reconstruction['original_spectrum'],
                           low_frequency_cutoff=self.low_frequency_cutoff)[0]

    return fd_reconstruction
def factorized_invert(self, x_coord, y_coord):
    """
    Same as invert(), but much faster and needs less memory.

    Exploits several symmetries to invert with small lookup tables.

    - Negative and positive are symmetrical, calculate with absolute values
    - Y and X are reflected, so we can store half the values
    - The game treats large input values all the same, so the region we
      need to map is actually reasonably small.
    - Within a large part of this region, X and Y can be calculated
      separately.

    Call plot_reachable() for a visual clue to how this works.

    After the calculation we check that our answer matches with invert():
    both results are mapped forward with umap(), clamped, and compared.
    The one-dimensional lookup must agree exactly; the triangular (2D)
    lookup may differ by a distance of up to 5.  A mismatch is printed to
    stderr and then fails an assertion.

    Returns the tuple (inverted_x, inverted_y).
    """
    clamped_x, clamped_y = self.clamp_to_max(x_coord, y_coord)

    # Work on absolute values and restore the signs at the end.
    x_sign = 1 if clamped_x >= 0 else -1
    clamped_x = abs(clamped_x)
    y_sign = 1 if clamped_y >= 0 else -1
    clamped_y = abs(clamped_y)

    boundary = self.one_dimensional_boundary
    remainder = None
    if clamped_x > boundary - 0.5 and clamped_y > boundary - 0.5:
        # Outside the one dimensional range
        remainder = self.n64_max + 1 - boundary
        # Now x and y must become zero indexed in our 2D lookup
        clamped_x -= boundary
        clamped_y -= boundary
        clamped_x = int(np.round(clamped_x))
        clamped_y = int(np.round(clamped_y))
        if clamped_y >= clamped_x:
            index = self.triangular_to_linear_index(
                clamped_x, clamped_y, remainder)
            inverted_y, inverted_x = self.triangular_map[index]
        else:
            # Mirror across the diagonal: look up with the coordinates
            # swapped and swap the result back.
            index = self.triangular_to_linear_index(
                clamped_y, clamped_x, remainder)
            inverted_x, inverted_y = self.triangular_map[index]
    else:
        inverted_x = self.one_dimensional_map[int(np.ceil(clamped_x * 2))]
        inverted_y = self.one_dimensional_map[int(np.ceil(clamped_y * 2))]
    inverted_x = x_sign * inverted_x + 128
    inverted_y = y_sign * inverted_y + 128

    # Check how accurate factorized_invert is vs the canonical self.invert
    factorized = self.clamp_to_max(*self.umap(inverted_x, inverted_y))
    # Call invert() once and reuse the result for the check and the report.
    canonical_inverse = tuple(self.invert(x_coord, y_coord))
    canonical = self.clamp_to_max(*self.umap(*canonical_inverse))
    distance = euclidean_distance(factorized, canonical)

    def report(label):
        # Dump everything needed to reproduce a mismatch.
        print(
            (label + ": {}, x, y: {} {}, ix, iy: {} {}, r: {}, "
             "six, siy: {} {}, r: {}")
            .format(distance, x_coord, y_coord, inverted_x, inverted_y,
                    factorized, *canonical_inverse, canonical),
            file=sys.stderr, flush=True)

    # Test against None rather than truthiness, so that a remainder of 0
    # is still recognised as the triangular path.
    if remainder is not None:
        # Used triangular map (upper right corner of the range)
        # We care less about accuracy in the far ranges
        # There might be a small rounding error in the 2D lookup
        if distance > 5:
            report("d>5")
        assert distance <= 5
    else:
        # Used one dimensional map
        if distance != 0:
            report("d>0")
        assert distance == 0

    return inverted_x, inverted_y