def _closest(self, row):
        """Return the training label of the sample nearest to ``row``.

        Every entry of ``self.features_train`` is compared with ``row`` using
        ``euclidean_distance``.  When several entries are equally near, the
        one with the lowest index wins because only a strictly smaller
        distance replaces the current best.
        """
        samples = self.features_train

        # Seed the search with the first sample (indexing fails here if the
        # training set is empty), then scan the remaining ones.
        nearest_index, nearest_dist = 0, euclidean_distance(row, samples[0])
        for index in range(1, len(samples)):
            candidate = euclidean_distance(row, samples[index])
            if candidate < nearest_dist:
                nearest_index, nearest_dist = index, candidate

        return self.labels_train[nearest_index]
Esempio n. 2
0
    def filterSongOptionsByThemeDistance(self, song_options, master_song):
        """Keep the candidate songs whose theme is closest to the current target.

        The target theme is the weighted sum of ``self.theme_centroid``, the
        master song's theme descriptor and ``self.prev_song_theme_descriptor``
        (weights ``THEME_WEIGHT``, ``CURRENT_SONG_WEIGHT`` and
        ``PREV_SONG_WEIGHT``).  Every candidate is opened to read its
        ``song_theme_descriptor`` and closed again afterwards.

        Returns ``song_options`` indexed with the positions of the
        ``NUM_SONGS_ONSETS`` smallest distances, in order of increasing
        distance.
        """
        # NOTE(review): an earlier comment described a two-stage selection
        # (NUM_SONGS_THEME songs first, then NUM_SONGS_ONSETS of those); this
        # method only applies the NUM_SONGS_ONSETS cut - confirm that the
        # other stage lives elsewhere.

        # Use this weighted average of the master song and goal song to select the next song
        cur_theme_centroid = (
            THEME_WEIGHT * self.theme_centroid +
            CURRENT_SONG_WEIGHT * master_song.song_theme_descriptor +
            PREV_SONG_WEIGHT * self.prev_song_theme_descriptor)

        song_options_distance_to_centroid = []
        for song in song_options:
            song.open()
            try:
                song_options_distance_to_centroid.append(
                    euclidean_distance(cur_theme_centroid,
                                       song.song_theme_descriptor))
            finally:
                # Always release the song, even if the distance computation fails.
                song.close()

        song_options_closest_to_centroid = np.argsort(
            song_options_distance_to_centroid)[:NUM_SONGS_ONSETS]

        # Log messages
        logger.debug('Selected songs, ordered by theme similarity:')
        for i in song_options_closest_to_centroid:
            song = song_options[i]
            song.open()
            try:
                title = song.title
                dist_to_centroid = song_options_distance_to_centroid[i]
                key = song.key
                scale = song.scale
                logger.debug('>> Theme difference {:20s} : {:.2f} ({} {})'.format(
                    title[:20], dist_to_centroid, key, scale))
            finally:
                song.close()

        # Indexing with an index array: assumes song_options supports this
        # (e.g. a numpy array of songs) - TODO confirm against callers.
        return song_options[song_options_closest_to_centroid]
Esempio n. 3
0
 def distance_length_tuple(option):
     """Return a (distance, length, (x, y)) tuple for ease of sorting.

     ``option`` is decoded as a flat index with 128 entries per row.
     """
     # Row and column of vc_map, respectively.
     inverted_y, inverted_x = divmod(option, 128)
     mapped_distance = euclidean_distance(
         self.map(inverted_x, inverted_y), (original_x, original_y))
     length = np.sqrt(inverted_x**2 + inverted_y**2)
     return (mapped_distance, length, (inverted_x, inverted_y))
Esempio n. 4
0
	def chooseNewTheme(self, firstSong):
		"""Set ``self.theme_centroid`` from the unplayed songs nearest to ``firstSong``.

		Each song in ``self.songsUnplayed`` is opened, its
		``song_theme_descriptor`` is read and compared with the descriptor of
		``firstSong`` using ``euclidean_distance``, and the song is closed
		again.  The closest quarter of the songs (at least one, when any song
		is available) is averaged to form the new centroid.
		"""
		# 1. Measure the distance of each song to the first song
		songs_distance_to_first_song = []
		songs_themes = []
		for song in self.songsUnplayed:
			song.open()
			try:
				theme = song.song_theme_descriptor
				# Store this song's own theme: the centroid below must average
				# the selected songs, not copies of the first song's theme.
				songs_themes.append(theme)
				songs_distance_to_first_song.append(
					euclidean_distance(theme, firstSong.song_theme_descriptor))
			finally:
				# Release the song even if reading or comparing its theme fails.
				song.close()
		songs_sorted = np.argsort(songs_distance_to_first_song)
		# 2. Select songs that are close to this first song.
		# Floor division keeps the slice bound an integer on Python 2 and 3;
		# keeping at least one song avoids averaging an empty selection.
		num_selected = max(1, len(songs_sorted) // 4)
		indices_sorted = songs_sorted[:num_selected]
		# 3. Calculate the centroid of these songs
		self.theme_centroid = np.average(np.array(songs_themes)[indices_sorted], axis=0)
songs = []

# Record the theme descriptor (in the pool) and the title of every annotated song.
for song in sc.get_annotated():
	song.open()
	try:
		pool.add('song.themes', song.song_theme_descriptor.tolist()[0])
		songs.append(song.title)
	finally:
		# Release the song even if reading its descriptor fails.
		song.close()

# --------------------- Make nice plots ---------------------------
Y = pool['song.themes']	# All songs in "/music" and "/moremusic" libraries
X = pool['theme_descriptors']						# Evolution of mix theme descriptors

from scipy.spatial.distance import euclidean as euclidean_distance

for P1,P2 in zip(X[:-1,:],X[1:,:]):
	distance_path = euclidean_distance(P1,P2)
	num_songs_closer = 0
	for P in Y:
		dist = euclidean_distance(P1,P)
		if dist < distance_path:
			num_songs_closer += 1
	print distance_path, num_songs_closer

from mpl_toolkits.mplot3d import Axes3D
#~ fig = plt.figure()
#~ ax = fig.add_subplot(111, projection='3d')
#~ ax.scatter(X[:,0],X[:,1],X[:,2], marker='x')
#~ ax.scatter(Y[:,0],Y[:,1],Y[:,2], marker='o')
#~ plt.show()

# 01
Esempio n. 6
0
    def compute_projection_fidelity(self, psd=None):
        """
        Compute the reconstruction fidelity as a function of nPCs, for each
        waveform in the catalogue.  Optionally computes waveform match.

        match / euclidean_distance arrays are organised as
            number_of_test_waves x num_train_waves,

        where num_train_waves = number of PCs used to reconstruct the test
        waveform

        """ 

        print "Evaluating reconstruction fidelity"
 
        # Pre-allocate

        class euclidean_distances(object):
            hplus = np.zeros(shape=(self.ntest, self.ntrain))
            amplitude = np.zeros(shape=(self.ntest, self.ntrain))
            phase = np.zeros(shape=(self.ntest, self.ntrain))

        class projections(object):
            hplus = np.zeros(shape=(self.ntest, self.ntrain))
            ampphase = np.zeros(shape=(self.ntest, self.ntrain))

        if self.do_si_projection:
            class matches(object):
                hplus = np.zeros(shape=(self.ntest, self.ntrain))
                ampphase = np.zeros(shape=(self.ntest, self.ntrain))


        for w in xrange(self.ntest):
    
            # retrieve projection coefficients
            hplus_NR_betas = self.test_catalogue_data[w]['NRhplusTimeSeriesBetas']

            amp_NR_betas   = self.test_catalogue_data[w]['NRAmpTimeSeriesBetas']
            phase_NR_betas = self.test_catalogue_data[w]['NRPhaseTimeSeriesBetas']
            
            # --- The target waveforms
            # We can just use the PCs to rebuild these using the PCA() inverse
            # transform method instead of carrying all the data round
            # XXX: can only use inverse transform to go back to training data!

           # target_NR_hplus = self.NRhplusTimeSeriesPCA.inverse_transform(
           #         self.test_catalogue_data[w]['NRhplusTimeSeriesBetas'] )
           # target_NR_amp = self.NRAmpTimeSeriesPCA.inverse_transform(
           #         self.test_catalogue_data[w]['NRAmpTimeSeriesBetas'] )
           # target_NR_phase = self.NRPhaseTimeSeriesPCA.inverse_transform(
           #         self.test_catalogue_data[w]['NRPhaseTimeSeriesBetas'] )

            target_NR_hplus = reconstruct(self.NRhplusTimeSeriesPCA,
                    self.test_catalogue_data[w]['NRhplusTimeSeriesBetas'],
                    len(self.test_catalogue_data[w]['NRhplusTimeSeriesBetas']))
            target_NR_amp = reconstruct(self.NRAmpTimeSeriesPCA,
                    self.test_catalogue_data[w]['NRAmpTimeSeriesBetas'],
                    len(self.test_catalogue_data[w]['NRAmpTimeSeriesBetas']))
            target_NR_phase = reconstruct(self.NRPhaseTimeSeriesPCA,
                    self.test_catalogue_data[w]['NRPhaseTimeSeriesBetas'],
                    len(self.test_catalogue_data[w]['NRPhaseTimeSeriesBetas']))


            target_NR_ampphase = target_NR_amp*np.exp(1j*target_NR_phase)

            if self.do_si_projection:
                hplus_SI_betas = self.test_catalogue_data[w]['SIhplusTimeSeriesBetas']
                amp_SI_betas   = self.test_catalogue_data[w]['SIAmpTimeSeriesBetas']
                phase_SI_betas = self.test_catalogue_data[w]['SIPhaseTimeSeriesBetas']
                
                # The target waveform
#               target_SI_hplus = self.SIhplusTimeSeriesPCA.inverse_transform(
#                       self.test_catalogue_data[w]['SIhplusTimeSeriesBetas'] )
#               target_SI_amp = self.SIAmpTimeSeriesPCA.inverse_transform(
#                       self.test_catalogue_data[w]['SIAmpTimeSeriesBetas'] )
#               target_SI_phase = self.SIPhaseTimeSeriesPCA.inverse_transform(
#                       self.test_catalogue_data[w]['SIPhaseTimeSeriesBetas'] )

                target_SI_hplus = reconstruct(self.SIhplusTimeSeriesPCA,
                        self.test_catalogue_data[w]['SIhplusTimeSeriesBetas'],
                        len(self.test_catalogue_data[w]['SIhplusTimeSeriesBetas']))
                target_SI_amp = reconstruct(self.SIAmpTimeSeriesPCA,
                        self.test_catalogue_data[w]['SIAmpTimeSeriesBetas'],
                        len(self.test_catalogue_data[w]['SIAmpTimeSeriesBetas']))
                target_SI_phase = reconstruct(self.SIPhaseTimeSeriesPCA,
                        self.test_catalogue_data[w]['SIPhaseTimeSeriesBetas'],
                        len(self.test_catalogue_data[w]['SIPhaseTimeSeriesBetas']))



                target_SI_ampphase = target_SI_amp * np.exp(1j*target_SI_phase)

    
            for n,npcs in enumerate(xrange(1,self.ntrain+1)):
    
                reconstructed_NR_hplus = \
                        reconstruct(self.NRhplusTimeSeriesPCA, hplus_NR_betas,
                                npcs)
                reconstructed_NR_amp = \
                        reconstruct(self.NRAmpTimeSeriesPCA, amp_NR_betas, npcs)
                reconstructed_NR_phase = \
                        reconstruct(self.NRPhaseTimeSeriesPCA, phase_NR_betas,
                                npcs)

                if self.do_si_projection:
                    reconstructed_SI_hplus = \
                            reconstruct(self.SIhplusTimeSeriesPCA, hplus_SI_betas,
                                    npcs)
                    reconstructed_SI_amp = \
                            reconstruct(self.SIAmpTimeSeriesPCA, amp_SI_betas, npcs)
                    reconstructed_SI_phase = \
                            reconstruct(self.SIPhaseTimeSeriesPCA, phase_SI_betas,
                                    npcs)

                reconstructed_NR_ampphase = \
                        reconstructed_NR_amp*np.exp(1j*reconstructed_NR_phase)
 
                euclidean_distances.hplus[w,n] = euclidean_distance(
                   reconstructed_NR_hplus, target_NR_hplus)
                euclidean_distances.amplitude[w,n] =\
                   euclidean_distance(reconstructed_NR_amp, target_NR_amp)
                euclidean_distances.phase[w,n] =\
                        euclidean_distance(reconstructed_NR_amp, target_NR_amp)

                projections.hplus[w,n] = np.dot(
                        reconstructed_NR_hplus/np.linalg.norm(reconstructed_NR_hplus),
                        target_NR_hplus/np.linalg.norm(target_NR_hplus)
                        )

                projections.ampphase[w,n] = np.vdot(
                        reconstructed_NR_ampphase/np.linalg.norm(reconstructed_NR_ampphase),
                        target_NR_ampphase/np.linalg.norm(target_NR_ampphase)
                        )
    
                if self.do_si_projection:

                    reconstructed_SI_ampphase = \
                            reconstructed_SI_amp*np.exp(1j*reconstructed_SI_phase)

                    matches.hplus[w,n] = \
                            compute_match(reconstructed_SI_hplus, target_SI_hplus,
                                    delta_t=self.SI_deltaT, psd=psd,
                                    low_frequency_cutoff=self.fmin)
        
                    matches.ampphase[w,n] = \
                            compute_match(np.real(reconstructed_SI_ampphase),
                                    np.real(target_SI_ampphase), delta_t=self.SI_deltaT,
                                    psd=psd, low_frequency_cutoff=self.fmin)

        if self.do_si_projection:
            return euclidean_distances, projections, matches
        else:
            return euclidean_distances, projections
    def __init__(self, catalog, delta_t=1./1024):

        self.delta_t = delta_t
        #
        # --- Peform the PCA decomposition
        #
        print "Performing PCA"
        self.pca = perform_pca(catalog.amplitude_matrix,
                catalog.phase_matrix)

        nsims = catalog.simulations.nsimulations

        #
        # --- Compute nominal projection coefficients and matches
        #
        self.amplitude_betas = np.zeros(shape=(nsims,nsims))
        self.phase_betas = np.zeros(shape=(nsims,nsims))

        self.amplitude_euclidean_distance = np.zeros(shape=(nsims,nsims))
        self.phase_euclidean_distance = np.zeros(shape=(nsims,nsims))
        self.matches = np.zeros(shape=(nsims,nsims), dtype=complex)


        for w in xrange(nsims):

            projection = self.project_waveform(catalog.amplitude_matrix[w,:],
                catalog.phase_matrix[w,:])

            self.amplitude_betas[w,:] = np.copy(projection['amplitude_betas'])
            self.phase_betas[w,:] = np.copy(projection['phase_betas'])

            for n in xrange(nsims):


                # Reconstruct for each number of PCs
                recamp, recphase = self.reconstruct_ampphase(
                        catalog.amplitude_matrix[w,:],
                        catalog.phase_matrix[w,:], npcs=n+1)

                self.amplitude_euclidean_distance[w,n] = euclidean_distance(recamp,
                        catalog.amplitude_matrix[w,:])
                self.phase_euclidean_distance[w,n] = euclidean_distance(recphase,
                        catalog.phase_matrix[w,:])


                # Compute match with hplus
                hplus = pycbc.types.TimeSeries(np.real(catalog.amplitude_matrix[w,:] *
                        np.exp(1j*catalog.phase_matrix[w,:])), delta_t=self.delta_t)
                hplus_rec = \
                        pycbc.types.TimeSeries(np.real(recamp*np.exp(1j*recphase)),
                                delta_t=self.delta_t)

                plus_match , _ = pycbc.filter.match(hplus, hplus_rec,
                        low_frequency_cutoff=30.0, psd=None)

                # Compute match with hcross
                hcross = pycbc.types.TimeSeries(np.imag(catalog.amplitude_matrix[w,:] *
                        np.exp(1j*catalog.phase_matrix[w,:])), delta_t=self.delta_t)
                hcross_rec = \
                        pycbc.types.TimeSeries(np.imag(recamp*np.exp(1j*recphase)),
                                delta_t=self.delta_t)
                cross_match , _ = pycbc.filter.match(hcross, hcross_rec,
                        low_frequency_cutoff=30.0, psd=None)


                self.matches[w,n] = plus_match + 1j*cross_match
Esempio n. 8
0
    def reconstruct_tfmap(self, tfmap, npcs=1, this_fpeak=None, wfnum=None):
        """
        Reconstruct the given timefrequency map tfmap by projecting onto the
        current instance's PCs

        tfmap is a dictionary providing 'map' (2-D), 'times', 'frequencies',
        'scales', 'mother_wavelet' and 'image_shape'.  npcs is the number of
        leading PCs summed in the reconstruction.  this_fpeak is required:
        without it a message is printed to stderr and the process exits.
        wfnum is accepted but not used here.

        Returns a dictionary with
            'align_map' : the reconstruction in the aligned frame
            'orig_map' : the reconstruction after dealign_cwt()
            'tfmap_euclidean_raw' : distance between the aligned
                reconstruction and the aligned projection input
            'tfmap_euclidean' : distance between the de-aligned
                reconstruction and tfmap['map']
        """

        # Identity test: "== None" would be evaluated element-wise if an
        # array were passed.
        if this_fpeak is None:
            print >> sys.stderr, "require desired fpeak"
            # NOTE(review): sys.exit() without an argument reports exit
            # status 0 - confirm that this is intended for an error path.
            sys.exit()

        #
        # Compute projection of this map onto the PCs
        #
        tf_projection = self.project_tfmap(tfmap, this_fpeak=this_fpeak)

        #
        # Reconstruct the waveform
        #
        h, w = tfmap['map'].shape

        recmap_align = dict()
        recmap_align['map'] = np.zeros(h * w)

        # Sum the beta-weighted contributions of the first npcs PCs
        for i in xrange(npcs):
            recmap_align['map'] += tf_projection['timefreq_betas'][i]*\
                    self.pca['timefreq_pca'].components_[i,:]

        #
        # De-center and realign the reconstruction
        #

        # Add the mean back, clip negative values to zero, then reshape
        recmap_align['map'] += self.pca['timefreq_mean']
        recmap_align['map'][recmap_align['map'] < 0] = 0.0
        recmap_align['map'] = recmap_align['map'].reshape(h, w)

        recmap_align['times'] = np.copy(tfmap['times'])
        recmap_align['frequencies'] = np.copy(tfmap['frequencies'])
        recmap_align['scales'] = np.copy(tfmap['scales'])
        recmap_align['mother_wavelet'] = tfmap['mother_wavelet']
        recmap_align['image_shape'] = tfmap['image_shape']

        # realign (shallow copy: only the 'map' entry is replaced)
        recmap = recmap_align.copy()
        recmap['map'] = dealign_cwt(recmap, this_fpeak)

        #
        # Populate the output dictionary
        #
        tf_reconstruction = dict()
        tf_reconstruction['orig_map'] = recmap.copy()
        tf_reconstruction['align_map'] = recmap_align.copy()

        tf_reconstruction['tfmap_euclidean_raw'] = euclidean_distance(
            recmap_align['map'].reshape(h * w),
            tf_projection['tfmap_align'].reshape(h * w))

        tf_reconstruction['tfmap_euclidean'] = euclidean_distance(
            recmap['map'].reshape(h * w), tfmap['map'].reshape(h * w))

        return tf_reconstruction
Esempio n. 9
0
    def reconstruct_freqseries(self,
                               freqseries,
                               npcs=1,
                               this_fpeak=None,
                               wfnum=None):
        """
        Reconstruct the waveform in freqseries using <npcs> principal components
        from the catalogue

        Procedure:
        1) Reconstruct the centered spectra (phase and mag) from the
        beta-weighted PCs
        2) Un-center the spectra (add the mean back on)
        3) Shift the spectra back to this_fpeak and evaluate the
        reconstruction quality

        When this_fpeak is not given it is taken as the frequency of the
        largest spectral magnitude at or above 2 kHz.  wfnum is accepted but
        not used here.

        Returns a dictionary with the projection ('fd_projection'), the
        original and reconstructed unit-hrss spectra, the reconstructed
        magnitude / phase / time series, the Euclidean distances of the
        magnitude and phase reconstructions (raw and shifted) and the matches
        with and without aLIGO noise weighting.
        """

        # Identity test: "== None" would be evaluated element-wise if an
        # array were passed.
        if this_fpeak is None:
            # Locate fpeak
            # Note: we'll assume the peak we're aligning to is >2kHz.  This
            # avoids any low frequency stuff.
            high_idx = self.sample_frequencies >= 2000
            high_freq = self.sample_frequencies[high_idx]
            high_spec = freqseries[high_idx]
            this_fpeak = high_freq[np.argmax(abs(high_spec))]

        # Get projection:
        fd_projection = self.project_freqseries(freqseries)
        fd_reconstruction = dict()

        fd_reconstruction['fd_projection'] = fd_projection

        #
        # Original Waveforms
        #
        orimag = abs(freqseries)
        oriphi = phase_of(freqseries)

        orispec = orimag * np.exp(1j * oriphi)

        fd_reconstruction['original_spectrum'] = unit_hrss(orispec,
                                                           delta=self.delta_f,
                                                           domain='frequency')

        fd_reconstruction['sample_frequencies'] = np.copy(
            self.sample_frequencies)

        #
        # Magnitude and phase reconstructions
        #

        # Initialise reconstructions
        recmag = np.zeros(shape=np.shape(orimag))
        recphi = np.zeros(shape=np.shape(oriphi))

        # Sum contributions from PCs
        for i in xrange(npcs):

            recmag += \
                    fd_projection['magnitude_betas'][i]*\
                    self.pca['magnitude_pca'].components_[i,:]

            recphi += \
                    fd_projection['phase_betas'][i]*\
                    self.pca['phase_pca'].components_[i,:]

        #
        # De-center the reconstruction
        #

        recmag += self.pca['magnitude_pca'].mean_
        recphi += self.pca['phase_pca'].mean_

        # Samples used for the distance measures: above the low-frequency
        # cutoff and with an original magnitude above 1% of its maximum.
        # The mask depends only on the original data, so it serves both the
        # raw and the shifted comparison below.
        idx = (self.sample_frequencies>self.low_frequency_cutoff) \
                * (orimag>0.01*max(orimag))

        # --- Raw reconstruction quality
        fd_reconstruction['magnitude_euclidean_raw'] = \
                euclidean_distance(recmag[idx], fd_projection['magnitude_cent'][idx])

        fd_reconstruction['phase_euclidean_raw'] = \
                euclidean_distance(recphi[idx], fd_projection['phase_cent'][idx])

        #
        # Move the spectrum back to where it should be
        #
        recmag = shift_vec(recmag,
                           self.sample_frequencies,
                           fcenter=this_fpeak,
                           fpeak=self.fcenter).real
        # XXX: phase_align
        recphi = shift_vec(recphi,
                           self.sample_frequencies,
                           fcenter=this_fpeak,
                           fpeak=self.fcenter).real

        fd_reconstruction['recon_mag'] = np.copy(recmag)
        fd_reconstruction['recon_phi'] = np.copy(recphi)

        #
        # Fourier spectrum reconstructions
        #

        recon_spectrum = recmag * np.exp(1j * recphi)

        # --- Unit norm reconstruction
        fd_reconstruction['recon_spectrum'] = unit_hrss(recon_spectrum,
                                                        delta=self.delta_f,
                                                        domain='frequency')

        fd_reconstruction['recon_timeseries'] = \
                fd_reconstruction['recon_spectrum'].to_timeseries()

        # --- Distances for the shifted mag/phase reconstructions
        fd_reconstruction['magnitude_euclidean'] = \
                euclidean_distance(recmag[idx], orimag[idx])

        fd_reconstruction['phase_euclidean'] = \
                euclidean_distance(recphi[idx], oriphi[idx])

        # --- Match calculations for full reconstructions

        # make psd
        flen = len(self.sample_frequencies)
        psd = aLIGOZeroDetHighPower(flen,
                                    self.delta_f,
                                    low_freq_cutoff=self.low_frequency_cutoff)

        fd_reconstruction['match_aligo'] = \
                pycbc.filter.match(fd_reconstruction['recon_spectrum'],
                        fd_reconstruction['original_spectrum'], psd = psd,
                        low_frequency_cutoff = self.low_frequency_cutoff)[0]

        fd_reconstruction['match_noweight'] = \
                pycbc.filter.match(fd_reconstruction['recon_spectrum'],
                        fd_reconstruction['original_spectrum'],
                        low_frequency_cutoff = self.low_frequency_cutoff)[0]

        return fd_reconstruction
Esempio n. 10
0
    def factorized_invert(self, x_coord, y_coord):
        """ Same as invert(), but much faster and needs less memory.
            Exploits several symmetries to invert with small lookup tables.

            - Negative and positive are symmetrical, calculate with absolute values
            - Y and X are reflected, so we can store half the values
            - The game treats large input values all the same, so the region we
              need to map is actually reasonably small.
            - Within a large part of this region, X and Y can be calculated separately.
              Call plot_reachable() for a visual clue to how this works.

            After the calculation we check that our answer matches with invert():
            the mapped results must be identical when the one dimensional map
            was used and at most 5 apart when the triangular map was used.
            A mismatch is reported on stderr and raises AssertionError.

            Returns the tuple (inverted_x, inverted_y).
        """
        clamped_x, clamped_y = self.clamp_to_max(x_coord, y_coord)

        # Work on absolute values and restore the signs at the end.
        x_sign = 1 if clamped_x >= 0 else -1
        y_sign = 1 if clamped_y >= 0 else -1
        clamped_x = abs(clamped_x)
        clamped_y = abs(clamped_y)

        boundary = self.one_dimensional_boundary
        # Explicit flag rather than relying on the truthiness of a computed
        # value to remember which lookup was used.
        used_triangular_map = bool(clamped_x > boundary - 0.5
                                   and clamped_y > boundary - 0.5)
        if used_triangular_map:
            # Outside the one dimensional range
            remainder = self.n64_max + 1 - boundary

            # Now x and y must become zero indexed in our 2D lookup
            tri_x = int(np.round(clamped_x - boundary))
            tri_y = int(np.round(clamped_y - boundary))

            # Only half of the table is stored: always look up with the
            # smaller coordinate first and swap the result when needed.
            if tri_y >= tri_x:
                index = self.triangular_to_linear_index(tri_x, tri_y, remainder)
                inverted_y, inverted_x = self.triangular_map[index]
            else:
                index = self.triangular_to_linear_index(tri_y, tri_x, remainder)
                inverted_x, inverted_y = self.triangular_map[index]
        else:
            inverted_x = self.one_dimensional_map[int(np.ceil(clamped_x * 2))]
            inverted_y = self.one_dimensional_map[int(np.ceil(clamped_y * 2))]

        inverted_x = x_sign * inverted_x + 128
        inverted_y = y_sign * inverted_y + 128

        # Check how accurate factorized_invert is vs the canonical self.invert
        # (self.invert is evaluated once and reused for the report).
        factorized = self.clamp_to_max(*self.umap(inverted_x, inverted_y))
        canonical_inverted = self.invert(x_coord, y_coord)
        canonical = self.clamp_to_max(*self.umap(*canonical_inverted))
        distance = euclidean_distance(factorized, canonical)

        if used_triangular_map:
            # Used triangular map (upper right corner of the range)
            # We care less about accuracy in the far ranges
            # There might be a small rounding error in the 2D lookup
            acceptable = distance <= 5
            label = "d>5"
        else:
            # Used one dimensional map
            acceptable = distance == 0
            label = "d>0"

        if not acceptable:
            message = (
                label +
                ": {}, x, y: {} {}, ix, iy: {} {}, r: {}, six, siy: {} {}, r: {}"
            ).format(distance, x_coord, y_coord, inverted_x, inverted_y,
                     factorized, *canonical_inverted, canonical)
            print(message, file=sys.stderr, flush=True)
            assert acceptable, message

        return inverted_x, inverted_y