def getCSMSmithWatermanScores(Features1, O1, Features2, O2, Kappa, Type, doPlot=False):
    """
    Compute the Smith Waterman score between two songs
    using a single feature set

    :param Features1: Mxk matrix of features in song 1
    :param O1: Auxiliary info for song 1
    :param Features2: Nxk matrix of features in song 2
    :param O2: Auxiliary info for song 2
    :param Kappa: Nearest neighbors param for CSM
    :param Type: Type of CSM to use
    :param doPlot: If True, plot the results of Smith waterman
    :returns: Score if doPlot = False, or dictionary of
        {'score', 'DBinary', 'D', 'maxD', 'CSM'} if doPlot is True
    """
    CSM = getCSMType(Features1, O1, Features2, O2, Type)
    DBinary = CSMToBinaryMutual(CSM, Kappa)
    if doPlot:
        #The SA variant is unpacked as (best score, matrix D) so that
        #D can be displayed next to the CSM and its binary version
        (maxD, D) = SA.swalignimpconstrained(DBinary)
        plt.subplot(131)
        plt.imshow(CSM, interpolation='nearest', cmap='afmhot')
        plt.title('CSM')
        plt.subplot(132)
        plt.imshow(1 - DBinary, interpolation='nearest', cmap='gray')
        #Raw string: "\k" is not a valid escape sequence in a normal
        #string literal, and the backslash must reach matplotlib intact
        plt.title(r"CSM Binary, $\kappa$=%g" % Kappa)
        plt.subplot(133)
        plt.imshow(D, interpolation='nearest', cmap='afmhot')
        plt.title("Smith Waterman Score = %g" % maxD)
        return {'score': maxD, 'DBinary': DBinary, 'D': D, 'maxD': maxD, 'CSM': CSM}
    #Without plotting, the SAC variant's return value is handed back as is
    return SAC.swalignimpconstrained(DBinary)
def getCSMSmithWatermanScoresORMerge(AllFeatures1, O1, AllFeatures2, O2, Kappa, CSMTypes, doPlot=False):
    """
    Score two songs with Smith Waterman after OR-ing together the binary
    cross-similarity matrices of the individual feature sets

    :param AllFeatures1: A dictionary of Mxk matrices of features in song 1
    :param O1: Auxiliary info for song 1
    :param AllFeatures2: A dictionary of Nxk matrices of features in song 2
    :param O2: Auxiliary info for song 2
    :param Kappa: Nearest neighbors param for CSM
    :param CSMTypes: Dictionary of types of CSMs for each feature
    :param doPlot: If True, plot the results of the fusion and of
        Smith Waterman
    :returns: Score if doPlot = False, or dictionary of
        {'score', 'DBinary', 'D', 'maxD'} if doPlot is True
    """
    featureNames = list(AllFeatures1)
    crossSims = []
    binaryMasks = []
    #One CSM and one binarized CSM per feature, in dictionary order
    for name in featureNames:
        csm = getCSMType(AllFeatures1[name], O1, AllFeatures2[name], O2, CSMTypes[name])
        crossSims.append(csm)
        binaryMasks.append(CSMToBinaryMutual(csm, Kappa))
    #OR merge: accumulate every mask, then clamp positive entries to 1
    DBinary = np.zeros(binaryMasks[0].shape)
    for mask in binaryMasks:
        DBinary += mask
    DBinary[DBinary > 0] = 1
    if not doPlot:
        return SAC.swalignimpconstrained(DBinary)
    #TODO: I have no idea why I'm seeing a large gap
    (maxD, D) = SA.swalignimpconstrained(DBinary)
    nFeatures = len(crossSims)
    nCols = nFeatures + 1
    #Top row: raw CSMs; bottom row: their binary versions. The last
    #column holds the Smith Waterman matrix (top) and merged mask (bottom)
    for col, (name, csm, mask) in enumerate(zip(featureNames, crossSims, binaryMasks)):
        print("plt.subplot(2, %i, %i)" % (nCols, col + 1))
        plt.subplot(2, nCols, col + 1)
        plt.imshow(csm, interpolation='nearest', cmap='afmhot')
        plt.title('CSM %s' % name)
        plt.subplot(2, nCols, nCols + 1 + col)
        plt.imshow(1 - mask, interpolation='nearest', cmap='gray')
        plt.title("CSM Binary %s K=%g" % (name, Kappa))
    plt.subplot(2, nCols, 2 * nFeatures + 2)
    plt.imshow(DBinary, interpolation='nearest', cmap='afmhot')
    plt.title('CSM Binary OR Merged')
    plt.subplot(2, nCols, nCols)
    plt.imshow(D, interpolation='nearest', cmap='afmhot')
    plt.title("Smith Waterman Score = %g" % maxD)
    return {'score': maxD, 'DBinary': DBinary, 'D': D, 'maxD': maxD}
def getCSMSmithWatermanScoresORMerge(AllFeatures1, O1, AllFeatures2, O2, Kappa, CSMTypes, doPlot = False):
    """
    Compute the Smith Waterman score between two songs on the binary OR
    of the per-feature binary cross-similarity matrices

    :param AllFeatures1: A dictionary of Mxk matrices of features in song 1
    :param O1: Auxiliary info for song 1
    :param AllFeatures2: A dictionary of Nxk matrices of features in song 2
    :param O2: Auxiliary info for song 2
    :param Kappa: Nearest neighbors param for CSM
    :param CSMTypes: Dictionary of types of CSMs for each feature
    :param doPlot: If True, plot the results of the fusion and of
        Smith Waterman
    :returns: Score if doPlot = False, or dictionary of
        {'score', 'DBinary', 'D', 'maxD'} if doPlot is True
    """
    Features = list(AllFeatures1)
    CSMs = []
    DsBinary = []
    #Build each feature's CSM followed by its binarized counterpart
    for F in Features:
        thisCSM = getCSMType(AllFeatures1[F], O1, AllFeatures2[F], O2, CSMTypes[F])
        CSMs.append(thisCSM)
        DsBinary.append(CSMToBinaryMutual(thisCSM, Kappa))
    #Sum the binary matrices and saturate at 1, which acts as a logical OR
    DBinary = np.zeros(DsBinary[0].shape)
    for thisBinary in DsBinary:
        DBinary += thisBinary
    DBinary[DBinary > 0] = 1
    if not doPlot:
        return SAC.swalignimpconstrained(DBinary)
    #TODO: I have no idea why I'm seeing a large gap
    (maxD, D) = SA.swalignimpconstrained(DBinary)
    N = len(CSMs)
    width = N + 1
    for i, (F, thisCSM, thisBinary) in enumerate(zip(Features, CSMs, DsBinary)):
        topSlot = i + 1
        bottomSlot = N + 2 + i
        print("plt.subplot(2, %i, %i)"%(width, topSlot))
        plt.subplot(2, width, topSlot)
        plt.imshow(thisCSM, interpolation = 'nearest', cmap = 'afmhot')
        plt.title('CSM %s'%F)
        plt.subplot(2, width, bottomSlot)
        plt.imshow(1-thisBinary, interpolation = 'nearest', cmap = 'gray')
        plt.title("CSM Binary %s K=%g"%(F, Kappa))
    #Bottom right: the merged binary matrix
    plt.subplot(2, width, 2*N+2)
    plt.imshow(DBinary, interpolation = 'nearest', cmap = 'afmhot')
    plt.title('CSM Binary OR Merged')
    #Top right: the matrix returned alongside the score
    plt.subplot(2, width, width)
    plt.imshow(D, interpolation = 'nearest', cmap = 'afmhot')
    plt.title("Smith Waterman Score = %g"%maxD)
    results = {'score':maxD, 'DBinary':DBinary, 'D':D, 'maxD':maxD}
    return results
def getCSMSmithWatermanScores(Features1, O1, Features2, O2, Kappa, Type, doPlot=False):
    """
    Compute the Smith Waterman score between two songs
    using a single feature set

    :param Features1: Mxk matrix of features in song 1
    :param O1: Auxiliary info for song 1
    :param Features2: Nxk matrix of features in song 2
    :param O2: Auxiliary info for song 2
    :param Kappa: Nearest neighbors param for CSM
    :param Type: Type of CSM to use
    :param doPlot: If True, plot the results of Smith waterman
    :returns: Score if doPlot = False, or dictionary of
        {'score', 'DBinary', 'D', 'maxD', 'CSM'} if doPlot is True
    """
    CSM = getCSMType(Features1, O1, Features2, O2, Type)
    DBinary = CSMToBinaryMutual(CSM, Kappa)
    if doPlot:
        #SA.swalignimpconstrained is unpacked into the best score and
        #the matrix D, which is shown in the third panel
        (maxD, D) = SA.swalignimpconstrained(DBinary)
        plt.subplot(131)
        plt.imshow(CSM, interpolation='nearest', cmap='afmhot')
        plt.title('CSM')
        plt.subplot(132)
        plt.imshow(1 - DBinary, interpolation='nearest', cmap='gray')
        #Raw string keeps the backslash of \kappa without relying on
        #Python passing an unrecognized "\k" escape through unchanged
        plt.title(r"CSM Binary, $\kappa$=%g" % Kappa)
        plt.subplot(133)
        plt.imshow(D, interpolation='nearest', cmap='afmhot')
        plt.title("Smith Waterman Score = %g" % maxD)
        return {
            'score': maxD,
            'DBinary': DBinary,
            'D': D,
            'maxD': maxD,
            'CSM': CSM
        }
    #No plot requested: return whatever the SAC variant reports as the score
    return SAC.swalignimpconstrained(DBinary)
def getCSMSmithWatermanScoresEarlyFusionFull(AllFeatures1, O1, AllFeatures2, O2, Kappa, K, NIters, CSMTypes, doPlot=False, conservative=False):
    """
    Compute the Smith Waterman score between two songs after doing early
    similarity network fusion on individual feature sets

    :param AllFeatures1: A dictionary of Mxk matrices of features in song 1
    :param O1: Auxiliary info for song 1
    :param AllFeatures2: A dictionary of Nxk matrices of features in song 2
    :param O2: Auxiliary info for song 2
    :param Kappa: Nearest neighbors param for CSM
    :param K: Passed to getWCSMSSM and doSimilarityFusionWs (presumably
        the number of nearest neighbors used for fusion; confirm against
        those helpers)
    :param NIters: Passed to doSimilarityFusionWs (presumably the number
        of fusion iterations; confirm against that helper)
    :param CSMTypes: Dictionary of types of CSMs for each feature
    :param doPlot: If True, plot the results of the fusion and of
        Smith Waterman
    :param conservative: Whether to use a percentage of the closest
        distances instead of mutual nearest neighbors (False by default,
        but useful for audio synchronization)
    :returns: if doPlot = False {'score', 'CSM', 'DBinary', 'OtherCSMs'}
        if doPlot = True {'score', 'CSM', 'DBinary', 'D', 'maxD', 'path'}
    """
    CSMs = [] #Individual CSMs
    Ws = [] #W built from fused CSMs/SSMs
    Features = list(AllFeatures1)
    OtherCSMs = {}
    #Compute all CSMs and SSMs
    for F in Features:
        SSMA = getCSMType(AllFeatures1[F], O1, AllFeatures1[F], O1, CSMTypes[F])
        SSMB = getCSMType(AllFeatures2[F], O2, AllFeatures2[F], O2, CSMTypes[F])
        CSMAB = getCSMType(AllFeatures1[F], O1, AllFeatures2[F], O2, CSMTypes[F])
        CSMs.append(CSMAB)
        OtherCSMs[F] = CSMAB
        #Build W from CSM and SSMs
        Ws.append(getWCSMSSM(SSMA, SSMB, CSMAB, K))
    tic = time.time()
    DFused = doSimilarityFusionWs(Ws, K, NIters, 1)
    fusionTime = time.time() - tic
    #Split the fused matrix at the number of rows of song 1's first
    #feature matrix and add the upper right block to the transpose of
    #the lower left block
    M = AllFeatures1[Features[0]].shape[0]
    CSM = DFused[0:M, M::] + DFused[M::, 0:M].T
    #Note that the CSM is in probabilistic weight form, so the
    #"nearest neighbors" are actually those with highest weight.  So
    #apply monotonic exp(-CSM) to fix this
    if conservative:
        x = CSM.flatten()
        x = x[np.argsort(-x)] #Weights in descending order
        #Clamp the cutoff position so that small matrices
        #(3*sqrt(size) >= size) do not index past the end
        cutoffIdx = min(int(3 * np.sqrt(CSM.size)), x.size - 1)
        cutoff = x[cutoffIdx]
        DBinary = np.array(CSM)
        DBinary[CSM < cutoff] = 0
        DBinary[DBinary > 0] = 1
    else:
        DBinary = CSMToBinaryMutual(np.exp(-CSM), Kappa)
    if doPlot:
        print("Elapsed Time Similarity Fusion: %g" % fusionTime)
        NFeatures = len(CSMs)
        NCols = NFeatures + 1
        #One column per feature: CSM, binary CSM, Smith Waterman matrix
        for i, (F, thisCSM) in enumerate(zip(Features, CSMs)):
            plt.subplot(3, NCols, i + 1)
            plt.imshow(thisCSM, interpolation='nearest', cmap='afmhot')
            plt.title('CSM %s' % F)
            plt.subplot(3, NCols, NFeatures + 2 + i)
            thisDBinary = CSMToBinaryMutual(thisCSM, Kappa)
            plt.imshow(1 - thisDBinary, interpolation='nearest', cmap='gray')
            plt.title("CSM Binary %s K=%g" % (F, Kappa))
            (thisMaxD, thisD) = SA.swalignimpconstrained(thisDBinary)
            plt.subplot(3, NCols, 2 * NFeatures + 3 + i)
            plt.imshow(thisD, interpolation='nearest', cmap='afmhot')
            plt.title("Score = %g" % thisMaxD)
        #Last column: the same three panels for the fused result
        plt.subplot(3, NCols, NFeatures + 1)
        plt.imshow(CSM, interpolation='nearest', cmap='afmhot')
        plt.title("CSM W Fused")
        plt.subplot(3, NCols, 2 * NFeatures + 2)
        plt.imshow(1 - DBinary, interpolation='nearest', cmap='gray')
        plt.title('CSM Binary W Fused')
        plt.subplot(3, NCols, 3 * NFeatures + 3)
        (maxD, D, path) = SA.SWBacktrace(DBinary)
        plt.imshow(D, interpolation='nearest', cmap='afmhot')
        plt.title("Fused Score = %g" % maxD)
        return {
            'score': maxD,
            'CSM': CSM,
            'DBinary': DBinary,
            'D': D,
            'maxD': maxD,
            'path': path
        }
    return {
        'score': SAC.swalignimpconstrained(DBinary),
        'CSM': CSM,
        'DBinary': DBinary,
        'OtherCSMs': OtherCSMs
    }
def compareBatchBlock(args):
    """
    Score one rectangular block of the all pairs score matrix between
    all of the songs.  Produces a score matrix for each individual type
    of feature, in addition to one for early similarity network fusion

    :param args: Tuple (idxs, Kappa, CSMTypes, allFiles, scratchDir):
        idxs: [start1, end1, start2, end2] range of rectangular
        block of songs to compare;
        Kappa: Percent nearest neighbors to use both for binary
        cross-similarity and similarity network fusion;
        CSMTypes: Dictionary of types of features and associated
        cross-similarity comparisons to do;
        allFiles: List of all files that are being compared from
        which this block is drawn;
        scratchDir: Path to directory for storing block results
    :returns: Dictionary with a 'SNF' score matrix plus one score matrix
        per key of CSMTypes, or the result of loading the block's .mat
        file if it already exists in scratchDir
    """
    (idxs, Kappa, CSMTypes, allFiles, scratchDir) = args
    (rowStart, rowEnd, colStart, colEnd) = (idxs[0], idxs[1], idxs[2], idxs[3])
    DsFilename = "%s/D%i_%i_%i_%i.mat" % (scratchDir, rowStart, rowEnd, colStart, colEnd)
    #A block that was already written to disk is simply reloaded
    if os.path.exists(DsFilename):
        return sio.loadmat(DsFilename)
    #Block dimensions
    thisM = rowEnd - rowStart
    thisN = colEnd - colStart
    #Block-sized scratch matrix (rebound to the fusion output in the loops)
    D = np.zeros((thisM, thisN))
    AllFeatures = {}
    tic = time.time()
    rowSongs = [i + rowStart for i in range(thisM)]
    colSongs = [j + colStart for j in range(thisN)]
    allidxs = np.unique(np.array(rowSongs + colSongs))
    #Load the features (and stored W matrices) of every song touched by
    #this block exactly once
    ticfeatures = time.time()
    for idx in allidxs:
        songData = sio.loadmat(getMatFilename(scratchDir, allFiles[idx]))
        AllFeatures[idx] = songData
        #Replace single-element arrays by the element itself
        for key, val in songData.items():
            if type(val) is np.ndarray and val.size == 1:
                songData[key] = val.flatten()[0]
    loadElapsed = time.time() - ticfeatures
    print("Elapsed Time Loading Features: ", loadElapsed)
    stdout.flush()
    K = 20
    NIters = 3
    Ds = {'SNF': np.zeros((thisM, thisN))}
    for Feature in CSMTypes:
        Ds[Feature] = np.zeros((thisM, thisN))
    for i in range(thisM):
        print("i = %i" % i)
        stdout.flush()
        thisi = rowStart + i
        Features1 = AllFeatures[thisi]
        for j in range(thisN):
            thisj = colStart + j
            #Only the upper triangular part is computed since it's symmetric
            if thisj < thisi:
                continue
            Features2 = AllFeatures[thisj]
            #Try every pairing of tempo levels and keep the best scores
            for a in range(Features1['NTempos']):
                O1 = {'ChromaMean': Features1['ChromaMean%i' % a].flatten()}
                for b in range(Features2['NTempos']):
                    O2 = {'ChromaMean': Features2['ChromaMean%i' % b].flatten()}
                    Ws = []
                    OtherCSMs = {}
                    (M, N) = (0, 0)
                    #One W matrix per feature type
                    for F, csmType in CSMTypes.items():
                        CSMAB = getCSMType(Features1['%s%i' % (F, a)], O1,
                                           Features2['%s%i' % (F, b)], O2,
                                           csmType)
                        OtherCSMs[F] = CSMAB
                        (M, N) = (CSMAB.shape[0], CSMAB.shape[1])
                        WCSMAB = getWCSM(CSMAB, int(0.5 * Kappa * M), int(0.5 * Kappa * N))
                        WSSMA = Features1['W%s%i' % (F, a)]
                        WSSMB = Features2['W%s%i' % (F, b)]
                        Ws.append(setupWCSMSSM(WSSMA, WSSMB, WCSMAB))
                    #Similarity fusion, then pull out the cross-similarity
                    #part from the two off-diagonal blocks
                    D = doSimilarityFusionWs(Ws, K, NIters, 1)
                    CSM = D[0:M, M::] + D[M::, 0:M].T
                    fusedBinary = CSMToBinaryMutual(np.exp(-CSM), Kappa)
                    fusedScore = SAC.swalignimpconstrained(fusedBinary)
                    Ds['SNF'][i, j] = max(fusedScore, Ds['SNF'][i, j])
                    #In addition to fusion, compute scores for individual
                    #features to be used with the fusion later
                    for Feature, featureCSM in OtherCSMs.items():
                        featureBinary = CSMToBinaryMutual(featureCSM, Kappa)
                        featureScore = SAC.swalignimpconstrained(featureBinary)
                        Ds[Feature][i, j] = max(Ds[Feature][i, j], featureScore)
    blockElapsed = time.time() - tic
    print("Elapsed Time Block: ", blockElapsed)
    stdout.flush()
    sio.savemat(DsFilename, Ds)
    return Ds
def compareBatchBlock(args):
    """
    Process a rectangular block of the all pairs score matrix
    between all of the songs.  Return score matrices for each
    individual type of feature, in addition to one for early
    similarity network fusion

    :param args: Tuple (idxs, Kappa, CSMTypes, allFiles, scratchDir):
        idxs: [start1, end1, start2, end2] range of rectangular
        block of songs to compare;
        Kappa: Percent nearest neighbors to use both for binary
        cross-similarity and similarity network fusion;
        CSMTypes: Dictionary of types of features and associated
        cross-similarity comparisons to do;
        allFiles: List of all files that are being compared from
        which this block is drawn;
        scratchDir: Path to directory for storing block results
    :returns: Dictionary with a 'SNF' score matrix plus one score matrix
        per key of CSMTypes, or the result of loading the block's .mat
        file if it already exists in scratchDir
    """
    (idxs, Kappa, CSMTypes, allFiles, scratchDir) = args
    DsFilename = "%s/D%i_%i_%i_%i.mat"%(scratchDir, idxs[0], idxs[1], idxs[2], idxs[3])
    #Reuse the stored result if this block was already processed
    if os.path.exists(DsFilename):
        return sio.loadmat(DsFilename)
    #Figure out block size thisM x thisN
    thisM = idxs[1] - idxs[0]
    thisN = idxs[3] - idxs[2]
    AllFeatures = {}
    tic = time.time()
    allidxs = [i + idxs[0] for i in range(thisM)]
    allidxs += [j + idxs[2] for j in range(thisN)]
    allidxs = np.unique(np.array(allidxs))
    #Preload features and Ws for SSM parts
    ticfeatures = time.time()
    for idx in allidxs:
        filename = getMatFilename(scratchDir, allFiles[idx])
        AllFeatures[idx] = sio.loadmat(filename)
        #loadmat returns stored scalars wrapped in arrays (unless
        #squeeze_me is set), but range(Features['NTempos']) below needs
        #a plain scalar, so unwrap every single-element array
        for key, val in AllFeatures[idx].items():
            if isinstance(val, np.ndarray) and val.size == 1:
                AllFeatures[idx][key] = val.flatten()[0]
    tocfeatures = time.time()
    print("Elapsed Time Loading Features: ", tocfeatures-ticfeatures)
    stdout.flush()
    #Both are handed to doSimilarityFusionWs (presumably the neighbor
    #count and the number of fusion iterations; confirm in that helper)
    K = 20
    NIters = 3
    Ds = {'SNF':np.zeros((thisM, thisN))}
    for Feature in CSMTypes:
        Ds[Feature] = np.zeros((thisM, thisN))
    for i in range(thisM):
        print("i = %i"%i)
        stdout.flush()
        thisi = i + idxs[0]
        Features1 = AllFeatures[thisi]
        for j in range(thisN):
            thisj = j + idxs[2]
            if thisj < thisi:
                #Only compute upper triangular part since it's symmetric
                continue
            Features2 = AllFeatures[thisj]
            #Compare all tempo levels
            for a in range(Features1['NTempos']):
                O1 = {'ChromaMean':Features1['ChromaMean%i'%a].flatten()}
                for b in range(Features2['NTempos']):
                    O2 = {'ChromaMean':Features2['ChromaMean%i'%b].flatten()}
                    Ws = []
                    OtherCSMs = {}
                    #Compute all W matrices
                    (M, N) = (0, 0)
                    for F in CSMTypes:
                        CSMAB = getCSMType(Features1['%s%i'%(F, a)], O1, Features2['%s%i'%(F, b)], O2, CSMTypes[F])
                        OtherCSMs[F] = CSMAB
                        (M, N) = (CSMAB.shape[0], CSMAB.shape[1])
                        k1 = int(0.5*Kappa*M)
                        k2 = int(0.5*Kappa*N)
                        WCSMAB = getWCSM(CSMAB, k1, k2)
                        WSSMA = Features1['W%s%i'%(F, a)]
                        WSSMB = Features2['W%s%i'%(F, b)]
                        Ws.append(setupWCSMSSM(WSSMA, WSSMB, WCSMAB))
                    #Do Similarity Fusion
                    D = doSimilarityFusionWs(Ws, K, NIters, 1)
                    #Extract CSM Part
                    CSM = D[0:M, M::] + D[M::, 0:M].T
                    DBinary = CSMToBinaryMutual(np.exp(-CSM), Kappa)
                    score = SAC.swalignimpconstrained(DBinary)
                    #Keep the best score over all tempo level pairs
                    Ds['SNF'][i, j] = max(score, Ds['SNF'][i, j])
                    #In addition to fusion, compute scores for individual
                    #features to be used with the fusion later
                    for Feature in OtherCSMs:
                        DBinary = CSMToBinaryMutual(OtherCSMs[Feature], Kappa)
                        score = SAC.swalignimpconstrained(DBinary)
                        Ds[Feature][i, j] = max(Ds[Feature][i, j], score)
    toc = time.time()
    print("Elapsed Time Block: ", toc-tic)
    stdout.flush()
    sio.savemat(DsFilename, Ds)
    return Ds
def getCSMSmithWatermanScoresEarlyFusionFull(AllFeatures1, O1, AllFeatures2, O2, Kappa, K, NIters, CSMTypes, doPlot = False, conservative = False):
    """
    Compute the Smith Waterman score between two songs after doing early
    similarity network fusion on individual feature sets

    :param AllFeatures1: A dictionary of Mxk matrices of features in song 1
    :param O1: Auxiliary info for song 1
    :param AllFeatures2: A dictionary of Nxk matrices of features in song 2
    :param O2: Auxiliary info for song 2
    :param Kappa: Nearest neighbors param for CSM
    :param K: Forwarded to getWCSMSSM and doSimilarityFusionWs (presumably
        the nearest neighbor count for fusion; confirm in those helpers)
    :param NIters: Forwarded to doSimilarityFusionWs (presumably the
        number of fusion iterations; confirm in that helper)
    :param CSMTypes: Dictionary of types of CSMs for each feature
    :param doPlot: If True, plot the results of the fusion and of
        Smith Waterman
    :param conservative: Whether to use a percentage of the closest
        distances instead of mutual nearest neighbors (False by default,
        but useful for audio synchronization)
    :returns: if doPlot = False {'score', 'CSM', 'DBinary', 'OtherCSMs'}
        if doPlot = True {'score', 'CSM', 'DBinary', 'D', 'maxD', 'path'}
    """
    CSMs = [] #Individual CSMs
    Ws = [] #W built from fused CSMs/SSMs
    Features = list(AllFeatures1)
    OtherCSMs = {}
    #Compute all CSMs and SSMs
    for F in Features:
        SSMA = getCSMType(AllFeatures1[F], O1, AllFeatures1[F], O1, CSMTypes[F])
        SSMB = getCSMType(AllFeatures2[F], O2, AllFeatures2[F], O2, CSMTypes[F])
        CSMAB = getCSMType(AllFeatures1[F], O1, AllFeatures2[F], O2, CSMTypes[F])
        CSMs.append(CSMAB)
        OtherCSMs[F] = CSMAB
        #Build W from CSM and SSMs
        Ws.append(getWCSMSSM(SSMA, SSMB, CSMAB, K))
    tic = time.time()
    DFused = doSimilarityFusionWs(Ws, K, NIters, 1)
    toc = time.time()
    t1 = toc - tic
    #M is the row count of song 1's first feature matrix; the cross part
    #is the upper right block plus the transposed lower left block
    M = AllFeatures1[Features[0]].shape[0]
    CSM = DFused[0:M, M::] + DFused[M::, 0:M].T
    #Note that the CSM is in probabilistic weight form, so the
    #"nearest neighbors" are actually those with highest weight.  So
    #apply monotonic exp(-CSM) to fix this
    if conservative:
        x = CSM.flatten()
        x = x[np.argsort(-x)] #Descending weights
        #3*sqrt(size) reaches or exceeds size for matrices with at most
        #9 entries, so keep the cutoff position inside the array
        cutoffIdx = min(int(3*np.sqrt(CSM.size)), x.size - 1)
        cutoff = x[cutoffIdx]
        DBinary = np.array(CSM)
        DBinary[CSM < cutoff] = 0
        DBinary[DBinary > 0] = 1
    else:
        DBinary = CSMToBinaryMutual(np.exp(-CSM), Kappa)
    if doPlot:
        print("Elapsed Time Similarity Fusion: %g"%t1)
        NFeatures = len(CSMs)
        #Rows: CSM, binary CSM, Smith Waterman matrix.  Columns: one per
        #feature, followed by a final column for the fused result
        for i in range(NFeatures):
            plt.subplot(3, NFeatures+1, i+1)
            plt.imshow(CSMs[i], interpolation = 'nearest', cmap = 'afmhot')
            plt.title('CSM %s'%Features[i])
            plt.subplot(3, NFeatures+1, NFeatures+2+i)
            thisDBinary = CSMToBinaryMutual(CSMs[i], Kappa)
            plt.imshow(1-thisDBinary, interpolation = 'nearest', cmap = 'gray')
            plt.title("CSM Binary %s K=%g"%(Features[i], Kappa))
            (thisMaxD, thisD) = SA.swalignimpconstrained(thisDBinary)
            plt.subplot(3, NFeatures+1, 2*NFeatures+3+i)
            plt.imshow(thisD, interpolation = 'nearest', cmap = 'afmhot')
            plt.title("Score = %g"%thisMaxD)
        plt.subplot(3, NFeatures+1, NFeatures+1)
        plt.imshow(CSM, interpolation = 'nearest', cmap = 'afmhot')
        plt.title("CSM W Fused")
        plt.subplot(3, NFeatures+1, 2*NFeatures+2)
        plt.imshow(1-DBinary, interpolation = 'nearest', cmap = 'gray')
        plt.title('CSM Binary W Fused')
        plt.subplot(3, NFeatures+1, 3*NFeatures+3)
        (maxD, D, path) = SA.SWBacktrace(DBinary)
        plt.imshow(D, interpolation = 'nearest', cmap = 'afmhot')
        plt.title("Fused Score = %g"%maxD)
        return {'score':maxD, 'CSM':CSM, 'DBinary':DBinary, 'D':D, 'maxD':maxD, 'path':path}
    return {'score':SAC.swalignimpconstrained(DBinary), 'CSM':CSM, 'DBinary':DBinary, 'OtherCSMs':OtherCSMs}