def plotFoundMatches(fileList, pitchExt='.pitchSilIntrpPP', tonicExt='.tonicFine'):
    """Plot the pitch contour of every aligned match, query by query.

    The transcription segments come from ``readFullTransFile(fileList)`` and
    the matches from ``getAlignment(fileList)``.  Each match is a ``(s, e)``
    pair indexing ``st_seg`` / ``en_seg``; the contour between those two
    boundaries is drawn with ``plt.plot`` on a y-range of (-300, 1100).

    Parameters:
    fileList = text file with one file base name per line
    pitchExt = extension appended to a base name to get the pitch file
    tonicExt = extension appended to a base name to get the tonic file

    Prints the 1-based query index and the number of motifs plotted for it.
    Returns nothing.
    """
    song_str, st_seg, en_seg, spl_arr = readFullTransFile(
        fileList, fullTransExt='.fullTrans')
    # Result is not used below; the call is kept so a missing centroid file
    # still fails here exactly as it did before.
    centroids = readCentroids(centroids_file='centroids.npy')
    aligned = getAlignment(fileList)

    # Read the list through a context manager so the handle is closed.
    with open(fileList, 'r') as list_handle:
        lines = list_handle.readlines()

    # NOTE(review): the original indentation was lost.  This keeps the flat
    # reading: every listed file is loaded, and the plotting loop below works
    # on the pitch track of the LAST one.  Confirm that the plotting loop was
    # not meant to be nested inside this loop.
    for line in lines:
        base = line.strip()
        pitch, time, Hop = BO.readPitchFile(base + pitchExt)
        tonic = np.loadtxt(base + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)
        #recons = get_quantized_ts(st_seg/Hop, en_seg/Hop, song_str, centroids, pcents)

    for query_no, matches in enumerate(aligned, 1):
        print("Query index: %s" % query_no)
        count = 0
        for s, e in matches:
            st = st_seg[s]
            en = en_seg[e]
            # Slice bounds must be integers; current NumPy rejects floats.
            contour = pcents[int(st / Hop):int(en / Hop)]
            plt.plot(np.arange(len(contour)) * Hop, contour)
            plt.ylim((-300, 1100))
            #plt.show()
            count += 1
        # Two spaces on purpose: matches the original comma-separated print.
        print("# motifs found:  %s" % count)
def get_transients(fileList, map_file, segExt='.seg', pitchExt='.pitchSilIntrpPP', tonicExt='.tonicFine'):
    """Collect normalised pitch shapes of flagged segments and save them.

    For every file in ``fileList`` the segmentation file is scanned for rows
    whose third column equals -20000 (presumably the transient label --
    confirm against the segmentation writer).  For each such row the id
    ``map_data['file_row_to_id_map'][file_index][row_index]`` is recorded, and
    if the pitch excerpt between the row's start and end time has at least 60
    samples it is passed through ``polyfit_shapes_norm`` and stacked.

    Parameters:
    fileList = text file with one file base name per line
    map_file = pickle holding a dict with key 'file_row_to_id_map'
    segExt   = extension of the segmentation file (columns: start, end, label)
    pitchExt = extension of the pitch file
    tonicExt = extension of the tonic file

    Writes ``transientIds_eval`` and ``transientShapes_eval`` via ``np.save``.
    Returns nothing.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only feed it map
    # files produced by this project.  Pickles are read in binary mode.
    with open(map_file, 'rb') as map_handle:
        map_data = pickle.load(map_handle)
    with open(fileList, 'r') as list_handle:
        lines = list_handle.readlines()

    ids_data = []
    shapes = []
    for ii, line in enumerate(lines):
        base = line.strip()
        print(base)
        # ndmin=2 keeps a one-row file two-dimensional so row indexing works.
        seg_file = np.loadtxt(base + segExt, ndmin=2)
        pitch, time, Hop = BO.readPitchFile(base + pitchExt)
        # tonic / pcents are not used further; they are kept so a missing
        # tonic file is still reported as before.
        tonic = np.loadtxt(base + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)
        for jj in range(seg_file.shape[0]):
            if seg_file[jj][2] != -20000:
                continue
            # NOTE(review): the id is stored for EVERY flagged row, but a shape
            # is only stored for excerpts of >= 60 samples, so the two saved
            # arrays can differ in length.  Behaviour kept; confirm intent.
            ids_data.append(map_data['file_row_to_id_map'][ii][jj])
            start_ind = find_ind(time, seg_file[jj][0])
            end_ind = find_ind(time, seg_file[jj][1])
            segment = pitch[start_ind:end_ind]
            if len(segment) >= 60:
                shapes.append(polyfit_shapes_norm(segment))

    # Stack once at the end; an empty result is saved as an empty array
    # rather than failing on an unbound name.
    aggregate = np.vstack(shapes) if shapes else np.empty((0, 0))
    print(aggregate.shape)

    # For training data
    #------------------
    #np.save('transientIds',np.array(ids_data))
    #np.save('transientShapes',aggregate)
    # For unknown data
    #-----------------
    np.save('transientIds_eval', np.array(ids_data))
    np.save('transientShapes_eval', aggregate)
def generateLinearDataset(self, root_dir, output_dir, pitchExt, tonicExt, downsampleFactor, min_nyas_dur=-1):
    """Concatenate the pitch tracks of all files under root_dir into one dataset.

    Every file returned by ``BP.GetFileNamesInDir(root_dir, pitchExt)`` is
    converted to cents, optionally has its nyas segments overwritten with
    -5000, is downsampled by ``downsampleFactor``, and has every sample below
    -4000 cents removed before being appended to the aggregate.

    Parameters:
    root_dir         = directory searched for pitch files
    output_dir       = directory receiving the three output files
    pitchExt         = extension of the pitch files
    tonicExt         = extension of the tonic files
    downsampleFactor = factor handed to ``BPO.downsamplesPitchData``
    min_nyas_dur     = if > 0, nyas segments filtered with this minimum
                       duration are blanked out before downsampling

    Outputs (inside output_dir):
    AggPitch.txt  = aggregated pitch values, two decimals
    AggTime.txt   = matching time stamps, two decimals
    fileInfo.yaml = filename -> [start, end) sample range in the aggregate
    """
    # Seeding with an empty float array keeps the float64 result type and
    # makes the no-files case produce empty outputs.
    pitch_chunks = [np.array([])]
    time_chunks = [np.array([])]
    fileInfo = {}
    n_samples = 0

    for filename in BP.GetFileNamesInDir(root_dir, pitchExt):
        fname, ext = os.path.splitext(filename)

        #reading pitch and tonic data
        pitchData, timeStamps, pHop = BPO.readPitchFile(fname + pitchExt)
        # Passing the path lets numpy open and close the file itself.
        tonic = np.loadtxt(fname + tonicExt)
        pCents = BPO.PitchHz2Cents(pitchData, tonic)

        #some preprocessing
        #removing flat regions
        if min_nyas_dur > 0:
            msObj = MS.nyasSegmentation()
            msObj.ComputeNyasCandidates(pitchData, tonic.tolist(), pHop)
            msObj.FilterNyasCandidates(min_nyas_duration=min_nyas_dur)
            for swar_segments in msObj.nyasInfo.values():
                for seg in swar_segments:
                    # -5000 falls below the silence threshold used further
                    # down, so these samples are dropped there.
                    pCents[seg[0]:seg[1]] = -5000

        #downsampling
        pCents, pHop, timeStamps = BPO.downsamplesPitchData(
            pCents, pHop, timeStamps, downsampleFactor)

        #removing silence regions
        ###Please correct this silence condition once log eps is used
        ind_silence = np.where(pCents < -4000)[0]
        pCents = np.delete(pCents, ind_silence)
        timeStamps = np.delete(timeStamps, ind_silence)

        #accumulating (joined once after the loop instead of re-copying the
        #whole aggregate on every file)
        pitch_chunks.append(np.ravel(pCents))
        time_chunks.append(np.ravel(timeStamps))
        fileInfo[filename] = [n_samples, n_samples + timeStamps.size]
        n_samples += timeStamps.size

    pitch = np.concatenate(pitch_chunks)
    timeInfo = np.concatenate(time_chunks)

    np.savetxt(os.path.join(output_dir, 'AggPitch.txt'), pitch, fmt='%.2f')
    np.savetxt(os.path.join(output_dir, 'AggTime.txt'), timeInfo, fmt='%.2f')
    # The context manager guarantees the YAML file is flushed and closed.
    with open(os.path.join(output_dir, 'fileInfo.yaml'), 'w') as stream:
        yaml.dump(fileInfo, stream)
def generateSubsequenceDataset(self, pitchFile, tonicFile, outputCandidateFile):
    """Write sliding-window pitch subsequences of one recording to a text file.

    The pitch track is converted to cents, downsampled by a fixed factor of 3
    and cut into the segments returned by ``self.slidingWindowCandidates``.

    Parameters:
    pitchFile           = path of the pitch file
    tonicFile           = path of the tonic file
    outputCandidateFile = path of the text file to write (one subsequence per
                          row, two decimals)

    Reads ``self.params['slidingWindow']['windowLength']``.
    Raises ValueError if ``self.params`` has no 'slidingWindow' entry.
    """
    #reading pitch data
    # Passing the path lets numpy open and close the file itself.
    tonic = np.loadtxt(tonicFile)
    pitchData, timeData, pHop = BPO.readPitchFile(pitchFile)
    pCents = BPO.PitchHz2Cents(pitchData, tonic)

    #preprocessing pitch data
    factor = 3
    pCents, pHop, timeData = BPO.downsamplesPitchData(pCents, pHop, timeData, factor)

    # Look the method up by key: testing only the first dict key depended on
    # arbitrary key order and left `segments` unbound otherwise.
    if 'slidingWindow' not in self.params:
        raise ValueError(
            "generateSubsequenceDataset needs a 'slidingWindow' entry in params")

    # NOTE(review): the last argument is three times the already-downsampled
    # hop, as in the original; its meaning depends on slidingWindowCandidates.
    segments = self.slidingWindowCandidates(
        pCents, pHop, self.params['slidingWindow']['windowLength'], pHop * 3)

    dataset = [pCents[segment[0]:segment[1]] for segment in segments]
    np.savetxt(outputCandidateFile, np.array(dataset), fmt='%.2f')
def batchProc(root_dir, audioExt='.mp3', pitchExt='.pitchSilIntrpPP', tonicExt='.tonicFine'): filenames = BP.GetFileNamesInDir(root_dir, '.mp3') segObj = seg.melodySegmentation() #fig = plt.figure(figsize=(15,10), dpi=80) for filename in filenames[:]: print "Processing file %s" % filename #====================== ## This is done for all #====================== fname, ext = os.path.splitext(filename) pitch, time, Hop = BO.readPitchFile(fname + pitchExt) tonic = np.loadtxt(fname + tonicExt) pcents = BO.PitchHz2Cents(pitch, tonic) pdata = (time, pcents, Hop) ## Extract Breath Phrases #------------------------ breathPhrases = findBreathPhrases(segObj, fname, pcents, Hop) ## Histogram processing to extract note locations #------------------------------------------------ svaraSemitone, ignoreNotes = findValidSvaras(pitch, tonic) #print svaraSemitone, ignoreNotes print "Notes being ignored are: %s" % ignoreNotes ## Read valid region for evolution #--------------------------------- #endTime = readValidVistarRegion(fname) ## Svara transcription #--------------------- transcription = transcribePitch(fname, pdata, ignoreNotes) #print transcription print "-------\nDone !!\n-------" '''
def batchProc(fileList, centroids_file = 'centroids.npy', audioExt = '.mp3', pitchExt = '.pitchSilIntrpPP', tonicExt = '.tonicFine', fullTransExt = '.fullTrans', gtExt = '.gtruth'):
    """Search every listed recording for its ground-truth phrases and plot them.

    Per file: read pitch and tonic, convert to cents, read the full
    transcription, obtain the ground-truth phrases through
    ``visualizeGroundTruth``, turn them into query strings, align them against
    the song string with ``getAlignment`` and hand the result to
    ``plotFoundMatches``.

    Parameters:
    fileList       = text file with one file base name per line
    centroids_file = file passed to ``readCentroids``
    audioExt       = not used in this function
    pitchExt       = extension of the pitch file
    tonicExt       = extension of the tonic file
    fullTransExt   = extension of the full transcription file
    gtExt          = not used in this function

    Prints progress.  Returns nothing.
    """
    # Result is unused below; the call is kept so a missing centroid file is
    # still reported up front.
    centroids = readCentroids(centroids_file)

    # Read the list through a context manager so the handle is closed.
    with open(fileList, 'r') as list_handle:
        lines = list_handle.readlines()

    for line in lines:
        filename = line.strip()
        print("Processing file: %s" % filename)

        # Read pitch data
        #----------------
        pitch, time, Hop = BO.readPitchFile(filename + pitchExt)
        tonic = np.loadtxt(filename + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)

        # Read transcription
        #-------------------
        song_str, st_seg, en_seg = readFullTransFile(filename, fullTransExt = fullTransExt)

        # Read ground truth
        #------------------
        st_gt, en_gt, str_gt = visualizeGroundTruth(filename, pcents, time, Hop, song_str, st_seg, en_seg)

        # Get query strings
        #------------------
        note_sym = getQuertString(str_gt)

        # Get song string
        #----------------
        # Result is unused below; call kept as in the original.
        search_str = getSearchString(song_str)

        # Get aligned contour indices by SW
        #----------------------------------
        aligned = getAlignment(song_str, note_sym)

        # Get contour segments
        #---------------------
        # NOTE(review): the plotFoundMatches visible in this file takes
        # (fileList, pitchExt, tonicExt), and readFullTransFile is unpacked
        # into four values there but three here.  Confirm which helper
        # versions this function is meant to run against.
        plotFoundMatches(aligned, st_seg, en_seg, pcents, Hop)

    # NOTE(review): indentation was lost; the banner is printed once after
    # all files.  Confirm it was not meant to be printed per file.
    print("-------\nDone !!\n-------")
def readGroundTruth(fileList, gtExt='.gtruth', pitchExt='.pitchSilIntrpPP', tonicExt='.tonicFine'):
    """Cut the first four ground-truth phrases out of a pitch track.

    The first two columns of each ground-truth row are taken as the phrase
    start and end (in the units of the pitch file's time axis, presumably
    seconds -- confirm).  No phrase label is returned.

    Parameters:
    fileList = text file with one file base name per line
    gtExt    = extension of the ground-truth file
    pitchExt = extension of the pitch file
    tonicExt = extension of the tonic file

    Returns (segment, time_stamps, st, en, Hop, pcents):
    segment     = list of four pitch excerpts in cents
    time_stamps = list of four index ranges, np.arange(start_ind, end_ind)
    st, en      = lists of the four start and end values read from the file
    Hop         = hop size returned by the pitch reader
    pcents      = the whole pitch track in cents

    Raises ValueError if fileList contains no lines.
    """
    # Read the list through a context manager so the handle is closed.
    with open(fileList, 'r') as list_handle:
        lines = list_handle.readlines()
    if not lines:
        raise ValueError("readGroundTruth: %s lists no files" % fileList)

    # NOTE(review): the original indentation was lost.  This keeps the flat
    # reading: every listed file is loaded and the phrases are cut from the
    # LAST one.  Confirm against the intended nesting.
    for line in lines:
        base = line.strip()
        gt_file = np.loadtxt(base + gtExt)
        pitch, time, Hop = BO.readPitchFile(base + pitchExt)
        tonic = np.loadtxt(base + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)

    segment = []
    time_stamps = []
    # Only the first four annotated rows are used; the file must have at
    # least that many.
    count = 4
    st, en = [0.0] * count, [0.0] * count
    for ii in range(count):
        s, e = gt_file[ii][0], gt_file[ii][1]
        st[ii], en[ii] = s, e
        start_ind = find_ind(time, s)
        end_ind = find_ind(time, e)
        #print start_ind, end_ind
        time_stamps.append(np.arange(start_ind, end_ind))
        segment.append(pcents[start_ind:end_ind])

    return segment, time_stamps, st, en, Hop, pcents
def ComputePitchHistogram(self, pitch=-1, timeStamps=-1, tonic=-1, tRange=-1, Oct_fold=0, smth_variance=15):
    """This function computes pitch histogram

    Input parameters:
    pitch = pitch sequence
    tonic = tonic value of the lead artist
    tRange = time range within which pitch has to be considered for constructing pitch histogram
    timeStamps = time stamps needed if tRange is specified for histogram construction
    Oct_fold = (0 or 1); 0 for no octave folding, 1 for octave folding of the pitch histogram
    smth_variance = variance handed to SmoothPitchHistogram

    Every argument left at -1 falls back to the value stored on the object.

    Result: self.pCents, self.hist_Yval (peak-normalised counts) and
    self.hist_Xval (right bin edges) are updated and SmoothPitchHistogram is
    called.  Returns -1 if required information is missing, otherwise nothing.
    """
    ### reading values and throwing errors if any
    if not isinstance(pitch, int):
        self.setPitch(pitch)
    elif isinstance(self.pitch, int):
        print("Please provide a pitch file name, it was not provided during initialization")
        return -1

    if tonic != -1:
        self.setTonic(tonic)
    elif self.tonic == -1:
        print("Please provide tonic information, it was not provided during initialization")
        return -1

    if not isinstance(timeStamps, int):
        self.setTimeStamps(timeStamps)

    # -1 is the "no range" sentinel; anything else is read as a (start, end) pair.
    use_range = not (isinstance(tRange, int) and tRange == -1)
    if use_range and isinstance(self.timeStamps, int):
        print("For this option of using tRange for histogram computation, timeStamps information should also be provided")
        return -1

    ### before starting anything (and after updating all important parameters), lets convert pitch to cents
    self.pCents = BOP.PitchHz2Cents(self.pitch, self.tonic)

    ### Restrict to tRange BEFORE removing silence.  The indices come from the
    ### full-length time stamps, so applying them after the silence filter (as
    ### before) selected the wrong samples.
    ### NOTE(review): assumes self.timeStamps is sample-aligned with self.pitch -- confirm.
    pCents_local = self.pCents
    if use_range:
        str_ind = find_nearest_element_ind(self.timeStamps, tRange[0])
        end_ind = find_nearest_element_ind(self.timeStamps, tRange[1])
        pCents_local = pCents_local[str_ind:end_ind]

    ### Keep only values at or above -1200 cents (lower values are treated as
    ### silence); index-array selection returns a copy, so self.pCents is untouched
    sil_loc_inds = np.where(pCents_local >= -1200)[0]
    pCents_local = pCents_local[sil_loc_inds]

    if Oct_fold == 1:
        pCents_local = np.mod(pCents_local, 1200)

    ### histogram computation
    histogram = np.histogram(pCents_local, bins=self.nBins, range=self.hRange)

    ### astype returns fresh float arrays; the x axis holds the right bin edges
    hist_Yval = histogram[0].astype(float)
    hist_Xval = histogram[1][1:].astype(float)

    ### Normalization of the histogram (skipped for an all-zero histogram,
    ### which would otherwise turn into NaNs)
    peak_count = hist_Yval.max()
    if peak_count > 0:
        hist_Yval = hist_Yval / peak_count

    ### if Octave folding is performed, to avoid splitting of tonic note into two parts we copy the small end part of the histogram to negative values
    if Oct_fold == 1:
        # The part of Sa (tonic) that wrapped to just below 1200 is cut at the
        # nearest valley.  Cutting anywhere else can leave a sharp hump after
        # low pass filtering that would be detected as a valid swar.  The
        # valley is picked on a smoothed histogram, because jitter in the raw
        # one would give unreliable valleys.
        temp_smooth = self.SmoothPitchHistogram(Histogram=hist_Yval, Variance=smth_variance)
        peak_ind, valley_ind = DF.PeakValleyPicking(temp_smooth)
        if len(valley_ind) > 0:
            # without any valley there is nothing to move
            valley_location = hist_Xval[max(valley_ind)]
            ind2 = np.where((hist_Xval > valley_location) & (hist_Xval <= 1200))
            ind1 = np.where((hist_Xval > -(1200 - valley_location)) & (hist_Xval <= 0))
            hist_Yval[ind1] = hist_Yval[ind2]
            hist_Yval[ind2] = 0

    #updating class variables
    self.hist_Yval = hist_Yval
    self.hist_Xval = hist_Xval
    self.SmoothPitchHistogram(Variance=smth_variance)