def cluster(src, key="AvgTonalCentroid(6)", nbins=15, min_volume=-30):
    """Group the analysed segments of ``src`` into ``nbins`` clusters.

    Every record yielded by ``meap.analysis(src)`` contributes a segment
    ``(onset_time, chunk_length, index)``, a feature value ``record[key]``
    and an ``"RMSAmplitude(1)"`` reading.  Segments whose RMS reading is
    below ``min_volume`` are excluded from clustering and collected in one
    extra bucket; the remaining segments are clustered on their feature
    values with ``kmeans`` followed by ``vq`` (presumably the
    ``scipy.cluster.vq`` functions -- confirm against the file's imports).

    The result is cached on disk as ``"<src>-<key>-<nbins>.json"``.  When
    that file already exists it is loaded with ``deserialize`` and returned
    without recomputing anything.

    NOTE(review): the cache file name does not include ``min_volume``, so a
    cached result is returned even when it was computed with a different
    threshold.  The name is kept unchanged here so existing cache files
    stay valid.

    Args:
        src: Value passed to ``meap.analysis`` and used as the prefix of the
            cache file name.
        key: Name of the per-record field used as the clustering feature.
        nbins: Number of clusters requested from ``kmeans``.
        min_volume: Records with ``"RMSAmplitude(1)"`` below this value are
            treated as too quiet to cluster.

    Returns:
        On a cache miss, a dict with keys ``0 .. nbins``.  Keys
        ``0 .. nbins - 1`` map to lists of ``[onset_time, chunk_length,
        index]`` rows (converted from a numpy array, so the index comes back
        as a float) for the segments assigned to that cluster; key ``nbins``
        maps to the rows of the too-quiet segments.  On a cache hit,
        whatever ``deserialize`` returns for the cached file.
    """
    jsonfile = "%s-%s-%d.json" % (src, key, nbins)
    if os.path.exists(jsonfile):
        return deserialize(jsonfile)

    segs = []
    features = []
    rms = []
    for idx, record in enumerate(meap.analysis(src)):
        segs.append((record["onset_time"], record["chunk_length"], idx))
        features.append(record[key])
        rms.append(record["RMSAmplitude(1)"])

    rms = np.array(rms)
    features = np.array(features)
    segs = np.array(segs)

    # Split the segments into "too quiet" and "loud enough to cluster".
    too_quiet = rms < min_volume
    loud_enough = np.invert(too_quiet)
    quiet_segs = segs[too_quiet]
    features = features[loud_enough]
    segs = segs[loud_enough]

    # Every bucket exists in the output even when no segment lands in it.
    out = {i: [] for i in range(nbins + 1)}

    # kmeans cannot run on an empty observation set; when nothing is loud
    # enough (or the analysis is empty) leave the cluster buckets empty
    # instead of raising.
    if len(features):
        codebook, _distortion = kmeans(features, nbins)
        labels, _distance = vq(features, codebook)
        for label, seg in zip(labels, segs):
            out[label].append(seg.tolist())

    # The extra bucket past the clusters holds the segments that were
    # filtered out for being too quiet.
    out[nbins] = quiet_segs.tolist()

    serialize(out, jsonfile)
    return out
def _get_features(self):
    """Collect the ``self.key`` value of every analysis record into an array.

    Runs ``meap.analysis`` on ``self.path`` and reads the field named by
    ``self.key`` from each record it yields.

    Returns:
        A numpy array built from the per-record values, in the order the
        analysis yields them.
    """
    values = []
    for record in meap.analysis(self.path):
        values.append(record[self.key])
    return np.array(values)
def _get_segments(self):
    """Build one ``Seg`` per record of the analysis of ``self.path``.

    Each ``Seg`` is constructed from the record's ``"onset_time"``, its
    ``"chunk_length"`` and the record's position in the analysis.

    Returns:
        A list of ``Seg`` objects in analysis order.
    """
    segments = []
    for position, record in enumerate(meap.analysis(self.path)):
        onset = record["onset_time"]
        length = record["chunk_length"]
        segments.append(Seg(onset, length, position))
    return segments
def _get_segments(self):
    """Return the segments described by the analysis of ``self.path``.

    For every record yielded by ``meap.analysis(self.path)`` a ``Seg`` is
    created from the record's ``"onset_time"`` and ``"chunk_length"`` fields
    together with the record's zero-based position.

    Returns:
        A list of ``Seg`` objects, one per record, in analysis order.
    """
    return list(
        Seg(rec["onset_time"], rec["chunk_length"], n)
        for n, rec in enumerate(meap.analysis(self.path))
    )