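# --- A minimal sketch of the module-level setup these functions assume. ---
# None of the imports, connection objects, or constants below appear in the
# original listing: names such as M, CHUNK_SIZE, NORM_CHUNK_SIZE, the pickle
# paths, and the conn/c connection objects are assumptions, filled in only so
# the functions below read as a self-contained module.
import pickle
import sqlite3
import time

import numpy as np

import msdtools        # MCS / p-norm helpers (assumed local module)
import pardora_db      # database access layer (assumed local module)
import pardora_ubm     # UBM adaptation routines (assumed local module)

# Hypothetical configuration values; adjust to the actual dataset.
M = 64                               # number of UBM mixture components (assumed)
CHUNK_SIZE = 1000                    # songs processed per database chunk (assumed)
NORM_CHUNK_SIZE = 100                # block size for p-norm estimation (assumed)
song_id_pkl = "song_ids.pkl"         # pickled list of song IDs (assumed path)
norm_param_pkl = "norm_params.pkl"   # pickled normalisation parameters (assumed path)

# Module-level connection and cursor used for writes (assumed database file).
conn = sqlite3.connect("pardora.db")
c = conn.cursor()
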
def get_query_data(self, song_id_list):
    """Build the query supervector dict for a list of song IDs."""
    # Load the normalisation parameters computed offline.
    p = open(norm_param_pkl, "rb")
    song_sv_dict = pickle.load(p)
    p.close()

    print "NUMBER OF SONGS IN QUERY: ", len(song_id_list)

    if len(song_id_list) > 1:
        # Multi-song query: pool the raw features and adapt fresh supervectors.
        timbre_result, rhythm_result = pardora_db.get_song_features_from_query(
            song_id_list, self.cursor)

        t_feature_list = []
        r_feature_list = []

        # Each row is (n_rows, n_cols, raw_buffer); rebuild the feature matrix.
        for row in timbre_result:
            feats = np.array(np.ndarray((row[0], row[1]), buffer=row[2]),
                             dtype=np.float32)
            t_feature_list.append(feats)
        timbre_features = np.concatenate(t_feature_list)

        for row in rhythm_result:
            feats = np.array(np.ndarray((row[0], row[1]), buffer=row[2]),
                             dtype=np.float32)
            # Rhythm features are stored transposed relative to timbre.
            feats = feats.T
            r_feature_list.append(feats)
        rhythm_features = np.concatenate(r_feature_list)

        print "INFO: Timbre features shape:", timbre_features.shape
        print "INFO: Rhythm features shape:", rhythm_features.shape

        # Adapt the UBMs to the pooled features to get the query supervectors.
        query_timbre_sv = pardora_ubm.adapt_model(timbre_features,
                                                  self.timbre_ubm_params, M)
        query_rhythm_sv = pardora_ubm.adapt_model(rhythm_features,
                                                  self.rhythm_ubm_params, M)

        # MCS-normalise against the stored song supervector means.
        query_timbre_sv = msdtools.mcs_norm(query_timbre_sv,
                                            song_sv_dict['t_sv_mean'])
        query_rhythm_sv = msdtools.mcs_norm(query_rhythm_sv,
                                            song_sv_dict['r_sv_mean'])

        # P-norm parameters computed against the stored supervector sample.
        p_mean_t, p_sigma_t = msdtools.p_norm_params_single(
            query_timbre_sv, song_sv_dict['t_sv_sample'].T)
        p_mean_r, p_sigma_r = msdtools.p_norm_params_single(
            query_rhythm_sv, song_sv_dict['r_sv_sample'].T)

        query_dict = {}
        query_dict['q_t_sv'] = query_timbre_sv
        query_dict['q_r_sv'] = query_rhythm_sv
        query_dict['p_mean_t'] = p_mean_t
        query_dict['p_mean_r'] = p_mean_r
        query_dict['p_sigma_t'] = p_sigma_t
        query_dict['p_sigma_r'] = p_sigma_r
    else:
        # Single-song query: the supervector data is already in the database.
        query_dict = pardora_db.get_song_sv_data(song_id_list[0], self.cursor)

    return query_dict

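# A hedged usage sketch of get_query_data. `PardoraQuery` is a hypothetical
# name for the class that owns this method (the class definition is not shown
# in this listing), and the song IDs are placeholders. For a multi-song query
# the returned dict carries the pooled supervectors plus the p-norm parameters
# used later when scoring candidate songs.
if __name__ == "__main__":
    engine = PardoraQuery()                                  # hypothetical constructor
    query = engine.get_query_data(["SO_PLACEHOLDER_ID_1",
                                   "SO_PLACEHOLDER_ID_2"])   # placeholder IDs
    print query['q_t_sv'].shape, query['q_r_sv'].shape
    print query['p_mean_t'], query['p_sigma_t']
    print query['p_mean_r'], query['p_sigma_r']
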
def compute_and_add_song_svs(timbre_ubm_params, rhythm_ubm_params, db_cursor):
    """Compute timbre and rhythm supervectors for all songs and store them in the DB."""
    print "............... Computing and Adding Supervectors To DB ...................."

    p = open(song_id_pkl, "rb")
    song_ids = pickle.load(p)
    p.close()

    song_id_chunks = chunks(song_ids, CHUNK_SIZE)
    chunk_count = 0

    t_mean_to_use = np.zeros(1)
    t_sv = np.zeros(1)
    r_mean_to_use = np.zeros(1)
    r_sv = np.zeros(1)

    all_songs_list = []
    total_time = time.time()

    for chunk in song_id_chunks:
        song_id_list = []
        timbre_sv_arr = []
        rhythm_sv_arr = []

        print "==== CHUNK: ", chunk_count, "===="
        chunk_count += 1
        chunk_time = time.time()
        st = time.time()

        # Rhythm supervectors: adapt the rhythm UBM to each song's features.
        songs = pardora_db.get_rhythm_features_for_song_ids(chunk, db_cursor)
        for s in songs:
            if s[1] is not None and s[2] is not None:
                feats = np.array(np.ndarray((s[1], s[2]), buffer=s[0]),
                                 dtype=np.float32)
                feats_t = feats.T
                rhythm_sv = pardora_ubm.adapt_model(feats_t, rhythm_ubm_params, M)
                rhythm_sv_arr.append(rhythm_sv)
                song_id_list.append(s[3])
        print "INFO: Rhythm SV comp time: ", time.time() - st

        st = time.time()
        # Timbre supervectors, only for songs that also had rhythm features.
        songs = pardora_db.get_timbre_features_for_song_ids(chunk, db_cursor)
        for s in songs:
            if s[3] in song_id_list:
                feats = np.array(np.ndarray((s[1], s[2]), buffer=s[0]),
                                 dtype=np.float32)
                timbre_sv = pardora_ubm.adapt_model(feats, timbre_ubm_params, M)
                timbre_sv_arr.append(timbre_sv)
        print "INFO: Timbre SV comp time: ", time.time() - st

        st = time.time()
        t_sv = np.vstack(timbre_sv_arr)
        del timbre_sv_arr
        r_sv = np.vstack(rhythm_sv_arr)
        del rhythm_sv_arr

        # The first chunk's mean is reused for MCS normalisation of every chunk.
        if chunk_count == 1:
            t_mean_to_use = np.mean(t_sv, axis=0)
            r_mean_to_use = np.mean(r_sv, axis=0)

        t_sv = msdtools.mcs_norm(t_sv.T, t_mean_to_use).T
        r_sv = msdtools.mcs_norm(r_sv.T, r_mean_to_use).T
        print "INFO: MCS norm computation time: ", time.time() - st

        st = time.time()
        # P-norm parameters for each song, computed against the chunk itself.
        p_means_t, p_sigmas_t = msdtools.p_norm_params_chunk(t_sv.T, t_sv.T,
                                                             NORM_CHUNK_SIZE)
        p_means_r, p_sigmas_r = msdtools.p_norm_params_chunk(r_sv.T, r_sv.T,
                                                             NORM_CHUNK_SIZE)
        print "INFO: P-means computation time: ", time.time() - st

        st = time.time()
        # Write each song's supervectors and p-norm parameters to the database.
        for idx, s_id in enumerate(song_id_list):
            t = np.array(t_sv[idx])
            r = np.array(r_sv[idx])
            if s_id not in all_songs_list:
                pardora_db.add_sv_and_p_vals_to_db(conn, c, s_id, t, r,
                                                   p_means_t[idx], p_sigmas_t[idx],
                                                   p_means_r[idx], p_sigmas_r[idx],
                                                   db_cursor)
                all_songs_list.append(s_id)
        conn.commit()
        print "INFO: Database update time: ", time.time() - st
        print "INFO: TOTAL CHUNK TIME:", time.time() - chunk_time
        print "=============================================="

    print "INFO: TOTAL SV COMP TIME: ", time.time() - total_time
    print "=============================================="

    # Save the normalisation parameters (first-chunk means and the last chunk's
    # supervectors as a sample) for use at query time.
    d = {}
    d['t_sv_mean'] = t_mean_to_use
    d['t_sv_sample'] = t_sv
    d['r_sv_mean'] = r_mean_to_use
    d['r_sv_sample'] = r_sv

    p = open(norm_param_pkl, "wb")
    pickle.dump(d, p, True)
    p.close()
    return

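# A minimal sketch of the chunks() helper used above. The original helper is
# not shown in this listing, so this is an assumption: it simply splits the
# song-ID list into consecutive slices of at most n items.
def chunks(lst, n):
    """Yield successive slices of length n from lst (the last slice may be shorter)."""
    for i in xrange(0, len(lst), n):
        yield lst[i:i + n]
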