import numpy as np
from sklearn.metrics import normalized_mutual_info_score, silhouette_score

def run_single_nmf_experiment(mat, n_components, apply_nan, labels):
    if apply_nan:
        nmf = NMF.NMF(n_components=n_components, apply_nan_mask=True, nan_weight=0.1)
    else:
        nmf = NMF.NMF(n_components=n_components)
    W, H = nmf.decompose(mat)
    # Assign each sample to the component with the largest weight
    clusters = W.argmax(axis=1)
    nmi, nmi_t, nmi_p = calc_score_significance(labels, clusters, normalized_mutual_info_score)
    if nmi_p >= 0.05:
        print('inconclusive NMI with {} and nan_mask={}'.format(n_components, apply_nan), nmi_t, nmi_p)
    sill = silhouette_score(mat.toarray(), clusters)
    accs = []
    f1s = []
    # Repeat the semi-supervised evaluation to average out split randomness
    for i in range(20):
        f1, f1_t, f1_p, acc, acc_t, acc_p = semi_supervised_classify(labels, clusters)
        accs.append(acc)
        f1s.append(f1)
        if f1_p >= 0.05:
            print('inconclusive F1 score with {} and nan_mask={}'.format(n_components, apply_nan), f1_t, f1_p)
        if acc_p >= 0.05:
            print('inconclusive accuracy score with {} and nan_mask={}'.format(n_components, apply_nan), acc_t, acc_p)
    return {'nmi': nmi, 'nmi_p': nmi_p, 'sill': sill,
            'acc': np.mean(accs), 'acc_std': np.std(accs),
            'f1': np.mean(f1s), 'f1_std': np.std(f1s)}
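# calc_score_significance is used above but not shown; a minimal sketch (an
# assumption, not the source's implementation) based on a permutation test:
# shuffling the cluster assignments gives a null distribution for the score.
def calc_score_significance(labels, clusters, score_fn, n_perm=1000, seed=0):
    rng = np.random.RandomState(seed)
    observed = score_fn(labels, clusters)
    null_scores = np.array([score_fn(labels, rng.permutation(clusters))
                            for _ in range(n_perm)])
    # z-like statistic against the null distribution
    t = (observed - null_scores.mean()) / (null_scores.std() + 1e-12)
    # one-sided permutation p-value with +1 smoothing
    p = (np.sum(null_scores >= observed) + 1.0) / (n_perm + 1.0)
    return observed, t, p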
def B3():
    w = 2048
    h = 1024
    path = '../audio/train/vio/'
    vio_64 = librosa.load(path + 'vio_64.wav', fs)[0][0:61000]
    vio_88 = librosa.load(path + 'vio_88.wav', fs)[0][0:61000]
    cla_64 = librosa.load('../audio/train/cla/cla_64.wav', fs)[0][0:61000]
    S_1 = NMF.extractTemplate(vio_64)
    S_2 = NMF.extractTemplate(vio_88)
    S_3 = NMF.extractTemplate(cla_64)
    librosa.display.specshow(S_1, y_axis='cqt_note', x_axis='frames', n_yticks=180)
    plt.axis([0, 2, 0, 100])
    plt.show()
    librosa.display.specshow(S_2, y_axis='cqt_note', x_axis='frames', n_yticks=10)
    #plt.axis([0, 2, 0, 100])
    plt.show()
    librosa.display.specshow(S_3, y_axis='cqt_note', x_axis='frames', n_yticks=180)
    plt.axis([0, 2, 0, 100])
    plt.show()
    S_1 = librosa.core.istft(S_1)
    librosa.display.waveplot(S_1, x_axis='time')
    plt.show()
    S_2 = librosa.core.istft(S_2)
    librosa.display.waveplot(S_2, x_axis='time')
    plt.show()
def estimate(valid, v_W, c_W, valid_v, valid_c, r, score_inf=None):
    fn = 'est/0' + r
    # Stack the violin and clarinet templates into one dictionary
    W = np.append(v_W, c_W, axis=1)
    H = NMF.extractActivation(valid, W)
    t_num = v_W.shape[1]
    if score_inf is not None:
        print('apply score inf')
        H = NMF.cons_Activation(score_inf, H)
    # Split the activations back into the per-instrument blocks
    v_H = H[0:t_num]
    c_H = H[t_num:H.shape[0]]
    o_v, o_c = reconstruct(valid, v_W, v_H, c_W, c_H)
    librosa.output.write_wav(fn + '_vio_est.wav', o_v, 44100)
    librosa.output.write_wav(fn + '_cla_est.wav', o_c, 44100)
    valid_v = valid_v[0:220160]
    valid_c = valid_c[0:220160]
    sdr, sir, sar, perm = basic.evalBSS(np.array([valid_v, valid_c]), np.array([o_v, o_c]))
    print(sdr)
    print(sir)
    print(sar)
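# reconstruct() is called above but not shown; a plausible sketch (an assumption,
# not the source's code) using Wiener-style soft masks: each source's magnitude
# model W*H, relative to the sum of both models, gates the mixture STFT before
# inversion. NMF.d_w / NMF.d_h are the STFT parameters used elsewhere in this repo.
def reconstruct(mix, v_W, v_H, c_W, c_H, eps=1e-10):
    S = librosa.stft(mix, n_fft=NMF.d_w, hop_length=NMF.d_h)
    V_v = np.dot(v_W, v_H)            # violin magnitude model
    V_c = np.dot(c_W, c_H)            # clarinet magnitude model
    mask_v = V_v / (V_v + V_c + eps)  # ratio (soft) masks
    mask_c = V_c / (V_v + V_c + eps)
    o_v = librosa.istft(S * mask_v, win_length=NMF.d_w, hop_length=NMF.d_h)
    o_c = librosa.istft(S * mask_c, win_length=NMF.d_w, hop_length=NMF.d_h)
    return o_v, o_c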
def R2(valid, W):
    H = NMF.extractActivation(valid, W)
    # Reconstruct the magnitude spectrogram from the dictionary and activations
    r = np.dot(W, H)
    o = librosa.core.istft(r, win_length=NMF.d_w, hop_length=NMF.d_h)
    #showDicSpectr(W)
    #showActSpectr(H)
    #showReconSpectr(r)
    librosa.output.write_wav('01_cla.wav', o, 44100)
def getNoteTemplates(path_notes):
    list_templates = []
    list_noteaudio = find_files(path_notes, ext="wav")
    for noteaudio in list_noteaudio:
        S_mag = U.LoadAudio(noteaudio)
        # Rank-1 NMF with a fixed flat activation yields one spectral template per note
        init_H = np.ones((1, S_mag.shape[1]))
        template, activate = NMF.nmf_sklearn(S_mag, k=1, H=init_H, verbose=False)
        list_templates.append(template[:, 0] / np.max(template))
    templates = np.stack(list_templates)
    return templates
def extractAllTemplate(data, n_components=NMF.nc):
    init = False
    for d in data:
        comp = NMF.extractTemplate(d[0][0:70000], n_components=n_components)
        if not init:
            init = True
            W = comp
            continue
        # Concatenate each clip's templates column-wise into one dictionary
        W = np.append(W, comp, axis=1)
    return W
def NMijF_jp(tt_matrix, tw_matrix, k_topic, alpha):
    print "Entering NMijF_jp process..."
    tt_shape = tt_matrix.shape
    tw_shape = tw_matrix.shape
    W = np.random.rand(tt_shape[0], k_topic)
    Y = np.random.rand(k_topic, tt_shape[1])
    H = np.random.rand(k_topic, tw_shape[1])
    observation = 0
    for i in range(30):
        print "Factoring... " + str(100.0 * i / 30.0) + "%"
        W = NMF.update_a(W, Y, tt_matrix)
        Y = NMF.update_x(W, Y, tt_matrix)
        H = NMF.update_x(W, H, tw_matrix)
        di = NMF.divergence_function(tt_matrix, np.dot(W, Y))
        dj = NMF.divergence_function(tw_matrix, np.dot(W, H))
        # Stop once the joint objective improves by less than the tolerance
        if (object_function(di, dj, alpha) - observation) < 0.001:
            break
        observation = object_function(di, dj, alpha)
    # print "tweet - topic matrix \n" + str(W)
    # print "topic - tweet matrix \n" + str(Y)
    # print "topic - word matrix \n" + str(H)
    print "Done!"
    return [W, H]
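# object_function is referenced above but never defined in this snippet; a
# plausible sketch (an assumption, not the source's code) for a joint
# factorization objective: a convex combination of the tweet-tweet and
# tweet-word divergences.
def object_function(di, dj, alpha):
    # alpha in [0, 1] trades off the two reconstruction errors
    return alpha * di + (1.0 - alpha) * dj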
def R4(v_W, c_W):
    path = '../audio/test/'
    fn = '../pred/'
    test_clips = u.readClips(path)
    W = np.append(v_W, c_W, axis=1)
    # Number of violin templates: the split point between the two activation blocks
    t_num = v_W.shape[1]
    for i in range(0, len(test_clips)):
        p = fn + '0' + str(i + 6)
        H = NMF.extractActivation(test_clips[i][0], W)
        v_H = H[0:t_num]
        c_H = H[t_num:H.shape[0]]
        o_v, o_c = est.reconstruct(test_clips[i][0], v_W, v_H, c_W, c_H)
        librosa.output.write_wav(p + '_vio_est.wav', o_v, 44100)
        librosa.output.write_wav(p + '_cla_est.wav', o_c, 44100)
def main():
    feedlist = [
        'http://rss.cnn.com/rss/edition_business.rss',
        'https://news.google.com/news/section?topic=b&output=rss',
    ]
    all_words, article_titles, article_words = get_news_text(feedlist)
    articlemx, word_vec = make_article_matrix(all_words, article_words)
    # Get weight and feature matrix
    v = matrix(articlemx)
    weights, feats = NMF.factorize(v, pc=10, iter=10)
    top_num = 15
    pattern_names = get_features(top_num, weights, feats, word_vec)
    print pattern_names
def test_1(self):
    test_r = random_mask(self.R, self.N, self.M)
    k = 10
    P = numpy.random.rand(self.N, k)
    Q = numpy.random.rand(self.M, k)
    fr = open('log', mode='w')
    nP, nQ = NMF.nmf_gd(test_r, P, Q, k, fr, steps=10000)
    nR = numpy.dot(nP, nQ.T)
    # write() expects strings, so convert the arrays before logging
    fr.write('\nR:\n')
    fr.write(str(self.R))
    fr.write('\nresult_R:\n')
    fr.write(str(nR))
    fr.write('\nP:\n')
    fr.write(str(nP))
    fr.write('\nQ:\n')
    fr.write(str(nQ))
    fr.close()
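# random_mask is not shown in this snippet; a minimal sketch (an assumption,
# not the source's code): hide a random subset of entries so the factorization
# has held-out values to reconstruct.
def random_mask(R, N, M, keep_prob=0.8, seed=0):
    rng = numpy.random.RandomState(seed)
    mask = rng.rand(N, M) < keep_prob
    return numpy.where(mask, R, 0)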
def run(data):
    # Note: the incoming argument is immediately replaced by a fresh Parameter instance
    data = parameter.Parameter()
    data.get_sample()
    w, h = NMF.factorize(data, 1000)
    result(data, w, h)
def main():
    # Proactive recommendations, based on daily news
    print 'Proactive Daily Recommendations: '
    # Daily news is extracted from these feeds
    feedlist = [
        'http://rss.cnn.com/rss/edition_business.rss',
        'https://news.google.com/news/section?topic=b&output=rss',
    ]
    all_words, article_titles, article_words = NewsParser.get_news_text(feedlist)
    articlemx, word_vec = NewsParser.make_article_matrix(all_words, article_words)

    # Get weight and feature matrix
    v = matrix(articlemx)
    pattern_num = 30
    iter = 10
    weights, feats = NMF.factorize(v, pattern_num, iter)
    top_num = 15
    # Get 30 patterns from daily news
    pattern_names = NewsParser.get_features(top_num, weights, feats, word_vec)

    # Train the classifiers
    trainingdata_file = open('/Users/hanhanwu/Documents/workspace/PythonLearning/Sellers++/training_data', 'r')
    cl1 = MyClassifiers.classifier(MyClassifiers.get_words)
    cl2 = MyClassifiers.fisherclassifier(MyClassifiers.get_words)
    for line in trainingdata_file:
        elems = line.split('****')
        cate = elems[1].split(',')[0]
        item = elems[0]
        cl1.train(item, cate)
        cl2.train(item, cate)

    trainingdata_categories = cl2.categories()
    amazon_categories = RSSParser.get_product_category()
    new_categories = list(set(amazon_categories) - set(trainingdata_categories))
    # When new categories appear, send me a notice
    if len(new_categories) > 0:
        print 'Update the training data: '
        print new_categories

    category_vote = {}
    for p in pattern_names:
        fit_category, max_prob = MyClassifiers.get_category(cl2, trainingdata_categories, p)
        category_vote.setdefault(fit_category, 0)
        category_vote[fit_category] += 1
    sorted_vote = sorted(category_vote.iteritems(), key=lambda (k, v): (v, k), reverse=True)

    # Based on these sorted votes, recommend new products in each voted category
    # in proportion to its votes; products with deals come first
    daily_recommendations = {}
    for t in sorted_vote:
        prod_category = t[0]
        prod_amount = t[1]
        new_product_info = RSSParser.get_newproduct_info(prod_category, prod_amount)
        if len(new_product_info) < prod_amount:
            new_product_info_nodeal = RSSParser.get_newproduct_info(prod_category, prod_amount, deal=0)
            new_product_info.update(new_product_info_nodeal)
        daily_recommendations.update(new_product_info)

    print 'daily recommendations: '
    for pname, pinfo in daily_recommendations.iteritems():
        print 'Product Name: ', pname
        print 'Product Price: ', pinfo['current_price']
    print '**********************************************************'

    # This variable is the user input; change it to test
    user_input = 'Stark Electric Small Mini Portable Compact Washer Washing'

    # Reactive recommendations, based on the product name provided by the user
    conn = MySQLdb.connect(host='localhost', user='******', passwd='sellers', db='dbSellers')
    x = conn.cursor()
    max_ratio = 0
    real_pname = ''
    try:
        x.execute("""SELECT ProductName FROM tbProducts;""")
        numrows = x.rowcount
        # Fuzzy-match the user input against every stored product name
        for i in xrange(0, numrows):
            p_name = x.fetchone()[0]
            ledist = Levenshtein.ratio(p_name, user_input)
            if ledist > max_ratio:
                max_ratio = ledist
                real_pname = p_name
        if real_pname != '':
            print 'Product Name', real_pname
            x.execute("""SELECT CurrentPrice FROM tbProducts WHERE ProductName = %s""", (real_pname,))
            print 'Predicted Price', x.fetchall()[0][0]
    except:
        conn.rollback()
    x.close()
    conn.close()
if np.max(Y_currCol[:, J_Idx_wikiFile_subIndexOffset]) == 0.0:
    # Replace an all-zero column with a tiny positive value to keep the update stable
    Y_currCol[:, J_Idx_wikiFile_subIndexOffset] = 0.000000001
    smallValueAsgn_cnt += 1
#print ""
#print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
#print "no. of all-zero columns that"
#print "are assigned a very small"
#print "non-negative value : %d/%d" % (smallValueAsgn_cnt, Y_blockWidth)
#print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
#print ""
#S_Jcolumn = NMF.HALS_CORE(np.matrix(Y_currCol).transpose(), np.matrix(S[:, J_Idx_wikiFile:J_Idx_wikiFile + Y_blockWidth]).transpose(), A.transpose(), modelOrder).transpose()
#print "Please stop here and perform manual LSMU"
#print "Press any key to continue ..."
#raw_input()
S_Jcolumn = NMF.LSMU_CORE(np.matrix(Y_currCol).transpose(),
                          np.matrix(S[:, J_Idx_wikiFile:J_Idx_wikiFile + Y_blockWidth]).transpose(),
                          A.transpose()).transpose()
# Copy the updated block of columns back into S
for k in xrange(modelOrder):
    for J_assignmentOffset in xrange(Y_blockWidth):
        S[k, J_Idx_wikiFile + J_assignmentOffset] = S_Jcolumn[k, J_assignmentOffset]
# -------------------------------------- #
# end of enforcing Sum To One constraint #
# -------------------------------------- #
#logFileName = 'S_HALS_itera' + str(cycle + 1) + '.txt'
logFileName = 'S_LSMU_itera' + str(cycle + 1) + '.txt'
DSP.LOG('log matrix to file', logFileName, S, ' ')
print "S has been logged in cycle %d" % (cycle + 1)
DSP.STOP()

# ------------- #
# update A part #
import os
import numpy as np
from numpy import linalg as LA
import scipy
import NMF
import DSP

os.system('clear')

Y = np.matrix([[1, 3, 77, 6], [3, 0, 8, 9], [1, 4, 4, 3], [7, 7, 2, 5], [5, 3, 9, 8]])
A = np.matrix(np.random.ranf([Y.shape[0], 2]))
S = np.matrix(np.random.ranf([2, Y.shape[1]]))

# ===================================================== #
# display the convergence of residual error of NNLS NMF #
# ===================================================== #
numOfInstance = 20
iteraNum = np.array(range(0, numOfInstance))
# Use a float array here: copying the integer index array would truncate the norms
Yresidual = np.zeros(numOfInstance)
Yresidual[0] = LA.norm(Y - np.dot(A, S), 'fro')
for i in range(0, numOfInstance):
    A, S = NMF.NNLS(Y, 0, S, 1, 0)
    Yresidual[i] = LA.norm(Y - np.dot(A, S), 'fro')

DSP.PLOT(np.log10(Yresidual), x=iteraNum, xLabel='Iteration Number',
         yLabel='Residual Error', title='|| Y - A*S ||', grid=True)
def separate(audio_path, model_name, num_subtargets, *args):
    """
    Separate the audio at audio_path into num_subtargets estimated sources.

    :param audio_path: path to audio input (wav file)
    :param model_name: name of the model ('TDCN', 'TDCN++', 'Demucs', 'OpenUnmix' or 'NMF')
    :param num_subtargets: the number of subtargets to estimate from the mix
    :param *args: any relevant additional argument (for example, combinations
        of sub-targets to match NUM_SUBTARGETS)

    Returns an array containing the sub-targets as numpy arrays in float32,
    and the sample rate of the output.
    """
    file_name = audio_path.split("/")[-1].split(".")[0]
    output_path = TEMP_OUTPUT_PATH + "/" + model_name + "/" + file_name
    if not os.path.exists(output_path):
        if model_name == "TDCN++":
            # ConvTasNet for Universal Sound Separation
            TDCNpp_separate.separate(TDCNpp_model_path + "/baseline_model",
                                     TDCNpp_model_path + "/baseline_inference.meta",
                                     audio_path,
                                     output_path)
        elif model_name == "TDCN":
            demucs.separate(audio_path, output_path, 'tasnet')
        elif model_name == "Demucs":
            demucs.separate(audio_path, output_path, 'demucs')
        elif model_name == "OpenUnmix":
            open_unmix.separate(audio_path, output_path)
        elif model_name == "NMF":
            NMF.separate(audio_path, output_path)
        else:
            raise Exception(
                "Model name must be one of: TDCN, TDCN++, OpenUnmix, Demucs, NMF"
            )

    # Read sub-targets and output them in numpy array format
    sub_targets = []
    sr = None
    if model_name in separation_models:
        if num_subtargets != len(args[0]):
            raise Exception(
                "For {}, it is required to specify how to combine the sub-targets "
                "to generate NUM_SUBTARGETS sub-targets. Must be of the form [[0, 3], [1, 2]]"
                .format(model_name))
        for l in args[0]:
            # Combine the generated sub-targets according to the list in *args
            a = None
            for s in l:
                t, sr = librosa.load(output_path + "/{}.wav".format(s), sr=SAMPLING_RATE)
                if a is None:
                    a = t
                else:
                    a += t
            sub_targets.append(a)
    else:
        raise Exception("Unknown model name")

    if sr is None:
        raise Exception("No sample rate for output detected")

    return sub_targets, sr
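# A hedged usage sketch (the file names and the grouping list are hypothetical,
# and it assumes "NMF" appears in separation_models): separate a mix into two
# sub-targets with the NMF backend and write each estimate to disk.
import soundfile as sf

sub_targets, sr = separate("examples/mix.wav", "NMF", 2, [[0], [1]])
for i, target in enumerate(sub_targets):
    sf.write("subtarget_{}.wav".format(i), target, sr)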
    'YHOO', 'AVP', 'BIIB', 'BP', 'CL', 'CVX', 'DNA', 'EXPE', 'GOOG',
    'PG', 'XOM', 'AMGN'
]
shortest = 300
prices = {}
dates = None
for t in tickers:
    # TODO: fix out-of-date Yahoo Finance URL
    url = 'http://ichart.finance.yahoo.com/table.csv?' + \
          's=%s&d=11&e=26&f=2006&g=d&a=3&b=12&c=1996' % t + \
          '&ignore=.csv'
    print url
    rows = urllib2.urlopen(url).readlines()
    prices[t] = [float(r.split(',')[5]) for r in rows[1:] if r.strip() != '']
    if len(prices[t]) < shortest:
        shortest = len(prices[t])
    if not dates:
        dates = [r.split(',')[0] for r in rows[1:] if r.strip() != '']

# Rows are dates, columns are tickers, truncated to the shortest price history
l1 = [[prices[tickers[i]][j] for i in range(len(tickers))] for j in range(shortest)]

w, h = NMF.factorize(matrix(l1), pc=5)
print h
print w
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 11 18:44:18 2018

@author: wuyiming
"""

import util
import NMF
import chord
import NoteTemplate
import numpy as np

S = util.LoadAudio("PianoChord-70bpm.wav")
W = NoteTemplate.getNoteTemplates("audio_notes")
_, H = NMF.nmf_beta(S, 48, W.T, beta=0.5, iteration=20)
util.PlotPianoroll(H)
H_binary = (H / np.max(H)) > 0.2
util.PlotPianoroll(H_binary)
segments = [np.sum(seg, axis=1) for seg in util.SegmentByBeat(H, 70, 4 * 4)]
chords = [chord.match_chord(seg / seg.max()) for seg in segments]
print(chords)
Y = np.matrix([[1, 2, 3, 4, 5], [2, 3, 12, 5, 1], [5, 43, 5, 8, 9], [5, 6, 7, 94, 0],
               [3, 2, 5, 7, 0], [9, 9, 8, 7, 1], [3, 76, 0, 4, 7]], float)
print "Y:"
print Y
print ""
modelNum = 3
A0 = np.random.ranf([Y.shape[0], modelNum])
#A0 = np.matrix([[1,3,5],[2,4,6],[3,5,7],[4,6,8],[5,7,9],[6,8,10],[7,9,11]],float)
S0 = np.random.ranf([modelNum, Y.shape[1]])
#S0 = np.matrix([[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15]])
A, S = NMF.HALS(Y, A0, S0, 1000, [1, 1])
print "A shape:"
print A.shape
print "A:"
print A
print ""
print "S shape:"
print S.shape
print "S:"
print S
print ""
print "A*S"
Y_new = np.dot(A, S)
import os
import numpy as np
from numpy import linalg as LA
import NMF

os.system('clear')

Y = np.matrix([[1, 2, 3, 4, 5], [2, 3, 12, 5, 1], [5, 43, 5, 8, 9], [5, 6, 7, 94, 0],
               [3, 2, 5, 7, 0], [9, 9, 8, 7, 1], [3, 76, 0, 4, 7]], float)
print "Y:"
print Y
print ""
modelNum = 3
A0 = np.matrix([[1, 3, 5], [2, 4, 6], [3, 5, 7], [4, 6, 8], [5, 7, 9], [6, 8, 10], [7, 9, 11]], float)
S0 = np.matrix([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]])
A, S = NMF.LSMU(Y, A0, S0, 1000)
print "A*S"
Y_new = np.dot(A, S)
print Y_new
print ""
print "Y - A*S"
print Y - Y_new
print ""
print "norm( Y-A*S , 'fro' )"
print LA.norm(Y - Y_new, 'fro')
print ""
import parser
from preprocess import *

par = parser.Parser()
par.parse()

rownames, colnames, data = readfile()
data, colnames = pruning(data, colnames, 0.05, 0.9)
data = tfidf(data)
writefile(rownames, colnames, data)

import HAC
analyser = HAC.HAC()
clust = analyser.hcluster(data)
clust = analyser.hcluster(data, cosineSimilarity)
analyser.printclust(clust, rownames)

from kmeans import *
clusters = kcluster(data)
printcluster(clusters, rownames)

import NMF
import numpy
v = numpy.matrix(data)
weights, feat = NMF.factorize(v, pc=20, iter=50)
topp, pn = NMF.showfeatures(weights, feat, rownames, colnames)