Example #1
def run_single_nmf_experiment(mat, n_components, apply_nan, labels):
    if apply_nan:
        nmf = NMF.NMF(n_components=n_components, apply_nan_mask=True, nan_weight=0.1)
    else:
        nmf = NMF.NMF(n_components=n_components)

    W, H = nmf.decompose(mat)
    clusters = W.argmax(axis=1)

    nmi, nmi_t, nmi_p = calc_score_significance(labels, clusters, normalized_mutual_info_score)
    if nmi_p >= 0.05:
        print('inconclusive NMI with {} and nan_mask={} '.format(n_components, apply_nan),
              nmi_t, nmi_p)

    sill = silhouette_score(mat.toarray(), clusters)

    accs = []
    f1s = []
    for i in range(20):
        f1, f1_t, f1_p, acc, acc_t, acc_p = semi_supervised_classify(labels, clusters)
        accs.append(acc)
        f1s.append(f1)
        if f1_p >= 0.05:
            print('inconclusive f1 score with {} and nan_mask={} '.format(n_components, apply_nan),
                  f1_t, f1_p)
        if acc_p >= 0.05:
            print('inconclusive accuracy score with {} and nan_mask={} '.format(n_components, apply_nan),
                  acc_t, acc_p)

    return {'nmi': nmi, 'nmi_p': nmi_p, 'sill': sill,
            'acc': np.mean(accs), 'acc_std': np.std(accs),
            'f1': np.mean(f1s), 'f1_std': np.std(f1s)}
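A minimal driver for the experiment above might sweep a few component counts with and without the NaN mask; the grid and the preloaded `mat` (sparse matrix) and `labels` are illustrative assumptions, not part of the original example:

results = {}
for k in (5, 10, 20):  # hypothetical component grid
    for apply_nan in (False, True):
        results[(k, apply_nan)] = run_single_nmf_experiment(mat, k, apply_nan, labels)

for (k, apply_nan), r in results.items():
    print(k, apply_nan, r['nmi'], r['sill'], r['acc'], r['f1'])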
Example #2
def B3():
    w = 2048
    h = 1024
    path = '../audio/train/vio/'
    
    # fs is a module-level sample rate defined elsewhere in the source file
    vio_64 = librosa.load(path + 'vio_64.wav', sr=fs)[0][0:61000]
    vio_88 = librosa.load(path + 'vio_88.wav', sr=fs)[0][0:61000]
    cla_64 = librosa.load('../audio/train/cla/cla_64.wav', sr=fs)[0][0:61000]
   
    S_1 = NMF.extractTemplate(vio_64)
    S_2 = NMF.extractTemplate(vio_88)
    S_3 = NMF.extractTemplate(cla_64)
 
    librosa.display.specshow(S_1, y_axis='cqt_note', x_axis='frames', n_yticks=180)
    plt.axis([0, 2, 0, 100])
    plt.show()

    librosa.display.specshow(S_2, y_axis='cqt_note', x_axis='frames', n_yticks=10)
    #plt.axis([0, 2, 0, 100])
    plt.show()
    
    librosa.display.specshow(S_3, y_axis='cqt_note', x_axis='frames', n_yticks=180)
    plt.axis([0, 2, 0, 100])
    plt.show()

    S_1 = librosa.core.istft(S_1)
    librosa.display.waveplot(S_1, x_axis='time')
    plt.show()
    S_2 = librosa.core.istft(S_2)
    librosa.display.waveplot(S_2, x_axis='time')
    plt.show()
Example #3
def estimate(valid, v_W, c_W, valid_v, valid_c, r, score_inf=None):
    fn = 'est/0' + r
    W = np.append(v_W, c_W, axis=1)
    H = NMF.extractActivation(valid, W)
    t_num = v_W.shape[1]
    
    if score_inf is not None :
        print('apply score inf')
        H = NMF.cons_Activation(score_inf, H) 

    v_H = H[0:t_num]
    c_H = H[t_num:H.shape[0]]
   
    o_v, o_c = reconstruct(valid, v_W, v_H, c_W, c_H)
    
    librosa.output.write_wav(fn + '_vio_est.wav', o_v, 44100)
    librosa.output.write_wav(fn + '_cla_est.wav', o_c, 44100)
    
    valid_v = valid_v[0:220160]
    valid_c = valid_c[0:220160]
    
    sdr, sir, sar, perm = basic.evalBSS(np.array([valid_v, valid_c]), np.array([o_v, o_c]))
   
    print(sdr)
    print(sir)
    print(sar)
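One way estimate() might be wired up, assuming the validation mix and its isolated violin/clarinet references are already loaded and the dictionaries come from extractAllTemplate() (Example #6); every variable name here is an assumption:

v_W = extractAllTemplate(vio_data)  # violin templates
c_W = extractAllTemplate(cla_data)  # clarinet templates
estimate(valid_mix, v_W, c_W, valid_vio, valid_cla, r='1')  # writes est/01_vio_est.wav etc.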
Example #4
def R2(valid, W):
    H = NMF.extractActivation(valid, W)
    r = np.dot(W, H)
    o = librosa.core.istft(r, win_length=NMF.d_w, hop_length=NMF.d_h)
    """showDicSpectr(W)
    showActSpectr(H)
    showReconSpectr(r)"""
    librosa.output.write_wav('01_cla.wav', o, 44100)
Example #5
def getNoteTemplates(path_notes):
    list_templates = []
    list_noteaudio = find_files(path_notes, ext="wav")
    for noteaudio in list_noteaudio:
        S_mag = U.LoadAudio(noteaudio)
        init_H = np.ones((1, S_mag.shape[1]))
        template, activate = NMF.nmf_sklearn(S_mag, k=1, H=init_H, verbose=False)
        list_templates.append(template[:, 0] / np.max(template))
    templates = np.stack(list_templates)
    return templates
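Example #18 below calls this helper on a folder of single-note recordings; a minimal sketch of the same usage (the folder name matches Example #18, the shape comment is an assumption about U.LoadAudio's output):

templates = getNoteTemplates("audio_notes")
print(templates.shape)  # (number of note files, number of frequency bins)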
Example #6
def extractAllTemplate(data, n_components=NMF.nc):
    init = False
    for d in data:
        comp = NMF.extractTemplate(d[0][0:70000], n_components=n_components)
        if not init:
            init = True
            W = comp
            continue
        W = np.append(W, comp, axis=1)
    return W
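A quick sanity check of the stacked dictionary, reusing the training clips from B3() in Example #2; loading the clips this way is an assumption:

vio_data = [librosa.load(f) for f in ('../audio/train/vio/vio_64.wav',
                                      '../audio/train/vio/vio_88.wav')]
v_W = extractAllTemplate(vio_data)
print(v_W.shape)  # NMF.nc template columns per clip, stacked along axis 1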
Example #7
def NMijF_jp(tt_matrix, tw_matrix, k_topic, alpha):
    print("Entering NMijF_jp process...")
    tt_shape = tt_matrix.shape
    tw_shape = tw_matrix.shape
    W = np.random.rand(tt_shape[0], k_topic)
    Y = np.random.rand(k_topic, tt_shape[1])
    H = np.random.rand(k_topic, tw_shape[1])
    observation = 0
    for i in range(30):
        print "Fatoring..." + str(float(i) / 30.0) + "%"
        W = NMF.update_a(W, Y, tt_matrix)
        Y = NMF.update_x(W, Y, tt_matrix)
        H = NMF.update_x(W, H, tw_matrix)
        di = NMF.divergence_function(tt_matrix, np.dot(W, Y))
        dj = NMF.divergence_function(tw_matrix, np.dot(W, H))
        current = object_function(di, dj, alpha)
        if abs(current - observation) < 0.001:
            break
        observation = current
    # print "tweet - topic matrix  \n" + str(W)
    # print "topic - tweet matrix  \n" + str(Y)
    # print "topic - word matrix  \n" + str(H)
    print "Done!"
    return [W, H]
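The loop above alternates multiplicative updates on W, Y, and H until the combined divergence object_function(di, dj, alpha) stops improving. A toy invocation with random non-negative inputs; the matrix sizes, topic count, and alpha are illustrative assumptions:

import numpy as np

tt = np.random.rand(50, 50)   # tweet-tweet matrix
tw = np.random.rand(50, 200)  # tweet-word matrix
W, H = NMijF_jp(tt, tw, k_topic=5, alpha=0.5)
print(W.shape, H.shape)  # (50, 5) (5, 200)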
Example #8
def R4(v_W, c_W):
    path = '../audio/test/'
    fn = '../pred/'
    test_clips = u.readClips(path)
    W = np.append(v_W, c_W, axis=1)
    t_num = v_W.shape[1]  # number of violin template columns, as in estimate()

    for i in range(0, len(test_clips)):
        p = fn + '0' + str(i + 6)
        H = NMF.extractActivation(test_clips[i][0], W)
        v_H = H[0:t_num]
        c_H = H[t_num:H.shape[0]]
        o_v, o_c = est.reconstruct(test_clips[i][0], v_W, v_H, c_W, c_H)
        librosa.output.write_wav(p + '_vio_est.wav', o_v, 44100)
        librosa.output.write_wav(p + '_cla_est.wav', o_c, 44100)
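A hypothetical wiring of R4() with the dictionaries from extractAllTemplate() (Example #6); vio_data and cla_data are assumptions:

v_W = extractAllTemplate(vio_data)
c_W = extractAllTemplate(cla_data)
R4(v_W, c_W)  # writes ../pred/06_vio_est.wav, ../pred/06_cla_est.wav, ...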
Example #9
def main():
    feedlist = [
        'http://rss.cnn.com/rss/edition_business.rss',
        'https://news.google.com/news/section?topic=b&output=rss',
    ]

    all_words, article_titles, article_words = get_news_text(feedlist)
    articlemx, word_vec = make_article_matrix(all_words, article_words)

    # Get weight and feature matrix
    v = matrix(articlemx)
    weights, feats = NMF.factorize(v, pc=10, iter=10)
    top_num = 15
    pattern_names = get_features(top_num, weights, feats, word_vec)

    print(pattern_names)
Example #11
    def test_1(self):
        test_r = random_mask(self.R, self.N, self.M)
        k = 10
        P = numpy.random.rand(self.N, k)
        Q = numpy.random.rand(self.M, k)

        fr = open('log', mode='w')

        nP, nQ = NMF.nmf_gd(test_r, P, Q, k, fr, steps=10000)
        nR = numpy.dot(nP, nQ.T)
        fr.write('\nR:\n')
        fr.write(str(self.R))
        fr.write('\nresult_R:\n')
        fr.write(str(nR))
        fr.write('\nP:\n')
        fr.write(str(nP))
        fr.write('\nQ:\n')
        fr.write(str(nQ))
        fr.close()
Example #12
def run(data):
    # note: the incoming argument is discarded; a fresh Parameter sample is built here
    data = parameter.Parameter()
    data.get_sample()
    w, h = NMF.factorize(data, 1000)
    result(data, w, h)
Example #13
def main():
    # Proactive recommendations, based on daily news
    print('Proactive Daily Recommendations: ')
    # Daily News are extracted from these feeds
    feedlist = [
                'http://rss.cnn.com/rss/edition_business.rss',
                'https://news.google.com/news/section?topic=b&output=rss',
                ]
    
    all_words, article_titles, article_words = NewsParser.get_news_text(feedlist)
    articlemx, word_vec = NewsParser.make_article_matrix(all_words, article_words)
    # Get weight and feature matrix
    v = matrix(articlemx)
    pattern_num = 30
    iter = 10
    weights, feats = NMF.factorize(v, pattern_num, iter)
    top_num = 15
    # Get 30 patterns from daily news
    pattern_names = NewsParser.get_features(top_num, weights, feats, word_vec)           
    
    # Train the data
    trainingdata_file = open('/Users/hanhanwu/Documents/workspace/PythonLearning/Sellers++/training_data','r')
    cl1 = MyClassifiers.classifier(MyClassifiers.get_words)
    cl2 = MyClassifiers.fisherclassifier(MyClassifiers.get_words)
    for line in trainingdata_file:
        elems = line.split('****')
        cate = elems[1].split(',')[0]
        item = elems[0]
        cl1.train(item, cate)
        cl2.train(item, cate)
    trainingdata_categories = cl2.categories()
    amazon_categories = RSSParser.get_product_category()
    new_categories = list(set(amazon_categories) - set(trainingdata_categories))
    # When new categories appear, send me a notice
    if len(new_categories) > 0:
        print('Update the training data: ')
        print(new_categories)
    category_vote = {}
    for p in pattern_names:
        fit_category, max_prob = MyClassifiers.get_category(cl2, trainingdata_categories, p)
        category_vote.setdefault(fit_category, 0)
        category_vote[fit_category] += 1
    sorted_vote = sorted(category_vote.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)
    
    # Based on this sorted votes, recommended new products in each voted category based on the ratio, products with deals come first
    daily_recommendations = {}
    for t in sorted_vote:
        prod_category = t[0]  
        prod_amount = t[1]
        new_product_info = {}
        new_product_info = RSSParser.get_newproduct_info(prod_category, prod_amount)
        if len(new_product_info) < prod_amount:
            new_product_info_nodeal = RSSParser.get_newproduct_info(prod_category, prod_amount, deal=0)
            new_product_info.update(new_product_info_nodeal)
        daily_recommendations.update(new_product_info)
    print('daily recommendations: ')
    for pname, pinfo in daily_recommendations.items():
        print('Product Name: ', pname)
        print('Product Price: ', pinfo['current_price'])
        
    print('**********************************************************')
    
    # This variable is the user input, you can change this to test
    user_input = 'Stark Electric Small Mini Portable Compact Washer Washing'
     
    # Reactive recommendations, based on the product name provided by the user
    conn = MySQLdb.connect(host='localhost',
                           user='******',
                           passwd='sellers',
                           db='dbSellers'
                          )
    x = conn.cursor()
    max_ratio = 0
    real_pname = ''
    try:
        x.execute("""
                  SELECT ProductName FROM tbProducts;
                  """)
        numrows = x.rowcount
         
        for _ in range(numrows):
            p_name = x.fetchone()[0]
            ledist = Levenshtein.ratio(p_name, user_input)
            if ledist > max_ratio:
                max_ratio = ledist
                real_pname = p_name   
                 
        if real_pname != '':
            print('Product Name', real_pname)
            x.execute("""
            SELECT CurrentPrice FROM tbProducts WHERE ProductName = %s
            """, (real_pname,))
            print('Predicted Price', x.fetchall()[0][0])
    except MySQLdb.Error:
        conn.rollback()
         
    x.close()
    conn.close()
Example #14
        if np.max(Y_currCol[:, J_Idx_wikiFile_subIndexOffset]) == 0.0:
            Y_currCol[:, J_Idx_wikiFile_subIndexOffset] = 0.000000001
            smallValueAsgn_cnt += 1
    # print("")
    # print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    # print("no. of all-zero columns assigned a very small")
    # print("non-negative value : %d/%d" % (smallValueAsgn_cnt, Y_blockWidth))
    # print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    # print("")
    # S_Jcolumn = NMF.HALS_CORE(np.matrix(Y_currCol).transpose(), np.matrix(S[:, J_Idx_wikiFile:J_Idx_wikiFile + Y_blockWidth]).transpose(), A.transpose(), modelOrder).transpose()
    # print("Please stop here and perform manual LSMU")
    # print("Press any key to continue ...")
    # input()
    S_Jcolumn = NMF.LSMU_CORE(
        np.matrix(Y_currCol).transpose(),
        np.matrix(S[:, J_Idx_wikiFile:J_Idx_wikiFile + Y_blockWidth]).transpose(),
        A.transpose()).transpose()
    for k in range(modelOrder):
        for J_assignmentOffset in range(Y_blockWidth):
            S[k, J_Idx_wikiFile + J_assignmentOffset] = \
                S_Jcolumn[k, J_assignmentOffset]
    # -------------------------------------- #
    # end of enforcing Sum To One constraint #
    # -------------------------------------- #
# logFileName = 'S_HALS_itera' + str(cycle + 1) + '.txt'
logFileName = 'S_LSMU_itera' + str(cycle + 1) + '.txt'
DSP.LOG('log matrix to file', logFileName, S, ' ')
print("S has been logged in cycle %d" % (cycle + 1))
DSP.STOP()
# ------------- #
# update A part #
Example #15
import os
import numpy as np
from numpy import linalg as LA
import scipy
import NMF
import DSP

os.system('clear')

Y = np.matrix([[1, 3, 77, 6], [3, 0, 8, 9], [1, 4, 4, 3], [7, 7, 2, 5], [5, 3, 9, 8]])
A = np.matrix(np.random.ranf([Y.shape[0], 2]))
S = np.matrix(np.random.ranf([2, Y.shape[1]]))

# ===================================================== #
# display the convergence of residual error of NNLS NMF #
# ===================================================== #

numOfInstance = 20
iteraNum = np.array(range(0, numOfInstance))
# use a float array: storing Frobenius norms in an integer array would truncate them
Yresidual = np.zeros(numOfInstance)

for i in range(0, numOfInstance):
    A, S = NMF.NNLS(Y, 0, S, 1, 0)
    Yresidual[i] = LA.norm(Y - np.dot(A, S), 'fro')

DSP.PLOT(np.log10(Yresidual), x=iteraNum, xLabel='Iteration Number', yLabel='Residual Error', title='|| Y - A*S ||', grid=True)

Example #16
File: pipeline.py Project: dzluke/smc2021
def separate(audio_path, model_name, num_subtargets, *args):
    """
    Separate the audio into the given number of sub-targets.
    :param audio_path: path to audio input (wav file)
    :param model_name: name of the model ('demucs' or...)
    :param num_subtargets: the number of sub-targets to estimate from the mix
    :param *args: any relevant additional argument (for example combinations
                    of sub targets to match NUM_SUBTARGETS)

    Returns array containing sub_targets as numpy arrays in float32, and the sample rate of the output
    """

    file_name = audio_path.split("/")[-1].split(".")[0]
    output_path = TEMP_OUTPUT_PATH + "/" + model_name + "/" + file_name

    if not os.path.exists(output_path):
        if model_name == "TDCN++":
            # ConvTasNet for Universal Sound Separation
            TDCNpp_separate.separate(
                TDCNpp_model_path + "/baseline_model",
                TDCNpp_model_path + "/baseline_inference.meta", audio_path,
                output_path)
        elif model_name == "TDCN":
            demucs.separate(audio_path, output_path, 'tasnet')
        elif model_name == "Demucs":
            demucs.separate(audio_path, output_path, 'demucs')
        elif model_name == "OpenUnmix":
            open_unmix.separate(audio_path, output_path)
        elif model_name == "NMF":
            NMF.separate(audio_path, output_path)
        else:
            raise Exception(
                "Model name must be one of: TDCN, TDCN++, OpenUnmix, Demucs, NMF"
            )

    # Read sub targets and output them in numpy array format
    sub_targets = []
    sr = None

    if model_name in separation_models:
        if num_subtargets != len(args[0]):
            raise Exception(
                "For {}, it is required to specify the way to combine the sub targets to generate NUM_SUBTARGETS sub targets. Must be of the form [[0, 3], [1, 2]]"
                .format(model_name))
        for l in args[0]:
            # Combine sub_targets generated according to the list in *args
            a = None
            for s in l:
                t, sr = librosa.load(output_path + "/{}.wav".format(s),
                                     sr=SAMPLING_RATE)
                if a is None:
                    a = t
                else:
                    a += t
            sub_targets.append(a)
    else:
        raise Exception("Unknown model name")

    if sr is None:
        raise Exception("No sample rate for output detected")

    return sub_targets, sr
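A hypothetical call that splits a mix into two sub-targets with Demucs by summing its output stems pairwise, following the [[0, 3], [1, 2]]-style format the function documents; the file path and the pairing are assumptions:

sub_targets, sr = separate("mix.wav", "Demucs", 2, [[0, 1], [2, 3]])
for i, target in enumerate(sub_targets):
    print(i, target.shape, sr)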
Example #17
import urllib.request  # the original used Python 2's urllib2

import NMF
from numpy import matrix

tickers = [
    'YHOO', 'AVP', 'BIIB', 'BP', 'CL', 'CVX', 'DNA', 'EXPE', 'GOOG', 'PG',
    'XOM', 'AMGN'
]

shortest = 300
prices = {}
dates = None

for t in tickers:
    # TODO fix out-of-date Yahoo URL
    url = 'http://ichart.finance.yahoo.com/table.csv?' + \
          's=%s&d=11&e=26&f=2006&g=d&a=3&b=12&c=1996' % t + \
          '&ignore=.csv'
    print(url)
    rows = [r.decode() for r in urllib.request.urlopen(url).readlines()]

    prices[t] = [float(r.split(',')[5]) for r in rows[1:] if r.strip() != '']
    if len(prices[t]) < shortest:
        shortest = len(prices[t])

    if not dates:
        dates = [r.split(',')[0] for r in rows[1:] if r.strip() != '']

l1 = [[prices[tickers[i]][j] for i in range(len(tickers))]
      for j in range(shortest)]

w, h = NMF.factorize(matrix(l1), pc=5)

print(h)
print(w)
Example #18
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 11 18:44:18 2018

@author: wuyiming
"""

import util
import NMF
import chord
import NoteTemplate

import numpy as np

S = util.LoadAudio("PianoChord-70bpm.wav")
W = NoteTemplate.getNoteTemplates("audio_notes")

_, H = NMF.nmf_beta(S, 48, W.T, beta=0.5, iteration=20)

util.PlotPianoroll(H)

H_binary = (H / np.max(H)) > 0.2
util.PlotPianoroll(H_binary)

segments = [np.sum(seg, axis=1) for seg in util.SegmentByBeat(H, 70, 4 * 4)]
chords = [chord.match_chord(seg / seg.max()) for seg in segments]

print(chords)
Example #20
import numpy as np
import NMF

Y = np.matrix(
    [[1, 2, 3, 4, 5], [2, 3, 12, 5, 1], [5, 43, 5, 8, 9], [5, 6, 7, 94, 0],
     [3, 2, 5, 7, 0], [9, 9, 8, 7, 1], [3, 76, 0, 4, 7]], float)

print("Y:")
print(Y)
print("")

modelNum = 3

A0 = np.random.ranf([Y.shape[0], modelNum])
#A0 = np.matrix([[1,3,5],[2,4,6],[3,5,7],[4,6,8],[5,7,9],[6,8,10],[7,9,11]],float)
S0 = np.random.ranf([modelNum, Y.shape[1]])
#S0 = np.matrix([[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15]])

#A , S = NMF.HALS( Y , A0 , S0 , 1000 , [1,1] )
A, S = NMF.HALS(Y, A0, S0, 1000, [1, 1])

print("A shape:")
print(A.shape)
print("A:")
print(A)
print("")

print("S shape:")
print(S.shape)
print("S:")
print(S)
print("")

print("A*S")
Y_new = np.dot(A, S)
print(Y_new)
Example #21
import os

import numpy as np
from numpy import linalg as LA

import NMF

os.system('clear')

Y = np.matrix([[1, 2, 3, 4, 5], [2, 3, 12, 5, 1], [5, 43, 5, 8, 9],
               [5, 6, 7, 94, 0], [3, 2, 5, 7, 0], [9, 9, 8, 7, 1],
               [3, 76, 0, 4, 7]], float)

print("Y:")
print(Y)
print("")

modelNum = 3

A0 = np.matrix([[1, 3, 5], [2, 4, 6], [3, 5, 7], [4, 6, 8], [5, 7, 9],
                [6, 8, 10], [7, 9, 11]], float)
S0 = np.matrix([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]])
A, S = NMF.LSMU(Y, A0, S0, 1000)

print("A*S")
Y_new = np.dot(A, S)
print(Y_new)
print("")

print("Y - A*S")
print(Y - Y_new)
print("")

print("norm( Y-A*S , 'fro' )")
print(LA.norm(Y - Y_new, 'fro'))
print("")

Example #22
import parser
from preprocess import *

par = parser.Parser()
par.parse()
rownames, colnames, data = readfile()
data, colnames = pruning(data, colnames, 0.05, 0.9)
data = tfidf(data)
writefile(rownames, colnames, data)

import HAC
analyser = HAC.HAC()
clust = analyser.hcluster(data)

# cluster again, this time with cosine similarity as the distance measure
clust = analyser.hcluster(data, cosineSimilarity)

analyser.printclust(clust, rownames)

from kmeans import *
clusters = kcluster(data)
printcluster(clusters, rownames)

import NMF
import numpy

v = numpy.matrix(data)
weights, feat = NMF.factorize(v, pc=20, iter=50)
topp, pn = NMF.showfeatures(weights, feat, rownames, colnames)