Example #1
def test_projgrad_nmf_sparseness():
    # Test that sparsity constraints actually increase sparseness in the
    # part where they are applied.
    tol = 1e-2
    # `random_state` is assumed to be a module-level np.random.RandomState,
    # as in the scikit-learn test module this snippet comes from.
    A = np.abs(random_state.randn(10, 10))
    m = ProjectedGradientNMF(n_components=5, random_state=0, tol=tol).fit(A)
    data_sp = ProjectedGradientNMF(n_components=5, sparseness='data',
                                   random_state=0,
                                   tol=tol).fit(A).data_sparseness_
    comp_sp = ProjectedGradientNMF(n_components=5, sparseness='components',
                                   random_state=0,
                                   tol=tol).fit(A).comp_sparseness_
    assert_greater(data_sp, m.data_sparseness_)
    assert_greater(comp_sp, m.comp_sparseness_)
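# For reference, the data_sparseness_ / comp_sparseness_ attributes asserted
# above appear to follow Hoyer's (2004) sparseness measure, which the
# `sparseness` option of ProjectedGradientNMF is based on. A minimal sketch of
# that measure (the helper name is mine):
import numpy as np

def hoyer_sparseness(x):
    # 0 for a uniformly dense vector, 1 for a vector with a single nonzero
    x = np.abs(np.ravel(x))
    n = x.size
    l1, l2 = x.sum(), np.sqrt((x ** 2).sum())
    return (np.sqrt(n) - l1 / l2) / (np.sqrt(n) - 1)

Example #2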
    def get_cluster_membership(self):
        """ Determine the cluster number that each sample is associated with. """

        model = ProjectedGradientNMF(n_components=self._num_clusters,
                                     init='random',
                                     beta=.3,
                                     eta=.5,
                                     max_iter=5000)

        w = model.fit_transform(self._matrix)
        h = model.components_

        # Convert the H matrix, which holds the per-component weights for each
        # column of the input matrix, into an array of cluster assignments: the
        # row index of the largest value in each column of H is that column's cluster.
        clusters = []
        model_width = len(h[0])

        for col_idx in range(model_width):
            max_val = dict()
            for row_idx in range(self._num_clusters):
                h_val = h[row_idx][col_idx]

                if not max_val or h_val > max_val['val']:
                    max_val = {'row_idx': row_idx, 'val': h_val}

            clusters.append(max_val['row_idx'])

        # clusters array, w, h
        return (clusters, w, h)
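# The column-wise max search above is equivalent to a single argmax over the
# rows of h; a minimal vectorized sketch under the same shapes:
import numpy as np

clusters = np.argmax(h, axis=0).tolist()  # row index of the max in each column of h

Example #3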
    def init_rois(self, n_components=100, show=False):
        Ain, Cin, center = greedyROI2d(self.Y,
                                       nr=n_components,
                                       gSig=[2, 2],
                                       gSiz=[7, 7],
                                       use_median=False)
        Cn = np.mean(self.Y, axis=-1)

        if show:
            pl1 = pl.imshow(Cn, interpolation='none')
            pl.colorbar()
            pl.scatter(x=center[:, 1], y=center[:, 0], c='m', s=40)
            pl.axis((-0.5, self.Y.shape[1] - 0.5, -0.5, self.Y.shape[0] - 0.5))
            pl.gca().invert_yaxis()

        active_pixels = np.squeeze(np.nonzero(np.sum(Ain, axis=1)))
        # Flatten the movie to (pixels x time); Fortran order matches the spatial masks in Ain
        Yr = np.reshape(self.Y,
                        (self.Y.shape[0] * self.Y.shape[1], self.Y.shape[2]),
                        order='F')
        P = arpfit(Yr, p=2, pixels=active_pixels)
        Y_res = Yr - np.dot(Ain, Cin)
        # Rank-1 NMF of the non-negative residual estimates the background component
        model = ProjectedGradientNMF(n_components=1,
                                     init='random',
                                     random_state=0)
        model.fit(np.maximum(Y_res, 0))
        fin = model.components_.squeeze()

        self.Yr, self.Cin, self.fin, self.Ain, self.P, self.Cn = Yr, Cin, fin, Ain, P, Cn
Example #4
def extract_codes(self,
                  X,
                  n_components=16,
                  log_amplitude=True,
                  **nmf_args):
    """Given a spectrogram, learn a dictionary of 2D patch atoms from spectrogram data
    inputs:
        X - spectrogram data (frequency x time)
        n_components - how many components to extract [16]
        log_amplitude - whether to apply log amplitude scaling log(1+X)
        **nmf_args - keyword arguments for ProjectedGradientNMF(...) [None]
    outputs:
        self.data - 2D patches of input spectrogram
        self.D.components_ - dictionary of 2D NMF components
    """
    zscore = False
    self._extract_data_patches(X, zscore, log_amplitude)
    self.n_components = n_components
    nmf_args.setdefault('sparseness', 'components')
    nmf_args.setdefault('init', 'nndsvd')
    nmf_args.setdefault('beta', 0.5)
    print("NMF...")
    self.model = ProjectedGradientNMF(n_components=self.n_components,
                                      **nmf_args)
    self.model.fit(self.data)
    self.D = self.model
Example #5
def _nmf(X, K):
    nmf = ProjectedGradientNMF(n_components=K, max_iter=1000)
    nmf.fit(X)

    B = nmf.components_
    A = np.dot(X, np.linalg.pinv(B))

    return (A, B)
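# Note that np.linalg.pinv solves an unconstrained least-squares problem, so A
# above can contain negative entries even though X and B are non-negative. A
# minimal sketch of one way to recover truly non-negative loadings, solving
# each row with scipy's NNLS (the helper name is mine):
import numpy as np
from scipy.optimize import nnls

def nonneg_loadings(X, B):
    # min ||B.T @ a - x||_2 subject to a >= 0, for each row x of X
    return np.vstack([nnls(B.T, x)[0] for x in X])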
Example #6
def fit(self, trainSamples, trainTargets):
    self.dataModel = MemeryDataModel(trainSamples, trainTargets)
    #print('train user:' + str(self.dataModel.getUsersNum()))
    V = self.dataModel.getData()
    model = ProjectedGradientNMF(n_components=self.factors,
                                 max_iter=1000,
                                 nls_max_iter=1000)
    # fit_transform already fits the model, so reuse components_ directly
    # rather than running a redundant second fit
    self.pu = model.fit_transform(V)
    self.qi = model.components_.transpose()
Example #7
def extract_codes(self, X, **kwargs):
    self.standardize = False
    self._extract_data_patches(X)
    kwargs.setdefault('sparseness', 'components')
    kwargs.setdefault('init', 'nndsvd')
    kwargs.setdefault('beta', 0.5)
    print("NMF...")
    self.model = ProjectedGradientNMF(n_components=self.n_components, **kwargs)
    self.model.fit(self.data)
    self.D = self.model
    return self
Example #8
    def __init__(self, model, beta=1, eta=0.1, init='nndsvd', max_iter=500,
                 n_components=100, nls_max_iter=2000, random_state=0,
                 sparseness=None, tol=0.0001):

        self.check_non_negtive(model)
        self.model = model
        super(NMFpredictor, self).__init__()

        self.nmf = ProjectedGradientNMF(beta=beta, eta=eta, init=init,
                                        max_iter=max_iter,
                                        n_components=n_components,
                                        nls_max_iter=nls_max_iter,
                                        random_state=random_state,
                                        sparseness=sparseness, tol=tol)
        self.user_latent_M, self.item_latent_M = self.construct_latent_matrics()
Example #9
    def __nmf_initialization(A, ncomms):
        try:
            from sklearn.decomposition import ProjectedGradientNMF
        except ImportError:
            print("sklearn module is missing.")
            return

        model = ProjectedGradientNMF(n_components=ncomms, init='nndsvd')
        Uin = np.asmatrix(model.fit_transform(A))
        Vin = np.asmatrix(model.components_)
        Vin = Vin.T
        init_dict = {'U': Uin, 'V': Vin}
        return init_dict
Example #10
def matrixFactorization(inmatrix, p_components=False):
    from sklearn.decomposition import PCA
    from sklearn.decomposition import ProjectedGradientNMF
    import pdb
    if p_components:
        p_comp = p_components
    else:
        # Keep enough principal components to stay within 90% cumulative
        # explained variance, and use that count as the NMF rank.
        pca = PCA(n_components=inmatrix.shape[1])
        pca.fit(inmatrix)
        explained_variance = pca.explained_variance_ratio_.cumsum()
        explained_variance = explained_variance[explained_variance <= .9]
        p_comp = len(explained_variance)
    model = ProjectedGradientNMF(n_components=p_comp,
                                 init='nndsvd',
                                 beta=1,
                                 sparseness=None)
    #pdb.set_trace()
    model.fit(inmatrix)
    return model
Example #11
def decomposition(V, W, H, n_components, solver='mu', update_H=True):
    if solver != 'project':
        W, H, _ = non_negative_factorization(V,
                                             W=W,
                                             H=H,
                                             n_components=n_components,
                                             update_H=update_H,
                                             max_iter=1000,
                                             solver=solver)
        #regularization='transformation', l1_ratio=0.1)
    else:
        model = ProjectedGradientNMF(n_components=n_components,
                                     init='random',
                                     random_state=0,
                                     sparseness='data',
                                     beta=0,
                                     max_iter=100000)
        # fit_transform both fits the model and returns W; read components_
        # afterwards (the original fitted twice and took H from the first fit)
        W = model.fit_transform(V)
        H = model.components_
    return W, H
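# ProjectedGradientNMF was deprecated in scikit-learn 0.17 and removed in 0.19;
# the non_negative_factorization branch above is the surviving API. A minimal
# sketch of a rough modern equivalent of the 'project' branch (parameter
# choices are illustrative; the projected-gradient solver itself is gone):
import numpy as np
from sklearn.decomposition import NMF

V = np.abs(np.random.RandomState(0).randn(20, 10))  # any non-negative matrix
model = NMF(n_components=5, init='random', random_state=0, max_iter=1000)
W = model.fit_transform(V)
H = model.components_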
Example #12
    def reducedim_nmf(self, factors):
        print "Number of factors is " + str(factors)

        model = ProjectedGradientNMF(n_components=factors,
                                     init='random',
                                     random_state=0)
        self.reducedmatrix = model.fit_transform(
            self.fullmatrix)  #left factor w (n*k)
        h = model.components_  #right factor h (k*d)

        if self.testing:
            print(self.fullmatrix)
            print(self.reducedmatrix)
            print(h)
            v = numpy.dot(self.reducedmatrix, h)
            print(v)
        print("Completed NMF routine")
        for vector in self.vectordict.values():
            vector.array = sparse.csc_matrix(
                self.reducedmatrix[vector.rowindex])
        print "Stored individual vectors"
Example #13
def perform_nmf(X, w_dir):

    # factorize composition into components
    print "Performing NMF..."
    n_com = 48
    model = ProjectedGradientNMF(n_components=n_com,
                                 sparseness='data',
                                 beta=1,
                                 eta=0.9,
                                 tol=0.000001,
                                 max_iter=2000,
                                 nls_max_iter=5000,
                                 random_state=None)
    model.fit(X)
    print(model.reconstruction_err_)
    nmf_components = model.components_
    print("done.")

    # visualize Base Rules
    # nmf_components = project_data(nmf_components)
    f_name = w_dir + "base_rules_48.png"
    visualize_base_rules(nmf_components, n_com, f_name)

    return model
Example #14
et = ExtraTreesClassifier()
ab = AdaBoostClassifier()
clf2 = svm.LinearSVC(penalty='l1', loss='l2', C=100, dual=False)
clf = svm.SVC(kernel='rbf')
logreg = linear_model.LogisticRegression(C=100, penalty='l2')
knn = KNeighborsClassifier(n_neighbors=5)
sgdc = SGDClassifier()
gnb = GaussianNB()
mnb = MultinomialNB()
bnb = BernoulliNB()
prcp = Perceptron()
rbm = BernoulliRBM(random_state=0, verbose=True)
rbm.learning_rate = 0.02
rbm.n_iter = 20
rbm.n_components = 1000
# NB: these rebindings shadow the imported class names (NMF, PCA, LDA)
NMF = ProjectedGradientNMF(n_components=2, init='random', random_state=0)
PCA = PCA()
LDA = LDA()
#ICA = ICA()

classifier = Pipeline(steps=[('rbm', rbm), ('logreg', logreg)])
file_handler_features = open('feature_vectors_heroes.csv', 'r')


def unique(training_data, test_data):
    for item in training_data:
        if item in test_data:
            print('Item in test')


def hold_out(training_data, results):
Example #15
#### THEIRS - not needed
# Example data matrix X

### MINE
X = DataFrame(matrix)
X_imputed = X.copy()
X = pa.DataFrame(matrix)  # `pa` is this source's pandas alias; DataFrame(toy_vals, index=range(nrows), columns=range(ncols))
### Use some way to mask only a few vals... that too, either 0 or 1
msk = (X.values + np.random.randn(*X.shape) - X.values) < 0.8
X_imputed.values[~msk] = 0


##THEIRS

# Hiding values to test imputation
# Initializing model
nmf_model = ProjectedGradientNMF(n_components=600, init='nndsvda', random_state=0,
                                 max_iter=300, eta=0.01, alpha=0.01)
nmf_model.fit(X_imputed.values)

# iterate model
#while nmf_model.reconstruction_err_**2 > 10:
    #nmf_model = NMF( n_components = 600, init='nndsvda', random_state=0,max_iter=300, eta=0.01, alpha = 0.01)
W = nmf_model.fit_transform(X_imputed.values)
X_imputed.values[~msk] = W.dot(nmf_model.components_)[~msk]
print(nmf_model.reconstruction_err_)

H = nmf_model.components_
rHat = np.dot(W,H)
np.savetxt("rHat.txt" ,rHat) 
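# The commented-out while-loop above hints at iterating the fit until the
# hidden entries stabilize; a minimal sketch of that iterate-and-refill scheme
# (the iteration count and error threshold are illustrative):
for _ in range(10):
    W = nmf_model.fit_transform(X_imputed.values)
    # refill only the hidden (masked-out) entries with the reconstruction
    X_imputed.values[~msk] = W.dot(nmf_model.components_)[~msk]
    if nmf_model.reconstruction_err_ ** 2 <= 10:
        break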
Example #16
import numpy
# Imports assumed by this snippet (not shown in the original excerpt):
from pymongo import MongoClient
from sklearn.decomposition import ProjectedGradientNMF


client = MongoClient('mongodb://localhost:27017/')
mydb = client['movie_database']


movies = mydb.movies.find()
i = 1
for movie in movies:
    print str(i)+" >> "+movie.get("title") +"--"+ movie.get("_id")
    i = i + 1
users = mydb.users.find()
i = 1
for user in users:
    print(str(i) + " >>" + user.get("_id") + "--" + user.get("password"))
    i = i + 1

activities = mydb.activity.find()
i = 1
for activity in activities:
    print(str(i) + " >>" + str(activity))
    i = i + 1

A = numpy.random.uniform(size=[40, 30])
nmf_model = ProjectedGradientNMF(n_components=5, init='random', random_state=0)
W = nmf_model.fit_transform(A)
H = nmf_model.components_


print(W)
print(H)
Example #17
LC = out.tolist()
X = []
Y = []
for i in LC:
    X.append(i[0])
    Y.append(i[1])

cpmC = pca.components_

for i in range(len(cpmC[1])):
    if cpmC[1][i] * cpmC[1][i] > 0.04:
        print app[i]
        print i

from sklearn.decomposition import ProjectedGradientNMF
# NB: the variable is (confusingly) still named `pca`, but it now holds an NMF model
pca = ProjectedGradientNMF(n_components=2)

out = pca.fit_transform(catBarr)

LC = out.tolist()
X = []
Y = []
Z = []
for i in LC:
    X.append(i[0])
    Y.append(i[1])

cpmC = pca.components_
lis1 = cpmC[0].tolist()

for i in range(len(lis1)):
Example #18
print("Adjusted Rand-Index: %.3f" %
      metrics.adjusted_rand_score(labels, km.labels_))
print(
    "Silhouette Coefficient: %0.3f" %
    metrics.silhouette_score(tfidf_matrix_train, km.labels_, sample_size=1000))

print()

# Build a Latent Dirichlet Allocation Model
lda_model = LatentDirichletAllocation(n_topics=NUM_TOPICS,
                                      max_iter=10,
                                      learning_method='online')
lda_Z = lda_model.fit_transform(data_vectorized)
print(lda_Z.shape)  # (NO_DOCUMENTS, NO_TOPICS)

pgnmf_model = ProjectedGradientNMF(n_components=NUM_TOPICS)
pgnmf_z = pgnmf_model.fit_transform(data_vectorized)
print(pgnmf_z.shape)  # (NO_DOCUMENTS, NO_TOPICS)

# Build a Non-Negative Matrix Factorization Model
nmf_model = NMF(n_components=NUM_TOPICS)
nmf_Z = nmf_model.fit_transform(data_vectorized)
print(nmf_Z.shape)  # (NO_DOCUMENTS, NO_TOPICS)

# Build a Latent Semantic Indexing Model
lsi_model = TruncatedSVD(n_components=NUM_TOPICS)
lsi_Z = lsi_model.fit_transform(data_vectorized)
print(lsi_Z.shape)  # (NO_DOCUMENTS, NO_TOPICS)

# Let's see what the first document in the corpus looks like in different topic spaces
print(lda_Z[0])
Example #19
# Split into training and test
#answers_train, answers_test, cats_train, cats_test = train_test_split(answers, cats, test_size = 0.3)#, random_state=42)

# Word counts
count_vect = CountVectorizer(stop_words = 'english')
answers_train = count_vect.fit_transform(answers_train)
answers_test = count_vect.transform(answers_test)

# Tf-idf
tfidf_transformer = TfidfTransformer()
answers_train = tfidf_transformer.fit_transform(answers_train)
answers_test = tfidf_transformer.transform(answers_test)

# NMF fit on training set
print("Fitting NMF on training word count matrix with shape" + str(answers_train.shape))
nmf = ProjectedGradientNMF(n_components = 100, max_iter=200)
answers_train = nmf.fit_transform(answers_train)
answers_test = nmf.transform(answers_test)

# Fit SVM classifier
print("Fitting SVM classifier on matrix with shape" + str(answers_train.shape))
svc = svm.LinearSVC()
svc.fit(answers_train, cats_train)

print("SVM train classification %: " + str(svc.score(answers_train, cats_train) * 100))
print("SVM test classification %: " + str(svc.score(answers_test, cats_test) * 100))
mc_label = Counter(cats_train).most_common(1)[0][0]
print("Best guess % = " + str( float(Counter(cats_test)[mc_label]) / len(cats_test) * 100))

# Metrics
np.set_printoptions(linewidth=200, precision=3)
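# The count-vectorize -> tf-idf -> NMF -> SVM chain above can be expressed as a
# single scikit-learn Pipeline; a minimal sketch (the modern NMF estimator
# stands in here for the removed ProjectedGradientNMF, other choices mirror the
# code above):
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.decomposition import NMF
from sklearn import svm

clf = Pipeline([('counts', CountVectorizer(stop_words='english')),
                ('tfidf', TfidfTransformer()),
                ('nmf', NMF(n_components=100, max_iter=200)),
                ('svc', svm.LinearSVC())])
# usage: clf.fit(answers_train, cats_train); clf.score(answers_test, cats_test)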
Example #20
if ans != "y":
    exit()

from sklearn.cluster import MiniBatchKMeans, KMeans
km = MiniBatchKMeans(n_clusters=k,
                     init='k-means++',
                     n_init=1,
                     init_size=1000,
                     batch_size=1000,
                     verbose=1)
km2 = KMeans(n_clusters=k, init='k-means++', verbose=1)
y2 = km2.fit_transform(X)

topics5 = [[(km.cluster_centers_[l][i], feature_names[i])
            for i in np.argsort(-np.abs(km.cluster_centers_[l]))[:10]]
           for l in range(k)]
print(topics5)

### NMF #######################
ans = input("Start NMF with Scikit ? ")
if ans != "y":
    exit()

from sklearn.decomposition import ProjectedGradientNMF
# BEWARE: THIS IS COMPUTATIONALLY INTENSIVE
nmf = ProjectedGradientNMF(n_components=k, max_iter=10, nls_max_iter=100)
nmf.fit(X)

topics6 = [[(nmf.components_[l][i], feature_names[i])
            for i in np.argsort(-np.abs(nmf.components_[l]))[:10]]
           for l in range(k)]
Example #21
fr = frame.drop('Email', 1)
#NMF will not use email or total score
fr = fr.drop('Total Score', 1)

feature_names = fr.columns

X = np.array(fr.astype(float))
'''for i in range(60):  # Test error as a function of number of topics
    model = ProjectedGradientNMF(n_components=i, init='nndsvda', random_state=0, max_iter=500)
    model.fit(X)
    print(i, model.reconstruction_err_)'''

model = ProjectedGradientNMF(n_components=11,
                             init='nndsvda',
                             random_state=0,
                             max_iter=500)  #Perform the NMF
Xtrans = model.fit_transform(X)

for topic_idx, topic in enumerate(
        model.components_
):  #Print the rubric items with strongest contribution in topics
    sorte = np.sort(topic)[::-1]
    sorteargs = np.argsort(topic)[::-1]
    i = 0
    print("Topic #%d:" % topic_idx)
    while sorte[i] > 1.5:  # Only show things where contribution is large (1.5 is arbitrary)
        print(feature_names[sorteargs[i]],
              np.mean(np.transpose(X)[sorteargs[i]]) / ptvals[feature_names[sorteargs[i]]])
        i += 1
Example #22
def driver_movie_data_test_sklearn(train_filename, test_filename, k):

    (A, movie_ids, user_ids, m_count, u_count) = read_data(train_filename)

    # Do nnmf
    #(U1,V1) = hack_nmf_iter(A,k,.07,16*A.nnz)

    model = ProjectedGradientNMF(n_components=k)

    model.fit(A)
    V1 = model.components_
    U1 = model.transform(A)
    print(A.shape)
    print(U1.shape)
    print(V1.shape)
    # Read test data
    (A, movie_ids, user_ids, m_count, u_count) = read_data(test_filename,
                                                           movie_ids,
                                                           user_ids,
                                                           m_count,
                                                           u_count,
                                                           discard=True)
    (error, del_U, del_V, random_pairs) = evaluate_gradients(A,
                                                             U1,
                                                             V1,
                                                             .07,
                                                             16 * A.nnz,
                                                             hard=True)

    reverse_user = inverse_map(user_ids)
    reverse_movie = inverse_map(movie_ids)

    # Test on Ratings!
    outfile = open("test.sklearn.predictions", "w")
    print("Doing %d test ratings" % A.nnz)
    (n, m) = A.shape
    for row in range(n):
        for row_col_index in range(A.indptr[row], A.indptr[row + 1]):
            col = A.indices[row_col_index]
            elt = A.data[row_col_index]
            print("%s,%s,%0.5f" % (reverse_movie[row],
                                   reverse_user[col],
                                   nd.dot(U1[row, :], V1[:, col])),
                  file=outfile)

    # Test on completely random pairs
    outfile = open("test.sklearn.rndpairs.predictions", "w")
    for n_pairs in range(1000):
        row = r.randint(0, n - 1)
        col = r.randint(0, m)
        print("%s,%s,%0.5f" % (reverse_movie[row],
                               reverse_user[col],
                               nd.dot(U1[row, :], V1[:, col])),
              file=outfile)

    # Test on a difficult distribution that emphasizes non-rated pairs where movies and users
    # are chosen based on rating count.
    outfile = open("test.sklearn.hard.rndpairs.predictions", "w")
    for n_pairs in range(1000):
        i = r.randint(0, A.nnz - 1)
        row = find_index(A.indptr, i)
        j = r.randint(0, A.nnz - 1)
        col = A.indices[j]
        if (row > A.shape[0] - 1):
            print(row, A.shape, "what is going on")
            continue
        if (col > A.shape[1] - 1):
            print(col, A.shape, "what is going on")
            continue
        #print("shape,row,col", A.shape, row, col)
        # if (A[row][col] > 0):
        #    continue
        print("%s,%s,%0.5f" % (reverse_movie[row],
                               reverse_user[col],
                               nd.dot(U1[row, :], V1[:, col])),
              file=outfile)

    print("test rsme", math.sqrt(error))
    for i in xrange(k):
        print("Factor:", i)
        print_movie_factor(U1, reverse_movie, i)
    return (U1, V1, reverse_movie, reverse_user)
Example #23
import numpy as np
X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import ProjectedGradientNMF
model = ProjectedGradientNMF(n_components=10, init='random', random_state=0)  # NB: n_components exceeds X's 2 columns
model.fit(X)

print(model.components_)
U = X.dot(model.components_.T)
print(U)
print(U.dot(model.components_))
print(model.reconstruction_err_)

model = ProjectedGradientNMF(n_components=2,
                             sparseness='components',
                             init='random',
                             random_state=0)
model.fit(X)
# The repr below is the estimator echo from the interactive session this
# example was taken from; as a script it is a no-op expression.
ProjectedGradientNMF(beta=1,
                     eta=0.1,
                     init='random',
                     max_iter=200,
                     n_components=2,
                     nls_max_iter=2000,
                     random_state=0,
                     sparseness='components',
                     tol=0.0001)
print(model.components_)
print(model.reconstruction_err_)
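# reconstruction_err_ in this estimator is the Frobenius norm of the residual
# X - WH; a minimal sketch checking it by hand (the two printed values should
# agree closely):
import numpy as np

W = model.fit_transform(X)   # refit to obtain W for the current model
H = model.components_
print(np.linalg.norm(X - W.dot(H)), model.reconstruction_err_)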
Example #24
def filter_1sigma_nmf_new(dma, iter_date, df, header_df):
    print('Get the 1-sigma filtered data')
    print(df.shape[1])
    idx_vt = df.shape[1] - 1
    mean_viewtime = df[idx_vt].mean()
    std_viewtime = df[idx_vt].std()

    print(mean_viewtime / 3600.0, std_viewtime / 3600.0)

    reduced_df = df[(df[idx_vt] >= LOW_LIMIT)
                    & (df[idx_vt] <= HIGH_LIMIT)].reset_index()
    print(reduced_df.shape)

    reduced_df[range(1, idx_vt)] = reduced_df[range(1, idx_vt)].div(
        1.0 * reduced_df[idx_vt], 'index')
    dev_id_list = reduced_df[0]

    reduced_df_vsum = reduced_df[range(1, idx_vt)].sum()
    reduced_df_vsum = reduced_df_vsum[reduced_df_vsum > 0.00]
    idx_list = reduced_df_vsum.index.tolist()
    reduced_df_1 = reduced_df[range(1, idx_vt)][reduced_df_vsum.index.tolist()]

    # Select the header accordingly
    reduced_header_df = header_df[idx_list]

    #program_viewtime_array = np.array(reduced_df[range(1,idx_vt)].astype(np.float))
    program_viewtime_array = np.array(reduced_df_1.astype(np.float))
    program_name_array = np.array(reduced_header_df)

    t_program_viewtime_array = program_viewtime_array.transpose()

    cluster_num = 14
    # Non-negative Matrix Factorization
    model = ProjectedGradientNMF(n_components=cluster_num,
                                 sparseness='data',
                                 init='nndsvd',
                                 max_iter=400,
                                 random_state=0)
    WW = model.fit_transform(t_program_viewtime_array)
    t_WW = WW.transpose()
    HH = model.components_
    t_HH = HH.transpose()
    #print t_HH.shape
    #print pd.DataFrame(t_HH).head()
    membership = [-1 for item in range(0, t_HH.shape[0])]
    # Assign the membership
    for i in range(0, t_HH.shape[0]):
        membership[i] = np.argmax(t_HH[i])

    dd = reduced_header_df
    print(dd.shape)
    print(program_name_array.shape)
    print(program_viewtime_array.shape)

    file = open(
        'decompose_results_clusters_%s_%s_%s.csv' %
        (iter_date.month, iter_date.day, dma), 'w')
    file.write(
        'Cluster_id,Dev_num,Household_num,Feature_val,Feature_fraction,Program_name\n'
    )
    file.write(
        '-1,%s,%s,,,\n' %
        (len(dev_id_list), get_household_num(dma, dev_id_list.tolist())))
    cluster_num = t_WW.shape[0]

    for i in range(0, cluster_num):
        dev_indices = [index for index, v in enumerate(membership) if v == i]
        dev_in_cluster = dev_id_list[dev_indices]
        dev_num = len(dev_in_cluster)
        household_num = get_household_num(dma, dev_in_cluster.tolist())

        #print heapq.nlargest(10,t_WW[i])
        feature_val = np.sort(t_WW[i])
        feature_val = feature_val[::-1]
        #print 't_WW:',t_WW[i]
        #print 'sorted t_WW:',feature_val
        val_sum = np.sum(feature_val)
        feature_frac = feature_val * 1.0 / val_sum
        accumulated_frac = 0
        cut_ind = 0
        for frac in feature_frac:
            accumulated_frac += frac
            cut_ind += 1
            if accumulated_frac > 0.6:
                break
        idx_list = np.argsort(t_WW[i])[::-1][:cut_ind]
        program_list = program_name_array[0][idx_list]
        for j in range(0, cut_ind):
            file.write('%s,%s,%s,%s,%s,%s\n' %
                       (i, dev_num, household_num, feature_val[j],
                        feature_frac[j], program_list[j]))
        #file.write(' '.join(program_name_array[0][idx_list]))
        #file.write('\n')
    file.close()
    #income_analysis(dma, dev_id_list, cluster_num, membership)
    #child_present_analysis(dma, dev_id_list, cluster_num, membership)
    #age_analysis(dma, dev_id_list, cluster_num, membership)
    clusters_obj = all_clusters(dma, cluster_num, dev_id_list, membership)
    return clusters_obj