예제 #1
# Entry point: builds an article/word matrix from the feeds in `feedlist`,
# factorizes it with NMF.factorize and prints what get_features returns.
def main():
    # NOTE(review): this list literal is opened but never closed in this copy
    # of the code, so the feed entries appear to be missing and the function
    # cannot parse as-is -- restore the entries and the closing bracket.
    feedlist = [

    # article_titles is unpacked here but not read again in this function.
    all_words, article_titles, article_words = get_news_text(feedlist)
    articlemx, word_vec = make_article_matrix(all_words, article_words)

    # Get weight and feature matrix
    # `matrix` is not defined in this block -- presumably numpy's matrix type;
    # confirm against the file's imports.
    v = matrix(articlemx)
    # pc and iter are forwarded to NMF.factorize unchanged -- presumably the
    # number of components and the iteration count; confirm in NMF.
    weights, feats = NMF.factorize(v, pc=10, iter=10)
    top_num = 15
    pattern_names = get_features(top_num, weights, feats, word_vec)

    print pattern_names
# Entry point: builds an article/word matrix from the feeds in `feedlist`,
# factorizes it with NMF.factorize and prints what get_features returns.
def main():
    # NOTE(review): this list literal is opened but never closed in this copy
    # of the code, so the feed entries appear to be missing and the function
    # cannot parse as-is -- restore the entries and the closing bracket.
    feedlist = [
    # article_titles is unpacked here but not read again in this function.
    all_words, article_titles, article_words = get_news_text(feedlist)
    articlemx, word_vec = make_article_matrix(all_words, article_words)
    # Get weight and feature matrix
    # `matrix` is not defined in this block -- presumably numpy's matrix type;
    # confirm against the file's imports.
    v = matrix(articlemx)
    # pc and iter are forwarded to NMF.factorize unchanged -- presumably the
    # number of components and the iteration count; confirm in NMF.
    weights, feats = NMF.factorize(v,pc=10,iter=10)
    # The trailing semicolon on the next line is a harmless empty statement.
    top_num = 15;
    pattern_names = get_features(top_num, weights, feats, word_vec)
    print pattern_names
# Entry point with two parts: a "proactive" part that derives patterns from
# news feeds, classifies them into product categories and votes on those
# categories, and a "reactive" part that matches a hard-coded user_input
# against product names read from the tbProducts table.
# NOTE(review): several statements in this copy are visibly truncated (see the
# notes below), so the function cannot parse as-is.
def main():
    # Proactive recommendations, based on daily news
    print 'Proactive Daily Recommendations: '
    # Daily News are extracted from these feeds
    # NOTE(review): the list literal below is never closed; the feed entries
    # and the closing bracket appear to be missing.
    feedlist = [
    # article_titles is unpacked here but not read again in this function.
    all_words, article_titles, article_words = NewsParser.get_news_text(feedlist)
    articlemx, word_vec = NewsParser.make_article_matrix(all_words, article_words)
    # Get weight and feature matrix
    # `matrix` is not defined in this block -- presumably numpy's matrix type;
    # confirm against the file's imports.
    v = matrix(articlemx)
    pattern_num = 30
    # This local name shadows the builtin iter() for the rest of the function.
    iter = 10
    weights, feats = NMF.factorize(v,pattern_num,iter)
    top_num = 15;
    # Get 30 patterns from daily news
    pattern_names = NewsParser.get_features(top_num, weights, feats, word_vec)           
    # Train the data
    # The file handle is never closed in the visible code, and the path is an
    # absolute path on one developer's machine.
    trainingdata_file = open('/Users/hanhanwu/Documents/workspace/PythonLearning/Sellers++/training_data','r')
    # Both classifiers are trained below, but only cl2 is read afterwards.
    cl1 = MyClassifiers.classifier(MyClassifiers.get_words)
    cl2 = MyClassifiers.fisherclassifier(MyClassifiers.get_words)
    for line in trainingdata_file:
        # Each line is split on '****': the text before the separator is the
        # item, and the first comma-separated field after it is the category.
        elems = line.split('****')
        cate = elems[1].split(',')[0]
        item = elems[0]
        cl1.train(item, cate)
        cl2.train(item, cate)
    trainingdata_categories = cl2.categories()
    amazon_categories = RSSParser.get_product_category()
    # Categories reported by RSSParser that the training data does not cover.
    new_categories = list(set(amazon_categories) - set(trainingdata_categories))
    # When new categories appear, send me a notice
    if len(new_categories) > 0:
        print 'Update the training data: '
        print new_categories
    # Count how many patterns were assigned to each category.
    category_vote = {}
    for p in pattern_names:
        # max_prob is returned alongside the category but not used here.
        fit_category, max_prob = MyClassifiers.get_category(cl2, trainingdata_categories, p)
        category_vote.setdefault(fit_category, 0)
        category_vote[fit_category] += 1
    # Sort by (vote count, category name), highest first.  The tuple-parameter
    # lambda is Python 2-only syntax.
    sorted_vote = sorted(category_vote.iteritems(), key = lambda (k,v): (v,k), reverse = True)
    # Based on this sorted votes, recommended new products in each voted category based on the ratio, products with deals come first
    daily_recommendations = {}
    for t in sorted_vote:
        prod_category = t[0]  
        prod_amount = t[1]
        # The empty dict assigned here is replaced immediately by the call below.
        new_product_info = {}
        new_product_info = RSSParser.get_newproduct_info(prod_category, prod_amount)
        if len(new_product_info) < prod_amount:
            # NOTE(review): neither new_product_info nor this no-deal result is
            # ever stored in daily_recommendations in the visible code, so the
            # report below always iterates an empty dict -- the merging lines
            # look like they were lost.
            new_product_info_nodeal = RSSParser.get_newproduct_info(prod_category, prod_amount, deal=0)
    print 'daily recommendations: '
    for pname, pinfo in daily_recommendations.iteritems():
        print 'Product Name: ', pname
        print 'Product Price: ', pinfo['current_price']
    print '**********************************************************'
    # This variable is the user input, you can change this to test
    user_input = 'Stark Electric Small Mini Portable Compact Washer Washing'
    # Reactive recommendations, based on the product name provided by the user
    # NOTE(review): the connect() call below is cut off after its first
    # argument; the remaining arguments and closing parenthesis are missing.
    conn = MySQLdb.connect(host='localhost',
    x = conn.cursor()
    max_ratio = 0
    real_pname = ''
    # NOTE(review): the next line is bare SQL text and the lines after it are
    # indented one level deeper, which suggests an enclosing statement (likely
    # a try: plus an x.execute(""" opener) was lost -- confirm and restore.
                  SELECT ProductName FROM tbProducts;
        numrows = x.rowcount
        # Keep the stored product name with the highest Levenshtein.ratio
        # against user_input; real_pname stays '' if no ratio exceeds 0.
        for i in xrange(0,numrows):
            p_name = x.fetchone()[0]
            ledist = Levenshtein.ratio(p_name, user_input)
            if ledist > max_ratio:
                max_ratio = ledist
                real_pname = p_name   
        if real_pname != '':
            print 'Product Name', real_pname
            # NOTE(review): the statement below is again bare SQL followed by
            # the tail of a call; the x.execute(""" opener appears to be
            # missing.  real_pname is passed as a bound parameter, and the
            # first column of the first fetched row is printed as the price.
            SELECT CurrentPrice FROM tbProducts WHERE ProductName = %s
            """, (real_pname,))
            print 'Predicted Price', x.fetchall()[0][0]
예제 #4
def run(data):
    """Factorize a new ``parameter.Parameter`` and hand the outcome to ``result``.

    The incoming ``data`` argument is never read: a fresh
    ``parameter.Parameter()`` is built and used in its place.  The meaning of
    the literal ``1000`` is defined by ``NMF.factorize``.
    """
    params = parameter.Parameter()
    weights, features = NMF.factorize(params, 1000)
    result(params, weights, features)
    # NOTE(review): the two bare tuples below have no effect at runtime; they
    # look like the elements of a ticker list whose opening line was lost.
    'YHOO', 'AVP', 'BIIB', 'BP', 'CL', 'CVX', 'DNA', 'EXPE', 'GOOG', 'PG',
    'XOM', 'AMGN'

# Download daily price tables for a fixed set of tickers, collect column 5 of
# every row per ticker, and factorize the resulting (day x ticker) matrix.
from contextlib import closing

# NOTE(review): nothing in this script bound `tickers`; the list literal's
# opening line appears to have been lost, leaving only its elements behind as
# bare expression statements earlier in the file.  The list is rebuilt here
# from those elements so the loop below has something to iterate.
tickers = [
    'YHOO', 'AVP', 'BIIB', 'BP', 'CL', 'CVX', 'DNA', 'EXPE', 'GOOG', 'PG',
    'XOM', 'AMGN',
]

# Upper bound on the number of rows used; lowered to the shortest series seen.
shortest = 300
prices = {}
dates = None

for t in tickers:
    # TODO: fix out-of-date Yahoo URL.
    # NOTE(review): the original expression ended in a dangling `+ \`, which
    # swallowed the following print statement and made the script unparsable.
    # A trailing query fragment may have been lost -- confirm when the URL is
    # updated.
    url = ('http://ichart.finance.yahoo.com/table.csv?' +
           's=%s&d=11&e=26&f=2006&g=d&a=3&b=12&c=1996' % t)
    print(url)

    # urllib2 responses are not context managers on Python 2, hence closing().
    with closing(urllib2.urlopen(url)) as response:
        rows = response.readlines()

    # Skip the header row and blank lines; split each remaining row once.
    fields = [r.split(',') for r in rows[1:] if r.strip()]

    prices[t] = [float(f[5]) for f in fields]
    shortest = min(shortest, len(prices[t]))

    # Dates are taken from the first ticker that yields any rows.
    if not dates:
        dates = [f[0] for f in fields]

# One row per day (limited to the shortest series), one column per ticker.
l1 = [[prices[t][j] for t in tickers] for j in range(shortest)]

w, h = NMF.factorize(matrix(l1), pc=5)

print(h)
print(w)
예제 #6
def run(data):
    """Factorize a new ``parameter.Parameter`` and hand the outcome to ``result``.

    The incoming ``data`` argument is never read: a fresh
    ``parameter.Parameter()`` is built and used in its place.  The meaning of
    the literal ``1000`` is defined by ``NMF.factorize``.
    """
    params = parameter.Parameter()
    weights, features = NMF.factorize(params, 1000)
    result(params, weights, features)
예제 #7
# Script: reads a data matrix, passes it through pruning() and tfidf(), writes
# it back out, then runs three analyses on the same data -- hierarchical
# clustering (HAC), k-means (kcluster) and NMF factorization.
import parser
from preprocess import *

# NOTE(review): `par` is not referenced again in the visible script.
par = parser.Parser()
# readfile, pruning, tfidf and writefile are not defined here -- presumably
# they come from the star import of `preprocess` above; confirm.
rownames, colnames, data = readfile()
# NOTE(review): 0.05 and 0.9 look like lower/upper cut-offs used when pruning
# columns -- confirm their meaning in pruning().
data, colnames = pruning(data, colnames, 0.05, 0.9)
data = tfidf(data)
writefile(rownames, colnames, data)

import HAC
analyser = HAC.HAC()
# This first result is overwritten by the cosineSimilarity run just below
# before anything reads it.
clust = analyser.hcluster(data)

# cosineSimilarity is not defined here -- presumably supplied by one of the
# star imports; confirm.
clust = analyser.hcluster(data, cosineSimilarity)

analyser.printclust(clust, rownames)

from kmeans import *
clusters = kcluster(data)
printcluster(clusters, rownames)

import NMF
import numpy

v = numpy.matrix(data)
# pc and iter are forwarded to NMF.factorize unchanged -- presumably the
# number of components and the iteration count; confirm in NMF.
weights, feat = NMF.factorize(v, pc=20, iter=50)
# topp and pn are not used further in the visible script.
topp, pn = NMF.showfeatures(weights, feat, rownames, colnames)