Exemple #1
0
def printDF(filename='df.txt'):
    """Serialize the result of ``df()`` as JSON into ``OUT_DIR + filename``.

    Args:
        filename: Name of the output file. It is appended directly to
            ``OUT_DIR``; no path separator is inserted here.

    ``checkMkdir(OUT_DIR)`` is called before the file is opened;
    presumably it creates the directory when missing -- confirm against
    its definition.
    """
    import json
    wc = df()
    checkMkdir(OUT_DIR)
    # The context manager closes the file even if json.dump raises
    # (e.g. when df() returns something that is not JSON-serializable).
    with open(OUT_DIR+filename, 'w') as out:
        json.dump(wc, out)
Exemple #2
0
def products2text(products, category):
    """Write the reviews of each product to a UTF-8 text file.

    Files are placed in ``PLAIN_DIR + category + '/'`` and named after
    ``unicode(prod)`` with every '/' removed and every space replaced by
    '_', plus a ``.txt`` extension.

    Args:
        products: Iterable of product objects exposing
            ``getReviews(htmlStyle=True)``, which must return an iterable
            of strings (they are joined with newlines).
        category: Sub-directory name appended to ``PLAIN_DIR``.
    """
    dirname = PLAIN_DIR+category+'/'
    checkMkdir(dirname)
    for prod in products:
        content = prod.getReviews(htmlStyle=True)
        filename = unicode(prod).replace('/','').replace(' ','_') + '.txt'
        # ``with`` guarantees the handle is released even when the write
        # fails (e.g. on an encoding error), instead of leaking it.
        with codecs.open(dirname+filename, 'w', 'utf-8') as fout:
            print >>fout, '\n'.join(content)
Exemple #3
0
def outDF(filename='df.db'):
    """Persist the mapping returned by ``df()`` to a shelve database.

    Two artifacts are written under ``OUT_DIR``:

    * ``filename + 'keys.txt'`` -- ``pp_str(wc.keys())`` followed by a
      newline;
    * ``filename`` -- a shelve whose keys are the UTF-8 encoded keys of
      the mapping and whose values are the corresponding values.

    The number of entries is printed to standard output.

    Args:
        filename: Base name of the shelve file, appended to ``OUT_DIR``.
    """
    wc = df()
    print(len(wc))
    checkMkdir(OUT_DIR)
    # Close the keys file even if pp_str() or the write raises.
    with open(OUT_DIR+filename+'keys.txt', 'w') as out:
        print >>out, pp_str(wc.keys())
    d = shelve.open(OUT_DIR+filename)
    # Python 2 shelves are not context managers, hence try/finally so the
    # database is always closed, even if encoding a key fails.
    try:
        for k, v in wc.iteritems():
            # Encode to UTF-8 bytes before using the key in the shelve.
            d[k.encode('utf-8')] = v
    finally:
        d.close()
Exemple #4
0
def outNgram(n, filesuffix='gram.db'):
    """Persist the mapping returned by ``ngram(n)`` to a shelve database.

    The base file name is ``str(n) + filesuffix``. Two artifacts are
    written under ``OUT_DIR``:

    * ``<base>keys.txt`` -- ``pp_str(wc.keys())`` followed by a newline;
    * ``<base>`` -- a shelve keyed by the '-'-joined, UTF-8 encoded form
      of each key of the mapping.

    The number of entries is printed to standard output.

    Args:
        n: Passed to ``ngram``; also used as the file name prefix.
        filesuffix: Suffix appended to ``str(n)`` to form the file name.
    """
    filename = str(n)+filesuffix
    wc = ngram(n)
    print(len(wc))
    checkMkdir(OUT_DIR)
    # Close the keys file even if pp_str() or the write raises.
    with open(OUT_DIR+filename+'keys.txt', 'w') as out:
        print >>out, pp_str(wc.keys())
    d = shelve.open(OUT_DIR+filename)
    # Python 2 shelves are not context managers, hence try/finally so the
    # database is always closed, even if building a key fails.
    try:
        for k, v in wc.iteritems():
            # Each key is an iterable of strings; join its parts with '-'
            # and encode to UTF-8 bytes to form the shelve key.
            d[u'-'.join(k).encode('utf-8')] = v
    finally:
        d.close()
Exemple #5
0
def createRank(minReviewCount=0, minProductCount=0, onlyValidCategory=False):
    """Write one ranking file per category into ``RANK_DIR``.

    For every ``(category, products)`` pair yielded by
    ``iterAllProducts(minReviewCount)``, a UTF-8 file named after the
    category is written. Its first line is the number of products; each
    following line is ``unicode(prod)``, a tab, and the product's
    ``'CategoryName'`` value.

    Args:
        minReviewCount: Forwarded unchanged to ``iterAllProducts``.
        minProductCount: Categories with fewer products are skipped.
        onlyValidCategory: When true, a category is skipped entirely (no
            file is written) as soon as one of its products has a
            ``'CategoryName'`` whose last '>'-separated component differs
            from the category name.
    """
    checkMkdir(RANK_DIR)
    for category, products in iterAllProducts(minReviewCount):
        if len(products) < minProductCount:
            continue
        ranking = []
        for prod in products:
            prodCategory = prod['CategoryName']
            if onlyValidCategory and category != prodCategory.split('>')[-1]:
                # One mismatching product invalidates the whole category:
                # breaking out skips the for-else branch below.
                break
            ranking.append(unicode(prod)+'\t'+prodCategory)
        else:
            # Reached only when the loop finished without a break.
            # ``with`` closes the file even if the write raises.
            with codecs.open(RANK_DIR+category, 'w', 'utf-8') as fout:
                print >>fout, str(len(products)) + '\n' + '\n'.join(ranking)
Exemple #6
0
def products2html(products, category=None, maxReview=None):
    """Render the reviews of each product into a UTF-8 HTML file.

    Each review is wrapped in its own ``<div class="review">`` element,
    and the whole page is passed through ``wrapHtml`` before being
    written. Files are named after ``unicode(prod)`` with every '/'
    removed and every space replaced by '_', plus a ``.html`` extension.

    Args:
        products: Iterable of product objects exposing
            ``getReviews(max=..., htmlStyle=True)``.
        category: Optional sub-directory of ``HTML_DIR``. When given,
            files go to ``HTML_DIR + category + '/'`` and ``wrapHtml`` is
            called with ``needRoot=True``; otherwise they go directly to
            ``HTML_DIR`` with ``needRoot=False``.
        maxReview: Forwarded to ``getReviews`` as ``max``.
    """
    dirname = HTML_DIR
    needRoot = False
    if category is not None:
        dirname += category + '/'
        needRoot = True
    checkMkdir(dirname)
    for prod in products:
        reviews = prod.getReviews(max=maxReview, htmlStyle=True)
        # Build the body with a single join rather than repeated ``+=``:
        # consecutive reviews are separated by closing one div and
        # opening the next. With no reviews this yields one empty div.
        content = (
            '<div class="review">'
            + '</div>\n\n<div class="review">'.join(
                getHtmlContent(review, i) for i, review in enumerate(reviews))
            + '</div>'
        )
        filename = unicode(prod).replace('/','').replace(' ','_') + '.html'
        # ``with`` closes the file even if wrapHtml() or the write raises.
        with codecs.open(dirname+filename, 'w', 'utf-8') as fout:
            print >>fout, wrapHtml(content, needRoot=needRoot)