def _array2mat(fl, flo, rStopWords, StemWords):
    """Pivot a (year, word, count) CSV into a words-by-years matrix CSV.

    Reads rows of (year, word, count) from *fl*, optionally drops stop
    words and/or Porter-stems the words (summing counts that share a
    stem), then writes *flo* with a header row of years and one row per
    word containing its count per year ('0' where absent).

    Args:
        fl: path of the input CSV with (year, word, count) rows.
        flo: path of the output matrix CSV.
        rStopWords: truthy -> skip rows whose word is in STOPWORDS.
        StemWords: truthy -> Porter-stem words, merging their counts.

    Returns:
        Tuple (first_year, last_year) of the sorted year columns.

    Raises:
        IndexError: if the input yields no rows (no years exist).
    """
    # newline='' is required by the csv module; both files are now closed
    # deterministically via `with` (the originals were never closed).
    with open(fl, 'r', newline='') as fin:
        terms = csv.reader(fin)
        if rStopWords:
            terms = (term for term in terms if term[1] not in STOPWORDS)
        if StemWords:
            # Aggregate counts of words that reduce to the same stem.
            p = PorterStemmer()
            counts = defaultdict(int)
            for term in terms:
                stem = p.stem(term[1], 0, len(term[1]) - 1)
                counts[(term[0], stem)] += int(term[2])
            terms = [(year, word, n) for (year, word), n in counts.items()]
        else:
            # Materialize before the file closes: csv.reader (and the
            # stop-word generator above) are lazy iterators.
            terms = list(terms)

    table = defaultdict(dict)
    words = set()
    for (year, word, item) in terms:
        table[num_if_is_number(year)][word] = item
        words.add(word)

    years = sorted(table.keys())
    with open(flo, 'w', newline='') as fout:
        fo = csv.writer(fout)
        fo.writerow(['Words'] + [str(year) for year in years])
        # Sort for deterministic output; set iteration order varies.
        for word in sorted(words):
            fo.writerow([word] + [table[year].get(word, '0') for year in years])
    return years[0], years[-1]
def render(vis, request, info):
    """Build the word-frequency data file backing a word-cloud view.

    Pulls table/field/filter options from *request.args*, runs a
    word-count SQL query through export_sql (served from cache unless
    reload=1), optionally removes stop words and/or Porter-stems the
    cached result, writes the edited CSV next to the cached one, and
    fills *info* in place with datfile/title/message entries for the
    template.

    Args:
        vis: visualization object; its .config is passed to export_sql.
        request: incoming web request exposing .args (Flask-style).
        info: dict mutated in place (message, message_class, datfile,
            title keys).
    """
    info["message"] = []
    reload = int(request.args.get("reload", '0'))
    table = request.args.get("table", '')
    where = request.args.get("where", '1=1')
    field = request.args.get("field", '')
    view = request.args.get("view", '')
    minlen = request.args.get("MinCharLength", '3')
    rStopWords = int(request.args.get("RemoveStopWords", '0'))
    StemWords = int(request.args.get("StemWords", '0'))
    start = request.args.get("start", '0')  # start at 0
    limit = request.args.get("limit", '200')

    if not table or not field:
        info["message"].append("table or field missing.")
        info["message_class"] = "failure"
    else:
        # SECURITY NOTE(review): table/field/where/minlen/limit/start come
        # straight from the request and are interpolated into SQL — this
        # is injectable. Identifiers cannot be bound as parameters, so
        # they should be validated against a whitelist; TODO confirm
        # whether export_sql or an upstream layer sanitizes these.
        sql = "select word, count(*) as n from (select regexp_split_to_table(regexp_replace(lower(coalesce(%s,'')),'[^a-z0-9@]+',' ','g'),' ') as word, * from %s where %s) as a where char_length(word) > %s group by 1 order by 2 desc limit %s offset %s" % (
            field, table, where, minlen, limit, start)
        (datfile, reload, result) = export_sql(sql, vis.config, reload, None, view)
        if result:
            # export_sql reported an error string: surface it and stop.
            info["message"].append(result)
            info["message_class"] = "failure"
        else:
            info["message_class"] = "success"
            if reload > 0:
                info["message"].append("Loaded fresh.")
            else:
                info["message"].append("Loading from cache. Use reload=1 to reload.")
            datfileNew = datfile + 'edited.csv'
            if reload:
                with open(datfile) as f:
                    terms = csv.reader(f)
                    if rStopWords:
                        terms = filter(lambda term: term[0] not in STOPWORDS, terms)
                    if StemWords:
                        # Merge counts of words sharing a Porter stem.
                        # NOTE(review): presumably the cached file has no
                        # header row, otherwise int() on the count column
                        # would fail here — confirm export_sql's format.
                        p = PorterStemmer()
                        d = defaultdict(int)
                        for term in terms:
                            d[p.stem(term[0], 0, len(term[0]) - 1)] += int(term[1])
                        terms = d.items()
                    header = ["text", "size"]
                    # newline='' per csv docs (avoids blank lines on
                    # Windows). Writing stays inside the outer `with`:
                    # filter/csv.reader are lazy over the open file.
                    with open(datfileNew, 'w', newline='') as f2:
                        cs = csv.writer(f2)
                        cs.writerow(header)
                        for term in terms:
                            cs.writerow(term)
            info["datfile"] = datfileNew

    # NOTE(review): args.get returns a single string when "pfield" is
    # present, so ','.join would join its characters — getlist looks
    # intended; verify against callers before changing.
    pfield = request.args.get("pfield", [])
    info["title"] = "FIELDS: <em>%s</em> from <br />TABLE: <em>%s</em>" \
        % (','.join(pfield), table)
    info["title"] = Markup(info["title"])
    info["message"] = Markup(''.join('<p>%s</p>' % m for m in info["message"] if len(m) > 0))