Ejemplo n.º 1
0
create index site_index on filtered_data_index(site_type);
create index keyword_index on filtered_data_index(keyword);
create index timestamp_index on filtered_data_index(timestamp);

create index site_sum_index on sumdata_byday(site_type);
create index keyword_sum_index on sumdata_byday(keyword);
create index timestamp_sum_index on sumdata_byday(timestamp);
"""
import os, sys
from datetime import datetime
from dbutils import DBUtils
from strutils import get_timestamp

# Daily roll-up script: counts rows per (keyword, site_type) for one day and
# writes each count into the sumdata_byday table.
print(datetime.now())

# Day to aggregate: the first command-line argument when supplied, otherwise
# whatever get_timestamp(day_delta=1) returns.
if len(sys.argv) < 2:
    timestamp = get_timestamp(day_delta=1)
else:
    timestamp = sys.argv[1]

db = DBUtils()
try:
    # The original statement had no FROM clause. The source table is inferred
    # from the index DDL in the module docstring (filtered_data_index carries
    # the site_type/keyword/timestamp columns) -- TODO confirm.
    # timestamp is added to GROUP BY so every selected non-aggregate column is
    # grouped; the WHERE clause pins it to one value, so the groups are the same.
    # NOTE(review): the timestamp value is interpolated straight into the SQL
    # text. If DBUtils.execute_sql supports bound parameters, switch to them,
    # since the value can come from the command line.
    c = db.execute_sql(
        "select count(*),site_type,keyword,timestamp "
        "from filtered_data_index "
        "where timestamp=%s "
        "group by keyword,site_type,timestamp" % timestamp
    )
    for one in c.fetchall():
        # Four columns are selected, so four names are unpacked (the original
        # unpacked three and never bound `keyword`).
        count, site_type, keyword, row_timestamp = one
        values = {
            "timestamp": row_timestamp,
            "count": count,
            "keyword": keyword,
            "site_type": site_type,
        }
        db.insert("sumdata_byday", values)
finally:
    # Release the connection even if the query or an insert fails.
    db.close()
Ejemplo n.º 2
0
 column = {}
 while 1:
     line = f.readline()
     if not line:
         f.close()
         break
     line = str(line).strip()
     if line == "@" and bool(column):
         repx = re.compile(".+_(?P<date>\d{8})_(?P<time>\d{4})\.txt$")
         date_dict = re.match(repx, filename).groupdict()
         column["date"] = date_dict["date"]
         column["time"] = date_dict["time"]
         if column.has_key("udid"):
             column["uid"] = column["udid"]
             del column["udid"]
         db.insert(table_name, column)
         column = {}
         continue
     repx = re.compile("^@(?P<name>\w+):(?P<value>.+)")
     match_str = re.match(repx, line)
     if match_str:
         tmp_dict = match_str.groupdict()
         name = tmp_dict["name"].lower()
         value = tmp_dict["value"].strip()
         try:
             value = filter_tags(value).strip()
             value = replace(value, re.compile("'|\""), "")
             value = smart_utf8(value)
             column[name] = value
         except Exception, e:
             print e