コード例 #1
0
ファイル: sum_data.py プロジェクト: sdgdsffdsfff/v2
create index site_index on filtered_data_index(site_type);
create index keyword_index on filtered_data_index(keyword);
create index timestamp_index on filtered_data_index(timestamp);

create index site_sum_index on sumdata_byday(site_type);
create index keyword_sum_index on sumdata_byday(keyword);
create index timestamp_sum_index on sumdata_byday(timestamp);
"""
import os, sys
from datetime import datetime
from dbutils import DBUtils
from strutils import get_timestamp

# Daily roll-up script: counts the rows recorded for one timestamp, grouped by
# keyword and site_type, and stores one summary row per group in sumdata_byday.
print(datetime.now())  # single-argument form prints the same on Python 2 and 3

# The timestamp to summarise is the first command-line argument; without one
# the script falls back to get_timestamp(day_delta=1) (presumably the previous
# day -- confirm against strutils).
if len(sys.argv) < 2:
    timestamp = get_timestamp(day_delta=1)
else:
    timestamp = sys.argv[1]

db = DBUtils()
try:
    # Fixes over the previous version:
    #   * the query had no FROM clause; the source table is assumed to be
    #     filtered_data_index, the table this module's docstring indexes on the
    #     same three columns -- TODO confirm;
    #   * timestamp is added to GROUP BY so every selected non-aggregate column
    #     is grouped (it is already fixed by the WHERE filter).
    # NOTE(review): timestamp is interpolated directly into the SQL text and may
    # come from argv; switch to bound parameters if DBUtils.execute_sql supports
    # them.
    c = db.execute_sql(
        "select count(*),site_type,keyword,timestamp from filtered_data_index "
        "where timestamp=%s group by keyword,site_type,timestamp" % timestamp
    )
    for one in c.fetchall():
        # Each row carries the four selected columns, in SELECT order. The old
        # code unpacked only three names and never bound `keyword`.
        count, site_type, keyword, row_timestamp = one
        values = {
            "timestamp": row_timestamp,
            "count": count,
            "keyword": keyword,
            "site_type": site_type,
        }
        db.insert("sumdata_byday", values)
finally:
    # Release the connection even when the query or an insert fails.
    db.close()
コード例 #2
0
ファイル: restore_data.py プロジェクト: sdgdsffdsfff/v2
        blogt  varchar(255) NULL,
        date DATE NULL,
        time TIME NULL,      
        url  varchar(255) NULL,
        keyword varchar(255) NULL,
        title  varchar(255) NULL,
        article TEXT NULL
);\n
create index site_index_%(timestamp)s on %(table_name)s(site);\n
create index date_index_%(timestamp)s on %(table_name)s(date);\n
"""

# Per-timestamp root directory that the loading loop below walks.
DATA_ROOT = "/data1/dspider/data/bak/%s/" % timestamp

# Run the CREATE_SQL template (column definitions plus the site/date indexes)
# for this table name and timestamp on its own connection. The finally clause
# closes that connection even if the statement fails; previously an exception
# from execute_sql skipped db.close().
db = DBUtils()
try:
    db.execute_sql(CREATE_SQL % {"table_name": table_name, "timestamp": timestamp})
finally:
    db.close()

db = DBUtils()
for pathname in os.listdir(DATA_ROOT):
    path = os.path.join(DATA_ROOT, pathname)
    for filename in os.listdir(path):
        data_file = os.path.join(path, filename)
        try:
            f = open(data_file, "r")
            column = {}
            while 1:
                line = f.readline()
                if not line:
                    f.close()
                    break