コード例 #1
0
def main():
    """Rebuild the SiteRule collection from ``scripts/sub_or_article.csv``.

    Each non-blank line of the file must hold exactly five comma-separated
    fields, in this order: title, site URL, sub-index URL, sub-index title,
    sub-index category.  For every row:

    * rows whose sub-index title is ``Home`` are skipped;
    * a site URL that does not start with ``http://`` or ``https://`` gets
      ``http://`` prepended, and a single trailing ``/`` is removed;
    * the sub-index title is transcoded from GB18030 to UTF-8;
    * the category is stored only when it is not the literal ``Null``.

    One ``SiteRule`` is saved per remaining row, and the stored values are
    echoed to stdout.

    Raises:
        IOError: if the CSV file cannot be opened.
        ValueError: if a non-blank line does not contain exactly five fields.
    """
    # Open the input *before* dropping the collection so that a missing or
    # unreadable file does not leave the collection empty.
    with open('scripts/sub_or_article.csv', 'r') as fin:
        SiteRule.drop_collection()
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            # Strip every field once, up front, so the sentinel comparisons
            # and the URL clean-up below see normalized values.
            fields = [field.strip() for field in line.split(',')]
            title, site_url, sub_index_url, sub_index_title, sub_index_cate = fields

            if sub_index_title == 'Home':
                continue

            # A substring test would treat hosts such as "www.httpbin.org"
            # as already having a scheme; check the prefix instead.
            if not site_url.startswith(('http://', 'https://')):
                site_url = 'http://%s' % site_url
            site_url = re.sub(r'/$', '', site_url)

            # Transcode once and reuse for both the record and the log line.
            sub_index_title = sub_index_title.decode('gb18030').encode('utf8')

            rules = SiteRule()
            rules.title = title
            rules.site_url = site_url
            rules.sub_index_title = sub_index_title
            rules.sub_index_url = sub_index_url
            if sub_index_cate != 'Null':
                rules.category_name = sub_index_cate

            # Single-argument print keeps the space-separated output format.
            print('%s %s %s %s %s' % (
                title, site_url, sub_index_cate, sub_index_title, sub_index_url))
            rules.save()