예제 #1
0
def update_province_area_state(res):
    """
    Update the province-level ``Count`` rows from ``res``.

    Each entry of ``res`` is read through its ``'comment'`` and
    ``'provinceShortName'`` keys. A ``Count`` row with a matching
    ``area_name`` is reused when one exists; otherwise a new row is
    created and added to the session. Everything is committed once at
    the end.

    :param res: iterable of dict-like province entries
    :return: None
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    province_rows = (
        session.query(Count).filter(Count.area_type == 'province').all()
    )
    # Index the stored rows by province name for direct lookup below.
    rows_by_name = {row.area_name: row for row in province_rows}

    for entry in res:
        comment = entry['comment']
        short_name = entry['provinceShortName']
        row = rows_by_name.get(short_name)
        if not row:
            row = Count()
            row.from_source = 'dxy'
            row.area_name = short_name
            row.area_type = 'province'
            row.province = short_name
            session.add(row)
        json_to_model(row, entry)
        # A non-empty comment is parsed and applied on top of the entry.
        if comment != '':
            filter_data(format_data(comment), row)
        row.update_time = timestamp

    session.commit()
예제 #2
0
def main():
    """
    Schedule one crawl per active site and run the crawler process.

    Active ``Site`` rows are loaded once. When there are none, a hint is
    printed and the function returns without starting the crawler
    process, since there is nothing to crawl.

    :return: None
    """
    process = CrawlerProcess(get_project_settings())

    # Load the rows once so the emptiness check and the loop below share
    # a single query instead of a COUNT followed by a second SELECT.
    sites = session.query(Site).filter(Site.active.is_(True)).all()
    if not sites:
        print("No site urls added. "
              'Use "python create_site_url.py" command to add url.')
        return

    for site in sites:
        process.crawl(SPIDERS_MAP[site.site], site_id=site.id)

    process.start()
예제 #3
0
def save_articles(data):
    """
    Insert the articles from ``data`` whose title is not stored yet.

    :param data: iterable of dict-like items providing the ``'title'``,
        ``'time'``, ``'detail'`` and ``'link'`` keys; ``'time'`` must
        match the ``"%Y.%m.%d%H:%M:%S"`` format
    :return: None
    """
    # Only the titles are needed, so select just that column; a set gives
    # constant-time membership checks.
    known_titles = {title for (title,) in session.query(Article.title)}

    for item in data:
        title = item['title']
        if title in known_titles:
            continue
        article = Article()
        article.time = time.strptime(item['time'], "%Y.%m.%d%H:%M:%S")
        article.detail = item['detail']
        article.link = item['link']
        article.title = title
        session.add(article)
        # Remember the title so a repeat later in the same batch is not
        # inserted a second time.
        known_titles.add(title)

    session.commit()
예제 #4
0
def save_country_data(res):
    """
    Save the nationwide data.

    Looks up the ``Count`` row whose ``area_name`` is ``'全国'``, creating
    and adding it to the session when it does not exist, passes it with
    ``res`` to ``filter_data``, stamps the update time and commits.

    :param res: data handed unchanged to ``filter_data``
    :return: None
    """
    nationwide = (
        session.query(Count).filter(Count.area_name == '全国').first()
    )
    if not nationwide:
        nationwide = Count()
        initial_values = (
            ('area_type', 'country'),
            ('area_name', '全国'),
            ('from_source', 'dxy'),
        )
        for attribute, value in initial_values:
            setattr(nationwide, attribute, value)
        session.add(nationwide)

    filter_data(res, nationwide)
    nationwide.update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    session.commit()
예제 #5
0
def save_data(data):
    """
    Store the video entries from ``data`` as ``CCTVVideo`` rows.

    An entry is skipped when its ``'wwwUrl'`` equals the ``source_url``
    of one of the five stored videos with the latest ``date``. All new
    rows are committed together at the end.

    :param data: iterable of dict-like items providing the ``'title'``,
        ``'wwwUrl'`` and ``'pubTime'`` keys
    :return: None
    """
    latest_videos = (
        session.query(CCTVVideo).order_by(desc(CCTVVideo.date)).limit(5)
    )
    recent_urls = [stored.source_url for stored in latest_videos]

    for entry in data:
        if entry['wwwUrl'] not in recent_urls:
            record = CCTVVideo()
            record.title = entry['title']
            record.source_url = entry['wwwUrl']
            record.id = uuid.uuid4().hex
            # pubTime is divided by 1000 before the local-time conversion
            # (presumably epoch milliseconds -- confirm with the feed).
            seconds = float(entry['pubTime'] / 1000)
            record.date = time.strftime(
                "%Y-%m-%d %H:%M:%S", time.localtime(seconds))
            session.add(record)

    session.commit()
예제 #6
0
def sync_area_data():
    """
    Pass the Hubei, Beijing and nationwide rows through ``sync_data``.

    For each area the first row of its dedicated model (``HubeiCount``,
    ``BeiJingCount``, ``ChinaCount``) is paired with the ``Count`` row of
    the matching ``area_name`` and handed to ``sync_data``; the session
    is committed once afterwards.

    :return: None
    """
    area_names = ('湖北', '北京', '全国')
    area_models = (HubeiCount, BeiJingCount, ChinaCount)

    # All Count rows are fetched first, then the per-area rows.
    count_rows = [
        session.query(Count).filter(Count.area_name == name).first()
        for name in area_names
    ]
    area_rows = [session.query(model).first() for model in area_models]

    for area_row, count_row in zip(area_rows, count_rows):
        sync_data(area_row, count_row)
    session.commit()
예제 #7
0
def update_city_area_state(res):
    """
    Update the city-level ``Count`` rows from ``res``.

    Each entry of ``res`` provides ``'provinceShortName'`` and a
    ``'cities'`` list whose items carry ``'cityName'``. A stored row is
    matched on province plus city name; when none exists a new row is
    created and added to the session. Everything is committed once at
    the end.

    :param res: iterable of dict-like province entries
    :return: None
    """
    def lookup_key(province_name, city_name):
        # Composite key so equal city names in different provinces differ.
        return "city_" + province_name + "_" + city_name

    city_rows = session.query(Count).filter(Count.area_type == 'city').all()
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    rows_by_key = {
        lookup_key(row.province, row.area_name): row for row in city_rows
    }

    for province_entry in res:
        cities = province_entry['cities']
        province_name = province_entry['provinceShortName']
        for city in cities:
            city_name = city['cityName']
            row = rows_by_key.get(lookup_key(province_name, city_name))
            if not row:
                row = Count()
                row.from_source = 'dxy'
                row.area_name = city_name
                row.area_type = 'city'
                row.province = province_name
                session.add(row)
            json_to_model(row, city)
            row.update_time = timestamp

    session.commit()
예제 #8
0
def save_record_data():
    """
    Save a record for every current ``Count`` row.

    For each ``Count`` row a new ``CountRecord`` is created, the listed
    attributes are copied over one by one, and the record is added to
    the session. A single commit follows the loop.

    :return: None
    """
    copied_fields = (
        # case figures
        'new_case',
        'confirm_case',
        'cure_case',
        'probable_case',
        'dead_case',
        # area identification and bookkeeping
        'province',
        'area_type',
        'area_name',
        'update_time',
        'from_source',
        'memo',
    )
    for current in session.query(Count).all():
        snapshot = CountRecord()
        for field in copied_fields:
            setattr(snapshot, field, getattr(current, field))
        session.add(snapshot)
    session.commit()
예제 #9
0
 def __init__(self, site_id, **kwargs):
     """
     Look up the ``Site`` with the given id and seed ``start_urls``.

     ``self.site`` is the matching row or ``None``; ``start_urls`` is
     only assigned when a row was found.

     :param site_id: value compared against ``Site.id``
     """
     # NOTE(review): kwargs is accepted but not used in the visible lines;
     # confirm whether a base-class initializer is expected to receive it.
     matched_site = (
         session.query(Site).filter(Site.id == site_id).one_or_none()
     )
     self.site = matched_site
     if matched_site:
         self.start_urls = [matched_site.url]
예제 #10
0
    # Fetch and store migration data for every combination of date, area
    # group, direction and area, skipping combinations already stored.
    # Iterate over the dates.
    for date in date_arr:
        params['date'] = date
        # Iterate over the area groups.
        for area in area_arr:
            params['dt'] = area["dt"]
            areas = area["arr"]
            # Iterate over the migration directions (out of / into the area).
            for migrate in ["move_out", "move_in"]:

                # areas maps an area name to its id.
                for i in areas.keys():
                    params['area_name'] = i
                    params['id'] = areas[i]
                    params['type'] = migrate
                    # Count the rows already stored for this area,
                    # direction and date.
                    count = session.query(Migration).filter(
                        Migration.area_id == params['id'],
                        Migration.migrate_type == migrate,
                        Migration.create_date == params['date']).count()
                    # NOTE(review): 50 is presumably the size of a complete
                    # result set for one request -- confirm.
                    if count == 50:
                        print("跳过" + params['date'] + "_" +
                              params['area_name'] + "_" + migrate)
                        continue
                    res = get_page_html(params)
                    # HTTP 504 (gateway timeout): skip this combination.
                    if res.status_code == 504:
                        continue
                    data = get_result(res.text)
                    save_data(params, data)

# html = get_page_html(params)
# data = get_result(html)
# print(len(data))