Example #1
def crawler_today2(sometime):
    # crawl all reports for the given day (sometime is a "YYYY-MM-DD" string)
    page_idx = 1
    step = 100
    end_time = datetime.datetime.strptime(sometime, "%Y-%m-%d")
    start_time = datetime.datetime.strptime(sometime, "%Y-%m-%d")
    start_time = start_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = end_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")

    print u"start_time,end_time:", start_time, end_time
    filters = {
        "beginTime":    start_time,
        "endTime":      end_time
    }
    logging.info(u"start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0

    for res_page in login_new_adr.get_page_list(filters, start_pos, step):
        page_idx += 1
        business_idx = 1
        print u'res_page:', res_page
        for qrow in res_page:
            print u"business idx: %s of %s " % (business_idx, step)
            business_idx += 1
            adr_adr.import_from_html(qrow)
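Every crawler in this listing serializes its date bounds with the same strftime pattern, which imitates a JavaScript-style Date string; the "GMT+0800" suffix is a fixed literal, not derived from the local timezone. A minimal stand-alone demo of what the server receives (the sample date is arbitrary, and %a/%b assume the default C locale):

import datetime

# crawler_today2 builds both beginTime and endTime from the same "sometime"
# string, so the endpoint presumably treats endTime as inclusive of that day.
day = datetime.datetime.strptime("2016-03-01", "%Y-%m-%d")
print day.strftime("%a %b %d %Y 00:00:00 GMT+0800")
# -> Tue Mar 01 2016 00:00:00 GMT+0800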
Example #2
def crawler_adr_fullidlist(sometime):
    # crawl the full report-id list for the given day and store each row
    page_idx = 1
    step = 100
    end_time = datetime.datetime.strptime(sometime, "%Y-%m-%d")
    start_time = datetime.datetime.strptime(sometime, "%Y-%m-%d")
    start_time = start_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = end_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")

    filters = {
        "beginTime":    start_time,
        "endTime":      end_time
    }
    start_pos = 0

    for res_page in login_new_adr.get_page_list(filters, start_pos, step):
        page_idx += 1
        business_idx = 1
        for qrow in res_page:
            business_idx += 1

            show_id = qrow["report_id"]
            bianma = qrow["report_id2"]                         # report code (bianma)
            fungible_name = qrow["personal_his"]                # unit reporting on behalf
            report_unit_name = qrow["report_unit_name"]         # reporting unit
            medic_list = qrow["general_name"]                   # generic names (set of drugs used)
            adr_list = qrow["adr_name"]                         # adverse-reaction names
            data_source = qrow["data_source"]                   # case source
            report_type = qrow["new_flag"]                      # report type
            StateReportDate = qrow["report_date"]               # time received by the national center

            insert_sql = (u'insert into adr_full_id_list(report_id, report_id2, personal_his, '
                          u'report_unit_name, general_name, adr_name, data_source, new_flag, '
                          u'report_date) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)')
            mdrsql.mdr_insert_alone(insert_sql, [show_id, bianma, fungible_name,
                                                 report_unit_name, medic_list, adr_list,
                                                 data_source, report_type, StateReportDate])
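The listing never shows mdrsql.mdr_insert_alone itself. A minimal sketch of what it might do, assuming a MySQLdb connection (host, credentials, and database name here are hypothetical); the key point is that the %s placeholders in insert_sql above are bound by the driver, not string-formatted:

import MySQLdb

def mdr_insert_alone(sql, params):
    # hypothetical connection settings; the real helper's are not shown
    conn = MySQLdb.connect(host="localhost", user="mdr", passwd="secret",
                           db="mdr", charset="utf8")
    try:
        cur = conn.cursor()
        cur.execute(sql, params)  # the driver escapes each %s parameter
        conn.commit()
    finally:
        conn.close()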
Example #3
def crawler_last_month(username, password):
    # crawl last month's data
    logging.info(u"Starting crawl of last month's data!")
    logincounter = 6
    while logincounter:
        print logincounter
        loginTag = login2.login()
        print u"login Tag(last_month):", loginTag
        if loginTag:
            break
        elif logincounter == 1:
            # all six attempts failed; give up
            print u"login(last_month) failed 6 times; please retry this operation"
            print u"this task has ended"
            return
        else:
            cookieManager.clear()
        logincounter = logincounter - 1
        time.sleep(8)
    
    page_idx = 1
    step = 100
    
    cur_time = datetime.datetime.today()
    year = cur_time.year
    month = cur_time.month - 1
    if month == 0 :
       month = 12
       year -= 1

    start_time = datetime.datetime(year, month, 1)
    end_time = datetime.datetime( cur_time.year, cur_time.month, 1 ) - datetime.timedelta(days=1)

    
    start_time = start_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = end_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    filters = {
        "beginTime":    start_time,
        "endTime":      end_time
    }
    logging.info(u"start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0
    for res_page in login_new_adr.get_page_list(filters, start_pos, step):  # one page at a time, 10 per page
        print u"page: ", page_idx
        page_idx += 1

        business_idx = 1
        for qrow in res_page:  # one case at a time
            print u"business idx: %s of %s " % (business_idx, step)
            business_idx += 1

            adr_adr.import_from_html(qrow)
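The previous-month window above relies on two facts: a month value of 0 after decrementing means December of the previous year, and the first of the current month minus one day is always the last day of the previous month. A stand-alone check of the same arithmetic (the sample date is chosen to exercise the January rollover):

import datetime

today = datetime.date(2016, 1, 15)                       # arbitrary example date
first_of_month = today.replace(day=1)
last_month_end = first_of_month - datetime.timedelta(days=1)
last_month_start = last_month_end.replace(day=1)
print last_month_start, last_month_end                   # 2015-12-01 2015-12-31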
Example #4
def crawler_last_month(username, password):
    # crawl last month's data
    logging.info(u"Starting crawl of last month's data!")
    logincounter = 6
    while logincounter:
        print logincounter
        loginTag = login2.login()
        print u"login Tag(last_month):", loginTag
        if loginTag:
            break
        elif logincounter == 1:
            # all six attempts failed; give up
            print u"login(last_month) failed 6 times; please retry this operation"
            print u"this task has ended"
            return
        else:
            cookieManager.clear()
        logincounter = logincounter - 1
        time.sleep(8)

    page_idx = 1
    step = 100

    cur_time = datetime.datetime.today()
    year = cur_time.year
    month = cur_time.month - 1
    if month == 0:
        month = 12
        year -= 1

    start_time = datetime.datetime(year, month, 1)
    end_time = datetime.datetime(cur_time.year, cur_time.month,
                                 1) - datetime.timedelta(days=1)

    start_time = start_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = end_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    filters = {"beginTime": start_time, "endTime": end_time}
    logging.info(u"start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0
    for res_page in login_new_adr.get_page_list(filters, start_pos,
                                                step):  # one page at a time, 10 per page
        print u"page: ", page_idx
        page_idx += 1

        business_idx = 1
        for qrow in res_page:  # one case at a time
            print u"business idx: %s of %s " % (business_idx, step)
            business_idx += 1

            adr_adr.import_from_html(qrow)
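The six-attempt login loop is repeated almost verbatim in every crawler here. A hedged sketch of how it could be factored into one helper (login2 and cookieManager as in the listing; the retry count and delay are copied from the examples, and unlike the originals this version also sleeps after the final failure):

import time

def login_with_retry(retries=6, delay=8):
    for attempt in range(retries):
        if login2.login():
            return True
        cookieManager.clear()  # drop stale cookies before retrying
        time.sleep(delay)
    return False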
Example #5
def crawler_by_time(username, password, start_date, end_date):
    # crawl data for the specified date range
    logging.info("Starting crawl for the specified date range!")
    print u"logging in..."
    logincounter = 6
    while logincounter:
        print logincounter
        loginTag = login2.login()
        print u"login Tag(zoe3):", loginTag
        if loginTag:
            break
        elif logincounter == 1:
            # all six attempts failed; give up
            print u"login(main3) failed 6 times; please retry this operation"
            print u"this task has ended"
            return
        else:
            cookieManager.clear()
        logincounter = logincounter - 1
        time.sleep(8)
    
    page_idx = 1
    step = 100

    end_time = datetime.datetime.strptime(end_date, "%Y-%m-%d")
    start_time = datetime.datetime.strptime(start_date, "%Y-%m-%d")

    start_time = start_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = end_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    filters = {
        "beginTime":    start_time,
        "endTime":      end_time
    }
    logging.info("start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0
    for res_page in login_new_adr.get_page_list(filters, start_pos, step):
        print "page: ", page_idx
        page_idx += 1

        business_idx = 1
        for qrow in res_page:  # one case at a time
            print "business idx: %s of %s " % (business_idx, step)
            business_idx += 1

            try:
                delete_by_bianma(qrow["report_id2"])  # if the delete succeeds, re-import the record
                adr_adr.import_from_html(qrow)
            except Exception as err:
                print err
Example #6
def crawler_by_time(username, password, start_date, end_date):
    # crawl data for the specified date range
    logging.info("Starting crawl for the specified date range!")
    print u"logging in..."
    logincounter = 6
    while logincounter:
        print logincounter
        loginTag = login2.login()
        print u"login Tag(zoe3):", loginTag
        if loginTag:
            break
        elif logincounter == 1:
            # all six attempts failed; give up
            print u"login(main3) failed 6 times; please retry this operation"
            print u"this task has ended"
            return
        else:
            cookieManager.clear()
        logincounter = logincounter - 1
        time.sleep(8)

    page_idx = 1
    step = 100

    end_time = datetime.datetime.strptime(end_date, "%Y-%m-%d")
    start_time = datetime.datetime.strptime(start_date, "%Y-%m-%d")

    start_time = start_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = end_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    filters = {"beginTime": start_time, "endTime": end_time}
    logging.info("start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0
    for res_page in login_new_adr.get_page_list(filters, start_pos, step):
        print "page: ", page_idx
        page_idx += 1

        business_idx = 1
        for qrow in res_page:  # one case at a time
            print "business idx: %s of %s " % (business_idx, step)
            business_idx += 1

            try:
                delete_by_bianma(qrow["report_id2"])  # if the delete succeeds, re-import the record
                adr_adr.import_from_html(qrow)
            except Exception as err:
                print err
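delete_by_bianma is not shown in this listing. One plausible shape, reusing the parameterized-statement helper from Example #2 (the table and column names are guesses based on that example): deleting any previously imported row with the same report code lets the fresh crawl replace the record instead of duplicating it.

def delete_by_bianma(bianma):
    # hypothetical: table and helper borrowed from Example #2 for illustration
    mdrsql.mdr_insert_alone(
        u'delete from adr_full_id_list where report_id2 = %s', [bianma])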
Example #7
def crawler_first_half_year(username, password):
    # crawl the first half of the current year

    logging.info(u"Starting crawl of first-half-year data!")
    logincounter = 6
    while logincounter:
        print logincounter
        loginTag = login2.login()
        print u"login Tag(first_half_year):", loginTag
        if loginTag:
            break
        elif logincounter == 1:
            # all six attempts failed; give up
            print u"login(first_half_year) failed 6 times; please retry this operation"
            print u"this task has ended"
            return
        else:
            cookieManager.clear()
        logincounter = logincounter - 1
        time.sleep(8)
    
    page_idx = 1
    step = 100
    
    year = datetime.datetime.today().year
    last_year_start = datetime.date(year, 1, 1)
    last_year_end = datetime.date(year, 6, 30)
    start_time = last_year_start.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = last_year_end.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    filters = {
        "beginTime":    start_time,
        "endTime":      end_time
    }
    logging.info(u"start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0
    for res_page in login_new_adr.get_page_list(filters, start_pos, step):
        print u"page: ", page_idx
        page_idx += 1

        business_idx = 1
        for qrow in res_page:
            print u"business idx: %s of %s " % (business_idx, step)
            business_idx += 1

            adr_adr.import_from_html(qrow)
Example #8
def crawler_last_year(username, password):
    # crawl the previous year's data

    logging.info(u"Starting crawl of last year's data!")
    logincounter = 6
    while logincounter:
        print logincounter
        loginTag = login2.login()
        print u"login Tag(last_year):", loginTag
        if loginTag:
            break
        elif logincounter == 1:
            # all six attempts failed; give up
            print u"login(last_year) failed 6 times; please retry this operation"
            print u"this task has ended"
            return
        else:
            cookieManager.clear()
        logincounter = logincounter - 1
        time.sleep(8)

    page_idx = 1
    step = 100

    year = datetime.datetime.today().year - 1
    last_year_start = datetime.date(year, 1, 1)
    last_year_end = datetime.date(year, 12, 31)
    start_time = last_year_start.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = last_year_end.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    filters = {"beginTime": start_time, "endTime": end_time}
    logging.info(u"start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0
    for res_page in login_new_adr.get_page_list(filters, start_pos,
                                                step):  # one page at a time, 10 per page
        print "page: ", page_idx
        page_idx += 1

        business_idx = 1
        for qrow in res_page:  # one case at a time
            print u"business idx: %s of %s " % (business_idx, step)
            business_idx += 1

            adr_adr.import_from_html(qrow)
Example #9
def crawler_today():
    # crawl today's data
    logging.info(u"Starting crawl of today's data!")
    logincounter = 6
    while logincounter:
        print logincounter
        loginTag = login2.login()
        print u"login Tag(ADR Today):", loginTag
        if loginTag:
            break
        elif logincounter == 1:
            # all six attempts failed; give up
            print u"login(auto today) failed 6 times; please retry this operation"
            print u"this task has ended"
            return
        else:
            cookieManager.clear()
        logincounter = logincounter - 1
        time.sleep(8)

    page_idx = 1
    step = 100
    start_time = datetime.datetime.today()
    end_time = start_time + datetime.timedelta(days=1)
    start_time = start_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = end_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    filters = {
        "beginTime":    start_time,
        "endTime":      end_time
    }
    logging.info(u"start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0
    for res_page in login_new_adr.get_page_list(filters, start_pos, step):
        page_idx += 1

        business_idx = 1
        for qrow in res_page:
            print u"business idx: %s of %s " % (business_idx, step)
            business_idx += 1
            adr_adr.import_from_html(qrow)
Example #10
def crawler_today2(sometime):
    # crawl all reports for the given day (sometime is a "YYYY-MM-DD" string)
    page_idx = 1
    step = 100
    end_time = datetime.datetime.strptime(sometime, "%Y-%m-%d")
    start_time = datetime.datetime.strptime(sometime, "%Y-%m-%d")
    start_time = start_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = end_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")

    print u"start_time,end_time:", start_time, end_time
    filters = {"beginTime": start_time, "endTime": end_time}
    logging.info(u"start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0

    for res_page in login_new_adr.get_page_list(filters, start_pos, step):
        page_idx += 1
        business_idx = 1
        print u'res_page:', res_page
        for qrow in res_page:
            print "business idx: %s of %s " % (business_idx, step)
            business_idx += 1
            adr_adr.import_from_html(qrow)
Example #11
def crawler_today():
    # crawl today's data
    logging.info(u"Starting crawl of today's data!")
    logincounter = 6
    while logincounter:
        print logincounter
        loginTag = login2.login()
        print u"login Tag(ADR Today):", loginTag
        if loginTag:
            break
        elif logincounter == 1:
            # all six attempts failed; give up
            print u"login(auto today) failed 6 times; please retry this operation"
            print u"this task has ended"
            return
        else:
            cookieManager.clear()
        logincounter = logincounter - 1
        time.sleep(8)

    page_idx = 1
    step = 100
    start_time = datetime.datetime.today()
    end_time = start_time + datetime.timedelta(days=1)
    start_time = start_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    end_time = end_time.strftime("%a %b %d %Y 00:00:00 GMT+0800")
    filters = {"beginTime": start_time, "endTime": end_time}
    logging.info(u"start time:%s \t end time:%s" % (start_time, end_time))
    start_pos = 0
    for res_page in login_new_adr.get_page_list(filters, start_pos, step):
        page_idx += 1

        business_idx = 1
        for qrow in res_page:
            print "business idx: %s of %s " % (business_idx, step)
            business_idx += 1
            adr_adr.import_from_html(qrow)
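login_new_adr.get_page_list is consumed everywhere above as an iterable of pages, where each page is a list of qrow dicts. A tiny mock with the same shape (filters ignored, fixed fake rows) makes these crawlers testable offline; the real module is not part of this listing:

def get_page_list(filters, start_pos, step):
    # 25 fake rows, paged in slices of `step`, mimicking the generator contract
    fake_rows = [{"report_id2": "BM%04d" % i} for i in range(25)]
    for pos in range(start_pos, len(fake_rows), step):
        yield fake_rows[pos:pos + step]

For example, get_page_list({}, 0, 10) yields pages of 10, 10, and 5 rows.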