def create_event(success_q, error_q):
    """Drain success_q/error_q and append the rows to the result text files."""
    fp_success = open(SUCCESS_DATA_PATH, 'a+')
    fp_error = open(ERROR_DATA_PATH, 'a+')
    logging.info(u'total, success:%s, fail:%s' %
                 (str(success_q.qsize()), str(error_q.qsize())))
    # write each failed row on its own line
    while not error_q.empty():
        fp_error.write(str(error_q.get()) + '\n')
    fp_error.close()

    # flush successful rows in batches of 200 to limit write calls
    success_list = []
    while not success_q.empty():
        success_list.append(success_q.get())
        if len(success_list) >= 200 or success_q.empty():
            fp_success.write('\n'.join(str(i) for i in success_list) + '\n')
            success_list = []
            logging.info(u'create event result: flushed a batch of success rows')
    fp_success.close()
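# A minimal, hedged sketch of driving create_event by hand. The queue type
# matches what do_process uses; the row contents below are illustrative.
def _example_create_event():
    mgr = multiprocessing.Manager()
    success_q, error_q = mgr.Queue(), mgr.Queue()
    success_q.put({'id': '1', 'track_state': 'signed', 'track_event': ''})
    error_q.put({'id': '2', 'error_reason': 'no_html'})
    # appends one line per row to the success/error files
    create_event(success_q, error_q)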
def do_track(data_list=None, file_name=IMPORT_EXCEL_PATH):
    """
        Read the tracking data (from excel or an in-memory list), crawl it,
        write the results to the success file, then back to excel.
    """
    logging.info('do_process---------------')
    do_process(process_num=8,
               time_out=60000,
               data_list=data_list,
               file_name=file_name)  # honor the caller's file_name
    logging.info('finally done--------------')
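# Hedged usage sketch: both entry points into do_track. The dict fields are
# assumptions based on what data_handle/get_result read from each row.
#
#     do_track()  # rows come from the default spreadsheet, IMPORT_EXCEL_PATH
#     do_track(data_list=[{'id': '1', 'transfer_way': 'ZTO',
#                          'url': 'http://example.com/track/1'}])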
def get_result(d, time_out=60000):
    """
        Fetch the carrier page with requests (ZTO) or casperjs (JS-rendered
        sites), then parse it into the tracking result.
    """
    state, last_event, list_all_event, track_time = '', '', '', ''
    try:
        if d['transfer_way'] in ('ZTO',):
            # plain HTTP is enough for ZTO pages
            html = requests.get(d['url'], timeout=6).content
        else:
            # render JavaScript-heavy pages through casperjs
            cmd = '/usr/local/bin/casperjs "%s" "%s" "%s" ' % (
                JS_PATH, d['url'], time_out)
            r = os.popen(cmd)
            html = r.read()
            r.close()
    except Exception as e:
        logging.info(u'get html fail,' + str(e))
        return ''
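# Hedged sketch: a ZTO row takes the plain-requests branch of get_result;
# any other carrier is rendered through casperjs. The URL is hypothetical.
def _example_get_result():
    d = {'transfer_way': 'ZTO', 'url': 'http://example.com/track/123'}
    result = get_result(d, time_out=15000)
    logging.info(result)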
def ftp_up(filename=json_file_path, store_name=store_name):
    """Upload the result file to the FTP server."""
    ftp = FTP()
    # debug level: 2 logs every command/response, 0 silences it
    ftp.set_debuglevel(2)
    ftp.connect('host', 'port')  # placeholders: real hostname and (int) port
    # login; pass empty strings for an anonymous login
    ftp.login('user', 'password')
    # upload in binary mode with a 1 KB buffer
    bufsize = 1024
    file_handler = open(filename, 'rb')
    ftp.storbinary('STOR %s' % store_name, file_handler, bufsize)
    ftp.set_debuglevel(0)
    file_handler.close()
    ftp.quit()
    logging.info('ftp_up----------')
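# Hedged usage sketch: upload a finished result file. The local path and
# remote name here are hypothetical; fill in the real host, port, and
# credentials inside ftp_up before running.
#
#     ftp_up(filename='/tmp/result.json', store_name='result.json')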
def data_handle(data, success_q, error_q, time_out=15000):
    """Crawl each row and route the result to success_q or error_q."""
    for i, d in enumerate(data):
        try:
            result = get_result(d, time_out)
            logging.info(result)
        except Exception as e:
            d['error_reason'] = 'Exception' + str(e)
            error_q.put(d)
            continue

        if result:
            if result['track_event'] or result['track_state']:
                # keep the original id/carrier alongside the parsed result
                result['id'] = d['id']
                result['transfer_way'] = d['transfer_way']
                success_q.put(result)
            else:
                # page fetched, but no tracking data was found in it
                d['error_reason'] = 'no_data'
                error_q.put(d)
        else:
            # the fetch itself failed (get_result returned '')
            d['error_reason'] = 'no_html'
            error_q.put(d)
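# Hedged sketch: push one row through data_handle and see which queue it
# lands on (success, 'no_data', or 'no_html'). The row is illustrative.
def _example_data_handle():
    mgr = multiprocessing.Manager()
    success_q, error_q = mgr.Queue(), mgr.Queue()
    rows = [{'id': '1', 'transfer_way': 'ZTO',
             'url': 'http://example.com/track/1'}]
    data_handle(rows, success_q, error_q, time_out=15000)
    logging.info('success:%s error:%s' % (success_q.qsize(), error_q.qsize()))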
def do_process(process_num=8,
               time_out=20000,
               data_list=None,
               file_name=IMPORT_EXCEL_PATH):
    """
        Split the rows across a process pool, crawl them with data_handle,
        then call create_event to write the results to the text files.
    """
    # load the rows, either from the caller or from the import spreadsheet
    if data_list:
        logging.info(u'get the logistic data from rabbitmq')
        data_list = get_list(data_list=data_list)
    else:
        logging.info(u'get the logistic data from excel')
        data_list = get_list(file_name=file_name)

    # split the rows into one chunk of `step` rows per worker; the slice
    # starts range(0, length, step) cover every row, including the remainder
    length = len(data_list)
    logging.info(u'total data length:%s' % str(length))
    if length <= process_num:
        process_num = 1
    step = (length // process_num) or 1
    nloop = list(range(0, length, step))
    pool = LoggingPool(processes=process_num)
    error_q = multiprocessing.Manager().Queue()
    success_q = multiprocessing.Manager().Queue()
    for n in nloop:
        pool.apply_async(data_handle,
                         (data_list[n:n + step], success_q, error_q, time_out))
    pool.close()
    pool.join()

    # write the collected results to the success/error text files
    create_event(success_q, error_q)
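# Worked example of the chunking above (illustrative numbers): with 1000 rows
# and process_num=8, step = 1000 // 8 = 125, the slice starts are
# [0, 125, 250, ..., 875], and each worker receives exactly 125 rows. With
# 1003 rows, step stays 125 and a ninth slice [1000:1003] picks up the
# remaining 3 rows.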