Example #1
0
def _crawl_dig_point(next_dig_point):
    '''
    Crawl dig points in batches, starting from next_dig_point.

    Runs _iter_crawl_dig_point up to N_ITER_CRAWL_DIG_POINT times, merging
    every batch into a single dict and sleeping for the configured
    'time_sleep' (default 30, passed to time.sleep) after each batch.  The
    loop stops early once the offset dig point decodes to a year other than
    the current year while its remainder modulo 100000 is at least 30000.

    next_dig_point: dig point to start from; also used as the initial
        offset dig point.

    return: (next_dig_point, results_list). next_dig_point is the value
        reported by the last batch; results_list is a list of the merged
        result values.
    '''
    results = {}

    offset_dig_point = next_dig_point
    current_timestamp = util.get_timestamp()
    current_year = util.timestamp_to_datetime(current_timestamp).year

    cfg.logger.debug('current_year: %s', current_year)

    for _ in range(N_ITER_CRAWL_DIG_POINT):
        # The per-batch error code is not acted upon here.
        (_error_code, next_dig_point, offset_dig_point,
         iter_results) = _iter_crawl_dig_point(next_dig_point,
                                               offset_dig_point)
        results.update(iter_results)

        # NOTE(review): the + 1911 looks like a Minguo (ROC) -> Gregorian
        # year conversion -- confirm against the dig point id format.
        offset_dig_point_year = offset_dig_point // 100000 + 1911
        offset_dig_point_mod_100000 = offset_dig_point % 100000

        cfg.logger.debug(
            'offset_dig_point_year: %s offset_dig_point_mod_100000: %s',
            offset_dig_point_year, offset_dig_point_mod_100000)
        if (offset_dig_point_year != current_year
                and offset_dig_point_mod_100000 >= 30000):
            break

        sleep_time = cfg.config.get('time_sleep', 30)
        cfg.logger.debug('to sleep %s', sleep_time)
        time.sleep(sleep_time)

    # dict.values() is a live view on Python 3; copy it so callers really
    # get an independent list (on Python 2 this is just a copy).
    results_list = list(results.values())

    return (next_dig_point, results_list)
Example #2
0
def _save_img(data, postfix, content_type):
    '''
    Write an image and its thumbnail to disk and record both in 'bee_img'.

    Files are stored under /data/img/bee/<YYYY-MM-DD>/ and
    /data/thumbnail/bee/<YYYY-MM-DD>/, named "<timestamp>_<uuid>.<ext>".

    data: raw image content. It is written in binary mode, so it is
        expected to be a byte string.
    postfix: file extension (without the dot) of the stored image.
    content_type: stored verbatim in the DB record.

    return: the record passed to util.db_insert (filename,
        thumbnail_filename, the_id, content_type, save_time) with any
        '_id' key removed.
    '''
    the_timestamp = util.get_timestamp()
    the_datetime = util.timestamp_to_datetime(the_timestamp)
    the_id = str(the_timestamp) + "_" + util.uuid()
    filename = the_id + '.' + postfix

    # One formatted date is shared by both directories and both DB paths.
    date_dir = the_datetime.strftime('%Y-%m-%d')

    img_dir = '/data/img/bee/' + date_dir
    util.makedirs(img_dir)

    # Binary mode: text mode can corrupt image bytes (newline translation)
    # and rejects byte strings outright on Python 3.
    with open(img_dir + '/' + filename, 'wb') as f:
        f.write(data)

    (the_thumbnail, thumbnail_postfix) = _make_thumbnail(data, postfix)

    thumbnail_dir = '/data/thumbnail/bee/' + date_dir
    util.makedirs(thumbnail_dir)

    thumbnail_filename = the_id + '.' + thumbnail_postfix

    with open(thumbnail_dir + '/' + thumbnail_filename, 'wb') as f:
        f.write(the_thumbnail)

    db_data = {
        "filename": date_dir + '/' + filename,
        "thumbnail_filename": date_dir + '/' + thumbnail_filename,
        "the_id": the_id,
        'content_type': content_type,
        'save_time': the_timestamp,
    }

    util.db_insert('bee_img', [db_data])

    # Presumably util.db_insert adds a storage-specific '_id' to the dict;
    # drop it so the returned record holds only the fields set above.
    db_data.pop('_id', None)

    return db_data
Example #3
0
def _save_img(data, postfix, content_type):
    '''
    Store an image plus its thumbnail on disk and insert a 'bee_img' record.

    The image goes to /data/img/bee/<YYYY-MM-DD>/ and the thumbnail to
    /data/thumbnail/bee/<YYYY-MM-DD>/; both share the id
    "<timestamp>_<uuid>" as base name.

    data: raw image content; written in binary mode, so a byte string is
        expected.
    postfix: extension (no leading dot) used for the image file.
    content_type: copied unchanged into the DB record.

    return: the inserted record dict (filename, thumbnail_filename,
        the_id, content_type, save_time), without an '_id' key.
    '''
    the_timestamp = util.get_timestamp()
    the_datetime = util.timestamp_to_datetime(the_timestamp)
    the_id = str(the_timestamp) + "_" + util.uuid()
    filename = the_id + '.' + postfix

    # Format the date once; it is reused for directories and DB paths.
    day = the_datetime.strftime('%Y-%m-%d')

    image_dir = '/data/img/bee/' + day
    util.makedirs(image_dir)

    # 'wb' rather than 'w': image content is binary, and text mode either
    # mangles it (newline translation) or raises on Python 3.
    with open(image_dir + '/' + filename, 'wb') as image_file:
        image_file.write(data)

    (the_thumbnail, thumbnail_postfix) = _make_thumbnail(data, postfix)

    thumb_dir = '/data/thumbnail/bee/' + day
    util.makedirs(thumb_dir)

    thumbnail_filename = the_id + '.' + thumbnail_postfix

    with open(thumb_dir + '/' + thumbnail_filename, 'wb') as thumb_file:
        thumb_file.write(the_thumbnail)

    db_data = {
        "filename": day + '/' + filename,
        "thumbnail_filename": day + '/' + thumbnail_filename,
        "the_id": the_id,
        'content_type': content_type,
        'save_time': the_timestamp,
    }

    util.db_insert('bee_img', [db_data])

    # Remove the '_id' the insert may have attached (presumably a
    # storage-level id) before handing the record back.
    db_data.pop('_id', None)

    return db_data
def _crawl_dig_point(next_dig_point):
    '''
    Repeatedly crawl dig point batches and collect their results.

    _iter_crawl_dig_point is invoked at most N_ITER_CRAWL_DIG_POINT times.
    Each batch is merged into one dict, then the function sleeps for the
    configured 'time_sleep' (default 30, passed to time.sleep).  Crawling
    ends early when the offset dig point decodes to a year that is not the
    current year and its remainder modulo 100000 is >= 30000.

    next_dig_point: starting dig point, also the initial offset dig point.

    return: (next_dig_point, results_list) -- the dig point returned by
        the final batch and a list of all merged result values.
    '''
    results = {}

    offset_dig_point = next_dig_point
    current_timestamp = util.get_timestamp()
    current_year = util.timestamp_to_datetime(current_timestamp).year

    cfg.logger.debug('current_year: %s', current_year)

    for _ in range(N_ITER_CRAWL_DIG_POINT):
        # The batch's error code is received but not used.
        (_error_code, next_dig_point, offset_dig_point, iter_results) = \
            _iter_crawl_dig_point(next_dig_point, offset_dig_point)
        results.update(iter_results)

        # NOTE(review): + 1911 presumably converts a Minguo (ROC) year to
        # a Gregorian one -- confirm against the dig point id format.
        offset_dig_point_year = offset_dig_point // 100000 + 1911
        offset_dig_point_mod_100000 = offset_dig_point % 100000

        cfg.logger.debug(
            'offset_dig_point_year: %s offset_dig_point_mod_100000: %s',
            offset_dig_point_year, offset_dig_point_mod_100000)

        crossed_year = offset_dig_point_year != current_year
        if crossed_year and offset_dig_point_mod_100000 >= 30000:
            break

        sleep_time = cfg.config.get('time_sleep', 30)
        cfg.logger.debug('to sleep %s', sleep_time)
        time.sleep(sleep_time)

    # Return a real list: on Python 3 dict.values() is only a view.
    results_list = list(results.values())

    return (next_dig_point, results_list)
Example #5
0
def _crawl_dig():
    '''
    Fetch dig records for a fixed date window, store them, cache the latest.

    POSTs a date-range query to the InfoAllList endpoint, parses the
    response with _parse_dig, passes every parsed record to _put_to_db and
    finally saves the latest timestamp under the cache key
    'cron_new_taipei_city_latest_dig'.

    return: None
    '''
    the_url = 'http://61.60.124.185/tpctempdig/InfoAllList.asp'
    # 946684800 is 2000-01-01 00:00:00 UTC when read as Unix seconds.
    start_timestamp = 946684800
    # Query up to 366 days ahead (assuming util.get_timestamp() is in
    # seconds -- TODO confirm).
    end_timestamp = util.get_timestamp() + 86400 * 366

    start_datetime = util.timestamp_to_datetime(start_timestamp)
    end_datetime = util.timestamp_to_datetime(end_timestamp)

    params = {
        'sortflag': '',
        'sorttype': '',
        'TargetLB': '',
        'qry2': 1,
        'startyear': start_datetime.year,
        'startmonth': start_datetime.month,
        'startday': start_datetime.day,
        'endyear': end_datetime.year,
        'endmonth': end_datetime.month,
        'endday': end_datetime.day,
    }

    http_data = util.http_multipost({the_url: params})
    (latest_timestamp, dig_data) = _parse_dig(http_data[the_url])

    # Plain loop: the calls are made for their side effect only, so there
    # is no reason to build a throwaway list of return values.
    for each_data in dig_data:
        _put_to_db(each_data)

    util.save_cache('cron_new_taipei_city_latest_dig',
                    {'latest_timestamp': latest_timestamp})
def _crawl_dig():
    '''
    Query the InfoAllList endpoint and persist every dig record it returns.

    Steps: POST a start/end date query, parse the body with _parse_dig,
    hand each record to _put_to_db, then store the latest timestamp in the
    cache entry 'cron_new_taipei_city_latest_dig'.

    return: None
    '''
    the_url = 'http://61.60.124.185/tpctempdig/InfoAllList.asp'
    # Window start: 946684800 == 2000-01-01 00:00:00 UTC as Unix seconds.
    start_timestamp = 946684800
    # Window end: 366 days after now (assumes util.get_timestamp() returns
    # seconds -- TODO confirm).
    end_timestamp = util.get_timestamp() + 86400 * 366

    start_datetime = util.timestamp_to_datetime(start_timestamp)
    end_datetime = util.timestamp_to_datetime(end_timestamp)

    params = {
        'sortflag': '',
        'sorttype': '',
        'TargetLB': '',
        'qry2': 1,
        'startyear': start_datetime.year,
        'startmonth': start_datetime.month,
        'startday': start_datetime.day,
        'endyear': end_datetime.year,
        'endmonth': end_datetime.month,
        'endday': end_datetime.day,
    }

    http_data = util.http_multipost({the_url: params})
    (latest_timestamp, dig_data) = _parse_dig(http_data[the_url])

    # Side-effect-only calls belong in a for loop, not a list comprehension
    # whose result is discarded.
    for each_data in dig_data:
        _put_to_db(each_data)

    util.save_cache('cron_new_taipei_city_latest_dig', {'latest_timestamp': latest_timestamp})
Example #7
0
def _is_to_refresh_google_token(user_info):
    if user_info.get('user_type', '') != 'google':
        return False

    if user_info.get('token_refresh_timestamp', 0) < util.get_timestamp():
        return True

    return False
Example #8
0
def _is_to_refresh_google_token(user_info):
    if user_info.get('user_type', '') != 'google':
        return False

    if user_info.get('token_refresh_timestamp', 0) < util.get_timestamp():
        return True

    return False
Example #9
0
def _create_session_key(user_id=None, offset_timestamp=0):
    '''
    Build a new session key and, if a user is given, map it to that user.

    user_id: when truthy, a 'session_user_map' entry is written for the
        new key.
    offset_timestamp: added to util.get_timestamp() before the key is
        serialized.

    return: the key produced by _serialize_session_key from the shifted
        timestamp and a random string.
    '''
    key_timestamp = util.get_timestamp() + offset_timestamp
    random_part = util.gen_random_string()
    session_key = _serialize_session_key(key_timestamp, random_part)

    if not user_id:
        return session_key

    util.db_update(
        'session_user_map',
        {"session_key": session_key},
        {"user_id": user_id, "the_timestamp": key_timestamp})

    return session_key
Example #10
0
def p_json_handler(data):
    '''
    Stamp every posted record and insert the whole batch into 'bee'.

    data: list of dicts, modified in place. Each one gets 'the_id'
        ("<timestamp>_<uuid>") and 'save_time'; 'user_name' and 'address'
        default to ''; 'count' is converted with util._int. A record
        without 'count' raises KeyError.

    return: {"success": True}
    '''
    for record in data:
        saved_at = util.get_timestamp()
        record['the_id'] = str(saved_at) + "_" + util.uuid()
        record['save_time'] = saved_at

        # Optional text fields fall back to an empty string.
        for optional_key in ('user_name', 'address'):
            record[optional_key] = record.get(optional_key, '')

        record['count'] = util._int(record['count'])

    util.db_insert('bee', data)

    return {"success": True}
Example #11
0
def p_json_handler(data):
    '''
    Normalize the posted records in place and store them in 'bee'.

    data: list of dicts. For each dict: 'the_id' is set to
        "<timestamp>_<uuid>", 'save_time' to the same timestamp,
        'user_name' / 'address' are filled with '' when absent, and
        'count' (required) is passed through util._int.

    return: {"success": True}
    '''
    for entry in data:
        now = util.get_timestamp()
        unique_id = str(now) + "_" + util.uuid()

        normalized = {
            'the_id': unique_id,
            'save_time': now,
            'user_name': entry.get('user_name', ''),
            'address': entry.get('address', ''),
        }
        entry.update(normalized)
        entry['count'] = util._int(entry['count'])

    util.db_insert('bee', data)

    return {"success": True}
Example #12
0
def _create_session_key(user_id=None, offset_timestamp=0):
    '''
    Create a session key; record its owner when user_id is provided.

    user_id: optional; a truthy value causes a 'session_user_map' update
        for the new key.
    offset_timestamp: shift applied to the current timestamp used in the
        key.

    return: the serialized session key.
    '''
    shifted_timestamp = util.get_timestamp() + offset_timestamp
    session_key = _serialize_session_key(shifted_timestamp,
                                         util.gen_random_string())

    if user_id:
        key_doc = {"session_key": session_key}
        owner_doc = {
            "user_id": user_id,
            "the_timestamp": shifted_timestamp,
        }
        util.db_update('session_user_map', key_doc, owner_doc)

    return session_key
Example #13
0
def _check_refresh_session(session, session_key, session_key2, user_info):
    '''
    Rotate the session keys once the current key is older than allowed.

    When the timestamp embedded in session_key is more than
    EXPIRE_TIMESTAMP_SESSION_BLOCK behind util.get_timestamp(), session_key2
    (created if empty) becomes session['value'], a freshly created key
    offset by OFFSET_TIMESTAMP_SESSION_BLOCK becomes session['value2'], the
    session is saved and the old key's session is removed.

    session: mapping with a save() method holding 'value' / 'value2'.
    session_key: current key; its timestamp decides whether to rotate.
    session_key2: next key, may be empty.
    user_info: dict; 'user_id' (default '') is attached to new keys.

    return: None
    '''
    now = util.get_timestamp()
    user_id = user_info.get('user_id', '')

    (key_timestamp, _session_id) = _deserialize_session_key(session_key)

    key_age = now - key_timestamp
    if not (key_age > EXPIRE_TIMESTAMP_SESSION_BLOCK):
        return

    promoted_key = session_key2
    if not promoted_key:
        promoted_key = _create_session_key(user_id=user_id)

    upcoming_key = _create_session_key(
        user_id=user_id, offset_timestamp=OFFSET_TIMESTAMP_SESSION_BLOCK)

    session['value'] = promoted_key
    session['value2'] = upcoming_key
    session.save()

    # The replaced key is retired only after the new pair has been saved.
    remove_session(_extract_session_struct_from_session_key(session_key))
Example #14
0
def _check_refresh_session(session, session_key, session_key2, user_info):
    '''
    Replace an expired session key with the next one and queue a new one.

    Nothing happens unless util.get_timestamp() minus the timestamp decoded
    from session_key exceeds EXPIRE_TIMESTAMP_SESSION_BLOCK. Otherwise
    session['value'] takes session_key2 (generated if falsy),
    session['value2'] takes a new key created with
    OFFSET_TIMESTAMP_SESSION_BLOCK, the session is saved, and the session
    belonging to the old key is removed.

    session: mapping with save(); 'value' and 'value2' are overwritten.
    session_key: key currently in use.
    session_key2: follow-up key, possibly empty.
    user_info: dict from which 'user_id' (default '') is read.

    return: None
    '''
    current_timestamp = util.get_timestamp()
    owner_id = user_info.get('user_id', '')

    decoded = _deserialize_session_key(session_key)
    (issued_timestamp, _unused_id) = decoded

    if current_timestamp - issued_timestamp > EXPIRE_TIMESTAMP_SESSION_BLOCK:
        next_key = session_key2 or _create_session_key(user_id=owner_id)
        following_key = _create_session_key(
            user_id=owner_id,
            offset_timestamp=OFFSET_TIMESTAMP_SESSION_BLOCK)

        session['value'] = next_key
        session['value2'] = following_key
        session.save()

        expired_struct = _extract_session_struct_from_session_key(
            session_key)
        remove_session(expired_struct)
Example #15
0
def p_json_handler(data):
    '''
    Validate, normalize and store a batch of delivery records in 'bee'.

    data: [{deliver_time, deliver_date, ad_versions, geo, count, user_name, address, county, town, deliver_status, memo}]
    deliver_date: time in iso-8601 format (with millisecond precision)
    deliver_time: deliver_date as timestamp (secs after Unix epoch) in int.
    ad_versions: list of ad_versions. the name of ad is based on "name" in /get/adData
    geo: geojson format. accepting LineString and Point
    count: in int number
    user_name: string
    address: string
    county: string, based on app/scripts/services/TWCounties in frontend
    town: string, based on app/scripts/services/TWTown in frontend
    deliver_status: string
    memo: string

    ex: {"town":"東區","count":10,"deliver_time":1398724259,"deliver_date":"2014-04-28T22:30:59.383Z","geo":[{"type":"LineString","coordinates":[[120.99337719999994,24.7905385],[120.99452376365662,24.79139038370729],[120.99501729011536,24.79084493848351]]}],"ad_versions":["鳥籠監督條例"],"county":"新竹市","deliver_status":"test","address":"nthu","user_name":"test_user_name","memo":"test"}

    ex2: {"town":"內湖區","count":3000,"deliver_time":1398164891,"deliver_date":"2014-04-22T11:08:11.835Z","geo":[{"type":"Point","coordinates":[121.61277294158936,25.06670789727661]}],"ad_versions":["20140421_二類電信RE"],"county":"台北市","address":"康寧路三段","user_name":"test_user_name"}

    Each record is modified in place: 'the_id' and 'save_time' are set,
    'deliver_time' is derived with _parse_deliver_time when absent,
    'user_name' / 'address' default to '' and 'count' goes through
    util._int. Nothing is inserted if any record fails validation.

    return: {"success": True} on success. On failure
        {"success": False, "error_msg": <text>}; the missing-key failure
        additionally carries the same text under the legacy "errorMsg" key.
    '''
    for each_data in data:
        for key in _MUST_HAVE_KEYS:
            if key not in each_data:
                error_msg = "no key: key: %s each_data: %s" % (key, util.json_dumps(each_data))
                # This path historically used "errorMsg" while the other
                # failure path uses "error_msg". Report under both so
                # existing clients keep working and new ones can rely on
                # a single key.
                return {"success": False, "errorMsg": error_msg, "error_msg": error_msg}

        the_timestamp = util.get_timestamp()
        the_id = str(the_timestamp) + "_" + util.uuid()
        each_data['the_id'] = the_id

        if 'deliver_time' not in each_data:
            (error_code, deliver_time) = _parse_deliver_time(each_data)
            if error_code != S_OK:
                return {"success": False, "error_msg": "deliver_date not fit format: deliver_date: %s each_data: %s" % (each_data.get('deliver_date', ''), util.json_dumps(each_data))}
            each_data['deliver_time'] = deliver_time

        each_data['save_time'] = the_timestamp
        each_data['user_name'] = each_data.get('user_name', '')
        each_data['address'] = each_data.get('address', '')
        each_data['count'] = util._int(each_data['count'])
    util.db_insert('bee', data)

    return {"success": True}
Example #16
0
def p_json_handler(data):
    '''
    Insert road records into 'roadDB', reporting conflicts with stored ones.

    Every record first goes through _infer_columns (with one shared save
    timestamp). If a document with the same 'the_id' already exists it is
    left untouched; when _is_same says it differs from the posted record,
    the difference is logged and added to the error message. Records
    without an existing document are inserted via
    util.db_insert_if_not_exist.

    data: iterable of dicts; each must carry 'the_id' after
        _infer_columns has run.

    return: {"success": <bool>, "error_msg": <str>} -- success is False as
        soon as one conflicting record was seen; error_msg holds one line
        per conflict.
    '''
    error_code = S_OK
    conflict_lines = []

    save_timestamp = util.get_timestamp()

    for record in data:
        _infer_columns(record, save_timestamp)
        the_id = record['the_id']

        stored = util.db_find_one('roadDB', {'the_id': the_id})
        if not stored:
            util.db_insert_if_not_exist('roadDB', {'the_id': the_id}, record)
            continue

        if _is_same(stored, record):
            continue

        error_code = S_ERR
        cfg.logger.error('data different: the_id: %s db_result: %s each_data: %s', the_id, stored, record)
        conflict_lines.append('data different: the_id: %s db_result: %s each_data: %s\n' % (the_id, stored, record))

    is_success = True if error_code == S_OK else False
    return {"success": is_success, "error_msg": ''.join(conflict_lines)}
Example #17
0
def _login(client_id, scope, register_uri, authorization_base_url, request,
           params):
    '''
    Start an OAuth2 login: record the attempt and redirect to the provider.

    The request's session is processed first, then an OAuth2Session is used
    to obtain the authorization URL and state. The state, timestamp, params
    and target url are stored in 'login_info'. When the config flag
    'is_cron_remove_expire' is false, expired 'login_info' entries are
    removed here as well. Finally redirect() is called with the
    authorization URL.

    client_id, scope, register_uri: passed to OAuth2Session (register_uri
        as redirect_uri).
    authorization_base_url: base URL for the authorization request.
    request: forwarded to util_user.process_session.
    params: dict; 'url' (default '') is stored with the login record.
    '''
    session_pair = util_user.process_session(request)
    (session_struct, session_struct2) = session_pair
    cfg.logger.debug('session_struct: %s session_struct2: %s', session_struct,
                     session_struct2)

    return_path = params.get('url', '')
    now = util.get_timestamp()

    cfg.logger.debug('params: %s the_path: %s', params, return_path)

    oauth = OAuth2Session(client_id, scope=scope, redirect_uri=register_uri)
    authorization_url, state = oauth.authorization_url(
        authorization_base_url, approval_prompt="auto")

    login_record = {
        "state": state,
        "the_timestamp": now,
        "params": params,
        "url": return_path,
    }
    util.db_insert('login_info', login_record)

    cleanup_done_by_cron = cfg.config.get('is_cron_remove_expire', True)
    if not cleanup_done_by_cron:
        # NOTE(review): the * 1000 suggests the_timestamp is in
        # milliseconds -- confirm against util.get_timestamp().
        expire_span = cfg.config.get(
            'expire_unix_timestamp_session',
            EXPIRE_UNIX_TIMESTAMP_SESSION) * 1000
        stale_filter = {"the_timestamp": {"$lt": now - expire_span}}
        util.db_remove('login_info', stale_filter)

    cfg.logger.debug(
        'after authorization_url: authorization_url: %s state: %s',
        authorization_url, state)

    redirect(authorization_url)
Example #18
0
def process_session(request):
    '''
    Initialise or rotate the two session slots kept in the beaker session.

    request: object whose environ['beaker.session'] is the session mapping.

    New session (no 'value' entry): two session structs are built for now
    and now + 300, then 'value' / 'value2' are set from newly created
    session keys and the session is saved.

    Existing session: structs are extracted from the stored keys; when the
    first one's timestamp is at least 300 behind the current timestamp, the
    second struct is promoted to the first slot, a new struct is built for
    the second slot and the session is saved.

    return: (session_struct, session_struct2)
    '''
    session = request.environ['beaker.session']
    now = util.get_timestamp()

    if not session.has_key('value'):
        first_struct = _construct_session_struct(now)
        session['value'] = first_struct.get('key', '')

        second_struct = _construct_session_struct(now + 300)
        session['value2'] = second_struct.get('key', '')

        # NOTE(review): the two assignments above are overwritten right
        # below, so the returned structs do not describe the stored keys
        # -- confirm which pair is intended.
        first_key = _create_session_key()
        second_key = _create_session_key(offset_timestamp=300)
        session['value'] = first_key
        session['value2'] = second_key
        session.save()

        return (first_struct, second_struct)

    first_key = session['value']
    second_key = session['value2']

    first_struct = _extract_session_struct_from_session_key(first_key)
    second_struct = _extract_session_struct_from_session_key(second_key)

    first_timestamp = first_struct.get('the_timestamp', 0)
    second_timestamp = second_struct.get('the_timestamp', 0)

    if now - util._int(first_timestamp) >= 300:
        rotated_timestamp = max(now, util._int(second_timestamp) + 300)
        fresh_struct = _construct_session_struct(rotated_timestamp)

        # Shift the slots: second -> first, fresh -> second.
        first_struct, second_struct = second_struct, fresh_struct
        session['value'] = first_struct.get('key', '')
        session['value2'] = second_struct.get('key', '')
        session.save()

    return (first_struct, second_struct)
Example #19
0
def _get_params(params):
    '''
    Work out the dig point the next crawl should start from.

    params: falsy on the first call, otherwise a dict that may hold
        'next_dig_point' (default START_TAIPEI_CITY_DIG_POINT).

    Without params the value is fetched from the web server's
    /get/taipei_city_dig_point_next_dig_point endpoint, falling back to
    START_TAIPEI_CITY_DIG_POINT. With params the dig point is advanced to
    the start of the following year block, unless that year is not yet
    behind the current tw_year.

    return: (S_OK, {'next_dig_point': value}) or (S_ERR, None) when there
        is no later year to advance to.
    '''
    if params:
        current_point = params.get('next_dig_point', START_TAIPEI_CITY_DIG_POINT)
        year = current_point // MAX_TAIPEI_CITY_DIG_POINTS_BY_YEAR
        tw_year = util.timestamp_to_tw_year(util.get_timestamp())

        if tw_year <= year:
            return (S_ERR, None)

        following_year_start = (year + 1) * MAX_TAIPEI_CITY_DIG_POINTS_BY_YEAR
        return (S_OK, {'next_dig_point': following_year_start})

    server = cfg.config.get('web_server', 'http://106.187.101.193:5346')
    the_url = server + '/get/taipei_city_dig_point_next_dig_point'
    http_result = util.http_multiget([the_url])

    response_body = http_result.get(the_url, '')
    decoded = util.json_loads(response_body, '')
    next_dig_point = util._int(decoded, START_TAIPEI_CITY_DIG_POINT)

    cfg.logger.debug('after http_multiget: http_result: %s next_dig_point: %s', http_result, next_dig_point)

    return (S_OK, {'next_dig_point': next_dig_point})
Example #20
0
def _login(client_id, scope, register_uri, authorization_base_url, request, params):
    '''
    Begin an OAuth2 authorization flow and redirect the client to it.

    Processes the request's session, asks an OAuth2Session for the
    authorization URL and state, stores a 'login_info' record (state,
    timestamp, params, url), optionally purges expired 'login_info'
    records when 'is_cron_remove_expire' is configured false, and then
    calls redirect() with the authorization URL.

    client_id, scope: OAuth2Session arguments.
    register_uri: used as the OAuth2 redirect_uri.
    authorization_base_url: provider URL the authorization URL is built on.
    request: handed to util_user.process_session.
    params: dict; its 'url' entry (default '') is saved with the record.
    '''
    (session_struct, session_struct2) = util_user.process_session(request)
    cfg.logger.debug('session_struct: %s session_struct2: %s', session_struct, session_struct2)

    target_path = params.get('url', '')
    issued_at = util.get_timestamp()

    cfg.logger.debug('params: %s the_path: %s', params, target_path)

    auth_session = OAuth2Session(client_id, scope=scope, redirect_uri=register_uri)
    (authorization_url, state) = auth_session.authorization_url(authorization_base_url, approval_prompt="auto")

    util.db_insert('login_info', {
        "state": state,
        "the_timestamp": issued_at,
        "params": params,
        "url": target_path,
    })

    if not cfg.config.get('is_cron_remove_expire', True):
        # NOTE(review): multiplying by 1000 implies millisecond
        # timestamps -- confirm the unit of util.get_timestamp().
        configured_expire = cfg.config.get('expire_unix_timestamp_session', EXPIRE_UNIX_TIMESTAMP_SESSION)
        cutoff = issued_at - configured_expire * 1000
        util.db_remove('login_info', {"the_timestamp": {"$lt": cutoff}})

    cfg.logger.debug('after authorization_url: authorization_url: %s state: %s', authorization_url, state)

    redirect(authorization_url)
Example #21
0
def process_session(request):
    '''
    Set up a new beaker session or rotate the slots of an existing one.

    request: provides the session via environ['beaker.session'].

    If the session has no 'value' entry, structs for the current timestamp
    and for 300 later are constructed, 'value' / 'value2' end up holding
    two newly created session keys, and the session is saved. Otherwise
    the structs are read back from the stored keys and, if the first is
    300 or more behind the current timestamp, the second takes its place
    and a newly constructed struct fills the second slot.

    return: (session_struct, session_struct2)
    '''
    step = 300
    session = request.environ['beaker.session']
    current_timestamp = util.get_timestamp()

    is_new_session = not session.has_key('value')

    if is_new_session:
        active_struct = _construct_session_struct(current_timestamp)
        session['value'] = active_struct.get('key', '')

        standby_struct = _construct_session_struct(current_timestamp + step)
        session['value2'] = standby_struct.get('key', '')

        # NOTE(review): both slots are immediately replaced by the keys
        # created below; the returned structs therefore differ from what
        # is stored -- confirm this is intended.
        active_key = _create_session_key()
        standby_key = _create_session_key(offset_timestamp=step)
        session['value'] = active_key
        session['value2'] = standby_key
        session.save()
    else:
        active_key = session['value']
        standby_key = session['value2']

        active_struct = _extract_session_struct_from_session_key(active_key)
        standby_struct = _extract_session_struct_from_session_key(standby_key)

        active_timestamp = active_struct.get('the_timestamp', 0)
        standby_timestamp = standby_struct.get('the_timestamp', 0)

        elapsed = current_timestamp - util._int(active_timestamp)
        if elapsed >= step:
            replacement_timestamp = max(current_timestamp, util._int(standby_timestamp) + step)
            replacement_struct = _construct_session_struct(replacement_timestamp)

            # Promote the standby slot and install the replacement behind it.
            active_struct = standby_struct
            standby_struct = replacement_struct
            session['value'] = active_struct.get('key', '')
            session['value2'] = standby_struct.get('key', '')
            session.save()

    return (active_struct, standby_struct)