# Example 1
    def consume_new_session_request(cls,
                                    task_queue: Queue = G.SESSION_UPDATE_QUEUE
                                    ):
        """ Consume the session update event in the task queue to request new session
            from crawler server.

            Runs forever. Each queue item is a ``(visa_type, location, session)``
            tuple. For every item the method builds the matching crawler
            ``register`` endpoint (AIS or CGI), fetches a fresh session, and on
            success replaces the stale session in ``SESSION_CACHE``. All errors
            are logged and never escape the loop.
        """
        LOGGER.info('Listening to session update request task queue...')
        while True:
            visa_type, location, session = task_queue.get()
            LOGGER.debug(
                'Receive new session update request: %s-%s | Current queue size: %d',
                visa_type, location, task_queue.qsize())

            if session is None:  # just in case
                LOGGER.error('A session object from %s-%s is NoneType',
                             visa_type, location)
                continue  # a None session can't be refreshed; skip instead of dereferencing it below

            if not SESSION_CACHE.contain_session(visa_type, location, session):
                LOGGER.debug(
                    'Session %s is no longer in the %s-%s session list.',
                    session, visa_type, location)
                continue

            try:
                if session.sys == 'ais':
                    email = G.value(f'ais_email_{visa_type}', None)
                    password = G.value(f'ais_pswd_{visa_type}', None)
                    # Guard before building the endpoint: without credentials
                    # there is nothing to register.
                    if email is None or password is None:
                        continue

                    # NOTE: deliberately not logging the password.
                    LOGGER.debug('Fetching new session for AIS: %s, %s',
                                 location, email)
                    endpoint = G.CRAWLER_API['register']['ais'].format(
                        location, email, password)
                elif session.sys == 'cgi':
                    endpoint = G.CRAWLER_API['register']['cgi'].format(
                        visa_type, location)
                else:
                    # Unknown crawler system: skip rather than hit a NameError
                    # on the undefined `endpoint` below.
                    LOGGER.error('Unknown session system %r for %s-%s',
                                 session.sys, visa_type, location)
                    continue

                url = '{}{}'.format(G.value('current_crawler_node', ''),
                                    endpoint)
                res = requests.get(url,
                                   timeout=G.WAIT_TIME['register'],
                                   proxies=G.value('proxies', None))
                try:
                    result = res.json()
                except ValueError:
                    # Non-JSON body: a crawler-side 500 marks the pair
                    # unavailable; anything else is dumped for inspection.
                    content = res.content.decode()
                    if 'Server Error (500)' in content:
                        SESSION_CACHE.mark_unavailable(visa_type, location)
                    else:
                        print(time.asctime(), visa_type, location, content)
                    continue
                LOGGER.debug(
                    'consume_new_session_request - Endpoint: %s | Response json: %s',
                    endpoint, json.dumps(result))

                # code == 0 stands for success in the crawler API.
                if result['code'] != 0:
                    LOGGER.warning('%s, %s, %s, FAILED - %s',
                                   datetime.now().strftime('%H:%M:%S'),
                                   visa_type, location, result['msg'])
                    if result['msg'] == "Network Error":
                        SESSION_CACHE.mark_unavailable(visa_type, location)
                    else:
                        cls.check_crawler_server_connection()
                    continue

                # Generate new session object and update cache
                if session.sys == 'ais':
                    new_session = Session((result['session'], result['id']),
                                          sys='ais')
                    date_available = bool(len(result['msg']))
                else:  # 'cgi' — the only other value that reaches this point
                    new_session = Session(result['session'], sys='cgi')
                    date_available = bool(
                        tuple([dt_seg for dt_seg in result['msg'].split('-')
                               ]))  # Always True

                if date_available:  # why this flag is needed?
                    LOGGER.info(
                        'consume_new_session_request - %s, %s, %s, SUCCESS - %s',
                        datetime.now().strftime('%H:%M:%S'), visa_type,
                        location, result['msg'])
                    SESSION_CACHE.replace_session(visa_type, location, session,
                                                  new_session)
            except requests.exceptions.ReadTimeout:
                LOGGER.debug(
                    'consume_new_session_request - request time out for endpoint: %s | %s-%s',
                    endpoint, visa_type, location)
                cls.check_crawler_server_connection()
            except Exception:
                # FIX: the original passed the traceback as a %-arg with no
                # placeholder in the format string, so it was never rendered.
                LOGGER.error('an unexpected error occurred: %s',
                             traceback.format_exc())
# Example 2
    def fetch_visa_status(cls, visa_type: str, location: str,
                          req: requests.Session):
        """ Fetch the latest visa status available from crawler server.

            Looks up the cached session for ``(visa_type, location)``, calls the
            crawler ``refresh`` endpoint with it, and on success persists the
            returned date via ``save_fetched_data``. On session expiry a new
            session is requested; on network failure a placeholder record is
            saved. All errors are logged and swallowed.
        """
        now = datetime.now().strftime('%H:%M:%S')
        try:
            session = SESSION_CACHE.get_session(visa_type, location)
            if session is None:
                LOGGER.warning('%s, %s, %s, FAILED - No Session', now,
                               visa_type, location)
                return

            if session.sys == 'ais':
                endpoint = G.CRAWLER_API['refresh']['ais'].format(
                    location, session.schedule_id, session.session)
            elif session.sys == 'cgi':
                endpoint = G.CRAWLER_API['refresh']['cgi'].format(
                    session.session)
            else:
                # Unknown crawler system: bail out rather than hit a NameError
                # on the undefined `endpoint` below.
                LOGGER.warning('%s, %s, %s, FAILED - Unknown session sys %r',
                               now, visa_type, location, session.sys)
                return

            url = '{}{}'.format(G.value('current_crawler_node', ''), endpoint)
            try:
                res = req.get(url,
                              timeout=G.WAIT_TIME['refresh'],
                              proxies=G.value('proxies', None))
            except requests.exceptions.Timeout:
                LOGGER.warning('%s, %s, %s, FAILED - Endpoint Timeout.', now,
                               visa_type, location)
                cls.save_placeholder_at_exception(visa_type, location)
                cls.check_crawler_server_connection()
                return
            except requests.exceptions.ConnectionError:
                LOGGER.warning(
                    '%s, %s, %s, FAILED - Endpoint Connection Aborted.', now,
                    visa_type, location)
                cls.check_crawler_server_connection()
                return
            else:
                if res.status_code != 200:
                    LOGGER.warning('%s, %s, %s, FAILED - %d', now, visa_type,
                                   location, res.status_code)
                    cls.check_crawler_server_connection()
                    return

                result = res.json()
                LOGGER.debug(
                    'fetch_visa_status - Endpoint: %s | Response json: %s',
                    endpoint, json.dumps(result))

                # code == 0 stands for success in crawler api code
                if result['code'] != 0:
                    LOGGER.warning('%s, %s, %s, FAILED - Session Expired', now,
                                   visa_type, location)

                    # session expired will trigger database update using the last successful fetch result
                    cls.save_placeholder_at_exception(visa_type, location)

                    SESSION_CACHE.produce_new_session_request(
                        visa_type, location, session)
                    return

                if session.sys == 'cgi':
                    # CGI returns a single 'Y-M-D' date string.
                    dt_segments = [
                        int(dt_seg) for dt_seg in result['msg'].split('-')
                    ]
                    cls.save_fetched_data(visa_type, location, dt_segments)
                    LOGGER.info('%s, %s, %s, SUCCESS - %d/%d/%d', now,
                                visa_type, location, *dt_segments)

                elif session.sys == 'ais':
                    # AIS returns (city, date) pairs; only monitored cities are
                    # persisted. The response also rotates the session token.
                    date_lst = result['msg']
                    for city, dt_segments in date_lst:
                        if city in G.AIS_MONITORING_CITY:
                            cls.save_fetched_data(visa_type, city, dt_segments)
                            LOGGER.info('%s, %s, %s, %s, SUCCESS - %d/%d/%d',
                                        now, visa_type, location, city,
                                        *dt_segments)

                    new_session = Session(session=(result['session'],
                                                   session.schedule_id),
                                          sys=session.sys)
                    SESSION_CACHE.replace_session(visa_type, location, session,
                                                  new_session)

        except Exception:
            LOGGER.error(traceback.format_exc())