Esempio n. 1
0
    def _periodic_check(self):
        """
        Periodic check for the health and performance of the APIs.

        This runs in a background thread to check the health and performance
        of the third party APIs. It is useful in situations where there is
        not enough traffic all the time required to have an up to date list
        of response times.

        The method never returns: on every pass it calls ``check()`` on each
        API found in ``self._api_list``, hands the returned response time to
        ``self._process_response_time_api`` and then sleeps for
        ``self._PERIODIC_CHECK_INTERVAL`` seconds.

        An API whose ``check()`` raises ``NotImplementedError`` is skipped.
        Any other exception raised while checking one API is logged as a
        warning and the loop carries on with the next API, so that a single
        failing check cannot terminate the background thread.

        """
        while True:
            logger.debug("Starting periodic priority check")
            with self._locks["_api_list"]:
                logger.debug("API list: %s" % self._api_list.keys())
                for api in self._api_list.values():
                    logger.debug("Performing priority check on API: %s" %
                                 api.name)
                    try:
                        response_time = api.check()
                        self._process_response_time_api(response_time, api)
                    except NotImplementedError:
                        # Ignore NotImplementedError, the user has decided
                        # to not implement the periodic check for the
                        # response time of this API
                        logger.debug("API %s has no priority check method "
                                     "implemented." % api.name)
                    except Exception as error:
                        # This is the top-level boundary of the background
                        # thread: an unexpected failure of one API must not
                        # stop the periodic checks of all the other APIs.
                        logger.warning("Periodic check of API %s failed: %s"
                                       % (api.name, error))
            logger.debug("End of periodic priority check")
            time.sleep(self._PERIODIC_CHECK_INTERVAL)
Esempio n. 2
0
    def remove_api(self, api):
        """
        Removes the API from the internal list.

        The entry keyed by ``api.name`` is removed from ``self._api_list``
        while holding the write lock. If an entry was removed, the matching
        entries in ``self._api_response_times`` and ``self._percentile_map``
        are dropped as well. If no entry was found, nothing else is touched.

        Parameters
        ----------
        api : object
            The object implementing BaseThirdPartyAPIService class.

        """
        logger.info("Removing API: %s" % api.name)
        api_list_lock = self._locks["_api_list"]
        api_list_lock.acquire_write()
        try:
            removed_api = self._api_list.pop(api.name, None)
        finally:
            # Release the write lock even if the removal raises, otherwise
            # every later reader or writer of the API list would block.
            api_list_lock.release_write()
        if removed_api is None:
            logger.debug("Tried to remove API which is "
                         "not present in the list")
            return
        logger.debug("Removed API")
        with self._locks["_api_response_times"]:
            self._api_response_times.pop(api.name, None)
        with self._locks["_percentile_map"]:
            self._percentile_map.pop(api.name, None)
        logger.info("New list: %s" % self._api_list.keys())
Esempio n. 3
0
    def _process_response_time_api(self, response_time, api):
        """
        Analyses the response time of an API.

        This function is always called on the response time of an API in
        order to update the internal state. The response time is appended to
        the history kept in ``self._api_response_times[api.name]``, the
        history is trimmed to the newest ``self.MAX_HISTORY_RTIME`` samples
        (at least one sample is always kept) and the ``self.PERCENTILE``
        percentile of the history is stored in
        ``self._percentile_map[api.name]``.

        Parameters
        ----------
        response_time : float
            Elapsed time in milliseconds.
        api : object
            The API which had the response time passed above.

        Raises
        ------
        KeyError
            If ``api.name`` has no entry in ``self._api_response_times``.

        """
        logger.info("%s: response time %sms" % (api.name, response_time))
        # Never trim below one sample, otherwise there would be nothing left
        # to compute the percentile from.
        history_limit = max(self.MAX_HISTORY_RTIME, 1)
        with self._locks["_api_response_times"]:
            history = self._api_response_times[api.name]
            history.append(response_time)
            # Drop the oldest samples so that at most history_limit remain
            del history[:-history_limit]
            # Copy the samples while the lock is held; numpy.percentile
            # does not need its input to be sorted.
            samples = numpy.array(history)

        # Compute the response time of self.PERCENTILE percentage of requests
        p = numpy.percentile(samples, self.PERCENTILE)
        with self._locks["_percentile_map"]:
            self._percentile_map[api.name] = p
        logger.debug("%s - %s percentile result: %s" %
                     (api.name, self.PERCENTILE, p))
Esempio n. 4
0
 def get_exploratory_api_and_context():
     # Pick the next API in round-robin order instead of the fastest one and
     # build the context for follow-up calls. ``self`` and ``sp_list`` are
     # taken from the enclosing scope.
     logger.debug("This request will try to explore with round-robin")
     explored_api = self._shift_current_order_and_get_first()
     logger.debug("Picking API %s" % explored_api.name)
     # Keep every remaining entry except the one of the API just picked;
     # the first item of each entry is compared against the API name.
     remaining = []
     for entry in sp_list:
         if entry[0] != explored_api.name:
             remaining.append(entry)
     return explored_api, remaining
Esempio n. 5
0
    def _get_fastest_api(self, sp_list=None):
        """
        Returns the fastest API so far.

        The result is based on the sp_list. If sp_list is empty (or None),
        it will fetch the current order of APIs from
        ``self._percentile_map``, sorted by ascending value. This will be
        used as a context for the subsequent calls to this function.

        When ``self._should_explore()`` returns a true value, the API is
        picked with ``self._shift_current_order_and_get_first`` instead and
        its entry is filtered out of the returned context.

        Notes
        -----
            Round robin mode doesn't require sp_list parameter; the returned
            context is None in that mode.

            A non-empty sp_list passed by the caller is modified in place
            when its first entry is consumed.

        Parameters
        ----------
        sp_list : list, optional
            Previous list of ``(API name, percentile result)`` entries
            sorted based on the percentile result.
            It should be an empty list (or None) if it's the first call.

        Returns
        -------
        tuple
            Contains the API object and the list which needs to be passed to
            the subsequent calls to this function. The API object is None
            if the selected name is not present in ``self._api_list``.

        Raises
        ------
        IndexError
            If no context was passed, exploration is not selected and
            ``self._percentile_map`` is empty.

        """
        def get_exploratory_api_and_context():
            # Reads sp_list from the enclosing scope at call time, so it
            # sees the freshly sorted list when one had to be built.
            logger.debug("This request will try to explore with round-robin")
            new_api = self._shift_current_order_and_get_first()
            logger.debug("Picking API %s" % new_api.name)
            # Remove the API that was picked from the list of remainings APIs
            new_sp_list = [x for x in sp_list if x[0] != new_api.name]
            return new_api, new_sp_list

        if self._round_robin:
            return self._shift_current_order_and_get_first(), None

        should_explore = self._should_explore()

        if sp_list:
            # Subsequent call: continue with the context from the caller
            if should_explore:
                return get_exploratory_api_and_context()

            with self._locks["_api_list"]:
                return self._api_list.get(sp_list.pop(0)[0]), sp_list

        # First call: build the context from the current percentile results
        with self._locks["_percentile_map"]:
            sp_list = sorted(self._percentile_map.items(),
                             key=operator.itemgetter(1))

        if should_explore:
            return get_exploratory_api_and_context()

        logger.debug("Sorted by response time median: %s" % sp_list)
        with self._locks["_api_list"]:
            fastest_api = self._api_list.get(sp_list.pop(0)[0])
        return fastest_api, sp_list
Esempio n. 6
0
def get_all_translations(data):
    """
    Fetch the translations for a request through ``get_next_results``.

    When both ``data["from_lang_code"]`` and ``data["to_lang_code"]`` are
    ``"en"`` only one result is requested, otherwise all results are
    requested (``number_of_results=-1``).

    Parameters
    ----------
    data : dict
        Request data; the keys ``from_lang_code`` and ``to_lang_code`` are
        read here and the whole dict is forwarded to ``get_next_results``.

    Returns
    -------
    object
        Whatever ``get_next_results`` returns.

    """
    # Wordnik case (English to English): get only the top result
    english_only = data["from_lang_code"] == data["to_lang_code"] == "en"
    wanted_results = 1 if english_only else -1
    response = get_next_results(data, number_of_results=wanted_results)

    logger.debug(f"Zeeguu-API - Request data: {data}")
    return response
Esempio n. 7
0
def get_all_translations(data):
    """
    Return the result of ``get_next_results`` for the given request.

    A single result is requested when the source and the target language
    codes in ``data`` are both ``"en"``; in every other case all results
    are requested by passing ``number_of_results=-1``.

    Parameters
    ----------
    data : dict
        Request data. ``data["from_lang_code"]`` and
        ``data["to_lang_code"]`` select the mode; the dict itself is passed
        on to ``get_next_results`` unchanged.

    Returns
    -------
    object
        The value returned by ``get_next_results``.

    """
    language_pair = (data["from_lang_code"], data["to_lang_code"])
    if language_pair == ("en", "en"):
        # Wordnik case, get only the top result
        number_of_results = 1
    else:
        number_of_results = -1
    response = get_next_results(data, number_of_results=number_of_results)

    logger.debug(f"Zeeguu-API - Request data: {data}")
    return response
Esempio n. 8
0
    def __init__(self, api_list=None, config_filepath='config.ini'):
        """
        Set up the multiplexer state from the configuration file.

        Reads the settings through ``config.parse_config``, creates the
        internal maps together with the locks guarding them, registers the
        given APIs and, if ``enable_periodic_check`` is set in the
        configuration, starts a daemon thread running
        ``self._periodic_check``.

        Parameters
        ----------
        api_list : list, optional
            List of objects that are implementing the default class
            BaseThirdPartyAPIService. Each one is passed to
            ``self.register_new_api``. Defaults to no APIs.
        config_filepath : str
            Path handed to ``config.parse_config``.

        """
        apimux_cfg = config.parse_config(config_filepath)
        self.PERCENTILE = apimux_cfg.getint("PERCENTILE")
        self.MAX_HISTORY_RTIME = apimux_cfg.getint("MAX_HISTORY_RTIME")
        self.MAX_WAIT_TIME = apimux_cfg.getint("MAX_WAIT_TIME")
        self._PERIODIC_CHECK_INTERVAL = apimux_cfg.getint("PERIODIC_CHECK")

        logger.debug("Initializing the APIMultiplexer class")
        # Locks used to prevent multi-threading issues
        self._locks = {}
        # ReadWriteLock allows multiple readers and only one writer
        self._locks["_api_list"] = ReadWriteLock()
        self._api_list = {}
        self._locks["_api_response_times"] = Lock()
        self._api_response_times = {}
        self._locks["_percentile_map"] = Lock()
        self._percentile_map = {}
        self._futures_not_finished = {}
        self._locks["_futures_not_finished"] = Lock()

        # Registering all APIs passed as parameters; None means "no APIs"
        # and avoids sharing one mutable default list between instances.
        for api in (api_list or []):
            self.register_new_api(api)

        self._ignore_slow_apis = apimux_cfg.getboolean("ignore_slow_apis")
        self._slow_multiplied = apimux_cfg.getfloat("slow_multiplied")
        self._exploration_coefficient = apimux_cfg.getint(
            "exploration_coefficient")

        # Whether it should enable round robin or not
        self._round_robin = apimux_cfg.getboolean("round_robin")
        if self._round_robin:
            logger.info("Round robin enabled!")
            # Disable exploration if round robin is enabled
            self._exploration_coefficient = 0
        elif self._exploration_coefficient > 0:
            logger.info("Exploration with percentage %s enabled!" %
                        self._exploration_coefficient)
        self._current_order = []
        self._locks["_current_order"] = Lock()

        if apimux_cfg.getboolean("enable_periodic_check"):
            # Starting a background thread which will run periodically
            # the 'check' method if implemented by the user for an API.
            # The daemon flag is passed to the constructor because
            # Thread.setDaemon() is deprecated.
            self._periodic_check_thread = Thread(target=self._periodic_check,
                                                 args=(),
                                                 daemon=True)
            self._periodic_check_thread.start()
Esempio n. 9
0
def get_next_results(data,
                     exclude_services=None,
                     exclude_results=None,
                     number_of_results=-1):
    """
    Fetch translations from the API multiplexer and merge them.

    ``api_mux_worddefs`` is used when both language codes in ``data`` are
    ``"en"``, ``api_mux_translators`` otherwise. The translations of every
    returned service result are merged, empty ones are filtered out and,
    unless ``MULTI_LANG_TRANSLATOR_AB_TESTING`` is set, the merged list is
    ordered by quality.

    Parameters
    ----------
    data : dict
        Request data with the keys ``from_lang_code``, ``to_lang_code`` and
        ``query``.
    exclude_services : list of strings, optional
        Service names forwarded to the multiplexer. They are only forwarded
        when ``number_of_results`` is not 1. Defaults to no exclusions.
    exclude_results : list of strings, optional
        Lower-cased translations; a service result whose first translation
        (lower-cased) is in this list is skipped. Defaults to no exclusions.
    number_of_results : int
        1 requests only the top result, any other value requests all
        results.

    Returns
    -------
    TranslationResponse
        Response built from the merged translations.

    """
    # None stands for "nothing excluded"; this avoids mutable defaults
    exclude_services = [] if exclude_services is None else exclude_services
    exclude_results = [] if exclude_results is None else exclude_results

    translator_data = {
        "source_language": data["from_lang_code"],
        "target_language": data["to_lang_code"],
        "query": data["query"],
    }
    if data["from_lang_code"] == data["to_lang_code"] == "en":
        api_mux = api_mux_worddefs
    else:
        api_mux = api_mux_translators

    if number_of_results == 1:
        logger.debug("Getting only top result")
        translator_results = api_mux.get_next_results(translator_data,
                                                      number_of_results=1)
    else:
        logger.debug("Getting all results")
        translator_results = api_mux.get_next_results(
            translator_data,
            number_of_results=-1,
            exclude_services=exclude_services)
    zeeguu_core.log(f"Got results get_next_results: {translator_results}")
    # A result may be None (the loop below guards against it as well), so
    # do not call to_json() on it just for the sake of logging.
    json_translator_results = [
        (name, result.to_json() if result is not None else None)
        for name, result in translator_results]
    logger.debug("get_next_results Zeeguu-API - Got results: %s" %
                 json_translator_results)
    logger.debug("get_next_results - exclude_services %s" % exclude_services)
    # Returning data: [('GoogleTranslateWithContext',
    #                   <python_translators.translation_response.TranslationResponse>), ...]
    translations = []
    for _service_name, translation in translator_results:
        # Skip services without a result or without any translation;
        # indexing an empty translation list below would raise IndexError.
        if translation is None or not translation.translations:
            continue
        lower_translation = translation.translations[0]["translation"].lower()
        if lower_translation in exclude_results:
            # Translation already exists fetched by get_top_translation
            continue
        translations = merge_translations(translations,
                                          translation.translations)

    translations = filter_empty_translations(translations)

    if not MULTI_LANG_TRANSLATOR_AB_TESTING:
        # Disabling order by quality when A/B testing is enabled
        translations = order_by_quality(translations, translator_data["query"])

    zeeguu_core.log(f"Translations get_next_results: {translations}")
    response = TranslationResponse(translations=translations)
    zeeguu_core.log(f"Returning response get_next_results: {response}")
    return response
Esempio n. 10
0
    def _shift_current_order_and_get_first(self):
        """
        Rotate the round robin order and return the API that was first.

        The first name of ``self._round_robin_list`` is moved to the end of
        the list and then looked up in ``self._api_list``.

        Returns
        -------
        BaseThirdPartyAPIService
            The API registered under the name that was at the head of the
            list before the rotation, or None if no API is registered
            under that name.

        """
        order = self._round_robin_list
        logger.debug("Current round robin order: %s" % order)
        # Shift to the left by one position: the head becomes the tail
        head_name = order.pop(0)
        order.append(head_name)
        logger.debug("New round robin order: %s" % order)

        with self._locks["_api_list"]:
            return self._api_list.get(head_name)
Esempio n. 11
0
def get_next_results(data, exclude_services=None, exclude_results=None,
                     number_of_results=-1):
    """
    Query the API multiplexer and return the merged translations.

    The multiplexer is ``api_mux_worddefs`` when the source and the target
    language codes are both ``"en"`` and ``api_mux_translators`` otherwise.
    Results are merged with ``merge_translations``, cleaned with
    ``filter_empty_translations`` and ordered with ``order_by_quality``
    unless ``MULTI_LANG_TRANSLATOR_AB_TESTING`` is set.

    Parameters
    ----------
    data : dict
        Request data with the keys ``from_lang_code``, ``to_lang_code`` and
        ``query``.
    exclude_services : list of strings, optional
        Service names passed on to the multiplexer when all results are
        requested. Defaults to no exclusions.
    exclude_results : list of strings, optional
        Lower-cased translations used to skip service results whose first
        translation is already known. Defaults to no exclusions.
    number_of_results : int
        Pass 1 to request only the top result; any other value requests
        all results.

    Returns
    -------
    TranslationResponse
        Response wrapping the merged translations.

    """
    # Avoid mutable default arguments: None means "nothing to exclude"
    if exclude_services is None:
        exclude_services = []
    if exclude_results is None:
        exclude_results = []

    translator_data = {
        "source_language": data["from_lang_code"],
        "target_language": data["to_lang_code"],
        "query": data["query"]
    }
    if data["from_lang_code"] == data["to_lang_code"] == "en":
        api_mux = api_mux_worddefs
    else:
        api_mux = api_mux_translators

    if number_of_results == 1:
        logger.debug("Getting only top result")
        translator_results = api_mux.get_next_results(
            translator_data, number_of_results=1)
    else:
        logger.debug("Getting all results")
        translator_results = api_mux.get_next_results(
            translator_data, number_of_results=-1,
            exclude_services=exclude_services)
    zeeguu_core.log(f"Got results get_next_results: {translator_results}")
    # The merge loop below tolerates a None result, so the logging code
    # must not fail on it either.
    json_translator_results = [
        (x, None if y is None else y.to_json())
        for x, y in translator_results]
    logger.debug("get_next_results Zeeguu-API - Got results: %s"
                 % json_translator_results)
    logger.debug("get_next_results - exclude_services %s" % exclude_services)
    # Returning data: [('GoogleTranslateWithContext',
    #                   <python_translators.translation_response.TranslationResponse>), ...]
    translations = []
    for _service_name, translation in translator_results:
        if translation is None:
            continue
        if not translation.translations:
            # Nothing to merge; reading the first entry would raise
            continue
        lower_translation = translation.translations[0]["translation"].lower()
        if lower_translation in exclude_results:
            # Translation already exists fetched by get_top_translation
            continue
        translations = merge_translations(translations,
                                          translation.translations)

    translations = filter_empty_translations(translations)

    if not MULTI_LANG_TRANSLATOR_AB_TESTING:
        # Disabling order by quality when A/B testing is enabled
        translations = order_by_quality(translations, translator_data["query"])

    zeeguu_core.log(f"Translations get_next_results: {translations}")
    response = TranslationResponse(translations=translations)
    zeeguu_core.log(f"Returning response get_next_results: {response}")
    return response
Esempio n. 12
0
def contribute_trans(data):
    """
    Log the given payload as the preferred service.

    Parameters
    ----------
    data : object
        JSON-serialisable payload; it is dumped with ``ensure_ascii=False``
        so that non-ASCII characters stay readable in the log.

    """
    # The JSON text is logged as str. Encoding it to bytes first would make
    # the log show a bytes repr (b'...') with escaped non-ASCII characters.
    logger.debug("Preferred service: %s" %
                 json.dumps(data, ensure_ascii=False))
Esempio n. 13
0
 def __init__(self, name=None):
     """
     Initialise the service and set its name.

     Parameters
     ----------
     name : str, optional
         Name stored in ``self.name``. When it is None the name of the
         instance's class is used instead.

     """
     super(BaseThirdPartyAPIService, self).__init__()
     if name is None:
         # Fall back to the name of the concrete class
         name = self.__class__.__name__
     self.name = name
     logger.debug("Class initialized %s" % self.name)
Esempio n. 14
0
import sys

from apimux.log import logger

logger.debug("==== API Multiplexer imported ====")
# The first three fields of sys.version_info are major, minor and micro;
# they are kept as module-level names.
major, minor, micro = sys.version_info[:3]
logger.debug("Running Python version %s.%s.%s" % (major, minor, micro))
Esempio n. 15
0
def contribute_trans(data):
    """
    Write the payload to the debug log as the preferred service.

    Parameters
    ----------
    data : object
        JSON-serialisable payload. It is serialised with
        ``ensure_ascii=False`` to keep non-ASCII characters readable.

    """
    # Log the JSON text itself: formatting encoded bytes with %s would put
    # a bytes repr (b'...') with escaped non-ASCII characters in the log.
    logger.debug("Preferred service: %s"
                 % json.dumps(data, ensure_ascii=False))
Esempio n. 16
0
    def get_next_results(self,
                         data,
                         number_of_results,
                         exclude_services=None,
                         exclude_results=None):
        """
        Retrieve the next N results from the registered APIs.

        This function retrieves the next "number_of_results" using the
        list which contains the fastest APIs by average response time.
        Each request runs as a future on the executor returned by
        ``self._prepare_get_next_results``; failed requests are replaced by
        requests to further APIs as long as the failure budget returned by
        that method allows it.

        Notes
        -----
            If self.MAX_WAIT_TIME is greater than 0, this method will try
            to return within the specified time as long as there is at least
            one result to return. Requests running longer than
            ``self._get_max_api_timeout`` allows are marked as failed so
            that a replacement can be launched. Futures that are already
            running cannot be cancelled, the executor still waits for them
            before this method returns.

            If self.MAX_WAIT_TIME is 0, this method will wait as long as it's
            needed to fetch the required number of results.

        Parameters
        ----------
        data : dict
            Contains specific implementation of the objects that implement
            BaseThirdPartyAPIService class.
        number_of_results : int
            Number of results that will be fetched. It is passed to
            ``self._prepare_get_next_results``, which determines how many
            requests are sent.
        exclude_services : list of strings, optional
            List of strings containing the service names which should be
            excluded for processing the request. It will be used to filter
            the APIs from the list of fastest APIs. Defaults to no
            exclusions.
        exclude_results : list, optional
            Accepted for interface compatibility; it is currently not used
            by this method.

        Returns
        -------
        list of tuples
            Returns a list of tuples containing the API name and the result
            fetched from that API through ``self._get_result``. Results
            that are None are not included.

        """
        # None stands for "nothing excluded"; avoids a mutable default
        if exclude_services is None:
            exclude_services = []

        results = []
        sp_list = []
        # Maps every in-flight future to the name of its API, its start
        # time and, once it exceeded its timeout, a "timed_out" flag.
        future_to_api = {}
        failed_futures = 0
        failed_futures_lock = Lock()

        allowed_failed_futures, requested_results, executor = (
            self._prepare_get_next_results(number_of_results,
                                           exclude_services))

        def register_result(future):
            # Done-callback: appends the result from the future to the final
            # list that will be returned, or records the failure.
            nonlocal failed_futures

            # Remove the future from the map
            api_details = future_to_api.pop(future, None)
            # The future is ignored if it was cancelled.
            if api_details is None or future.cancelled():
                return

            future_exception = future.exception()
            if future_exception:
                logger.warning(
                    "API %s raised exception %s" %
                    (api_details['name'], future_exception))
                failed = True
            elif future.result() is not None:
                results.append((api_details['name'], future.result()))
                failed = False
            else:
                # The API returned an invalid result, mark the future
                # as failed and continue fetching from the next one.
                failed = True

            # A future that exceeded its timeout was already counted as
            # failed by cancel_slow_apis; do not count it a second time.
            if failed and not api_details.get("timed_out"):
                with failed_futures_lock:
                    failed_futures += 1

        def launch_future(api, data, executor):
            future = executor.submit(self._get_result, api, data)
            future_to_api[future] = {
                "name": api.name,
                # Raw timer() reading; differences are converted to
                # milliseconds where they are computed.
                "start_time_ms": timer()
            }
            future.add_done_callback(register_result)

        def replace_failed_future(sp_list,
                                  data,
                                  exclude_services,
                                  executor,
                                  elapsed_ms=None):
            """
            Helper that replaces failed futures with new ones.

            This function is used when any of the current API requests fail
            and the number of results requested cannot be met. It launches
            a new future requesting a result from another API to make sure
            it meets the number of results desired. Nothing is launched if
            the failure budget is used up, no failure is pending, sp_list
            is empty or the next API is excluded.

            Notes
            -----
                The parameter elapsed_ms is required only if
                self.MAX_WAIT_TIME > 0.

            Parameters
            ----------
            sp_list : list of APIs
                The list returned by self._get_fastest_api
            data : object
                The object which will be sent to the method get_result of the
                api parameter.
            exclude_services : list of strings
                The API names which should be excluded from the result.
            executor : ThreadPoolExecutor
                The ThreadPoolExecutor object that will process the future.
            elapsed_ms : int
                How much time has elapsed since it started sending requests.

            Returns
            -------
            None

            """
            nonlocal failed_futures
            nonlocal allowed_failed_futures
            with failed_futures_lock:
                if not (allowed_failed_futures > 0 and failed_futures > 0
                        and sp_list):
                    return
                api, sp_list = self._get_fastest_api(sp_list=sp_list)
                if api.name in exclude_services:
                    return
                max_timeout = self._get_max_api_timeout(api.name)
                if elapsed_ms:
                    if not (elapsed_ms + max_timeout) < self.MAX_WAIT_TIME:
                        # Too late to launch new futures
                        # NOTE(review): the budget is zeroed here but the
                        # replacement is still launched and the budget then
                        # drops to -1, which the "== 0" check of the wait
                        # loop does not match - confirm the intent.
                        allowed_failed_futures = 0
                failed_futures -= 1
                allowed_failed_futures -= 1
            # Submit outside of the lock: if the future completes before
            # its done-callback is attached, the callback runs right here
            # in this thread and may need failed_futures_lock itself.
            launch_future(api, data, executor)

        def cancel_slow_apis():
            # Marks as failed (and tries to cancel) the requests currently
            # in progress whose elapsed time is greater than the timeout
            # returned by self._get_max_api_timeout for their API.
            nonlocal failed_futures
            now = timer()
            # Iterate over a snapshot: done-callbacks remove entries from
            # future_to_api while this loop is running.
            for future, api_details in list(future_to_api.items()):
                if api_details.get("timed_out"):
                    # Already counted on a previous pass
                    continue
                elapsed_ms = (now - api_details['start_time_ms']) * 1000
                if elapsed_ms > self._get_max_api_timeout(api_details['name']):
                    api_details["timed_out"] = True
                    with failed_futures_lock:
                        failed_futures += 1
                    future.cancel()

        try:
            current_requests_sent = 0
            while True:
                api, sp_list = self._get_fastest_api(sp_list=sp_list)
                if api.name in exclude_services:
                    continue
                logger.debug("Launching future: %s" % api)
                launch_future(api, data, executor)
                current_requests_sent += 1
                if current_requests_sent == requested_results:
                    break

            if self.MAX_WAIT_TIME > 0:
                start_time = timer()
                while len(results) < requested_results:
                    elapsed_ms = (timer() - start_time) * 1000
                    if (elapsed_ms > self.MAX_WAIT_TIME and
                            len(results) > 0) or allowed_failed_futures == 0:
                        break
                    # Launch a new future if we have any failed futures.
                    replace_failed_future(sp_list, data, exclude_services,
                                          executor, elapsed_ms)
                    # Cancel slow APIs
                    cancel_slow_apis()
                    time.sleep(0.01)

                # Maximum wait time has passed here, cancel all futures and
                # return as soon as possible. A successful cancel() runs
                # register_result synchronously, which removes the future
                # from the map, hence the snapshot.
                for future in list(future_to_api):
                    future.cancel()
            else:
                while len(results) < requested_results:
                    # Launch a new future if we have any failed futures.
                    replace_failed_future(sp_list, data, exclude_services,
                                          executor)
                    if len(sp_list) == 0:
                        break
                    time.sleep(0.01)
        finally:
            # Pending futures were cancelled above when
            # self.MAX_WAIT_TIME > 0; the executor still waits for the
            # futures that are already running.
            # When self.MAX_WAIT_TIME == 0 the executor will wait as long as
            # it's required for the futures to respond.
            executor.shutdown(wait=True)

        return results