예제 #1
0
class Querying():
    def __init__(self, loglevel: int = logging.DEBUG):
        '''Set up the instance logger and the four Redis clients used by the queries.

        :param loglevel: Level applied to the instance logger.
        '''
        self.__init_logger(loglevel)

        # Default database behind the 'storage' socket, replies decoded to str.
        storage_options = dict(unix_socket_path=get_socket_path('storage'),
                               decode_responses=True)
        self.storage = StrictRedis(**storage_options)

        # Database 1 behind the 'storage' socket.
        # NOTE(review): decode_responses is left at its default here, presumably
        # because this client is used for DUMP, whose payload is binary - confirm.
        ranking_options = dict(unix_socket_path=get_socket_path('storage'),
                               db=1)
        self.ranking = StrictRedis(**ranking_options)

        # Database 2 behind the 'storage' socket, replies decoded to str.
        asn_meta_options = dict(unix_socket_path=get_socket_path('storage'),
                                db=2,
                                decode_responses=True)
        self.asn_meta = StrictRedis(**asn_meta_options)

        # Database 1 behind the 'cache' socket, replies decoded to str.
        cache_options = dict(unix_socket_path=get_socket_path('cache'),
                             db=1,
                             decode_responses=True)
        self.cache = StrictRedis(**cache_options)

    def __init_logger(self, loglevel: int):
        '''Create ``self.logger``, named after the instance's class, at `loglevel`.'''
        logger_name = f'{self.__class__.__name__}'
        logger = logging.getLogger(logger_name)
        logger.setLevel(loglevel)
        self.logger = logger

    def __normalize_date(self, date: Dates):
        '''Return `date` as an ISO ``YYYY-MM-DD`` string.

        :param date: A ``datetime.datetime``, a ``datetime.date`` or a string
            that ``parse`` can read.
        :return: The ISO formatted day, or ``None`` when `date` is of none of
            the supported types.
        :raises InvalidDateFormat: When a string cannot be parsed.
        '''
        if isinstance(date, str):
            try:
                return parse(date).date().isoformat()
            except ValueError:
                raise InvalidDateFormat(
                    'Unable to parse the date. Should be YYYY-MM-DD.')
        # datetime.datetime is a subclass of datetime.date, so it has to be
        # tested first to drop the time part.
        if isinstance(date, datetime.datetime):
            return date.date().isoformat()
        if isinstance(date, datetime.date):
            return date.isoformat()
        # Unsupported types fall through without an error.
        return None

    def _ranking_cache_wrapper(self, key):
        '''Copy `key` from the ranking database into the cache when the cache lacks it.

        Does nothing when the key is already cached or does not exist in the
        ranking database.

        :param key: Name of the Redis key to make available in ``self.cache``.
        '''
        if self.cache.exists(key):
            return
        # DUMP returns nothing for a missing key, so a separate EXISTS call is
        # not needed (and the key cannot vanish between the two calls).
        key_dump = self.ranking.dump(key)
        if key_dump is None:
            return
        # Cache for 10 hours. RESTORE expects the TTL in milliseconds.
        ttl_ms = 10 * 60 * 60 * 1000
        self.cache.restore(key, ttl_ms, key_dump, True)

    def asns_global_ranking(self,
                            date: Union[Dates, None] = None,
                            source: Union[list, str] = '',
                            ipversion: str = 'v4',
                            limit: int = 100):
        '''Aggregated ranking of all the ASNs known in the system, weighted by source.

        :param date: Day to query. ``None`` (the default) means today,
            resolved when the method is called.
        :param source: A single source name, a list of source names whose
            ranked sets are unioned, or an empty value to use the key that has
            no source component.
        :param ipversion: IP version label used in the key names.
        :param limit: Maximum number of entries returned. ``0`` returns an
            empty list; a negative value is passed to Redis unchanged as the
            end index (``-1`` returns the whole set).
        :return: dict with ``meta`` (ipversion, limit, date), ``source`` and
            ``response``, a list of ``(member, score)`` pairs, highest score first.
        '''
        to_return = {
            'meta': {
                'ipversion': ipversion,
                'limit': limit
            },
            'source': source,
            'response': []
        }

        if date is None:
            # Resolved on every call: a default computed in the signature is
            # evaluated once, when the class is defined, and then goes stale.
            date = datetime.date.today()
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d
        if source:
            if isinstance(source, list):
                keys = []
                for s in source:
                    key = f'{d}|{s}|asns|{ipversion}'
                    self._ranking_cache_wrapper(key)
                    keys.append(key)
                # union the ranked sets
                key = '|'.join(sorted(source)) + f'|{d}|asns|{ipversion}'
                if not self.cache.exists(key):
                    self.cache.zunionstore(key, keys)
            else:
                key = f'{d}|{source}|asns|{ipversion}'
        else:
            key = f'{d}|asns|{ipversion}'
        self._ranking_cache_wrapper(key)

        if limit == 0:
            return to_return
        # ZREVRANGE includes the element at the end index, so the last wanted
        # position is limit - 1. Negative values keep their Redis meaning.
        end = limit - 1 if limit > 0 else limit
        to_return['response'] = self.cache.zrevrange(key,
                                                     start=0,
                                                     end=end,
                                                     withscores=True)
        return to_return

    def asn_details(self,
                    asn: int,
                    date: Union[Dates, None] = None,
                    source: Union[list, str] = '',
                    ipversion: str = 'v4'):
        '''Aggregated ranking of all the prefixes announced by the given ASN, weighted by source.

        :param asn: ASN whose prefixes are listed.
        :param date: Day to query. ``None`` (the default) means today,
            resolved when the method is called.
        :param source: A single source name, a list of source names whose
            ranked sets are unioned, or an empty value to use the key that has
            no source component.
        :param ipversion: IP version label used in the key names.
        :return: dict with ``meta`` (asn, ipversion, source, date) and
            ``response``, the full list of ``(member, score)`` pairs, highest
            score first.
        '''
        to_return = {
            'meta': {
                'asn': asn,
                'ipversion': ipversion,
                'source': source
            },
            'response': set()
        }

        if date is None:
            # Resolved on every call: a default computed in the signature is
            # evaluated once, when the class is defined, and then goes stale.
            date = datetime.date.today()
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d
        if source:
            if isinstance(source, list):
                keys = []
                for s in source:
                    key = f'{d}|{s}|{asn}|{ipversion}|prefixes'
                    self._ranking_cache_wrapper(key)
                    keys.append(key)
                # union the ranked sets
                key = '|'.join(sorted(source)) + f'|{d}|{asn}|{ipversion}'
                if not self.cache.exists(key):
                    self.cache.zunionstore(key, keys)
            else:
                key = f'{d}|{source}|{asn}|{ipversion}|prefixes'
        else:
            key = f'{d}|{asn}|{ipversion}'
        self._ranking_cache_wrapper(key)
        # end=-1: return every member of the sorted set.
        to_return['response'] = self.cache.zrevrange(key,
                                                     start=0,
                                                     end=-1,
                                                     withscores=True)
        return to_return

    def asn_rank(self,
                 asn: int,
                 date: Union[Dates, None] = None,
                 source: Union[list, str] = '',
                 ipversion: str = 'v4',
                 with_position: bool = False):
        '''Get the rank of a single ASN, weighted by source.

        :param asn: ASN to look up.
        :param date: Day to query. ``None`` (the default) means today,
            resolved when the method is called.
        :param source: A single source name, a list of source names whose
            ranks are summed, or an empty value to read the score from the
            sorted set that has no source component.
        :param ipversion: IP version label used in the key names.
        :param with_position: When true and no source is given, also report
            the 1-based position of the ASN and the size of the sorted set.
        :return: dict with ``meta`` and ``response``. ``response`` is a float,
            or, when the position is requested without a source, a dict with
            ``rank``, ``position`` (``None`` if the ASN is not in the set) and
            ``total_known_asns``.
        '''
        to_return = {
            'meta': {
                'asn': asn,
                'ipversion': ipversion,
                'source': source,
                'with_position': with_position
            },
            'response': 0.0
        }

        if date is None:
            # Resolved on every call: a default computed in the signature is
            # evaluated once, when the class is defined, and then goes stale.
            date = datetime.date.today()
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d
        if source:
            if isinstance(source, list):
                r = 0.0
                for s in source:
                    key = f'{d}|{s}|{asn}|{ipversion}'
                    self._ranking_cache_wrapper(key)
                    # Single GET instead of EXISTS followed by GET: the key may
                    # expire between the two calls, and GET already signals a
                    # missing key with None.
                    value = self.cache.get(key)
                    if value is not None:
                        r += float(value)
            else:
                key = f'{d}|{source}|{asn}|{ipversion}'
                self._ranking_cache_wrapper(key)
                r = self.cache.get(key)
        else:
            key = f'{d}|asns|{ipversion}'
            self._ranking_cache_wrapper(key)
            r = self.cache.zscore(key, asn)
        if not r:
            # Missing key or member: report a rank of zero.
            r = 0
        if with_position and not source:
            position = self.cache.zrevrank(key, asn)
            if position is not None:
                # ZREVRANK is 0-based; expose a 1-based position.
                position += 1
            to_return['response'] = {
                'rank': float(r),
                'position': position,
                'total_known_asns': self.cache.zcard(key)
            }
        else:
            to_return['response'] = float(r)
        return to_return

    def get_sources(self, date: Union[Dates, None] = None):
        '''Get the sources available for a specific day (default: today).

        :param date: Day to query. ``None`` (the default) means today,
            resolved when the method is called.
        :return: dict with ``meta`` (date) and ``response``, the members of
            the ``<date>|sources`` set in the storage database.
        '''
        if date is None:
            # Resolved on every call: a default computed in the signature is
            # evaluated once, when the class is defined, and then goes stale.
            date = datetime.date.today()
        d = self.__normalize_date(date)
        return {
            'meta': {'date': d},
            'response': self.storage.smembers(f'{d}|sources')
        }

    def get_asn_descriptions(self, asn: int, all_descriptions=False):
        '''Return the description(s) stored for `asn`.

        :param asn: ASN whose ``<asn>|descriptions`` hash is read.
        :param all_descriptions: When true, return the whole hash instead of
            a single entry.
        :return: dict with ``meta`` (asn, all_descriptions) and ``response``:
            the whole hash when `all_descriptions` is set or the hash is
            empty, otherwise the value stored under the greatest key.
        '''
        descriptions = self.asn_meta.hgetall(f'{asn}|descriptions')
        if descriptions and not all_descriptions:
            # NOTE(review): presumably the keys are timestamps, so the greatest
            # key is the most recent description - confirm.
            newest_key = max(descriptions)
            response = descriptions[newest_key]
        else:
            response = descriptions
        return {
            'meta': {
                'asn': asn,
                'all_descriptions': all_descriptions
            },
            'response': response
        }

    def get_prefix_ips(self,
                       asn: int,
                       prefix: str,
                       date: Union[Dates, None] = None,
                       source: Union[list, str] = '',
                       ipversion: str = 'v4'):
        '''List the IPs recorded for a prefix of an ASN, with the sources that reported them.

        :param asn: ASN announcing the prefix.
        :param prefix: Prefix to look up.
        :param date: Day to query. ``None`` (the default) means today,
            resolved when the method is called.
        :param source: A single source name, a list of source names, or an
            empty value to use every source returned by ``get_sources`` for
            that day.
        :param ipversion: Only echoed back in ``meta``; it is not part of the
            key names read here.
        :return: dict with ``meta`` and ``response``, a ``defaultdict(list)``
            mapping each IP to the list of sources it was found in.
        '''
        to_return = {
            'meta': {
                'asn': asn,
                'prefix': prefix,
                'ipversion': ipversion,
                'source': source
            },
            'response': defaultdict(list)
        }

        if date is None:
            # Resolved on every call: a default computed in the signature is
            # evaluated once, when the class is defined, and then goes stale.
            date = datetime.date.today()
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d

        if not source:
            sources = self.get_sources(d)['response']
        elif isinstance(source, list):
            sources = source
        else:
            sources = [source]

        ips_to_sources = to_return['response']
        for current_source in sources:
            members = self.storage.smembers(
                f'{d}|{current_source}|{asn}|{prefix}')
            # Members look like '<ip>|<rest>'; keep each IP once per source.
            for ip in {ip_ts.split('|')[0] for ip_ts in members}:
                ips_to_sources[ip].append(current_source)
        return to_return

    def get_asn_history(self,
                        asn: int,
                        period: int = 100,
                        source: Union[list, str] = '',
                        ipversion: str = 'v4',
                        date: Union[Dates, None] = None):
        '''Daily rank of an ASN over the `period` days ending on `date`.

        :param asn: ASN to look up.
        :param period: Number of days to return.
        :param source: Forwarded unchanged to ``asn_rank``.
        :param ipversion: Forwarded unchanged to ``asn_rank``.
        :param date: Last day of the period, as a string, ``datetime.date``
            or ``datetime.datetime``. ``None`` (the default) means today,
            resolved when the method is called.
        :return: dict with ``meta`` and ``response``, a list of
            ``(iso_date, rank)`` tuples, oldest day first.
        '''
        to_return = {
            'meta': {
                'asn': asn,
                'period': period,
                'ipversion': ipversion,
                'source': source
            },
            'response': []
        }

        today = datetime.date.today()
        if date is None:
            date = today
        elif isinstance(date, str):
            date = parse(date).date()
        elif isinstance(date, datetime.datetime):
            # A datetime cannot be compared with the date used below.
            date = date.date()
        if date + timedelta(days=period / 3) > today:
            # the period to display will be around the date passed at least 2/3 before the date, at most 1/3 after
            # FIXME: That is not doing what it is supposed to...
            date = today

        to_return['meta']['date'] = date.isoformat()

        # Walk from the oldest day to the newest so the list can be appended
        # to instead of repeatedly inserting at the front.
        for i in reversed(range(period)):
            d = date - timedelta(days=i)
            rank = self.asn_rank(asn, d, source, ipversion)
            to_return['response'].append(
                (d.isoformat(), rank.get('response', 0)))
        return to_return

    def country_rank(self,
                     country: str,
                     date: Union[Dates, None] = None,
                     source: Union[list, str] = '',
                     ipversion: str = 'v4'):
        '''Rank of a country: the sum of the ranks of the ASNs RIPE reports as routed for it.

        :param country: Country identifier passed to ``StatsRIPE.country_asns``.
        :param date: Day to query. ``None`` (the default) means today,
            resolved when the method is called.
        :param source: Forwarded unchanged to ``asn_rank``.
        :param ipversion: Forwarded unchanged to ``asn_rank``.
        :return: dict with ``meta`` and ``response``. ``response`` is
            ``[total_rank, [(asn, rank), ...]]``; when the RIPE answer has no
            routed ASNs it is the placeholder ``[0, [(0, 0)]]``.
        '''
        to_return = {
            'meta': {
                'country': country,
                'ipversion': ipversion,
                'source': source
            },
            'response': []
        }

        if date is None:
            # Resolved on every call: a default computed in the signature is
            # evaluated once, when the class is defined, and then goes stale.
            date = datetime.date.today()
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d

        ripe = StatsRIPE()
        response = ripe.country_asns(country, query_time=d, details=1)
        if (not response.get('data') or not response['data'].get('countries')
                or not response['data']['countries'][0].get('routed')):
            self.logger.warning(f'Invalid response: {response}')
            # Keep the same dict shape as the success path so that callers
            # reading ['response'] (e.g. country_history) do not break.
            to_return['response'] = [0, [(0, 0)]]
            return to_return
        routed_asns = response['data']['countries'][0]['routed']
        ranks = [
            self.asn_rank(asn, d, source, ipversion)['response']
            for asn in routed_asns
        ]
        # Materialize the pairs: a bare zip object can only be consumed once
        # and cannot be serialized.
        to_return['response'] = [sum(ranks), list(zip(routed_asns, ranks))]
        return to_return

    def country_history(self,
                        country: Union[list, str],
                        period: int = 30,
                        source: Union[list, str] = '',
                        ipversion: str = 'v4',
                        date: Union[Dates, None] = None):
        '''Daily rank of one or more countries over the `period` days ending on `date`.

        :param country: A country identifier or a list of them.
        :param period: Number of days to return per country.
        :param source: Forwarded unchanged to ``country_rank``.
        :param ipversion: Forwarded unchanged to ``country_rank``.
        :param date: Last day of the period, as a string, ``datetime.date``
            or ``datetime.datetime``. ``None`` (the default) means today,
            resolved when the method is called.
        :return: dict with ``meta`` and ``response``, a ``defaultdict(list)``
            mapping each country to a list of ``(iso_date, rank, details)``
            tuples, oldest day first.
        '''
        to_return = {
            'meta': {
                'country': country,
                'ipversion': ipversion,
                'source': source
            },
            'response': defaultdict(list)
        }

        today = datetime.date.today()
        if date is None:
            date = today
        elif isinstance(date, str):
            date = parse(date).date()
        elif isinstance(date, datetime.datetime):
            # A datetime cannot be compared with the date used below.
            date = date.date()
        if date + timedelta(days=period / 3) > today:
            # the period to display will be around the date passed at least 2/3 before the date, at most 1/3 after
            date = today

        if isinstance(country, str):
            country = [country]
        for c in country:
            history = to_return['response'][c]
            # Walk from the oldest day to the newest so the list can be
            # appended to instead of repeatedly inserting at the front.
            for i in reversed(range(period)):
                d = date - timedelta(days=i)
                result = self.country_rank(c, d, source, ipversion)
                # Accept both the dict form and a bare (rank, details) pair.
                if isinstance(result, dict):
                    rank, details = result['response']
                else:
                    rank, details = result
                if rank is None:
                    rank = 0
                history.append((d.isoformat(), rank, list(details)))
        return to_return

    def get_source_config(self):
        '''Placeholder without an implementation yet; always returns ``None``.'''
        return None

    def get_sources_configs(self):
        '''Load every ``*.json`` file of the ``modules`` configuration directory.

        :return: dict mapping ``'<vendor>-<name>'`` (both taken from the
            loaded configuration) to the parsed configuration. When two files
            produce the same key, the one read last wins.
        :raises KeyError: When a configuration lacks ``vendor`` or ``name``.
        '''
        config_dir = get_config_path() / 'modules'
        configs = {}
        for modulepath in config_dir.glob('*.json'):
            # JSON text is UTF-8; do not depend on the locale's default encoding.
            with open(modulepath, encoding='utf-8') as f:
                config = json.load(f)
            configs['{}-{}'.format(config['vendor'], config['name'])] = config
        return configs