コード例 #1
0
    def download_data_for_current_date():
        """
        Download every file required for parsing and return the data directory.

        Zone listings come from R01:
        https://partner.r01.ru/zones/ru_domains.gz
        https://partner.r01.ru/zones/su_domains.gz
        https://partner.r01.ru/zones/rf_domains.gz

        The BGP full view comes from http://archive.routeviews.org; Pavel
        describes the details in his blog:
        http://phpsuxx.blogspot.com/2011/12/full-bgp.html
        http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html

        Zone files for other TLDs can be found at
        http://csa.ee/databases-zone-files/

        :return: the path returned by ``Downloader.create_data_dir()``
        :rtype: unicode
        :raises Exception: when a target is missing or empty after the download
        """
        # The RIB archive URL is built for the previous day, not for today.
        rib_date = datetime.date.today() - datetime.timedelta(days=1)

        files_list = [{
            'url': 'https://partner.r01.ru/zones/ru_domains.gz',
            'file_name': 'ru_domains.gz'
        }, {
            'url': 'https://partner.r01.ru/zones/su_domains.gz',
            'file_name': 'su_domains.gz'
        }, {
            'url': 'https://partner.r01.ru/zones/rf_domains.gz',
            'file_name': 'rf_domains.gz'
        }, {
            'url':
            'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2' %
            (rib_date.strftime("%Y.%m"), rib_date.strftime("%Y%m%d")),
            'file_name':
            'rib.bz2'
        }]

        path = Downloader.create_data_dir()

        for item in files_list:
            url = item['url']
            path_file = os.path.abspath(os.path.join(path, item['file_name']))
            BColor.process("Download %s to %s " % (url, path_file))
            # NOTE(review): rmtree only removes directories; with
            # ignore_errors=True a regular file at this path is left in place.
            shutil.rmtree(path_file, ignore_errors=True)
            # NOTE(review): the boolean result of download_file is not checked;
            # success is judged only by the size test below.
            Downloader.download_file(url, path_file)

            # getsize() raises OSError for a missing path, which used to skip
            # the error log below; a missing target is now reported like an
            # empty one.
            if not os.path.exists(path_file) or os.path.getsize(path_file) == 0:
                message = "Can`t download file %s to %s" % (url, path_file)
                BColor.error(message)
                raise Exception(message)

        return path
コード例 #2
0
    def delete_not_updated_today(count_all_domain=False):
        """
        Delete domains that were not loaded today and reset the load flag.

        With a falsy ``count_all_domain`` the whole ``domain`` table is
        processed. With a dict of ``{tld: count_in_file}`` every TLD is handled
        separately, and only when the table holds at least as many rows for
        that TLD as the file does; otherwise an error is logged and the TLD is
        left untouched.

        :type count_all_domain: bool|dict
        :return: None
        """
        # NOTE(review): @TRIGGER_DISABLED is a session variable; presumably the
        # table triggers consult it - confirm against the schema.
        sql_trigger_enable = "SET @TRIGGER_DISABLED = 0"
        sql_trigger_disable = "SET @TRIGGER_DISABLED = 1"

        connection = get_mysql_connection()
        try:
            cursor = connection.cursor(MySQLdb.cursors.DictCursor)

            if not count_all_domain:
                sql = "DELETE FROM domain WHERE load_today = 'N'"
                BColor.process(sql)
                cursor.execute(sql)
                cursor.execute(sql_trigger_disable)

                sql = "UPDATE domain SET load_today = 'N'"
                BColor.process(sql)
                cursor.execute(sql)
                cursor.execute(sql_trigger_enable)
            else:
                # items() works on both Python 2 and Python 3 dicts.
                for key_tld, tld_count_in_file in count_all_domain.items():
                    tld = str(key_tld)
                    cursor.execute(
                        "SELECT count(*) as domain_count FROM domain WHERE tld = '%s'"
                        % tld)
                    row = cursor.fetchone()
                    # Check the row before subscripting it.
                    count_in_base = row['domain_count'] if row else None
                    BColor.process("Count zone (%s) in file %s, in base %s" %
                                   (tld, str(tld_count_in_file),
                                    str(count_in_base)))

                    if count_in_base is not None and int(count_in_base) >= int(
                            tld_count_in_file):
                        sql = "DELETE FROM domain WHERE load_today = 'N' AND tld = '%s'" % tld
                        BColor.process(sql)
                        cursor.execute(sql)
                        cursor.execute(sql_trigger_disable)

                        sql = "UPDATE domain SET load_today = 'N' WHERE tld = '%s'" % tld
                        BColor.process(sql)
                        cursor.execute(sql)
                        cursor.execute(sql_trigger_enable)
                    else:
                        # Arguments follow the order of the message: file
                        # count first, then the count found in the table.
                        BColor.error(
                            "TLD %s - count in file %s, count in base %s" %
                            (tld, str(tld_count_in_file), str(count_in_base)))
            connection.commit()
        finally:
            # Close the connection even when a statement fails.
            connection.close()
コード例 #3
0
ファイル: converter.py プロジェクト: windweb/domain_statistic
    def unzip_file(path_file: str) -> bool:
        """
        Run the command produced by ``Gunzip(path_file)`` and report success.

        :type path_file: unicode
        :return: True when the subprocess exits with code 0; otherwise the
                 exit code is logged as an error and False is returned
        """
        runner = SubprocessRunner(command=Gunzip(path_file).get_command())
        runner.run()
        runner.wait(write_output_in_log=False)

        if runner.process.returncode == 0:
            return True

        BColor.error("unzip p.process.returncode = %s" % runner.process.returncode)
        return False
コード例 #4
0
    def download(path: str, item: dict):
        """
        Download one file into ``path`` and return its size in bytes.

        :param path: directory that receives the download
        :param item: mapping with the keys ``'file_name'`` and ``'url'``
        :return: size of the downloaded target as reported by
                 ``os.path.getsize``
        :raises Exception: when the target is missing or empty afterwards
        """
        file_name = item['file_name']
        url = item['url']
        path_file = os.path.abspath(os.path.join(path, file_name))

        BColor.process("Download %s to %s " % (url, path_file))
        # NOTE(review): rmtree only removes directories; with
        # ignore_errors=True a regular file at this path is left in place.
        shutil.rmtree(path_file, ignore_errors=True)
        # NOTE(review): the boolean result of download_file is not checked;
        # success is judged only by the size test below.
        Downloader.download_file(url, path_file)

        # getsize() raises OSError for a missing path, which used to skip the
        # error log below; a missing target is now reported like an empty one.
        size = os.path.getsize(path_file) if os.path.exists(path_file) else 0
        if size == 0:
            message = "Can`t download file %s to %s" % (url, path_file)
            BColor.error(message)
            raise Exception(message)

        return size
コード例 #5
0
    def unzip_file(path_file):
        """
        Run the command produced by ``Gunzip(path_file)`` and report success.

        :type path_file: unicode
        :return: True when the subprocess exits with code 0; otherwise the
                 exit code is logged as an error and False is returned
        :rtype: bool
        """
        runner = SubprocessRunner(command=Gunzip(path_file).get_command())
        runner.run()
        runner.wait(write_output_in_log=False)

        if runner.process.returncode == 0:
            return True

        BColor.error("unzip p.process.returncode = %s" % runner.process.returncode)
        return False
コード例 #6
0
    def download_file(url, data_dir):
        """
        Download a file into the given directory.

        Runs the command produced by ``Wget(url, data_dir)`` in a subprocess.

        :type url: unicode
        :type data_dir: unicode
        :return: True when the subprocess exits with code 0; otherwise the
                 exit code is logged as an error and False is returned
        :rtype: bool
        """
        runner = SubprocessRunner(command=Wget(url, data_dir).get_command())
        runner.run()
        runner.wait(write_output_in_log=False)

        if runner.process.returncode == 0:
            return True

        BColor.error("wget p.process.returncode = %s" % runner.process.returncode)
        return False
コード例 #7
0
    def download_file(url, data_dir):
        """
        Download a file into the given directory.

        Runs the command produced by ``Wget(url, data_dir)`` in a subprocess.

        :type url: unicode
        :type data_dir: unicode
        :return: True when the subprocess exits with code 0; otherwise the
                 exit code is logged as an error and False is returned
        :rtype: bool
        """
        runner = SubprocessRunner(command=Wget(url, data_dir).get_command())
        runner.run()
        runner.wait(write_output_in_log=False)

        if runner.process.returncode == 0:
            return True

        BColor.error("wget p.process.returncode = %s" % runner.process.returncode)
        return False
コード例 #8
0
    def delete_not_updated_today(count_all_domain=False):
        """
        Delete domains that were not loaded today and reset the load flag.

        With a falsy ``count_all_domain`` the whole ``domain`` table is
        processed. With a dict of ``{tld: count_in_file}`` every TLD is handled
        separately, and only when the table holds at least as many rows for
        that TLD as the file does; otherwise an error is logged and the TLD is
        left untouched.

        :type count_all_domain: bool|dict
        :return: None
        """
        # NOTE(review): @TRIGGER_DISABLED is a session variable; presumably the
        # table triggers consult it - confirm against the schema.
        sql_trigger_enable = "SET @TRIGGER_DISABLED = 0"
        sql_trigger_disable = "SET @TRIGGER_DISABLED = 1"

        connection = get_mysql_connection()
        try:
            cursor = connection.cursor(MySQLdb.cursors.DictCursor)

            if not count_all_domain:
                sql = "DELETE FROM domain WHERE load_today = 'N'"
                BColor.process(sql)
                cursor.execute(sql)
                cursor.execute(sql_trigger_disable)

                sql = "UPDATE domain SET load_today = 'N'"
                BColor.process(sql)
                cursor.execute(sql)
                cursor.execute(sql_trigger_enable)
            else:
                # items() works on both Python 2 and Python 3 dicts.
                for key_tld, tld_count_in_file in count_all_domain.items():
                    tld = str(key_tld)
                    cursor.execute("SELECT count(*) as domain_count FROM domain WHERE tld = '%s'" % tld)
                    row = cursor.fetchone()
                    # Check the row before subscripting it.
                    count_in_base = row['domain_count'] if row else None
                    BColor.process("Count zone (%s) in file %s, in base %s"
                                   % (tld, str(tld_count_in_file), str(count_in_base)))

                    if count_in_base is not None and int(count_in_base) >= int(tld_count_in_file):
                        sql = "DELETE FROM domain WHERE load_today = 'N' AND tld = '%s'" % tld
                        BColor.process(sql)
                        cursor.execute(sql)
                        cursor.execute(sql_trigger_disable)

                        sql = "UPDATE domain SET load_today = 'N' WHERE tld = '%s'" % tld
                        BColor.process(sql)
                        cursor.execute(sql)
                        cursor.execute(sql_trigger_enable)
                    else:
                        # Arguments follow the order of the message: file
                        # count first, then the count found in the table.
                        BColor.error("TLD %s - count in file %s, count in base %s"
                                     % (tld, str(tld_count_in_file), str(count_in_base)))
            connection.commit()
        finally:
            # Close the connection even when a statement fails.
            connection.close()
コード例 #9
0
    def download_data_for_current_date():
        """
        Download every file required for parsing and return the data directory.

        Zone listings come from R01:
        https://partner.r01.ru/zones/ru_domains.gz
        https://partner.r01.ru/zones/su_domains.gz
        https://partner.r01.ru/zones/rf_domains.gz

        The BGP full view comes from http://archive.routeviews.org; Pavel
        describes the details in his blog:
        http://phpsuxx.blogspot.com/2011/12/full-bgp.html
        http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html

        Zone files for other TLDs can be found at
        http://csa.ee/databases-zone-files/

        :return: the path returned by ``Downloader.create_data_dir()``
        :rtype: unicode
        :raises Exception: when a target is missing or empty after the download
        """
        # The RIB archive URL is built for the previous day, not for today.
        rib_date = datetime.date.today() - datetime.timedelta(days=1)

        files_list = [{'url': 'https://partner.r01.ru/zones/ru_domains.gz', 'file_name': 'ru_domains.gz'},
                      {'url': 'https://partner.r01.ru/zones/su_domains.gz', 'file_name': 'su_domains.gz'},
                      {'url': 'https://partner.r01.ru/zones/rf_domains.gz', 'file_name': 'rf_domains.gz'},
                      {'url': 'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2'
                              % (rib_date.strftime("%Y.%m"), rib_date.strftime("%Y%m%d")), 'file_name': 'rib.bz2'}]

        path = Downloader.create_data_dir()

        for item in files_list:
            url = item['url']
            path_file = os.path.abspath(os.path.join(path, item['file_name']))
            BColor.process("Download %s to %s " % (url, path_file))
            # NOTE(review): rmtree only removes directories; with
            # ignore_errors=True a regular file at this path is left in place.
            shutil.rmtree(path_file, ignore_errors=True)
            # NOTE(review): the boolean result of download_file is not checked;
            # success is judged only by the size test below.
            Downloader.download_file(url, path_file)

            # getsize() raises OSError for a missing path, which used to skip
            # the error log below; a missing target is now reported like an
            # empty one.
            if not os.path.exists(path_file) or os.path.getsize(path_file) == 0:
                message = "Can`t download file %s to %s" % (url, path_file)
                BColor.error(message)
                raise Exception(message)

        return path
コード例 #10
0
    def run(self):
        """
        Request DNS data for every domain in ``self.domains`` and save it to MySQL.

        Each entry is a dict with a ``'prefix'`` and a tab-separated ``'line'``
        whose fields are, in order: domain, registrant, register date,
        register end date, free date and the delegated flag. DNS and AS data
        are looked up only when the delegated flag is ``'1'``. A failure on one
        domain is logged and the loop continues with the next one.

        :return: 0 when the loop finished, 1 when an exception escaped it
        """

        try:
            self.write_to_file(BColor.process("Process %s running, need work %s domains"
                                              % (self.number, len(self.domains))))

            added_domains = 0
            # Substituting '' for this pattern strips all whitespace.
            re_prefix = re.compile(r'\s*')
            self._connect_mysql()
            cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

            for domain_data in self.domains:
                try:
                    data = domain_data['line'].split("\t")

                    domain = re_prefix.sub('', data[0])
                    delegated = re_prefix.sub('', data[5])

                    if delegated == '1':
                        delegated = 'Y'
                        domain_dns_data_array = self._get_ns_record(domain)
                        as_array = self._get_asn_array(domain_dns_data_array)
                    else:
                        delegated = 'N'
                        domain_dns_data_array = {}
                        as_array = {}

                    register_info = {'registrant': re_prefix.sub('', data[1]),
                                     'register_date': re_prefix.sub('', data[2]),
                                     'register_end_date': re_prefix.sub('', data[3]),
                                     'free_date': re_prefix.sub('', data[4]),
                                     'delegated': delegated,
                                     'domain': domain,
                                     'prefix': domain_data['prefix']}

                    # The domain comes from the zone file, so let the driver
                    # escape it instead of interpolating it into the SQL text.
                    cursor.execute("SELECT id FROM domain WHERE domain_name = LOWER(%s)", (domain,))
                    domain_id = cursor.fetchone()

                    if not domain_id:
                        run_sql = self._insert_domain(domain_dns_data_array, as_array, register_info)
                    else:
                        run_sql = self._update_domain(domain_dns_data_array, as_array, domain_id['id'],
                                                      register_info)

                    self.write_to_file(run_sql + ";", sql=True)

                    try:
                        cursor.execute(run_sql)
                        self.connection.commit()
                    except Exception:
                        self.write_to_file(BColor.error("MySQL exceptions (SQL %s)" % run_sql))
                        self.write_to_file(BColor.error(traceback.format_exc()))

                        # Retry once on a fresh connection; a second failure is
                        # handled by the per-domain handler below.
                        time.sleep(5)
                        self._connect_mysql()
                        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
                        cursor.execute(run_sql)
                        self.connection.commit()

                    added_domains += 1

                    if (added_domains % 1000) == 0:
                        self.write_to_file(BColor.process("Thread %d success resolved %d domains"
                                                          % (self.number, added_domains), pid=self.number))

                    # Drop the references explicitly,
                    # see http://habrahabr.ru/post/178637/
                    data = None
                    domain = None
                    delegated = None
                    domain_dns_data_array = None
                    as_array = None
                    register_info = None
                    domain_id = None
                    run_sql = None

                except Exception:
                    # Exception (not a bare except) so KeyboardInterrupt and
                    # SystemExit are no longer swallowed per domain.
                    # The domain is re-parsed because the locals may be unset
                    # or already cleared at the point of failure.
                    data = domain_data['line'].split("\t")
                    domain = re_prefix.sub('', data[0])

                    self.write_to_file(BColor.error("Domain %s work failed process number %s" % (domain, self.number)))
                    self.write_to_file(BColor.error(traceback.format_exc()))

            self.write_to_file(BColor.process("Process %s done " % self.number))
            self.connection.close()
            return 0
        except Exception:
            self.write_to_file(BColor.error("Process failed %s" % self.number))
            self.write_to_file(BColor.error(traceback.format_exc()))
            return 1
コード例 #11
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

__author__ = 'Alexey Y Manikin'

import sys
from config.main import *

# Program name; used below to build the debug log file name.
PROGRAM_NAME = 'update_statistic'
# Directory that contains this script, with symlinks resolved.
# NOTE(review): `os` is not imported here directly; presumably it arrives via
# `from config.main import *` - confirm.
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))

# Put the script directory first on sys.path before the project imports below.
sys.path.insert(0, CURRENT_DIR)
# Path of the '<PROGRAM_NAME>.debug' file next to this script.
logfile = os.path.join(CURRENT_DIR, '%s.debug' % PROGRAM_NAME)

import traceback
from helpers.helpersCollor import BColor
from classes.statistic import Statistic

if __name__ == "__main__":
    # Entry point: rebuild all statistics and report any failure.
    try:
        statistic = Statistic()
        statistic.update_all_statistic()
    except Exception as e:
        # Exceptions have no `.message` attribute on Python 3, so the old
        # `e.message` raised AttributeError inside this handler; format the
        # exception object itself instead.
        BColor.error("Got an exception: %s" % e)
        print(traceback.format_exc())
コード例 #12
0
    def run(self):
        """
        Request DNS data for every domain in ``self.domains`` and save it to MySQL.

        Each entry is a dict with a ``'prefix'`` and a tab-separated ``'line'``
        whose fields are, in order: domain, registrant, register date,
        register end date, free date and the delegated flag. DNS and AS data
        are looked up only when the delegated flag is ``'1'``. The RPKI status
        is always stored as -2 because the RPKI check is disabled. A failure on
        one domain is logged and the loop continues with the next one.

        :return: 0 when the loop finished, 1 when an exception escaped it
        """

        try:
            self.write_to_file(
                BColor.process("Process %s running, need work %s domains" %
                               (self.number, len(self.domains))))

            added_domains = 0
            # Substituting '' for this pattern strips all whitespace.
            re_prefix = re.compile(r'\s*')
            self._connect_mysql()
            cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

            for domain_data in self.domains:
                try:
                    data = domain_data['line'].split("\t")

                    domain = re_prefix.sub('', data[0])
                    delegated = re_prefix.sub('', data[5])

                    # RPKI checking is disabled; -2 is stored for every domain.
                    rpki_status = -2

                    if delegated == '1':
                        delegated = 'Y'
                        domain_dns_data_array = self.get_ns_record(domain)
                        as_array = self._get_asn_array(domain_dns_data_array)
                    else:
                        delegated = 'N'
                        domain_dns_data_array = {}
                        as_array = {}

                    register_info = {
                        'registrant': re_prefix.sub('', data[1]),
                        'register_date': re_prefix.sub('', data[2]),
                        'register_end_date': re_prefix.sub('', data[3]),
                        'free_date': re_prefix.sub('', data[4]),
                        'delegated': delegated,
                        'domain': domain,
                        'prefix': domain_data['prefix']
                    }

                    # The domain comes from the zone file, so let the driver
                    # escape it instead of interpolating it into the SQL text.
                    cursor.execute(
                        "SELECT id FROM domain WHERE domain_name = LOWER(%s)",
                        (domain,))
                    domain_id = cursor.fetchone()

                    if not domain_id:
                        run_sql = self._insert_domain(domain_dns_data_array,
                                                      as_array, register_info,
                                                      rpki_status, cursor)
                    else:
                        run_sql = self._update_domain(domain_dns_data_array,
                                                      as_array,
                                                      domain_id['id'],
                                                      register_info,
                                                      rpki_status, cursor)

                    # Removes every "b'" sequence, then collapses doubled
                    # single quotes into one.
                    # NOTE(review): presumably cleans up bytes reprs that leak
                    # into the generated SQL - confirm.
                    run_sql = run_sql.replace("b\'", '')
                    run_sql = run_sql.replace("\'\'", '\'')

                    self.write_to_file(run_sql + ";", sql=True)

                    try:
                        cursor.execute(run_sql)
                        self.connection.commit()
                    except Exception:
                        self.write_to_file(
                            BColor.error("MySQL exceptions (SQL %s)" %
                                         run_sql))
                        self.write_to_file(BColor.error(
                            traceback.format_exc()))

                        # Retry once on a fresh connection; a second failure is
                        # handled by the per-domain handler below.
                        time.sleep(5)
                        self._connect_mysql()
                        cursor = self.connection.cursor(
                            MySQLdb.cursors.DictCursor)
                        cursor.execute(run_sql)
                        self.connection.commit()

                    added_domains += 1

                    if (added_domains % 1000) == 0:
                        self.write_to_file(
                            BColor.process(
                                "Thread %d success resolved %d domains" %
                                (self.number, added_domains),
                                pid=self.number))

                    # Drop the references explicitly,
                    # see http://habrahabr.ru/post/178637/
                    data = None
                    domain = None
                    delegated = None
                    domain_dns_data_array = None
                    as_array = None
                    register_info = None
                    domain_id = None
                    run_sql = None

                except Exception:
                    # NOTE(review): dumps the failing entry to stdout; looks
                    # like leftover debugging output - confirm it is wanted.
                    pprint.pprint(domain_data)

                    # The domain is re-parsed because the locals may be unset
                    # or already cleared at the point of failure.
                    data = domain_data['line'].split("\t")
                    domain = re_prefix.sub('', data[0])

                    self.write_to_file(
                        BColor.error(
                            "Domain %s work failed process number %s" %
                            (domain, self.number)))
                    self.write_to_file(BColor.error(traceback.format_exc()))

            self.write_to_file(BColor.process("Process %s done " %
                                              self.number))
            self.connection.close()
            return 0

        except Exception:
            self.write_to_file(BColor.error("Process failed %s" % self.number))
            self.write_to_file(BColor.error(traceback.format_exc()))
            return 1
コード例 #13
0
def load_prefix_list_from_var(prefix_list):
    """
    Build a SubnetTree from an in-memory prefix mapping.

    Every key of ``prefix_list`` and its value are passed through
    ``as_bytes`` before being stored in the tree.

    :param prefix_list: mapping iterated by key and indexed by that key
    :return: the populated ``SubnetTree.SubnetTree``
    """
    tree = SubnetTree.SubnetTree()
    for prefix in prefix_list:
        value = prefix_list[prefix]
        tree[as_bytes(prefix)] = as_bytes(value)

    return tree


if __name__ == "__main__":
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        parser = argparse.ArgumentParser(add_help=True, version='1.0')

        parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store")
        parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count")
        parser.add_argument('-u', '--update_statistic', help="Update statistic after update domain", action="count")
        parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store")
        parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store")
        args = parser.parse_args()

        if args.show_verbose:
            BColor.ok("Use verbose")

        if not args.dir:
コード例 #14
0
def load_prefix_list_from_var(prefix_list):
    """
    Build a SubnetTree from an in-memory prefix mapping.

    Every key of ``prefix_list`` and its value are passed through
    ``as_bytes`` before being stored in the tree.

    :param prefix_list: mapping iterated by key and indexed by that key
    :return: the populated ``SubnetTree.SubnetTree``
    """
    tree = SubnetTree.SubnetTree()
    for prefix in prefix_list:
        value = prefix_list[prefix]
        tree[as_bytes(prefix)] = as_bytes(value)

    return tree


if __name__ == "__main__":
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        parser = argparse.ArgumentParser(add_help=True, version='1.0')

        parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store")
        parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count")
        parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store")
        parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store")
        args = parser.parse_args()

        if args.show_verbose:
            BColor.ok("Use verbose")

        if not args.dir:
            BColor.process("Download files")
コード例 #15
0
    def run(self):
        """
        Request DNS data for domains taken from ``self.queue`` and save it to MySQL.

        Entries are pulled until the queue reports empty. Each entry is a dict
        with a ``'prefix'`` and a tab-separated ``'line'`` whose fields are, in
        order: domain, registrant, register date, register end date, free date
        and the delegated flag. DNS and AS data are looked up only when the
        delegated flag is ``'1'``. The RPKI status is always stored as -2
        because the RPKI check is disabled. When the work ends, a summary dict
        (``time_diff``, ``performance``, ``count``) is put on
        ``self.queue_statistic``.

        :return: 0 when the queue was drained or ``queue.Empty`` was raised,
                 1 when any other exception escaped the loop
        """

        self.write_to_file(BColor.process("Process %s running" % self.number))
        added_domains = 0
        # Substituting '' for this pattern strips all whitespace.
        re_prefix = re.compile(r'\s*')
        start_time = datetime.now()

        try:
            self._connect_mysql()
            cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

            while not self.queue.empty():
                # Another consumer may take the last item between empty() and
                # get(); the resulting queue.Empty is handled below.
                domain_data = self.queue.get(timeout=5)
                try:
                    data = domain_data['line'].split("\t")

                    domain = re_prefix.sub('', data[0])
                    delegated = re_prefix.sub('', data[5])

                    # RPKI checking is disabled; -2 is stored for every domain.
                    rpki_status = -2

                    if delegated == '1':
                        delegated = 'Y'
                        domain_dns_data_array = self.get_ns_record(domain)
                        as_array = self._get_asn_array(domain_dns_data_array)
                    else:
                        delegated = 'N'
                        domain_dns_data_array = {}
                        as_array = {}

                    register_info = {
                        'registrant': re_prefix.sub('', data[1]),
                        'register_date': re_prefix.sub('', data[2]),
                        'register_end_date': re_prefix.sub('', data[3]),
                        'free_date': re_prefix.sub('', data[4]),
                        'delegated': delegated,
                        'domain': domain,
                        'prefix': domain_data['prefix']
                    }

                    run_sql = self._update_domain_row(domain_dns_data_array,
                                                      as_array, register_info,
                                                      rpki_status)

                    # Removes every "b'" sequence, then collapses doubled
                    # single quotes into one.
                    # NOTE(review): presumably cleans up bytes reprs that leak
                    # into the generated SQL - confirm.
                    run_sql = run_sql.replace("b\'", '')
                    run_sql = run_sql.replace("\'\'", '\'')

                    self.write_to_file(run_sql + ";", sql=True)

                    try:
                        cursor.execute(run_sql)
                        self.connection.commit()
                    except Exception:
                        self.write_to_file(
                            BColor.error("MySQL exceptions (SQL %s)" %
                                         run_sql))
                        self.write_to_file(BColor.error(
                            traceback.format_exc()))

                        # Retry once on a fresh connection; a second failure is
                        # handled by the per-domain handler below.
                        time.sleep(5)
                        self._connect_mysql()
                        cursor = self.connection.cursor(
                            MySQLdb.cursors.DictCursor)
                        cursor.execute(run_sql)
                        self.connection.commit()

                    added_domains += 1

                    # Drop the references explicitly,
                    # see http://habrahabr.ru/post/178637/
                    data = None
                    domain = None
                    delegated = None
                    domain_dns_data_array = None
                    as_array = None
                    register_info = None
                    run_sql = None

                except Exception:
                    # The domain is re-parsed because the locals may be unset
                    # or already cleared at the point of failure.
                    data = domain_data['line'].split("\t")
                    domain = re_prefix.sub('', data[0])

                    self.write_to_file(
                        BColor.error(
                            "Domain %s work failed process number %i" %
                            (domain, self.number)))
                    self.write_to_file(BColor.error(traceback.format_exc()))

            diff = datetime.now() - start_time
            # Guard the division: a worker that processed nothing used to
            # raise ZeroDivisionError here and never reported its statistics.
            performance = diff.seconds / added_domains if added_domains else 0.0

            self.queue_statistic.put({
                'time_diff': diff.seconds,
                'performance': performance,
                'count': added_domains
            })
            self.write_to_file(
                BColor.process(
                    "Process %i done, processed %i domain (performance %f)" %
                    (self.number, added_domains, performance),
                    pid=self.number))
            self.connection.close()
            return 0
        except queue.Empty:
            diff = datetime.now() - start_time
            # Same guard as above; an exception raised inside this handler
            # would escape run() instead of reaching the handler below.
            performance = diff.seconds / added_domains if added_domains else 0.0

            self.queue_statistic.put({
                'time_diff': diff.seconds,
                'performance': performance,
                'count': added_domains
            })
            self.write_to_file(
                BColor.process(
                    "Process %i done queue is Empty = %i, processed %i domain (performance %f)"
                    % (self.number, self.queue.empty(), added_domains,
                       performance),
                    pid=self.number))
            return 0
        except Exception:
            self.write_to_file(
                BColor.error("Process failed %i" % self.number,
                             pid=self.number))
            self.write_to_file(BColor.error(traceback.format_exc()))
            return 1
コード例 #16
0
def load_prefix_list_from_var(prefix_list):
    """
    Build a SubnetTree from an in-memory prefix mapping.

    Every key of ``prefix_list`` is stored in the tree with its value.

    :param prefix_list: mapping iterated by key and indexed by that key
    :return: the populated ``SubnetTree.SubnetTree``
    """
    tree = SubnetTree.SubnetTree()
    for prefix in prefix_list:
        value = prefix_list[prefix]
        tree[prefix] = value

    return tree


if __name__ == "__main__":
    try:
        if check_program_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        parser = argparse.ArgumentParser(add_help=True)

        parser.add_argument('-d',
                            '--dir',
                            type=str,
                            help="Do`t download data, use exist from dir",
                            action="store")
        parser.add_argument('-s',
                            '--show_verbose',
                            help="Show verbose log",
                            action="count")
        parser.add_argument('-u',
                            '--update_statistic',