Beispiel #1
0
 def __init__(self, path=None, nowiki=None):
     """
     Store the IP list path, read the API credentials from the
     ``config.findproxy`` mapping and create one empty trie per IP family.

     ``nowiki`` is accepted but not used in this method.
     """
     self.path = path
     settings = config.findproxy
     self.IPQSkey = settings['IPQSkey']
     self.PCkey = settings['PCkey']
     self.GIIemail = settings['GIIemail']
     # Separate caches for IPv4 and IPv6
     self.IPv4cache, self.IPv6cache = PatriciaTrie(), PatriciaTrie()
Beispiel #2
0
 def __init__(self):
     """
     Bind the bot to the default pywikibot site, set the report page title
     and the edit/block summaries, read the API credentials from the
     ``config.findproxy`` mapping and create one empty trie per IP family.
     """
     self.site = pywikibot.Site()
     # Report page title and the summaries used when saving / blocking
     self.target = 'ویکی‌پدیا:گزارش دیتابیس/کشف پروکسی'
     self.summary = 'روزآمدسازی نتایج (وظیفه ۲۲)'
     self.blocksummary = '{{پروکسی باز}}'
     settings = config.findproxy
     self.IPQSkey = settings['IPQSkey']
     self.PCkey = settings['PCkey']
     self.GIIemail = settings['GIIemail']
     # Separate caches for IPv4 and IPv6
     self.IPv4cache, self.IPv6cache = PatriciaTrie(), PatriciaTrie()
Beispiel #3
0
 def __init__(self):
     self.site = pywikibot.Site()
     self.target = 'ویکی‌پدیا:گزارش دیتابیس/کشف پروکسی/بازه'
     self.summary = 'روزآمدسازی نتایج (وظیفه ۲۲)'
     self.IPv4cache = PatriciaTrie()
     self.IPv6cache = PatriciaTrie()
     self.success = {}
     self.failure = []
     self.whois_reqs = 0
     self.block_link = "//fa.wikipedia.org/wiki/Special:Block?wpExpiry=" + \
         "1%20year&wpReason={{پروکسی%20باز}}&wpDisableUTEdit=1" + \
         "&wpHardBlock=1&wpTarget="
     self.sql = """
Beispiel #4
0
class ProxyCheckBot():
    """
    Checks the IPs listed in a text file against several proxy-detection
    APIs and prints the outcome as a wikitable.
    """

    def __init__(self, path=None, nowiki=None):
        """
        :param path: path of a text file holding one IP per line
        :param nowiki: currently unused
        """
        self.path = path
        self.IPQSkey = config.findproxy['IPQSkey']
        self.PCkey = config.findproxy['PCkey']
        self.GIIemail = config.findproxy['GIIemail']
        self.IPv4cache = PatriciaTrie()
        self.IPv6cache = PatriciaTrie()

    def _is_ipv4(self, ip):
        """
        Returns True when the string starts with a dotted quad; everything
        else is handled as IPv6 by the cache methods.
        """
        pat = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
        return re.match(pat, ip) is not None

    def load_ip_list(self, path):
        """
        Reads the IP list from a file.

        :param path: file path; a leading ~ is expanded
        :return: list of the non-blank lines, stripped of whitespace
        :raises SystemExit: when the path is not an existing file
        """
        # If path starts with ~ make it an absolute path
        path = os.path.expanduser(path)
        if not os.path.isfile(path):
            print('Provided path does not exist')
            # Non-zero status so that callers can tell this is a failure
            raise SystemExit(1)
        with open(path) as fh:
            lines = fh.read().splitlines()
        # Blank lines would otherwise be sent to the APIs as empty IPs
        return [line.strip() for line in lines if line.strip()]

    def get_cache(self, ip):
        """
        Returns the cache entries matching the IP (empty list if none).
        """
        if self._is_ipv4(ip):
            return self.IPv4cache.find_all(ip)
        # Temporary fix for
        # https://github.com/Figglewatts/cidr-trie/issues/2
        if self.IPv6cache.size == 0:
            return []
        return self.IPv6cache.find_all(ip)

    def set_cache(self, ip, cidr, country):
        """
        Stores the country under the CIDR in the cache that matches the
        address family of the IP.
        """
        if self._is_ipv4(ip):
            self.IPv4cache.insert(cidr, country)
        else:
            self.IPv6cache.insert(cidr, country)

    def get_ip_info(self, ip):
        """
        Retrieves pertinent fields from IP WHOIS information

        :return: dict with keys 'cidr' and 'country_code'; both are empty
            strings when the lookup fails
        """
        cached_info = self.get_cache(ip)

        if len(cached_info) == 0:
            try:
                request = IPWhois(ip)
                result = request.lookup_rdap(depth=1)
                cidr = result['asn_cidr']
                country = result['asn_country_code']
                self.set_cache(ip, cidr, country)
            except Exception:
                # Best effort: a failed lookup yields empty fields
                cidr = ''
                country = ''
        else:
            # Use the first matching entry: (cidr, country)
            cidr = cached_info[0][0]
            country = cached_info[0][1]

        return {'cidr': cidr, 'country_code': country}

    def query_IPQualityScore(self, ip):
        """
        Queries the IPQualityScore API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'https://www.ipqualityscore.com/api/json/ip/%s/%s'
        request = requests.get(url % (self.IPQSkey, ip))
        result = request.json()
        if 'proxy' in result.keys():
            return 1 if result['proxy'] is True else 0
        else:
            return False

    def query_proxycheck(self, ip):
        """
        Queries the proxycheck.io API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'http://proxycheck.io/v2/%s?key=%s&vpn=1'
        request = requests.get(url % (ip, self.PCkey))
        result = request.json()
        if ip in result.keys() and 'proxy' in result[ip]:
            return 1 if result[ip]['proxy'] == 'yes' else 0
        else:
            return False

    def query_GetIPIntel(self, ip):
        """
        Queries the GetIPIntel API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'http://check.getipintel.net/check.php' + \
              '?ip=%s&contact=%s&format=json&flags=m'
        request = requests.get(url % (ip, self.GIIemail))
        result = request.json()
        if 'result' in result.keys():
            return 1 if result['result'] == '1' else 0
        else:
            return False

    def query_teoh_io(self, ip):
        """
        Queries the teoh.io API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'https://ip.teoh.io/api/vpn/%s'
        request = requests.get(url % ip)
        # Sadly, teoh.io sometimes generates PHP notices before the JSON
        # output. Therefore, we will have to find the actual JSON output and
        # parse it.
        text = request.text
        start = text.find('{')
        if start == -1:
            # Empty body or no JSON object at all: treat as "no answer"
            return False
        result = json.loads(text[start:])

        if 'vpn_or_proxy' in result.keys():
            return 1 if result['vpn_or_proxy'] == 'yes' else 0
        else:
            return False

    def run_queries(self, ip):
        """
        Runs the three active checks.

        :return: [IPQualityScore, proxycheck, GetIPIntel] results
        """
        return [
            self.query_IPQualityScore(ip),
            self.query_proxycheck(ip),
            self.query_GetIPIntel(ip)
        ]

    def format_result(self, res):
        """
        Converts a check result into a wikitable cell template.

        :param res: 1, 0, or False/None when the API gave no answer
        :return: '{{yes}}', '{{no}}' or '{{yes-no|}}'
        """
        # bool is a subclass of int, so False == 0; test identity first to
        # keep "no answer" apart from a real negative result
        if res is False or res is None:
            return '{{yes-no|}}'
        if res == 1:
            return '{{yes}}'
        elif res == 0:
            return '{{no}}'
        else:
            return '{{yes-no|}}'

    def progress(self, str):
        """
        Displays a progress message to the user.
        The message is updated as IPs are checked or skipped.
        The message will be removed in the end, once final output is printed.
        """
        blank = '                                                             '
        # Overwrite the previous message on the same terminal line
        print(blank, end='\r')
        print(str, end='\r')

    def run(self):
        """
        Checks every IP of the list and prints the resulting wikitable.

        :raises SystemExit: when no IP list path was provided
        """
        if self.path is None:
            print('Error: no IP list provided!')
            raise SystemExit(1)
        iplist = self.load_ip_list(self.path)

        header = '{| class="wikitable sortable"\n'
        header += '! IP !! CIDR !! Country !! ' +\
                  'IPQualityScore !! proxycheck !! GetIPIntel'
        rowtemplate = '\n|-\n| %s || %s || %s || %s || %s || %s'

        rows = []
        for ip in iplist:
            ipinfo = self.get_ip_info(ip)
            if ipinfo['country_code'] == 'IR':
                # IPs from Iran are almost never proxies, skip the checks
                self.progress('Skipping %s' % ip)
                continue

            self.progress('Checking %s' % ip)
            IPQS, PC, GII = self.run_queries(ip)
            rows.append(rowtemplate % (
                ip, ipinfo['cidr'], ipinfo['country_code'],
                self.format_result(IPQS), self.format_result(PC),
                self.format_result(GII)))

        print(header + ''.join(rows) + '\n|}')
Beispiel #5
0
class FindProxyBot():
    """
    Checks IPs that recently edited the wiki against several
    proxy-detection APIs, blocks those flagged by all of them and saves a
    report page.
    """

    def __init__(self):
        self.site = pywikibot.Site()
        self.target = 'ویکی‌پدیا:گزارش دیتابیس/کشف پروکسی'
        self.summary = 'روزآمدسازی نتایج (وظیفه ۲۲)'
        self.blocksummary = '{{پروکسی باز}}'
        self.IPQSkey = config.findproxy['IPQSkey']
        self.PCkey = config.findproxy['PCkey']
        self.GIIemail = config.findproxy['GIIemail']
        self.IPv4cache = PatriciaTrie()
        self.IPv6cache = PatriciaTrie()

    def _is_ipv4(self, ip):
        """
        Returns True when the string starts with a dotted quad; everything
        else is handled as IPv6 by the cache methods.
        """
        pat = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
        return re.match(pat, ip) is not None

    def get_ip_list(self, max_number, max_hours):
        """
        Gathers a list of up to max_number IPs which edited within the last
        max_hours hours in the wiki.

        :raises RuntimeError: when the API reports an error or no result
        """
        cutoff_time = datetime.utcnow() - timedelta(hours=max_hours)
        cutoff_ts = cutoff_time.strftime("%Y%m%d%H%M%S")

        gen = api.Request(
                site=self.site,
                parameters={
                    'action': 'query',
                    'list': 'recentchanges',
                    'rcshow': 'anon',
                    'rcprop': 'user|title',
                    'rclimit': str(max_number),
                    'rcend': cutoff_ts})
        data = gen.submit()
        if 'error' in data:
            raise RuntimeError('API query error: {0}'.format(data))
        if data == [] or 'query' not in data:
            raise RuntimeError('No results given.')
        # The set comprehension already removes duplicate users
        return list({x['user'] for x in data['query']['recentchanges']})

    def get_cache(self, ip):
        """
        Returns the cache entries matching the IP (empty list if none).
        """
        if self._is_ipv4(ip):
            return self.IPv4cache.find_all(ip)
        # Temporary fix for
        # https://github.com/Figglewatts/cidr-trie/issues/2
        if self.IPv6cache.size == 0:
            return []
        return self.IPv6cache.find_all(ip)

    def set_cache(self, ip, cidr, country):
        """
        Stores the country under the CIDR in the cache that matches the
        address family of the IP.
        """
        if self._is_ipv4(ip):
            self.IPv4cache.insert(cidr, country)
        else:
            self.IPv6cache.insert(cidr, country)

    def get_ip_info(self, ip):
        """
        Retrieves pertinent fields from IP WHOIS information

        :return: dict with keys 'cidr' and 'country_code'; both are empty
            strings when the lookup fails
        """
        cached_info = self.get_cache(ip)

        if len(cached_info) == 0:
            try:
                request = IPWhois(ip)
                result = request.lookup_rdap(depth=1)
                cidr = result['asn_cidr']
                country = result['asn_country_code']
                self.set_cache(ip, cidr, country)
            except Exception:
                # Best effort: a failed lookup yields empty fields
                cidr = ''
                country = ''
        else:
            # Use the first matching entry: (cidr, country)
            cidr = cached_info[0][0]
            country = cached_info[0][1]

        return {
            'cidr': cidr,
            'country_code': country
        }

    def query_IPQualityScore(self, ip):
        """
        Queries the IPQualityScore API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'https://www.ipqualityscore.com/api/json/ip/%s/%s'
        request = requests.get(url % (self.IPQSkey, ip))
        result = request.json()
        if 'proxy' in result.keys():
            return 1 if result['proxy'] is True else 0
        else:
            return False

    def query_proxycheck(self, ip):
        """
        Queries the proxycheck.io API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'http://proxycheck.io/v2/%s?key=%s&vpn=1'
        request = requests.get(url % (ip, self.PCkey))
        result = request.json()
        if ip in result.keys() and 'proxy' in result[ip]:
            return 1 if result[ip]['proxy'] == 'yes' else 0
        else:
            return False

    def query_GetIPIntel(self, ip):
        """
        Queries the GetIPIntel API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'http://check.getipintel.net/check.php' + \
              '?ip=%s&contact=%s&format=json&flags=m'
        request = requests.get(url % (ip, self.GIIemail))
        result = request.json()
        if 'result' in result.keys():
            return 1 if result['result'] == '1' else 0
        else:
            return False

    def query_teoh_io(self, ip):
        """
        Queries the teoh.io API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'https://ip.teoh.io/api/vpn/%s'
        request = requests.get(url % ip)
        # Sadly, teoh.io sometimes generates PHP notices before the JSON
        # output. Therefore, we will have to find the actual JSON output and
        # parse it.
        text = request.text
        start = text.find('{')
        if start == -1:
            # Empty body or no JSON object at all: treat as "no answer"
            return False
        result = json.loads(text[start:])

        if 'vpn_or_proxy' in result.keys():
            return 1 if result['vpn_or_proxy'] == 'yes' else 0
        else:
            return False

    def run_queries(self, ip):
        """
        Runs the three active checks.

        :return: [IPQualityScore, proxycheck, GetIPIntel] results
        """
        return [
            self.query_IPQualityScore(ip),
            self.query_proxycheck(ip),
            self.query_GetIPIntel(ip)
        ]

    def format_result(self, res):
        """
        Converts a result into a wikitable cell template.

        :param res: 1, 0, False/None when an API gave no answer, or any
            other value (e.g. 2 for "was already blocked")
        :return: '{{yes}}', '{{no}}' or '{{yes-no|}}'
        """
        # bool is a subclass of int, so False == 0; test identity first to
        # keep "no answer" apart from a real negative result
        if res is False or res is None:
            return '{{yes-no|}}'
        if res == 1:
            return '{{yes}}'
        elif res == 0:
            return '{{no}}'
        else:
            return '{{yes-no|}}'

    def find_proxies(self):
        """
        Checks recently active IPs, blocks those that all three services
        report as proxies, and saves the report and its signature subpage.
        """
        header = '{| class="wikitable sortable"\n'
        header += '! آی‌پی !! بازه !! کد کشور !! ' +\
                  'IPQualityScore !! proxycheck !! GetIPIntel !! ' +\
                  'بسته شد'

        iplist = self.get_ip_list(500, 24)
        rowtemplate = '\n|-\n| %s || %s || %s || %s || %s || %s || %s'

        rows = []
        for ip in iplist:
            pywikibot.output('Checking %s' % ip)
            ipinfo = self.get_ip_info(ip)
            if ipinfo['country_code'] == 'IR':
                # IPs from Iran are almost never proxies, skip the checks
                continue

            IPQS, PC, GII = self.run_queries(ip)
            # Block only when every service positively flags the IP
            if all(res == 1 for res in (IPQS, PC, GII)):
                target = pywikibot.User(self.site, ip)
                if target.isBlocked():
                    # Already blocked; shown as '{{yes-no|}}'
                    blocked = 2
                else:
                    pywikibot.output('Blocking %s' % ip)
                    self.site.blockuser(
                        target, '1 year', self.blocksummary,
                        anononly=False, allowusertalk=True)
                    blocked = 1
            else:
                blocked = 0
            rows.append(rowtemplate % (
                ip,
                ipinfo['cidr'],
                ipinfo['country_code'],
                self.format_result(IPQS),
                self.format_result(PC),
                self.format_result(GII),
                self.format_result(blocked)
            ))

        page = pywikibot.Page(self.site, self.target)
        page.text = header + ''.join(rows) + '\n|}'
        page.save(self.summary)

        # Five tildes are saved to the signature subpage
        page = pywikibot.Page(self.site, self.target + '/امضا')
        page.text = '~~~~~'
        page.save(self.summary)
Beispiel #6
0
class FindProxyBot():
    """
    Re-checks IPs whose long-term block is about to expire against several
    proxy-detection APIs, re-blocks those flagged by all of them and saves
    a report page.
    """

    def __init__(self):
        self.site = pywikibot.Site()
        self.target = 'ویکی‌پدیا:گزارش دیتابیس/تمدید بستن پروکسی'
        self.summary = 'روزآمدسازی نتایج (وظیفه ۲۳)'
        self.blocksummary = '{{پروکسی باز}}'
        self.IPQSkey = config.findproxy['IPQSkey']
        self.PCkey = config.findproxy['PCkey']
        self.GIIemail = config.findproxy['GIIemail']
        self.IPv4cache = PatriciaTrie()
        self.IPv6cache = PatriciaTrie()

    def _is_ipv4(self, ip):
        """
        Returns True when the string starts with a dotted quad; everything
        else is handled as IPv6 by the cache methods.
        """
        pat = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
        return re.match(pat, ip) is not None

    def get_ip_list(self):
        """
        Gathers a list of IPs with a long-term block that is about to expire.

        :return: all rows of the query; the first column is the address
        """
        conn = mysqldb.connect(host='fawiki.web.db.svc.wikimedia.cloud',
                               db='fawiki_p',
                               read_default_file='~/replica.my.cnf')
        # Column order is unchanged; only the two date aliases were swapped
        # so that each name matches the column it is computed from
        query = """
SELECT
  ipb_address,
  STR_TO_DATE(LEFT(ipb_expiry, 8), '%Y%m%d') AS expiry,
  STR_TO_DATE(LEFT(ipb_timestamp, 8), '%Y%m%d') AS start_date,
  0 - DATEDIFF(NOW(), STR_TO_DATE(LEFT(ipb_expiry, 8), '%Y%m%d')) AS days_left,
  DATEDIFF(NOW(), STR_TO_DATE(LEFT(ipb_timestamp, 8), '%Y%m%d')) AS block_age
FROM ipblocks
WHERE
  ipb_user = 0
  AND ipb_expiry NOT IN (
    'infinity',
    'indefinite'
  )
  AND DATEDIFF(NOW(), STR_TO_DATE(LEFT(ipb_expiry, 8), '%Y%m%d')) > -30
  AND DATEDIFF(NOW(), STR_TO_DATE(LEFT(ipb_timestamp, 8), '%Y%m%d')) > 300
  AND ipb_range_start = ipb_range_end -- exclude CIDRs
"""
        try:
            cursor = conn.cursor()
            cursor.execute(query)
            results = cursor.fetchall()
        finally:
            # Do not leave the replica connection open after the query
            conn.close()
        return results

    def get_cache(self, ip):
        """
        Returns the cache entries matching the IP (empty list if none).
        """
        if self._is_ipv4(ip):
            return self.IPv4cache.find_all(ip)
        # Temporary fix for
        # https://github.com/Figglewatts/cidr-trie/issues/2
        if self.IPv6cache.size == 0:
            return []
        return self.IPv6cache.find_all(ip)

    def set_cache(self, ip, cidr, country):
        """
        Stores the country under the CIDR in the cache that matches the
        address family of the IP.
        """
        if self._is_ipv4(ip):
            self.IPv4cache.insert(cidr, country)
        else:
            self.IPv6cache.insert(cidr, country)

    def get_ip_info(self, ip):
        """
        Retrieves pertinent fields from IP WHOIS information

        :return: dict with keys 'cidr' and 'country_code'; both are empty
            strings when the lookup fails
        """
        cached_info = self.get_cache(ip)

        if len(cached_info) == 0:
            try:
                request = IPWhois(ip)
                result = request.lookup_rdap(depth=1)
                cidr = result['asn_cidr']
                country = result['asn_country_code']
                self.set_cache(ip, cidr, country)
            except Exception:
                # Best effort: a failed lookup yields empty fields
                cidr = ''
                country = ''
        else:
            # Use the first matching entry: (cidr, country)
            cidr = cached_info[0][0]
            country = cached_info[0][1]

        return {'cidr': cidr, 'country_code': country}

    def query_IPQualityScore(self, ip):
        """
        Queries the IPQualityScore API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'https://www.ipqualityscore.com/api/json/ip/%s/%s'
        request = requests.get(url % (self.IPQSkey, ip))
        result = request.json()
        if 'proxy' in result.keys():
            return 1 if result['proxy'] is True else 0
        else:
            return False

    def query_proxycheck(self, ip):
        """
        Queries the proxycheck.io API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'http://proxycheck.io/v2/%s?key=%s&vpn=1'
        request = requests.get(url % (ip, self.PCkey))
        result = request.json()
        if ip in result.keys() and 'proxy' in result[ip]:
            return 1 if result[ip]['proxy'] == 'yes' else 0
        else:
            return False

    def query_GetIPIntel(self, ip):
        """
        Queries the GetIPIntel API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'http://check.getipintel.net/check.php' + \
              '?ip=%s&contact=%s&format=json&flags=m'
        request = requests.get(url % (ip, self.GIIemail))
        result = request.json()
        if 'result' in result.keys():
            return 1 if result['result'] == '1' else 0
        else:
            return False

    def query_teoh_io(self, ip):
        """
        Queries the teoh.io API to check if an IP is a proxy

        :return: 1 (proxy), 0 (not a proxy) or False (no answer)
        """
        url = 'https://ip.teoh.io/api/vpn/%s'
        request = requests.get(url % ip)
        # Sadly, teoh.io sometimes generates PHP notices before the JSON
        # output. Therefore, we will have to find the actual JSON output and
        # parse it.
        text = request.text
        start = text.find('{')
        if start == -1:
            # Empty body or no JSON object at all: treat as "no answer"
            return False
        result = json.loads(text[start:])

        if 'vpn_or_proxy' in result.keys():
            return 1 if result['vpn_or_proxy'] == 'yes' else 0
        else:
            return False

    def run_queries(self, ip):
        """
        Runs the three active checks.

        :return: [IPQualityScore, proxycheck, GetIPIntel] results
        """
        return [
            self.query_IPQualityScore(ip),
            self.query_proxycheck(ip),
            self.query_GetIPIntel(ip)
        ]

    def format_result(self, res):
        """
        Converts a result into a wikitable cell template.

        :param res: 1, 0, or False/None when an API gave no answer
        :return: '{{yes}}', '{{no}}' or '{{yes-no|}}'
        """
        # bool is a subclass of int, so False == 0; test identity first to
        # keep "no answer" apart from a real negative result
        if res is False or res is None:
            return '{{yes-no|}}'
        if res == 1:
            return '{{yes}}'
        elif res == 0:
            return '{{no}}'
        else:
            return '{{yes-no|}}'

    def find_proxies(self):
        """
        Re-checks the expiring blocks, re-blocks the IPs that all three
        services report as proxies, and saves the report and its signature
        subpage.
        """
        header = '{| class="wikitable sortable"\n'
        header += '! آی‌پی !! بازه !! کد کشور !! ' +\
                  'IPQualityScore !! proxycheck !! GetIPIntel !! ' +\
                  'بسته شد'

        iplist = self.get_ip_list()
        rowtemplate = '\n|-\n| %s || %s || %s || %s || %s || %s || %s'

        rows = []
        for ipdata in iplist:
            # The address column is decoded from bytes here
            ip = ipdata[0].decode('ASCII')
            print(ip)
            pywikibot.output('Checking %s' % ip)
            ipinfo = self.get_ip_info(ip)
            if ipinfo['country_code'] == 'IR':
                # IPs from Iran are almost never proxies, skip the checks
                continue

            IPQS, PC, GII = self.run_queries(ip)
            # Re-block only when every service positively flags the IP
            if all(res == 1 for res in (IPQS, PC, GII)):
                target = pywikibot.User(self.site, ip)
                pywikibot.output('Blocking %s' % ip)
                self.site.blockuser(target,
                                    '1 year',
                                    self.blocksummary,
                                    anononly=False,
                                    reblock=True,
                                    allowusertalk=True)
                blocked = 1
            else:
                blocked = 0
            rows.append(rowtemplate % (
                ip, ipinfo['cidr'], ipinfo['country_code'],
                self.format_result(IPQS), self.format_result(PC),
                self.format_result(GII), self.format_result(blocked)))

        page = pywikibot.Page(self.site, self.target)
        page.text = header + ''.join(rows) + '\n|}'
        page.save(self.summary)

        # Five tildes are saved to the signature subpage
        page = pywikibot.Page(self.site, self.target + '/امضا')
        page.text = '~~~~~'
        page.save(self.summary)
Beispiel #7
0
class FindRangesBot():
    """
    Groups individually blocked IPs by the CIDR reported by WHOIS and saves
    a report page listing the ranges that contain them.
    """

    def __init__(self):
        self.site = pywikibot.Site()
        self.target = 'ویکی‌پدیا:گزارش دیتابیس/کشف پروکسی/بازه'
        self.summary = 'روزآمدسازی نتایج (وظیفه ۲۲)'
        self.IPv4cache = PatriciaTrie()
        self.IPv6cache = PatriciaTrie()
        # CIDR -> list of blocked IPs found inside it
        self.success = {}
        # IPs whose WHOIS lookup failed
        self.failure = []
        self.whois_reqs = 0
        self.block_link = "//fa.wikipedia.org/wiki/Special:Block?wpExpiry=" + \
            "1%20year&wpReason={{پروکسی%20باز}}&wpDisableUTEdit=1" + \
            "&wpHardBlock=1&wpTarget="
        self.sql = """
SELECT
  ipb_address AS IP,
  ipb_range_start AS HEX
FROM ipblocks
WHERE ipb_by_actor = 1789 -- HujiBot
AND ipb_range_start = ipb_range_end
ORDER BY ipb_range_start
"""

    def _is_ipv4(self, ip):
        """
        Returns True when the string starts with a dotted quad; everything
        else is handled as IPv6 by the cache methods.
        """
        pat = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
        return re.match(pat, ip) is not None

    def get_cache(self, ip):
        """
        Returns the cache entries matching the IP (empty list if none).
        """
        if self._is_ipv4(ip):
            return self.IPv4cache.find_all(ip)
        # Temporary fix for
        # https://github.com/Figglewatts/cidr-trie/issues/2
        if self.IPv6cache.size == 0:
            return []
        return self.IPv6cache.find_all(ip)

    def set_cache(self, ip, cidr, cases):
        """
        Stores the list of cases under the CIDR in the cache that matches
        the address family of the IP.
        """
        if self._is_ipv4(ip):
            self.IPv4cache.insert(cidr, cases)
        else:
            self.IPv6cache.insert(cidr, cases)

    def update_cache(self, ip):
        """
        Adds the IP to the cases of its CIDR, querying WHOIS when no cached
        CIDR matches the IP yet.

        :return: dict with keys 'cidr' and 'cases'; both are None when the
            WHOIS lookup failed (the IP is then added to self.failure)
        """
        cached_info = self.get_cache(ip)

        if len(cached_info) == 0:
            try:
                self.whois_reqs += 1
                print("WHOIS query #%d" % self.whois_reqs)
                request = IPWhois(ip)
                result = request.lookup_rdap(depth=1)
                cidr = result['asn_cidr']
                cases = [ip]
                self.set_cache(ip, cidr, cases)
            except Exception:
                # Best effort: remember the failure and carry on
                self.failure.append(ip)
                cidr = None
                cases = None
        else:
            # Use the first matching entry: (cidr, cases)
            cidr = cached_info[0][0]
            cases = cached_info[0][1]
            cases.append(ip)
            self.set_cache(ip, cidr, cases)

        return {'cidr': cidr, 'cases': cases}

    def get_ip_list(self, max_number, max_hours):
        """
        Runs self.sql against the wiki replica and returns all rows.

        max_number and max_hours are not used by the query.
        """
        conn = mysqldb.connect(host="fawiki.web.db.svc.wikimedia.cloud",
                               db="fawiki_p",
                               read_default_file="~/replica.my.cnf")
        try:
            cursor = conn.cursor()
            cursor.execute(self.sql)
            results = cursor.fetchall()
        finally:
            # Do not leave the replica connection open after the query
            conn.close()

        return results

    def purge_ip_list(self, iplist):
        """
        Drops the IPv4 addresses that have no close neighbour in the list.

        :param iplist: sequence of (address, hex) pairs of bytes, as
            returned by get_ip_list; hex values starting with 'v6' mark
            IPv6 entries
        :return: list of address strings that were kept
        """
        # Decode every row once; IPv6 rows get no numeric value
        entries = []
        for item in iplist:
            ip = item[0].decode('utf-8')
            hexval = item[1].decode('utf-8')
            number = None if hexval[0:2] == 'v6' else int(hexval, 16)
            entries.append((ip, number))

        purged_iplist = []
        last = len(entries) - 1
        for idx, (ip, cur) in enumerate(entries):
            # For now we keep all IPv6's
            if cur is None:
                purged_iplist.append(ip)
                continue

            # As for IPv4's, we will only keep them if they are not far
            # from the one before or the one after
            prv = entries[idx - 1][1] if idx > 0 else None
            nxt = entries[idx + 1][1] if idx < last else None
            if prv is not None and cur - prv < 65536:
                purged_iplist.append(ip)
            elif nxt is not None and nxt - cur < 65536:
                purged_iplist.append(ip)

        return purged_iplist

    def find_ranges(self):
        """
        Builds the CIDR -> IPs mapping for the purged IP list and saves the
        report page.
        """
        iplist = self.get_ip_list(1000, 24)
        iplist = self.purge_ip_list(iplist)

        for ip in iplist:
            pywikibot.output('Checking %s' % ip)
            ipinfo = self.update_cache(ip)
            if ipinfo['cidr'] is not None:
                self.success[ipinfo['cidr']] = ipinfo['cases']

        print("Generating output for wiki ...")

        out = "== بازه‌های حاوی پروکسی =="
        out += "\n{| class='wikitable sortable'"
        out += "\n! بازه !! تعداد پروکسی !! فهرست !! بسته"

        for k, v in self.success.items():
            out += "\n|-"
            out += "\n| "
            out += "[[ویژه:مشارکت‌ها/%s|%s]] " % (k, k)
            out += " ([" + self.block_link + k + " بستن])"
            out += "\n| {{formatnum:" + str(len(v)) + "}}"
            out += "\n|"
            for i in v:
                out += "\n* [[ویژه:مشارکت‌ها/%s|%s]]" % (i, i)
            out += "\n|"
            # Last column: whether the range itself is blocked
            target = pywikibot.User(self.site, k)
            if target.isBlocked():
                out += "{{yes}}"
            else:
                out += "{{no}}"

        out += "\n|}"

        if len(self.failure) > 0:
            out += "\n== تلاش‌های ناموفق =="
            out += "\nربات از دریافت اطلاعات این موارد عاجز بود:"
            for i in self.failure:
                out += "\n* [[ویژه:مشارکت‌ها/%s|%s]]" % (i, i)

        page = pywikibot.Page(self.site, self.target)
        page.text = out
        page.save(self.summary)
Beispiel #8
0
class ImportBlockBot():
    """
    Imports recent long-term proxy/webhost IP blocks from the English
    Wikipedia replica, applies them on the local wiki and saves a report
    page.
    """

    def __init__(self):
        self.site = pywikibot.Site()
        self.target = 'ویکی‌پدیا:گزارش دیتابیس/درون‌ریزی بستن‌های پروکسی'
        self.summary = 'روزآمدسازی نتایج (وظیفه ۲۲)'
        self.blocksummary = '{{پروکسی باز}}'
        self.IPQSkey = config.findproxy['IPQSkey']
        self.PCkey = config.findproxy['PCkey']
        self.GIIemail = config.findproxy['GIIemail']
        self.IPv4cache = PatriciaTrie()
        self.IPv6cache = PatriciaTrie()

    def _is_ipv4(self, ip):
        """
        Returns True when the string starts with a dotted quad; everything
        else is handled as IPv6 by the cache methods.
        """
        pat = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
        return re.match(pat, ip) is not None

    def get_ip_list(self):
        """
        Gathers the addresses of enwiki blocks that were placed 8 to 15 days
        ago, last more than 90 days, are neither indefinite nor autoblocks,
        and whose reason mentions "webhost" or "proxy".

        :return: all rows of the query; the only column is the address
        """
        conn = mysqldb.connect(host='enwiki.web.db.svc.wikimedia.cloud',
                               db='enwiki_p',
                               read_default_file='~/replica.my.cnf')
        query = """
SELECT
  ipb_address
FROM ipblocks
JOIN comment
  ON ipb_reason_id = comment_id
WHERE
  ipb_user = 0
  AND ipb_auto = 0
  AND ipb_expiry NOT IN (
    'infinity',
    'indefinite'
  )
  AND DATEDIFF(
    NOW(),
    STR_TO_DATE(LEFT(ipb_timestamp, 8), '%Y%m%d')
  ) BETWEEN 8 AND 15
  AND DATEDIFF(
    STR_TO_DATE(LEFT(ipb_expiry, 8), '%Y%m%d'),
    STR_TO_DATE(LEFT(ipb_timestamp, 8), '%Y%m%d')
  ) > 90
  AND (
    comment_text LIKE '%webhost%'
    OR comment_text LIKE '%proxy%'
  )
"""
        try:
            cursor = conn.cursor()
            cursor.execute(query)
            results = cursor.fetchall()
        finally:
            # Do not leave the replica connection open after the query
            conn.close()
        return results

    def get_cache(self, ip):
        """
        Returns the cache entries matching the IP (empty list if none).
        """
        if self._is_ipv4(ip):
            return self.IPv4cache.find_all(ip)
        # Temporary fix for
        # https://github.com/Figglewatts/cidr-trie/issues/2
        if self.IPv6cache.size == 0:
            return []
        return self.IPv6cache.find_all(ip)

    def set_cache(self, ip, cidr, country):
        """
        Stores the country under the CIDR in the cache that matches the
        address family of the IP.
        """
        if self._is_ipv4(ip):
            self.IPv4cache.insert(cidr, country)
        else:
            self.IPv6cache.insert(cidr, country)

    def get_ip_info(self, ip):
        """
        Retrieves pertinent fields from IP WHOIS information

        :return: dict with keys 'cidr' and 'country_code'; both are empty
            strings when the lookup fails
        """
        cached_info = self.get_cache(ip)

        if len(cached_info) == 0:
            try:
                request = IPWhois(ip)
                result = request.lookup_rdap(depth=1)
                cidr = result['asn_cidr']
                country = result['asn_country_code']
                self.set_cache(ip, cidr, country)
            except Exception:
                # Best effort: a failed lookup yields empty fields
                cidr = ''
                country = ''
        else:
            # Use the first matching entry: (cidr, country)
            cidr = cached_info[0][0]
            country = cached_info[0][1]

        return {'cidr': cidr, 'country_code': country}

    def format_result(self, res):
        """
        Converts a result into a wikitable cell template.

        :param res: 1, 0, False/None, or any other value (e.g. 2 for "was
            already blocked")
        :return: '{{yes}}', '{{no}}' or '{{yes-no|}}'
        """
        # bool is a subclass of int, so False == 0; test identity first to
        # keep "no answer" apart from a real negative result
        if res is False or res is None:
            return '{{yes-no|}}'
        if res == 1:
            return '{{yes}}'
        elif res == 0:
            return '{{no}}'
        else:
            return '{{yes-no|}}'

    def main(self):
        """
        Blocks every imported IP that is not blocked locally yet and saves
        the report and its signature subpage.
        """
        header = '{| class="wikitable sortable"\n'
        header += '! آی‌پی !! بازه !! کد کشور !! بسته شد'

        iplist = self.get_ip_list()
        rowtemplate = '\n|-\n| %s || %s || %s || %s'

        rows = []
        for ipdata in iplist:
            # The address column is decoded from bytes here
            ip = ipdata[0].decode('ASCII')
            pywikibot.output('Checking %s' % ip)
            ipinfo = self.get_ip_info(ip)
            if ipinfo['country_code'] == 'IR':
                # IPs from Iran are almost never proxies, skip the checks
                continue

            target = pywikibot.User(self.site, ip)
            if target.isBlocked():
                # Already blocked; shown as '{{yes-no|}}'
                blocked = 2
            else:
                pywikibot.output('Blocking %s' % ip)
                self.site.blockuser(target,
                                    '1 year',
                                    self.blocksummary,
                                    anononly=False,
                                    allowusertalk=True)
                blocked = 1
            rows.append(rowtemplate % (ip, ipinfo['cidr'],
                                       ipinfo['country_code'],
                                       self.format_result(blocked)))

        page = pywikibot.Page(self.site, self.target)
        page.text = header + ''.join(rows) + '\n|}'
        page.save(self.summary)

        # Five tildes are saved to the signature subpage
        page = pywikibot.Page(self.site, self.target + '/امضا')
        page.text = '~~~~~'
        page.save(self.summary)