Example 1
 def test_explicit(self):
     """should support explicit """
     result = uploader.explicit("cloudinary", type="twitter_name", eager=[TEST_TRANS_SCALE2_PNG], tags=[UNIQUE_TAG])
     params = dict(TEST_TRANS_SCALE2_PNG, type="twitter_name", version=result["version"])
     url = utils.cloudinary_url("cloudinary", **params)[0]
     actual = result["eager"][0]["url"]
     self.assertEqual(parse_url(actual).path, parse_url(url).path)
Example 2
 def test_explicit(self):
     """should support explicit """
     result = uploader.explicit("cloudinary", type="twitter_name",
                                eager=[dict(crop="scale", width="2.0")], tags=[UNIQUE_TAG])
     url = utils.cloudinary_url("cloudinary", type="twitter_name", crop="scale", width="2.0", format="png",
                                version=result["version"])[0]
     actual = result["eager"][0]["url"]
     self.assertEqual(parse_url(actual).path, parse_url(url).path)
Example 3
    def get_connection(self, url, proxies=None):
        """Returns a urllib3 connection for the given URL. This should not be
        called from user code, and is only exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param url: The URL to connect to.
        :param proxies: (optional) A Requests-style dictionary of proxies used on this request.
        :rtype: urllib3.ConnectionPool
        """
        proxy = select_proxy(url, proxies)

        if proxy:
            proxy = prepend_scheme_if_needed(proxy, "http")
            proxy_url = parse_url(proxy)
            if not proxy_url.host:
                raise InvalidProxyURL(
                    "Please check proxy URL. It is malformed"
                    " and could be missing the host."
                )
            proxy_manager = self.proxy_manager_for(proxy)
            conn = proxy_manager.connection_from_url(url)
        else:
            # Only scheme should be lower case
            parsed = urlparse(url)
            url = parsed.geturl()
            conn = self.poolmanager.connection_from_url(url)

        return conn
Example 4
    def _parse_connection_properties(self, host, port, username, password, use_ssl):
        hosts_list = []

        if isinstance(host, str):
            # Force to a list, split on ',' if multiple
            host = host.split(',')

        for entity in host:
            # Loop over the hosts and parse connection properties
            host_properties = {}

            parsed_uri = parse_url(entity)
            host_properties['host'] = parsed_uri.host
            if parsed_uri.port is not None:
                host_properties['port'] = parsed_uri.port
            else:
                host_properties['port'] = port

            if parsed_uri.scheme == 'https' or use_ssl is True:
                host_properties['use_ssl'] = True

            if parsed_uri.auth is not None:
                host_properties['http_auth'] = parsed_uri.auth
            elif username is not None:
                if password is None or password == 'PROMPT':
                    password = getpass.getpass()
                host_properties['http_auth'] = (username, password)

            hosts_list.append(host_properties)
        return hosts_list
Example 5
def check_vul(url):
    """
    Test if a GET to a URL is successful
    :param url: The URL to test
    :return: A dict with the exploit type as the keys, and the HTTP status code as the value
    """
    if gl_args.mode == 'auto-scan' or gl_args.mode == 'file-scan':
        timeout = Timeout(connect=1.0, read=3.0)
        pool = PoolManager(timeout=timeout, retries=1, cert_reqs='CERT_NONE')
    else:
        timeout = Timeout(connect=3.0, read=6.0)
        pool = PoolManager(timeout=timeout, cert_reqs='CERT_NONE')

    url_check = parse_url(url)
    if url_check.port == 443 and url_check.scheme != 'https':
        url = "https://"+str(url_check.host)+":"+str(url_check.port)

    print(GREEN + "\n ** Checking Host: %s **\n" % url)

    headers = {"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
               "Connection": "keep-alive",
               "User-Agent": user_agents[randint(0, len(user_agents) - 1)]}

    paths = {"jmx-console": "/jmx-console/HtmlAdaptor?action=inspectMBean&name=jboss.system:type=ServerInfo",
             "web-console" 	: "/web-console/ServerInfo.jsp",
             "JMXInvokerServlet": "/invoker/JMXInvokerServlet",
             "admin-console" : "/admin-console/"}

    for i in paths.keys():
        if gl_interrupted: break
        try:
            print(GREEN + " * Checking %s: \t" % i + ENDC),
            r = pool.request('HEAD', url +str(paths[i]), redirect=False, headers=headers)
            paths[i] = r.status

            # check if it's false positive
            if len(r.getheaders()) == 0:
                print(RED + "[ ERROR ]\n * The server %s is not an HTTP server.\n" % url + ENDC)
                paths = {"jmx-console": 505,
                         "web-console": 505,
                         "JMXInvokerServlet": 505,
                         "admin-console": 505}
                break

            if paths[i] in (301, 302, 303, 307, 308):
                url_redirect = r.get_redirect_location()
                print(GREEN + "[ REDIRECT ]\n * The server sent a redirect to: %s\n" % url_redirect)
            elif paths[i] == 200 or paths[i] == 500:
                if i == "admin-console":
                    print(RED + "[ EXPOSED ]" + ENDC)
                else:
                    print(RED + "[ VULNERABLE ]" + ENDC)
            else:
                print(GREEN + "[ OK ]")
        except Exception:
            print(RED + "\n * An error occurred while connecting to the host %s\n" % url + ENDC)
            paths[i] = 505

    return paths
Example 6
def urlparsing(value):
    try:
        loc = parse_url(value)
    except LocationParseError:
        return None, None, None

    return is_secure(loc.scheme), loc.host, loc.port
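
For reference, a minimal sketch (not from the original source) of the Url fields this helper reads, assuming urllib3's parse_url and a made-up URL:

    from urllib3.util import parse_url

    loc = parse_url("https://example.com:8443/path?q=1")
    print(loc.scheme, loc.host, loc.port)  # https example.com 8443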
Example 7
def read_articles():
    '''
    read all articles as a dataframe from mongodb collection 'articles'
        - INPUT: None
        - OUTPUT: df.   columns: title, url, uri, body_text, dt
    '''
    my_mongo = MyMongo()

    t0 = time.time()
    cur_articles = my_mongo.get_article_body_text(testing=0)

    articles_cleaned = {}
    # print('%d unique articles' % len(articles_cleaned))
    clean_articles(cur_articles, articles_cleaned)
    print('%d unique articles with body_text' % len(articles_cleaned))

    t1 = time.time()  # time it
    print("finished in %4.4fmin for %s" % ((t1 - t0) / 60, 'read/clean articles'))

    df = pd.DataFrame([{'url': k, 'body_text': v[1]}
                       for k, v in articles_cleaned.items()])

    article_dict, article_dt = my_mongo.get_article_attri()
    #article_dict_all = dict(article_dict)
    df['title'] = df['url'].map(lambda x: article_dict.get(x, 'Unknown'))
    df['uri'] = df['url'].map(lambda x: parse_url(x).host)
    df['dt'] = df['url'].map(lambda x: article_dt.get(x, ''))

    my_mongo.close()
    return df
Example 8
    def test_parse_url(self):
        url_host_map = {
            'http://google.com/mail': Url('http', host='google.com', path='/mail'),
            'http://google.com/mail/': Url('http', host='google.com', path='/mail/'),
            'google.com/mail': Url(host='google.com', path='/mail'),
            'http://google.com/': Url('http', host='google.com', path='/'),
            'http://google.com': Url('http', host='google.com'),
            'http://google.com?foo': Url('http', host='google.com', path='', query='foo'),

            # Path/query/fragment
            '': Url(),
            '/': Url(path='/'),
            '?': Url(path='', query=''),
            '#': Url(path='', fragment=''),
            '#?/!google.com/?foo#bar': Url(path='', fragment='?/!google.com/?foo#bar'),
            '/foo': Url(path='/foo'),
            '/foo?bar=baz': Url(path='/foo', query='bar=baz'),
            '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'),

            # Port
            'http://google.com/': Url('http', host='google.com', path='/'),
            'http://google.com:80/': Url('http', host='google.com', port=80, path='/'),
            'http://google.com:/': Url('http', host='google.com', path='/'),
            'http://google.com:80': Url('http', host='google.com', port=80),
            'http://google.com:': Url('http', host='google.com'),

            # Auth
            'http://*****:*****@localhost/': Url('http', auth='foo:bar', host='localhost', path='/'),
            'http://foo@localhost/': Url('http', auth='foo', host='localhost', path='/'),
            'http://*****:*****@baz@localhost/': Url('http', auth='foo:bar@baz', host='localhost', path='/'),
            'http://@': Url('http', host=None, auth='')
        }
        for url, expected_url in url_host_map.items():
            returned_url = parse_url(url)
            self.assertEqual(returned_url, expected_url)
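
As a quick illustration of the mapping above (a sketch, not part of the original test), urllib3 1.x returns a Url namedtuple:

    from urllib3.util import parse_url

    print(parse_url('http://google.com:80/mail'))
    # Url(scheme='http', auth=None, host='google.com', port=80, path='/mail', query=None, fragment=None)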
Example 9
def uris(rabbit_config):
    amqp_uri = rabbit_config["AMQP_URI"]
    scheme, auth, host, port, path, _, _ = parse_url(amqp_uri)
    bad_port = Url(scheme, auth, host, port + 1, path).url
    bad_user = Url(scheme, "invalid:invalid", host, port, path).url
    bad_vhost = Url(scheme, auth, host, port, "/unknown").url
    return {"good": amqp_uri, "bad_port": bad_port, "bad_user": bad_user, "bad_vhost": bad_vhost}
Example 10
 def test_parse_url(self):
     url_host_map = {
         "http://google.com/mail": Url("http", host="google.com", path="/mail"),
         "http://google.com/mail/": Url("http", host="google.com", path="/mail/"),
         "google.com/mail": Url(host="google.com", path="/mail"),
         "http://google.com/": Url("http", host="google.com", path="/"),
         "http://google.com": Url("http", host="google.com"),
         "http://google.com?foo": Url("http", host="google.com", path="", query="foo"),
         # Path/query/fragment
         "": Url(),
         "/": Url(path="/"),
         "?": Url(path="", query=""),
         "#": Url(path="", fragment=""),
         "#?/!google.com/?foo#bar": Url(path="", fragment="?/!google.com/?foo#bar"),
         "/foo": Url(path="/foo"),
         "/foo?bar=baz": Url(path="/foo", query="bar=baz"),
         "/foo?bar=baz#banana?apple/orange": Url(path="/foo", query="bar=baz", fragment="banana?apple/orange"),
         # Port
         "http://google.com/": Url("http", host="google.com", path="/"),
         "http://google.com:80/": Url("http", host="google.com", port=80, path="/"),
         "http://google.com:/": Url("http", host="google.com", path="/"),
         "http://google.com:80": Url("http", host="google.com", port=80),
         "http://google.com:": Url("http", host="google.com"),
         # Auth
         "http://*****:*****@localhost/": Url("http", auth="foo:bar", host="localhost", path="/"),
         "http://foo@localhost/": Url("http", auth="foo", host="localhost", path="/"),
         "http://*****:*****@baz@localhost/": Url("http", auth="foo:bar@baz", host="localhost", path="/"),
     }
     for url, expected_url in url_host_map.items():
         returned_url = parse_url(url)
          self.assertEqual(returned_url, expected_url)
Example 11
    def prepare_url(self, url, params):
        """Prepares the given HTTP URL."""
        #: Accept objects that have string representations.
        try:
            url = unicode(url)
        except NameError:
            # We're on Python 3.
            url = str(url)
        except UnicodeDecodeError:
            pass

        # Support for unicode domain names and paths.
        scheme, auth, host, port, path, query, fragment = parse_url(url)

        if not scheme:
            raise MissingSchema("Invalid URL %r: No schema supplied" % url)

        if not host:
            raise InvalidURL("Invalid URL %r: No host supplied" % url)

        # Only want to apply IDNA to the hostname
        try:
            host = host.encode('idna').decode('utf-8')
        except UnicodeError:
            raise InvalidURL('URL has an invalid label.')

        # Carefully reconstruct the network location
        netloc = auth or ''
        if netloc:
            netloc += '@'
        netloc += host
        if port:
            netloc += ':' + str(port)

        # Bare domains aren't valid URLs.
        if not path:
            path = '/'

        if is_py2:
            if isinstance(scheme, str):
                scheme = scheme.encode('utf-8')
            if isinstance(netloc, str):
                netloc = netloc.encode('utf-8')
            if isinstance(path, str):
                path = path.encode('utf-8')
            if isinstance(query, str):
                query = query.encode('utf-8')
            if isinstance(fragment, str):
                fragment = fragment.encode('utf-8')

        enc_params = self._encode_params(params)
        if enc_params:
            if query:
                query = '%s&%s' % (query, enc_params)
            else:
                query = enc_params

        url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
        self.url = url
Example 12
    def getPPDURL(self, source_url):
        """
        Downloads the source_url, stores it locally and returns the local URL

        :param source_url: remote PPD URL
        :return: local URL to the cached PPD
        """
        source = parse_url(source_url)
        host = source.host
        if host is None or host == "localhost":
            # no host: we assume that the PPD can be found on the current active master backend
            with make_session() as session:
                # get any other registered backend
                master_backend = session.query(RegisteredBackend) \
                    .filter(RegisteredBackend.uuid != self.env.core_uuid,
                            RegisteredBackend.type == BackendTypes.active_master).first()
                if master_backend is None:
                    self.log.error(C.make_error("NO_MASTER_BACKEND_FOUND"))
                    return source_url

                # Try to log in with provided credentials
                url = parse_url(master_backend.url)
                host = url.host

        # check if file exists locally
        rel_path = source.path[1:] if source.path.startswith("/") else source.path
        local_path = path.join(self.ppd_dir, host, rel_path)
        if not path.exists(local_path):
            # cache locally
            try:
                r = requests.get(source_url)
                if r.ok:
                    local_dir = path.dirname(local_path)
                    if not path.exists(local_dir):
                        makedirs(local_dir)
                    with open(local_path, "w") as f:
                        f.write(r.text)
                else:
                    self.log.error("requesting PPD from %s failed with status code: %s" % (source_url, r.status_code))
                    return source_url
            except requests.exceptions.ConnectionError as e:
                self.log.error("requesting PPD from %s failed with error: %s" % (source_url, str(e)))
                return source_url

        return "%s%s/%s" % (self.base_url, host, rel_path)
Example 13
 def test_image_field(self):
     field = Poll.objects.get(question="with image")
     self.assertIsNotNone(field)
     self.assertEqual(field.image.public_id, API_TEST_ID)
     self.assertEqual(
         parse_url(field.image.url).path,
         "/{cloud}/image/upload/v1234/{name}.jpg".format(cloud=cloudinary.config().cloud_name, name=API_TEST_ID)
     )
     self.assertTrue(False or field.image)
Example 14
def test_bad_user(rabbit_config):
    scheme, auth, host, port, path, _, _ = parse_url(rabbit_config['AMQP_URI'])
    amqp_uri = Url(scheme, 'invalid:invalid', host, port, path).url

    with pytest.raises(IOError) as exc_info:
        verify_amqp_uri(amqp_uri)
    message = str(exc_info.value)
    assert 'Error connecting to broker' in message
    assert 'invalid credentials' in message
Example 15
def test_bad_vhost(rabbit_config):
    scheme, auth, host, port, path, _, _ = parse_url(rabbit_config['AMQP_URI'])
    amqp_uri = Url(scheme, auth, host, port, '/unknown').url

    with pytest.raises(IOError) as exc_info:
        verify_amqp_uri(amqp_uri)
    message = str(exc_info.value)
    assert 'Error connecting to broker' in message
    assert 'invalid or unauthorized vhost' in message
Example 16
 def test_explicit(self):
     """should support explicit """
     result = uploader.explicit("cloudinary", type="twitter_name", eager=[dict(crop="scale", width="2.0")])
     url = utils.cloudinary_url("cloudinary", type="twitter_name", crop="scale", width="2.0", format="png",
                                version=result["version"])[0]
     if result["eager"][0]["url"].startswith("/res/"):
         actual = result["eager"][0]["url"][4:]
     else:
         actual = result["eager"][0]["url"]
     self.assertEqual(actual, parse_url(url).path)
Example 17
    def test_netloc(self):
        url_netloc_map = {
            'http://google.com/mail': 'google.com',
            'http://google.com:80/mail': 'google.com:80',
            'google.com/foobar': 'google.com',
            'google.com:12345': 'google.com:12345',
        }

        for url, expected_netloc in url_netloc_map.items():
            self.assertEqual(parse_url(url).netloc, expected_netloc)
Example 18
    def test_netloc(self):
        url_netloc_map = {
            "http://google.com/mail": "google.com",
            "http://google.com:80/mail": "google.com:80",
            "google.com/foobar": "google.com",
            "google.com:12345": "google.com:12345",
        }

        for url, expected_netloc in url_netloc_map.items():
            self.assertEqual(parse_url(url).netloc, expected_netloc)
Example 19
def pixel(path_domain=""):
    seen = {}
    sites = request.cookies.site
    sites = sites.replace('"', '')
    for c in sites.split(' '):
        if '.' in c:
            seen[c] = True

    ref_domain = parse_url(request.get_header('Referer')).host
    req_domain = parse_url(request.url).host

    if ref_domain and ref_domain != req_domain:
        seen[ref_domain] = True

    try:
        del seen['ad.aloodo.com']
    except KeyError:
        pass

    cdata = ' '.join(seen.keys())
    if cdata:
        response.set_header('Set-Cookie',
            'site="%s"; Max-Age=31536000; Path=/' % cdata)

    response.status = 200
    response.set_header('Tk', 'D')

    accept = request.get_header('Accept')
    if "image" not in accept and "text/html" in accept:
        response.set_header('Content-Type', 'text/html')
        return template('info',
            req_headers=format_headers(request.headers),
            res_headers=format_headers(response.headers),
            req_url=request.url)
    else:
        response.set_header('Content-Type', 'image/png')
        if len(seen) >= 3 or path_domain == ref_domain:
            expdt = datetime.now() + timedelta(days=7)
            exp = mktime(expdt.timetuple())
            response.set_header('Expires', formatdate(
                timeval=exp, localtime=False, usegmt=True))
        return buf
Example 20
def get_url_data(serviceurl, params=None):
    """

    :param serviceurl: url to retrieve data
    :param params: http://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls
    :return: json url_data
    """

    # Get data from the url
    # Support https without verification of certificate
    # req = requests.get(serviceurl, verify=False, params=params)

    cnt = 0
    max_retry = 3
    purl = parse_url(serviceurl)
    if purl.auth:
        # split only on the first ':' so passwords may contain colons
        username, _, password = purl.auth.partition(':')
    else:
        username = None
        password = None
    # Add url like http://host
    burl = '{}://{}'.format(purl.scheme, purl.host)
    if purl.port:
        # Add port like: http://host:8080
        burl += ':{}'.format(purl.port)
    if purl.request_uri:
        # Add path and query like: http://host:8080/path/uri?query
        burl += '{}'.format(purl.request_uri)

    while cnt < max_retry:
        try:
            req = requests.get(burl, verify=False, params=params, timeout=timeout, auth=(username, password))
            if req.json():
                return req.json()
            elif req.from_cache:
                # Clear cache to retry again
                requests_cache.clear()
                req = requests.get(burl, verify=False, params=params, timeout=timeout, auth=(username, password))
                if req.json():
                    return req.json()
            else:
                # Raise a custom exception
                raise ValueError('No data from response')

        except requests.exceptions.RequestException as e:
            time.sleep(2 ** cnt)
            cnt += 1
            if cnt >= max_retry:
                raise e

    data = req.json()

    return data
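
For reference, a sketch (hypothetical URL, not from the original source) of the parse_url fields the reconstruction above relies on:

    purl = parse_url("http://user:pw@host.example:8080/api/v1?x=1")
    purl.auth         # 'user:pw'
    purl.request_uri  # '/api/v1?x=1'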
Example 21
 def test_request_uri(self):
     url_host_map = {
         'http://google.com/mail': '/mail',
         'http://google.com/mail/': '/mail/',
         'http://google.com/': '/',
         'http://google.com': '/',
         '': '/',
         '/': '/',
         '?': '/?',
         '#': '/',
         '/foo?bar=baz': '/foo?bar=baz',
     }
     for url, expected_request_uri in url_host_map.items():
         returned_url = parse_url(url)
         self.assertEqual(returned_url.request_uri, expected_request_uri)
Example 22
 def test_request_uri(self):
     url_host_map = {
         "http://google.com/mail": "/mail",
         "http://google.com/mail/": "/mail/",
         "http://google.com/": "/",
         "http://google.com": "/",
         "": "/",
         "/": "/",
         "?": "/?",
         "#": "/",
         "/foo?bar=baz": "/foo?bar=baz",
     }
     for url, expected_request_uri in url_host_map.items():
         returned_url = parse_url(url)
          self.assertEqual(returned_url.request_uri, expected_request_uri)
Example 23
    def get_services_and_ips(services, compute_nodes):
        service_ips = {}

        for name, data in services.items():
            url = data['endpoints']['admin']['url'] if 'admin' in data['endpoints'] \
                else data['endpoints']['public']['url'] if 'public' in data['endpoints'] \
                else data['endpoints']['internal']['url']

            service_ips[name] = socket.gethostbyname(parse_url(url).host) if url is not None else '0.0.0.0'

        for name, data in compute_nodes.items():
            service_ips[name] = data['ip'] if data['ip'] is not None else socket.gethostbyname(data['name'])

        logging.info('Service IPs:\n{}'.format(pprint.PrettyPrinter(2).pformat(service_ips)))

        return service_ips
Example 24
 def test_parse_url(self):
     url_host_map = {
         "http://google.com/mail": Url("http", host="google.com", path="/mail"),
         "http://google.com/mail/": Url("http", host="google.com", path="/mail/"),
         "google.com/mail": Url(host="google.com", path="/mail"),
         "http://google.com/": Url("http", host="google.com", path="/"),
         "http://google.com": Url("http", host="google.com"),
         "http://google.com?foo": Url("http", host="google.com", path="", query="foo"),
         "": Url(),
         "/": Url(path="/"),
         "?": Url(path="", query=""),
         "#": Url(path="", fragment=""),
         "#?/!google.com/?foo#bar": Url(path="", fragment="?/!google.com/?foo#bar"),
         "/foo": Url(path="/foo"),
         "/foo?bar=baz": Url(path="/foo", query="bar=baz"),
         "/foo?bar=baz#banana?apple/orange": Url(path="/foo", query="bar=baz", fragment="banana?apple/orange"),
     }
     for url, expected_url in url_host_map.items():
         returned_url = parse_url(url)
          self.assertEqual(returned_url, expected_url)
Example 25
def get_web_title(hyperlink):
    """Get title of given hyperlink.

    :param hyperlink: target url
    :type hyperlink: str
    :return: title of the website
    :rtype: str
    """
    web_schemes = ["http", "https"]
    given_scheme = parse_url(hyperlink).scheme.lower()
    if given_scheme in web_schemes:
        resp = requests.get(hyperlink)
        if resp.status_code == requests.codes.ok:
            soup = BeautifulSoup(resp.text, 'lxml')
            title = soup.title.string
            return title
        else:
            return "<Unnamed>"
    else:
        return "<Not allowed>"
Example 26
 def test_parse_url(self):
     url_host_map = {
         'http://google.com/mail': Url('http', host='google.com', path='/mail'),
         'http://google.com/mail/': Url('http', host='google.com', path='/mail/'),
         'google.com/mail': Url(host='google.com', path='/mail'),
         'http://google.com/': Url('http', host='google.com', path='/'),
         'http://google.com': Url('http', host='google.com'),
         'http://google.com?foo': Url('http', host='google.com', path='', query='foo'),
         '': Url(),
         '/': Url(path='/'),
         '?': Url(path='', query=''),
         '#': Url(path='', fragment=''),
         '#?/!google.com/?foo#bar': Url(path='', fragment='?/!google.com/?foo#bar'),
         '/foo': Url(path='/foo'),
         '/foo?bar=baz': Url(path='/foo', query='bar=baz'),
         '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'),
     }
     for url, expected_url in url_host_map.items():
         returned_url = parse_url(url)
          self.assertEqual(returned_url, expected_url)
Example 27
    def run_spider(self, target_ip, target_web, client):
        # Execute crawling using Scrapy.
        all_targets_log = []
        for target_info in target_web:
            target_url = target_info[1] + target_ip + ':' + target_info[0] + '/'
            target_log = [target_url]
            response_log = target_ip + '_' + target_info[0] + '.log'
            now_time = self.get_current_date('%Y%m%d%H%M%S')
            result_file = os.path.join(self.output_base_path, now_time + self.output_filename)
            option = ' -a target_url=' + target_url + ' -a allow_domain=' + target_ip + \
                     ' -a delay=' + self.spider_delay_time + ' -a store_path=' + self.store_path + \
                     ' -a response_log=' + response_log + ' -a msgrpc_host=' + client.host + \
                     ' -a msgrpc_port=' + str(client.port) + ' -a msgrpc_token=' + client.token.decode('utf-8') + \
                     ' -a msgrpc_console_id=' + client.console_id.decode('utf-8') + ' -o ' + result_file
            command = 'scrapy runspider Spider.py' + option
            proc = Popen(command, shell=True)
            proc.wait()

            # Get crawling result.
            dict_json = {}
            if os.path.exists(result_file):
                with codecs.open(result_file, 'r', encoding='utf-8') as fin:
                    target_text = self.delete_ctrl_char(fin.read())
                    if target_text != '':
                        dict_json = json.loads(target_text)
                    else:
                        self.print_message(WARNING, '[{}] is empty.'.format(result_file))

            # Exclude except allowed domains.
            for idx in range(len(dict_json)):
                items = dict_json[idx]['urls']
                for item in items:
                    try:
                        if target_ip == util.parse_url(item).host:
                            target_log.append(item)
                    except Exception as err:
                        self.print_exception(err, 'Parsed error: {}'.format(item))
            all_targets_log.append([target_url, os.path.join(self.store_path, response_log), list(set(target_log))])
        return all_targets_log
Example 28
 def preprocess_body(self, body: EntityDefinition) -> EntityDefinition:
     body = super().preprocess_body(body)
     if not self.pulp_ctx.has_plugin("core", min_version="3.11.dev"):
         # proxy_username and proxy_password are separate fields starting with 3.11
         # https://pulp.plan.io/issues/8167
         proxy_username = body.pop("proxy_username", None)
         proxy_password = body.pop("proxy_password", None)
         if proxy_username or proxy_password:
             if "proxy_url" in body:
                 if proxy_username and proxy_password:
                     parsed_url = parse_url(body["proxy_url"])
                     body["proxy_url"] = parsed_url._replace(auth=":".join(
                         [proxy_username, proxy_password])).url
                 else:
                     raise click.ClickException(
                         _("Proxy username and password can only be provided in conjunction."
                           ))
             else:
                 raise click.ClickException(
                     _("Proxy credentials can only be provided with a proxy url."
                       ))
     return body
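
The _replace call above works because urllib3's Url is a namedtuple; a standalone sketch with hypothetical proxy credentials:

    from urllib3.util import parse_url

    proxied = parse_url("http://proxy.example:3128")._replace(auth="user:secret")
    print(proxied.url)  # http://user:secret@proxy.example:3128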
Example 29
def get_host_and_port(url):
    """Get the host, or the host:port pair if port is explicitly included, for the given URL.

    Examples:
    >>> get_host_and_port('example.com')
    'example.com'
    >>> get_host_and_port('example.com:443')
    'example.com:443'
    >>> get_host_and_port('http://example.com')
    'example.com'
    >>> get_host_and_port('https://example.com/')
    'example.com'
    >>> get_host_and_port('https://example.com:8081')
    'example.com:8081'
    >>> get_host_and_port('ssh://example.com')
    'example.com'

    :param url: the URL string to parse
    :return: a string with the host:port pair if the URL includes port number explicitly; otherwise, returns host only
    """
    url = urllib3_util.parse_url(url)
    return "{}:{}".format(url.host, url.port) if url.port else url.host
Example 30
    def __init__(self,
                 base_url,
                 token=None,
                 identity=None,
                 default_429_wait_ms=5000,
                 use_authorization_header=True):
        try:
            scheme, auth, host, port, path, query, fragment = parse_url(
                base_url)
        except LocationParseError:
            raise MatrixError("Invalid homeserver url %s" % base_url)
        if not scheme:
            raise MatrixError("No scheme in homeserver url %s" % base_url)
        self._base_url = base_url

        self.token = token
        self.identity = identity
        self.txn_id = 0
        self.validate_cert = True
        self.session = Session()
        self.default_429_wait_ms = default_429_wait_ms
        self.use_authorization_header = use_authorization_header
Example 31
def _http_request(method: str, endpoint: str, **kwargs) -> Optional[JT]:
    url = build_url(endpoint)
    parsed_url = parse_url(url)
    pm_args = {
        "num_pools": constants.HTTP_POOL_MANAGER_COUNT,
        "host": parsed_url.host,
        "port": parsed_url.port,
        "retries": Retry(connect=constants.HTTP_REQUEST_RETRIES_COUNT,
                         read=constants.HTTP_REQUEST_RETRIES_COUNT,
                         redirect=constants.HTTP_REQUEST_RETRIES_COUNT,
                         backoff_factor=constants.HTTP_REQUEST_BACKOFF_FACTOR,
                         method_whitelist=METHODS_WHITELIST),
        "ssl_context": _ssl_context,
    }
    if _ssl_context is not None and url.startswith("https"):
        pm_args["assert_hostname"] = False
    http_pool_manager: PoolManager = PoolManager(**pm_args)
    try:
        logger.trace("HTTP {0} to {1}", method, url)
        response = http_pool_manager.request(
            method=method,
            url=parsed_url.url,
            timeout=constants.HTTP_REQUEST_TIMEOUT,
            **kwargs)
        raise_for_status(response)
    except MaxRetryError as e:
        logger.info("{} to {} failed due to: {}.", method, url, e)
        return None
    except Exception as e:  # pylint: disable=broad-except
        logger.error(log_messages.HTTP_REQUEST_RETURNED_ERROR, method, url, e)
        return None

    return json.loads(response.data)
Example 32
 def _stage_files_to_be_validated(self):
     upload_area_id = None
     file_names = []
     for s3_file_url in self.s3_file_urls:
         url_bits = parse_url(s3_file_url)
         s3_bucket_name = url_bits.netloc
         s3_object_key = urllib.parse.unquote(url_bits.path.lstrip('/'))
         key_parts = s3_object_key.split('/')
         upload_area_id = key_parts.pop(0)
         file_name = "/".join(key_parts)
         file_names.append(file_name)
         staged_file_path = pathlib.Path(self.staging_folder, s3_object_key)
         self._log("Staging s3://{bucket}/{key} at {file_path}".format(bucket=s3_bucket_name,
                                                                       key=s3_object_key,
                                                                       file_path=staged_file_path))
         staged_file_path.parent.mkdir(parents=True, exist_ok=True)
         self._download_file_from_bucket_to_filesystem(s3_bucket_name, s3_object_key, staged_file_path)
         if not staged_file_path.is_file():
             raise UploadException(status=500, title="Staged file path is not a file",
                                   detail=f"Attempting to stage file path {staged_file_path} failed because it is "
                                   f"not a file.")
         self.staged_file_paths.append(staged_file_path)
     return upload_area_id, file_names
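
A short sketch (hypothetical bucket and key) of how parse_url splits an S3-style URL into the netloc and path pieces used above:

    from urllib3.util import parse_url

    bits = parse_url("s3://my-bucket/area-1/data%20file.txt")
    bits.netloc  # 'my-bucket'
    bits.path    # '/area-1/data%20file.txt'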
Example 33
def join_proxy_url(address: str, username: Optional[str],
                   password: Optional[str]) -> str:
    """
    Gets a splitted address, username, password
    returns: Joined URL forming proxy_url
    """
    # http://username:password@address

    if not address.startswith(("http://", "https://")):
        address = "http://" + address

    parsed = parse_url(address)
    auth_items = []

    if username:
        auth_items.append(str(username))

        # password is set only if there is a username
        # to avoid it being set as e.g: http://:[email protected]
        if password:
            auth_items.append(str(password))

    return get_parsed_url(parsed, auth_items)
Example 34
    def classifier_signature(self, target_info):
        product_list = []
        for target in target_info:
            for target_url in target[2]:
                # Get HTTP response (header + body).
                response = ''
                http = urllib3.PoolManager(timeout=self.util.http_timeout)
                try:
                    self.util.print_message(OK, 'Accessing: {}'.format(target_url))
                    res = http.request('GET', target_url)
                    for key in res.headers._container.keys():
                        response += key + ': ' + res.headers[key] + '\r\n'
                    response += '\r\n\r\n' + res.data.decode('utf-8')
                except Exception as err:
                    self.util.print_message(WARNING, '{}'.format(err))

                for category in ['os', 'web', 'framework', 'cms']:
                    prod_info = self.identify_product(category, response)
                    for product in prod_info:
                        parsed = util.parse_url(target_url)
                        product_list.append([product, parsed.scheme, parsed.port, parsed.path])

        return product_list
Example 35
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    parsed = parse_url(url)
    scheme, auth, host, port, path, query, fragment = parsed

    # A defect in urlparse determines that there isn't a netloc present in some
    # urls. We previously assumed parsing was overly cautious, and swapped the
    # netloc and path. Due to a lack of tests on the original defect, this is
    # maintained with parse_url for backwards compatibility.
    netloc = parsed.netloc
    if not netloc:
        netloc, path = path, netloc

    if scheme is None:
        scheme = new_scheme
    if path is None:
        path = ''

    return urlunparse((scheme, netloc, path, '', query, fragment))
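
Expected behaviour, sketched with hypothetical inputs (note that an existing scheme is preserved):

    prepend_scheme_if_needed("example.com/path", "http")     # 'http://example.com/path'
    prepend_scheme_if_needed("https://example.com", "http")  # 'https://example.com'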
Example 36
    def is_url(self, url):
        #: Accept objects that have string representations.
        try:
            url = unicode(url)
        except NameError:
            # We're on Python 3.
            url = str(url)
        except UnicodeDecodeError:
            pass

        # Support for unicode domain names and paths.
        scheme, auth, host, port, path, query, fragment = parse_url(url)

        if not scheme or not host:
            return False

        # Only want to apply IDNA to the hostname
        try:
            host = host.encode('idna').decode('utf-8')
        except UnicodeError:
            return False

        return True
Example 37
    def get_github_url(api_url: str, repository_name: str,
                       repository_branch: str) -> str:
        """
        Generates the URL to the location of the pushed DAG
        :param api_url: url of the GitHub API
        :param repository_name: name of the GitHub repository in the form [name or org]/[repository name]
        :param repository_branch: name of the GitHub branch
        :return: a URL in string format
        """

        parsed_url = parse_url(api_url)
        scheme = parsed_url.scheme + ":/"
        host = parsed_url.host
        port = ''

        if parsed_url.host.split('.')[0] == 'api':
            host = ".".join(parsed_url.host.split('.')[1:])

        if parsed_url.port:
            port = ':' + str(parsed_url.port)

        return "/".join(
            [scheme, host, port, repository_name, 'tree', repository_branch])
Example 38
    def is_url(url):
        #: Accept objects that have string representations.
        try:
            url = unicode(url)
        except NameError:
            # We're on Python 3.
            url = str(url)
        except UnicodeDecodeError:
            pass

        # Support for unicode domain names and paths.
        scheme, auth, host, port, path, query, fragment = parse_url(url)

        if not scheme or not host:
            return False

        # Only want to apply IDNA to the hostname
        try:
            host = host.encode('idna').decode('utf-8')
        except UnicodeError:
            return False

        return True
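
The IDNA round-trip performed on the host, shown standalone for a hypothetical non-ASCII name (this relies on Python's built-in 'idna' codec):

    "bücher.example".encode('idna').decode('utf-8')  # 'xn--bcher-kva.example'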
Example 39
    def check_url_is_allowed(self, value: str) -> str:
        """Check if a given URL is allowed or not and return the cleaned URL,
        e.g. fixed a mal-encoded URL.

        By default, only the protocol ``javascript`` is denied.
        Other protocols are allowed (HTTP(S), FTP, inline data (images, files, ...), ..)

        Fragments are allowed as well (``#my-content``).

        Query-strings and relative/absolute paths are allowed.

        :returns: The cleaned URL.
        :raises ValueError: If the URL is invalid or blacklisted.
        """

        url = parse_url(value.strip())

        if url.scheme in self.blacklisted_url_schemes:
            raise ValueError(
                "Scheme: {scheme} is blacklisted".format(scheme=url.scheme)
            )

        return url.url
Example 40
 def dnevnik_authorization(self):
     """ Функция для проведения авторизации, возвращает ответ сервера с токеном и профайлами """
     ss = requests.Session()
     # ss.proxies = {
     #     'http': '85.26.146.169:80',
     #     'https': '85.26.146.169:80'
     # }
     login_form_request = ss.get(self.OAUTH_URL, timeout=5)
     sleep(randint(10, 30) / 10)
     ss.get("https://stats.mos.ru/handler/handler.js?time={time}".format(
         time=datetime.today().timestamp()),
            timeout=5)
     sleep(randint(10, 30) / 10)
     login_request = ss.post(
         "https://login.mos.ru/sps/login/methods/password",
         data={
             "isDelayed": False,
             "login": self._login,
             "password": self._password,
         },
         allow_redirects=False,
         timeout=5)
     sleep(randint(10, 30) / 10)
     if login_request.status_code in range(300, 400):
         redirect_uri = login_request.headers["Location"]
         code = parse_url(redirect_uri).query.split("=")[1]
         req = ss.get(
             "https://dnevnik.mos.ru/lms/api/sudir/oauth/te?code={}".format(
                 code),
             headers={"Accept": "application/vnd.api.v3+json"},
             timeout=5)
         return json.loads(req.content.decode("utf-8"))
     else:
         if login_request.status_code == 200:
             raise Exception(
                 f"Something went wrong! Status code ({login_request.status_code}) is incorrect."
                 f" Maybe you entered incorrect login/password?")
Example 41
def all_of(resource: str, session: requests.Session) -> List[Dict[str, Any]]:
    assert resource in (
        "consumers",
        "services",
        "routes",
        "plugins",
        "acls",
        "key-auths",
        "basic-auths",
    )
    logger.debug(f"Collecting all entries from `{resource}` ...")
    data: List[Dict[str, Any]] = []
    next_ = f"/{resource}"
    while next_:
        resp = session.get(next_)
        _check_resp(resp)
        jresp = resp.json()
        data += jresp["data"]
        next_ = jresp.get("next")
        if next_:
            u = parse_url(next_)
            next_ = u.request_uri
            logger.debug(f"... next page `{next_}`")
    return data
Example 42
    def configure(self, *args, **kwargs):
        vars = filter(lambda x: x[0].startswith('OS_'), os.environ.items())
        conf_keys = self.conf.keys()
        for k, v in vars:
            # Try the full var first
            n = k.lower()
            cands = (n, n[3:])
            for var in cands:
                if var in conf_keys:
                    self.conf.set_default(name=var, default=v)
                    break

        self.conf(args[0])

        # bail using keystoneauth1 if not available.
        # FIXME: this is hacky...
        if self.conf.use_keystoneauth1 and not HAS_KEYSTONEAUTH1:
            raise Exception('Requested module keystoneauth1 is not available.')
        # adjust the logging
        if self.conf.debug:
            ch = logging.StreamHandler(stream=sys.stderr)
            ch.setLevel(logging.DEBUG)
            self.logger.addHandler(ch)
            # This is questionable...
            self._logging_handlers['debug'] = ch
            self.logger.removeHandler(self._logging_handlers['info'])
            self.logger.setLevel(logging.DEBUG)

        self.os_service_endpoint = self.conf.os_service_endpoint
        if self.os_service_endpoint is None:
            base = {'path': None}
            url = parse_url(self.conf.auth_url)
            l = list(url)[:4] + [None] * (len(url._fields) - 4)
            self.os_service_endpoint = Url(*l).url
            self.conf.set_default('os_service_endpoint',
                                  default=self.os_service_endpoint)
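
What the slice-and-rebuild at the end does, assuming urllib3's seven-field Url (hypothetical auth_url):

    from urllib3.util import parse_url, Url

    url = parse_url("https://keystone.example:5000/v3")
    base = Url(*(list(url)[:4] + [None] * (len(url._fields) - 4)))
    print(base.url)  # https://keystone.example:5000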
Example 43
    def custom_search(self, query, max_page_count=1, target_fqdn=''):
        # Google Custom Search API.
        self.utility.write_log(20, '[In] Execute Google custom search [{}].'.format(self.file_name))

        # Setting of Google Custom Search.
        service = None
        if self.utility.proxy != '':
            # Set proxy.
            self.utility.print_message(WARNING, 'Set proxy server: {}'.format(self.utility.proxy))
            parsed = util.parse_url(self.utility.proxy)
            proxy = None
            if self.utility.proxy_pass != '':
                proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                                           proxy_host=parsed.host,
                                           proxy_port=parsed.port,
                                           proxy_user=self.utility.proxy_user,
                                           proxy_pass=self.utility.proxy_pass)
            else:
                proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                                           proxy_host=parsed.host,
                                           proxy_port=parsed.port)
            my_http = httplib2.Http(proxy_info=proxy, disable_ssl_certificate_validation=True)
            service = build("customsearch", "v1", developerKey=self.api_key, http=my_http)
        else:
            # None proxy.
            service = build("customsearch", "v1", developerKey=self.api_key)

        # Execute search.
        urls = []
        fqdn_list = []
        result_count = 0
        start_index = self.start_index
        try:
            search_count = 0
            while search_count < max_page_count:
                self.utility.print_message(OK, 'Using query : {}'.format(query))
                response = service.cse().list(
                    q=query,
                    cx=self.search_engine_id,
                    num=10,
                    start=start_index,
                    filter='0',
                    safe='off',
                ).execute()

                # Get finding counts.
                result_count = int(response.get('searchInformation').get('totalResults'))
                is_new_query = False

                # Get extracted link (url).
                search_urls = []
                if result_count != 0:
                    items = response['items']
                    for item in items:
                        urls.append(item['link'])
                        search_urls.append(item['link'])

                # Set new query.
                if result_count <= 10 or max_page_count == 1:
                    fqdn_list.extend(self.utility.transform_url_hostname_list(search_urls))
                    break
                else:
                    # Refine search range using "-inurl" option.
                    tmp_list = self.utility.transform_url_hostname_list(search_urls)
                    for fqdn in tmp_list:
                        if fqdn not in fqdn_list:
                            subdomain = self.utility.extract_subdomain(fqdn, target_fqdn)
                            if target_fqdn != '' and subdomain == target_fqdn:
                                query += ' -inurl:http://' + subdomain + ' -inurl:https://' + subdomain
                                is_new_query = True
                                search_count = -1
                            elif subdomain != '':
                                query += ' -inurl:' + subdomain
                                is_new_query = True
                                search_count = -1
                            fqdn_list.append(fqdn)
                    if is_new_query is False:
                        if 'nextPage' in response.get('queries').keys():
                            start_index = response.get('queries').get('nextPage')[0].get('startIndex')
                        else:
                            self.utility.print_message(WARNING, 'There is no next page.')
                            break

                search_count += 1
        except Exception as e:
            msg = 'Google custom search is failure : {}'.format(e)
            self.utility.print_exception(e, msg)
            self.utility.write_log(30, msg)
            self.utility.write_log(20, '[Out] Execute Google custom search [{}].'.format(self.file_name))
            return urls, result_count, fqdn_list

        self.utility.write_log(20, '[Out] Execute Google custom search [{}].'.format(self.file_name))
        return urls, result_count, list(set(fqdn_list))
Example 44
def main():
    """
    Run interactively. Call when the module is run by itself.
    :return: Exit code
    """
    # check for Updates
    updates = check_updates()
    if updates:
        print(BLUE + BOLD + "\n\n * An update is available and it is recommended to update before continuing.\n" +
              "   Do you want to update now?")
        pick = input("   YES/no ? ").lower() if version_info[0] >= 3 else raw_input("   YES/no ? ").lower()
        print(ENDC)
        if pick != "no":
            updated = auto_update()
            if updated:
                print(GREEN + BOLD + "\n * JexBoss has been successfully updated. Please run it again to enjoy the updates.\n" + ENDC)
                exit(0)
            else:
                print(RED + BOLD + "\n\n * An error occurred while updating JexBoss. Please try again.\n" + ENDC)
                exit(1)

    vulnerables = False
    # check vulnerabilities for standalone mode
    if gl_args.mode == 'standalone':
        url = gl_args.host
        scan_results = check_vul(url)
        # performs exploitation
        for i in ["jmx-console", "web-console", "JMXInvokerServlet", "admin-console"]:
            if scan_results[i] == 200 or scan_results[i] == 500:
                vulnerables = True
                if gl_args.auto_exploit:
                    auto_exploit(url, i)
                else:
                    print(BLUE + "\n\n * Do you want to try to run an automated exploitation via \"" +
                          BOLD + i + NORMAL + "\" ?\n" +
                          "   This operation will provide a simple command shell to execute commands on the server..\n" +
                          RED + "   Continue only if you have permission!" + ENDC)
                    pick = input("   yes/NO ? ").lower() if version_info[0] >= 3 else raw_input("   yes/NO ? ").lower()
                    if pick == "yes":
                        auto_exploit(url, i)
    # check vulnerabilities for auto scan mode
    elif gl_args.mode == 'auto-scan':
        file_results = open(gl_args.results, 'w')
        file_results.write("JexBoss Scan Mode Report\n\n")
        for ip in gl_args.network.hosts():
            if gl_interrupted: break
            for port in gl_args.ports.split(","):
                if check_connectivity(ip, port):
                    url = "{0}:{1}".format(ip,port)
                    ip_results = check_vul(url)
                    for key in ip_results.keys():
                        if ip_results[key] == 200 or ip_results[key] == 500:
                            vulnerables = True
                            if gl_args.auto_exploit:
                                result_exploit = auto_exploit(url, key)
                                if result_exploit:
                                    file_results.write("{0}:\t[EXPLOITED VIA {1}]\n".format(url, key))
                                else:
                                    file_results.write("{0}:\t[FAILED TO EXPLOITED VIA {1}]\n".format(url, key))
                            else:
                                file_results.write("{0}:\t[POSSIBLY VULNERABLE TO {1}]\n".format(url, key))

                            file_results.flush()
                else:
                    print(RED + "\n * Host %s:%s does not respond." % (ip, port) + ENDC)
        file_results.close()

    elif gl_args.mode == 'file-scan':
        file_results = open(gl_args.out, 'w')
        file_results.write("JexBoss Scan Mode Report\n\n")
        file_input = open(gl_args.file, 'r')
        for url in file_input.readlines():
            if gl_interrupted: break
            url = url.strip()
            parsed = parse_url(url)
            ip = str(parsed.host)
            port = parsed.port if parsed.port is not None else 80
            if check_connectivity(ip, port):
                url_results = check_vul(url)
                for key in url_results.keys():
                    if url_results[key] == 200 or url_results[key] == 500:
                        vulnerables = True
                        if gl_args.auto_exploit:
                            result_exploit = auto_exploit(url, key)
                            if result_exploit:
                                file_results.write("{0}:\t[EXPLOITED VIA {1}]\n".format(url, key))
                            else:
                                file_results.write("{0}:\t[FAILED TO EXPLOITED VIA {1}]\n".format(url, key))
                        else:
                            file_results.write("{0}:\t[POSSIBLY VULNERABLE TO {1}]\n".format(url, key))

                        file_results.flush()
            else:
                print(RED + "\n * Host %s:%s does not respond." % (ip, port) + ENDC)
        file_results.close()

    # resume results
    if vulnerables:
        banner()
        print(RED + BOLD+" Results: potentially compromised server!" + ENDC)
        if gl_args.mode == 'file-scan':
            print(RED + BOLD + " ** Check more information on file {0} **".format(gl_args.out) + ENDC)
        elif gl_args.mode == 'auto-scan':
            print(RED + BOLD + " ** Check more information on file {0} **".format(gl_args.results) + ENDC)
        print(GREEN + " * - - - - - - -  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*\n"
             +BOLD+   " Recommendations: \n" +ENDC+
              GREEN+  " - Remove web consoles and services that are not used, eg:\n"
                      "    $ rm web-console.war\n"
                      "    $ rm http-invoker.sar\n"
                      "    $ rm jmx-console.war\n"
                      "    $ rm jmx-invoker-adaptor-server.sar\n"
                      "    $ rm admin-console.war\n"
                      " - Use a reverse proxy (eg. nginx, apache, F5)\n"
                      " - Limit access to the server only via reverse proxy (eg. DROP INPUT POLICY)\n"
                      " - Search vestiges of exploitation within the directories \"deploy\" and \"management\".\n\n"
                      " References:\n"
                      "   [1] - https://developer.jboss.org/wiki/SecureTheJmxConsole\n"
                      "   [2] - https://issues.jboss.org/secure/attachment/12313982/jboss-securejmx.pdf\n"
                      "\n"
                      " - If possible, discard this server!\n"
                      " * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*\n")
    else:
        print(GREEN + "\n\n * Results: \n" +
              "   The server is not vulnerable to bugs tested ... :D\n\n" + ENDC)
    # infos
    print(ENDC + " * Info: review, suggestions, updates, etc: \n" +
          "   https://github.com/joaomatosf/jexboss\n")

    print(GREEN + BOLD + " * DONATE: " + ENDC + "Please consider making a donation to help improve this tool,\n"
                                                "           including research to new versions of JBoss and zero days. \n\n" +
          GREEN + BOLD + " * Paypal: " + ENDC + " [email protected] \n" +
          GREEN + BOLD + " * Bitcoin Address: " + ENDC + " 14x4niEpfp7CegBYr3tTzTn4h6DAnDCD9C \n" +
          GREEN + BOLD + " * URI: " + ENDC + " bitcoin:14x4niEpfp7CegBYr3tTzTn4h6DAnDCD9C?label=jexboss\n")
Example 45
    def prepare_url(self, url, params):
        """Prepares the given HTTP URL."""
        #: Accept objects that have string representations.
        #: We're unable to blindly call unicode/str functions
        #: as this will include the bytestring indicator (b'')
        #: on python 3.x.
        #: https://github.com/requests/requests/pull/2238
        if isinstance(url, bytes):
            url = url.decode('utf8')
        else:
            url = unicode(url) if is_py2 else str(url)

        # Remove leading whitespaces from url
        url = url.lstrip()

        # Don't do any URL preparation for non-HTTP schemes like `mailto`,
        # `data` etc to work around exceptions from `url_parse`, which
        # handles RFC 3986 only.
        if ':' in url and not url.lower().startswith('http'):
            self.url = url
            return

        # Support for unicode domain names and paths.
        try:
            scheme, auth, host, port, path, query, fragment = parse_url(url)
        except LocationParseError as e:
            raise InvalidURL(*e.args)

        if not scheme:
            error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?")
            error = error.format(to_native_string(url, 'utf8'))

            raise MissingSchema(error)

        if not host:
            raise InvalidURL("Invalid URL %r: No host supplied" % url)

        # In general, we want to try IDNA encoding the hostname if the string contains
        # non-ASCII characters. This allows users to automatically get the correct IDNA
        # behaviour. For strings containing only ASCII characters, we need to also verify
        # it doesn't start with a wildcard (*), before allowing the unencoded hostname.
        if not unicode_is_ascii(host):
            try:
                host = self._get_idna_encoded_host(host)
            except UnicodeError:
                raise InvalidURL('URL has an invalid label.')
        elif host.startswith(u'*'):
            raise InvalidURL('URL has an invalid label.')

        # Carefully reconstruct the network location
        netloc = auth or ''
        if netloc:
            netloc += '@'
        netloc += host
        if port:
            netloc += ':' + str(port)

        # Bare domains aren't valid URLs.
        if not path:
            path = '/'

        if is_py2:
            if isinstance(scheme, str):
                scheme = scheme.encode('utf-8')
            if isinstance(netloc, str):
                netloc = netloc.encode('utf-8')
            if isinstance(path, str):
                path = path.encode('utf-8')
            if isinstance(query, str):
                query = query.encode('utf-8')
            if isinstance(fragment, str):
                fragment = fragment.encode('utf-8')

        if isinstance(params, (str, bytes)):
            params = to_native_string(params)

        enc_params = self._encode_params(params)
        if enc_params:
            if query:
                query = '%s&%s' % (query, enc_params)
            else:
                query = enc_params

        url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
        self.url = url
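For reference, urllib3's parse_url returns a Url named tuple whose seven fields match the unpacking above; a quick demonstration:

from urllib3.util import parse_url

url = parse_url("https://user:pw@example.com:8080/a/b?x=1#frag")
print(url.scheme)    # https
print(url.auth)      # user:pw
print(url.host)      # example.com
print(url.port)      # 8080
print(url.path)      # /a/b
print(url.query)     # x=1
print(url.fragment)  # frag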
Esempio n. 46
0
    def prepare_url(self, url, params):
        """Prepares the given HTTP URL."""
        #: Accept objects that have string representations.
        #: We're unable to blindly call unicode/str functions
        #: as this will include the bytestring indicator (b'')
        #: on python 3.x.
        #: https://github.com/psf/requests/pull/2238
        if isinstance(url, bytes):
            url = url.decode("utf8")
        else:
            url = str(url)

        # Remove leading whitespaces from url
        url = url.lstrip()

        # Don't do any URL preparation for non-HTTP schemes like `mailto`,
        # `data` etc to work around exceptions from `url_parse`, which
        # handles RFC 3986 only.
        if ":" in url and not url.lower().startswith("http"):
            self.url = url
            return

        # Support for unicode domain names and paths.
        try:
            scheme, auth, host, port, path, query, fragment = parse_url(url)
        except LocationParseError as e:
            raise InvalidURL(*e.args)

        if not scheme:
            raise MissingSchema(f"Invalid URL {url!r}: No scheme supplied. "
                                f"Perhaps you meant http://{url}?")

        if not host:
            raise InvalidURL(f"Invalid URL {url!r}: No host supplied")

        # In general, we want to try IDNA encoding the hostname if the string contains
        # non-ASCII characters. This allows users to automatically get the correct IDNA
        # behaviour. For strings containing only ASCII characters, we need to also verify
        # it doesn't start with a wildcard (*), before allowing the unencoded hostname.
        if not unicode_is_ascii(host):
            try:
                host = self._get_idna_encoded_host(host)
            except UnicodeError:
                raise InvalidURL("URL has an invalid label.")
        elif host.startswith(("*", ".")):
            raise InvalidURL("URL has an invalid label.")

        # Carefully reconstruct the network location
        netloc = auth or ""
        if netloc:
            netloc += "@"
        netloc += host
        if port:
            netloc += f":{port}"

        # Bare domains aren't valid URLs.
        if not path:
            path = "/"

        if isinstance(params, (str, bytes)):
            params = to_native_string(params)

        enc_params = self._encode_params(params)
        if enc_params:
            if query:
                query = f"{query}&{enc_params}"
            else:
                query = enc_params

        url = requote_uri(
            urlunparse([scheme, netloc, path, None, query, fragment]))
        self.url = url
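The method can be exercised directly on a bare PreparedRequest; a short usage sketch (the URL and params are illustrative):

from requests.models import PreparedRequest

req = PreparedRequest()
req.prepare_url("http://example.com/search", params={"q": "jboss", "page": 2})
print(req.url)  # http://example.com/search?q=jboss&page=2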
Esempio n. 48
0
    def prepare_url(self, url, params):
        """Prepares the given HTTP URL.
        Mostly copied from requests lib, removed python2 checks and added checks for https"""
        from urllib3.util import parse_url
        from urllib3.exceptions import LocationParseError
        from urllib.parse import urlunparse
        from requests.exceptions import InvalidURL
        from requests.utils import requote_uri, to_native_string

        if isinstance(url, bytes):
            url = url.decode('utf8')
        else:
            url = str(url)

        # Don't do any URL preparation for non-HTTP schemes like `mailto`,
        # `data` etc to work around exceptions from `url_parse`, which
        # handles RFC 3986 only.
        if ':' in url and not url.lower().startswith('http'):
            self.url = url
            return

        # Support for unicode domain names and paths.
        try:
            scheme, auth, host, port, path, query, fragment = parse_url(url)
        except LocationParseError as e:
            raise InvalidURL(*e.args)

        if not scheme:
            # normally an error is thrown, we assume https
            scheme = 'https'
        elif scheme != 'https':
            raise InvalidURL('Invalid URL %r: must be https' % url)

        if not host:
            raise InvalidURL("Invalid URL %r: No host supplied" % url)

        # Only want to apply IDNA to the hostname
        try:
            host = host.encode('idna').decode('utf-8')
        except UnicodeError:
            raise InvalidURL('URL has an invalid label.')

        # Carefully reconstruct the network location
        netloc = auth or ''
        if netloc:
            netloc += '@'
        netloc += host
        if port:
            netloc += ':' + str(port)

        # Bare domains aren't valid URLs.
        if not path:
            path = '/'

        if isinstance(params, (str, bytes)):
            params = to_native_string(params)

        enc_params = self._encode_params(params)
        if enc_params:
            if query:
                query = '%s&%s' % (query, enc_params)
            else:
                query = enc_params

        url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
        self.url = url
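The scheme defaulting above works because parse_url leaves scheme as None for bare hosts instead of raising; a quick check:

from urllib3.util import parse_url

print(parse_url("example.com/api").scheme)      # None  -> the method above assumes https
print(parse_url("https://example.com").scheme)  # https -> accepted
print(parse_url("http://example.com").scheme)   # http  -> rejected with InvalidURL above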
Esempio n. 49
0
    def prepare_url(self, url, params):
        """Prepares the given HTTP URL."""
        #: Accept objects that have string representations.
        #: We're unable to blindly call unicode/str functions
        #: as this will include the bytestring indicator (b'')
        #: on python 3.x.
        #: https://github.com/kennethreitz/requests/pull/2238
        if isinstance(url, bytes):
            url = url.decode('utf8')
        else:
            url = unicode(url) if is_py2 else str(url)

        # Don't do any URL preparation for non-HTTP schemes like `mailto`,
        # `data` etc to work around exceptions from `url_parse`, which
        # handles RFC 3986 only.
        if ':' in url and not url.lower().startswith('http'):
            self.url = url
            return

        # Support for unicode domain names and paths.
        try:
            scheme, auth, host, port, path, query, fragment = parse_url(url)
        except LocationParseError as e:
            raise InvalidURL(*e.args)

        if not scheme:
            raise MissingSchema("Invalid URL {0!r}: No schema supplied. "
                                "Perhaps you meant http://{0}?".format(
                                    to_native_string(url, 'utf8')))

        if not host:
            raise InvalidURL("Invalid URL %r: No host supplied" % url)

        # Only want to apply IDNA to the hostname
        try:
            host = host.encode('idna').decode('utf-8')
        except UnicodeError:
            raise InvalidURL('URL has an invalid label.')

        # Carefully reconstruct the network location
        netloc = auth or ''
        if netloc:
            netloc += '@'
        netloc += host
        if port:
            netloc += ':' + str(port)

        # Bare domains aren't valid URLs.
        if not path:
            path = '/'

        if is_py2:
            if isinstance(scheme, str):
                scheme = scheme.encode('utf-8')
            if isinstance(netloc, str):
                netloc = netloc.encode('utf-8')
            if isinstance(path, str):
                path = path.encode('utf-8')
            if isinstance(query, str):
                query = query.encode('utf-8')
            if isinstance(fragment, str):
                fragment = fragment.encode('utf-8')

        enc_params = self._encode_params(params)
        if enc_params:
            if query:
                query = '%s&%s' % (query, enc_params)
            else:
                query = enc_params

        url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
        self.url = url
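The codec-based IDNA step above (host.encode('idna')) converts internationalized hostnames to punycode and passes ASCII hostnames through unchanged:

print("münchen.de".encode("idna").decode("utf-8"))   # xn--mnchen-3ya.de
print("example.com".encode("idna").decode("utf-8"))  # example.com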
Esempio n. 50
0
    def get_name(self):
        parsed_url = parse_url(self.url)
        return parsed_url
Esempio n. 51
0
            with codecs.open(inventory_list_path, 'r', 'utf-8') as fin:
                targets = fin.readlines()
                for target in targets:
                    items = target.replace('\r', '').replace('\n', '').split('\t')
                    if len(items) != 2:
                        utility.print_message(FAIL, 'Invalid inventory target : {}'.format(target))
                        continue

                    # Check target URL.
                    port_num = ''
                    invent_url = ''
                    keyword = ''
                    try:
                        invent_url = items[0]
                        keyword = items[1]
                        parsed = util.parse_url(invent_url)

                        # Judge port number.
                        if parsed.port is None and parsed.scheme == 'https':
                            port_num = '443'
                        elif parsed.port is None and parsed.scheme == 'http':
                            port_num = '80'
                        elif parsed.port is not None:
                            port_num = str(parsed.port)
                        else:
                            utility.print_message(FAIL, 'Invalid URL : {}'.format(invent_url))
                            utility.write_log(30, 'Invalid URL : {}'.format(invent_url))
                            continue
                    except Exception as e:
                        utility.print_exception(e, 'Parsed error : {}'.format(invent_url))
                        utility.write_log(30, 'Parsed error : {}'.format(invent_url))
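The port-defaulting branch above distills to a small helper; a sketch under the same rules (the function name is illustrative):

from urllib3.util import parse_url

def effective_port(url):
    # Explicit port wins; otherwise fall back to the scheme default.
    parsed = parse_url(url)
    if parsed.port is not None:
        return str(parsed.port)
    if parsed.scheme == "https":
        return "443"
    if parsed.scheme == "http":
        return "80"
    raise ValueError("Invalid URL : {}".format(url))

print(effective_port("https://example.com"))      # 443
print(effective_port("http://example.com:8080"))  # 8080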
Esempio n. 52
0
    def prepare_url(self, url, params):
        """Prepares the given HTTP URL."""
        #: Accept objects that have string representations.
        try:
            url = unicode(url)
        except NameError:
            # We're on Python 3.
            url = str(url)
        except UnicodeDecodeError:
            pass

        # Don't do any URL preparation for oddball schemes
        if ':' in url and not url.lower().startswith('http'):
            self.url = url
            return

        # Support for unicode domain names and paths.
        scheme, auth, host, port, path, query, fragment = parse_url(url)

        if not scheme:
            raise MissingSchema("Invalid URL {0!r}: No schema supplied. "
                                "Perhaps you meant http://{0}?".format(url))

        if not host:
            raise InvalidURL("Invalid URL %r: No host supplied" % url)

        # Only want to apply IDNA to the hostname
        try:
            host = host.encode('idna').decode('utf-8')
        except UnicodeError:
            raise InvalidURL('URL has an invalid label.')

        # Carefully reconstruct the network location
        netloc = auth or ''
        if netloc:
            netloc += '@'
        netloc += host
        if port:
            netloc += ':' + str(port)

        # Bare domains aren't valid URLs.
        if not path:
            path = '/'

        if is_py2:
            if isinstance(scheme, str):
                scheme = scheme.encode('utf-8')
            if isinstance(netloc, str):
                netloc = netloc.encode('utf-8')
            if isinstance(path, str):
                path = path.encode('utf-8')
            if isinstance(query, str):
                query = query.encode('utf-8')
            if isinstance(fragment, str):
                fragment = fragment.encode('utf-8')

        enc_params = self._encode_params(params)
        if enc_params:
            if query:
                query = '%s&%s' % (query, enc_params)
            else:
                query = enc_params

        url = requote_uri(
            urlunparse([scheme, netloc, path, None, query, fragment]))
        self.url = url
Esempio n. 53
0
    def get_url_root(self, url):
        return util.parse_url(url).hostname
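Url.hostname is a urlparse-compatibility alias for Url.host in urllib3, so this is equivalent to util.parse_url(url).host:

from urllib3.util import parse_url

print(parse_url("https://example.com:8443/x").hostname)  # example.com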
Esempio n. 54
0
def read_legacy(component_type: str,
                ip_address: str,
                port: str,
                slave_id0: str,
                slave_id1: Optional[str] = None,
                slave_id2: Optional[str] = None,
                slave_id3: Optional[str] = None,
                batwrsame: Optional[int] = None,
                extprodakt: Optional[int] = None,
                zweiterspeicher: Optional[int] = None,
                subbat: Optional[int] = None,
                ip2address: Optional[str] = None,
                num: Optional[int] = None) -> None:
    def get_bat_state() -> Tuple[List, List]:
        def create_bat(modbus_id: int) -> bat.SolaredgeBat:
            component_config = SolaredgeBatSetup(
                id=num,
                configuration=SolaredgeBatConfiguration(modbus_id=modbus_id))
            return bat.SolaredgeBat(dev.device_config.id, component_config,
                                    dev.client)

        bats = [create_bat(1)]
        if zweiterspeicher == 1:
            bats.append(create_bat(2))
        soc_bat, power_bat = [], []
        for battery in bats:
            state = battery.read_state()
            power_bat.append(state.power)
            soc_bat.append(state.soc)
        return power_bat, soc_bat

    def get_external_inverter_state(dev: Device, id: int) -> InverterState:
        component_config = SolaredgeExternalInverterSetup(
            id=num,
            configuration=SolaredgeExternalInverterConfiguration(modbus_id=id))

        ext_inverter = external_inverter.SolaredgeExternalInverter(
            dev.device_config.id, component_config, dev.client)
        return ext_inverter.read_state()

    def create_inverter(modbus_id: int) -> inverter.SolaredgeInverter:
        component_config = SolaredgeInverterSetup(
            id=num,
            configuration=SolaredgeInverterConfiguration(modbus_id=modbus_id))
        return inverter.SolaredgeInverter(dev.device_config.id,
                                          component_config, dev.client)

    log.debug("Solaredge IP: " + ip_address + ":" + str(port))
    log.debug("Solaredge Slave-IDs: [" + str(slave_id0) + ", " +
              str(slave_id1) + ", " + str(slave_id2) + ", " + str(slave_id3) +
              "]")
    log.debug("Solaredge Bat-WR-gleiche IP: " + str(batwrsame) +
              ", Externer WR: " + str(extprodakt) + ", 2. Speicher: " +
              str(zweiterspeicher) + ", Speicherleistung subtrahieren: " +
              str(subbat) + " 2. IP: " + str(ip2address) + ", Num: " +
              str(num))

    if port == "":
        parsed_url = parse_url(ip_address)
        ip_address = parsed_url.hostname
        if parsed_url.port:
            port = parsed_url.port
        else:
            port = 502
    dev = Device(
        Solaredge(configuration=SolaredgeConfiguration(ip_address=ip_address,
                                                       port=int(port))))
    if component_type == "counter":
        dev.add_component(
            SolaredgeCounterSetup(id=num,
                                  configuration=SolaredgeCounterConfiguration(
                                      modbus_id=int(slave_id0))))
        log.debug('Solaredge ModbusID: ' + str(slave_id0))
        dev.update()
    elif component_type == "inverter":
        if ip2address == "none":
            modbus_ids = list(
                map(
                    int,
                    filter(lambda id: id.isnumeric(),
                           [slave_id0, slave_id1, slave_id2, slave_id3])))
            inverters = [
                create_inverter(modbus_id) for modbus_id in modbus_ids
            ]
            with SingleComponentUpdateContext(inverters[0].component_info):
                total_power = 0
                total_energy = 0
                total_currents = [0.0] * 3
                with dev.client:
                    for inv in inverters:
                        state = inv.read_state()
                        total_power += state.power
                        total_energy += state.exported
                        total_currents = list(
                            map(add, total_currents, state.currents))

                    if extprodakt:
                        state = get_external_inverter_state(
                            dev, int(slave_id0))
                        total_power -= state.power

                    if batwrsame == 1:
                        bat_power, soc_bat = get_bat_state()
                        if subbat == 1:
                            total_power -= sum(min(p, 0) for p in bat_power)
                        else:
                            total_power -= sum(bat_power)
                if batwrsame == 1:
                    get_bat_value_store(1).set(
                        BatState(power=sum(bat_power), soc=mean(soc_bat)))
                get_inverter_value_store(num).set(
                    InverterState(exported=total_energy,
                                  power=min(0, total_power),
                                  currents=total_currents))
        else:
            inv = create_inverter(int(slave_id0))
            with SingleComponentUpdateContext(inv.component_info):
                with dev.client:
                    state = inv.read_state()
                    total_power = state.power * -1
                    total_energy = state.exported

                if batwrsame == 1:
                    zweiterspeicher = 0
                    bat_power, soc_bat = get_bat_state()  # soc is needed for the BatState below
                    total_power -= sum(bat_power)
                    get_bat_value_store(1).set(
                        BatState(power=sum(bat_power), soc=mean(soc_bat)))
                device_config = Solaredge(configuration=SolaredgeConfiguration(
                    ip_address=ip2address))
                dev = Device(device_config)
                inv = create_inverter(int(slave_id0))
                with dev.client:
                    state = inv.read_state()
                    total_power -= state.power
                    total_energy += state.exported
                    if extprodakt:
                        state = get_external_inverter_state(
                            dev, int(slave_id0))
                        total_power -= state.power
                get_inverter_value_store(num).set(
                    InverterState(exported=total_energy, power=total_power))

    elif component_type == "bat":
        with SingleComponentUpdateContext(
                ComponentInfo(0, "Solaredge Speicher", "bat")):
            power_bat, soc_bat = get_bat_state()
            get_bat_value_store(1).set(
                BatState(power=sum(power_bat), soc=mean(soc_bat)))
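The fallback at the top of read_legacy (an empty port means ip_address may carry "host", "host:port", or a full URL) reduces to a small helper; a sketch, with 502 as the registered Modbus TCP default (the helper name is illustrative):

from urllib3.util import parse_url

def split_host_port(ip_address, default_port=502):
    parsed = parse_url(ip_address)
    return parsed.hostname, parsed.port or default_port

print(split_host_port("192.0.2.10"))       # ('192.0.2.10', 502)
print(split_host_port("192.0.2.10:1502"))  # ('192.0.2.10', 1502)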
Esempio n. 55
0
    @classmethod
    def from_str_url(cls, url: str):
        return cls(parse_url(url))
Esempio n. 56
0
def main():
    global config
    df = pd.read_excel(config.table)
    log_df = pd.DataFrame(columns=[
        "dataset_name",
        "resource_name",
        "resource_filename",
        "resource_url",
        "new_resource_url",
        "scraperwiki_name",
        "dir",
        "file",
        "status",
        "update_status",
    ])

    c = urllib3.PoolManager()
    i = 0
    additional_df = pd.DataFrame(
        columns=["decision", "dataset_name", "resource_name", "resource_url"])
    for resource_index, row in df.iterrows():
        i += 1
        dataset_name = str(row.dataset_name)
        resource_name = str(row.resource_name)
        # print ("%(i)3d %(dataset_name)30s %(resource_name)30s"%locals())
        resource_url = row.resource_url
        if resource_name.endswith(".csv") or resource_name.endswith(
                ".xls") or resource_name.endswith(".xlsx"):
            resource_filename = resource_name
        else:
            resource_filename = os.path.split(
                ut.parse_url(resource_url).path)[1]

        # localpath = os.path.join(os.path.join(config.target,dataset_name),resource_name)
        localpath = os.path.join(config.target, dataset_name)
        localfile = os.path.join(localpath, resource_filename)
        new_resource_url = None

        update_status = ""
        scraperwiki_resources = []
        if row.decision == config.decision:
            new_resource_url = config.url_prefix
            if new_resource_url[-1] != "/":
                new_resource_url += "/"
            new_resource_url += dataset_name + "/" + resource_filename
            if config.new_url_pattern not in new_resource_url:
                logging.warning(
                    "New url '%s' does not contain the new-url-pattern '%s'" %
                    (new_resource_url, config.new_url_pattern))

            dataset = Dataset.read_from_hdx(dataset_name)
            if dataset is None:
                status = "ERROR"
                update_status = "DATASET NOT FOUND"
            else:
                resource_index = resource_number_from_url(
                    dataset, resource_url)
                # Renamed from `i` to avoid shadowing the outer row counter.
                for res_idx, r in enumerate(dataset.get_resources()):
                    if (config.old_url_pattern in r["url"]
                            or config.new_url_pattern
                            in r["url"]) and res_idx != resource_index:
                        additional_df = additional_df.append(dict(
                            decision=row.decision,
                            dataset_name=dataset_name,
                            resource_name=r["name"],
                            resource_url=r["url"]),
                                                             ignore_index=True)
                additional_df.to_csv(config.additional, index_label='Index')

                if config.update_url:
                    logging.info(
                        "Update url %(dataset_name)s, resource: %(resource_name)s to %(new_resource_url)s"
                        % locals())

                    try:
                        resource = resource_from_name(dataset, resource_name)
                        if resource is None:
                            update_status = "RESOURCE NOT FOUND"
                        else:
                            resource["url"] = new_resource_url
                            resource.update_in_hdx()
                            update_status = "OK"
                    except Exception:
                        logging.error(
                            "Update url failed for %(dataset_name)s resource %(resource_name)s"
                            % locals())
                        update_status = "ERROR"
                        traceback.print_exc()

                try:
                    os.makedirs(localpath)
                except OSError:
                    pass  # directory already exists
                logging.info("Process dataset %(dataset_name)s" % locals())
                logging.info("Fetch data from url %(dataset_name)s" % locals())
                if config.refresh or not os.path.exists(localfile):
                    try:
                        with c.request(
                                'GET', resource_url,
                                preload_content=False) as response, open(
                                    localfile, 'wb') as f:
                            shutil.copyfileobj(response, f)
                        status = "OK"
                    except Exception:
                        logging.exception(
                            "Download error for dataset %(dataset_name)s" %
                            locals())
                        status = "ERROR"
                else:
                    status = "Ok"

                if config.upload:
                    logging.info(
                        "Upload %(dataset_name)s, resource: %(resource_name)s"
                        % locals())

                    try:
                        resource = resource_from_name(dataset, resource_name)
                        if resource is None:
                            update_status = "RESOURCE NOT FOUND"
                        else:
                            try:
                                file_type = os.path.splitext(
                                    localfile)[1][1:].lower()
                            except Exception:
                                file_type = "csv"

                            resource.set_file_type(
                                file_type)  # set the file type to eg. csv
                            resource.set_file_to_upload(localfile)
                            resource.update_in_hdx()
                            update_status = "OK"
                    except Exception:
                        logging.error(
                            "Uploading %(dataset_name)s resource %(resource_name)s failed"
                            % locals())
                        update_status = "ERROR"
                        traceback.print_exc()

        else:
            status = "SKIPPED"

        log_df = log_df.append(dict(dataset_name=dataset_name,
                                    resource_name=resource_name,
                                    resource_filename=resource_filename,
                                    resource_url=resource_url,
                                    new_resource_url=new_resource_url,
                                    scraperwiki_name=row.scraperwiki_name,
                                    dir=localpath,
                                    file=localfile,
                                    status=status,
                                    update_status=update_status),
                               ignore_index=True)
        log_df.to_csv(config.processed, index_label='Index')
    additional_df["additional"] = 1
    df["additional"] = 0
    df = df.append(additional_df, ignore_index=True)
    writer = pd.ExcelWriter(config.additional_table)
    df.to_excel(writer)
    writer.close()  # close() persists the workbook; a separate save() call is redundant
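The filename fallback above (splitting the parsed URL path when the resource name has no recognized extension) behaves like this; the URL is illustrative:

import os
from urllib3.util import parse_url

url = "https://data.example.org/files/report-2020.csv?raw=1"
print(os.path.split(parse_url(url).path)[1])  # report-2020.csv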
Esempio n. 57
0
    def custom_search(self, query, max_page_count=1, target_domain=''):
        # Google Custom Search API.
        self.utility.write_log(
            20,
            '[In] Execute Google custom search [{}].'.format(self.file_name))

        # Setting of Google Custom Search.
        service = None
        if self.utility.proxy != '':
            # Set proxy.
            self.utility.print_message(
                WARNING, 'Set proxy server: {}'.format(self.utility.proxy))
            parsed = util.parse_url(self.utility.proxy)
            proxy = None
            if self.utility.proxy_pass != '':
                proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                                           proxy_host=parsed.host,
                                           proxy_port=parsed.port,
                                           proxy_user=self.utility.proxy_user,
                                           proxy_pass=self.utility.proxy_pass)
            else:
                proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                                           proxy_host=parsed.host,
                                           proxy_port=parsed.port)
            my_http = httplib2.Http(proxy_info=proxy,
                                    disable_ssl_certificate_validation=True)
            service = build("customsearch",
                            "v1",
                            developerKey=self.api_key,
                            http=my_http)
        else:
            # None proxy.
            service = build("customsearch", "v1", developerKey=self.api_key)

        # Execute search.
        urls = []
        sub_domain_list = []
        result_count = 0
        start_index = self.start_index
        try:
            search_count = 0
            search_urls = []
            while search_count < max_page_count:
                self.utility.print_message(OK,
                                           'Using query : {}'.format(query))
                response = service.cse().list(
                    q=query,
                    cx=self.search_engine_id,
                    num=10,
                    start=start_index,
                    filter='0',
                    safe='off',
                ).execute()

                # Get finding counts.
                result_count = int(
                    response.get('searchInformation').get('totalResults'))

                # Get extracted link (url).
                if result_count != 0:
                    items = response['items']
                    for item in items:
                        urls.append(item['link'])
                        search_urls.append(item['link'])

                # Set new query.
                if result_count <= 10 or max_page_count == 1:
                    tmp_sub_domain_list = self.utility.transform_url_hostname_list(
                        search_urls)
                    # Update query for report.
                    for sub_domain in tmp_sub_domain_list:
                        if target_domain != sub_domain and sub_domain not in sub_domain_list:
                            query += ' -site:' + sub_domain
                    sub_domain_list.extend(tmp_sub_domain_list)
                    break
                else:
                    # Refine search range using "-site" option.
                    tmp_sub_domain_list = self.utility.transform_url_hostname_list(
                        search_urls)
                    for sub_domain in tmp_sub_domain_list:
                        if target_domain != sub_domain and sub_domain not in sub_domain_list:
                            query += ' -site:' + sub_domain
                        sub_domain_list.append(sub_domain)
                search_count += 1
                time.sleep(self.delay_time)
        except Exception as e:
            msg = 'Google custom search is failure : {}'.format(e)
            self.utility.print_exception(e, msg)
            self.utility.write_log(30, msg)
            self.utility.write_log(
                20, '[Out] Execute Google custom search [{}].'.format(
                    self.file_name))
            return urls, result_count, query, sub_domain_list

        self.utility.write_log(
            20,
            '[Out] Execute Google custom search [{}].'.format(self.file_name))
        return urls, result_count, query, list(set(sub_domain_list))
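httplib2.ProxyInfo wants the proxy host and port as separate values, which is exactly the split parse_url performs above; a quick illustration:

from urllib3.util import parse_url

parsed = parse_url("http://proxy.example.net:3128")
print(parsed.host, parsed.port)  # proxy.example.net 3128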
Esempio n. 58
0
                    target_list = random.sample(target[2], len(target[2]))
                if max_target_url != 0 and max_target_url < len(target_list):
                    utility.print_message(
                        WARNING, 'Cutting target list {} to {}.'.format(
                            len(target[2]), max_target_url))
                    target_list = target_list[:max_target_url]

                for count, target_url in enumerate(target_list):
                    utility.print_message(
                        NOTE, '{}/{} Start analyzing: {}'.format(
                            count + 1, len(target_list), target_url))

                    # Check target url.
                    parsed = None
                    try:
                        parsed = util.parse_url(target_url)
                    except Exception as e:
                        utility.print_exception(
                            e, 'Parsed error : {}'.format(target_url))
                        utility.write_log(
                            30, 'Parsed error : {}'.format(target_url))
                        continue

                    # Get HTTP response (header + body).
                    date = utility.get_current_date('%Y%m%d%H%M%S%f')[:-3]
                    print_date = utility.transform_date_string(
                        utility.transform_date_object(date[:-3],
                                                      '%Y%m%d%H%M%S'))
                    _, server_header, res_header, res_body, _ = utility.send_request(
                        'GET', target_url)
Esempio n. 59
0
def get_url_name(url):
    if not isinstance(url, six.string_types):
        return
    return urllib3_util.parse_url(url).host
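Usage sketch for the helper above: non-string inputs fall through the isinstance guard and the function returns None implicitly.

print(get_url_name("https://example.com/a/b"))  # example.com
print(get_url_name(None))                       # None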
Esempio n. 60
0
    def _prepare_api_url(self, url, p):
        scheme, auth, host, port, path, query, fragment = parse_url(url)
        if scheme is None or scheme == "http":
            return f"http://{host}:{p}"
        else:
            raise InvalidSchema("Invalid scheme %r: only plain http is supported" % scheme)