Example #1
 def get_download_links(self, folderid=None, fields=""):
     download_links = dict()
     download_api_url = self.base_url[:]
     fields = (set(fields.split(",")) | set(["filename"])) - set([""])
     if len(fields) > 1:
         supported_fields = set("downloads,lastdownload,filename,size,killcode,serverid,type,x,y,realfolder,bodtype,killdeadline,licids,uploadtime".split(","))
         for field in fields:
             if field not in supported_fields:
                 raise HosterAPIError("field: %s not supported" % field)
     query = urllib.urlencode({"sub": "listfiles",
                               "fields": ",".join(fields)})
     download_api_url[4] += "&"+query
     if folderid is not None:
         query = urllib.urlencode({"realfolder": folderid})
         download_api_url[4] += "&"+query
     url = urlunparse(download_api_url)
     lines = urllib2.urlopen(url).readlines()
     self._catch_error(lines, url)
     for line in lines:
         try:
             rows = line.split(",")
             fileid = rows[0]
             properties = dict(zip(fields, rows[1:]))
             properties["filename"] = properties["filename"].replace("\n", "")
             download_url = [self.scheme, "rapidshare.com", "files/%s/%s" % (fileid, properties["filename"]), "", "", ""]
             properties["url"] = urlunparse(download_url)
             download_links[fileid] = properties
         except (ValueError, KeyError):
             pass
     return download_links
Example #2
def translate_git_url(git_url, commit_id):
    """Create a real git URL based on defined translations.

    :param git_url: The git URL as obtained from the backend.
    :param commit_id: The git SHA.
    :return: The base URL to create URLs, and the real commit URL.
    """

    base_url = None
    commit_url = None

    if git_url and commit_id:
        t_url = urlparse.urlparse(git_url)
        known_git_urls = CONFIG_GET("KNOWN_GIT_URLS")

        if t_url.netloc in known_git_urls.keys():
            known_git = known_git_urls.get(t_url.netloc)

            path = t_url.path
            for replace_rule in known_git[3]:
                path = path.replace(*replace_rule)

            base_url = urlparse.urlunparse((
                known_git[0], t_url.netloc, known_git[1] % path,
                "", "", ""
            ))
            commit_url = urlparse.urlunparse((
                known_git[0], t_url.netloc,
                (known_git[2] % path) + commit_id,
                "", "", ""
            ))

    return base_url, commit_url
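The KNOWN_GIT_URLS entries above are indexed positionally: scheme, base-path template, commit-path template, and a list of path replacements. A hedged sketch of what CONFIG_GET("KNOWN_GIT_URLS") might return, and a call against it; the git.example.org entry is invented for illustration:

# Assumed shape only; not real configuration.
KNOWN_GIT_URLS = {
    "git.example.org": (
        "https",                 # known_git[0]: scheme for the generated URLs
        "/cgit%s",               # known_git[1]: base-path template
        "/cgit%s/commit/?id=",   # known_git[2]: commit-path template
        [("/pub/scm", "")],      # known_git[3]: (old, new) path replacements
    ),
}

# base_url   -> 'https://git.example.org/cgit/linux.git'
# commit_url -> 'https://git.example.org/cgit/linux.git/commit/?id=abc123'
base_url, commit_url = translate_git_url(
    "git://git.example.org/pub/scm/linux.git", "abc123")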
Example #3
def download_css(soup, url_parts, dst_folder, index_path):
    """ 
    parse css src's and download css to dst_folder 
    """
    tmp_url_parts = deepcopy(url_parts)
    for css in soup.findAll("link", {"rel": "stylesheet"}):
        if css.has_key("href"):
            file_name = css["href"].split("/")[-1]
            file_name = sanitize_file_name(file_name)

            logging.debug("Downloading css " + file_name + "...")

            new_src = create_directories(dst_folder, list(urlparse.urlparse(css["href"]))[2])
            full_path = os.path.join(dst_folder, new_src)
            outpath = os.path.join(full_path, file_name)

            if css["href"].lower().startswith("http"):
                tmp_url_parts = list(urlparse.urlparse(css["href"]))
                download_file(css["href"], outpath)
            else:
                tmp_url_parts[2] = css["href"]
                download_file(urlparse.urlunparse(tmp_url_parts), outpath)

            root_url = urlparse.urlunparse(tmp_url_parts)
            file_name_index = root_url.index(file_name)
            root_url = root_url[:file_name_index]
            download_css_imports(
                soup, list(urlparse.urlparse(css["href"])), root_url, new_src + "/" + file_name, dst_folder, index_path
            )
            css["href"] = css["href"].replace(css["href"], index_path + "/" + new_src + "/" + file_name)
Example #4
def _add_utm_param(url, type, source, campaign, name, matching):
    url_parts = list(urlparse.urlparse(url))
    if not _url_param_safe_check(url_parts[4]):
        return urlparse.urlunparse(url_parts)
    query = dict(urlparse.parse_qsl(url_parts[4]))

    if type == 'banner':
        utm_medium = 'cpm_yottos'
    else:
        utm_medium = 'cpc_yottos'
    utm_source = str(source or 'other')
    utm_campaign = str(campaign)
    utm_content = str(name)
    if 'utm_source' in query:
        utm_term = str(source or 'other')
    else:
        utm_term = str(matching)

    # Only add the utm_* keys that are not already in the query string.
    query.setdefault('utm_medium', utm_medium)
    query.setdefault('utm_source', utm_source)
    query.setdefault('utm_campaign', utm_campaign)
    query.setdefault('utm_content', utm_content)
    query.setdefault('utm_term', utm_term)
    url_parts[4] = urllib.urlencode(query)
    return urlparse.urlunparse(url_parts)
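A hedged usage sketch for the helper above; _url_param_safe_check is not shown, so assume it accepts this query string, and note that dict ordering leaves the exact parameter order unspecified:

tagged = _add_utm_param('http://shop.example.com/item?id=7',
                        'banner', 'partner_feed', 'summer_sale', 'ad1', 'exact')
# Existing params survive; utm_medium=cpm_yottos, utm_source, utm_campaign,
# utm_content and utm_term are appended only when not already present.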
Example #5
    def _secure_request(self, url, method, data=None, files=None, headers=None,
                        raw=False, send_as_json=True, content_type=None,
                        **request_kwargs):

        full_url = self.build_url(url)

        # Add token (if it's not already there)
        if self._token:
            parsed = list(urlparse.urlparse(full_url))
            if not parsed[4]:  # query
                parsed[4] = 'token=%s' % self._token
                full_url = urlparse.urlunparse(parsed)
            elif 'token' not in urlparse.parse_qs(parsed[4]):
                parsed[4] += '&token=%s' % self._token
                full_url = urlparse.urlunparse(parsed)
        headers = headers or {}

        # If files are being sent, we cannot encode data as JSON
        if send_as_json and not files:
            headers['content-type'] = 'application/json'
            data = json.dumps(data or {})
        else:
            if content_type:
                headers['content-type'] = content_type
            data = data or ''

        method = getattr(requests, method, None)
        response = method(full_url, data=data, files=files, headers=headers,
                          **request_kwargs)
        self.check_for_errors(response)  # Raise exception if something failed

        if raw or not response.content:
            return response.content
        return json.loads(response.text)
Example #6
    def _stripSitePath(self, uri, parms):
        """
            Strip off our site-host and site-path from 'uri'.
        """
        ( scheme
        , netloc
        , path
        , url_parm
        , query
        , fragment
        ) = urlparse.urlparse( uri )

        site_host = urlparse.urlunparse( ( scheme, netloc, '', '', '', '' ) )

        if scheme and parms.get( 'site_host' ) is None:
            parms[ 'site_host' ] = site_host

        if site_host != parms[ 'site_host' ]: # XXX foreign site!  Punt!
            return None, None

        if self._site_path and path.startswith( self._site_path ):
            path = path[ len( self._site_path ) : ]

        uri = urlparse.urlunparse(
                            ( '', '', path, url_parm, query, fragment ) )

        return uri, query
Example #7
def _insert_links(data_dict, limit, offset):
    '''Adds links to the start, next and previous pages (same limit,
    offset shifted by +/- limit) under data_dict['_links'].'''
    data_dict['_links'] = {}

    # get the url from the request
    urlstring = toolkit.request.environ['CKAN_CURRENT_URL']

    # change the offset in the url
    parsed = list(urlparse.urlparse(urlstring))
    query = urllib2.unquote(parsed[4])

    arguments = dict(urlparse.parse_qsl(query))
    arguments_start = dict(arguments)
    arguments_prev = dict(arguments)
    arguments_next = dict(arguments)
    if 'offset' in arguments_start:
        arguments_start.pop('offset')
    arguments_next['offset'] = int(offset) + int(limit)
    arguments_prev['offset'] = int(offset) - int(limit)

    parsed_start = parsed[:]
    parsed_prev = parsed[:]
    parsed_next = parsed[:]
    parsed_start[4] = urllib.urlencode(arguments_start)
    parsed_next[4] = urllib.urlencode(arguments_next)
    parsed_prev[4] = urllib.urlencode(arguments_prev)

    # add the links to the data dict
    data_dict['_links']['start'] = urlparse.urlunparse(parsed_start)
    data_dict['_links']['next'] = urlparse.urlunparse(parsed_next)
    if int(offset) - int(limit) > 0:
        data_dict['_links']['prev'] = urlparse.urlunparse(parsed_prev)
Example #8
 def validate_ticket(self, ticket, request):
     service_name = self.service_name
     ticket_name = self.ticket_name
     
     this_url = self.get_url(request)
     p = urlparse.urlparse(this_url)
     qs_map = urlparse.parse_qs(p.query)
     if ticket_name in qs_map:
         del qs_map[ticket_name]
      param_str = urlencode(qs_map, doseq=True)
     p = urlparse.ParseResult(*tuple(p[:4] + (param_str,) + p[5:]))
     service_url = urlparse.urlunparse(p)
     
     params = {
             service_name: service_url,
             ticket_name: ticket,}
     param_str = urlencode(params)
     p = urlparse.urlparse(self.cas_info['service_validate_url'])
     p = urlparse.ParseResult(*tuple(p[:4] + (param_str,) + p[5:]))
     service_validate_url = urlparse.urlunparse(p)
     
     log.msg("[INFO] requesting URL '%s' ..." % service_validate_url)
     http_client = HTTPClient(self.agent) 
     d = http_client.get(service_validate_url)
     d.addCallback(treq.content)
     d.addCallback(self.parse_sv_results, service_url, ticket, request)
     return d
Example #9
def urlunjoin(base_url, url):
    if url is None:
        return str(base_url)
    else:
        url = str(url)
    if base_url is None:
        return str(url)
    else:
        base_url = str(base_url)
    if url.startswith('_:'): # you might expect that '_' would be parsed as a scheme by urlparse, but it isn't
        return url
    else:
        o = urlparse.urlparse(url)
        if (o.scheme == '' or o.scheme == 'http' or o.scheme == 'https'):
            if o.netloc == '': # http(s) relative url
                if len(o.path) > 0 and o.path[0] == '/':
                    return url
                else:
                    abs_url = urlparse.urljoin(base_url, url) #make it absolute first 
                    o = list(urlparse.urlparse(abs_url))
                    o[0] = o[1] = '' # blank out the scheme and the netloc
                    return urlparse.urlunparse(o)
            else:
                b = urlparse.urlparse(base_url)
                if o.netloc == b.netloc:
                    o = list(o)
                    o[0] = o[1] = '' # blank out the scheme and the netloc
                    return urlparse.urlunparse(o)
                else:
                    return url
        else:
            return url
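A few hedged sanity checks of the behavior above (assuming Python 2's urlparse module is imported):

# Relative URL: resolved against the base, then scheme/netloc blanked out.
assert urlunjoin('http://example.com/a/', 'b/c.html') == '/a/b/c.html'
# Same-host absolute URL: reduced to a host-relative path.
assert urlunjoin('http://example.com/a/', 'http://example.com/x') == '/x'
# Foreign host: returned unchanged.
assert urlunjoin('http://example.com/a/', 'http://other.org/x') == 'http://other.org/x'
# Blank-node style identifiers pass straight through.
assert urlunjoin('http://example.com/a/', '_:b1') == '_:b1'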
Example #10
    def _do_lastfm_query(self, type, method, **kwargs):

        args = {
            'method': method,
            'api_key': self.key,
            }
        for k, v in kwargs.items():
            args[k] = v.encode('utf8')

        s = ''
        for k in sorted(args.keys()):
            s += k + args[k]
        s += self.secret

        if 'sk' in args or 'token' in args:
            args['api_sig'] = hashlib.md5(s).hexdigest()

        if type == 'GET':
            url = urlparse.urlunparse(('http',
                'ws.audioscrobbler.com',
                '/2.0/',
                '',
                urllib.urlencode(args),
                ''))
            return self._do_raw_lastfm_query(url)
        elif type == 'POST':
            url = urlparse.urlunparse(('http',
                'ws.audioscrobbler.com',
                '/2.0/', '', '', ''))
            self._do_lastfm_post(url, urllib.urlencode(args))
Example #11
    def __init__(self, merchant_code, secret_code, merchant_titular, merchant_name, terminal_number, return_url=None, transaction_type=None, lang=None, domain=None, domain_protocol="http", currency_code=None, redirect_url=None, **kwargs):
        self._merchant_code = merchant_code
        self._secret_code = secret_code
        self._merchant_titular = merchant_titular
        self._merchant_name = merchant_name
        self._terminal_number = terminal_number
        self._redirect_url = redirect_url

        self._lang = lang or self._lang
        self._transaction_type = transaction_type or self._transaction_type

        self._currency_code = currency_code or self._currency_code

        self._domain = domain or urlparse.urlunparse((
                    domain_protocol,
                    Site.objects.get_current().domain,
                    '/',
                    None,
                    None,
                    None))

        domain = urlparse.urlparse(self._domain)
        merchant_path = reverse('process_payment', args=[kwargs.get('variant')])
        self._merchant_url = urlparse.urlunparse((domain.scheme, domain.netloc, merchant_path, None, None, None))

        self._return_url = return_url

        return super(CaixaCatalunyaBaseProvider, self).__init__(**kwargs)
Example #12
	def process(self):
		parsed = urlparse.urlparse(self.uri)
		protocol = parsed[0]
		host = parsed[1]
		port = self.ports[protocol]
		if ':' in host:
			host, port = host.split(':')
			port = int(port)
		rest = urlparse.urlunparse(('', '') + parsed[2:])
		if not rest:
			rest = rest + '/'
		class_ = self.protocols[protocol]
		headers = self.getAllHeaders().copy()
		if 'host' not in headers:
			headers['host'] = host
			real_host = host
		else:
			real_host = headers['host']
		self.path = urlparse.urlunparse((protocol, real_host) + parsed[2:])
		self.content.seek(0, 0)
		s = self.content.read()
		clientFactory = class_(self.method, rest, self.clientproto, headers,
							   s, self)
		# The magic line for SSL support!
		if self.useSSL:
			self.reactor.connectSSL(host, port, clientFactory, ssl.ClientContextFactory())
		else:
			self.reactor.connectTCP(host, port, clientFactory)
Example #13
def open_with_auth(url):
    """Open a urllib2 request, handling HTTP authentication"""

    scheme, netloc, path, params, query, frag = urlparse.urlparse(url)

    if scheme in ('http', 'https'):
        auth, host = urllib2.splituser(netloc)
    else:
        auth = None

    if auth:
        auth = "Basic " + urllib2.unquote(auth).encode('base64').strip()
        new_url = urlparse.urlunparse((scheme,host,path,params,query,frag))
        request = urllib2.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib2.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = urllib2.urlopen(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url)
        if s2==scheme and h2==host:
            fp.url = urlparse.urlunparse((s2,netloc,path2,param2,query2,frag2))

    return fp
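A hedged usage sketch (user_agent must already be defined at module level; the host and credentials below are placeholders):

# Credentials embedded in the netloc become a Basic Authorization header.
fp = open_with_auth('http://user:secret@pypi.example.org/simple/')
page = fp.read()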
Example #14
    def do_METHOD_Direct(self):
        scheme, netloc, path, params, query, fragment = urlparse.urlparse(self.path, 'http')
        try:
            host, _, port = netloc.rpartition(':')
            port = int(port)
        except ValueError:
            host = netloc
            port = 80
        try:
            self.log_request()
            if not common.PROXY_ENABLE:
                sock = socket.create_connection((host, port))
                self.headers['connection'] = 'close'
                data = '%s %s %s\r\n'  % (self.command, urlparse.urlunparse(('', '', path, params, query, '')), self.request_version)
                data += ''.join('%s: %s\r\n' % (k, self.headers[k]) for k in self.headers if not k.startswith('proxy-'))
                data += '\r\n'
            else:
                sock = socket.create_connection((common.PROXY_HOST, common.PROXY_PORT))
                host = common.HOSTS_MAP.get(host, host)
                url = urlparse.urlunparse((scheme, host + ('' if port == 80 else ':%d' % port), path, params, query, ''))
                data ='%s %s %s\r\n'  % (self.command, url, self.request_version)
                data += ''.join('%s: %s\r\n' % (k, self.headers[k]) for k in self.headers if k != 'host')
                data += 'Host: %s\r\n' % netloc
                if common.PROXY_USERNAME and not common.PROXY_NTLM:
                    data += '%s\r\n' % proxy_auth_header(common.PROXY_USERNAME, common.PROXY_PASSWORD)
                data += 'Proxy-connection: close\r\n'
                data += '\r\n'

            content_length = int(self.headers.get('content-length', 0))
            if content_length > 0:
                data += self.rfile.read(content_length)
            sock.sendall(data)
            socket_forward(self.connection, sock)
        except Exception as ex:
            logging.exception('GaeProxyHandler.do_METHOD_Direct Error, %s', ex)
Example #15
def form_url(parenturl,url):
     url = url.strip() # ran across an image with a space in the
                       # src. Browser handled it, so we'd better, too.
 
     if "//" in url or parenturl == None:
         returl = url
     else:
         parsedUrl = urlparse.urlparse(parenturl)
         if url.startswith("/") :
             returl = urlparse.urlunparse(
                 (parsedUrl.scheme,
                  parsedUrl.netloc,
                  url,
                  '','',''))
         else:
             toppath=""
             if parsedUrl.path.endswith("/"):
                 toppath = parsedUrl.path
             else:
                 toppath = parsedUrl.path[:parsedUrl.path.rindex('/')]
             returl = urlparse.urlunparse(
                 (parsedUrl.scheme,
                  parsedUrl.netloc,
                  toppath + '/' + url,
                  '','',''))
     return returl
Example #16
def canonURL(url, parent_domain):

    (scheme, netloc, path, parameters, query, fragment) = urlparse(url)

    parent = urlparse(parent_domain)

    if not netloc and not path:
        return ""

    if not netloc and path.startswith("."):
        new_url = urljoin(parent_domain, path)
        (scheme, netloc, path, parameters, query, fragment) = urlparse(new_url)

    elif not netloc and path:
        netloc = parent.netloc

    netloc_lower = netloc.lower()
    netloc = netloc_lower.split(":")[0]

    prev_path = path
    # Collapse redundant path segments until the path stops changing.
    while 1:
        path = collapse_url.sub('/', path, 1)
        if prev_path == path:
            break
        prev_path = path

    path = unquote(path)

    canon_url = urlunparse((scheme, netloc, path, "", "", ""))
    return canon_url
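The collapse_url pattern used in the fixed-point loop above is not part of the snippet. A plausible definition, assuming its job is to squeeze duplicate slashes and '/./' segments down to a single '/', one occurrence per pass:

import re

# Assumed, not taken from the original module.
collapse_url = re.compile(r'/\./|//')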
Example #17
 def doHarvest(self, fromDate, until):
     lrUrl = self.config['lr']['url']
     if not fromDate:
         fromDate = self.config['lr']['first_run_start_date']
     urlParts = urlparse.urlparse(lrUrl)
     params = {"until": until}
     if fromDate:
         params['from'] = fromDate
     newQuery = urllib.urlencode(params)
     lrUrl = urlparse.urlunparse((urlParts[0],
                                  urlParts[1],
                                  '/harvest/listrecords',
                                  urlParts[3],
                                  newQuery,
                                  urlParts[5]))
     resumption_token = self.harvestData(lrUrl)
     while resumption_token is not None:
         newQuery = urllib.urlencode({"resumption_token": resumption_token})
         lrUrl = urlparse.urlunparse((urlParts[0],
                                  urlParts[1],
                                  '/harvest/listrecords',
                                  urlParts[3],
                                  newQuery,
                                  urlParts[5]))
         resumption_token = self.harvestData(lrUrl)
Example #18
    def setUp(self):
        super(TestShotgunDownloadAndUnpack, self).setUp()

        zip_file_location = os.path.join(self.fixtures_root, "misc", "zip")
        # Identify the source file to "download"
        self.download_source = os.path.join(zip_file_location, "tank_core.zip")
        # store the expected contents of the zip, to ensure it's properly
        # extracted.
        self.expected_output_txt = os.path.join(zip_file_location, "tank_core.txt")
        self.expected_output = open(self.expected_output_txt).read().split("\n")

        # Construct URLs from the source file name
        # "file" will be used for the protocol, so this URL will look like
        # `file:///fixtures_root/misc/zip/tank_core.zip`
        self.good_zip_url = urlparse.urlunparse(
            ("file", None, self.download_source, None, None, None)
        )
        self.bad_zip_url = urlparse.urlunparse(
            ("file", None, self.download_source, None, None, None)
        )

        # Temporary destination to unpack sources to.
        self.download_destination = os.path.join(
            self.tank_temp, self.short_test_name, "test_unpack"
        )
        os.makedirs(os.path.dirname(self.download_destination))
        if os.path.exists(self.download_destination):
            os.remove(self.download_destination)

        # Make sure mockgun is properly configured
        if self.mockgun.config.server is None:
            self.mockgun.config.server = "unit_test_mock_sg"
Example #19
 def validate_ticket(self, ticket, request):
     service_name = self.service_name
     ticket_name = self.ticket_name
     this_url = self.get_url(request)
     p = urlparse.urlparse(this_url)
     qs_map = urlparse.parse_qs(p.query)
     if ticket_name in qs_map:
         del qs_map[ticket_name]
     param_str = urlencode(qs_map, doseq=True)
     p = urlparse.ParseResult(*tuple(p[:4] + (param_str,) + p[5:]))
     service_url = urlparse.urlunparse(p)
     params = {
             service_name: service_url,
             ticket_name: ticket,}
     param_str = urlencode(params, doseq=True)
     p = urlparse.urlparse(self.cas_info['service_validate_url'])
     p = urlparse.ParseResult(*tuple(p[:4] + (param_str,) + p[5:]))
     service_validate_url = urlparse.urlunparse(p)
     self.log(
         "Requesting service-validate URL => '{0}' ...".format(
             service_validate_url))
     http_client = HTTPClient(self.cas_agent) 
     d = http_client.get(service_validate_url)
     d.addCallback(treq.content)
     d.addCallback(self.parse_sv_results, service_url, ticket, request)
     return d
Example #20
def get_canonical_and_alternates_urls(url, drop_ln=True, washed_argd=None, quote_path=False):
    """
    Given an Invenio URL returns a tuple with two elements. The first is the
    canonical URL, that is the original URL with CFG_SITE_URL prefix, and
    where the ln= argument stripped. The second element element is mapping,
    language code -> alternate URL

    @param quote_path: if True, the path section of the given C{url}
                       is quoted according to RFC 2396
    """
    dummy_scheme, dummy_netloc, path, dummy_params, query, fragment = urlparse(url)
    canonical_scheme, canonical_netloc = urlparse(CFG_SITE_URL)[0:2]
    parsed_query = washed_argd or parse_qsl(query)
    no_ln_parsed_query = [(key, value) for (key, value) in parsed_query if key != "ln"]
    if drop_ln:
        canonical_parsed_query = no_ln_parsed_query
    else:
        canonical_parsed_query = parsed_query
    if quote_path:
        path = urllib.quote(path)
    canonical_query = urlencode(canonical_parsed_query)
    canonical_url = urlunparse((canonical_scheme, canonical_netloc, path, dummy_params, canonical_query, fragment))
    alternate_urls = {}
    for ln in CFG_SITE_LANGS:
        alternate_query = urlencode(no_ln_parsed_query + [("ln", ln)])
        alternate_url = urlunparse((canonical_scheme, canonical_netloc, path, dummy_params, alternate_query, fragment))
        alternate_urls[ln] = alternate_url
    return canonical_url, alternate_urls
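A hedged usage sketch; the CFG_SITE_URL and CFG_SITE_LANGS values below are placeholders, not real Invenio configuration:

# With CFG_SITE_URL = 'http://demo.example.org' and CFG_SITE_LANGS = ['en', 'fr']:
canonical, alternates = get_canonical_and_alternates_urls(
    'http://demo.example.org/record/1?ln=fr&of=hd')
# canonical  -> 'http://demo.example.org/record/1?of=hd'
# alternates -> {'en': '...?of=hd&ln=en', 'fr': '...?of=hd&ln=fr'}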
Example #21
def format_message(template, config, first_name=None, last_name=None, uid=None, target_email=None):
	first_name = ('Alice' if not isinstance(first_name, (str, unicode)) else first_name)
	last_name = ('Liddle' if not isinstance(last_name, (str, unicode)) else last_name)
	target_email = ('*****@*****.**' if not isinstance(target_email, (str, unicode)) else target_email)
	uid = (uid or config['server_config'].get('server.secret_id') or make_uid())

	template = template_environment.from_string(template)
	template_vars = {}
	template_vars['uid'] = uid
	template_vars['first_name'] = first_name
	template_vars['last_name'] = last_name
	template_vars['email_address'] = target_email
	template_vars['company_name'] = config.get('mailer.company_name', '')

	webserver_url = config.get('mailer.webserver_url', '')
	webserver_url = urlparse.urlparse(webserver_url)
	tracking_image = config['server_config']['server.tracking_image']
	template_vars['webserver'] = webserver_url.netloc
	tracking_url = urlparse.urlunparse((webserver_url.scheme, webserver_url.netloc, tracking_image, '', 'id=' + uid, ''))
	webserver_url = urlparse.urlunparse((webserver_url.scheme, webserver_url.netloc, webserver_url.path, '', '', ''))
	template_vars['tracking_dot_image_tag'] = "<img src=\"{0}\" style=\"display:none\" />".format(tracking_url)

	template_vars_url = {}
	template_vars_url['rickroll'] = 'http://www.youtube.com/watch?v=oHg5SJYRHA0'
	template_vars_url['webserver'] = webserver_url + '?id=' + uid
	template_vars_url['webserver_raw'] = webserver_url
	template_vars_url['tracking_dot'] = tracking_url
	template_vars['url'] = template_vars_url
	template_vars.update(template_environment.standard_variables)
	return template.render(template_vars)
Example #22
def open_with_auth(url):
    """Open a urllib2 request, handling HTTP authentication"""

    scheme, netloc, path, params, query, frag = urlparse.urlparse(url)

    # Double scheme does not raise on Mac OS X as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise httplib.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, host = urllib2.splituser(netloc)
    else:
        auth = None

    if auth:
        auth = "Basic " + _encode_auth(auth)
        new_url = urlparse.urlunparse((scheme,host,path,params,query,frag))
        request = urllib2.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib2.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = urllib2.urlopen(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url)
        if s2==scheme and h2==host:
            fp.url = urlparse.urlunparse((s2,netloc,path2,param2,query2,frag2))

    return fp
Example #23
	def doPrivmsg(self, irc, msg):
		if(self.registryValue('urlmodify',msg.args[0])):
			
			toModify = {
			'what.cd' : { 'http' : 'what.cd', 'https' : 'ssl.what.cd'},
			'awesome-hd.net' : { 'http' : 'awesome-hd.net', 'https' : 'ssl.awesome-hd.net'}
			}
			
			tmp = []
			
			for name in toModify:
				if(msg.args[1].find(name) != -1):
					for word in msg.args[1].split(' '):
						if(word.find(name) != -1):
							url = urlparse(word)
							if (url[0] == 'http'):
								url = urlunparse(('https', toModify[name]['https'], url[2], url[3], url[4], url[5]))
								tmp.append(url)
							elif (url[0] == 'https'):
								url = urlunparse(('http', toModify[name]['http'], url[2], url[3], url[4], url[5]))
								tmp.append(url)
			if len(tmp) > 0:
				tmp.reverse()
				reply = ' '.join(tmp)
				irc.queueMsg(ircmsgs.privmsg(msg.args[0], reply))
Example #24
def get_favicon(url):
    try:
        html = requests.request('GET', url)
        soup = BeautifulSoup(html.text)
        icon = soup.find('link', rel='shortcut icon')
        if icon is None:
            icon = soup.find('link', type='image/x-icon')
        icon_href = None
        if hasattr(icon, 'href'):
            icon_href = str(icon['href'])
        if icon_href is None or icon_href.strip() == "":
            parsed_url = urlparse.urlparse(url)
            icon_href = urlparse.urlunparse((parsed_url.scheme, parsed_url.netloc, '', '', '', '')) + '/favicon.ico'
            last_try = requests.request('GET', icon_href)
            if last_try.status_code == 200:
                return icon_href
            else:
                return None
        if "http://" not in icon_href:
            parsed_url = urlparse.urlparse(url)
            icon_href = urlparse.urlunparse((parsed_url.scheme, parsed_url.netloc, '', '', '', '')) + icon_href
        last_try = requests.request('GET', icon_href)
        if last_try.status_code == 200:
            return icon_href
        else:
            return 'https://www.readbox.co/static/lightpng.png'
    except Exception:
        return 'https://www.readbox.co/static/lightpng.png'
Example #25
    def __init__(self, scheme=None, hostname=None, path="/",
                 params="", query=None, fragment="",
                 username=None, password=None, port=None):

        self.path = path
        self.params = params
        # Use a fresh dict per instance instead of a shared mutable default.
        self.query = query if query is not None else {}
        self.fragment = fragment

        if hostname:
            # Absolute URL
            if username:
                if password:
                    netloc = username + ':' + password + '@' + hostname
                else:
                    netloc = username + '@' + hostname
            else:
                netloc = hostname

            if not scheme:
                scheme = DEFAULT_SCHEME

            host = hostname

            defport = DEFAULT_PORTS.get(scheme, None)

            if port:
                if port != defport:
                    netloc = netloc + ':' + str(port)
                    host = host + ':' + str(port)
            else:
                port = defport

            self.scheme = scheme
            self.netloc = netloc
            self.host = host
            self.hostname = hostname
            self.username = username
            self.password = password
            self.port = port

        else:
            # Relative URL
            self.scheme = ""
            self.netloc = ""
            self.host = ""
            self.hostname = ""
            self.username = None
            self.password = None
            self.port = None

        query_string = unparse_qs(self.query)
        quoted_path = urllib.quote(self.path)

        self.location = urlparse.urlunparse(('', '', quoted_path, self.params,
                                             query_string, self.fragment))

        self.url = urlparse.urlunparse((self.scheme, self.netloc, quoted_path,
                                        self.params, query_string,
                                        self.fragment))
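A hedged usage sketch, assuming the class is named URL and that the module-level DEFAULT_SCHEME, DEFAULT_PORTS and unparse_qs helpers behave as their names suggest:

u = URL(scheme='http', hostname='example.com', path='/search',
        query={'q': 'urlunparse'})
# u.url      -> 'http://example.com/search?q=urlunparse'
# u.location -> '/search?q=urlunparse'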
Example #26
    def _starter1(self):
        # to be honest, I don't know why this GET request is necessary...
        headers = dict(self.headers)
        headers.update({
            "Referer":
            urlunparse((__PROTOCOL__,
                        self.host,
                        __STARTER_PATH__, None, None, None))})

        url = urlunparse((__PROTOCOL__,
                          self.host,
                          __STARTER_PATH__, None, None, None))
        log.debug(url)
        cookies = dict(
            DSSignInURL="/",
            DSLastAccess=self.DSLastAccess,
            DSID=self.DSID,
            DSFirstAccess=self.last_res.cookies['DSLastAccess'])

        params = dict(check="yes")

        res = requests.get(
            url,
            headers=headers,
            params=params,
            cookies=cookies,
            verify=False)
        log.debug(res.text)
        res.raise_for_status()
Example #27
def makeArchive(srcrepo, archive, pkgs):
    # pull is the pkgrecv(1) command
    import pull

    print "source directory:", srcrepo
    print "package list:", pkgs

    urlprefix = ['http://', 'https://', 'file://']
    if not any(srcrepo.startswith(i) for i in urlprefix):
        # The replace is needed so that urllib.url2pathname, as used in
        # pull.py, handles '/' path separators correctly on Windows.
        srcrepo = urlunparse(("file", os.path.abspath(srcrepo).replace('\\', '/'), '', '', '', ''))

    destrepo = tempfile.mkdtemp()
    if not any(destrepo.startswith(i) for i in urlprefix):
        destrepo_url = urlunparse(("file", os.path.abspath(destrepo).replace('\\', '/'), '', '', '', ''))

    sys.argv = [sys.argv[0], '-m', 'all-timestamps', '-s', srcrepo, '-d', destrepo_url]
    sys.argv.extend(pkgs)
    rv = pull.main_func()

    #copy the cfg_cache to the archive
    if isinstance(archive, zipfile.ZipFile):
        for root, dirs, files in os.walk(destrepo, topdown=False):
            reldir = root[len(destrepo)+1:]
            for name in files:
                archive.write(os.path.join(root, name), os.path.join(reldir, name))
    elif isinstance(archive, tarfile.TarFile):
        archive.add(destrepo, destrepo[len(destrepo):])

    #close the archive
    archive.close()
    return rv
Example #28
def _do_lastfm_query(type, method,**kwargs):
	args = { 
		"method" : method,
		"api_key" : key,
	 	}
	for k,v in kwargs.items():
		args[k] = v.encode("utf8")
	s = ""
	for k in sorted(args.keys()):
		s+=k+args[k]
	s+=secret
	if "sk" in args.keys() or "token" in args.keys():
		args["api_sig"] = hashlib.md5(s).hexdigest()

	if type == "GET":
		url=urlparse.urlunparse(('http',
			'ws.audioscrobbler.com',
			'/2.0/',
			'',
			urllib.urlencode(args),
			''))
		return _do_raw_lastfm_query(url)
	elif type == "POST":
		url=urlparse.urlunparse(('http',
			'ws.audioscrobbler.com',
			'/2.0/', '', '', ''))
		_do_lastfm_post(url, urllib.urlencode(args))
Example #29
def normalizeURL(url):
    url = url.lower()
    url = urlparse.urldefrag(url)[0]

    # split the URL
    link_parts = urlparse.urlparse(url)
    if link_parts.path == '/':
        temp = list(link_parts[:])
        temp[2] = ''
        url = urlparse.urlunparse(tuple(temp))

    # link has been updated, so resplitting is required
    link_parts = urlparse.urlparse(url)
    try:
        if link_parts.netloc.split(':')[1] in ('80', '443'):
            temp = list(link_parts[:])
            temp[1] = temp[1].split(':')[0]
            url = urlparse.urlunparse(tuple(temp))
    except IndexError:
        pass

    url = url.decode('utf-8', 'ignore')

    return url
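A couple of hedged sanity checks for the normalization above:

# Case, fragments, default ports and a lone '/' path are all stripped.
assert normalizeURL('HTTP://Example.COM:80/#top') == u'http://example.com'
assert normalizeURL('https://example.com:443/a?b=1') == u'https://example.com/a?b=1'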
Example #30
    def handle_endtag(self, tag):
        if self._link_counter is not None:
            if self._link_counter < 1:
                if tag != 'a':
                    logger.warn(
                        u'Invalid HTML tags in %s', self._url)
                href = self._get_link_attr('href')
                #  We discard anchors and empty href.
                if href and href[0] != '#':
                    href_parts = urlparse.urlparse(href)
                    # Convert absolute URL to absolute URI
                    if href[0] == '/':
                        href = urlparse.urlunparse(
                            self._base_uri +  href_parts[2:])
                    elif not is_remote_uri(href):
                        # Handle relative URL
                        href = urlparse.urlunparse(
                            self._base_uri +
                            ('/'.join((self._relative_path, href_parts[2])),) +
                            href_parts[3:])

                    filename = os.path.basename(href_parts[2])
                    # If the content of the link is empty, we use the last
                    # part of path.
                    if self._buffer:
                        name = ' '.join(self._buffer)
                    else:
                        name = filename
                    rel = self._get_link_attr('rel')
                    self.links.append((href, filename, name, rel),)
                self._link_counter = None
                self._link_attrs = None
                self._buffer = None
            else:
                self._link_counter -= 1
Example #31
    def solve_cf_challenge(self, resp, **original_kwargs):
        self.tries += 1

        timeout = int(
            re.compile(r"\}, (\d+)\);", re.MULTILINE).findall(
                resp.text)[0]) / 1000
        sleep(timeout)

        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = parsed_url.netloc
        submit_url = '{}://{}/cdn-cgi/l/chk_jschl'.format(
            parsed_url.scheme, domain)

        cloudflare_kwargs = deepcopy(original_kwargs)
        headers = cloudflare_kwargs.setdefault('headers',
                                               {'Referer': resp.url})

        try:
            params = cloudflare_kwargs.setdefault(
                'params',
                OrderedDict([
                    ('s',
                     re.search(r'name="s"\svalue="(?P<s_value>[^"]+)',
                               body).group('s_value')),
                    ('jschl_vc',
                     re.search(r'name="jschl_vc" value="(\w+)"',
                               body).group(1)),
                    ('pass', re.search(r'name="pass" value="(.+?)"',
                                       body).group(1)),
                ]))

            answer = self.get_answer(body, domain)

        except Exception as e:
            logging.error("Unable to parse Cloudflare anti-bots page. %s" % e)
            raise

        try:
            params["jschl_answer"] = str(answer)
        except Exception:
            pass

        # Requests transforms any request into a GET after a redirect,
        # so the redirect has to be handled manually here to allow for
        # performing other types of requests even as the first request.
        method = resp.request.method

        cloudflare_kwargs['allow_redirects'] = False

        redirect = self.request(method, submit_url, **cloudflare_kwargs)
        redirect_location = urlparse(redirect.headers['Location'])
        if not redirect_location.netloc:
            redirect_url = urlunparse(
                (parsed_url.scheme, domain, redirect_location.path,
                 redirect_location.params, redirect_location.query,
                 redirect_location.fragment))
            return self.request(method, redirect_url, **original_kwargs)

        return self.request(method, redirect.headers['Location'],
                            **original_kwargs)
Example #32
def make_absolute_url(path):
    # NOTE: We're using the default site as set by
    #       settings.SITE_ID and the Sites framework
    site = get_current_site(None)
    return urlunparse(('https' if settings.USE_SSL else 'http', site.domain,
                       path, '', '', ''))
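A hedged usage note: with settings.USE_SSL enabled and the default Site's domain set to 'example.com' (both placeholders), the helper would behave like this:

# make_absolute_url('/accounts/login/')
# -> 'https://example.com/accounts/login/'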
Example #33
 def _create_url(self, destination, path_bytes, param_bytes, query_bytes):
     return urlparse.urlunparse(
         ("matrix", destination, path_bytes, param_bytes, query_bytes, ""))
Example #34
    def _cached_http_get(self,
                         url,
                         base_url,
                         timeout,
                         params=None,
                         data=None,
                         multipart_data=None,
                         headers=None,
                         cookies=None,
                         allow_redirect=True,
                         method=None,
                         require_debrid=False,
                         read_error=False,
                         cache_limit=8):
        if require_debrid:
            if Scraper.debrid_resolvers is None:
                Scraper.debrid_resolvers = [
                    resolver for resolver in resolveurl.relevant_resolvers()
                    if resolver.isUniversal()
                ]
            if not Scraper.debrid_resolvers:
                logger.log(
                    '%s requires debrid: %s' %
                    (self.__module__, Scraper.debrid_resolvers),
                    log_utils.LOGDEBUG)
                return ''

        if cookies is None: cookies = {}
        if timeout == 0: timeout = None
        if headers is None: headers = {}
        if url.startswith('//'): url = 'http:' + url
        referer = headers['Referer'] if 'Referer' in headers else base_url
        if params:
            if url == base_url and not url.endswith('/'):
                url += '/'

            parts = urlparse.urlparse(url)
            if parts.query:
                params.update(scraper_utils.parse_query(url))
                url = urlparse.urlunparse(
                    (parts.scheme, parts.netloc, parts.path, parts.params, '',
                     parts.fragment))

            url += '?' + urllib.urlencode(params)
        logger.log(
            'Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' %
            (url, cookies, data, headers), log_utils.LOGDEBUG)
        if data is not None:
            if not isinstance(data, basestring):
                data = urllib.urlencode(data, True)

        if multipart_data is not None:
            headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
            data = multipart_data

        _created, _res_header, html = self.db_connection().get_cached_url(
            url, data, cache_limit)
        if html:
            logger.log('Returning cached result for: %s' % (url),
                       log_utils.LOGDEBUG)
            return html

        try:
            self.cj = self._set_cookies(base_url, cookies)
            if isinstance(url, unicode): url = url.encode('utf-8')
            request = urllib2.Request(url, data=data)
            headers = headers.copy()
            request.add_header('User-Agent', scraper_utils.get_ua())
            request.add_header('Accept', '*/*')
            request.add_header('Accept-Encoding', 'gzip')
            request.add_unredirected_header('Host', request.get_host())
            if referer: request.add_unredirected_header('Referer', referer)
            if 'Referer' in headers: del headers['Referer']
            if 'Host' in headers: del headers['Host']
            for key, value in headers.iteritems():
                request.add_header(key, value)
            self.cj.add_cookie_header(request)
            if not allow_redirect:
                opener = urllib2.build_opener(NoRedirection)
                urllib2.install_opener(opener)
            else:
                opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
                urllib2.install_opener(opener)
                opener2 = urllib2.build_opener(
                    urllib2.HTTPCookieProcessor(self.cj))
                urllib2.install_opener(opener2)

            if method is not None: request.get_method = lambda: method.upper()
            response = urllib2.urlopen(request, timeout=timeout)
            self.cj.extract_cookies(response, request)
            if kodi.get_setting('cookie_debug') == 'true':
                logger.log(
                    'Response Cookies: %s - %s' %
                    (url, scraper_utils.cookies_as_str(self.cj)),
                    log_utils.LOGDEBUG)
            self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
            self.cj.save(ignore_discard=True)
            if not allow_redirect and (
                    response.getcode() in [301, 302, 303, 307]
                    or response.info().getheader('Refresh')):
                if response.info().getheader('Refresh') is not None:
                    refresh = response.info().getheader('Refresh')
                    return refresh.split(';')[-1].split('url=')[-1]
                else:
                    redir_url = response.info().getheader('Location')
                    if redir_url.startswith('='):
                        redir_url = redir_url[1:]
                    return redir_url

            content_length = response.info().getheader('Content-Length', 0)
            if int(content_length) > MAX_RESPONSE:
                logger.log(
                    'Response exceeded allowed size. %s => %s / %s' %
                    (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

            if method == 'HEAD':
                return ''
            else:
                if response.info().get('Content-Encoding') == 'gzip':
                    html = ungz(response.read(MAX_RESPONSE))
                else:
                    html = response.read(MAX_RESPONSE)
        except urllib2.HTTPError as e:
            if e.info().get('Content-Encoding') == 'gzip':
                html = ungz(e.read(MAX_RESPONSE))
            else:
                html = e.read(MAX_RESPONSE)

            if CF_CAPCHA_ENABLED and e.code == 403 and 'cf-captcha-bookmark' in html:
                html = cf_captcha.solve(url, self.cj, scraper_utils.get_ua(),
                                        self.get_name())
                if not html:
                    return ''
            elif e.code == 503 and 'cf-browser-verification' in html:
                html = cloudflare.solve(url,
                                        self.cj,
                                        scraper_utils.get_ua(),
                                        extra_headers=headers)
                if not html:
                    return ''
            else:
                logger.log(
                    'Error (%s) during scraper http get: %s' % (str(e), url),
                    log_utils.LOGWARNING)
                if not read_error:
                    return ''
        except Exception as e:
            logger.log(
                'Error (%s) during scraper http get: %s' % (str(e), url),
                log_utils.LOGWARNING)
            return ''

        self.db_connection().cache_url(url, html, data)
        return html
Example #35
    if player.isPlayingVideo():
        player.position = player.getTime()
        player.totaltime = player.getTotalTime()
        try:
            position = player.position * 100 / player.totaltime

            #log("Playing %s at %s %%" %(params['title'], position) )
            if position >= 95:
                log("Mark as viewed")

                # Split the URL apart and rebuild it with the credentials
                # stripped out of the netloc
                urlparams = urlparse.urlparse(params['markviewed'])
                urlparams_copie = (urlparams[0], urlparams.netloc.split('@')[1]
                                   ) + urlparams[2:]
                url = urlparse.urlunparse(urlparams_copie)

                # Create an SSL context that skips certificate verification
                ctx = ssl.create_default_context()
                ctx.check_hostname = False
                ctx.verify_mode = ssl.CERT_NONE

                # Build the request; basic auth is done by hand below because
                # it is less hassle than wiring up an opener
                req = urllib2.Request(url, 'action=set')
                auth_cred = "%s:%s" % (urlparams.username, urlparams.password)
                auth_cred = auth_cred.encode('base64')
                auth_cred = auth_cred.replace("\n", "")
                auth_cred = auth_cred.rstrip()
Example #36
def torrentAction(method, arguments):

    host = lazylibrarian.CONFIG['TRANSMISSION_HOST']
    port = check_int(lazylibrarian.CONFIG['TRANSMISSION_PORT'], 0)

    if not host or not port:
        logger.error('Invalid transmission host or port, check your config')
        return False

    username = lazylibrarian.CONFIG['TRANSMISSION_USER']
    password = lazylibrarian.CONFIG['TRANSMISSION_PASS']

    if not host.startswith("http://") and not host.startswith("https://"):
        host = 'http://' + host

    if host.endswith('/'):
        host = host[:-1]

    # Fix the URL. We assume that the user does not point to the RPC endpoint,
    # so add it if it is missing.
    parts = list(urlparse.urlparse(host))

    if parts[0] not in ("http", "https"):
        parts[0] = "http"

    if ':' not in parts[1]:
        parts[1] += ":%s" % port

    if not parts[2].endswith("/rpc"):
        parts[2] += "/transmission/rpc"

    host = urlparse.urlunparse(parts)

    # Retrieve session id
    auth = (username, password) if username and password else None
    proxies = proxyList()
    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    response = requests.get(host, auth=auth, proxies=proxies, timeout=timeout)

    if response is None:
        logger.error("Error getting Transmission session ID")
        return

    # Parse response
    session_id = ''
    if response.status_code == 401:
        if auth:
            logger.error("Username and/or password not accepted by "
                         "Transmission")
        else:
            logger.error("Transmission authorization required")
        return
    elif response.status_code == 409:
        session_id = response.headers['x-transmission-session-id']

    if not session_id:
        logger.error("Expected a Session ID from Transmission")
        return

    # Prepare next request
    headers = {'x-transmission-session-id': session_id}
    data = {'method': method, 'arguments': arguments}
    proxies = proxyList()
    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        response = requests.post(host,
                                 data=json.dumps(data),
                                 headers=headers,
                                 proxies=proxies,
                                 auth=auth,
                                 timeout=timeout)
        response = response.json()
    except Exception as e:
        logger.debug('Transmission %s: %s' % (type(e).__name__, str(e)))
        response = ''

    if not response:
        logger.error("Error sending torrent to Transmission")
        return

    return response
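A hedged usage sketch; 'torrent-get' and its 'fields' argument follow the Transmission RPC spec, while the torrent id below is a placeholder:

response = torrentAction('torrent-get',
                         {'ids': [1], 'fields': ['name', 'percentDone']})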
Example #37
def fetch_via_oauth(url_to_fetch,
                    email_of_user_wanting_access=None,
                    password_of_user_wanting_access=None,
                    consumer_is_anointed=False,
                    method="GET"):
    """Fetches a given url (e.g. http://localhost:8080/api/v1/user)
    that requires oauth authentication, and returns the results.
    This function takes care of all the necessary oauth handshaking.

    The host at this url must accept remote-api calls via
    /_ah/remote_api.  The intended use is for it to be a dev_appserver
    instance.

    **NOTE**: this function also will stub out all appengine calls
    so they go to the remote api server (using remote_api_stub)!
    Be careful if you call appengine functions after calling this.
    TODO(csilvers): can we unstub at the end of this function?

    Arguments:
       url_to_fetch:
          The url to retrieve. The host/port should be that of
          the local dev-appserver instance, probably localhost:8080.
          The protocol should probably be http://.

       email_of_user_wanting_access:
          This is who the oauth process will say is logging in (the
          'resource provider').  This user must exist in the khan db.
          You can create a user via
             user_data = user_models.UserData.insert_for('random_string',
                                                         '*****@*****.**')
             user_data.set_password('password')
          If you pass in None, we will use an 'internal' user we create.

       password_of_user_wanting_access:
          The password corresponding to the user specified via email.
          If you pass in None for email_of_user_wanting_access, the
          value here is ignored.

       consumer_is_anointed:
          oauth cares not only about the user it's retrieving
          information for, but also about the client (application)
          fetching the data.  Khan recognizes two classes of clients:
          anointed (like the ipad), and non-anointed (the default).
          Anointed clients can perform some actions that non-anointed
          ones cannot.  This boolean specifies whether you wish the
          oauth request to seem to come from an anointed client or
          a non-anointed client.

       method:
          GET or POST are definitely supported.  PUT will probably work.

    Returns:
       The response from fetching the given url.  The HTTP response code
       is not returned.
    """
    user_pw_pair = (email_of_user_wanting_access,
                    password_of_user_wanting_access)
    scheme, hostname, path, params, query, fragment = \
            urlparse.urlparse(url_to_fetch)
    dev_appserver_url = urlparse.urlunparse((scheme, hostname, '', '', '', ''))

    # Do the stubbing if we haven't done it already.
    global _CALLED_STUB_APPENGINE_FOR_DEV_APPSERVER
    if not _CALLED_STUB_APPENGINE_FOR_DEV_APPSERVER:
        _stub_appengine_for_dev_appserver(hostname)
    _CALLED_STUB_APPENGINE_FOR_DEV_APPSERVER = True

    # If the caller doesn't care who the user is making the request,
    # we'll just use one that we make and keep around.
    if user_pw_pair == (None, None):
        (_, user, pw) = _create_user('test_user_for_oauth_fetch')
        user_pw_pair = (user, pw)

    # If we already have the tokens cached, don't refetch (it's expensive).
    # The key is exactly the set of arguments for _create_oauth_tokens.
    token_map_key = (dev_appserver_url, user_pw_pair, consumer_is_anointed)
    if token_map_key in _TOKEN_MAP:
        consumer, access_token = _TOKEN_MAP[token_map_key]
    else:
        consumer, access_token = _create_oauth_tokens(*token_map_key)
        _TOKEN_MAP[token_map_key] = (consumer, access_token)

    return _access_resource(url_to_fetch, consumer, access_token, method)
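A hedged usage sketch based on the docstring above; the credentials are placeholders and the user must already exist in the dev datastore:

body = fetch_via_oauth('http://localhost:8080/api/v1/user',
                       email_of_user_wanting_access='random@example.com',
                       password_of_user_wanting_access='password',
                       consumer_is_anointed=False,
                       method='GET')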
Example #38
    def __init__(self, data, encoding='UTF8'):
        """
        @param data: Either a string representing a URL or a 6-element tuple
            representing the URL components:
            <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

        Simple generic test, more detailed tests in each method!

        >>> u = url_object('http://w3af.com/foo/bar.txt')
        >>> u.path
        '/foo/bar.txt'
        >>> u.scheme
        'http'
        >>> u.getFileName()
        'bar.txt'
        >>> u.getExtension()
        'txt'

        #
        # http is the default protocol, we can provide URLs with no proto
        #
        >>> u = url_object('w3af.com')
        >>> u.getDomain()
        'w3af.com'
        >>> u.getProtocol()
        'http'

        #
        # But we can't specify a URL without a domain!
        #
        >>> u = url_object('http://')
        Traceback (most recent call last):
          File "<stdin>", line 1, in ?
        ValueError: Invalid URL "http://"

        >>> u = url_object(u'http://w3af.com/foo/bar.txt')
        >>> u.path
        u'/foo/bar.txt'

        >>> u = url_object('http://w3af.org/?foo=http://w3af.com')
        >>> u.netloc
        'w3af.org'

        >>> u = url_object('http://w3af.org/', encoding='x-euc-jp')
        Traceback (most recent call last):
          File "<stdin>", line 1, in ?
        ValueError: Invalid encoding "x-euc-jp" when creating URL.

        """
        self._already_calculated_url = None
        self._querystr = None
        self._changed = True
        self._encoding = encoding

        if data is None:
            raise ValueError('Can not build a url_object from data=None.')

        # Verify that the encoding is a valid one. If we don't do it here,
        # things might get crazy afterwards.

        if isinstance(data, tuple):
            scheme, netloc, path, params, qs, fragment = data
        else:
            scheme, netloc, path, params, qs, fragment = \
                urlparse.urlparse(data)
            #
            # This is the case when someone creates a url_object like
            # this: url_object('www.w3af.com')
            #
            if scheme == netloc == '' and path:
                # By default we set the protocol to "http"
                scheme = 'http'
                netloc = path
                path = ''

        self.scheme = scheme or u''
        self.netloc = netloc or u''
        self.path = path or u''
        self.params = params or u''
        self.querystring = qs or u''
        self.fragment = fragment or u''

        if not self.netloc:
            # The URL is invalid, we don't have a netloc!
            if isinstance(data, tuple):
                invalid_url = urlparse.urlunparse(data)
            else:
                invalid_url = data
            raise ValueError, 'Invalid URL "%s"' % (invalid_url, )
Example #39
0
 def baseurl(self):
     # urlunparse() requires all six components, not just the first three
     return urlparse.urlunparse((self.scheme, self.netloc, self.path,
                                 '', '', ''))
Example #40
0
 def __repr__(self):
     parsed_url = (self.scheme, self.netloc, self.path, self.params,
                   self.query, self.fragment)
     return urlparse.urlunparse(parsed_url)
Example #41
0
    def parse_addition_data(self, response, sku, js_data):

        meta = response.meta.copy()
        product = response.meta['product']
        reqs = meta.get('reqs', [])
        currency = response.xpath(
            '//meta[contains(@itemprop, "priceCurrency")]/@content').extract()

        if currency:
            # use the currency extracted above rather than hard-coding 'USD'
            meta['product']['price'] = Price(price=0.00,
                                             priceCurrency=currency[0])

        # if js_data['review']['count'] > 0:
        reqs.append(
            Request(url=self.REVIEW_URL.format(sku=sku),
                    dont_filter=True,
                    callback=self.parse_buyer_reviews,
                    meta=meta))

        url = self.RELATED_PRODUCT.format(sku=sku)
        params = {
            'pType': 'product',
            'prodId': sku,
            'prodName': product['title'].encode('ascii', 'ignore'),
            'ref': '',
            'status': 'ok',
            'url': 'http://www.staples.com/product_%s' % sku,
            'userAgent': self.user_agent,
        }

        url_parts = list(urlparse.urlparse(url))
        url_parts[4] = urllib.urlencode(params)
        new_url = urlparse.urlunparse(url_parts)

        reqs.append(
            Request(url=new_url,
                    dont_filter=True,
                    callback=self.parse_related_product,
                    meta=meta))
        # Get base product data and child "additionalProductsWarrantyServices" variants, if any
        try:
            reqs.append(
                Request(
                    url=self.PRICE_URL.format(
                        sku=sku,
                        metadata__coming_soon_flag=js_data['metadata']['coming_soon_flag'],
                        metadata__price_in_cart_flag=js_data['metadata']['price_in_cart_flag'],
                        prod_doc_key=js_data['prod_doc_key'],
                        metadata__product_type__id=js_data['metadata']['product_type']['id'],
                        metadata__preorder_flag=js_data['metadata']['preorder_flag'],
                        street_date=time.time(),
                        metadata__channel_availability_for__id=js_data['metadata']['channel_availability_for']['id'],
                        metadata__backorder_flag=js_data['metadata']['backorder_flag']),
                    dont_filter=True,
                    callback=self.get_price_and_stockstatus,
                    meta=meta,
                ))
        except Exception as e:
            self.log(
                "Error while forming request for base product data: {}".format(e),
                WARNING)
        # Get real variants, if any
        # import pprint
        # pprint.pprint(response.meta['product']['variants'])
        if self.scrape_variants_with_extra_requests:
            for v in response.meta['product']['variants']:
                try:
                    reqs.append(
                        Request(
                            url=self.PRICE_URL.format(
                                sku=v['partnumber'],
                                metadata__coming_soon_flag=js_data['metadata']['coming_soon_flag'],
                                metadata__price_in_cart_flag=js_data['metadata']['price_in_cart_flag'],
                                prod_doc_key=v['prod_doc_key'],
                                metadata__product_type__id=js_data['metadata']['product_type']['id'],
                                metadata__preorder_flag=js_data['metadata']['preorder_flag'],
                                street_date=time.time(),
                                metadata__channel_availability_for__id=js_data['metadata']['channel_availability_for']['id'],
                                metadata__backorder_flag=js_data['metadata']['backorder_flag']),
                            dont_filter=True,
                            callback=self.get_variant_price,
                            meta=meta,
                        ))

                except Exception as e:
                    self.log(
                        "Error while forming request for variant: {}".format(e),
                        WARNING)

        if reqs:
            return self.send_next_request(reqs, response)
        else:
            return product
Example #42
0
 def add_login_token_to_redirect_url(self, url, token):
     url_parts = list(urlparse.urlparse(url))
     query = dict(urlparse.parse_qsl(url_parts[4]))
     query.update({"loginToken": token})
     url_parts[4] = urllib.urlencode(query)
     return urlparse.urlunparse(url_parts)
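A minimal standalone sketch of the same query-merge pattern (stdlib only; the URL and token value below are made up for illustration):

import urllib
import urlparse

url = "https://example.com/cb?next=/home"
url_parts = list(urlparse.urlparse(url))
query = dict(urlparse.parse_qsl(url_parts[4]))
query.update({"loginToken": "abc123"})  # hypothetical token value
url_parts[4] = urllib.urlencode(query)
redirect_url = urlparse.urlunparse(url_parts)
# e.g. 'https://example.com/cb?loginToken=abc123&next=%2Fhome' (key order may vary)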
Example #43
0
def get_logged_in_program_certificate_url(certificate_url):
    parsed_url = urlparse(certificate_url)
    query_string = 'next=' + parsed_url.path
    url_parts = (parsed_url.scheme, parsed_url.netloc, '/login/', '', query_string, '')
    return urlunparse(url_parts)
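For illustration, a hedged sketch of the same login-redirect construction (example.org and the certificate path are placeholders):

from urlparse import urlparse, urlunparse

cert_url = 'https://example.org/certificates/abc123'
parsed = urlparse(cert_url)
login_url = urlunparse((parsed.scheme, parsed.netloc, '/login/', '',
                        'next=' + parsed.path, ''))
# login_url == 'https://example.org/login/?next=/certificates/abc123'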
Example #44
0
def _NormalizeUrl(url):
  """Returns normalized URL such as removing trailing slashes."""
  parsed_url = list(urlparse.urlparse(url))
  parsed_url[2] = re.sub(r'/{2,}', r'/', parsed_url[2])
  return urlparse.urlunparse(parsed_url)
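A quick self-contained sketch of what this normalization does to duplicate slashes (the function name and URL are illustrative):

import re
import urlparse

def normalize_url(url):
    parsed = list(urlparse.urlparse(url))
    # collapse runs of slashes in the path component only
    parsed[2] = re.sub(r'/{2,}', r'/', parsed[2])
    return urlparse.urlunparse(parsed)

# normalize_url('http://example.com//a///b') == 'http://example.com/a/b'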
Example #45
0
def includeme(config):
    """Install SyncServer application into the given Pyramid configurator."""
    # Set the umask so that files are created with secure permissions.
    # Necessary for e.g. created-on-demand sqlite database files.
    os.umask(0o077)

    # If PyOpenSSL is available, configure requests to use it.
    # This helps improve security on older python versions.
    if HAS_PYOPENSSL:
        requests.packages.urllib3.contrib.pyopenssl.inject_into_urllib3()

    settings = config.registry.settings
    import_settings_from_environment_variables(settings)

    # Sanity-check the deployment settings and provide sensible defaults.
    public_url = settings.get("syncserver.public_url")
    if public_url is None:
        raise RuntimeError("you must configure syncserver.public_url")
    public_url = public_url.rstrip("/")
    settings["syncserver.public_url"] = public_url

    secret = settings.get("syncserver.secret")
    if secret is None:
        secret = generate_random_hex_key(64)
    sqluri = settings.get("syncserver.sqluri")
    if sqluri is None:
        rootdir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
        sqluri = "sqlite:///" + os.path.join(rootdir, "syncserver.db")

    # Automagically configure from IdP if one is given.
    idp = settings.get("syncserver.identity_provider")
    if idp is not None:
        r = requests.get(urljoin(idp, '/.well-known/fxa-client-configuration'))
        r.raise_for_status()
        idp_config = r.json()
        idp_issuer = urlparse(idp_config["auth_server_base_url"]).netloc

    # Configure app-specific defaults based on top-level configuration.
    settings.pop("config", None)
    if "tokenserver.backend" not in settings:
        # Default to our simple static node-assignment backend
        settings["tokenserver.backend"] = DEFAULT_TOKENSERVER_BACKEND
    if settings["tokenserver.backend"] == DEFAULT_TOKENSERVER_BACKEND:
        # Provide some additional defaults for the default backend,
        # unless overridden in the config.
        if "tokenserver.sqluri" not in settings:
            settings["tokenserver.sqluri"] = sqluri
        if "tokenserver.node_url" not in settings:
            settings["tokenserver.node_url"] = public_url
        if "endpoints.sync-1.5" not in settings:
            settings["endpoints.sync-1.5"] = "{node}/storage/1.5/{uid}"
    if "tokenserver.monkey_patch_gevent" not in settings:
        # Default to no gevent monkey-patching
        settings["tokenserver.monkey_patch_gevent"] = False
    if "tokenserver.applications" not in settings:
        # Default to just the sync-1.5 application
        settings["tokenserver.applications"] = "sync-1.5"
    if "tokenserver.secrets.backend" not in settings:
        # Default to a single fixed signing secret
        settings["tokenserver.secrets.backend"] = "mozsvc.secrets.FixedSecrets"
        settings["tokenserver.secrets.secrets"] = [secret]
    if "tokenserver.allow_new_users" not in settings:
        allow_new_users = settings.get("syncserver.allow_new_users")
        if allow_new_users is not None:
            settings["tokenserver.allow_new_users"] = allow_new_users
    if "hawkauth.secrets.backend" not in settings:
        # Default to the same secrets backend as the tokenserver
        for key in settings.keys():
            if key.startswith("tokenserver.secrets."):
                newkey = "hawkauth" + key[len("tokenserver"):]
                settings[newkey] = settings[key]
    if "storage.backend" not in settings:
        # Default to sql syncstorage backend
        settings["storage.backend"] = "syncstorage.storage.sql.SQLStorage"
        settings["storage.sqluri"] = sqluri
        settings["storage.create_tables"] = True
    if "storage.batch_upload_enabled" not in settings:
        settings["storage.batch_upload_enabled"] = True
    if "browserid.backend" not in settings:
        # Default to local verifier to reduce external dependencies,
        # unless an explicit verifier URL has been configured.
        verifier_url = settings.get("syncserver.browserid_verifier")
        if not verifier_url:
            settings["browserid.backend"] = \
                "tokenserver.verifiers.LocalBrowserIdVerifier"
        else:
            settings["browserid.backend"] = \
                "tokenserver.verifiers.RemoteBrowserIdVerifier"
            settings["browserid.verifier_url"] = verifier_url
        # Use base of public_url as only audience
        audience = urlunparse(urlparse(public_url)._replace(path=""))
        settings["browserid.audiences"] = audience
        # If an IdP was specified, allow it and only it as issuer.
        if idp is not None:
            settings["browserid.trusted_issuers"] = [idp_issuer]
            settings["browserid.allowed_issuers"] = [idp_issuer]
    if "oauth.backend" not in settings:
        settings["oauth.backend"] = "tokenserver.verifiers.RemoteOAuthVerifier"
        # If an IdP was specified, use it for oauth verification.
        if idp is not None:
            settings["oauth.server_url"] = idp_config["oauth_server_base_url"]
            settings["oauth.default_issuer"] = idp_issuer
    if "loggers" not in settings:
        # Default to basic logging config.
        root_logger = logging.getLogger("")
        if not root_logger.handlers:
            logging.basicConfig(level=logging.WARN)
    if "fxa.metrics_uid_secret_key" not in settings:
        # Default to a randomly-generated secret.
        # This setting isn't useful in a self-hosted setup
        # and setting a default avoids scary-sounding warnings.
        settings["fxa.metrics_uid_secret_key"] = generate_random_hex_key(32)

    # Include the relevant sub-packages.
    config.scan("syncserver", ignore=["syncserver.wsgi_app"])
    config.include("syncstorage", route_prefix="/storage")
    config.include("tokenserver", route_prefix="/token")

    # Add a top-level "it works!" view.
    def itworks(request):
        return Response("it works!")

    config.add_route('itworks', '/')
    config.add_view(itworks, route_name='itworks')
Example #46
0
def main(cmdline):
    """main(cmdline) -- process cmdline as if it were sys.argv"""

    # parse options/files
    options = []
    optvalues = {}
    for opt in cmdline[1:]:
        if opt.startswith('-'):
            if ':' in opt:
                k, v = tuple(opt.split(':', 1))
                optvalues[k] = v
                options.append(k)
            else:
                options.append(opt)
        else:
            break
    files = cmdline[len(options)+1:]

    ### create converting object

    verbose = ('-v' in options)

    # load fontifier
    if '-marcs' in options:
        # use mxTextTool's tagging engine as fontifier
        from mx.TextTools import tag
        from mx.TextTools.Examples.Python import python_script
        tagfct = lambda text, tag=tag, pytable=python_script: tag(
            text, pytable)[1]
        print "Py2HTML: using Marc's tagging engine"
    else:
        # load Just's fontifier
        try:
            import PyFontify
            if PyFontify.__version__ < '0.3':
                raise ImportError
            tagfct = PyFontify.fontify
        except ImportError:
            print """
Sorry, but this script needs the PyFontify.py module version 0.3;
You can download it from Just's homepage at
URL: http://starship.python.net/~just/
"""
            sys.exit()

    if '-format' in options:
        format = optvalues['-format']
    else:
        # use default
        format = 'html'

    if '-mode' in options:
        mode = optvalues['-mode']
    else:
        # use default
        mode = 'color'

    c = PrettyPrint(tagfct, format, mode)
    convert = c.file_filter

    ### start working

    if '-title' in options:
        c.title = optvalues['-title']

    if '-bgcolor' in options:
        c.bgcolor = optvalues['-bgcolor']

    if '-header' in options:
        try:
            f = open(optvalues['-header'])
            c.header = f.read()
            f.close()
        except IOError:
            if verbose:
                print 'IOError: header file not found'

    if '-footer' in options:
        try:
            f = open(optvalues['-footer'])
            c.footer = f.read()
            f.close()
        except IOError:
            if verbose:
                print 'IOError: footer file not found'

    if '-URL' in options:
        c.replace_URLs = True

    if '-' in options:
        convert(sys.stdin, sys.stdout)
        sys.exit()

    if '-h' in options:
        print __doc__
        sys.exit()

    if not files:
        # Turn URL processing on
        c.replace_URLs = True
        # Try CGI processing...
        import cgi, urllib, urlparse, os
        form = cgi.FieldStorage()
        if 'script' not in form:
            # Ok, then try pathinfo
            if 'PATH_INFO' not in os.environ:
                if INPUT_FORM:
                    redirect_to(INPUT_FORM)
                else:
                    sys.stdout.write('Content-Type: text/html\r\n\r\n')
                    write_html_error('Missing Parameter',
                        'Missing script=URL field in request')
                sys.exit(1)
            url = os.environ['PATH_INFO'][1:]  # skip the leading slash
        else:
            url = form['script'].value
        sys.stdout.write('Content-Type: text/html\r\n\r\n')
        scheme, host, path, params, query, frag = urlparse.urlparse(url)
        if not host:
            scheme = 'http'
            host = os.environ.get('HTTP_HOST', 'localhost')
            url = urlparse.urlunparse((scheme, host, path, params, query, frag))
        #print url; sys.exit()
        network = urllib.URLopener()
        try:
            tempfile, headers = network.retrieve(url)
        except IOError as reason:
            write_html_error('Error opening "%s"' % url,
                'The given URL could not be opened. Reason: %s' % str(reason))
            sys.exit(1)
        f = open(tempfile,'rb')
        c.title = url
        c.footer = __cgifooter__
        convert(f, sys.stdout)
        f.close()
        network.close()
        sys.exit()

    if '-stdout' in options:
        filebreak = '-'*72
        for f in files:
            try:
                if len(files) > 1:
                    print filebreak
                    print 'File:', f
                    print filebreak
                convert(f, sys.stdout)
            except IOError:
                pass
    else:
        if verbose:
            print 'Py2HTML: working on',
        for f in files:
            try:
                if verbose:
                    print f,
                convert(f, f+'.html')
            except IOError:
                if verbose:
                    print '(IOError!)',
        if verbose:
            print
            print 'Done.'
Example #47
0
def remove_fragment(url):
    scheme, netloc, url, params, query, fragment = urlparse.urlparse(url)
    return urlparse.urlunparse((scheme, netloc, url, params, query, ''))
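A quick usage check, assuming remove_fragment() above is in scope (the URL is just an illustration):

assert remove_fragment('http://example.com/page?x=1#top') == \
    'http://example.com/page?x=1'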
Example #48
0
def list_repos(compact=False):
    """
    List the Zoomdata repositories which are locally configured.

    compact : False
        Set ``True`` to get compact dictionary containing the Zoomdata
        repositories configuration

    CLI Example:

    .. code-block:: bash

        salt '*' zoomdata.list_repos
    """
    repo_config = {
        'base_url': None,
        'gpgkey': None,
        'release': None,
        'repositories': [],
        'components': [],
    }

    repos = {
        k: v
        for (k, v) in __salt__['pkg.list_repos']().items()  # pylint: disable=undefined-variable
        if k.startswith(ZOOMDATA)
    }

    if not compact:
        return repos

    for repo in repos:
        # Skip repository discovery if disabled
        if not int(repos[repo].get('enabled', 0)):
            continue

        url = urlparse.urlparse(repos[repo]['baseurl'].strip())
        if not repo_config['base_url']:
            repo_config['base_url'] = urlparse.urlunparse(
                (url.scheme, url.netloc, '', '', '', ''))

        try:
            if not repo_config['gpgkey'] and 'gpgkey' in repos[repo] and \
               int(repos[repo].get('gpgcheck', '0')):
                repo_config['gpgkey'] = repos[repo]['gpgkey'].strip()
        except ValueError:
            pass

        repo_root = url.path.split('/')[1]
        log.debug("zoomdata.list_repos: Processing repo_root: %s", repo_root)
        try:
            if repo_root == 'latest':
                repo_config['release'] = repo_root
            else:
                # StrictVersion() raises ValueError for non-version strings
                StrictVersion(repo_root)
                # repo_root is a string like '5.8' or '5.10'
                if repo_config['release'] is None:
                    repo_config['release'] = repo_root
                elif isinstance(repo_config['release'], str) and \
                        LooseVersion(repo_root) > LooseVersion(repo_config['release']):
                    repo_config['release'] = repo_root
        except ValueError:
            # Collect all other unique repos which are not release numbers,
            # such as ``tools`` for example.
            if repo_root not in repo_config['repositories']:
                repo_config['repositories'].append(repo_root)

        component = url.path.rstrip('/').rsplit('/')[-1]
        if component not in repo_config['components']:
            repo_config['components'].append(component)

    return repo_config
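For reference, a hedged sketch of what the compact form might return with hypothetical Zoomdata repos configured (illustrative values only):

# salt '*' zoomdata.list_repos compact=True
# {
#     'base_url': 'https://repo.example.com',
#     'gpgkey': 'https://repo.example.com/RPM-GPG-KEY',
#     'release': '5.8',
#     'repositories': ['tools'],
#     'components': ['stable'],
# }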
Example #49
0
    def _Dynamic_Fetch(self, request, response):
        """Trivial implementation of URLFetchService::Fetch().

        Args:
            request: the fetch to perform, a URLFetchRequest
            response: the fetch response, a URLFetchResponse
        """
        (protocol, host, path, parameters, query,
         fragment) = urlparse.urlparse(request.url())

        payload = ''
        if request.method() == urlfetch_service_pb.URLFetchRequest.GET:
            method = 'GET'
        elif request.method() == urlfetch_service_pb.URLFetchRequest.POST:
            method = 'POST'
            payload = request.payload()
        elif request.method() == urlfetch_service_pb.URLFetchRequest.HEAD:
            method = 'HEAD'
        elif request.method() == urlfetch_service_pb.URLFetchRequest.PUT:
            method = 'PUT'
            payload = request.payload()
        elif request.method() == urlfetch_service_pb.URLFetchRequest.DELETE:
            method = 'DELETE'
        else:
            logging.error('Invalid method: %s', request.method())
            raise apiproxy_errors.ApplicationError(
                urlfetch_service_pb.URLFetchServiceError.UNSPECIFIED_ERROR)

        if not (protocol == 'http' or protocol == 'https'):
            logging.error('Invalid protocol: %s', protocol)
            raise apiproxy_errors.ApplicationError(
                urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

        url = urlparse.urlunparse(('', '', path, parameters, query, fragment))
        logging.debug('Fetching URL: %s', url)

        headers = {
            'Content-Length': len(payload),
            'Host': host,
            'Accept': '*/*',
        }
        for header in request.header_list():
            headers[header.key()] = header.value()

        logging.debug(
            'Making HTTP request: host = %s, '
            'url = %s, payload = %s, headers = %s', host, url, payload,
            headers)

        try:
            if protocol == 'http':
                connection = httplib.HTTPConnection(host)
            elif protocol == 'https':
                connection = httplib.HTTPSConnection(host)
            else:
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.INVALID_URL)
            try:
                connection.request(method, url, payload, headers)
                http_response = connection.getresponse()
                http_response_data = http_response.read()
            finally:
                connection.close()
        except (httplib.error, socket.error, IOError), e:
            raise apiproxy_errors.ApplicationError(
                urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))
Example #50
0
    def notify(cls, resource_id):
        """
            Asynchronous task to notify a subscriber about updates,
            runs a POST?format=msg request against the subscribed
            controller which extracts the data and renders and sends
            the notification message (see send()).

            @param resource_id: the pr_subscription_resource record ID
        """

        _debug = current.log.debug
        _debug("S3Notifications.notify(resource_id=%s)" % resource_id)

        db = current.db
        s3db = current.s3db

        stable = s3db.pr_subscription
        rtable = db.pr_subscription_resource
        ftable = s3db.pr_filter

        # Extract the subscription data
        join = stable.on(rtable.subscription_id == stable.id)
        left = ftable.on(ftable.id == stable.filter_id)

        # @todo: should not need rtable.resource here
        row = db(rtable.id == resource_id).select(stable.id,
                                                  stable.pe_id,
                                                  stable.frequency,
                                                  stable.notify_on,
                                                  stable.method,
                                                  stable.email_format,
                                                  stable.attachment,
                                                  rtable.id,
                                                  rtable.resource,
                                                  rtable.url,
                                                  rtable.last_check_time,
                                                  ftable.query,
                                                  join=join,
                                                  left=left).first()
        if not row:
            return True

        s = getattr(row, "pr_subscription")
        r = getattr(row, "pr_subscription_resource")
        f = getattr(row, "pr_filter")

        # Create a temporary token to authorize the lookup request
        auth_token = str(uuid4())

        # Store the auth_token in the subscription record
        r.update_record(auth_token=auth_token)
        db.commit()

        # Construct the send-URL
        public_url = current.deployment_settings.get_base_public_url()
        lookup_url = "%s/%s/%s" % (public_url,
                                   current.request.application,
                                   r.url.lstrip("/"))

        # Break up the URL into its components
        purl = list(urlparse.urlparse(lookup_url))

        # Subscription parameters
        # Date (must ensure we pass to REST as tz-aware)
        last_check_time = s3_encode_iso_datetime(r.last_check_time)
        query = {"subscription": auth_token, "format": "msg"}
        if "upd" in s.notify_on:
            query["~.modified_on__ge"] = "%sZ" % last_check_time
        else:
            query["~.created_on__ge"] = "%sZ" % last_check_time

        # Filters
        if f.query:
            from s3filter import S3FilterString
            resource = s3db.resource(r.resource)
            fstring = S3FilterString(resource, f.query)
            for k, v in fstring.get_vars.iteritems():
                if v is not None:
                    if k in query:
                        value = query[k]
                        if type(value) is list:
                            value.append(v)
                        else:
                            query[k] = [value, v]
                    else:
                        query[k] = v
            query_nice = s3_unicode(fstring.represent())
        else:
            query_nice = None

        # Add subscription parameters and filters to the URL query, and
        # put the URL back together
        query = urlencode(query)
        if purl[4]:
            query = "&".join((purl[4], query))
        page_url = urlparse.urlunparse([purl[0], # scheme
                                        purl[1], # netloc
                                        purl[2], # path
                                        purl[3], # params
                                        query,   # query
                                        purl[5], # fragment
                                        ])

        # Serialize data for send (avoid second lookup in send)
        data = json.dumps({"pe_id": s.pe_id,
                           "notify_on": s.notify_on,
                           "method": s.method,
                           "email_format": s.email_format,
                           "attachment": s.attachment,
                           "resource": r.resource,
                           "last_check_time": last_check_time,
                           "filter_query": query_nice,
                           "page_url": lookup_url,
                           "item_url": None,
                           })

        # Send the request
        _debug("Requesting %s" % page_url)
        req = urllib2.Request(page_url, data=data)
        req.add_header("Content-Type", "application/json")
        success = False
        try:
            response = json.loads(urllib2.urlopen(req).read())
            message = response["message"]
            if response["status"] == "success":
                success = True
        except urllib2.HTTPError, e:
            message = ("HTTP %s: %s" % (e.code, e.read()))
Example #51
0
    def _get_automatic_captions(self, video_id, webpage=None):
        sub_tracks = []
        if webpage is None:
            url = 'http://www.youtube.com/watch?v=%s&hl=%s&has_verified=1' % (
                video_id, GetDefaultLang())
            sts, data = self.cm.getPage(url)
            if not sts: return sub_tracks
        else:
            data = webpage

        sts, data = self.cm.ph.getDataBeetwenMarkers(data,
                                                     ';ytplayer.config =',
                                                     '};', False)
        if not sts: return sub_tracks
        try:
            player_config = json_loads(data.strip() + '}')
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = urllib.urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                # getPage() returns a (status, data) tuple, as above
                sts, caption_list = self.cm.getPage(list_url)
                if not sts: return sub_tracks
                printDBG(caption_list)

                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    return []
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = urllib.urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args[
                'caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            parsed_caption_url = urlparse(caption_url)
            caption_qs = compat_parse_qs(parsed_caption_url.query)

            sub_lang_list = {}
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(urllib.unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if not sub_lang: continue
                caption_qs.update({
                    'tlang': [sub_lang],
                    'fmt': ['vtt'],
                })
                sub_url = urlunparse(
                    parsed_caption_url._replace(
                        query=urllib.urlencode(caption_qs, True)))
                sub_tracks.append({
                    'title': lang_qs['n'][0].encode('utf-8'),
                    'url': sub_url,
                    'lang': sub_lang.encode('utf-8'),
                    'ytid': len(sub_tracks),
                    'format': 'vtt'
                })
        except Exception:
            printExc()
        return sub_tracks
Example #52
0
    def DeviceDetail(self, **unused_args):
        """Query for a specific device."""
        errormsg = None

        device_id = self.request.get('device_id')
        device = model.DeviceInfo.GetDeviceWithAcl(device_id)
        try:
            if not device:
                errormsg = 'Device %s not found' % device_id
                template_args = {
                    'error': errormsg,
                    'user': users.get_current_user().email(),
                    'logout_link': users.create_logout_url('/')
                }
                self.response.out.write(
                    template.render('templates/devicedetail.html',
                                    template_args))
                return

            # Get set of properties associated with this device
            query = device.deviceproperties_set
            query.order('-timestamp')
            properties = query.fetch(config.NUM_PROPERTIES_IN_LIST)

            # Get current tasks assigned to this device
            cur_schedule = [
                device_task.task for device_task in device.devicetask_set
            ]

            # Get measurements
            cursor = self.request.get('measurement_cursor')
            if self.request.get('all') == '1':
                query = db.GqlQuery(
                    'SELECT * FROM Measurement '
                    'WHERE ANCESTOR IS :1 '
                    'ORDER BY timestamp DESC', device.key())
            else:
                query = db.GqlQuery(
                    'SELECT * FROM Measurement '
                    'WHERE ANCESTOR IS :1 AND success = TRUE '
                    'ORDER BY timestamp DESC', device.key())
            if cursor:
                query.with_cursor(cursor)

            measurements = query.fetch(config.NUM_MEASUREMENTS_IN_LIST)
            # If there are more measurements to show, give the user a cursor
            if len(measurements) == config.NUM_MEASUREMENTS_IN_LIST:
                cursor = query.cursor()
                parsed_url = list(urlparse.urlparse(self.request.url))
                url_query_dict = {
                    'device_id': device_id,
                    'measurement_cursor': cursor,
                    'all': self.request.get('all')
                }
                parsed_url[4] = urllib.urlencode(url_query_dict)
                more_measurements_link = urlparse.urlunparse(parsed_url)
            else:
                more_measurements_link = None

            template_args = {
                'error': errormsg,
                'device_id': device_id,
                'dev': device,
                'properties': properties,
                'measurements': measurements,
                'more_measurements_link': more_measurements_link,
                'schedule': cur_schedule,
                'user': users.get_current_user().email(),
                'logout_link': users.create_logout_url('/'),
            }
            self.response.out.write(
                template.render('templates/devicedetail.html', template_args))
        except:
            raise
Example #53
0
def expander(self):
    # the json object to return
    data = {}

    # get the passed url
    url = self.request.get("url")

    # url has no scheme, default to http
    url = url if url_regex.match(url) is not None else "http://" + url

    # fix IDNA urls
    error = False
    try:
        # parse url into it's components
        parsed = list(urlparse.urlparse(url))
        # loop each label in the domain and convert them to ascii
        parsed[1] = ".".join([
            encodings.idna.ToASCII(domain) for domain in parsed[1].split(".")
        ])
        url = urlparse.urlunparse(parsed)
    except Exception as e:
        data["status"] = "InternalError"
        error = True

    # put together the basic data
    data["urls"] = [url]
    data["start_url"] = url
    data["end_url"] = url

    if not error:
        # if the input URL still doesn't start with http:// or https://, discard it
        if not url.startswith("http://") and not url.startswith("https://"):
            data["status"] = "InvalidURL"
        else:
            requests = 0
            # follow redirects, max x times
            while (requests < max_redirects):
                requests += 1
                try:
                    # fetch the url _without_ following redirects, we handle them manually
                    response = google.appengine.api.urlfetch.fetch(
                        url,
                        follow_redirects=False,
                        allow_truncated=True,
                        method="HEAD")
                except:
                    data["status"] = "InvalidURL"
                    break

                if response.status_code in (300, 301, 302, 303, 307):
                    if "location" in response.headers:
                        location = response.headers["location"]
                    elif "Location" in response.headers:
                        location = response.headers["Location"]
                    else:
                        data["status"] = "OK"
                        break
                    # check if the url is relative or absolute
                    if location.startswith('/'):
                        parsedloc = list(urlparse.urlparse(location))
                        parsedurl = list(urlparse.urlparse(url))
                        url = urlparse.urlunparse(parsedurl[:2] +
                                                  parsedloc[2:])
                    else:
                        url = location
                else:
                    # no more redirects; we're done
                    data["status"] = "OK"
                    break

                # add the current url to the urls array in the output
                data["urls"].append(url)
            else:
                data["status"] = "TooManyRedirects"

    data["redirects"] = len(data["urls"]) - 1
    data["end_url"] = url

    # output in json
    self.response.out.write(json.dumps(data))
Example #54
0
def iriToUri(iri):
    parts = urlparse.urlparse(iri)
    return urlparse.urlunparse(
        part.encode('idna') if parti == 1
        else urlEncodeNonAscii(part.encode('utf-8'))
        for parti, part in enumerate(parts))
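A self-contained sketch of the same IRI-to-URI idea; urlEncodeNonAscii is not defined in the snippet above, so a minimal placeholder is assumed here:

import urllib
import urlparse

def encode_non_ascii(s):
    # assumed helper: percent-encode non-ASCII bytes, pass ASCII through
    return ''.join(c if ord(c) < 128 else urllib.quote(c) for c in s)

def iri_to_uri(iri):
    parts = urlparse.urlparse(iri)
    return urlparse.urlunparse(tuple(
        part.encode('idna') if i == 1
        else encode_non_ascii(part.encode('utf-8'))
        for i, part in enumerate(parts)))

# the netloc is IDNA-encoded (punycode), all other parts are percent-encoded,
# e.g. iri_to_uri(u'http://caf\xe9.example/men\xfc') yields an ASCII-only URL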
Example #55
0
 def test_unparse_parse(self):
     for u in ['Python', './Python','x-newscheme://foo.com/stuff']:
         self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
         self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
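Note that the round trip is not always byte-identical; components that are present but empty can be dropped:

# urlparse.urlunparse(urlparse.urlparse('http://example.com/path?'))
# == 'http://example.com/path'   (the empty query marker '?' is lost)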
Example #56
0
def open_url(url, data=None, headers=None, method=None, use_proxy=True,
        force=False, last_mod_time=None, timeout=10, validate_certs=True,
        url_username=None, url_password=None, http_agent=None,
        force_basic_auth=False, follow_redirects='urllib2'):
    '''
    Fetches a file from an HTTP/FTP server using urllib2
    '''
    handlers = []
    ssl_handler = maybe_add_ssl_handler(url, validate_certs)
    if ssl_handler:
        handlers.append(ssl_handler)

    # FIXME: change the following to use the generic_urlparse function
    #        to remove the indexed references for 'parsed'
    parsed = urlparse.urlparse(url)
    if parsed[0] != 'ftp':
        username = url_username

        if headers is None:
            headers = {}

        if username:
            password = url_password
            netloc = parsed[1]
        elif '@' in parsed[1]:
            credentials, netloc = parsed[1].split('@', 1)
            if ':' in credentials:
                username, password = credentials.split(':', 1)
            else:
                username = credentials
                password = ''

            parsed = list(parsed)
            parsed[1] = netloc

            # reconstruct url without credentials
            url = urlparse.urlunparse(parsed)

        if username and not force_basic_auth:
            passman = urllib2.HTTPPasswordMgrWithDefaultRealm()

            # this creates a password manager
            passman.add_password(None, netloc, username, password)

            # because we have put None at the start it will always
            # use this username/password combination for  urls
            # for which `theurl` is a super-url
            authhandler = urllib2.HTTPBasicAuthHandler(passman)

            # create the AuthHandler
            handlers.append(authhandler)

        elif username and force_basic_auth:
            headers["Authorization"] = basic_auth_header(username, password)

        else:
            try:
                rc = netrc.netrc(os.environ.get('NETRC'))
                login = rc.authenticators(parsed[1])
            except IOError:
                login = None

            if login:
                username, _, password = login
                if username and password:
                    headers["Authorization"] = basic_auth_header(username, password)

    if not use_proxy:
        proxyhandler = urllib2.ProxyHandler({})
        handlers.append(proxyhandler)

    if HAS_SSLCONTEXT and not validate_certs:
        # In 2.7.9, the default context validates certificates
        context = SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        context.options |= ssl.OP_NO_SSLv3
        context.verify_mode = ssl.CERT_NONE
        context.check_hostname = False
        handlers.append(urllib2.HTTPSHandler(context=context))

    # pre-2.6 versions of python cannot use the custom https
    # handler, since the socket class is lacking create_connection.
    # Some python builds lack HTTPS support.
    if hasattr(socket, 'create_connection') and CustomHTTPSHandler:
        handlers.append(CustomHTTPSHandler)

    handlers.append(RedirectHandlerFactory(follow_redirects, validate_certs))

    opener = urllib2.build_opener(*handlers)
    urllib2.install_opener(opener)

    if method:
        if method.upper() not in ('OPTIONS','GET','HEAD','POST','PUT','DELETE','TRACE','CONNECT','PATCH'):
            raise ConnectionError('invalid HTTP request method; %s' % method.upper())
        request = RequestWithMethod(url, method.upper(), data)
    else:
        request = urllib2.Request(url, data)

    # add the custom agent header, to help prevent issues
    # with sites that block the default urllib agent string
    request.add_header('User-agent', http_agent)

    # if we're ok with getting a 304, set the timestamp in the
    # header, otherwise make sure we don't get a cached copy
    if last_mod_time and not force:
        tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000')
        request.add_header('If-Modified-Since', tstamp)
    else:
        request.add_header('cache-control', 'no-cache')

    # user defined headers now, which may override things we've set above
    if headers:
        if not isinstance(headers, dict):
            raise ValueError("headers provided to fetch_url() must be a dict")
        for header in headers:
            request.add_header(header, headers[header])

    urlopen_args = [request, None]
    if sys.version_info >= (2,6,0):
        # urlopen in python prior to 2.6.0 did not
        # have a timeout parameter
        urlopen_args.append(timeout)

    r = urllib2.urlopen(*urlopen_args)
    return r
Example #57
0
 def get_repo_url(self):
     return urlparse.urlunparse(
         ("file", "", urllib.pathname2url(self.__dir), "", "", ""))
Example #58
0
    def _create_request(self,
                        destination,
                        method,
                        path_bytes,
                        body_callback,
                        headers_dict={},
                        param_bytes=b"",
                        query_bytes=b"",
                        retry_on_dns_fail=True,
                        timeout=None,
                        long_retries=False):
        """ Creates and sends a request to the given url
        """
        headers_dict[b"User-Agent"] = [self.version_string]
        headers_dict[b"Host"] = [destination]

        url_bytes = self._create_url(destination, path_bytes, param_bytes,
                                     query_bytes)

        txn_id = "%s-O-%s" % (method, self._next_id)
        self._next_id = (self._next_id + 1) % (sys.maxint - 1)

        outbound_logger.info("{%s} [%s] Sending request: %s %s", txn_id,
                             destination, method, url_bytes)

        # XXX: Would be much nicer to retry only at the transaction-layer
        # (once we have reliable transactions in place)
        if long_retries:
            retries_left = MAX_LONG_RETRIES
        else:
            retries_left = MAX_SHORT_RETRIES

        http_url_bytes = urlparse.urlunparse(
            ("", "", path_bytes, param_bytes, query_bytes, ""))

        log_result = None
        try:
            while True:
                producer = None
                if body_callback:
                    producer = body_callback(method, http_url_bytes,
                                             headers_dict)

                try:

                    def send_request():
                        request_deferred = preserve_context_over_fn(
                            self.agent.request, method, url_bytes,
                            Headers(headers_dict), producer)

                        return self.clock.time_bound_deferred(
                            request_deferred,
                            time_out=timeout / 1000. if timeout else 60,
                        )

                    response = yield preserve_context_over_fn(send_request)

                    log_result = "%d %s" % (
                        response.code,
                        response.phrase,
                    )
                    break
                except Exception as e:
                    if not retry_on_dns_fail and isinstance(e, DNSLookupError):
                        logger.warn("DNS Lookup failed to %s with %s",
                                    destination, e)
                        log_result = "DNS Lookup failed to %s with %s" % (
                            destination, e)
                        raise

                    logger.warn(
                        "{%s} Sending request failed to %s: %s %s: %s - %s",
                        txn_id,
                        destination,
                        method,
                        url_bytes,
                        type(e).__name__,
                        _flatten_response_never_received(e),
                    )

                    log_result = "%s - %s" % (
                        type(e).__name__,
                        _flatten_response_never_received(e),
                    )

                    if retries_left and not timeout:
                        if long_retries:
                            delay = 4**(MAX_LONG_RETRIES + 1 - retries_left)
                            delay = min(delay, 60)
                            delay *= random.uniform(0.8, 1.4)
                        else:
                            delay = 0.5 * 2**(MAX_SHORT_RETRIES - retries_left)
                            delay = min(delay, 2)
                            delay *= random.uniform(0.8, 1.4)

                        yield sleep(delay)
                        retries_left -= 1
                    else:
                        raise
        finally:
            outbound_logger.info(
                "{%s} [%s] Result: %s",
                txn_id,
                destination,
                log_result,
            )

        if 200 <= response.code < 300:
            pass
        else:
            # :'(
            # Update transactions table?
            body = yield preserve_context_over_fn(readBody, response)
            raise HttpResponseException(response.code, response.phrase, body)

        defer.returnValue(response)
Example #59
0
def _create_oauth_tokens(dev_appserver_url,
                         email_and_password_of_user_wanting_access, anointed):
    """Helper for _setup_dev_appserver_for_oauth, to create two token-sets."""
    if anointed:
        name = 'test_consumer_anointed'
    else:
        name = 'test_consumer_not_anointed'

    # First, we need to create a user.
    (user_data, _, _) = _create_user('test_user_for_oauth_token')

    # To start the oauth request, we need a consumer token.
    consumer_object = models_oauth.Consumer.get_or_insert(
        key_name=name,
        name=name,
        description=name,
        website='',
        user=user_data.user,
        status=consts.ACCEPTED,
        phone='',
        company='',
        anointed=anointed)
    if not consumer_object.secret:  # we just created it
        consumer_object.generate_random_codes()
    consumer = oauth.OAuthConsumer(consumer_object.key_,
                                   consumer_object.secret)

    # Next, create an oauth request token.
    request = oauth.OAuthRequest.from_consumer_and_token(
        consumer, http_url='%s/api/auth/request_token' % dev_appserver_url)
    request.sign_request(oauth.OAuthSignatureMethod_PLAINTEXT(), consumer,
                         None)

    request_token_req = urllib2.urlopen(request.to_url())
    if request_token_req.code != 200:
        raise RuntimeError('Unable to get the request token, '
                           'instead got %d: "%s"' %
                           (request_token_req.code, request_token_req.read()))

    # Next, we need to register the request token in Khan Academy
    # oauth-map.  The response from the request-token fetch is the url
    # we need to hit to do this: probably /login/mobileoauth?<stuff>.
    # The only thing we need to add are the name and password of the
    # user who wants access (which can/will be different from the user
    # who created the consumer key, above).
    scheme, netloc, path, params, query, fragment = \
             urlparse.urlparse(request_token_req.geturl())
    query += ('&identifier=%s&password=%s' %
              email_and_password_of_user_wanting_access)
    # The query component must be a string (it is sent as the POST body
    # below), not a dict.
    oauth_map_url = urlparse.urlunparse(
        (scheme, netloc, path, params, '', fragment))
    oauth_map_req = urllib2.urlopen(oauth_map_url, query)
    contents = oauth_map_req.read()
    if contents != 'OK':
        raise RuntimeError(
            'Unable to get the access token, instead got: "%s"' % contents)
    # The url that we end up with after going through
    # /login/mobileauth, which yields a *second* request token.
    # TODO(csilvers): figure out what's going on here.
    oauth_map_key_and_secret = urlparse.urlparse(oauth_map_req.geturl())[4]
    oauth_map_token = oauth.OAuthToken.from_string(oauth_map_key_and_secret)

    # Finally, we can get the access token from the previous request token.
    request = oauth.OAuthRequest.from_consumer_and_token(
        consumer,
        token=oauth_map_token,
        http_url="%s/api/auth/access_token" % dev_appserver_url)
    request.sign_request(oauth.OAuthSignatureMethod_HMAC_SHA1(), consumer,
                         oauth_map_token)
    access_token_req = urllib2.urlopen(request.to_url())
    if access_token_req.code != 200:
        raise RuntimeError('Unable to get the access token, '
                           'instead got %d: "%s"' %
                           (access_token_req.code, access_token_req.read()))
    access_key_and_secret_str = access_token_req.read()
    access_token = oauth.OAuthToken.from_string(access_key_and_secret_str)

    return (consumer, access_token)
Example #60
0
def msg_search_subscription_notifications(frequency):
    """
        Send Notifications for all Subscriptions
    """

    s3db = current.s3db
    table = s3db.pr_saved_search

    if frequency not in dict(table.notification_frequency.requires.options()):
        return

    db = current.db
    searches = db(table.notification_frequency == frequency).select()
    if not searches:
        return

    import urlparse
    from urllib import urlencode
    from uuid import uuid4

    try:
        import json  # try stdlib (Python 2.6)
    except ImportError:
        try:
            import simplejson as json  # try external module
        except:
            import gluon.contrib.simplejson as json  # fallback to pure-Python module
    loads = json.loads

    from gluon.tools import fetch

    msg = current.msg
    settings = current.deployment_settings
    public_url = settings.get_base_public_url()
    system_name_short = settings.get_system_name_short()

    def send(search, message):
        if not message:
            return
        # Send the email
        msg.send_by_pe_id(search.pe_id,
                          subject="%s Search Notification %s" % \
                            (system_name_short, search.name),
                          message=message)

    for search in searches:
        # Fetch the latest records from the search

        # search.url has no host
        search_url = "%s%s" % (public_url, search.url)

        # Create a temporary token for this search
        # that will be used when impersonating users
        auth_token = uuid4()
        search.update_record(auth_token=auth_token)
        # Commit so that when we request via http, then we'll see the change
        db.commit()

        # Parsed URL, break up the URL into its components
        purl = list(urlparse.urlparse(search_url))

        if search.notification_batch:
            # Send all records in a single notification

            # query string parameters to be added to the search URL
            page_qs_parms = {
                "search_subscription": auth_token,
                "%s.modified_on__ge" % search.resource_name: search.last_checked,
                "format": "email",
            }

            # Turn the parameter list into a URL query string
            page_qs = urlencode(page_qs_parms)

            # Put the URL back together
            page_url = urlparse.urlunparse([
                purl[0],  # scheme
                purl[1],  # netloc
                purl[2],  # path
                purl[3],  # params
                "&".join([purl[4], page_qs]),  # query
                purl[5],  # fragment
            ])
            message = fetch(page_url)

            # Send the email
            send(search, message)

        else:
            # Not batch

            # query string parameters to be added to the search URL
            page_qs_parms = {
                "search_subscription": auth_token,
                "%s.modified_on__ge" % search.resource_name: search.last_checked,
                "format": "json",
            }

            # Turn the parameter list into a URL query string
            page_qs = urlencode(page_qs_parms)

            # Put the URL back together
            page_url = urlparse.urlunparse([
                purl[0],  # scheme
                purl[1],  # netloc
                purl[2],  # path
                purl[3],  # params
                "&".join([purl[4], page_qs]),  # query
                purl[5],  # fragment
            ])
            # Fetch the record list as json
            json_string = fetch(page_url)

            if json_string:
                records = loads(json_string)

                for record in records:
                    email_qs = urlencode({
                        "search_subscription": auth_token,
                        "format": "email",
                        "%s.id__eq" % search.resource_name: record["id"],
                    })
                    email_url = urlparse.urlunparse([
                        purl[0],  # scheme
                        purl[1],  # netloc
                        purl[2],  # path
                        purl[3],  # params
                        email_qs,  # query
                        purl[5],  # fragment
                    ])

                    message = fetch(email_url)

                    # Send the email
                    send(search, message)

    # Update the saved searches to indicate they've just been checked
    # & revoke the temporary token
    query = (table.notification_frequency == frequency) & \
            (table.deleted != True)
    db(query).update(
        last_checked=datetime.datetime.utcnow(),
        auth_token=None,
    )
    # Explicitly commit
    db.commit()