def correct_title(artist, title):
    tmp_json = 'tmp_track.json'
    title_corr = title  # fall back to the original title if no correction is found

    url_name = path_track_1 + quote(artist) + path_track_2 + quote(title) + path_track_3

    try:

        urllib.request.urlretrieve(url_name, tmp_json)

        with open(tmp_json) as corr_json:
            corr = json.load(corr_json)

            corr_data = corr['corrections']['correction']['track']

            if 'name' in corr_data:
                title_corr = corr_data['name']
    except Exception as err:
        print("error for:", artist, "-", title, "->", err)

    time.sleep(0.25)


    return title_corr
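The path_track_* fragments are module globals that are not shown above; the pattern matches a Last.fm-style track.getCorrection call. A hedged sketch with hypothetical values:

# Hypothetical URL fragments for the snippet above (not part of the original source):
from urllib.parse import quote

path_track_1 = 'http://ws.audioscrobbler.com/2.0/?method=track.getcorrection&artist='
path_track_2 = '&track='
path_track_3 = '&api_key=YOUR_API_KEY&format=json'

# quote() makes the artist and title safe to embed in the query string:
print(path_track_1 + quote('Guns N Roses') + path_track_2 + quote('Paradise City') + path_track_3)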
Example #2
    def urlencode(self, safe=None):
        """
        Returns an encoded string of all query string arguments.

        :arg safe: Used to specify characters which do not require quoting, for
            example::

                >>> q = QueryDict('', mutable=True)
                >>> q['next'] = '/a&b/'
                >>> q.urlencode()
                'next=%2Fa%26b%2F'
                >>> q.urlencode(safe='/')
                'next=/a%26b/'

        """
        output = []
        if safe:
            safe = smart_bytes(safe, self.encoding)
            encode = lambda k, v: '%s=%s' % ((quote(k, safe), quote(v, safe)))
        else:
            encode = lambda k, v: urlencode({k: v})
        for k, list_ in self.lists():
            k = smart_bytes(k, self.encoding)
            output.extend([encode(k, smart_bytes(v, self.encoding))
                           for v in list_])
        return '&'.join(output)
Example #3
def get_history_data(d, g, prefix_dict=None):
	''' Given a HistoryData object and graph, add status and replaces relationships
	to the graph.'''
	
	print('gathering history information from {0} ...'.format(str(d._prefix)))
	if not isinstance(d, datasets.HistoryDataSet):
		print('ERROR - {0} is not a HistoryDataSet object!'.format(str(d)))
		return None

	else:
		if prefix_dict is None:
			print('Building required prefix dictionary ...')
			prefix_dict = build_prefix_dict()
		n = Namespace("http://www.openbel.org/bel/namespace/" + prefix_dict[d._prefix] + '/')
		scheme = namespace[prefix_dict[d._prefix]]
		obsolete = d.get_obsolete_ids()
		for term_id, new_value in obsolete.items():
			term_clean = parse.quote(term_id)
			term_uri = URIRef(n[term_clean])
			g.add((term_uri, SKOS.inScheme, scheme))
			if new_value == 'withdrawn':
				g.add((term_uri, BELV.status, Literal('withdrawn')))
			elif new_value is not None:
				new_value_clean = parse.quote(new_value)
				new_uri = URIRef(n[new_value_clean])
				g.add((term_uri, BELV.status, Literal('retired')))
				g.add((new_uri, DCTERMS.replaces, term_uri))
			else:
				print('Check values for {0}: {1}'.format(str(d), term_id))
Example #4
def test_api_filter_by_milestone__estimated_start_and_end(client, field_name):
    user = f.UserFactory.create()
    project = f.ProjectFactory.create(owner=user)
    role = f.RoleFactory.create(project=project)
    f.MembershipFactory.create(
        project=project, user=user, role=role, is_admin=True
    )
    milestone = f.MilestoneFactory.create(project=project, owner=user)

    assert hasattr(milestone, field_name)
    date = getattr(milestone, field_name)
    before = (date - timedelta(days=1)).isoformat()
    after = (date + timedelta(days=1)).isoformat()

    client.login(milestone.owner)

    expectations = {
        field_name + "__gte=" + quote(before): 1,
        field_name + "__gte=" + quote(after): 0,
        field_name + "__lte=" + quote(before): 0,
        field_name + "__lte=" + quote(after): 1
    }

    for param, expectation in expectations.items():
        url = reverse("milestones-list") + "?" + param
        response = client.get(url)
        number_of_milestones = len(response.data)

        assert response.status_code == 200
        assert number_of_milestones == expectation, param
        if number_of_milestones > 0:
            assert response.data[0]["slug"] == milestone.slug
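The quote() calls in the test matter because an ISO-8601 timestamp with a timezone offset contains '+', which a server would otherwise decode as a space. A minimal illustration:

from urllib.parse import quote

ts = '2020-01-02T03:04:05+00:00'
print(quote(ts))  # 2020-01-02T03%3A04%3A05%2B00%3A00 - the '+' survives as %2B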
Example #5
    def amz_cano_querystring(qs):
        """
        Parse and format querystring as per AWS4 auth requirements.

        Perform percent quoting as needed.

        qs -- querystring

        """
        safe_qs_amz_chars = '&=+'
        safe_qs_unresvd = '-_.~'
        # If Python 2, switch to working entirely in str
        # as quote() has problems with Unicode
        if PY2:
            qs = qs.encode('utf-8')
            safe_qs_amz_chars = safe_qs_amz_chars.encode()
            safe_qs_unresvd = safe_qs_unresvd.encode()
        qs = unquote(qs)
        space = b' ' if PY2 else ' '
        qs = qs.split(space)[0]
        qs = quote(qs, safe=safe_qs_amz_chars)
        qs_items = {}
        for name, vals in parse_qs(qs, keep_blank_values=True).items():
            name = quote(name, safe=safe_qs_unresvd)
            vals = [quote(val, safe=safe_qs_unresvd) for val in vals]
            qs_items[name] = vals
        qs_strings = []
        for name, vals in qs_items.items():
            for val in vals:
                qs_strings.append('='.join([name, val]))
        qs = '&'.join(sorted(qs_strings))
        if PY2:
            qs = unicode(qs)
        return qs
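A minimal sketch of the same canonicalisation on the Python 3 path (unquote, re-quote each name and value with the unreserved characters, then sort):

from urllib.parse import parse_qs, quote, unquote

qs = unquote('b=B&a=spa%20ce&a=1')
items = []
for name, vals in parse_qs(qs, keep_blank_values=True).items():
    for val in vals:
        items.append(quote(name, safe='-_.~') + '=' + quote(val, safe='-_.~'))
print('&'.join(sorted(items)))  # a=1&a=spa%20ce&b=B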
Example #6
def clone_repo(repo, backup_dir, http, username, password, mirror=False, with_wiki=False):
    global _quiet, _verbose
    scm = repo.get('scm')
    slug = repo.get('slug')
    owner = repo.get('owner')

    owner_url = quote(owner)
    username_url = quote(username)
    password_url = quote(password)
    slug_url = quote(slug)
    command = None
    if scm == 'hg':
        if http:
            command = 'hg clone https://%s:%s@bitbucket.org/%s/%s' % (username_url, password_url, owner_url, slug_url)
        else:
            command = 'hg clone ssh://hg@bitbucket.org/%s/%s' % (owner_url, slug_url)
    if scm == 'git':
        git_command = 'git clone'
        if mirror:
            git_command = 'git clone --mirror'
        if http:
            command = "%s https://%s:%[email protected]/%s/%s.git" % (git_command, username_url, password_url, owner_url, slug_url)
        else:
            command = "%s [email protected]:%s/%s.git" % (git_command, owner_url, slug_url)
    if not command:
        exit("could not build command (scm [%s] not recognized?)" % scm)
    debug("Cloning %s..." % repo.get('name'))
    exec_cmd('%s "%s"' % (command, backup_dir))
    if with_wiki and repo.get('has_wiki'):
        debug("Cloning %s's Wiki..." % repo.get('name'))
        exec_cmd("%s/wiki %s_wiki" % (command, backup_dir))
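The credentials are quoted above because an '@' or ':' in a password would otherwise be read as a URL delimiter; a quick check:

from urllib.parse import quote

print(quote('p@ss:word'))  # p%40ss%3Aword - safe to splice into https://user:pass@host URLs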
Example #7
def renderClusterHtml(clust,width,height,scopeFile=None):
    html = ''
    scopeHtml = getScopeHtml(scopeFile)
    header = '''
    	<HTML>
    		<title>Web Application Catalog</title>
    		<BODY>
    			<h1>Web Application Catalog</h1>
    '''
    if scopeHtml is not None:
        header = header + scopeHtml
    header = header + '''
    			<script type="text/javascript" src="popup.js"></script>
    			<LINK href="style.css" rel="stylesheet" type="text/css">
    			<h3>Catalog:</h3>
    			'''
    html = html + '<table border="1">'

    for cluster, siteList in clust.items():
        html = html + '<TR>'
        screenshotName = quote(siteList[0][0:-4], safe='')
        html = html + '<TR><TD><img src="' + screenshotName + 'png" width=' + str(width) + ' height=' + str(height) + '/></TD></TR>'
        for site in siteList:
            screenshotName = quote(site[0:-5], safe='')
            html = html + '<TD onmouseout="clearPopup()" onmouseover="popUp(event,\'' + screenshotName + '.png\');"><a href="' + unquote(unquote(screenshotName[4:])) + '">' + unquote(unquote(screenshotName[4:])) + '</a></TD>'
        html = html + '</TR>'
    html = html + '</table>'
    footer = '</BODY></HTML>'

    return [header,html,footer]
Example #8
    def __str__(self):
        """
        Like L{to_string()}.

        @rtype:  str
        @return: A URL.
        """
        url = ''
        if self.protocol is not None:
            url += self.protocol + '://'
        if self.username is not None or \
           self.password1 is not None or \
           self.password2 is not None:
            if self.username is not None:
                url += quote(self.username, '')
            if self.password1 is not None or self.password2 is not None:
                url += ':'
            if self.password1 is not None:
                url += quote(self.password1, '')
            if self.password2 is not None:
                url += ':' + quote(self.password2, '')
            url += '@'
        url += self.hostname
        if self.port:
            url += ':' + str(self.port)
        if self.path:
            url += '/' + self.path

        if self.vars:
            pairs = []
            for key, values in list(self.vars.items()):
                for value in values:
                    pairs.append((key, value))
            url += '?' + urlencode(pairs)
        return url
Example #9
def user_agent_username(username=None):
    """
    Reduce username to a representation permitted in HTTP headers.

    To achieve that, this function:
    1) replaces spaces (' ') with '_'
    2) encodes the username as 'utf-8' if it is not ASCII
    3) URL-encodes the username if it is not ASCII, or contains '%'
    """
    if not username:
        return ''
    username = username.replace(' ', '_')  # Avoid spaces or %20.
    try:
        username.encode('ascii')  # just test, but not actually use it
    except UnicodeEncodeError:
        pass
    else:
        # % is legal in the default $wgLegalTitleChars
        # This is so that ops know the real pywikibot will not
        # allow a useragent in the username to allow through a hand-coded
        # percent-encoded value.
        if '%' in username:
            return quote(username)
        else:
            return username
    username = quote(username.encode('utf-8'))
    return username
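Expected behaviour of the helper above, following its three rules:

print(user_agent_username('John Doe'))  # John_Doe (spaces replaced, ASCII kept as-is)
print(user_agent_username('50%'))       # 50%25 (ASCII but contains '%', so it is quoted)
print(user_agent_username('Jürgen'))    # J%C3%BCrgen (non-ASCII: UTF-8 encoded, then quoted)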
Example #10
def _encode_query_string_flatten(obj, parent, encoding):
    if isinstance(obj, dict):
        if obj:
            for member in obj:
                yield from _encode_query_string_flatten(obj[member], parent + (quote(member, encoding=encoding),), encoding)
        elif parent:
            yield (parent, '')
    elif isinstance(obj, (list, tuple)):
        if obj:
            for idx, value in enumerate(obj):
                yield from _encode_query_string_flatten(value, parent + (quote(str(idx), encoding=encoding),), encoding)
        elif parent:
            yield (parent, '')
    else:
        if isinstance(obj, date):
            ostr = obj.isoformat()
        elif isinstance(obj, datetime):
            ostr = (obj if obj.tzinfo else obj.replace(tzinfo=TZLOCAL)).isoformat()
        elif isinstance(obj, UUID):
            ostr = str(obj)
        elif isinstance(obj, bool):
            ostr = 'true' if obj else 'false'
        elif obj is None:
            ostr = 'null'
        else:
            ostr = obj if isinstance(obj, str) else str(obj)
        yield (parent, quote(ostr, encoding=encoding))
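A hedged driver for the generator above (assuming the enclosing module's imports - date, datetime, UUID, TZLOCAL - are in scope); joining the flattened key paths with '.' is one plausible rendering:

pairs = _encode_query_string_flatten({'a': [1, 2], 'b': {'c': True}}, (), 'utf-8')
print('&'.join('.'.join(parent) + '=' + value for parent, value in pairs))
# a.0=1&a.1=2&b.c=true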
Example #11
    def _list_dir(self, path):
        """
        Show a directory listing.

        """
        entries = os.listdir(path)
        dir_entries = [[[
            '..',
            quote(os.path.normpath(os.path.join(path, '..')), safe='')
        ]]]
        for name in entries:
            if name.startswith('.'):
                # skip invisible files/directories
                continue
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname += '/'
            if os.path.islink(fullname):
                displayname += '@'
            dir_entries.append(
                [[displayname, quote(os.path.join(path, linkname), safe='')]])

        self.render(
            'dir.html', dir_name=path, dir_entries=dir_entries)
Example #12
def update_tracker(session_token, download_id, tracker):
    announce_url = tracker['announce']
    parts = list(urlparse(announce_url))
    parts[1] = NEW_TRACKER_HOST
    new_announce = urlunparse(parts)
    print(">  UPDATE tracker %s ==> %s" % (announce_url, new_announce))
    # add new tracker
    url = MAFREEBOX_API_URL + ("downloads/%d/trackers" % download_id)
    rep = requests.post(url, json={
        'announce': new_announce,
        'is_enabled': True
    }, headers={
        'X-Fbx-App-Auth': session_token
    })
    get_api_result(rep)

    # remove prev tracker
    url = MAFREEBOX_API_URL + ("downloads/%d/trackers/%s" % (download_id, quote(announce_url, safe='')))
    rep = requests.delete(url, headers={
        'X-Fbx-App-Auth': session_token
    })
    get_api_result(rep)

    # activate the new tracker
    url = MAFREEBOX_API_URL + ("downloads/%d/trackers/%s" % (download_id, quote(new_announce, safe='')))
    rep = requests.put(url, json={  # PUT updates the tracker's is_enabled flag
        'is_enabled': True
    }, headers={
        'X-Fbx-App-Auth': session_token
    })
    get_api_result(rep)
Example #13
    def redirect(self, url, permanent=False, anchor=""):
        """Cause a redirection without raising an error 
    - accepts full URL (eg http://whatever...) or URI (eg /versere/Account/2/view)
    - retains anchor and messages
    """
        if type(url) == bytes:
            url = str(url, 'utf8')
        url = str(url).strip()  # safety first - defend against unicode, which will break both http and string matching
        #    print "REDIRECT:",url
        if not url.startswith('http'):
            url = "http://%s%s" % (self.get_host(), url)

#    print "REDIRECT full url:",url or "no url", " == anchor: ", anchor or "no anchor"
        if '#' in url:
            url, anchor = url.rsplit('#', 1)
        ats = self.error and ['error=%s' % quote(self.error)] or []
        if self.warning:
            ats.append('warning=%s' % quote(self.warning))
        if self.message:
            ats.append('message=%s' % quote(self.message))
        q = url.find('?') > -1 and "&" or "?"
        ats = ats and (q + '&'.join(ats)) or ""
        url = '%s%s%s%s' % (url, ats, anchor and '#' or '', anchor)
        # do the redirect
        self.request.setResponseCode(permanent and 301 or 302, None)
        if type(url) != bytes:
            url = bytes(bytearray(url, 'utf8'))
        self.request.setHeader('Location', url)
        return " "  #return a True blank string, to indicate we have a page result
Example #14
  def query(self, params):
    '''Perform an uncached SpinPapi query.

      :param dict params: SpinPapi query parameters. At least method
        must be present in the dict.

      :return string: A complete signed query URL.
      '''

    # Add requested params to the object's default params (on a copy, so the
    # defaults are not mutated across calls)
    all_params = dict(self.params)
    for key, val in params.items():
      all_params[key] = val

    # Add current GMT timestamp to params
    all_params['timestamp'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

    # Sort params alphabetically by dict key
    all_params = sorted(all_params.items(), key=itemgetter(0))

    # Build the urlencoded query in a list and join it with '&' chars
    the_query = []
    for key, val in all_params:
      the_query.append(quote(key) + '=' + quote(val))
    the_query = '&'.join(the_query)

    # Construct the query's HMAC signature.
    signature = bytes(self.host + '\n' + self.url + '\n' + the_query, 'utf-8')
    signature = hmac.new(self.secret, signature, hashlib.sha256)
    signature = signature.digest()
    signature = base64.b64encode(signature)
    signature = quote(signature, '')

    return ('http://' + self.host + self.url
        + '?' + the_query + '&signature=' + signature)
Example #15
def upload(recipe, result, server, key=None):
    '''upload build'''
    branch = result.pop('branch', 'unknown')

    # FIXME: use urljoin
    request = Request('{}/build/{}/{}/{}'.format(
        server, quote(recipe['name']), quote(branch),
        quote('{} {}'.format(sys.platform, platform.machine()))))

    request.add_header('Content-Type', 'application/json')
    if key is not None:
        request.add_header('Authorization', key)

    try:
        urlopen(request, json.dumps(result).encode('UTF-8'))
    except HTTPError as exc:
        logging.error("The server couldn't fulfill the request.")
        logging.error('Error code: %s', exc.code)
        if exc.code == 400:
            logging.error("Client is broken, wrong syntax given to server")
        elif exc.code == 401:
            logging.error("Wrong key provided for project.")
        logging.error("%s", exc.read())
        return False
    except URLError as exc:
        logging.error('Failed to reach a server.')
        logging.error('Reason: %s', exc.reason)
        return False

    return True
Example #16
    async def get(self):
        headers = {
            "apikey": self.API_KEY
        }

        name = self.get_query_argument("name")
        keyword = self.get_query_argument("keyword")

        page_args = self.get_query_arguments("page")
        rows_args = self.get_query_arguments("rows")
        type_args = self.get_query_arguments("type")

        params = dict()
        params["name"] = quote(name)
        params["keyword"] = quote(keyword)
        params["page"] = page_args[0] if len(page_args) else 1
        params["rows"] = rows_args[0] if len(rows_args) else 20
        params["type"] = type_args[0] if len(type_args) else ""

        request = HTTPRequest(self.URL.format(**params), method="GET", headers=headers)
        response = await self.browser.fetch(request)
        if response.code == 200:
            html = response.body.decode("utf-8")
            print(html)
            # result = json.loads(html)
            # data = result["result"]["data"]
            # print(data)

        else:
            self.logger.debug("none 200 response code")
            raise HTTPError
Example #17
    def __init__(self, name, pusher):
        self.pusher = pusher
        self.name = str(name)
        if not channel_name_re.match(self.name):
            raise NameError("Invalid channel id: %s" % self.name)
        self.path = '/apps/%s/channels/%s/events' % (self.pusher.app_id, quote(self.name))
        self.users_path = '/apps/%s/channels/%s/users' % (self.pusher.app_id, quote(self.name))
Example #18
def wolframalpha(text):
    """
    Query WolframAlpha about text.

    >>> print(wolframalpha('mass of sol'))
    Input interpretation: Sun | mass / Result: 1.988435×10^30 kg  (kilograms) / Unit conversions: 4.383749×10^30 lb  (pounds) / 2.191874×10^27 sh tn  (short tons) / 1.988435×10^33 grams / 1 M_☉  (solar ma http://wolframalpha.com/?input=mass%20of%20sol

    Check URL encoding:
    >>> print(wolframalpha('4+6'))
    Input: 4+6 / Result: 10 / Number name: ten / Number line: Manipulatives illustration:  | + |  |  |  4 |  | 6 |  | 10 / Typical human computation times: age 6:  5.3 seconds  |  age 8:  2.6 seconds  |  age 10:  1.7 seconds  |   age 18:  0.93 seconds (ignoring concentration, repetition, variations in education, etc.) / 

    >>> print(wolframalpha('é'))
    Input interpretation: é  (character) / Visual form: Name: Latin small letter e with acute / Positions in alphabets: Czech | 9th letter (33rd letter from the end) Slovak | 12th letter (35th letter from http://wolframalpha.com/?input=%C3%A9
    """
    r = urlopen(
        'http://api.wolframalpha.com/v2/query?format=plaintext&appid=3JEW42-4XXE264A93&input='
        + quote(text))
    tree = ET.parse(r)
    reply = ''
    for n in tree.iter():
        if n.tag == 'pod':
            reply += n.attrib['title'] + ': '
        if n.tag == 'plaintext' and n.text and len(n.text.strip()):
            reply += n.text + ' / '
    if len(reply) > 512:
        reply = reply[:200] + " http://wolframalpha.com/?input=" + quote(text)
    r.close()
    return reply.replace('\n', ' ')
Example #19
    def create_binding(self, exchange_name, queue_name, routing_key):
        """
        Create a message binding.

        :param str exchange_name: name of the exchange to create/update
        :param str queue_name: name of the queue to create/update
        :param str routing_key: routing key that binds the exchange
            and the queue

        This method creates the specified queue and exchange if they
        do not already exist and then binds `routing_key` such that
        messages with it are routed through the exchange and queue.

        """
        if not self.virtual_host:
            raise RuntimeError(
                'attempted to create a binding without a virtual host')

        exchange_name = parse.quote(exchange_name, safe='')
        queue_name = parse.quote(queue_name, safe='')

        self._rabbit_api_request(
            'PUT', 'queues', self.virtual_host, queue_name,
            data={'auto_delete': False, 'durable': False},
        ).raise_for_status()
        self._rabbit_api_request(
            'PUT', 'exchanges', self.virtual_host, exchange_name,
            data={'type': 'topic', 'durable': False},
        ).raise_for_status()
        self._rabbit_api_request(
            'POST', 'bindings', self.virtual_host,
            'e', exchange_name, 'q', queue_name,
            data={'routing_key': routing_key},
        ).raise_for_status()
Example #20
def encode(username, password):
    """Returns an HTTP basic authentication encrypted string given a valid
    username and password.
    """
    credentials = '{0}:{1}'.format(quote(username), quote(password)).\
        encode('latin1')
    return 'Basic {0}'.format(b64encode(credentials).decode('latin1'))
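Usage sketch (quote from urllib.parse and b64encode from base64, as the snippet assumes); quoting first keeps a ':' in the password from clashing with the username:password separator:

print(encode('user', 'pa:ss'))  # Basic dXNlcjpwYSUzQXNz ('pa:ss' became 'pa%3Ass' before encoding)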
Example #21
    def search(self, term):
        try:
            exactterm = format_term(term)
            exactterm = quote(exactterm)
            exacturl = self.endpoints['url'].format(exactterm)
            html = web.get(exacturl)
            return (html, exacturl)
        except HTTPError:
            pass

        term = deformat_term(term)
        term = quote(term)
        apiurl = self.endpoints['api'].format(term)

        try:
            result = json.loads(web.get(apiurl))
        except ValueError as e:
            raise ContentDecodingError(str(e))

        if 'error' in result:
            raise ServerFault(result['error'])

        result = result['query']['search']

        if not result:
            return None

        term = result[0]['title']
        term = format_term(term)
        term = quote(term)

        url = self.endpoints['url'].format(term)
        html = web.get(url)
        return (html, url)
Example #22
def normalize_uri(uri):
    try:
        return quote(uri, safe=str('/@:+?=&()%#*,'))
    except KeyError:
        # Python 2 throws a KeyError sometimes
        try:
            return quote(uri.encode('utf-8'), safe=str('/@:+?=&()%#*,'))
        except UnicodeDecodeError:
            # Python 2 also throws a UnicodeDecodeError, complaining about
            # the width of the "safe" string. Removing this parameter
            # solves the issue but yields overly aggressive quoting, so we
            # correct those errors manually.
            s = quote(uri.encode('utf-8'))
            s = re.sub(r'%40', '@', s)
            s = re.sub(r'%3A', ':', s)
            s = re.sub(r'%2B', '+', s)
            s = re.sub(r'%3F', '?', s)
            s = re.sub(r'%3D', '=', s)
            s = re.sub(r'%26', '&', s)
            s = re.sub(r'%28', '(', s)
            s = re.sub(r'%29', ')', s)
            s = re.sub(r'%25', '%', s)
            s = re.sub(r'%23', '#', s)
            s = re.sub(r'%2A', '*', s)
            s = re.sub(r'%2C', ',', s)
            return s
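A quick check of what the safe set preserves - the URL structure survives while spaces are still escaped:

print(normalize_uri('http://example.com/a b?q=1&r=(2)#frag'))
# http://example.com/a%20b?q=1&r=(2)#frag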
Example #23
    def test_filter_within(self):
        golden_gate_park_json = """{"type": "MultiPolygon", "coordinates": [[[[-122.511067, 37.771276], [-122.510037, 37.766391], [-122.510037, 37.763813], [-122.456822, 37.765848], [-122.452960, 37.766459], [-122.454848, 37.773990], [-122.475362, 37.773040], [-122.511067, 37.771276]]]]}"""

        # Get points
        connection = self.get_connection()
        connection.request('GET', '/api/v1/geonotes/?points__within=%s' % quote(golden_gate_park_json), headers={'Accept': 'application/json'})
        response = connection.getresponse()
        connection.close()
        self.assertEqual(response.status, 200)

        data = json.loads(response.read().decode('utf-8'))
        # We get back the points inside Golden Gate park!
        self.assertEqual(data['objects'][0]['content'], "Wooo two points inside Golden Gate park")
        self.assertEqual(data['objects'][0]['points']['type'], 'MultiPoint')
        self.assertAlmostEqual(data['objects'][0]['points']['coordinates'][0][0], -122.475233, places=5)
        self.assertAlmostEqual(data['objects'][0]['points']['coordinates'][0][1], 37.768616, places=5)
        self.assertAlmostEqual(data['objects'][0]['points']['coordinates'][1][0], -122.470416, places=5)
        self.assertAlmostEqual(data['objects'][0]['points']['coordinates'][1][1], 37.767381, places=5)

        # Get lines
        connection = self.get_connection()
        connection.request('GET', '/api/v1/geonotes/?lines__within=%s' % quote(golden_gate_park_json), headers={'Accept': 'application/json'})
        response = connection.getresponse()
        connection.close()
        self.assertEqual(response.status, 200)

        data = json.loads(response.read().decode('utf-8'))
        # We get back the line inside Golden Gate park!
        self.assertEqual(data['objects'][0]['content'], "A path inside Golden Gate Park! Huzzah!")
        self.assertEqual(data['objects'][0]['lines']['type'], 'MultiLineString')
        self.assertAlmostEqual(data['objects'][0]['lines']['coordinates'][0][0][0], -122.504544, places=5)
        self.assertAlmostEqual(data['objects'][0]['lines']['coordinates'][0][0][1], 37.767002, places=5)
        self.assertAlmostEqual(data['objects'][0]['lines']['coordinates'][0][1][0], -122.499995, places=5)
        self.assertAlmostEqual(data['objects'][0]['lines']['coordinates'][0][1][1], 37.768223, places=5)
Example #24
    def calc_signature(self, args):
        split = urlsplit(args['url'])
        path = split.path
        if len(path) == 0:
            path = '/'
        string_to_sign = '%s\n%s\n%s\n' % (args['method'],
                                           split.netloc,
                                           path)
        lhmac = hmac.new(self.credentials.secret_key.encode('utf-8'),
                         digestmod=sha256)
        args['params']['SignatureMethod'] = 'HmacSHA256'
        if self.credentials.token:
            args['params']['SecurityToken'] = self.credentials.token
        sorted_params = sorted(args['params'])
        pairs = []
        for key in sorted_params:
            value = args['params'][key]
            pairs.append(quote(key, safe='') + '=' +
                         quote(value, safe='-_~'))
        qs = '&'.join(pairs)
        string_to_sign += qs
        logger.debug('string_to_sign')
        logger.debug(string_to_sign)
        lhmac.update(string_to_sign.encode('utf-8'))
        b64 = base64.b64encode(lhmac.digest()).strip().decode('utf-8')
        return (qs, b64)
Example #25
  def test_x_delete_name(self):
    method = 'test_delete_name'

    rv = self.app.post('/api/roles', content_type='application/json', data=self.new_role_body)

    url = '/api/roles/name/{}?session_id=test'.format(quote(self.new_role.theName))
    rv = self.app.delete(url)
    if (sys.version_info > (3,)):
      responseData = rv.data.decode('utf-8')
    else:
      responseData = rv.data
    self.logger.debug('[%s] Response data: %s', method, responseData)
    json_resp = json_deserialize(responseData)
    self.assertIsNotNone(json_resp, 'No results after deserialization')
    message = json_resp.get('message', None)
    self.assertIsNotNone(message, 'No message returned')
    self.logger.info('[%s] Message: %s\n', method, message)

    rv = self.app.post('/api/roles', content_type='application/json', data=self.new_role_body)

    url = '/api/roles/name/Test2'

    upd_role = self.new_role
    upd_role.theName = 'Test2'
    upd_role_dict = self.new_role_dict
    upd_role_dict['object'] = upd_role
    upd_role_body = jsonpickle.encode(upd_role_dict)
    rv = self.app.put(url, content_type='application/json', data=upd_role_body)
    url = '/api/roles/name/Test2?session_id=test'
    rv = self.app.delete(url)
Example #26
    def __make_query_filter(self, filters):
        if isinstance(filters, list):
            formated_filter = "&filter=".join(quote(f) for f in filters)
        else:
            formated_filter = quote(filters)

        return "&filter=" + formated_filter
Example #27
def _oauth10a_signature(consumer_token,
                        method, url, parameters={}, token=None):
    """Calculates the HMAC-SHA1 OAuth 1.0a signature for the given request.

    See http://oauth.net/core/1.0a/#signing_process
    """
    parts = urlparse.urlparse(url)
    scheme, netloc, path = parts[:3]
    normalized_url = scheme.lower() + "://" + netloc.lower() + path

    base_elems = []
    base_elems.append(method.upper())
    base_elems.append(normalized_url)
    base_elems.append("&".join("%s=%s" % (k, _oauth_escape(str(v)))
                               for k, v in sorted(parameters.items())))

    base_string = "&".join(_oauth_escape(e) for e in base_elems)
    key_elems = [escape.utf8(
                 urllib_parse.quote(consumer_token["secret"], safe='~'))]
    key_elems.append(escape.utf8(
                     urllib_parse.quote(token["secret"], safe='~') if token else ""))
    key = "&".join(key_elems)

    hash = hmac.new(key, escape.utf8(base_string), hashlib.sha1)
    return binascii.b2a_base64(hash.digest())[:-1]
Example #28
    def sites_linking_in(self, urls, count=MAX_SITES_LINKING_IN_COUNT, start=0):
        if count > self.MAX_SITES_LINKING_IN_COUNT:
            raise RuntimeError("Maximum SitesLinkingIn result count is %s." % self.MAX_SITES_LINKING_IN_COUNT)

        params = { "Action": "SitesLinkingIn" }
        if not isinstance(urls, (list, tuple)):
            params.update({
                "Url": quote(urls),
                "ResponseGroup": "SitesLinkingIn",
                "Count": count,
                "Start": start,
             })
        else:
            if len(urls) > self.MAX_BATCH_REQUESTS:
                raise RuntimeError("Maximum number of batch URLs is %s." % self.MAX_BATCH_REQUESTS)

            params.update({
                "SitesLinkingIn.Shared.ResponseGroup": "SitesLinkingIn",
                "SitesLinkingIn.Shared.Count": count,
                "SitesLinkingIn.Shared.Start": start,
            })

            for i, url in enumerate(urls):
                params.update({"SitesLinkingIn.%d.Url" % (i + 1): quote(url)})

        return self.request(params)
Example #29
	def get(self):
		data = {'is_debug': config('DEBUG')}
		urls = []

		session = Session()

		try:
			pages = session.query(StaticPageModel)\
				.filter_by(is_active=True)\
				.order_by(StaticPageModel.id.asc()).all()
			sections = session.query(CatalogSectionModel)\
				.filter_by(is_active=True)\
				.order_by(CatalogSectionModel.id.asc()).all()
			items = session.query(CatalogItemModel)\
				.filter_by(is_active=True)\
				.order_by(CatalogItemModel.id.asc()).all()
		except Exception as e:
			session.close()
			print('SiteMapRoute.get(): cannot get data from DB:\n',\
				e, file=sys.stderr)
			raise e

		session.close()

		for page in [x.item for x in pages]:
			if '404' in page['alias']:
				continue
			urls.append({
				'alias': quote(page['alias'], encoding='utf-8'),
				'lastmod': page['last_change']
			})

		for section in [x.item for x in sections]:
			url = '/catalog/{0}.html'.format(section['alias'])
			url = quote(url, encoding='utf-8')
			urls.append({
				'alias': url,
				'lastmod': section['last_change']
			})

		for item in [x.item for x in items]:
			section_alias = None
			for section in [x.item for x in sections]:
				if section['id'] == item['section_id']:
					section_alias = section['alias']
			if section_alias is None:
				e = Exception('SiteMapRoute: '+\
					'cannot find section for element #%d' % item['id'])
				print(e, file=sys.stderr)
				continue
			url = '/catalog/{0}/{1}.html'.format(section_alias, item['alias'])
			url = quote(url, encoding='utf-8')
			urls.append({
				'alias': url,
				'lastmod': item['last_change']
			})

		data.update({'urls': tuple(urls)})
		self.set_header('Content-Type', 'text/xml; charset="utf-8"')
		return self.render('client/sitemap.jade', **data)
Example #30
def normalize_uri(uri):
    try:
        return quote(uri, safe="/@:+?=&()%#*")
    except KeyError:
        # Python 2 throws a KeyError sometimes
        try:
            return quote(uri.encode("utf-8"), safe="/@:+?=&()%#*")
        except UnicodeDecodeError:
            # Python 2 also throws a UnicodeDecodeError, complaining about
            # the width of the "safe" string. Removing this parameter
            # solves the issue but yields overly aggressive quoting, so we
            # correct those errors manually.
            s = quote(uri.encode("utf-8"))
            s = re.sub(r"%40", "@", s)
            s = re.sub(r"%3A", ":", s)
            s = re.sub(r"%2B", "+", s)
            s = re.sub(r"%3F", "?", s)
            s = re.sub(r"%3D", "=", s)
            s = re.sub(r"%26", "&", s)
            s = re.sub(r"%28", "(", s)
            s = re.sub(r"%29", ")", s)
            s = re.sub(r"%25", "%", s)
            s = re.sub(r"%23", "#", s)
            s = re.sub(r"%2A", "*", s)
            return s
Example #31
class DmozSpider4(scrapy.Spider):  # subclasses Spider

    print("entered %s!!!!!!!!!" % num)
    import os
    if os.path.exists('output'):
        shutil.rmtree('output')
    yuming = '中国青年'  # site name: "China Youth"
    lang = '英语'  # language: "English"
    '''
    All hyperparameters are changed here; only the 2 below matter.
    name: pick anything unique, it is the name used to start the spider from main.
    html: the site to crawl.
    '''
    name = "dmoz%s" % num  # the spider's unique id; must not repeat, used when launching it
    print("name", name)
    # html='http://www.171english.cn/news/'
    # html='http://www.171english.cn/news/2018'
    # html='http://www.171english.cn/news/2019'
    html = ' http://roll.edu.sina.com.cn/english/syxw/ss4/index.shtml'
    # html=' http://roll.edu.sina.com.cn/english/syxw/ss4/index_5.shtml'
    html = html.strip()

    from bs4 import BeautifulSoup
    # the index (home) page is set here

    baseUrl = html

    import requests
    # a=requests.get(html).content

    # bs = BeautifulSoup(a, "html.parser")
    # print(bs)
    # find all pages to crawl from bs below:
    # print(bs.find_all("a"))  # get all the <a> tags, i.e. the hyperlinks
    from selenium import webdriver
    import sys

    # browser = webdriver.Firefox()  # Get local session of firefox
    # aaa=browser.get("http://news.sina.com.cn/c/2013-07-11/175827642839.shtml ")  # Load page
    # print(aaa)
    saveall = [html]
    print(777777777777777777777777777777, baseUrl)
    if 0:  # debugging only; normally not run this way. Only dynamic JS pages need this approach. It is too slow for crawling, but it avoids unnecessary JS bugs.
        while 1:
            tmpurl = saveall[-1]
            from selenium import webdriver
            from selenium.webdriver.chrome.options import Options

            chrome_options = Options()
            chrome_options.add_argument("--headless")
            from .utilsme import driver

            base_url = tmpurl
            driver.get(base_url)  # note: the result is written straight into driver
            # print(driver.page_source)
            a = driver.page_source

            bs = BeautifulSoup(a, "html.parser")
            # print(bs)
            # find all pages to crawl from bs below:
            # print(bs.find_all("a"))
            import re
            # tmp=bs.find_all(text=re.compile("Next[ ]*"))
            # print(tmp)
            now = None

            for s in bs('a'):
                # print(s.text,444444444444444444444444444444444444444444444444)
                if s.text == "»":
                    now = s.extract()
                    # the Chinese in now must be percent-encoded
                    # now=parse.quote(now.get('href'))
                    print("loook", now)
                    # note the legacy encoding this old site uses:
                    now = parse.quote(now.get('href'),
                                      safe=";/?:@&=+$, ",
                                      encoding="gbk")
                    now = 'https://ru.hujiang.com/' + now
                    print(now, "is the next page!!!!!!!!!!")
            if now is None or now in saveall:  # avoid cycles
                break
            else:
                saveall.append(now)
        print(saveall, 'all index pages collected in the end')

#------------- the recommended way to collect all index pages

# Below: collect all index pages by direct matching. For ordinary needs, running it this way gets all the index pages.
    import urllib.request  # import the urllib.request library

    if 0:  # for debugging
        while 1:

            tmpurl = saveall[-1]

            import urllib
            from bs4 import BeautifulSoup

            url = tmpurl
            print(url, 8989898998)
            print(repr(url),
                  9999999999999999999999999999999999999999999999999999)

            a = urllib.request.urlopen(url)  # open the given URL
            page = a.read()  # read the page source
            try:
                page = page.decode('gbk')  # there are 2 possible encodings: gbk or utf-8
            except UnicodeDecodeError:
                page = page.decode('utf-8')  # there are 2 possible encodings: gbk or utf-8

            print(type(page), 'yuio')
            # page = requests.get(url)         # would not open - always 404.
            # page = requests.get('http://www.i21st.cn/story/index_1.html')         # would not open - always 404.
            # page.encoding = 'utf-8'
            # soup = BeautifulSoup(page,"html.parser")
            print(page, 3434343434343)
            bs = BeautifulSoup(page, "html.parser")
            print(bs, 999999999999999999999999999999999999)
            # print(bs)
            # find all pages to crawl from bs below:
            # print(bs.find_all("a"))
            import re
            # tmp=bs.find_all(text=re.compile("Next[ ]*"))
            # print(tmp)
            now = None

            print(url, bs('a'), 'uiop')
            for s in bs('a'):
                print(s.text, 'yyyyyyyyyy')
                if s.text == "下一页":  # "下一页" is the site's "next page" link text
                    now = s.extract()
                    print(now, 12345)
                    # the Chinese in now must be percent-encoded
                    # now=str(now)
                    print(now, 888888888888888888888888)
                    # now=parse.quote(re.findall(r'href=".*"',now)[0])

                    print("loook", now)
                    # note the legacy encoding this old site uses:
                    now = parse.quote(
                        now.get('href'), safe=";/?:@&=+$%, ", encoding="gbk"
                    )  # Chinese is handled by simply adding % to the safe set!!!!!!!!!!!!!!!!!!!!!!
                    print(89898934392423423, now)

                    if now[0] == '.':
                        now = now[2:]
                    now = 'http://roll.edu.sina.com.cn/english/syxw/ss4/' + now

                    # now=r'https://' + 'jp.hjenglish.com'+now
                    print(now, "is the next page!!!!!!!!!!")
            if now is None:
                break
            else:
                # print(now,556565656565)
                saveall.append(now)
        print("all index pages found via the plain index algorithm:", saveall)

    # Edit right here!!!!!!!!!!!!! You can paste the saveall obtained above straight into the list below to get the full crawl.

    saveall = [

        #'http://www.171english.cn/news/2018/june/',
        html,
    ]
    start_urls = saveall  # the links crawling starts from; the attribute must be named start_urls.

    def parse(self, response):  # first-level crawl
        print("entered the first-level spider")
        # xpath tutorial: https://blog.csdn.net/qq_27283619/article/details/88704479
        # https://www.cnblogs.com/wt7018/p/11749778.html
        # @ selects an attribute
        # the scrapy framework apparently cannot be debugged, so print like crazy
        # help(response.url)
        print(response.url, 77777777777777777777777777777777777777777777777777)
        print(response, '********************** link of the page currently being crawled')
        div_list = response.xpath('//ul[@class="list_009"]//a/@href')  # selector pattern
        # div_list = response.xpath('//div[@class="module cl xl"]/ul/li')  # selector pattern

        # print(85654645654, div_list)
        div_list = [i.extract() for i in div_list]

        # drop links that point back to the current page.
        div_list = [i for i in div_list if i != response.url]
        div_list = list(set(div_list))
        print(85654645654, div_list)
        # div_list = response.xpath('//div[@class="newslist solid"]')  # selector pattern
        # print(90909090,div_list)

        # print(div_list)
        # print(div_list[0])
        # print(div_list[-1])
        # print((div_list))

        print(div_list, 99999999999999999999999999999999999999)
        for i in div_list:
            # print(self.baseUrl+i.extract())  # got all the links; move on to the second-level spider.
            item = en_youth()
            item['link'] = i
            item['link'] = item['link']
            # print(item['link'],"the link !!!!!!!!!!!!!!!!!!!!!!")
            # every page the first-level spider finds triggers one second-level crawl.
            yield scrapy.Request(item['link'],
                                 callback=self.parse_detail,
                                 meta={'item': item},
                                 encoding='raw_unicode_escape')

    # how to crawl second-level pages: https://blog.csdn.net/Light__1024/article/details/88763541

    def parse_detail(self, response):  # second-level crawl
        infomation = response.meta['item']['link']
        # print(infomation,988776754456435345435345435)
        print(infomation, "is the address of the second-level crawl")
        item = response.body
        # print(item,9090909090909090909090909090)
        # print(item,444444444444444444444444444444444444)
        # print(item)
        # print(response.body,"???????????????")
        # print("********打印二次爬虫结果")#[@class="TRS_Editor"]
        item = en_youth()
        print('进入2极品宠')

        # pre-filtering: changed body, but it still does not take effect..??
        #
        # # response.body="dfadsf"
        #
        # tmp=re.sub(r'<script.*</script>','',str(response.body))
        # print(tmp,6666666666666666666666666666666666666666)
        # response._set_body(tmp.encode(response.encoding))
        # print(response.body,777777777777777777777777777777777777777777777)
        # print(response.body,88888888888888888888888888888888888)
        # HtmlResponse.replace()
        # HtmlResponse.replace('body',remove_tags_with_content(response.body, 'script'))
        # HtmlResponse.replace('body',remove_tags_with_content(response.body, 'script'))

        # tmp2=response.xpath('//td[@class="e14"]//text()').extract()
        # several xpath checks are needed below, because the page formats are not uniform.
        # this next one is for pages that only have a div with no <p> tags inside.

        # to extract all the text inside a tag, however deep, skip the details: extracting the div directly works!
        item['neirong'] = response.xpath(
            '//div[@class="article"]//p').extract()
        item['neirong'] += response.xpath(
            '//div[@class="content"]//p').extract()
        item['neirong'] += response.xpath('//div[@id="article"]//p').extract()
        item['neirong'] += response.xpath('//td[@class="e14"]').extract()
        item['neirong'] += response.xpath(
            '//td[@id="article_content"]').extract()
        # print(item['neirong'],22222222222222222222222)

        save = []

        item['neirong'] = [i for i in item['neirong'] if '<script' not in i]
        item['neirong'] = [replace_tags(i, '') for i in item['neirong']]

        print('neirong2222222222222', item['neirong'])

        # item['neirong']+= response.xpath('//div[@id="article"]/div/p/text()').extract()
        # item['neirong']+= response.xpath('//div[@id="article"]/p/text()').extract()

        # filter out scripts below.

        # item['neirong'] = filter(lambda x: '<script>'not in x, item['neirong'])

        # print(item['neirong'], '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        # print(item['neirong'], 8888888888888888888)

        save2 = '\n'.join(item['neirong'])
        print(save2, 9999999999999999999999999999999999999)
        item['neirong'] = save2
        item['title'] = infomation
        yield item
        # next, learn pipelines and do the file reading/writing.
        # configure the pipeline for writing to a file in settings:
        # https://www.cnblogs.com/python2687806834/p/9836935.html
        pass


#
# if __name__=="__main__":
#     DmozSpider()
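A hedged illustration of the gbk-aware quoting used twice above - the percent-escapes are computed from the page's legacy encoding rather than the UTF-8 default:

from urllib import parse

print(parse.quote('index_中文.shtml', safe=";/?:@&=+$,", encoding='gbk'))  # index_%D6%D0%CE%C4.shtml
print(parse.quote('index_中文.shtml', safe=";/?:@&=+$,"))                  # index_%E4%B8%AD%E6%96%87.shtml (utf-8 default)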
Example #32
# import urllib,
import sys, json
import ssl
import urllib.request
from urllib import parse

host = 'https://jisuznwd.market.alicloudapi.com'
path = '/iqa/query'
method = 'GET'
appcode = 'e44b5b8f74ce47ebb3a202dfab10002c'
q = '习近平最近怎么啦'  # demo question; roughly "what has Xi Jinping been up to lately"

encode_q = parse.quote(q)
print(encode_q)
querys = 'question={}'.format(encode_q)

bodys = {}
url = host + path + '?' + querys

req = urllib.request.Request(url)
req.add_header('Authorization', 'APPCODE ' + appcode)
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
response = urllib.request.urlopen(req, context=ctx)
content = response.read()
# print(content)
str_content = str(content, 'utf-8')
if (content):
    # print(content)
    js = json.loads(str_content)
Example #33
    def escaped_name(self):
        """My name, escaped for use in URLs, cookies, etc."""
        return quote(self.name, safe='@')
Example #34
    def call(self,
             op,
             path='',
             is_extended=False,
             expected_error_code=None,
             retry_policy=None,
             headers={},
             **kwargs):
        """ Execute a REST call

        Parameters
        ----------
        op: str
            webHDFS operation to perform, one of `DatalakeRESTInterface.ends`
        path: str
            filepath on the remote system
        is_extended: bool (False)
            Indicates if the API call comes from the webhdfs extensions path or the basic webhdfs path.
            By default, all requests target the official webhdfs path. A small subset of custom convenience
            methods specific to Azure Data Lake Store target the extension path (such as SETEXPIRY).
        expected_error_code: int
            Optionally indicates a specific, expected error code, if any. In the event that this error
            is returned, the exception will be logged to DEBUG instead of ERROR stream. The exception
            will still be raised, however, as it is expected that the caller will expect to handle it
            and do something different if it is raised.
        kwargs: dict
            other parameters, as defined by the webHDFS standard and
            https://msdn.microsoft.com/en-us/library/mt710547.aspx
        """
        retry_policy = ExponentialRetryPolicy(
        ) if retry_policy is None else retry_policy
        if op not in self.ends:
            raise ValueError("No such op: %s", op)
        self._check_token()
        method, required, allowed = self.ends[op]
        allowed.add('api-version')
        data = kwargs.pop('data', b'')
        stream = kwargs.pop('stream', False)
        keys = set(kwargs)
        if required > keys:
            raise ValueError("Required parameters missing: %s",
                             required - keys)
        if keys - allowed > set():
            raise ValueError("Extra parameters given: %s", keys - allowed)
        params = {'OP': op}
        if self.api_version:
            params['api-version'] = self.api_version

        params.update(kwargs)

        if is_extended:
            url = self.url + self.extended_operations
        else:
            url = self.url + self.webhdfs
        url += urllib.parse.quote(path)

        retry_count = -1
        request_id = str(uuid.uuid1())
        while True:
            retry_count += 1
            last_exception = None
            try:
                response = self.__call_once(method=method,
                                            url=url,
                                            params=params,
                                            data=data,
                                            stream=stream,
                                            request_id=request_id,
                                            retry_count=retry_count,
                                            op=op,
                                            path=path,
                                            headers=headers,
                                            **kwargs)
            except requests.exceptions.RequestException as e:
                last_exception = e
                response = None

            request_successful = self.is_successful_response(
                response, last_exception)
            if request_successful or not retry_policy.should_retry(
                    response, last_exception, retry_count):
                break

        if not request_successful and last_exception is not None:
            raise DatalakeRESTException('HTTP error: ' + repr(last_exception))

        exception_log_level = logging.ERROR
        if expected_error_code and response.status_code == expected_error_code:
            logger.log(
                logging.DEBUG,
                'Error code: {} was an expected potential error from the caller. Logging the exception to the debug stream'
                .format(response.status_code))
            exception_log_level = logging.DEBUG

        if response.status_code == 403:
            self.log_response_and_raise(response,
                                        PermissionError(path),
                                        level=exception_log_level)
        elif response.status_code == 404:
            self.log_response_and_raise(response,
                                        FileNotFoundError(path),
                                        level=exception_log_level)
        elif response.status_code >= 400:
            err = DatalakeRESTException('Data-lake REST exception: %s, %s' %
                                        (op, path))
            if self._is_json_response(response):
                out = response.json()
                if 'RemoteException' in out:
                    exception = out['RemoteException']['exception']
                    if exception == 'BadOffsetException':
                        err = DatalakeBadOffsetException(path)
                        self.log_response_and_raise(response,
                                                    err,
                                                    level=logging.DEBUG)
            self.log_response_and_raise(response,
                                        err,
                                        level=exception_log_level)
        else:
            self._log_response(response)

        if self._is_json_response(response):
            out = response.json()
            if out.get('boolean', True) is False:
                err = DatalakeRESTException('Operation failed: %s, %s' %
                                            (op, path))
                self.log_response_and_raise(response, err)
            return out
        return response
Example #35
    def getfid_web(self, kw):
        fid_url = 'http://tieba.baidu.com/f/commit/share/fnameShareApi?ie=utf-8&fname=' + parse.quote(
            kw)
        fid = self.getRes(fid_url)['data']['fid']
        return str(fid)
Example #36
def get_mp3_url(title):
    return 'http://dl.justing.com.cn/page/{0}.mp3'.format(
            parse.quote(title))
Example #37
    def encode_uri_component(uri):
        return _urlencode.quote(uri, safe="~()*!.'")
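The safe set "~()*!.'" plus quote()'s always-safe characters (letters, digits, '_', '-', '.', '~') matches the unreserved set of JavaScript's encodeURIComponent, which is presumably the point. A quick check with the plain urllib.parse name:

from urllib.parse import quote

print(quote("a b&c/d", safe="~()*!.'"))  # a%20b%26c%2Fd - same as encodeURIComponent("a b&c/d")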
Example #38
    def parse_single_relation_table(self, rt_id: int):
        """
        Parse a single related-content table (loaded via ajax; requires a server request).
        :param rt_id: table id
        :raise Exception: the request failed
        """
        root_url = "https://baike.baidu.com/guanxi/jsondata"  # content endpoint
        get_appendix = '?action={action}&args={args}'  # GET parameter template
        action_str = "getViewLemmaData"  # fixed parameter
        args = [0, 8, {"fentryTableId": rt_id}, False]  # the entry-table id is passed in here

        # percent-encode the parameter values and splice them into the URL
        request_url = (root_url + get_appendix.format(action=quote(action_str),
                                                      args=quote(str(args))))
        try:
            # fetch the table JSON
            req = urllib.request.Request(request_url, headers=self.headers)
            response = urllib.request.urlopen(req, timeout=5)
            if self.__check_404_error(response.url):
                return None
            if response.getcode() != 200:
                raise Exception("connection error on relation table fetching")
        except Exception as e:
            # connection interrupted:
            raise e  # for now, simply pass the exception on
        # take the table-area HTML and the overall title straight from the JSON
        json_data = json.load(response)
        html_text = regex.sub(r'(\r\n)', "", json_data['html'])
        main_title = json_data["title"]
        # initialise the output buffer
        result_single_table = dict()
        # add the table name
        result_single_table['#head_name#'] = main_title
        result_single_table['#head_link#'] = None
        result_single_table['#table_id#'] = rt_id
        # parse the HTML
        relation_soup = bs4.BeautifulSoup(html_text, features='html.parser')
        r_unit_list = relation_soup.find_all(class_='relation-unit',
                                             recursive=False)
        # mixed h3/div/table format
        h3_name = None
        h3_buffer = {}
        for unit in r_unit_list:  # split into top-level elements and handle each separately
            if unit.name == 'h3':  # h3 marks a section boundary; pack the buffer in order
                if h3_name not in (None, ''):
                    result_single_table[h3_name] = h3_buffer
                h3_name = ''.join(unit.stripped_strings)
                h3_buffer = {}  # rebind rather than clear(), so the dict stored above survives
            if unit.name == 'table':
                # hand the table over to the recursive parser
                item = self.__parse_table_recursive(unit)
                if h3_name is not None:
                    if item.get('#head_name#') is None:
                        h3_buffer = dict(h3_buffer, **item)
                    else:
                        h3_buffer[item.get('#head_name#')] = item
                else:
                    if item.get('#head_name#') is None:
                        result_single_table = dict(result_single_table, **item)
                    else:
                        result_single_table[item.get('#head_name#')] = item
            if unit.name == "div":
                # 提取 div
                div_content = self.__parse_div_(unit)
                if h3_name is not None:
                    h3_buffer = dict(h3_buffer, **div_content)
                else:
                    result_single_table = dict(result_single_table,
                                               **div_content)
        if h3_name is not None:  # flush the buffer
            h3_buffer['#head_name#'] = None
            result_single_table[h3_name] = h3_buffer
        return result_single_table
Example #39
    def url(self):
        return 'https://mp.weixin.qq.com/cgi-bin/showqrcode?ticket=' + \
                quote(json.loads(self.content).get('ticket'))
Example #40
    def get_full_path(self, path: str) -> str:
        """Compute full path for the given path."""
        return unquote(urljoin(self.base_path, quote(path.lstrip("/"))))  # pragma: no mutate
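Why the quote/unquote round trip: quoting before urljoin keeps characters like '?' or '#' in a file name from being parsed as URL syntax. A small illustration:

from urllib.parse import quote, unquote, urljoin

base_path = 'http://example.com/api/'
print(unquote(urljoin(base_path, quote('name with space?.txt'))))
# http://example.com/api/name with space?.txt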
Example #41
        for trip in item["times"]:
            result[lineid].append({
                "time": trip["realtimeArrival"],
                "rt": trip["realtime"],
                "dest": dest
            })
    for item in result:
        result[item] = sorted(result[item], key=lambda i: i["time"])
    return result


if __name__ == "__main__":
    print("Bus | iconName=mark-location-symbolic")
    print("---")
    for stopquery in config["stations"]:
        stop = getStopInfo(quote(stopquery))
        stopid = stop["properties"]["id"]
        stopname = stop["properties"]["LIBELLE"]
        schedule = getSchedule(stopid)
        now = datetime.now()
        todayref = now.replace(hour=0, minute=0, second=0, microsecond=0)
        for line in schedule:
            lineinfo = getLineInfo(line)
            print("{} : <span foreground=\"#{}\" background=\"#{}\">{}</span>".
                  format(stopname, lineinfo["textColor"], lineinfo["color"],
                         lineinfo["shortName"]))
            for trip in schedule[line]:
                abstime = todayref + timedelta(0, trip["time"])
                reltime = abstime - now
                if trip["rt"]:
                    icon = "application-rss+xml-symbolic"
Example #42
from rdflib import Graph, URIRef
from urllib.parse import quote, urlparse

url = 'http://dbpedia.org/resource/Category:Films_set_in_colonial_Australia'
g = Graph()
g.parse(url)

triples = list(
    g.triples((None, URIRef('http://purl.org/dc/terms/subject'), URIRef(url))))
print("Loaded {} films.".format(len(list(triples))))
path = '/home/shawn/projects/rest-app/rest-app.git/tests/testdata/movies/movies_set_in_colonial_australia/'
for film, _, _ in triples:
    url_parsed = urlparse(film)
    if '&' in url_parsed.path:
        url = url_parsed.geturl().replace('&', quote('&'))
    else:
        url = url_parsed.geturl()

    filename = film.split('/')[-1]
    print("{} -> {}".format(film, filename))
    g = Graph()
    try:
        g.parse(url)
    except Exception as e:
        print("Failed to parse {}".format(url))
        continue

    try:
        with open("{}/{}.json".format(path, filename), 'wb') as f:
            f.write(g.serialize(format='json-ld'))
Example #43
__author__ = 'Terry'

'''
    Any encryption/encoding operates on bytes: b'\xB3\x3B'


    URL encoding and decoding:
    used for submitting URL parameters;
    Chinese and special characters are converted to %B3%3B%53 ...
'''
from urllib import parse

s = '#%&+='
# the default encoding is UTF-8
s1 = parse.quote(s)  # encode
print(s1)
s2 = parse.unquote('%2b')  # decode
print(s2)
d = {
    'k1': '中文',
    'k2': 'fab123'
}
# convert the dict into a URL GET query string
print(parse.urlencode(d))
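# expected output:
#   %23%25%26%2B%3D
#   +
#   k1=%E4%B8%AD%E6%96%87&k2=fab123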

'''
    base64:
    one of the most common encodings on the network for transmitting 8-bit byte data.
    Base64 represents binary data using 64 printable characters and is used in
    HTTP environments to pass longer identifying information.
    The output may end with one or two '=' padding characters.
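'''

# The original snippet is cut off above; as an illustrative, minimal sketch of
# the base64 round trip the comment describes (standard library only):
import base64

data = '中文abc'.encode('utf-8')              # base64 operates on bytes
b64 = base64.b64encode(data)                  # only 64 printable characters
print(b64)                                    # b'5Lit5paHYWJj'
print(base64.b64decode(b64).decode('utf-8'))  # 中文abc
print(base64.b64encode(b'ab'))                # b'YWI=' (note the '=' padding)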
Example #44
0
 def encode(k, v):
     return '%s=%s' % ((quote(k, safe), quote(v, safe)))
Example #45
0
def article_summary(content):
    return quote(
        BeautifulSoup(
            content.summary,
            'html.parser').get_text().strip().encode('utf-8'))  # noqa
Example #46
0
    https://colab.research.google.com/drive/1uA2Ov2nltlBKqyax1D6C_6OfxGH-u7wc
"""

import pandas as pd
import datetime as dt
from datetime import datetime
from urllib.parse import quote
import requests
now = str(datetime.now())
time = pd.date_range(end=now, periods=2, freq="30D")
start_time = str(time[0])[0:10]
end_time = str(time[1])[0:10]
now

start_time_1H = pd.date_range(end=start_time, periods=2, freq="1H")
start_time_1H = quote(str(start_time_1H[0])[0:16])
start_time_1H

end_time_1H = pd.date_range(start=end_time, periods=2, freq="1D59min")
end_time_1H = quote(str(end_time_1H[1])[0:16])
end_time_1H

lst_min_max_lat_lon_事件圈圈 = [
    "min_lat=22.4342&max_lat=22.89&min_lon=120.1393&max_lon=120.4398",
    "min_lat=23.4386&max_lat=23.5347&min_lon=120.397&max_lon=120.5106",
    "min_lat=22.9036&max_lat=23.2027&min_lon=120.0584&max_lon=120.2666",
    "min_lat=21.8727&max_lat=22.8734&min_lon=120.4398&max_lon=120.9212",
    "min_lat=24.075452&max_lat=24.42158&min_lon=120.49468&max_lon=120.742974",
    "min_lat=24.576&max_lat=24.7401&min_lon=120.7107&max_lon=120.9529",
    "min_lat=23.519469&max_lat=23.793247&min_lon=120.1385103&max_lon=120.7179493",
    "min_lat=23.7868&max_lat=24.2007&min_lon=120.2524&max_lon=120.6884",
Example #47
0
 def create_cdata_element(document, name, value):
     xml_elem = document.createElement(name)
     cdata = document.createCDATASection(quote(value, encoding="UTF-8"))
     xml_elem.appendChild(cdata)
     return xml_elem
Example #48
0
def forge_url(url):
    return 'https://www.facebook.com/plugins/share_button.php?href=%s&layout=button_count' % quote(url)
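
A caveat on this pattern: quote()'s default safe='/' leaves slashes in the
embedded URL unescaped. A minimal sketch (illustrative URL) of the difference
when a whole URL is passed as a query-string value:

from urllib.parse import quote

url = 'https://example.com/page?a=1'
print(quote(url))           # https%3A//example.com/page%3Fa%3D1
print(quote(url, safe=''))  # https%3A%2F%2Fexample.com%2Fpage%3Fa%3D1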
Example #49
0
def fijo_calcula(request):
    # print(request.POST, len(request.POST))

    # start = time.time()

    iteraciones = 20
    context = {}  # ensure `context` exists even when only the one-variable branch runs

    # sympy setup
    x, y, z = sympy.symbols('x y z')

    # matplotlib setup
    plt.rcParams.update(plt.rcParamsDefault)
    plt.close('all')

    valores = request.POST  # get the input

    n = len(request.POST)  # one key and value pair per equation.

    if n == 3:  # one variable
        funo = str(valores['fx'])  # +"+x"
        x0 = float(valores['x0'])

        # print(type(x0), type(funo), x0, funo)

        # x, y, z = sympy.symbols('x y z')
        fux = sympy.sympify(funo)

        # print(sympy.solve(fux, 0, implicit=True, numerical=False, warn=True, manual=True, cubics=True))

        fx = sympy.sympify(
            str(sympy.solve(fux, x, implicit=True, quick=True,
                            manual=True)).strip('[]'))

        for q in range(10):
            x0 = round(fx.subs(x, x0))
            print(x0.n(4), fx.subs(x, x0))

    elif n == 5:  # two variables

        resul = {'titulos': ['n', 'Xn', 'Yn'], 'filas': []}

        funo = sympy.sympify(valores['fx'])
        x0 = float(valores['x0'])

        fundos = sympy.sympify(valores['fy'])
        y0 = float(valores['x0'])

        fx = sympy.sympify(
            str(sympy.solve(funo, x, implicit=True,
                            rational=False)).strip('[]'))
        fy = sympy.sympify(
            str(sympy.solve(fundos, y, implicit=True,
                            rational=False)).strip('[]'))

        for q in range(1, iteraciones + 1):
            # x0 = fx.subs({x: x0, y: y0})
            # y0 = fy.subs({x: x0, y: y0})
            x0 = round(fx.subs({x: x0, y: y0}), 8)
            y0 = round(fy.subs({x: x0, y: y0}), 8)

            resul['filas'].append([q, x0.n(5), y0.n(5)])

        context = {'context': resul}

        # plotting

        plt.rc_context({
            'axes.edgecolor': 'w',
            'xtick.color': 'w',
            'ytick.color': 'w'
        })
        plt.style.use("dark_background")

        titulo = '\n' + estiliza_string(
            valores['fx']) + "  y  " + estiliza_string(valores['fy']) + '\n'

        p1 = plot_implicit(funo,
                           show=False,
                           line_color='#27864d',
                           title=titulo)
        p2 = plot_implicit(fundos, show=False, line_color='#40E0D0')
        p1.extend(p2)

        p1.show()
        buf = BytesIO()
        # experimental: make the compression depend on the elapsed time, to give a faster response

        p1._backend.fig.savefig(buf,
                                format='jpg',
                                quality=90,
                                bbox_inches='tight',
                                facecolor="#000000",
                                edgecolor='#000000',
                                dpi=150,
                                transparent=True)
        # p1._backend.fig.savefig(buf, format='png', quality=1, facecolor="#004c3f", edgecolor='#004c3f', dpi=150, transparent=True)
        buf.seek(0)
        uri = 'data:image/jpeg;base64,' + parse.quote(b64encode(buf.read()))
        context['image'] = uri

        # print(sys.getsizeof(buf))
        # end = time.time()
        # print(end - start)

    return render(request, "fijo_calculado.html", context)
Example #50
0
def article_url(content):
    site_url = content.settings['SITEURL']
    return quote(('%s/%s' % (site_url, content.url)).encode('utf-8'))
Example #51
0
def fijo_ejemplo_1(request):  # Example 1 for one variable

    # Computing values

    iteraciones = 20
    resul = {'titulos': ['n', 'Xn', 'f(x)'], 'filas': []}

    x = sympy.symbols('x')

    # Examples from Curiel
    fun = "x**3+4*x**2-10"
    gx = "sqrt((10)/(x+4))"
    x0 = 1

    fuxx = sympy.lambdify(x, fun, "math")
    gxxx = sympy.lambdify(x, gx, "math")

    for q in range(1, iteraciones + 1):
        x0 = gxxx(x0)
        num = "{0:.6f}".format(fuxx(x0))
        resul['filas'].append([q, "{0:.6f}".format(x0), num])

    context = {'context': resul}

    # Plotting

    plt.rcParams.update(plt.rcParamsDefault)
    plt.close('all')

    r = float(resul['filas'][-1][1])
    t = np.arange(r - 5, r + .5, .1)
    s = []
    for n in t:
        s.append(fuxx(n))

    plt.rc_context({
        'axes.edgecolor': 'black',
        'xtick.color': 'black',
        'ytick.color': 'black'
    })
    # plt.style.use("dark_background")
    fig, ax = plt.subplots()

    ax.axhline(0, color='black')

    ax.plot(t, s, label=f'f(x) = {estiliza_string(fun)}', color='navy')
    ax.grid(color="gray")

    plt.plot(r,
             fuxx(r),
             marker='o',
             markersize=5,
             color="red",
             label=f"Corte con Eje x = {r:.4f}")
    ax.set(xlabel='x',
           ylabel='f(x)',
           title=f"Raíz calculada después de {iteraciones} iteraciones")

    plt.legend(loc='best')

    buf = BytesIO()
    fig.savefig(buf, format='png', dpi=160, transparent=True)
    buf.seek(0)
    uri = 'data:image/png;base64,' + parse.quote(b64encode(buf.read()))
    context['image'] = uri

    return render(request, "fijo_calculado.html", context)
Example #52
0
    def create_xml_element(self, document):
        xml_notify = document.createElement("mobilpay")
        xml_notify.setAttribute("timestamp", f"{datetime.now():%Y%m%d%H%M%S}")

        self._crc = hashlib.md5(
            str(int(random.random() *
                    int(time.time()))).encode('utf-8')).hexdigest()
        xml_notify.set("crc", self._crc)

        xml_notify.appendChild(
            self.create_text_element(xml_notify, "action", self.action))

        if isinstance(self.customer, Address):
            xml_notify.appendChild(
                self.customer.create_xml_element(xml_notify, "customer"))

        xml_notify.appendChild(
            self.create_text_element(xml_notify, "purchase", self.purchaseId))

        if self.originalAmount is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "original_amount",
                                         self.originalAmount))

        if self.processedAmount is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "processed_amount",
                                         self.processedAmount))

        if self.promotionAmount is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "promotion_amount",
                                         self.promotionAmount))

        if self.current_payment_count is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "current_payment_count",
                                         self.current_payment_count))

        if self.pan_masked is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "pan_masked",
                                         self.pan_masked))

        if self.rrn is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "rrn", self.rrn))

        if self.paymentInstrumentId is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "payment_instrument_id",
                                         self.paymentInstrumentId))

        if self.token_id is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "token_id",
                                         self.token_id))

        if self.token_expiration_date is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "token_expiration_date",
                                         self.token_expiration_date))

        if self.customer_type is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "customer_type",
                                         self.customer_type))

        if self.customer_id is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "customer_id",
                                         self.customer_id))

        if self.issuer is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "issuer", self.issuer))

        if self.paidByPhone is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "paid_by_phone",
                                         self.paidByPhone))

        if self.validationCode is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "validation_code",
                                         self.validationCode))

        if self.installments is not None:
            xml_notify.appendChild(
                self.create_text_element(xml_notify, "installments",
                                         self.installments))

        if len(self.discounts) > 0:
            discounts = document.createElement("discounts")
            for discount in self.discounts:
                # one child element per discount, so entries don't overwrite each other
                discount_elem = document.createElement("discount")
                discount_elem.setAttribute("id", discount["id"])
                discount_elem.setAttribute("amount", discount["amount"])
                discount_elem.setAttribute("currency", discount["currency"])
                discount_elem.setAttribute("third_party", discount["third_party"])
                discounts.appendChild(discount_elem)
            xml_notify.appendChild(discounts)

        error_element = document.createElement("error")
        error_element.setAttribute("code", self.errorCode)
        error_text = document.createCDATASection(
            quote(self.errorMessage, encoding="utf-8"))
        error_element.appendChild(error_text)

        xml_notify.appendChild(error_element)

        return xml_notify
Example #53
0
 def _event_url(view, point):
     editor = 'sublime3'
     filename = quote(path_for_url(realpath(view.file_name())))
     hash_ = _md5(view.substr(sublime.Region(0, view.size())))
     return ('/api/buffer/{}/{}/{}/hover?cursor_runes={}'.format(
         editor, filename, hash_, point))
Example #54
0
def fijo_ejemplo_2(request):  # Example 2 for two variables

    # Computing values

    iteraciones = 10
    resul = {'titulos': ['n', 'Xn', 'Yn', 'f(x, y)', 'g(x, y)'], 'filas': []}

    x, y = sympy.symbols('x y')

    # Examples from Curiel
    funx = "x**2-10*x+y**2+8"
    funy = "x*y**2+x-10*y+8"

    # rearranged fixed-point forms
    fx = "(x**2+y**2+8)/(10)"
    fy = "(x*y**2+x+8)/(10)"
    x0 = 0
    y0 = 0

    fux = sympy.sympify(funx)
    fuy = sympy.sympify(funy)

    # fxx = sympy.sympify(fx)
    # fyy = sympy.sympify(fy)

    fxn = sympy.lambdify([x, y], funx, "numpy")
    fyn = sympy.lambdify([x, y], funy, "numpy")

    fxxn = sympy.lambdify([x, y], fx, "numpy")
    fyyn = sympy.lambdify([x, y], fy, "numpy")

    for q in range(1, iteraciones + 1):
        x0 = fxxn(x0, y0)
        y0 = fyyn(x0, y0)

        num = fxn(x0, y0)
        num2 = fyn(x0, y0)

        resul['filas'].append(
            [q, f'{x0:.6}', f'{y0:.6}', f'{num:.6}', f'{num2:.6}'])

    context = {'context': resul}

    # Plotting

    plt.rc_context({
        'axes.edgecolor': 'black',
        'xtick.color': 'black',
        'ytick.color': 'black'
    })

    titulo = '\n' + estiliza_string(funx) + "  y  " + estiliza_string(
        funy) + '\n'

    p1 = plot_implicit(fux, (x, x0 - 1.5, x0 + 1.5), (y, y0 - 1, y0 + 1),
                       show=False,
                       line_color='#27864d',
                       title=titulo,
                       adaptive=False,
                       points=1)
    p2 = plot_implicit(fuy, (x, x0 - 1.5, x0 + 1.5), (y, y0 - 1, y0 + 1),
                       show=False,
                       line_color='#40E0D0',
                       adaptive=False,
                       points=1)
    p1.extend(p2)

    buf = BytesIO()
    p1.show()

    p1._backend.fig.savefig(buf,
                            format='jpg',
                            quality=90,
                            bbox_inches='tight',
                            facecolor="#f3f2f1",
                            edgecolor='#f3f2f1',
                            dpi=150)
    buf.seek(0)
    uri = 'data:image/jpeg;base64,' + parse.quote(b64encode(buf.read()))
    context['image'] = uri
    return render(request, "fijo_calculado.html", context)
Example #55
0
 def format_uri(self, string, remove_space=False):
     """remove space and quote"""
     if remove_space:
         return quote(string.replace(' ', ''))
     return quote(string)
Example #56
0
    def run(self):
        """Run the Bot."""
        try:
            deadLinks = codecs.open(listof404pages, 'r', 'latin_1').read()
        except IOError:
            pywikibot.output(
                'You need to download '
                'http://www.twoevils.org/files/wikipedia/404-links.txt.gz '
                'and to ungzip it in the same directory')
            raise
        socket.setdefaulttimeout(30)
        editedpages = 0
        for page in self.generator:
            try:
                # Load the page's text from the wiki
                new_text = page.get()
                if not page.canBeEdited():
                    pywikibot.output(u"You can't edit page %s" %
                                     page.title(asLink=True))
                    continue
            except pywikibot.NoPage:
                pywikibot.output(u'Page %s not found' %
                                 page.title(asLink=True))
                continue
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect' %
                                 page.title(asLink=True))
                continue

            # for each link to change
            for match in linksInRef.finditer(
                    textlib.removeDisabledParts(page.get())):

                link = match.group(u'url')
                # debugging purpose
                # print link
                if u'jstor.org' in link:
                    # TODO: Clean URL blacklist
                    continue

                ref = RefLink(link, match.group('name'))
                f = None
                try:
                    socket.setdefaulttimeout(20)
                    try:
                        f = urlopen(ref.url.decode("utf8"))
                    except UnicodeError:
                        ref.url = quote(ref.url.encode("utf8"), "://")
                        f = urlopen(ref.url)
                    # Try to get Content-Type from server
                    headers = f.info()
                    if sys.version_info[0] > 2:
                        contentType = headers.get_content_type()
                    else:
                        contentType = headers.getheader('Content-Type')
                    if contentType and not self.MIME.search(contentType):
                        if ref.link.lower().endswith('.pdf') and \
                           not self.getOption('ignorepdf'):
                            # If file has a PDF suffix
                            self.getPDFTitle(ref, f)
                        else:
                            pywikibot.output(
                                color_format(
                                    '{lightyellow}WARNING{default} : '
                                    'media : {0} ', ref.link))
                        if ref.title:
                            if not re.match(
                                    u'(?i) *microsoft (word|excel|visio)',
                                    ref.title):
                                ref.transform(ispdf=True)
                                repl = ref.refTitle()
                            else:
                                pywikibot.output(
                                    color_format(
                                        '{lightyellow}WARNING{default} : '
                                        'PDF title blacklisted : {0} ',
                                        ref.title))
                                repl = ref.refLink()
                        else:
                            repl = ref.refLink()
                        new_text = new_text.replace(match.group(), repl)
                        continue
                    # Get the real url where we end (http redirects !)
                    redir = f.geturl()
                    if redir != ref.link and \
                       domain.findall(redir) == domain.findall(link):
                        if soft404.search(redir) and \
                           not soft404.search(ref.link):
                            pywikibot.output(
                                color_format(
                                    '{lightyellow}WARNING{default} : '
                                    'Redirect 404 : {0} ', ref.link))
                            continue
                        if dirIndex.match(redir) and \
                           not dirIndex.match(ref.link):
                            pywikibot.output(
                                color_format(
                                    u'{lightyellow}WARNING{default} : '
                                    u'Redirect to root : {0} ', ref.link))
                            continue

                    # uncompress if necessary
                    if headers.get('Content-Encoding') in ('gzip', 'x-gzip'):
                        # XXX: small issue here: the whole page is downloaded
                        # through f.read(). It might fetch big files/pages.
                        # However, truncating an encoded gzipped stream is not
                        # an option, or unzipping will fail.
                        compressed = io.BytesIO(f.read())
                        f = gzip.GzipFile(fileobj=compressed)

                    # Read the first 1,000,000 bytes (0.95 MB)
                    linkedpagetext = f.read(1000000)
                    socket.setdefaulttimeout(None)

                except UnicodeError:
                    # example : http://www.adminet.com/jo/20010615¦/ECOC0100037D.html
                    # in [[fr:Cyanure]]
                    pywikibot.output(
                        color_format('{lightred}Bad link{default} : %s in %s',
                                     ref.url, page.title(asLink=True)))
                    continue
                except HTTPError as e:
                    pywikibot.output(
                        u'HTTP error (%s) for %s on %s' %
                        (e.code, ref.url, page.title(asLink=True)),
                        toStdout=True)
                    # 410 Gone, indicates that the resource has been purposely
                    # removed
                    if e.code == 410 or \
                       (e.code == 404 and (u'\t%s\t' % ref.url in deadLinks)):
                        repl = ref.refDead()
                        new_text = new_text.replace(match.group(), repl)
                    continue
                except (URLError, socket.error, IOError, httplib.error) as e:
                    pywikibot.output(u'Can\'t retrieve page %s : %s' %
                                     (ref.url, e))
                    continue
                except ValueError:
                    # Known bug of httplib, google for :
                    # "httplib raises ValueError reading chunked content"
                    continue
                finally:
                    if f:
                        f.close()

                # remove <script>/<style>/comments/CDATA tags
                linkedpagetext = self.NON_HTML.sub(b'', linkedpagetext)

                meta_content = self.META_CONTENT.search(linkedpagetext)
                enc = []
                s = None
                if contentType:
                    # use charset from http header
                    s = self.CHARSET.search(contentType)
                if meta_content:
                    tag = meta_content.group()
                    # Prefer the contentType from the HTTP header :
                    if not contentType:
                        contentType = tag
                    if not s:
                        # use charset from html
                        s = self.CHARSET.search(tag)
                if s:
                    tmp = s.group('enc').strip("\"' ").lower()
                    naked = re.sub(r'[ _\-]', '', tmp)
                    # Convert to python correct encoding names
                    if naked == "gb2312":
                        enc.append("gbk")
                    elif naked == "shiftjis":
                        enc.append("shift jis 2004")
                        enc.append("cp932")
                    elif naked == "xeucjp":
                        enc.append("euc-jp")
                    else:
                        enc.append(tmp)
                else:
                    pywikibot.output(u'No charset found for %s' % ref.link)
                if not contentType:
                    pywikibot.output(u'No content-type found for %s' %
                                     ref.link)
                    continue
                elif not self.MIME.search(contentType):
                    pywikibot.output(
                        color_format(
                            '{lightyellow}WARNING{default} : media : %s ',
                            ref.link))
                    repl = ref.refLink()
                    new_text = new_text.replace(match.group(), repl)
                    continue

                # Ugly hacks to try to survive when both server and page
                # return no encoding.
                # Uses most used encodings for each national suffix
                if u'.ru' in ref.link or u'.su' in ref.link:
                    # see http://www.sci.aha.ru/ATL/ra13a.htm : no server
                    # encoding, no page encoding
                    enc = enc + ['koi8-r', 'windows-1251']
                elif u'.jp' in ref.link:
                    enc.append("shift jis 2004")
                    enc.append("cp932")
                elif u'.kr' in ref.link:
                    enc.append("euc-kr")
                    enc.append("cp949")
                elif u'.zh' in ref.link:
                    enc.append("gbk")

                if 'utf-8' not in enc:
                    enc.append('utf-8')
                try:
                    u = linkedpagetext.decode(enc[0])  # Bug 67410
                except (UnicodeDecodeError, LookupError) as e:
                    pywikibot.output(u'%s : Decoding error - %s' %
                                     (ref.link, e))
                    continue

                # Retrieves the first non empty string inside <title> tags
                for m in self.TITLE.finditer(u):
                    t = m.group()
                    if t:
                        ref.title = t
                        ref.transform()
                        if ref.title:
                            break

                if not ref.title:
                    repl = ref.refLink()
                    new_text = new_text.replace(match.group(), repl)
                    pywikibot.output(u'%s : No title found...' % ref.link)
                    continue

                # XXX Ugly hack
                if u'Ã©' in ref.title:
                    repl = ref.refLink()
                    new_text = new_text.replace(match.group(), repl)
                    pywikibot.output(u'%s : Hybrid encoding...' % ref.link)
                    continue

                if self.titleBlackList.match(ref.title):
                    repl = ref.refLink()
                    new_text = new_text.replace(match.group(), repl)
                    pywikibot.output(
                        color_format(
                            '{lightred}WARNING{default} {0} : '
                            'Blacklisted title ({1})', ref.link, ref.title))
                    continue

                # Truncate long titles. 175 is arbitrary
                if len(ref.title) > 175:
                    ref.title = ref.title[:175] + "..."

                repl = ref.refTitle()
                new_text = new_text.replace(match.group(), repl)

            # Add <references/> when needed, but ignore templates !
            if page.namespace != 10:
                if self.norefbot.lacksReferences(new_text):
                    new_text = self.norefbot.addReferences(new_text)

            new_text = self.deduplicator.process(new_text)

            self.userPut(page,
                         page.text,
                         new_text,
                         summary=self.msg,
                         ignore_save_related_errors=True,
                         ignore_server_errors=True)

            if new_text == page.text:
                continue
            else:
                editedpages += 1

            if self.getOption(
                    'limit') and editedpages >= self.getOption('limit'):
                pywikibot.output('Edited %s pages, stopping.' %
                                 self.getOption('limit'))
                return

            if editedpages % 20 == 0:
                pywikibot.output(
                    color_format('{lightgreen}Checking stop page...{default}'))
                actualRev = self.stopPage.latest_revision_id
                if actualRev != self.stopPageRevId:
                    pywikibot.output(
                        u'[[%s]] has been edited : Someone wants us to stop.' %
                        self.stopPage)
                    return
Example #57
0
    def get(ip_address,
            api_key='test',
            db_path=None,
            username=None,
            password=None):
        # process request
        try:
            request = requests.get('https://api.ipdata.co/' +
                                   quote(ip_address) + '?api-key=' +
                                   quote(api_key),
                                   timeout=62)
        except:
            raise ServiceError()

        # check for HTTP errors
        if request.status_code != 200 and request.status_code != 400:
            if request.status_code == 401:
                raise PermissionRequiredError()
            elif request.status_code == 403:
                raise LimitExceededError()
            else:
                raise ServiceError()

        # parse content
        try:
            content = request.content.decode('utf-8')
            content = json.loads(content)
        except:
            raise InvalidResponseError()

        # check for errors
        if content.get('message'):
            if 'private IP address' in content['message']:
                raise IpAddressNotFoundError(ip_address)
            else:
                raise InvalidRequestError()

        # prepare return value
        ip_location = IpLocation(ip_address)

        # format data
        if content['country_code'] == '':
            ip_location.country = None
        else:
            ip_location.country = content['country_code']

        if content['region'] == '':
            ip_location.region = None
        else:
            ip_location.region = content['region']

        if content['city'] == '':
            ip_location.city = None
        else:
            ip_location.city = content['city']

        if content['latitude'] != '-' and content['longitude'] != '-':
            ip_location.latitude = float(content['latitude'])
            ip_location.longitude = float(content['longitude'])
        else:
            ip_location.latitude = None
            ip_location.longitude = None

        return ip_location
Example #58
0
import json
import datetime
import time
from urllib import parse
from urllib import request

subject = input("请输入专业名称(Enter):")
subject = parse.quote(subject)

eolApi = "https://api.eol.cn/gkcx/api/?access_token=&admissions=&central=&department=&dual_class=&f211=&f985=&is_dual_class=&keyword=" + subject + "&local_batch_id=&local_type_id=&page=1&province_id=&school_type=&signsafe=&size=20&type=&uri=apidata/api/gk/score/special&year=" + (
    str)(datetime.datetime.now().year - 1)
head = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36'
}

req = request.Request(eolApi, headers=head)
data = request.urlopen(req)
data = data.read().decode('UTF-8')

data = json.loads(data)

collegeNum = data["data"]["numFound"]  # number of results found
print("Found", collegeNum, "admitting institutions for this major.")

studentFrom = input("Please enter the province of origin (do not include the word 'province') (Enter):")

pageHave = 20
if collegeNum % pageHave != 0:
    loadNeed = collegeNum // pageHave + 1
else:
Example #59
0
    def get(ip_address,
            api_key=None,
            db_path=None,
            username=None,
            password=None):
        # process request
        try:
            request = requests.get(
                'https://https-api.eurekapi.com/iplocation/v1.8/locateip?' +
                'ip=' + quote(ip_address) + '&key=' + quote(api_key) +
                '&format=JSON',
                timeout=62)
        except:
            raise ServiceError()

        # check for HTTP errors
        if request.status_code != 200:
            if request.status_code == 429:
                raise LimitExceededError()
            elif request.status_code == 500:
                raise InvalidRequestError()
            else:
                raise ServiceError()

        # parse content
        try:
            content = request.content.decode('utf-8')
            content = json.loads(content)
        except:
            raise InvalidResponseError()

        # prepare return value
        ip_location = IpLocation(ip_address)

        # check for errors
        if content['query_status']['query_status_code'] != 'OK':
            error_status = content['query_status']['query_status_code']
            error_status_desc = content['query_status'][
                'query_status_description']

            if error_status == 'MISSING_SERVICE_ACCESS_KEY' \
               or error_status == 'INVALID_SERVICE_ACCESS_KEY' \
               or error_status == 'FREE_TRIAL_LICENSE_EXPIRED' \
               or error_status == 'SUBSCRIPTION_EXPIRED':
                raise PermissionRequiredError(error_status_desc)
            elif error_status == 'MISSING_IP_ADDRESS' \
                 or error_status == 'INVALID_IP_ADDRESS':
                raise IpAddressNotFoundError(ip_address)
            else:
                ip_location.country = None
                ip_location.region = None
                ip_location.city = None
                ip_location.latitude = None
                ip_location.longitude = None
                return ip_location

        # format data
        if content.get('geolocation_data'):
            if content['geolocation_data'].get('country_code_iso3166alpha2'):
                ip_location.country = content['geolocation_data'][
                    'country_code_iso3166alpha2']
            else:
                ip_location.country = None

            if content['geolocation_data'].get('region_name'):
                ip_location.region = content['geolocation_data']['region_name']
            else:
                ip_location.region = None

            if content['geolocation_data'].get('city'):
                ip_location.city = content['geolocation_data']['city']
            else:
                ip_location.city = None

            if content['geolocation_data'].get('latitude') \
               and content['geolocation_data'].get('longitude'):
                ip_location.latitude = float(
                    content['geolocation_data']['latitude'])
                ip_location.longitude = float(
                    content['geolocation_data']['longitude'])
            else:
                ip_location.latitude = None
                ip_location.longitude = None
        else:
            ip_location.country = None
            ip_location.region = None
            ip_location.city = None
            ip_location.latitude = None
            ip_location.longitude = None

        return ip_location
Example #60
0
    def __init__(self,
                 object_id,
                 parent,
                 path,
                 mimetype,
                 urlbase,
                 UPnPClass,
                 update=False,
                 store=None):
        BackendItem.__init__(self)
        self.id = object_id
        self.parent = parent
        if parent:
            parent.add_child(self, update=update)
        if mimetype == 'root':
            self.location = str(path)
        else:
            if mimetype == 'item' and path is None:
                path = os.path.join(parent.get_realpath(), str(self.id))
            # self.location = FilePath(unicode(path))
            self.location = FilePath(path)
        self.mimetype = mimetype
        if urlbase[-1] != '/':
            urlbase += '/'
        self.url = urlbase + str(self.id)

        self.store = store

        if parent is None:
            parent_id = -1
        else:
            parent_id = parent.get_id()

        self.item = UPnPClass(object_id, parent_id, self.get_name())
        if isinstance(self.item, Container):
            self.item.childCount = 0
        self.child_count = 0
        self.children = []
        self.sorted = False
        self.caption = None

        if mimetype in ['directory', 'root']:
            self.update_id = 0
            self.get_url = lambda: self.url
            # self.item.searchable = True
            # self.item.searchClass = 'object'
            if (isinstance(self.location, FilePath)
                    and self.location.isdir() is True):
                self.check_for_cover_art()
                if getattr(self, 'cover', None):
                    _, ext = os.path.splitext(self.cover)
                    ''' add the cover image extension to help clients
                        not reacting on the mimetype '''
                    self.item.albumArtURI = \
                        ''.join((urlbase, str(self.id), '?cover', str(ext)))
        else:
            self.get_url = lambda: self.url

            if self.mimetype.startswith('audio/'):
                if getattr(parent, 'cover', None):
                    _, ext = os.path.splitext(parent.cover)
                    ''' add the cover image extension to help clients
                        not reacting on the mimetype '''
                    self.item.albumArtURI = \
                        ''.join((urlbase, str(self.id), '?cover', ext))

            _, host_port, _, _, _ = urlsplit(urlbase)
            if host_port.find(':') != -1:
                host, port = tuple(host_port.split(':'))
            else:
                host = host_port

            try:
                size = self.location.getsize()
            except Exception:
                size = 0

            if (self.store.server and self.store.server.coherence.config.get(
                    'transcoding', 'no') == 'yes'):
                if self.mimetype in ('application/ogg', 'audio/ogg',
                                     'audio/x-wav', 'audio/x-m4a',
                                     'application/x-flac'):
                    new_res = Resource(self.url + '/transcoded.mp3',
                                       f'http-get:*:{"audio/mpeg"}:*')
                    new_res.size = None
                    # self.item.res.append(new_res)

            if mimetype != 'item':
                res = Resource(
                    'file://' + quote(self.get_path(), encoding='utf-8'),
                    f'internal:{host}:{self.mimetype}:*')
                res.size = size
                self.item.res.append(res)

            if mimetype != 'item':
                res = Resource(self.url, f'http-get:*:{self.mimetype}:*')
            else:
                res = Resource(self.url, 'http-get:*:*:*')

            res.size = size
            self.item.res.append(res)
            ''' if this item is of type audio and we want to add a transcoding
                rule for it, this is the way to do it:

                create a new Resource object, at least a 'http-get'
                and maybe an 'internal' one too

                for transcoding to wav this looks like that

                res = Resource(
                    url_for_transcoded audio,
                    'http-get:*:audio/x-wav:%s'% ';'.join(
                        ['DLNA.ORG_PN=JPEG_TN']+simple_dlna_tags))
                res.size = None
                self.item.res.append(res)
            '''

            if (self.store.server and self.store.server.coherence.config.get(
                    'transcoding', 'no') == 'yes'):
                if self.mimetype in ('audio/mpeg', 'application/ogg',
                                     'audio/ogg', 'audio/x-wav', 'audio/x-m4a',
                                     'audio/flac', 'application/x-flac'):
                    dlna_pn = 'DLNA.ORG_PN=LPCM'
                    dlna_tags = simple_dlna_tags[:]
                    # dlna_tags[1] = 'DLNA.ORG_OP=00'
                    dlna_tags[2] = 'DLNA.ORG_CI=1'
                    new_res = Resource(
                        self.url + '?transcoded=lpcm',
                        f'http-get:*:{"audio/L16;rate=44100;channels=2"}:'
                        f'{";".join([dlna_pn] + dlna_tags)}')
                    new_res.size = None
                    # self.item.res.append(new_res)

                    if self.mimetype != 'audio/mpeg':
                        new_res = Resource(self.url + '?transcoded=mp3',
                                           f'http-get:*:{"audio/mpeg"}:*')
                        new_res.size = None
                        # self.item.res.append(new_res)
            ''' if this item is an image and we want to add a thumbnail for it
                we have to follow these rules:

                create a new Resource object, at least a 'http-get'
                and maybe an 'internal' one too

                for an JPG this looks like that

                res = Resource(url_for_thumbnail,
                        'http-get:*:image/jpg:%s'% ';'.join(
                        ['DLNA.ORG_PN=JPEG_TN']+simple_dlna_tags))
                res.size = size_of_thumbnail
                self.item.res.append(res)

                and for a PNG the Resource creation is like that

                res = Resource(url_for_thumbnail,
                        'http-get:*:image/png:%s'% ';'.join(
                        simple_dlna_tags+['DLNA.ORG_PN=PNG_TN']))

                if not hasattr(self.item, 'attachments'):
                    self.item.attachments = {}
                self.item.attachments[key] = utils.StaticFile(
                filename_of_thumbnail)
            '''

            if (self.mimetype in ('image/jpeg', 'image/png')
                    or self.mimetype.startswith('video/')):
                try:
                    filename, mimetype, dlna_pn = _find_thumbnail(
                        self.get_path())
                except NoThumbnailFound:
                    pass
                except Exception:
                    self.warning(traceback.format_exc())
                else:
                    dlna_tags = simple_dlna_tags[:]
                    dlna_tags[
                        3] = 'DLNA.ORG_FLAGS=00f00000000000000000000000000000'

                    hash_from_path = str(id(filename))
                    new_res = Resource(
                        self.url + '?attachment=' + hash_from_path,
                        f'http-get:*:{mimetype}:'
                        f'{";".join([dlna_pn] + dlna_tags)}')
                    new_res.size = os.path.getsize(filename)
                    self.item.res.append(new_res)
                    if not hasattr(self.item, 'attachments'):
                        self.item.attachments = {}
                    self.item.attachments[hash_from_path] = utils.StaticFile(
                        filename)

            if self.mimetype.startswith('video/'):
                # check for a subtitles file
                caption, _ = os.path.splitext(self.get_path())
                caption = caption + '.srt'
                if os.path.exists(caption):
                    hash_from_path = str(id(caption))
                    mimetype = 'smi/caption'
                    new_res = Resource(
                        self.url + '?attachment=' + hash_from_path,
                        f'http-get:*:{mimetype}:{"*"}')
                    new_res.size = os.path.getsize(caption)
                    self.caption = new_res.data
                    self.item.res.append(new_res)
                    if not hasattr(self.item, 'attachments'):
                        self.item.attachments = {}
                    self.item.attachments[hash_from_path] = utils.StaticFile(
                        caption)

            try:
                # FIXME: getmtime is deprecated in Twisted 2.6
                self.item.date = datetime.fromtimestamp(
                    self.location.getmtime())
            except Exception:
                self.item.date = None