Example #1
def vola_importer(url="https://raw.githubusercontent.com/flyingeek/editolido/gh-pages/ext-sources/vola_legacy_report.txt"):
    # https://oscar.wmo.int/oscar/vola/vola_legacy_report.txt
    if PY2:
        delimiter = b'\t'
        data = urlopen(url)
    else:
        delimiter = '\t'
        import codecs
        data = codecs.iterdecode(urlopen(url), 'utf-8')
    reader = csv.reader(data, delimiter=delimiter, quoting=csv.QUOTE_NONE)

    def geo_normalize(value):
        # recognize NSEW or undefined (which is interpreted as North)
        orientation = value[-1]
        sign = -1 if orientation in 'SW' else 1
        coords = value if orientation not in 'NEWS' else value[:-1]
        coords += ' 0 0'  # ensure missing seconds or minutes are 0
        degrees, minutes, seconds = map(float, coords.split(' ', 3)[:3])
        return sign * (degrees + (minutes / 60) + (seconds / 3600))

    headers = next(reader)
    for row in reader:
        name = row[5]
        if not name:
            continue
        yield name, geo_normalize(row[9]), geo_normalize(row[8]), row[28].split(', ')
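A hedged consumption sketch for the generator above; it assumes the same module-level imports the importer relies on, and that the second and third yielded values are the normalized latitude and longitude the row indices suggest:

import itertools

# print the first few stations yielded by the importer
for name, lat, lon, remarks in itertools.islice(vola_importer(), 5):
    print(name, lat, lon, remarks)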
Example #2
def load_data():
    """Loads movie_data, cust_data, and answers from pickles.

    Returns:
        The tuple (movie_data, cust_data, answers) with the objects loaded from
        their pickles.
    """
    # load movie data cache
    if isfile(CACHE_LOC + MOVIE_PICKLE):
        with open(CACHE_LOC + MOVIE_PICKLE, 'rb') as movie_file:
            movie_data = load(movie_file)
    else:
        movie_data = loads(urlopen(CACHE_URL + MOVIE_PICKLE).read())
    # load customer data cache
    if isfile(CACHE_LOC + CUSTOMER_PICKLE):
        with open(CACHE_LOC + CUSTOMER_PICKLE, 'rb') as cust_file:
            cust_data = load(cust_file)
    else:
        cust_data = loads(urlopen(CACHE_URL + CUSTOMER_PICKLE).read())
    # load answers
    if isfile(CACHE_LOC + ANSWER_PICKLE):
        with open(CACHE_LOC + ANSWER_PICKLE, 'rb') as answer_file:
            answers = load(answer_file)
    else:
        answers = loads(urlopen(CACHE_URL + ANSWER_PICKLE).read())
    return (movie_data, cust_data, answers)
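The three branches above repeat one cache-or-download pattern; here is a hedged refactoring sketch, assuming the same module-level names (CACHE_LOC, CACHE_URL, isfile, load, loads, urlopen and the pickle constants) the original uses:

def _load_pickle(name):
    # prefer the local cache, otherwise fetch and unpickle the remote copy
    if isfile(CACHE_LOC + name):
        with open(CACHE_LOC + name, 'rb') as cached:
            return load(cached)
    return loads(urlopen(CACHE_URL + name).read())

def load_data():
    return (_load_pickle(MOVIE_PICKLE),
            _load_pickle(CUSTOMER_PICKLE),
            _load_pickle(ANSWER_PICKLE))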
Example #3
 def load_doi(self, doi):
     "Create a ref based on a DOI, using the bibtex obtained from the DOI providers"
     
     doi = get_doi(doi)
     if not doi:
         return
     
     try:
         req = Request("http://dx.doi.org/%s"%doi, headers = {'Accept' : 'application/x-bibtex'})
         bibtex = urlopen(req).read()
     except Exception:
         bibtex = None
     
     # add fallback request to crossref API
     if bibtex is None:
         try:
             req = Request("http://api.crossref.org/works/%s/transform/application/x-bibtex" % doi)
             bibtex = urlopen(req).read()
         except Exception:
             bibtex = None
     
     if bibtex is None:
         return None
     
     bibtex = bibtex.decode('utf-8').strip()
     ref = self.parse_bibtex(bibtex)
     
     # add the DOI link
     if "links" not in ref:
         ref["links"] = {"doi": doi}
     else:
         ref["links"]["doi"] = doi
     
     return ref
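The same content negotiation can be exercised on its own; a minimal sketch, not part of the class above, relying on the fact that doi.org honours an application/x-bibtex Accept header:

from urllib.request import Request, urlopen

def doi_to_bibtex(doi):
    # ask the DOI resolver to render the record as BibTeX
    req = Request("https://doi.org/%s" % doi,
                  headers={'Accept': 'application/x-bibtex'})
    return urlopen(req).read().decode('utf-8').strip()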
Example #4
    def main(self):
        """
            dummy for main core method.
        """
        url = 'http://%s:%s/orlangur/_authenticate' % (
            self.api.config.options.app.orlangur_server,
            self.api.config.options.app.orlangur_port)
        data = {'username': self.api.config.options.app.orlangur_user,
                'password': self.api.config.options.app.orlangur_password}
        from urllib.error import URLError

        try:
            r = req.Request(url, parse.urlencode(data).encode('utf8'))
            req.urlopen(r).read()
            connection = Connection(self.api.config.options.app.orlangur_server,
                self.api.config.options.app.orlangur_port)
            db = connection.orlangur
            self.db = db
        except URLError:
            QMessageBox.warning(self.app, 'Error',
                'Orlangur server seems down')
            return


        self.compiler = pystache

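        # NOTE: "async" became a reserved keyword in Python 3.7, so the call
        # below only parses on older interpreters (or with a renamed helper).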
        self.app.async(self.getConfig, self.assignConfig)
Example #5
def __query_website(d):
    """ Communicate with the CMD website """
    webserver = 'http://stev.oapd.inaf.it'
    print('Interrogating {0}...'.format(webserver))
    # url = webserver + '/cgi-bin/cmd_2.8'
    url = webserver + '/cgi-bin/cmd'
    q = urlencode(d)
    # print('Query content: {0}'.format(q))
    if py3k:
        req = request.Request(url, q.encode('utf8'))
        c = urlopen(req).read().decode('utf8')
    else:
        c = urlopen(url, q).read()
    aa = re.compile(r'output\d+')
    fname = aa.findall(c)
    if len(fname) > 0:
        url = '{0}/~lgirardi/tmp/{1}.dat'.format(webserver, fname[0])
        print('Downloading data...{0}'.format(url))
        bf = urlopen(url)
        r = bf.read()
        typ = file_type(r, stream=True)
        if typ is not None:
            r = zlib.decompress(bytes(r), 15 + 32)
        return r
    else:
        # print(c)
        print(url + q)
        if "errorwarning" in c:
            p = __CMD_Error_Parser()
            p.feed(c)
            print('\n', '\n'.join(p.data).strip())
        raise RuntimeError('Server Response is incorrect')
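The decompression above passes wbits=15+32, which tells zlib to auto-detect a gzip or zlib header; a tiny self-contained illustration:

import gzip
import zlib

payload = gzip.compress(b"isochrone table")
assert zlib.decompress(payload, 15 + 32) == b"isochrone table"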
Example #6
    def stop(self):
        """
        Tells the ChromeDriver to stop and cleans up the process
        """
        # If it's dead, don't worry
        if self.process is None:
            return

        # Tell the Server to die!
        try:
            from urllib import request as url_request
        except ImportError:
            import urllib2 as url_request

        url_request.urlopen("http://127.0.0.1:%d/shutdown" % self.port)
        count = 0
        while utils.is_connectable(self.port):
            if count == 30:
                break
            count += 1
            time.sleep(1)

        # Tell the Server to properly die in case it is still running
        try:
            if self.process:
                self.process.kill()
                self.process.wait()
        except OSError:
            # kill may not be available under windows environment
            pass
Example #7
 def testPasswordProtectedSite(self):
     support.requires('network')
     with support.transient_internet('mueblesmoraleda.com'):
         url = 'http://mueblesmoraleda.com'
         robots_url = url + "/robots.txt"
         # First check the URL is usable for our purposes, since the
         # test site is a bit flaky.
         try:
             urlopen(robots_url)
         except HTTPError as e:
             if e.code not in {401, 403}:
                 self.skipTest(
                     "%r should return a 401 or 403 HTTP error, not %r"
                     % (robots_url, e.code))
         else:
             self.skipTest(
                 "%r should return a 401 or 403 HTTP error, not succeed"
                 % (robots_url))
         parser = urllib.robotparser.RobotFileParser()
         parser.set_url(url)
         try:
             parser.read()
         except URLError:
             self.skipTest('%s is unavailable' % url)
         self.assertEqual(parser.can_fetch("*", robots_url), False)
Example #8
	def run(self):
		print("{0} started!".format(self.getName()))
		for i in range(100):  
			urllib2.urlopen(req)

		time.sleep(.2)                                      
		print("{0} finished!".format(self.getName()))            
Example #9
def get_fb_post_json(user):
    """
    :param user: 유저 객체
    :return: 유저의 모든 게시물 json 의 url
    """
    url = 'https://graph.facebook.com/me?access_token=%s&fields=posts' % user.access_token

    json_data = json.loads(urlopen(url).read())

    for article in json_data['posts']['data']:
        article['image'] = get_fb_images_from_article(user, article['id'])

    all_post_data = json_data['posts']['data']

    url = json_data['posts']['paging']['next']

    while True:
        json_data = json.loads(urlopen(url).read())

        if len(json_data['data']) == 0:
            break

        url = json_data['paging']['next']

        for article in json_data['data']:
            article['image'] = get_fb_images_from_article(user, article['id'])

        all_post_data.extend(json_data['data'])  # extend keeps the result a flat list of posts

    return all_post_data
Example #10
def getPublicIP(v6=True):
  if v6:
#    try:
      text = urlopen("http://ipv6.ip6.me/").read()
      if v3:
        match = re.search(bytes(r"\+3>([^<]+)<", 'ascii'), text)
      else:
        match = re.search(r"\+3>([^<]+)<", text)
      ip = match.group(1)
      ip = ip.decode('ascii')
      return ip
#    except Exception as e:
#      print(e)
#      ip=urlopen("http://whatismyv6ip.com/myip").read()
#      return ip.decode('ascii')
  else:
    text = urlopen("http://ip4.me/").read()
    if v3:
      match = re.search(bytes(r"\+3>([^<]+)<", 'ascii'), text)
    else:
      match = re.search(r"\+3>([^<]+)<", text)
#     ip=urlopen("http://whatismyv6ip.com/myip").read()
#     return ip.decode('ascii')
    ip = match.group(1)
    ip = ip.decode('ascii')
    return ip
Example #11
def youtube_download(video_url):
    video_id = parse_qs(urlparse(video_url).query)['v'][0]

    url_data = urlopen('http://www.youtube.com/get_video_info?&video_id=' + video_id).read()
    url_info = parse_qs(unquote(url_data.decode('utf-8')))
    token_value = url_info['token'][0]

    download_url = "http://www.youtube.com/get_video?video_id={0}&t={1}&fmt=18".format(
        video_id, token_value)

    video_title = url_info['title'][0] if 'title' in url_info else ''
    # Unicode filenames are more trouble than they're worth
    filename = video_title.encode('ascii', 'ignore').decode('ascii').replace("/", "-") + '.mp4'

    print("\t Downloading '{}' to '{}'...".format(video_title, filename))

    try:
        download = urlopen(download_url).read()
        f = open(filename, 'wb')
        f.write(download)
        f.close()
    except Exception as e:
        print("\t Downlad failed! {}".format(str(e)))
        print("\t Skipping...")
    else:
        print("\t Done.")
Example #12
 def import_graph_from_urls(self, node_url, edge_url, consume=True, **kwargs):
     node_file = urlopen(node_url)
     edge_file = urlopen(edge_url)
     if consume:
         node_file = io.BytesIO(node_file.read())
         edge_file = io.BytesIO(edge_file.read())
     self.import_graph_from_files(node_file, edge_file, **kwargs)
Example #13
def mp3s(users):
    title = 'example.com'
    link = 'http://www.example.com'
    info = urlopen(link).info()
    size = info['Content-Length']
    mime = info['Content-Type']
    test_example = PodcastItem.objects.create(title=title,
                                              description='',
                                              url=link,
                                              size=size,
                                              mime=mime,
                                              owner=users['user'])

    title = 'ctc1'
    link = 'http://media.reformedforum.org/assets/download/download/audio/ctc1.mp3'
    info = urlopen(link).info()
    size = info['Content-Length']
    mime = info['Content-Type']
    test_ctc1 = PodcastItem.objects.create(title=title,
                                           description='ctc1ss',
                                           url=link,
                                           size=size,
                                           mime=mime,
                                           owner=users['user'])
    mp3s = {'test_example': test_example,
            'test_ctc1': test_ctc1,
            }
    return mp3s
Example #14
def urlget(n, url):
    if n == 1:
        with request.urlopen(url) as f:
            data = f.read()
            for k, v in f.getheaders():
                print('%s: %s' % (k, v))
            #print('Data:', data.decode('utf-8'))

        js = json.loads(data.decode('utf-8'))
        print('JSON: ', end='')
        pp_json(js)

    if n == 2:
        with request.urlopen(url) as f:
            for k, v in f.getheaders():
                print('%s: %s' % (k, v))
            s = f.read().decode('GB2312')
            print('\n\nData:\n', s)

            file_name = r'600518.htm'
            with open(file_name, 'w') as ff:
                ff.write(s)
    if n == 3:
        with request.urlopen(url) as f:
            for k, v in f.getheaders():
                print('%s: %s' % (k, v))
            s = f.read()

            file_name = r'StockAList.htm'
            with open(file_name, 'wb') as ff:
                ff.write(s)
Example #15
def getStats(gen):
    #f = open('stats.html', 'r+')
    #s = f.read()
    #f.close()
    if (gen == 1):
        s = urlopen("http://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_base_stats_%28Generation_I%29").read().decode("utf-8")
    elif (gen < 6):
        s = urlopen("http://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_base_stats_%28Generation_II-V%29").read().decode("utf-8")
    else:
        s = urlopen("http://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_base_stats_%28Generation_VI-present%29").read().decode("utf-8")

    #step1 = s.split('<')
    step1 = s.splitlines()
    
    step2 = [x for x in step1 if (('FF5959' in x) or 
                                  ('F5AC78' in x) or
                                  ('FAE078' in x) or
                                  ('9DB7F5' in x) or
                                  ('A7DB8D' in x) or
                                  ('FA92B2' in x) or
                                  ('(Pokémon)' in x))]

    step3 = removeABs(step2)
    step4 = [x[1:] for x in step3]
    step5 = toDict(step4)
    return step5
Example #16
    def run(self):
        global lock
        global num
        while not self.work_queue.empty():  # keep looping while the queue is not empty
            url = self.work_queue.get()  # take one item from the queue
            try:
                try:
                    r = request.urlopen(url["url"], timeout=60)  # download the image with a 60-second timeout
                except:
                    r = request.urlopen(url["url"], timeout=120)  # on timeout, retry with a 120-second timeout

                if "Content-Type" in r.info():
                    fileName = os.path.join(
                        self.fold, replace(url["name"] + "." + r.info()["Content-Type"].split("image/")[1])
                    )  # derive the image format from the returned Content-Type header, then build the save path
                    if lock.acquire():  # thread synchronization
                        print("Downloading image number " + str(num))
                        if os.path.exists(fileName):
                            # if the file name already exists, rename the image
                            fileName = os.path.join(
                                self.fold,
                                replace("renamed_image_" + str(num) + "." + r.info()["Content-Type"].split("image/")[1]),
                            )
                        num = num + 1
                        lock.release()
                    f = open(fileName, "wb")
                    f.write(r.read())
                    f.close()

            except:
                print(url["url"] + ": download timed out!")
Example #17
def test_MockApp_assert_has_calls_unordered_fails():
    app = MockApp()
    with mock_server(app) as port:
        urlopen('http://127.0.0.1:%d/hello' % port)
        urlopen('http://127.0.0.1:%d/world' % port)
    assert_raises(AssertionError, lambda:
        app.assert_has_calls(['GET /cruel', 'GET /planet'], any_order=True))
Example #18
    def submit_request(self, request, return_response=False):
        '''submit_request will make the request,
        via a stream or not. If return_response is True, the
        response is returned as is, without further parsing.
        On a 401 error, update_token is called and the request is
        retried once; the error is returned only if the retry fails.
        '''

        try:
            response = urlopen(request)

        # If we have an HTTPError, try to follow the response
        except HTTPError as error:

            # Case 1: we have an http 401 error, and need to refresh token
            bot.debug('Http Error with code %s' % (error.code))

            if error.code == 401:
                self.update_token(response=error)
                try:
                    request = self.prepare_request(request.get_full_url(),
                                                   headers=self.headers)
                    response = urlopen(request)
                except HTTPError as error:
                    bot.debug('Http Error with code %s' % (error.code))
                    return error
            else:
                return error

        return response
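The retry-on-401 idea above, reduced to a standalone sketch; refresh_token and build_request are hypothetical stand-ins for self.update_token and self.prepare_request:

from urllib.error import HTTPError
from urllib.request import urlopen

def open_with_refresh(request, refresh_token, build_request):
    try:
        return urlopen(request)
    except HTTPError as error:
        if error.code != 401:
            raise
        refresh_token(error)             # refresh credentials from the 401 response
        return urlopen(build_request())  # retry once with the new headers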
Example #19
def a(url):
    file = url.split('/')[-1]
    u = urlopen(url)
    meta = u.info()
    file_size = int(meta.get_all("Content-Length")[0])

    file_dl = 0
    block_sz = 8192

    if os.path.exists(file) and file_size == os.path.getsize(file):
        print("The file '%s' already exist." % file)
        exit()

    elif os.path.exists(file) and file_size != os.path.getsize(file):
        print("Resuming Download")
        f = open(file, "ab")
        dld = os.path.getsize(file)
        print("Downloading: {} Bytes: {}".format(file, file_size))
        # request only the missing byte range and append it block by block
        req = Request(url)
        req.headers['Range'] = 'bytes=%s-' % dld
        u = urlopen(req)
        file_dl = dld
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break

            file_dl += len(buffer)
            f.write(buffer)
            status = "\r%10d [%3.2f%%]" % (file_dl, file_dl * 100. / file_size)
            status = status + chr(8)*(len(status)+1)

            time.sleep(1)
            sys.stdout.write(status)
            sys.stdout.flush()

        f.close()
        print("File: %s Downloaded Successfully" % (file))

        exit()

    f = open(file, 'wb')
    print("Downloading: {} Bytes: {}".format(file, file_size))

    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break

        file_dl += len(buffer)
        f.write(buffer)
        status = "\r%10d [%3.2f%%]" % (file_dl, file_dl * 100. / file_size)
        status = status + chr(8)*(len(status)+1)

        time.sleep(1)
        sys.stdout.write(status)
        sys.stdout.flush()

    f.close()
    print("File: %s Downloaded Successfully" % (file))
Example #20
def test_MockApp_assert_called_once_with_two_calls():
    app = MockApp()
    with mock_server(app) as port:
        urlopen('http://127.0.0.1:%d/hello' % port)
        urlopen('http://127.0.0.1:%d/world' % port)
    assert_raises(AssertionError, lambda:
        app.assert_called_once_with('GET /world'))
Example #21
def create_app(name, engine):
    """
        Create a Skeleton application (needs internet connection to github)
    """
    try:
        if engine.lower() == "sqlalchemy":
            url = urlopen(SQLA_REPO_URL)
            dirname = "Flask-AppBuilder-Skeleton-master"
        elif engine.lower() == "mongoengine":
            url = urlopen(MONGOENGIE_REPO_URL)
            dirname = "Flask-AppBuilder-Skeleton-me-master"
        zipfile = ZipFile(BytesIO(url.read()))
        zipfile.extractall()
        os.rename(dirname, name)
        click.echo(click.style("Downloaded the skeleton app, good coding!", fg="green"))
        return True
    except Exception as e:
        click.echo(click.style("Something went wrong {0}".format(e), fg="red"))
        if engine.lower() == "sqlalchemy":
            click.echo(
                click.style(
                    "Try downloading from {0}".format(SQLA_REPO_URL), fg="green"
                )
            )
        elif engine.lower() == "mongoengine":
            click.echo(
                click.style(
                    "Try downloading from {0}".format(MONGOENGIE_REPO_URL), fg="green"
                )
            )
        return False
Example #22
def main():
    width = 550
    height = 550

    print("Updating...")
    with urlopen("http://himawari8-dl.nict.go.jp/himawari8/img/D531106/latest.json") as latest_json:
        latest = strptime(loads(latest_json.read().decode("utf-8"))["date"], "%Y-%m-%d %H:%M:%S")

    print("Latest version: {} GMT\n".format(strftime("%Y/%m/%d/%H:%M:%S", latest)))

    url_format = "http://himawari8.nict.go.jp/img/D531106/{}d/{}/{}_{}_{}.png"

    png = Image.new('RGB', (width*level, height*level))

    print("Downloading tiles: 0/{} completed".format(level*level), end="\r")
    for x in range(level):
        for y in range(level):
            with urlopen(url_format.format(level, width, strftime("%Y/%m/%d/%H%M%S", latest), x, y)) as tile_w:
                tiledata = tile_w.read()

            tile = Image.open(BytesIO(tiledata))
            png.paste(tile, (width*x, height*y, width*(x+1), height*(y+1)))

            print("Downloading tiles: {}/{} completed".format(x*level + y + 1, level*level), end="\r")
    print("\nDownloaded\n")

    makedirs(split(output_file)[0], exist_ok=True)
    png.save(output_file, "PNG")
    
    call(["feh", "--bg-fill", "--no-fehbg", output_file])

    print("Done!\n")
Example #23
    def delete_note(self, note_id):
        """ method to permanently delete a note

        Arguments:
            - note_id (string): key of the note to trash

        Returns:
            A tuple `(note, status)`

            - note (dict): an empty dict or an error message
            - status (int): 0 on success and -1 otherwise

        """
        # notes have to be trashed before deletion
        note, status = self.trash_note(note_id)
        if (status == -1):
            return note, status

        params = '/{0}?auth={1}&email={2}'.format(str(note_id), self.get_token(),
                                                  self.username)
        request = Request(url=DATA_URL+params, method='DELETE')
        try:
            urllib2.urlopen(request)
        except IOError as e:
            return e, -1
        return {}, 0
Example #24
	def youtube(url):
		if url.find("v=") != -1: page_url = urlopen("http://gdata.youtube.com/feeds/api/videos/"+url.partition("v=")[2].partition("&")[0])
		elif url.find("/v/") != -1: page_url = urlopen("http://gdata.youtube.com/feeds/api/videos/"+url.partition("/v/")[2].partition("&")[0])
		elif url.find("/user/") != -1:
			shana.say("[URI %s] 1,0You0,4tube %s's Channel" % (uri_number, url.partition("/user/")[2]))
			new_uri([url, "1,0You0,4tube %s's Channel\n" % url.partition("/user/")[2]])
			return
		else:
			page_url = urlopen(url)
			soup = BS(page_url.read(8196))
			title = soup.find("title").get_text().strip()
			shana.say("[URI %s] 1,0You0,4tube %s" % (uri_number, title))
			new_uri([url, "1,0You0,4tube %s\n" % title])
			return

		
		soup = BS(page_url.read())
		title = soup.find("title").get_text().strip()
		views = soup.find("yt:statistics")['viewcount']
		length = int(soup.find("yt:duration")['seconds'])
		seconds = length % 60
		minutes = (length -seconds) / 60
		
		try: rates = int(soup.find("gd:rating")['numraters'])
		except:
			rates = 0
			thumbs_up = 0
		else:
			thumbs_up = int( ((float(soup.find("gd:rating")['average']) - 1.0) / 4.0) * rates )
		
		shana.say("[URI %s] 1,0You0,4tube %s [%d:%02d] - %s views %d 3☺ %d 4☹" % (uri_number, title, minutes, seconds, views, thumbs_up, rates - thumbs_up))
		new_uri([url, "1,0You0,4tube %s [%d:%02d]\n" % (title, minutes, seconds)])
Example #25
    def _add_logo(self, episode, audio):
        # APIC part taken from http://mamu.backmeister.name/praxis-tipps/pythonmutagen-audiodateien-mit-bildern-versehen/
        url = episode.logo_url
        if url is not None:
            request = Request(url)
            request.get_method = lambda: "HEAD"
            try:
                response = urlopen(request)
                logo_type = response.getheader("Content-Type")

                if logo_type in ["image/jpeg", "image/png"]:
                    img_data = urlopen(url).read()
                    img = APIC(
                        encoding=3,  # 3 is for utf-8
                        mime=logo_type,
                        type=3,  # 3 is for the cover image
                        desc="Station logo",
                        data=img_data,
                    )
                    audio.add(img)
            except (HTTPError, URLError) as e:
                message = "Error during capturing %s - %s" % (url, e)
                logging.error(message)
            except Exception as e:
                raise e
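On Python 3.3+ the HEAD probe above can also be written with Request's method argument instead of overriding get_method; a minimal sketch:

from urllib.request import Request, urlopen

def content_type(url):
    response = urlopen(Request(url, method="HEAD"))
    return response.getheader("Content-Type")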
Example #26
	def fourchan_images(url):
		if url.find("/b/") != -1:
			shana.write(['KICK', event.sender+' '+event.nick], 'Rule #1, f****t')
			return
		if url.find("/mlp/") != -1:
			shana.say("ponies :/")
			return
		if url.rsplit(".", 1)[1].lower() in ['jpg', 'jpeg', 'png', 'gif', 'tif', 'tiff']:
			image_url = urlopen(url)
			image_info = image_url.info()
			image_size = int(image_info["Content-Length"])
			image = image_url.read(32768)
			try: w, h, t = identify("%w %h %m", image).strip().split()
			except Exception as e:
				shana.say("Yep, that's a link")
				shana.log("Exception parsing image: %s" % e, 4)
				return
			
			shana.say("[URI %s] 034chan: %sx%s %s %s" % (uri_number, w, h, bytes_to_better_bytes(image_size), t))
			new_uri([url, "034chan %sx%s %s %s\n" % (w, h, bytes_to_better_bytes(image_size), t)])
		if url.rsplit(".", 1)[1].lower() == "webm":
			image_url = urlopen(url)
			image_info = image_url.info()
			image_size = int(image_info["Content-Length"])
			
			shana.say("[URI %s] 034chan: %s WebM" % (uri_number, bytes_to_better_bytes(image_size)))
			new_uri([url, "034chan %s WebM\n" % bytes_to_better_bytes(image_size)])
Example #27
def wmo_importer(url='https://raw.githubusercontent.com/flyingeek/editolido/gh-pages/ext-sources/nsd_bbsss.txt'):
    # http://tgftp.nws.noaa.gov/data/nsd_bbsss.txt
    if PY2:
        delimiter = b';'
        data = urlopen(url)
    else:
        delimiter = ';'
        import codecs
        data = codecs.iterdecode(urlopen(url), 'utf-8')
    reader = csv.reader(data, delimiter=delimiter, quoting=csv.QUOTE_NONE)

    def geo_normalize(value):
        # recognize NSEW or undefined (which is interpreted as North)
        orientation = value[-1]
        sign = -1 if orientation in 'SW' else 1
        coords = value if orientation not in 'NEWS' else value[:-1]
        coords += '-0-0'  # ensure missing seconds or minutes are 0
        degrees, minutes, seconds = map(float, coords.split('-', 3)[:3])
        return sign * (degrees + (minutes / 60) + (seconds / 3600))

    not_airport = '----'

    for row in reader:
        name = row[0] + row[1] if row[2] == not_airport else row[2]
        yield name, row[0] + row[1], geo_normalize(row[8]), geo_normalize(row[7])
Example #28
def get_from_wiki(file_name):
    """We host some larger files used for the test suite separately on the TreeCorr wiki repo
    so people don't need to download them with the code when checking out the repo.
    Most people don't run the tests after all.
    """
    import os
    local_file_name = os.path.join('data',file_name)
    url = 'https://github.com/rmjarvis/TreeCorr/wiki/' + file_name
    if not os.path.isfile(local_file_name):
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen
        import shutil

        print('downloading %s from %s...'%(local_file_name,url))
        # urllib.request.urlretrieve(url,local_file_name)
        # The above line doesn't work very well with the SSL certificate that github puts on it.
        # It works fine in a web browser, but on my laptop I get:
        # urllib.error.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:600)>
        # The solution is to open a context that doesn't do ssl verification.
        # But that can only be done with urlopen, not urlretrieve.  So, here is the solution.
        # cf. http://stackoverflow.com/questions/7243750/download-file-from-web-in-python-3
        #     http://stackoverflow.com/questions/27835619/ssl-certificate-verify-failed-error
        try:
            import ssl
            context = ssl._create_unverified_context()
            u = urlopen(url, context=context)
        except (AttributeError, TypeError):
            # Note: prior to 2.7.9, there is no such function or even the context keyword.
            u = urlopen(url)
        with open(local_file_name, 'wb') as out:
            shutil.copyfileobj(u, out)
        u.close()
        print('done.')
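If disabling verification is undesirable, an alternative (assuming the third-party certifi package is installed) is to point a default SSL context at certifi's CA bundle:

import ssl
import certifi
from urllib.request import urlopen

def open_verified(url):
    # verify against certifi's bundled certificate authorities
    context = ssl.create_default_context(cafile=certifi.where())
    return urlopen(url, context=context)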
Example #29
def ping_google(sitemap_url=None, ping_url=PING_URL):
    """
    Alerts Google that the sitemap for the current site has been updated.
    If sitemap_url is provided, it should be an absolute path to the sitemap
    for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
    function will attempt to deduce it by using urlresolvers.reverse().
    """
    if sitemap_url is None:
        try:
            # First, try to get the "index" sitemap URL.
            sitemap_url = urlresolvers.reverse("django.contrib.sitemaps.views.index")
        except urlresolvers.NoReverseMatch:
            try:
                # Next, try for the "global" sitemap URL.
                sitemap_url = urlresolvers.reverse("django.contrib.sitemaps.views.sitemap")
            except urlresolvers.NoReverseMatch:
                pass

    if sitemap_url is None:
        raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")

    from django.contrib.sites.models import Site

    current_site = Site.objects.get_current()
    url = "http://%s%s" % (current_site.domain, sitemap_url)
    params = urlencode({"sitemap": url})
    urlopen("%s?%s" % (ping_url, params))
Example #30
def nothings(input):
    response = str(BS(urlopen(url + str(input))).text)
    for each_try in range(input + 400):
        try:
            next_nothing = [int(s) for s in response.split() if s.isdigit()][0]
            response = str(BS(urlopen(url + str(next_nothing))).text)
            print(str([int(s) for s in response.split() if s.isdigit()][0]))
        except Exception:
            return ("Non-Nothing URL found!", response)
Example #31
def buildWordDict(text):
    # Keep punctuation, padded with spaces on both sides, so the punctuation
    # marks survive as separate tokens when the text is split.
    punctuation = [',', '.', ';', ':']
    for symbol in punctuation:
        text = text.replace(symbol, " " + symbol + " ")

    # Split into words and filter out empty strings.
    words = text.split(" ")
    words = [word for word in words if word != ""]

    wordDict = {}
    for i in range(1, len(words)):
        if words[i-1] not in wordDict:
            # create a new dictionary for this word
            wordDict[words[i-1]] = {}
        if words[i] not in wordDict[words[i-1]]:
            wordDict[words[i-1]][words[i]] = 0
        wordDict[words[i-1]][words[i]] = wordDict[words[i-1]][words[i]]+1
    return wordDict

text = str(urlopen("http://pythonscraping.com/files/inaugurationSpeech.txt").read(), 'utf-8')
wordDict = buildWordDict(text)

# generate a Markov chain of length 100
length = 100
chain = ""
currentWord = "I"
for i in range(0, length):
    chain += currentWord + " "
    currentWord = retrieveRandomWord(wordDict[currentWord])
print(chain)
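retrieveRandomWord is used above but not shown; a hedged sketch of what it plausibly does is to pick the next word weighted by the observed counts:

import random

def retrieveRandomWord(wordCounts):
    # wordCounts maps candidate next-words to how often they followed the current word
    words = list(wordCounts.keys())
    weights = list(wordCounts.values())
    return random.choices(words, weights=weights)[0]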
Example #32
    from urllib.request import urlopen
else:
    from urllib import urlopen  # pylint: disable=no-name-in-module

# repo root path
ROOT = os.path.realpath(os.path.dirname(__file__))

# HTTP method names
HTTP_METHODS = list()

# HTTP method regex
METHOD_REGEX = re.compile(r'^[A-Za-z]+$', re.ASCII)

# see https://www.iana.org/assignments/http-methods/http-methods.xhtml
url = 'https://www.iana.org/assignments/http-methods/methods.csv'
page = urlopen(url)
data = page.read().decode('utf-8').strip().splitlines()
page.close()

reader = csv.reader(data)
header = next(reader)

for (method, _, _, _) in reader:
    if METHOD_REGEX.match(method.strip()) is None:
        continue
    HTTP_METHODS.append(method.strip().upper())

# generate Bro file
FILE = '''\
module HTTP;
Example #33
    "class %%%(object:\n\tdef ***(self, @@@))":
    "class %%% has-a function *** that takes self and @@@ params.",
    "*** = %%%()": "Set *** to an instance of class %%%.",
    "***.***(@@@)":
    "From *** get the *** function, call it with params self, @@@.",
    "***.*** = '***'": "From *** get the *** attribute and set it to '***'."
}

#do they want to drill phrases first
if len(sys.argv) == 2 and sys.argv[1] == "english":
    PHRASE_FIRST = True
else:
    PHRASE_FIRST = False

#load up the words from the website
for word in urlopen(WORD_URL).readlines():
    WORDS.append(str(word.strip(), encoding="utf-8"))


def convert(snippet, phrase):
    class_names = [
        w.capitalize() for w in random.sample(WORDS, snippet.count("%%%"))
    ]
    other_names = random.sample(WORDS, snippet.count("***"))
    results = []
    param_names = []

    for i in range(0, snippet.count("@@@")):
        param_count = random.randint(1, 3)
        param_names.append(', '.join(random.sample(WORDS, param_count)))
Example #34
from urllib.request import urlopen
from bs4 import BeautifulSoup
html = urlopen("http://www.pythonscraping.com/pages/page3.html")
bsObj = BeautifulSoup(html)
for sibling in bsObj.find("table", {"id": "giftList"}).tr.next_siblings:
    print(sibling)
Example #35
from urllib.request import urlopen
from bs4 import BeautifulSoup

url_page = "https://en.wikipedia.org/wiki/Apple"
page = urlopen(url_page)
soup = BeautifulSoup(page, 'html.parser')
name_box = soup.find('div', attrs={'id': 'bodyContent'})
name = name_box.text.strip()  # strip() removes leading and trailing whitespace
print(name)
Example #36
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 20 16:24:33 2019

@author: tauab
"""
from urllib.request import urlopen 
from bs4 import BeautifulSoup as bs
import psycopg2

url = 'https://www.imd.ufrn.br/portal/noticias'
webPage = urlopen(url)
pageHTML = webPage.read()
webPage.close()

page_soup = bs(pageHTML, "html.parser")

conn = psycopg2.connect(host="host",database="db", user="******", password="******")
cursor = conn.cursor()
cursor.execute("TRUNCATE valores_banco_newsmodel")
cartoes_texto = page_soup.findAll("div", {"class":"card-block p-2"})

listaTituloCartao = []
listaTextoCartao = []
listaDataCartao = []

for cartaoDummy in cartoes_texto:
    listaTituloCartao.append(cartaoDummy.h4.text)
    listaTextoCartao.append(cartaoDummy.p.text)
    listaDataCartao.append(cartaoDummy.div.text.split('\n')[1].split('por')[0])
Example #37
import urllib.request as request
import requests

proxies = {'https': 'https://127.0.0.1:1080'}
headers = {
    'user-agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
}

print('-------------- using urllib --------------')
google_url = 'https://www.google.com'
opener = request.build_opener(request.ProxyHandler(proxies))
request.install_opener(opener)

req = request.Request(google_url, headers=headers)
response = request.urlopen(req)

print(response.read().decode())

print('-------------- using requests --------------')
response = requests.get(google_url, proxies=proxies)
print(response.text)
Example #38
def get_page(url):
    webbrowser.open(url,0,True)
    req = urlopen(url).read()
    res = req.decode('utf-8')
    open_web(get_all_links(res))
Example #39
def send_analyze_request(url, data, converter):
    req = Request(url)
    req.add_header('Content-Type', 'application/json')
    with closing(urlopen(req, json.dumps(data).encode('utf-8'))) as response:
        result = json.loads(response.read().decode('utf-8'))
        return converter(result)
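A hedged usage sketch for the helper above; the endpoint URL, payload and field name below are made up for illustration:

result = send_analyze_request('http://localhost:8080/analyze',
                              {'text': 'hello world'},
                              lambda response: response.get('tokens'))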
Example #40
 def parse_request(self, url):
     with urlopen(url) as response:
         tree = etree.parse(response)
         root = tree.getroot()
         return root
Example #41
def fetch_deck(input_dir):

    prefix_lookup = dict(IMPLICIT_PREFIXES)

    # Loop until we get a category that works
    while not input_dir:
        try:

            # Choose at random
            catname = get_category()
            if catname is None:
                raise Exception("No more categories")

            category = {
                'name': catname,
                'friendly': None,
                'description': None,
                'image': None,
            }
            logging.info('{} chosen'.format(category['name']))
            shorten_uri(prefix_lookup, category['name'])

            # Fetch top numerical properties as the statistics
            results = query(
                """%(prefixes)s
                               SELECT 
                                   ?p 
                                   COUNT(DISTINCT ?o) 
                                   GROUP_CONCAT(DISTINCT datatype(?v), "|") as ?t
                               WHERE
                               {
                                   ?o a %(category)s
                                   . ?o ?p ?v
                                   . FILTER( %(numeric-clause)s
                                             && ?p != dbo:wikiPageID 
                                             && ?p != dbo:wikiPageRevisionID )
                               }
                               GROUP BY ?p
                               ORDER BY DESC(COUNT(DISTINCT ?o))
                               LIMIT %(max-num-stats)d""" % {
                    'prefixes': prefix_declarations(prefix_lookup),
                    'category': shorten_uri(prefix_lookup, category['name']),
                    'max-num-stats':
                    MAX_NUM_STATS + 3,  # leeway for when we de-dup
                    'numeric-clause': NUMERIC_VAL_CLAUSE.format('?v')
                })

            statistics = []
            unqual_seen = set()
            for result in results:
                types = set(result['t'].split('|')) - {''}
                if len(types) == 0:
                    continue
                unqual = result['p'].split('/')[-1]
                if unqual in unqual_seen:
                    continue
                unqual_seen.add(unqual)
                statistics.append({
                    'name': result['p'],
                    'type': next(iter(types)),
                    'friendly': None,
                })
            statistics = statistics[:MAX_NUM_STATS]

            logging.info('{} stats'.format(len(statistics)))
            for s in statistics:
                shorten_uri(prefix_lookup, s['name'])

            if len(statistics) < MIN_NUM_STATS:
                logging.info("Insufficient stats: {}".format(len(statistics)))
                continue

            # Fetch ids of top category members
            results = query(
                """%(prefixes)s
                               SELECT ?o COUNT(DISTINCT ?p)
                               WHERE
                               {
                                    ?o a %(category)s
                                    . ?o ?p ?v
                                    . FILTER( ( %(properties)s ) 
                                              && %(numeric-clause)s )
                                }
                                GROUP BY ?o
                                HAVING ( COUNT(DISTINCT ?p) >= %(min-num-stats)d )
                                ORDER BY DESC(COUNT(DISTINCT ?p))
                                LIMIT %(max-deck-size)d""" % {
                    'prefixes':
                    prefix_declarations(prefix_lookup),
                    'category':
                    shorten_uri(prefix_lookup, category['name']),
                    'properties':
                    ' || '.join([
                        '?p = {}'.format(shorten_uri(prefix_lookup, p['name']))
                        for p in statistics
                    ]),
                    'min-num-stats':
                    MIN_NUM_STATS,
                    'max-deck-size':
                    MAX_DECK_SIZE,
                    'numeric-clause':
                    NUMERIC_VAL_CLAUSE.format('?v')
                })

            members = [b['o'] for b in results]
            logging.info('{} members'.format(len(members)))
            for m in members:
                shorten_uri(prefix_lookup, m)

            if len(members) < MIN_DECK_SIZE:
                logging.info("Insufficient members: {}".format(len(members)))
                continue

            # fetch category details
            results = query(
                """%(prefixes)s
                               SELECT 
                                   GROUP_CONCAT(?l, "|") as ?name 
                                   GROUP_CONCAT(?c, "|") as ?description 
                                   GROUP_CONCAT(?t, "|") as ?image
                               WHERE
                               {
                                   OPTIONAL { %(category)s rdfs:label ?l }
                                   OPTIONAL { %(category)s rdfs:comment ?c }
                                   OPTIONAL { %(category)s dbo:thumbnail ?t }
                                   FILTER ( (langMatches(lang(?l), "EN") || lang(?l) = "") 
                                             && (langMatches(lang(?c), "EN") || lang(?c) = "") )
                               }""" % {
                    'prefixes': prefix_declarations(prefix_lookup),
                    'category': shorten_uri(prefix_lookup, category['name'])
                })
            if len(results) > 0:
                result = results[0]
                category['friendly'] = pluralise(
                    result['name'].split('|')[0].title(
                    ) if result['name'] else uri_to_friendly(category['name']))
                category['description'] = first_sentence(result['description'].split('|')[0]) \
                                            if result['description'] else None
                category['image'] = result['image'].split('|')[0] \
                                            if result['image'] else None
            else:
                category['friendly'] = pluralise(
                    uri_to_friendly(category['name']))

            # fetch stat details
            results = query(
                """%(prefixes)s
                               SELECT ?p GROUP_CONCAT(?l, "|") as ?name
                               WHERE
                               {
                                   ?p rdfs:label ?l                               
                                   . FILTER( ( %(properties)s )
                                             && ( langMatches(lang(?l),"EN") || lang(?l) = "" ) )
                               }
                               GROUP BY ?p""" % {
                    'prefixes':
                    prefix_declarations(prefix_lookup),
                    'properties':
                    ' || '.join([
                        '?p = {}'.format(shorten_uri(prefix_lookup, p['name']))
                        for p in statistics
                    ])
                })

            lookup = {
                r['p']: r['name'].split('|')[0].title()
                for r in results if r['name']
            }
            for s in statistics:
                s['friendly'] = lookup.get(s['name'],
                                           uri_to_friendly(s['name']))

            # Fetch member details
            results = query(
                """%(prefixes)s
                               SELECT 
                                   ?o 
                                   GROUP_CONCAT(DISTINCT ?label,"|") as ?name
                                   GROUP_CONCAT(DISTINCT ?comment,"|") as ?description
                                   GROUP_CONCAT(DISTINCT ?thumbnail,"|") as ?image
                                   %(property-projections)s
                               WHERE
                              {
                                   VALUES ?o { %(members)s }
                                   OPTIONAL { ?o rdfs:label ?label }
                                   OPTIONAL { ?o rdfs:comment ?comment }
                                   OPTIONAL { ?o dbo:thumbnail ?thumbnail }
                                   %(property-joins)s
                                   FILTER( ( langMatches(lang(?label), "EN") || lang(?label) = "" )
                                            && ( langMatches(lang(?comment), "EN") || lang(?comment) = "" ) )
                               }
                               GROUP BY ?o""" % {
                    'prefixes':
                    prefix_declarations(prefix_lookup),
                    'property-projections':
                    '\n'.join([
                        'GROUP_CONCAT(DISTINCT ?p{}, "|") as ?stat{}'.format(
                            i, i) for i, p in enumerate(statistics)
                    ]),
                    'property-joins':
                    '\n'.join([
                        'OPTIONAL {{ ?o {} ?p{} . FILTER {} }}'.format(
                            shorten_uri(prefix_lookup, p['name']), i,
                            NUMERIC_VAL_CLAUSE.format('?p{}'.format(i)))
                        for i, p in enumerate(statistics)
                    ]),
                    'members':
                    ' '.join([
                        '{}'.format(shorten_uri(prefix_lookup, m))
                        for m in members
                    ])
                })

            cards = []
            for result in results:
                cards.append({
                    'name':
                    result['name'].split('|')[0].title()
                    if result['name'] else uri_to_friendly(result['o']),
                    'description':
                    first_sentence(result['description'].split('|')[0])
                    if result['description'] else None,
                    'image':
                    result['image'].split('|')[0] if result['image'] else None,
                    'stats': [],
                })
                for k, v in result.items():
                    if not k.startswith('stat'):
                        continue
                    idx = int(re.sub(r'[^0-9]', '', k))
                    stat = statistics[idx]
                    cards[-1]['stats'].append(
                        format_stat(stat['type'],
                                    v.split('|')[0]))

            deck = {
                'name':
                category['friendly'],
                'description':
                category['description'] if category['description'] else None,
                'stats': [s['friendly'] for s in statistics],
                'cards':
                cards,
            }

            output_name = 'deck_{}'.format(friendly_to_filename(deck['name']))
            output_dir = os.path.abspath(os.path.join('.', output_name))
            logging.info("Writing deck \"{}\" to {}".format(
                deck['name'], output_dir))
            os.mkdir(output_dir)

            for i, card in enumerate(deck['cards']):
                if card['image'] is None:
                    continue
                logging.debug('Downloading {}'.format(card['image']))
                try:
                    res = urlopen(
                        Request(uri_to_ascii(card['image']),
                                headers={'User-Agent': USER_AGENT}))
                except HTTPError as e:
                    if e.getcode() == 404:
                        logging.warn("404 for {}".format(card['image']))
                        card['image'] = None
                        continue
                    raise
                contenttype = res.headers['Content-Type']
                imagetype = IMAGE_TYPES.get(contenttype, None)
                if imagetype is None:
                    logging.warn("Non-image response ({}) for {}".format(
                        contenttype, card['image']))
                    card['image'] = None
                    continue
                imagename = 'card{:02d}.{}'.format(i, imagetype)
                with open(os.path.join(output_dir, imagename), 'wb') as f:
                    while True:
                        buff = res.read(1024)
                        if not buff:
                            break
                        f.write(buff)
                card['image'] = imagename

            logging.debug("writing json file")
            with open(os.path.join(output_dir, '{}.json'.format(output_name)),
                      'w') as f:
                json.dump(deck, f, indent=2)

            # exit condition - we're done
            input_dir = output_dir

        except (HTTPError, URLError) as e:
            logging.error(e)
            logging.debug("Pausing for {}s".format(ERROR_PAUSE_TIME))
            time.sleep(ERROR_PAUSE_TIME)
            continue

    return input_dir