Example 1
    def command(self):
        args = list(self.args)
        method, url = args[0:2]

        if not url.startswith('http'):
            url = 'http://%s:%s%s' % (self.session.config.sys.http_host,
                                      self.session.config.sys.http_port,
                                      ('/' + url).replace('//', '/'))

        # FIXME: The python URLopener doesn't seem to support other verbs,
        #        which is really quite lame.
        method = method.upper()
        assert(method in ('GET', 'POST'))

        qv, pv = [], []
        if method == 'POST':
            which = pv
        else:
            which = qv
        for arg in args[2:]:
            if '=' in arg:
                which.append(tuple(arg.split('=', 1)))
            elif arg.upper()[0] == 'P':
                which = pv
            elif arg.upper()[0] == 'Q':
                which = qv

        if qv:
            qv = urlencode(qv)
            url += ('?' in url and '&' or '?') + qv

        # Log us in automagically!
        httpd = self.session.config.http_worker.httpd
        global HACKS_SESSION_ID
        if HACKS_SESSION_ID is None:
            HACKS_SESSION_ID = httpd.make_session_id(None)
        mailpile.auth.SetLoggedIn(None,
                                  user='******',
                                  session_id=HACKS_SESSION_ID)
        cookie = httpd.session_cookie

        try:
            uo = URLopener()
            uo.addheader('Cookie', '%s=%s' % (cookie, HACKS_SESSION_ID))
            with TcpConnBroker().context(need=[TcpConnBroker.OUTGOING_HTTP]):
                if method == 'POST':
                    (fn, hdrs) = uo.retrieve(url, data=urlencode(pv))
                else:
                    (fn, hdrs) = uo.retrieve(url)
            hdrs = unicode(hdrs)
            data = open(fn, 'rb').read().strip()
            if data.startswith('{') and 'application/json' in hdrs:
                data = json.loads(data)
            return self._success('%s %s' % (method, url), result={
                'headers': hdrs.splitlines(),
                'data': data
            })
        except:
            self._ignore_exception()
            return self._error('%s %s' % (method, url))
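The FIXME above is accurate: URLopener only covers GET (retrieve without data) and POST (retrieve with data). A minimal sketch of issuing other verbs with the standard-library httplib instead; the host, port, path and cookie values are placeholders, not Mailpile's actual configuration:

import httplib

# Placeholder host/port/path/cookie standing in for the session config above.
conn = httplib.HTTPConnection('127.0.0.1', 33411)
conn.request('PUT', '/api/0/settings/', body='x=y',
             headers={'Cookie': 'mailpile_sid=abc123'})
resp = conn.getresponse()
print resp.status, resp.read()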
Example 2
    def read_component_sitemap(self, sitemapindex_uri, sitemap_uri, sitemap, sitemapindex_is_file):
        """Read a component sitemap of a Resource List with index

        Each component must be a sitemap with the 
        """
        if (sitemapindex_is_file):
            if (not self.is_file_uri(sitemap_uri)):
                # Attempt to map URI to local file
                remote_uri = sitemap_uri
                sitemap_uri = self.mapper.src_to_dst(remote_uri)
                self.logger.info("Mapped %s to local file %s" % (remote_uri, sitemap_uri))
            else:
                # The individual sitemaps should be at a URL (scheme/server/path)
                # that the sitemapindex URL can speak authoritatively about
                if (self.check_url_authority and
                    not UrlAuthority(sitemapindex_uri).has_authority_over(sitemap_uri)):
                    raise ListBaseIndexError("The sitemapindex (%s) refers to sitemap at a location it does not have authority over (%s)" % (sitemapindex_uri,sitemap_uri))
        try:
            fh = URLopener().open(sitemap_uri)
            self.num_files += 1
        except IOError as e:
            raise ListBaseIndexError("Failed to load sitemap from %s listed in sitemap index %s (%s)" % (sitemap_uri,sitemapindex_uri,str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
        except KeyError:
            # If we don't get a length then c'est la vie
            pass
        self.logger.info( "Reading sitemap from %s (%d bytes)" % (sitemap_uri,self.content_length) )
        component = sitemap.parse_xml( fh=fh, sitemapindex=False )
        # Copy resources into self, check any metadata
        for r in component:
            self.resources.add(r)
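The authority check in read_component_sitemap relies on UrlAuthority, built from the sitemapindex URI: a sitemapindex may only reference sitemaps at locations it can speak authoritatively about (same scheme/server, path at or below its own). A small sketch, assuming the resync package's import path:

from resync.url_authority import UrlAuthority  # assumed import path

master = UrlAuthority('http://example.org/dir/sitemapindex.xml')
print master.has_authority_over('http://example.org/dir/sub/sitemap1.xml')  # True
print master.has_authority_over('http://other.example.com/sitemap1.xml')    # False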
Example 3
def getRetriever(scheme):
    """
    Get the right retriever function depending on the scheme.
    If the scheme is 'http', return urllib.urlretrieve; if the scheme is 'https', create a URLopener
    with the certificate taken from the X509_USER_PROXY variable. If no certificate is available,
    fall back to urllib.urlretrieve as in the http case.
    """
    if 'X509_USER_PROXY' in os.environ and os.path.isfile(
            os.environ['X509_USER_PROXY']):
        certfile = os.environ['X509_USER_PROXY']
    else:
        if scheme == 'https':
            print(
                "User proxy not found. Trying to retrieve the file without using certificates"
            )
        certfile = None

    if scheme == 'http' or not certfile:
        retriever = urllib.urlretrieve
    else:
        print("Using %s as X509 certificate" % certfile)
        op = URLopener(None, key_file=certfile, cert_file=certfile)
        op.addheader('Accept', 'application/octet-stream')
        retriever = op.retrieve

    return retriever
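A usage sketch for getRetriever, with urlparse supplying the scheme; the URL and destination path are placeholders:

from urlparse import urlparse

url = 'https://example.org/data/archive.tar.gz'  # placeholder URL
retriever = getRetriever(urlparse(url).scheme)
retriever(url, '/tmp/archive.tar.gz')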
def getcif(target):
    """
    Get all ICSD cif files listed in the target file.
    The target file should contain tags like '# BCC'.
    """
    matgenIDs=getMatgenIDs()

    if not os.path.isdir('./ciffiles'):
        os.makedirs('./ciffiles')

    with open(target,'r') as f:
        st=f.readline()
        t1=time.time()
        while st:
            if st[0]=='#':
                tg=st.split()[-1]
                st=f.readline()
                t2=time.time()
                print "time for the %s = %2.2f sec" %(tg,t2-t1) 
                t1=time.time()
                continue
            st=st.strip()
            ind=getID(st)
            if ind in matgenIDs:
                continue #skip matgen compounds
            URL=prefix+tg+'/'+st+'/'+st+'.cif' 
            testfile=URLopener()
            try:
                testfile.retrieve(URL,'ciffiles/'+st)
            except:
                print "Error: ",URL

            st=f.readline()
Example 5
def unshortenurl(short):
    from urllib import URLopener
    # A plain URLopener (unlike FancyURLopener) does not follow redirects:
    # a 3xx response is raised as IOError('http error', code, message, headers).
    opener = URLopener()
    try:
        opener.open(short)
    except IOError, e:
        if len(e.args) == 4 and e.args[1] in (301, 302, 303, 307):
            return e.args[3].get('Location', short)
    return short
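Usage is then a one-liner; the short URL below is a placeholder:

print unshortenurl('http://example.org/short-link')  # expanded URL, or the input if no redirect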
Example 6
class check_the_mangas():
	def __init__(self,manga_name):
		self.manga_name	     = manga_name
		self.myfile		     = open(configuration.DATA_FILE,'r').read()
		self.manga_oldnumber = self.get_number()
		self.manga_nownumber = self.manga_oldnumber
		self.manga_olddate   = self.get_date  ()
		self.nowdate		 = self.today_date()
		self.br			     = URLopener()

	def get_number(self):
		return re.findall(self.manga_name+':([0-9]+):',self.myfile)[0]

	def get_date(self):
		return re.findall(self.manga_name+":"+str(self.manga_oldnumber)+':(.*)\n',self.myfile)[0]

	def today_date(self):
		return subprocess.check_output(["date","+%a-%b-%e"]).replace("\n","")

	#return 1 if the connection is working
	def test_connection(self):
		try:
			response = self.br.open(configuration.WEBSITE_TO_CHECK_CONNECTION).read()
			if configuration.KEYWORD in response:
				return 1
			else:
				return 0
		except:
			print "manga connection"
			return 0

	def exec_cmd(self):
		pid = os.fork()
		os.umask(0)
		os.system(configuration.MANGA_NEW_CMD.replace("MANGA",self.manga_name))

	def run(self):
		if( self.test_connection() ):
			last_chapter = False
			try:
				while(last_chapter==False):
					to_open = "http://www.mangareader.net/" + self.manga_name + "/" + str( int(self.manga_nownumber)+1 )
					response = self.br.open( to_open).read()
					if "is not released yet" in response or "not published yet" in response or response == "":
						last_chapter = True
						if self.manga_name + ":" + str(self.manga_nownumber) not in open(configuration.DATA_FILE, "r").read():
							Thread(target=self.exec_cmd).start()
							configuration.backup()
							open(configuration.DATA_FILE,'w').write(open(configuration.DATA_FILE+".bak", "r").read().replace(self.manga_name+":"+str(self.manga_oldnumber)+":"+ self.manga_olddate, self.manga_name+":"+str(self.manga_nownumber)+":"+self.nowdate))
					else:
						print "not last chapter"
						self.manga_nownumber = str( int(self.manga_nownumber)+1 )
			except Exception,e :
				print e
				print "manga run"
				if "is not released yet. If you liked" in response:
					if self.manga_name + ":" + str(self.manga_nownumber) not in open(configuration.DATA_FILE, "r").read():
						configuration.backup()
						open(configuration.DATA_FILE,'w').write(open(configuration.DATA_FILE+".bak", "r").read().replace(self.manga_name+":"+str(self.manga_oldnumber)+":"+ self.manga_olddate, self.manga_name+":"+str(self.manga_nownumber)+":"+self.nowdate))
				pass
Example 7
def test_basic_startup():
    import thread
    # XXX: how to do this without threads?
    httpd = server.HTTPServer(('127.0.0.1', 21210), Handler)
    thread.start_new_thread(httpd.serve_forever, ())
    assert URLopener().open("http://127.0.0.1:21210/index").read() == "xxx"
    assert URLopener().open("http://127.0.0.1:21210/").read() == "xxx"
    def handle_starttag(self, tag, attrs):
        #tmpoutput = ""
        count = 0
        global bDoWork
        #self.output = ""
        # Only parse the 'anchor' tag.
        if tag == "a":
            # Check the list of defined attributes.
            for name, value in attrs:
                # If href is defined, print it.
                if name == "href":
                    if value[len(value) - 3:len(value)] == "jpg":
                        #print value
                        if not "http://" in value and bDoWork == True:
                            bDoWork = False
                            tmpoutput = value
                            #print "Val: " + value
                            imgurl = 'http://apod.nasa.gov/apod/' + tmpoutput
                            #print "IMGURL: " + imgurl
                            filename = imgurl.split('/')[-1]
                            #print "FileName: " + filename

                            if (not os.path.isfile(filename)) and (
                                    'apod.nasa.gov' in imgurl):
                                print "Downloading: " + filename
                                image = URLopener()
                                image.retrieve(imgurl, filename)
                                sleep(lWaitTime)
                            elif (os.path.isfile(filename)):
                                print "Verified: " + filename
                            break
Example 9
class check_the_mangas():
    def __init__(self,manga_name, db_conn):
        self.db_conn = db_conn
        self.manga_name = manga_name
        self.manga_oldnumber = sqlite_manager.get_manga_chapter(
            db_conn,
            manga_name)
        self.manga_nownumber = self.manga_oldnumber
        self.manga_olddate = sqlite_manager.get_manga_date(
            db_conn,
            manga_name)
        self.nowdate = self.today_date()
        self.br = URLopener()

    def today_date(self):
        return subprocess.check_output(["date","+%a-%b-%e"]).replace("\n","")

    #return 1 if the connection is working
    def test_connection(self):
        try:
            response = self.br.open(configuration.WEBSITE_TO_CHECK_CONNECTION).read()
            if configuration.KEYWORD in response:
                return 1
            else:
                return 0
        except:
            print "manga connection"
            return 0

    def exec_cmd(self):
        pid = os.fork()
        os.umask(0)
        os.system(configuration.MANGA_NEW_CMD.replace("MANGA",self.manga_name))

    def run(self):
        if( self.test_connection() ):
            last_chapter = False
            try:
                while(last_chapter==False):
                    to_open = "http://www.mangareader.net/" + self.manga_name + "/" + str( int(self.manga_nownumber)+1 )
                    response = self.br.open( to_open).read()
                    if "is not released yet" in response or "not published yet" in response or response == "":
                        last_chapter = True
                        if self.manga_nownumber != sqlite_manager.get_manga_chapter(self.db_conn, self.manga_name):
                            print self.manga_name+":"+self.manga_nownumber+":"+self.nowdate
                            sqlite_manager.update_manga(self.db_conn,
                                self.manga_name,
                                self.manga_nownumber,
                                self.nowdate)
                    else:
                        self.manga_nownumber = str( int(self.manga_nownumber)+1 )
            except Exception,e :
                if "is not released yet. If you liked" in response:
                    if self.manga_nownumber != sqlite_manager.get_manga_chapter(self.db_conn,self.manga_name):
                        print self.manga_name+":"+self.manga_nownumber+":"+self.nowdate
                        sqlite_manager.update_manga(self.db_conn,
                            self.manga_name,
                            self.manga_nownumber,
                            self.nowdate)
                pass
    def handle_starttag(self, tag, attrs):
        #tmpoutput = ""
        count = 0
        global bDoWork
        #self.output = ""
        # Only parse the 'anchor' tag.
        if tag == "a":
            # Check the list of defined attributes.
            for name, value in attrs:
                # If href is defined, print it.
                if name == "href":
                    if value[len(value) - 3:len(value)] == "jpg":
                        #print value
                        if not "http://" in value and bDoWork == True: 
                            bDoWork = False
                            tmpoutput = value
                            #print "Val: " + value
                            imgurl = 'http://apod.nasa.gov/apod/' + tmpoutput
                            #print "IMGURL: " + imgurl
                            filename = imgurl.split('/')[-1]
                            #print "FileName: " + filename

                            if (not os.path.isfile(filename)) and ('apod.nasa.gov' in imgurl):
                                #print "Downloading: " + filename
                                image = URLopener()
                                image.retrieve(imgurl,filename) 
                                sleep(lWaitTime)
                            elif (os.path.isfile(filename)):
                                print "Verified: " + filename
                            break
Example 11
def download_package(pkg_name, pkg_version):
    '''Download the required package. Sometimes the download can be flaky, so we use the
  retry decorator.'''
    pkg_type = 'sdist'  # Don't download wheel archives for now
    # This JSON endpoint is not provided by PyPI mirrors so we always need to get this
    # from pypi.python.org.
    pkg_info = json.loads(
        urlopen('https://pypi.python.org/pypi/%s/json' % pkg_name).read())

    downloader = URLopener()
    for pkg in pkg_info['releases'][pkg_version]:
        if pkg['packagetype'] == pkg_type:
            filename = pkg['filename']
            expected_md5 = pkg['md5_digest']
            if os.path.isfile(filename) and check_md5sum(
                    filename, expected_md5):
                print "File with matching md5sum already exists, skipping %s" % filename
                return True
            pkg_url = "{0}/packages/{1}".format(PYPI_MIRROR, pkg['path'])
            print "Downloading %s from %s" % (filename, pkg_url)
            downloader.retrieve(pkg_url, filename)
            actual_md5 = md5(open(filename).read()).hexdigest()
            if check_md5sum(filename, expected_md5):
                return True
            else:
                print "MD5 mismatch in file %s." % filename
                return False
    print "Could not find archive to download for %s %s %s" % (
        pkg_name, pkg_version, pkg_type)
    sys.exit(1)
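The retry decorator the docstring mentions is not part of this excerpt; a minimal sketch of one such decorator (name, signature and back-off policy are assumptions, not the project's actual helper):

import time
import functools

def retry(attempts=3, delay=2):
    # Hypothetical stand-in for the retry decorator referred to above.
    def wrap(fn):
        @functools.wraps(fn)
        def inner(*args, **kwargs):
            for i in range(attempts):
                try:
                    return fn(*args, **kwargs)
                except IOError:
                    if i == attempts - 1:
                        raise
                    time.sleep(delay)
        return inner
    return wrap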
Example 14
 def do_method(self, method):
     method_conf = self.config[method]
     matchlen = 0
     match = None
     for path in method_conf:
         if self.is_path_prefix(path) and len(path) > matchlen:
             matchlen = len(path)
             match = path
     if matchlen > 0:
         self.send_error(method_conf[match])
     elif "forward_to" in self.config:
         url = urljoin(self.config['forward_to'], self.path)
         self.log_request()
         self.log_message("Forwarding to {}".format(url))
         o = URLopener().open(url)
         self.wfile.write(o.read())
         o.close()
     elif "*" in method_conf:
         self.send_error(method_conf['*'])
     else:
         print(method.upper(), self.path, self.config['port'])
         self.log_message(
             "No match for %s %s on port %d and no default configured" %
             (method.upper(), self.path, self.config['port']))
         self.send_error(404)
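For reference, a config shape this handler would accept, inferred from the lookups above (all values illustrative): each method name maps path prefixes to an error code, the longest matching prefix wins, forward_to proxies requests with no prefix match, and '*' is the fallback when neither applies:

config = {
    'port': 8080,
    'forward_to': 'http://127.0.0.1:9000/',  # optional upstream for unmatched paths
    'get': {
        '/health': 200,  # longest matching path prefix wins
        '*': 404,        # used only when nothing matches and no forward_to is set
    },
}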
Example 15
    def command(self):
        args = list(self.args)
        method, url = args[0:2]

        if not url.startswith("http"):
            url = "http://%s:%s%s" % (
                self.session.config.sys.http_host,
                self.session.config.sys.http_port,
                ("/" + url).replace("//", "/"),
            )

        # FIXME: The python URLopener doesn't seem to support other verbs,
        #        which is really quite lame.
        method = method.upper()
        assert method in ("GET", "POST")

        qv, pv = [], []
        if method == "POST":
            which = pv
        else:
            which = qv
        for arg in args[2:]:
            if "=" in arg:
                which.append(tuple(arg.split("=", 1)))
            elif arg.upper()[0] == "P":
                which = pv
            elif arg.upper()[0] == "Q":
                which = qv

        if qv:
            qv = urlencode(qv)
            url += ("?" in url and "&" or "?") + qv

        # Log us in automagically!
        httpd = self.session.config.http_worker.httpd
        global HACKS_SESSION_ID
        if HACKS_SESSION_ID is None:
            HACKS_SESSION_ID = httpd.make_session_id(None)
        mailpile.auth.SetLoggedIn(None, user="******", session_id=HACKS_SESSION_ID)
        cookie = httpd.session_cookie

        try:
            uo = URLopener()
            uo.addheader("Cookie", "%s=%s" % (cookie, HACKS_SESSION_ID))
            with TcpConnBroker().context(need=[TcpConnBroker.OUTGOING_HTTP], oneshot=True):
                if method == "POST":
                    (fn, hdrs) = uo.retrieve(url, data=urlencode(pv))
                else:
                    (fn, hdrs) = uo.retrieve(url)
            hdrs = unicode(hdrs)
            data = open(fn, "rb").read().strip()
            if data.startswith("{") and "application/json" in hdrs:
                data = json.loads(data)
            return self._success("%s %s" % (method, url), result={"headers": hdrs.splitlines(), "data": data})
        except:
            self._ignore_exception()
            return self._error("%s %s" % (method, url))
Example 17
def connection():
	try:
		br = URLopener()
		response = br.open(configuration.WEBSITE_TO_CHECK_CONNECTION).read()
		if configuration.KEYWORD in response:
			return 1
		else:
			return 0
	except:
		return 0
Example 18
    def read(self, uri=None, resources=None, index_only=False):
        """Read sitemap from a URI including handling sitemapindexes

        If index_only is True then individual sitemaps references in a sitemapindex
        will not be read. This will result in no resources being returned and is
        useful only to read the metadata and links listed in the sitemapindex.

        Includes the subtlety that if the input URI is a local file and is a 
        sitemapindex which contains URIs for the individual sitemaps, then these
        are mapped to the filesystem also.
        """
        try:
            fh = URLopener().open(uri)
            self.num_files += 1
        except IOError as e:
            raise IOError("Failed to load sitemap/sitemapindex from %s (%s)" %
                          (uri, str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
            self.logger.debug("Read %d bytes from %s" %
                              (self.content_length, uri))
        except KeyError:
            # If we don't get a length then c'est la vie
            self.logger.debug("Read ????? bytes from %s" % (uri))
            pass
        self.logger.info("Read sitemap/sitemapindex from %s" % (uri))
        s = self.new_sitemap()
        s.parse_xml(fh=fh, resources=self, capability=self.capability_name)
        # what did we read? sitemap or sitemapindex?
        if (s.parsed_index):
            # sitemapindex
            if (not self.allow_multifile):
                raise ListBaseIndexError(
                    "Got sitemapindex from %s but support for sitemapindex disabled"
                    % (uri))
            self.logger.info("Parsed as sitemapindex, %d sitemaps" %
                             (len(self.resources)))
            sitemapindex_is_file = self.is_file_uri(uri)
            if (index_only):
                # don't read the component sitemaps
                self.sitemapindex = True
                return
            # now loop over all entries to read each sitemap and add to resources
            sitemaps = self.resources
            self.resources = self.resources_class()
            self.logger.info("Now reading %d sitemaps" % len(sitemaps.uris()))
            for sitemap_uri in sorted(sitemaps.uris()):
                self.read_component_sitemap(uri, sitemap_uri, s,
                                            sitemapindex_is_file)
        else:
            # sitemap
            self.logger.info("Parsed as sitemap, %d resources" %
                             (len(self.resources)))
Example 19
def utGrabFromUrl(p_url):
    """ Takes a file from a remote server """
    from urllib import URLopener
    try:
        l_opener = URLopener()
        l_file = l_opener.open(p_url)
        ctype = l_file.headers['Content-Type']
        l_opener.close()
        return (l_file.read(), ctype)
    except:
        return (None, 'text/x-unknown-content-type')
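A quick usage sketch for utGrabFromUrl; the URL and output filename are placeholders:

data, ctype = utGrabFromUrl('http://example.org/logo.png')
if data is not None:
    open('logo.png', 'wb').write(data)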
Example 20
class SlippyCache(object):
    """This is a basic map tile cache used by the SlippyPanel class
    to retrieve and store locally the images that form the map"""
    def __init__(self, source, proxy = ""):
        self.source = source
        if len(proxy) > 0:
            self._opener = URLopener({"http": proxy})
        else:
            self._opener = URLopener()
        self._fetchQueue = Queue(0)
        self._fetchThread = Thread(target = self._FetchTile)
        self._fetchThread.setDaemon(True)
        self._fetchThread.start()

    def _FetchTile(self):
        task = ""
        while task is not None:
            task = self._fetchQueue.get()
            url, fname = task
            if not os.path.isfile(fname):
                print "Getting", fname
                try:
                    self._opener.retrieve(url, "tmp.png")
                    shutil.move("tmp.png", fname)
                except IOError:
                    pass
            self._fetchQueue.task_done()

    def StartNewFetchBatch(self):
        try:
            while True:
                item = self._fetchQueue.get(False)
                self._fetchQueue.task_done()
        except Empty:
            pass

    def GetTileFilename(self, xtile, ytile, zoom):
        numTiles = 2 ** zoom
        while xtile >= numTiles:
            xtile -= numTiles
        if xtile < 0 or ytile < 0 or ytile >= numTiles:
            # Indicate that this is not a valid tile
            return None
        else:
            fname = "/".join([self.source.get_full_name(), str(zoom), str(xtile), str(ytile) + ".png"])
            if not os.path.isfile(fname):
                url = self.source.get_tile_url(xtile, ytile, zoom)
                # Ensure that the directory exists
                dname = os.path.dirname(fname)
                if not os.path.isdir(dname):
                    os.makedirs(dname)
                self._fetchQueue.put((url, fname))
            # Valid tile, though may not be present in the cache
            return fname
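A sketch of driving SlippyCache; the tile source below is hypothetical, written only to satisfy the get_full_name/get_tile_url interface the cache expects:

class DemoSource(object):
    # Hypothetical tile source; any object with these two methods will do.
    def get_full_name(self):
        return 'cache/demo'

    def get_tile_url(self, xtile, ytile, zoom):
        return 'http://tiles.example.org/%d/%d/%d.png' % (zoom, xtile, ytile)

cache = SlippyCache(DemoSource())
fname = cache.GetTileFilename(1, 2, zoom=3)  # queues the download, returns the local path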
Example 21
    def read(self, uri=None, inventory=None):
        """Read sitemap from a URI including handling sitemapindexes

        Returns the inventory.

        Includes the subtlety that if the input URI is a local file and the 
        """
        if (inventory is None):
            inventory = Inventory()
        #
        try:
            fh = URLopener().open(uri)
        except IOError as e:
            raise Exception(
                "Failed to load sitemap/sitemapindex from %s (%s)" %
                (uri, str(e)))
        etree = parse(fh)
        # check root element: urlset (for sitemap), sitemapindex or bad
        self.sitemaps_created = 0
        if (etree.getroot().tag == '{' + SITEMAP_NS + "}urlset"):
            self.inventory_parse_xml(etree=etree, inventory=inventory)
            self.sitemaps_created += 1
        elif (etree.getroot().tag == '{' + SITEMAP_NS + "}sitemapindex"):
            if (not self.allow_multifile):
                raise Exception(
                    "Got sitemapindex from %s but support for sitemapindex disabled"
                    % (uri))
            sitemaps = self.sitemapindex_parse_xml(etree=etree)
            sitemapindex_is_file = self.is_file_uri(uri)
            # now loop over all entries to read each sitemap and add to inventory
            for sitemap_uri in sorted(sitemaps.resources.keys()):
                if (sitemapindex_is_file):
                    if (not self.is_file_uri(sitemap_uri)):
                        # Attempt to map URI to local file
                        remote_uri = sitemap_uri
                        sitemap_uri = self.mapper.src_to_dst(remote_uri)
                else:
                    # FIXME - need checks on sitemap_uri values:
                    # 1. should be in same server/path as sitemapindex URI
                    pass
                try:
                    fh = URLopener().open(sitemap_uri)
                except IOError as e:
                    raise Exception(
                        "Failed to load sitemap from %s listed in sitemap index %s (%s)"
                        % (sitemap_uri, uri, str(e)))
                self.inventory_parse_xml(fh=fh, inventory=inventory)
                self.sitemaps_created += 1
                #print "%s : now have %d resources" % (sitemap_uri,len(inventory.resources))
        else:
            raise ValueError("XML is not sitemap or sitemapindex")
        return (inventory)
Example 22
 def __init__(self, server, infoFile):
     """
     takes a server location and an info file as parameters in the constructor
     it will use this server to fetch the new information
     there should be a json/version and json/info.json dir on this server
     """
     self._infoFile = infoFile
     self._serverJSON = server + self._infoFile
     self._serverDate = server + "json/version"
     if sys.version < '3':
         self.br = URLopener()
     else:
         self.br = request
Example 23
    def command(self):
        args = list(self.args)
        method, url = args[0:2]

        if not url.startswith('http'):
            url = 'http://%s:%s%s' % (self.session.config.sys.http_host,
                                      self.session.config.sys.http_port,
                                      ('/' + url).replace('//', '/'))

        # FIXME: The python URLopener doesn't seem to support other verbs,
        #        which is really quite lame.
        method = method.upper()
        assert (method in ('GET', 'POST'))

        qv, pv = [], []
        if method == 'POST':
            which = pv
        else:
            which = qv
        for arg in args[2:]:
            if '=' in arg:
                which.append(tuple(arg.split('=', 1)))
            elif arg.upper()[0] == 'P':
                which = pv
            elif arg.upper()[0] == 'Q':
                which = qv

        if qv:
            qv = urlencode(qv)
            url += ('?' in url and '&' or '?') + qv

        try:
            uo = URLopener()
            if method == 'POST':
                (fn, hdrs) = uo.retrieve(url, data=urlencode(pv))
            else:
                (fn, hdrs) = uo.retrieve(url)
            hdrs = unicode(hdrs)
            data = open(fn, 'rb').read().strip()
            if data.startswith('{') and 'application/json' in hdrs:
                data = json.loads(data)
            return self._success('%s %s' % (method, url),
                                 result={
                                     'headers': hdrs.splitlines(),
                                     'data': data
                                 })
        except:
            self._ignore_exception()
            return self._error('%s %s' % (method, url))
Example 26
    def test_05_03_http_image_zipfile(self):
        # Make a zipfile using files accessed from the web
        def alter_fn(module):
            self.assertTrue(isinstance(module, C.CreateWebPage))
            module.wants_zip_file.value = True
            module.zipfile_name.value = ZIPFILE_NAME
            module.directory_choice.dir_choice = C.ABSOLUTE_FOLDER_NAME
            module.directory_choice.custom_path = cpprefs.get_default_image_directory()

        url_root = "http://cellprofiler.org/svnmirror/ExampleImages/ExampleSBSImages/"
        url_query = "?r=11710"
        filenames = [(url_root, fn + url_query) for fn in
                     ("Channel1-01-A-01.tif", "Channel2-01-A-01.tif",
                      "Channel1-02-A-02.tif", "Channel2-02-A-02.tif")]
        #
        # Make sure URLs are accessible
        #
        try:
            for filename in filenames:
                URLopener().open("".join(filename)).close()
        except IOError, e:
            def bad_url(e=e):
                raise e

            unittest.expectedFailure(bad_url)()
Example 27
def download_package(pkg_name, pkg_version):
    file_name, path, expected_md5 = get_package_info(pkg_name, pkg_version)
    if not file_name:
        return False
    if os.path.isfile(file_name) and check_md5sum(file_name, expected_md5):
        print 'File with matching md5sum already exists, skipping {0}'.format(
            file_name)
        return True
    downloader = URLopener()
    pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, path)
    print 'Downloading {0} from {1}'.format(file_name, pkg_url)
    downloader.retrieve(pkg_url, file_name)
    if check_md5sum(file_name, expected_md5):
        return True
    else:
        print 'MD5 mismatch in file {0}.'.format(file_name)
        return False
Example 29
    def download(self, sysctl, code):
        try:
            logging.info('Begin download files.')

            if not isdir(self.p_dwld):
                mkdir(self.p_dwld)

            obj = URLopener()
            for f in self.files:
                logging.info('Start download {}.'.format(f))
                obj.retrieve(self.url + f, self.p_dwld + f)
                logging.info('Download {} done.'.format(f))
            return True

        except BaseException as down:
            logging.error('Download {}.'.format(down))
            self._rolback(sysctl, code)
Example 30
def test_static_directory():
    py.test.skip("Fails")
    import thread
    tmpdir = py.test.ensuretemp("server_static_dir")
    tmpdir.ensure("a", dir=1)
    tmpdir.join("a").ensure("a.txt").write("aaa")
    tmpdir.join("a").ensure("b.txt").write("bbb")

    class StaticDir(server.Handler):
        static_dir = tmpdir
        a_dir = server.StaticDir(tmpdir.join("a"))

    httpd = server.HTTPServer(('127.0.0.1', 0), StaticDir)
    port = httpd.server_port
    thread.start_new_thread(httpd.serve_forever, ())
    addr = "http://127.0.0.1:%d/" % port
    assert URLopener().open(addr + "a_dir/a.txt").read() == "aaa"
    assert URLopener().open(addr + "a_dir/b.txt").read() == "bbb"
Example 31
def startplayback_images(args):
    """Shows an image
    """
    # cache path
    sDir = xbmc.translatePath(args._addon.getAddonInfo("profile"))
    if args.PY2:
        sPath = join(sDir.decode("utf-8"), u"image.jpg")
    else:
        sPath = join(sDir, "image.jpg")

    # download image
    opener = URLopener()
    opener.retrieve(args.url, sPath)

    # display image
    item = xbmcgui.ListItem(getattr(args, "title", "Title not provided"), path=sPath)
    xbmcplugin.setResolvedUrl(int(args._argv[1]), True, item)
    xbmc.executebuiltin("SlideShow(" + sDir + ")")
Example 32
def download_package(pkg_name, pkg_version):
  file_name, path, hash_algorithm, expected_digest = get_package_info(pkg_name,
      pkg_version)
  if not file_name:
    return False
  if os.path.isfile(file_name) and check_digest(file_name, hash_algorithm,
      expected_digest):
    print 'File with matching digest already exists, skipping {0}'.format(file_name)
    return True
  downloader = URLopener()
  pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, path)
  print 'Downloading {0} from {1}'.format(file_name, pkg_url)
  downloader.retrieve(pkg_url, file_name)
  if check_digest(file_name, hash_algorithm, expected_digest):
    return True
  else:
    print 'Hash digest check failed in file {0}.'.format(file_name)
    return False
Example 33
def open_http(url, data=None):
    """Use HTTP protocol."""
    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = urllib.unquote(host)
        realhost = host
    else:
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = urllib.splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if urllib.proxy_bypass(realhost):
                host = realhost

        #print "proxy via http:", host, selector
    if not host: raise IOError('http error', 'no host given')

    if proxy_passwd:
        import base64
        proxy_auth = base64.b64encode(proxy_passwd).strip()
    else:
        proxy_auth = None

    if user_passwd:
        import base64
        auth = base64.b64encode(user_passwd).strip()
    else:
        auth = None
    c = FakeHTTPConnection(host)
    if data is not None:
        c.putrequest('POST', selector)
        c.putheader('Content-Type', 'application/x-www-form-urlencoded')
        c.putheader('Content-Length', '%d' % len(data))
    else:
        c.putrequest('GET', selector)
    if proxy_auth: c.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth: c.putheader('Authorization', 'Basic %s' % auth)
    if realhost: c.putheader('Host', realhost)
    for args in URLopener().addheaders:
        c.putheader(*args)
    c.endheaders()
    return c
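The final loop copies across the default headers a fresh URLopener would send; in Python 2 that list is seeded with a single User-Agent entry:

from urllib import URLopener
print URLopener().addheaders  # e.g. [('User-Agent', 'Python-urllib/1.17')]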
Example 34
def download_reports(years=_years, weeks=_weeks):
    '''Crawls the IMoH website and downloads all Excel files for the given weeks and years'''
    # Create paths for logging files and the download location
    prefix = datetime.now().strftime('./log/weeklies/%y%m%d_%H%M%S_')
    log_d = prefix + "downloads.log"
    log_f = prefix + "FAILED.log"
    base_loc = 'http://www.health.gov.il/PublicationsFiles/IWER'
    # URL object
    my_file = URLopener()

    for year in years:
        print "\n", year,
        for week in weeks:
            f = open(log_d, 'a')
            f.write('\n{year}_{week}: '.format(week=week, year=year))
            # There are many different options of paths
            options = ['{base}{week:02d}_{year}.xls'.format(base=base_loc, week=week, year=year),
                       '{base}{week}_{year}.xls'.format(base=base_loc, week=week, year=year),
                       '{base}{week:02d}_{year}.xlsx'.format(base=base_loc, week=week, year=year),
                       '{base}{week}_{year}.xlsx'.format(base=base_loc, week=week, year=year)]
            for i, o in enumerate(options):
                filetype = o.split(".")[-1]
                try:
                    # Try different paths on remote, but always save on same path locally
                    my_file.retrieve(o,
                                     './data/weeklies/{year}_{week:02d}.{ft}'.format(week=week, year=year, ft=filetype))
                    # If succeeds write which filetype (xls/x) was saved
                    f.write('{ft}'.format(ft=filetype), )
                    # If downloads succeeds move close the log file and break the loop
                    f.close()
                    break
                except:
                    # When option excepted, write try number to the log
                    f.write("{} ".format(i + 1))
                    # If all options were exhausted, it has failed.
                    if i == len(options) - 1 and week != 53:
                        print "== {year}_{week:02d} FAILED ==".format(week=week, year=year),
                        with open(log_f, 'a') as failed:
                            failed.write("{year}_{week:02d} FAILED\n".format(week=week, year=year))
                        f.write("FAILED")
                        f.close()
        f.close()
Example 35
    def call_remote(self, category, params):
        '''
        The meetup api is set up such that the root url does not
        change much other than the 'name' of the thing you call into.

        In other words, I can just use category to sprintf my way to a
        valid url, then tack on the rest of the query string specified
        in params.
        '''
        url = self.root_url
        url = url % (category)
        # Every call has to include key
        url = url + "?" + params + "&key=" + self.key
        client = URLopener()
        request = client.open(url)
        raw_str = request.read()
        results = json.loads(raw_str)
        # Let the caller interpret the results of the call. Both the
        # meta info and the results are passed back
        return results
Example 37
class Updater:
    """
    takes a server location and an info file as parameters in the constructor
    it will use this server to fetch the new information
    there should be a /hash and /info.json dir on this server
    """
    def __init__(self,server,infoFile):
        self._server = server
        self._infoFile = infoFile
        self.br = URLopener()

    """
    hasNewInfo :: Boolean
    compare the local info file hash with the one found on the server
    and returns true if they are different
    """
    def hasNewInfo(self):
        f = open(self._infoFile,'r').read()
        m = md5.new(f).hexdigest()
        response = self.br.open(self._server+'/hash').read()
        response = response.replace("\n","")
        return (m!=response)

    """
    generateTimeStamp :: String
    returns a string that is used to timestamp old config  backup files
    """
    def generateTimeStamp(self):
        return str(time.gmtime().tm_year)+"_"+str(time.gmtime().tm_mday)+"_"+str(time.gmtime().tm_hour)+"_"+str(time.gmtime().tm_min)

    """
    fetchNewInfo :: Void
    it will download the info file from the server
    use the timestamp to back it up
    and overwrite it
    """
    def fetchNewInfo(self):
        response = self.br.open(self._server+'/info.json').read()
        oldInfo = open(self._infoFile,'r').read()
        open(self._infoFile+"."+self.generateTimeStamp(),'w').write(oldInfo)
        open(self._infoFile,'w').write(response)
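A sketch of how this Updater might be driven; the server URL and info file path are placeholders:

u = Updater('http://example.org/config', 'info.json')
if u.hasNewInfo():
    u.fetchNewInfo()  # backs up info.json with a timestamp suffix, then overwrites it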
Example 40
    def get(self,url,inventory=None):
        """Get a inventory from url

        Will either create a new Inventory object or add to one supplied.
        """
        # Either use inventory passed in or make a new one
        if (inventory is None):
            inventory = Inventory()

        inventory_fh = URLopener().open(url)
        Sitemap().inventory_parse_xml(fh=inventory_fh, inventory=inventory)
        return(inventory)
Example 41
def download_if_not_exist(url, target_file):
    if not os.path.isfile(target_file):
        get_logger().info('downloading %s to %s', url, target_file)

        makedirs(os.path.dirname(target_file), exist_ok=True)

        temp_filename = target_file + '.part'
        if os.path.isfile(temp_filename):
            os.remove(temp_filename)
        URLopener().retrieve(url, temp_filename)
        os.rename(temp_filename, target_file)
    return target_file
Example 43
    def command(self):
        args = list(self.args)
        method, url = args[0:2]

        if not url.startswith('http'):
            url = 'http://%s:%s%s' % (self.session.config.sys.http_host,
                                      self.session.config.sys.http_port,
                                      ('/' + url).replace('//', '/'))

        # FIXME: The python URLopener doesn't seem to support other verbs,
        #        which is really quite lame.
        method = method.upper()
        assert (method in ('GET', 'POST'))

        qv, pv = [], []
        if method == 'POST':
            which = pv
        else:
            which = qv
        for arg in args[2:]:
            if '=' in arg:
                which.append(tuple(arg.split('=', 1)))
            elif arg.upper()[0] == 'P':
                which = pv
            elif arg.upper()[0] == 'Q':
                which = qv

        if qv:
            qv = urlencode(qv)
            url += ('?' in url and '&' or '?') + qv

        # Log us in automagically!
        httpd = self.session.config.http_worker.httpd
        global HACKS_SESSION_ID
        if HACKS_SESSION_ID is None:
            HACKS_SESSION_ID = httpd.make_session_id(None)
        mailpile.auth.SetLoggedIn(None,
                                  user='******',
                                  session_id=HACKS_SESSION_ID)
        cookie = httpd.session_cookie

        try:
            uo = URLopener()
            uo.addheader('Cookie', '%s=%s' % (cookie, HACKS_SESSION_ID))
            if method == 'POST':
                (fn, hdrs) = uo.retrieve(url, data=urlencode(pv))
            else:
                (fn, hdrs) = uo.retrieve(url)
            hdrs = unicode(hdrs)
            data = open(fn, 'rb').read().strip()
            if data.startswith('{') and 'application/json' in hdrs:
                data = json.loads(data)
            return self._success('%s %s' % (method, url),
                                 result={
                                     'headers': hdrs.splitlines(),
                                     'data': data
                                 })
        except:
            self._ignore_exception()
            return self._error('%s %s' % (method, url))
Example 44
	def __init__(self,manga_name,chapter,end_chapter,manga_location,dl_manager):
		self.manga_location = manga_location
		self.manga_name	    = manga_name
		self.chapter		= chapter
		self.end_chapter	= end_chapter
		self.current_image  = "000"
		self.img			= ""
		self.imgs		    = []
		self.chapters	    = []
		self.br             = URLopener()
		self.response       = ""
		self.response_lines = ""
		self.dl_manager     = dl_manager
Example 46
    def __init__(self):
        global dbaselocal
        global datapath
        
        fname = datapath + 'TRMM_classmap.dat'
        print 'Loading class map ',fname


        if dbaselocal:
            landclassmap.data = np.loadtxt(fname, dtype='int')[:,1]
        else:
            f = URLopener().open(fname)
            tmp = []
            for line in f:
                columns = line.split()
                tmp.append(int(columns[1]))

            f.close()        
            landclassmap.data = np.array(tmp)

        landclassmap.data = np.reshape(landclassmap.data, (-1, 360))
        print 'Class map loaded'      
Example 47
def test_static_page():
    import thread
    tmpdir = py.test.ensuretemp("server_static_page")
    tmpdir.ensure("test.html").write("<html></html>")

    class StaticHandler(server.TestHandler):
        static_dir = str(tmpdir)
        index = server.Static(os.path.join(static_dir, "test.html"))

    httpd = server.HTTPServer(('127.0.0.1', 21212), StaticHandler)
    thread.start_new_thread(httpd.serve_forever, ())
    assert URLopener().open("http://127.0.0.1:21212/index").read() == \
           "<html></html>"
Example 48
def getRetriever(scheme):
    """
    Get the right retriever function depending on the scheme.
    If the scheme is 'http', return urllib.urlretrieve; if the scheme is 'https', create a URLopener
    with the certificate taken from the X509_USER_PROXY variable. If no certificate is available,
    fall back to urllib.urlretrieve as in the http case.
    """
    if os.environ.has_key('X509_USER_PROXY') and os.path.isfile(os.environ['X509_USER_PROXY']):
        certfile = os.environ['X509_USER_PROXY']
    else:
        if scheme == 'https':
            print "User proxy not found. Trying to retrieve the file without using certificates"
        certfile = None

    if scheme == 'http' or not certfile:
        retriever = urllib.urlretrieve
    else:
        print "Using %s as X509 certificate" % certfile
        op = URLopener(None, key_file=certfile, cert_file=certfile)
        op.addheader( 'Accept', 'application/octet-stream' )
        retriever = op.retrieve

    return retriever
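Both branches return a callable with urlretrieve's signature, so callers stay scheme-agnostic. A hypothetical call site, with the URL and target path invented for illustration:

# Hypothetical call site; URL and target path are illustrative.
retriever = getRetriever('https')
filename, headers = retriever('https://example.com/payload.tar.gz',
                              '/tmp/payload.tar.gz')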
Esempio n. 49
0
def test_static_page_implicit():
    import thread
    tmpdir = py.test.ensuretemp("server_static_page_implicit")
    tmpdir.ensure("index.html").write("<html></html>")

    class StaticHandler(server.TestHandler):
        static_dir = str(tmpdir)
        index = server.Static()

    server.patch_handler(StaticHandler)
    httpd = server.HTTPServer(('127.0.0.1', 21213), StaticHandler)
    thread.start_new_thread(httpd.serve_forever, ())
    assert URLopener().open("http://127.0.0.1:21213/index").read() == \
           "<html></html>"
Esempio n. 50
0
class Updater:
    def __init__(self, server, infoFile):
        """
        Takes a server location and an info file as constructor parameters.
        The server is used to fetch the new information; it should expose
        json/version and json/info.json.
        """
        self._infoFile = infoFile
        self._serverJSON = server + self._infoFile
        self._serverDate = server + "json/version"
        if sys.version < '3':
            self.br = URLopener()
        else:
            self.br = request

    def hasNewInfo(self):
        """
        hasNewInfo :: Boolean
        Compares the local version tag with the one found on the server
        and returns True if the server version is newer.
        """
        jsonDate = open(location_manager.VERSION, 'r').read().strip()
        if sys.version < '3':
            servDate = self.br.open(self._serverDate).read().strip()
        else:
            servDate = self.br.urlopen(self._serverDate).read().strip()
        return (int(jsonDate) < int(servDate))

    def generateTimeStamp(self):
        """
        generateTimeStamp :: String
        Returns a string that is used to timestamp old config backup files.
        """
        return open(location_manager.VERSION, 'r').read().strip()

    def fetchNewInfo(self):
        """
        fetchNewInfo :: Void
        Downloads the info file from the server, backs up the old copy
        using the timestamp, and overwrites it.
        """
        # Fetching server's info.json
        if sys.version < '3':
            response = self.br.open(self._serverJSON).read()
        else:
            response = self.br.urlopen(self._serverJSON).read().decode("utf-8")
        oldInfo = open(self._infoFile, 'r').read()
        open(self._infoFile + "." + self.generateTimeStamp(),
             'w').write(oldInfo)
        open(self._infoFile, 'w').write(response)
        # Fetching server's version
        if sys.version < '3':
            servDate = int(self.br.open(self._serverDate).read().strip())
        else:
            servDate = int(self.br.urlopen(self._serverDate).read().strip())
        open(location_manager.VERSION, 'w').write(str(servDate))
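Together the three methods support a simple check-then-fetch flow. A hypothetical caller, with the server URL and info file path invented for illustration:

# Hypothetical usage; server URL and info file path are illustrative.
updater = Updater('http://example.com/', 'json/info.json')
if updater.hasNewInfo():
    updater.fetchNewInfo()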
Esempio n. 51
0
    def download__grobid_service_zip_if_not_exist(self):
        if not os.path.isfile(self.grobid_service_zip_filename):
            get_logger().info(
                'downloading %s to %s',
                self.grobid_service_zip_url,
                self.grobid_service_zip_filename
            )

            makedirs(os.path.dirname(self.grobid_service_zip_filename), exists_ok=True)

            temp_zip_filename = self.grobid_service_zip_filename + '.part'
            if os.path.isfile(temp_zip_filename):
                os.remove(temp_zip_filename)
            URLopener().retrieve(self.grobid_service_zip_url, temp_zip_filename)
            os.rename(temp_zip_filename, self.grobid_service_zip_filename)
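Downloading to a '.part' file and renaming only on success keeps a partial download from ever being mistaken for the finished archive; os.rename is atomic on POSIX within a single filesystem. The pattern in isolation, with an illustrative URL and target path:

# The download-then-rename pattern in isolation (Python 2); URL and
# target path are illustrative.
from urllib import URLopener
import os

target = '/tmp/grobid-service.zip'
temp = target + '.part'
if os.path.isfile(temp):
    os.remove(temp)
URLopener().retrieve('http://example.com/grobid-service.zip', temp)
os.rename(temp, target)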
Esempio n. 53
0
def maybe_download_test_image(file_name):
    '''Download the given TestImages file if it is not in the directory

    file_name - name of file to fetch

    The image will be downloaded to the CP_EXAMPLEIMAGES directory if it
    is not already present.
    '''
    local_path = os.path.join(testimages_directory(), file_name)
    if not os.path.exists(local_path):
        url = testimages_url() + "/" + file_name
        try:
            URLopener().retrieve(url, local_path)
        except IOError, e:
            # Re-raise inside a wrapper so the test framework records an
            # "expected failure" instead of an error.
            def bad_url(e=e):
                raise e

            unittest.expectedFailure(bad_url)()
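The closure-plus-expectedFailure dance re-raises the download error through a wrapper that the Python 2 test runner records as an expected failure rather than a hard error. Reduced to its essentials, with an invented raising function:

# Sketch of the expectedFailure trick; 'boom' is an invented stand-in
# for bad_url. Calling the wrapped function raises unittest's
# expected-failure marker instead of the plain IOError.
import unittest

def boom():
    raise IOError('download failed')

unittest.expectedFailure(boom)()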
Esempio n. 54
0
    def parse(self, uri=None, fh=None, str_data=None):
        """Parse a single XML document for this list

        Accepts either a URI (uri, or the default if the parameter is not
        specified), a filehandle (fh) or a string (str_data).

        Does not handle the case of sitemapindex+sitemaps
        """
        # str_data was renamed from 'str': the original name shadowed the
        # builtin and broke the str(e) call in the except clause below
        if (uri is not None):
            try:
                fh = URLopener().open(uri)
            except IOError as e:
                raise Exception("Failed to load sitemap/sitemapindex from %s (%s)" % (uri, str(e)))
        elif (str_data is not None):
            fh = StringIO.StringIO(str_data)
        if (fh is None):
            raise Exception("Nothing to parse")
        s = self.new_sitemap()
        s.parse_xml(fh=fh, resources=self, capability=self.capability_name, sitemapindex=False)
        self.parsed_index = s.parsed_index
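The method accepts exactly one of three sources: a URI to fetch, an open file handle, or a raw string. Hypothetical call sites, where 'lst' stands for an instance of the list class and the URI and XML literal are illustrative:

# Hypothetical call sites for the three input modes; 'lst', the URI
# and the XML string are illustrative.
lst.parse(uri='http://example.com/sitemap.xml')   # fetch and parse
lst.parse(fh=open('sitemap.xml'))                 # parse an open file
lst.parse(str_data='<urlset></urlset>')           # parse a string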
Esempio n. 55
0
def try_download(_path, _file, _url, _stale):
    now = time()
    url = URLopener()
    file_exists = isfile(_path + _file)
    if file_exists:
        file_old = (getmtime(_path + _file) + _stale) < now
    if not file_exists or (file_exists and file_old):
        try:
            url.retrieve(_url, _path + _file)
            result = 'ID ALIAS MAPPER: \'{}\' successfully downloaded'.format(_file)
        except IOError:
            result = 'ID ALIAS MAPPER: \'{}\' could not be downloaded'.format(_file)
    else:
        result = 'ID ALIAS MAPPER: \'{}\' is current, not downloaded'.format(_file)
    url.close()
    return result
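A hypothetical call, refreshing a cached alias file at most once a week:

# Hypothetical usage: re-download only when the local copy is more
# than seven days old. Paths and URL are illustrative.
print try_download('/tmp/', 'peer_ids.csv',
                   'http://example.com/peer_ids.csv', 7 * 86400)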
Esempio n. 56
0
    def open(self, *args):
        f = URLopener.open(self, *args)
        return XML(f)
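This open is a method lifted out of its class, presumably a URLopener subclass that returns every response pre-parsed. A sketch of an assumed enclosing class; the class name and the ElementTree-based XML helper are assumptions, not from the original:

# Sketch of an assumed enclosing class; 'XMLOpener' and the
# ElementTree import standing in for XML are assumptions.
from urllib import URLopener
from xml.etree.ElementTree import parse as XML

class XMLOpener(URLopener):
    def open(self, *args):
        f = URLopener.open(self, *args)
        return XML(f)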
Esempio n. 57
0
    def http_error_default(*a, **k):
        return URLopener.http_error_default(*a, **k)
Esempio n. 58
0
    def __init__(self, server, infoFile):
        self._server = server
        self._infoFile = infoFile
        self.br = URLopener()
Esempio n. 59
0
def test_ping_play1():
    from urllib import URLopener
    u = URLopener()
    text = "<title>pypy.js various demos</title>"
    assert u.open("http://play1.pypy.org/").read().find(text) != -1
Esempio n. 60
0
#!/usr/bin/env python

from re import sub
from BeautifulSoup import BeautifulSoup
from urllib import URLopener

opener = URLopener()
html = opener.open('http://www.dailyzen.com/').read()

# Cut the page down to the quote-of-the-day block
html = html[html.index('<!--Add Quote for correct day-->'):]
html1 = html[:html.index('<br>')]

# Extract the attribution that follows the quote
html2 = html[html.index('<A class="artist">'):]
html2 = html2[:html2.index('</a></i>')]
html2 = sub('<A class="artist">', '', html2).strip()

zen = BeautifulSoup(html1)
zen = zen.prettify().strip()

# Strip markup and stray whitespace left over after prettifying
for x in ['<!--Add Quote for correct day-->', '<br />', '<p>', '</p>', '^\n', '\n$']:
    zen = sub(x, '', zen).strip()

zen = sub('\n \n \n', '\n \n', zen).strip()

print
print zen
print
print '\t\t', html2
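One fragility worth noting: str.index raises ValueError the moment the site's markup changes. A sketch of a more forgiving lookup using str.find, with the marker string taken from the script above:

# Sketch: str.find returns -1 instead of raising, so a vanished
# marker can fail with a clear message. Marker as in the script above.
marker = '<!--Add Quote for correct day-->'
pos = html.find(marker)
if pos == -1:
    raise SystemExit('dailyzen markup changed; quote marker not found')
html = html[pos:]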