Example #1
def install_mpt(install_path, url=DEFAULT_MPT_URL):
    """
    Install MyPyTutor to the given directory.

    Args:
      install_path (str): The directory to install MyPyTutor in.
      url (str, optional): The URL of the MyPyTutor file to use.

    """
    # create our install path if it doesn't already exist
    if not os.path.exists(install_path):
        os.makedirs(install_path)

    print('Installing MyPyTutor...', end='', flush=True)

    # grab the latest zip file
    # we use an explicit filename here because we don't yet have access
    # to the tutorlib module for abstracting away temporary file creation
    try:
        urlobj = URLopener()
        filename, _ = urlobj.retrieve(url, 'MyPyTutor.zip')
    except Exception:
        print('failed')
        sys.exit(1)

    # extract the file
    with ZipFile(filename) as zf:
        zf.extractall(install_path)

    print('done')
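Note: urllib.request.URLopener, used throughout these examples, has been deprecated since Python 3.3. As a hedged sketch (reusing the install_path/url parameters and the MyPyTutor.zip name from the example above), the same download-and-extract step can be written with urllib.request.urlretrieve:

import os
import sys
from urllib.request import urlretrieve
from zipfile import ZipFile


def install_mpt_sketch(install_path, url):
    """Download the MyPyTutor zip from url and unpack it into install_path."""
    # create the install path if it doesn't already exist
    os.makedirs(install_path, exist_ok=True)
    try:
        # urlretrieve returns the local file name and the response headers
        filename, _ = urlretrieve(url, 'MyPyTutor.zip')
    except OSError:
        print('failed')
        sys.exit(1)
    # extract the downloaded archive into the install directory
    with ZipFile(filename) as zf:
        zf.extractall(install_path)
    print('done')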
Example #2
    def read_file_content(self, file_url=None):
        """Return name of temp file in which remote file is saved."""
        if not file_url:
            file_url = self.url
            pywikibot.warning("file_url is not given. "
                              "Set to self.url by default.")
        pywikibot.output(u'Reading file %s' % file_url)
        resume = False
        rlen = 0
        _contents = None
        dt = 15
        uo = URLopener()
        retrieved = False

        while not retrieved:
            if resume:
                pywikibot.output(u"Resume download...")
                uo.addheader('Range', 'bytes=%s-' % rlen)

            infile = uo.open(file_url)

            if 'text/html' in infile.info().getheader('Content-Type'):
                pywikibot.output(u"Couldn't download the image: "
                                 "the requested URL was not found on server.")
                return

            content_len = infile.info().getheader('Content-Length')
            accept_ranges = infile.info().getheader('Accept-Ranges') == 'bytes'

            if resume:
                _contents += infile.read()
            else:
                _contents = infile.read()

            infile.close()
            retrieved = True

            if content_len:
                rlen = len(_contents)
                content_len = int(content_len)
                if rlen < content_len:
                    retrieved = False
                    pywikibot.output(
                        u"Connection closed at byte %s (%s left)"
                        % (rlen, content_len))
                    if accept_ranges and rlen > 0:
                        resume = True
                    pywikibot.output(u"Sleeping for %d seconds..." % dt)
                    time.sleep(dt)
                    if dt <= 60:
                        dt += 15
                    elif dt < 360:
                        dt += 60
            else:
                pywikibot.log(
                    u"WARNING: length check of retrieved data not possible.")
        handle, tempname = tempfile.mkstemp()
        with os.fdopen(handle, "wb") as t:
            t.write(_contents)
        return tempname
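What this method implements by hand is a resumable HTTP download: when fewer bytes arrive than Content-Length promised, it re-requests the remainder with a Range header. A minimal sketch of the same idea on top of urllib.request (fetch_with_resume and its parameters are illustrative names, not part of pywikibot):

import time
from http.client import IncompleteRead
from urllib.request import Request, urlopen


def fetch_with_resume(file_url, max_tries=5, delay=15):
    """Download file_url, resuming from the last received byte on short reads."""
    data = b''
    total = None
    for _ in range(max_tries):
        # after the first pass, ask only for the bytes we are still missing
        headers = {'Range': 'bytes=%d-' % len(data)} if data else {}
        with urlopen(Request(file_url, headers=headers)) as resp:
            if total is None and resp.headers.get('Content-Length'):
                total = int(resp.headers['Content-Length'])
            try:
                data += resp.read()
            except IncompleteRead as err:
                data += err.partial  # keep whatever arrived before the drop
        if total is None or len(data) >= total:
            return data
        time.sleep(delay)
    raise IOError('download still incomplete after %d attempts' % max_tries)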
Example #3
def get_imagelinks(url):
    """Given a URL, get all images linked to by the page at that URL."""
    # Check if BeautifulSoup is imported.
    if isinstance(BeautifulSoup, ImportError):
        raise BeautifulSoup

    links = []
    uo = URLopener()
    with uo.open(url) as f:
        soup = BeautifulSoup(f.read())

    if not shown:
        tagname = "a"
    elif shown == "just":
        tagname = "img"
    else:
        tagname = ["a", "img"]

    for tag in soup.findAll(tagname):
        link = tag.get("src", tag.get("href", None))
        if link:
            ext = os.path.splitext(link)[1].lower().strip('.')
            if ext in fileformats:
                links.append(urllib.basejoin(url, link))
    return links
Example #4
def readTLEfile(source):
    ''' Read a TLE file (unzip if necessary) '''
    sourceName = source['name']
    sourceUrl = source['url']
    sourceFile = source['file']
    if os.path.isfile(sourceFile):
        print('Using saved TLE data {} ({})'.format(
            sourceFile, time.ctime(os.path.getmtime(sourceFile))))
    else:
        print('Retrieving TLE data from {}'.format(sourceUrl))
        file = URLopener()
        try:
            file.retrieve(sourceUrl, sourceFile)
        except:
            print("Error: Failed to get TLE data")
            return None
        else:
            print('{} updated'.format(sourceFile))

    if sourceFile.lower().endswith('.zip'):
        print('Unzipping {}...'.format(sourceFile))
        zip = zipfile.ZipFile(sourceFile)
        zip.extractall('.')
        sourceFile = zip.namelist()[0]
        print('Extracted {}'.format(zip.namelist()))

    tempContent = []
    with open(sourceFile) as f:
        for aline in f:
            tempContent.append(aline.replace('\n', ''))
        print(len(tempContent) // 3, 'TLEs loaded from {}'.format(sourceFile))

    return tempContent
Example #5
def install_mpt(install_path, url=DEFAULT_MPT_URL):
    """
    Install MyPyTutor to the given directory.

    Args:
      install_path (str): The directory to install MyPyTutor in.
      url (str, optional): The URL of the MyPyTutor file to use.

    """
    # create our install path if it doesn't already exist
    if not os.path.exists(install_path):
        os.makedirs(install_path)

    print('Installing MyPyTutor...', end='', flush=True)

    # grab the latest zip file
    # we use an explicit filename here because we don't yet have access
    # to the tutorlib module for abstracting away temporary file creation
    try:
        urlobj = URLopener()
        filename, _ = urlobj.retrieve(url, 'MyPyTutor.zip')
    except Exception:
        print('failed')
        sys.exit(1)

    # extract the file
    with ZipFile(filename) as zf:
        zf.extractall(install_path)

    print('done')
Example #6
def get_imagelinks(url):
    """Given a URL, get all images linked to by the page at that URL."""
    # Check if BeautifulSoup is imported.
    if isinstance(BeautifulSoup, ImportError):
        raise BeautifulSoup

    links = []
    uo = URLopener()
    with uo.open(url) as f:
        soup = BeautifulSoup(f.read())

    if not shown:
        tagname = "a"
    elif shown == "just":
        tagname = "img"
    else:
        tagname = ["a", "img"]

    for tag in soup.findAll(tagname):
        link = tag.get("src", tag.get("href", None))
        if link:
            ext = os.path.splitext(link)[1].lower().strip('.')
            if ext in fileformats:
                links.append(urllib.basejoin(url, link))
    return links
Example #7
    def read_file_content(self, file_url=None):
        """Return name of temp file in which remote file is saved."""
        if not file_url:
            file_url = self.url
            pywikibot.warning("file_url is not given. "
                              "Set to self.url by default.")
        pywikibot.output(u'Reading file %s' % file_url)
        resume = False
        rlen = 0
        _contents = None
        dt = 15
        uo = URLopener()
        retrieved = False

        while not retrieved:
            if resume:
                pywikibot.output(u"Resume download...")
                uo.addheader('Range', 'bytes=%s-' % rlen)

            infile = uo.open(file_url)

            if 'text/html' in infile.info().getheader('Content-Type'):
                pywikibot.output(u"Couldn't download the image: "
                                 "the requested URL was not found on server.")
                return

            content_len = infile.info().getheader('Content-Length')
            accept_ranges = infile.info().getheader('Accept-Ranges') == 'bytes'

            if resume:
                _contents += infile.read()
            else:
                _contents = infile.read()

            infile.close()
            retrieved = True

            if content_len:
                rlen = len(_contents)
                content_len = int(content_len)
                if rlen < content_len:
                    retrieved = False
                    pywikibot.output(
                        u"Connection closed at byte %s (%s left)" %
                        (rlen, content_len))
                    if accept_ranges and rlen > 0:
                        resume = True
                    pywikibot.output(u"Sleeping for %d seconds..." % dt)
                    time.sleep(dt)
                    if dt <= 60:
                        dt += 15
                    elif dt < 360:
                        dt += 60
            else:
                pywikibot.log(
                    u"WARNING: length check of retrieved data not possible.")
        handle, tempname = tempfile.mkstemp()
        with os.fdopen(handle, "wb") as t:
            t.write(_contents)
        return tempname
Example #8
def downloadZipFromLink(dowLink, albumName):
    zipName = albumName.replace(" ", "") + ".zip"
    tempDir = tempfile.gettempdir()
    zipPath = os.path.join(tempDir, zipName)

    if os.path.exists(zipPath):
        print("Deleting zip which already exists at location '{}'".format(zipPath))
        os.remove(zipPath)

    print("Downloading '{}' from '{}' to file '{}'".format(albumName, dowLink, zipPath))
    urlopener = URLopener()

    def reporthook(blocknum, blocksize, totalsize):
        readsofar = blocknum * blocksize
        if totalsize > 0:
            percent = readsofar * 1e2 / totalsize
            s = "\r%5.1f%% %*d / %d" % (
                percent, len(str(totalsize)), readsofar, totalsize)
            sys.stderr.write(s)
            if readsofar >= totalsize:  # near the end
                sys.stderr.write("\n")
        else:  # total size is unknown
            sys.stderr.write("read %d\n" % (readsofar,))

    try:
        urlopener.retrieve(dowLink, zipPath, reporthook)
    except:
        if os.path.exists(zipPath):
            print("\nDeleting archive file '{}'".format(zipPath))
            os.remove(zipPath)
        raise

    print("Download of '{}' has completed.".format(albumName))
    return zipPath
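The reporthook above follows the (block number, block size, total size) callback convention documented for urllib.request.urlretrieve, so the same progress printer also works with the non-deprecated function. A short sketch; print_progress and the example URL/path are hypothetical:

import sys
from urllib.request import urlretrieve


def print_progress(blocknum, blocksize, totalsize):
    """Progress callback invoked once per downloaded block."""
    readsofar = blocknum * blocksize
    if totalsize > 0:
        percent = min(100.0, readsofar * 100.0 / totalsize)
        sys.stderr.write("\r%5.1f%%" % percent)
        if readsofar >= totalsize:
            sys.stderr.write("\n")
    else:  # total size unknown
        sys.stderr.write("read %d bytes\n" % readsofar)


# usage (hypothetical URL and target path):
# urlretrieve("https://example.com/album.zip", "/tmp/album.zip", print_progress)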
Example #9
    def read(self, uri=None, resources=None, index_only=False):
        """Read sitemap from a URI including handling sitemapindexes.

        If index_only is True then individual sitemaps references in a sitemapindex
        will not be read. This will result in no resources being returned and is
        useful only to read the metadata and links listed in the sitemapindex.

        Includes the subtlety that if the input URI is a local file and is a
        sitemapindex which contains URIs for the individual sitemaps, then these
        are mapped to the filesystem also.
        """
        try:
            fh = URLopener().open(uri)
            self.num_files += 1
        except IOError as e:
            raise IOError(
                "Failed to load sitemap/sitemapindex from %s (%s)" %
                (uri, str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
            self.logger.debug(
                "Read %d bytes from %s" %
                (self.content_length, uri))
        except KeyError:
            # If we don't get a length then c'est la vie
            self.logger.debug("Read ????? bytes from %s" % (uri))
            pass
        self.logger.info("Read sitemap/sitemapindex from %s" % (uri))
        s = self.new_sitemap()
        s.parse_xml(fh=fh, resources=self, capability=self.capability_name)
        # what did we read? sitemap or sitemapindex?
        if (s.parsed_index):
            # sitemapindex
            if (not self.allow_multifile):
                raise ListBaseIndexError(
                    "Got sitemapindex from %s but support for sitemapindex disabled" %
                    (uri))
            self.logger.info(
                "Parsed as sitemapindex, %d sitemaps" %
                (len(self.resources)))
            sitemapindex_is_file = self.is_file_uri(uri)
            if (index_only):
                # don't read the component sitemaps
                self.sitemapindex = True
                return
            # now loop over all entries to read each sitemap and add to
            # resources
            sitemaps = self.resources
            self.resources = self.resources_class()
            self.logger.info("Now reading %d sitemaps" % len(sitemaps.uris()))
            for sitemap_uri in sorted(sitemaps.uris()):
                self.read_component_sitemap(
                    uri, sitemap_uri, s, sitemapindex_is_file)
        else:
            # sitemap
            self.logger.info("Parsed as sitemap, %d resources" %
                             (len(self.resources)))
Example #10
    def download_file(self):
        """
        This function will visit a url for a specific location, enter the date
        and save the file to a specified directory.
        # http://penteli.meteo.gr/meteosearch/data/aghiosnikolaos/2009-11.txt
        """
        for station in self.stations['stations'][:]:
            try:
                # os.mkdir('./data/' + station)
                os.mkdir(os.path.join(os.getcwd(), data_folder) + '/' + station)  # messy!!!
            except:
                # add logging and fix exceptions too broad
                print('directory: {0} already exists!!!'.format(station))
                pass
            testfile = URLopener()
            os.chdir(data_folder + '/' + station)
            for i, date in enumerate(self.dates_to_download):
                name_to_save_file = os.getcwd() + '/' + station + '-' + date + '.txt'
                print(os.getcwd())
                try:
                    # this is the complete url to visit and download its contents
                    url = url_seed + station + '/' + date + '.txt'
                    testfile.retrieve(url, name_to_save_file)
                except:
                    pass
            os.chdir(os.pardir)
            os.chdir(os.pardir)
Example #11
    def _verify_and_download(self):
        """check if file is where it should and download if not"""
        if path.isfile(self._path):
            return
        # File does not exist, so we have to download it.
        epic_id = int(self.epic_id)
        d1 = epic_id - epic_id % 100000
        d2 = epic_id % 100000 - epic_id % 1000
        url_template = 'https://archive.stsci.edu/missions/k2/target_pixel_files/c{0:d}/{1:d}/{2:05d}/{3}'
        url_to_load = url_template.format(self.campaign, d1, d2,
                                          self.file_name)

        fmt = "Downloading {:} ..... "
        print(fmt.format(self.file_name), end='', file=sys.stderr, flush=True)
        url_retriever = URLopener()
        try:
            url_retriever.retrieve(url_to_load, self._path)
        except exceptions:
            print("", file=sys.stderr, flush=True)
            raise IOError(
                "\n\nFailed to download file {:}\n\n".format(url_to_load))
        if not path.isfile(self._path):
            print("", file=sys.stderr, flush=True)
            raise IOError('Download of\n' + url_to_load + '\nto\n' +
                          self._path + 'somehow failed')
        print(" done", file=sys.stderr, flush=True)
Example #12
def download_data(admin_level, plz_regex_string, filename):
    bbox = '48.07303233901773,11.348190307617188,48.25028349849019,11.73614501953125'
    query = 'rel(' +bbox + ')[boundary=administrative][admin_level={}]; out geom;'.format(admin_level) + \
            'rel(' +bbox + ')[boundary=postal_code][postal_code~"{}"]; out geom;'.format(plz_regex_string)

    file = URLopener()
    file.retrieve('http://overpass-api.de/api/interpreter?data=' + quote_plus(query), filename)
Example #13
def readTLEfile(source):
    ''' Read a TLE file (unzip if necessary) '''
    sourceName = source['name']
    sourceUrl = source['url']
    sourceFile = source['file']
    if os.path.isfile(sourceFile):
        print('Using saved TLE data {} ({})'.format(sourceFile,
              time.ctime(os.path.getmtime(sourceFile))))
    else:
        print('Retrieving TLE data from {}'.format(sourceUrl))
        file = URLopener()
        try:
            file.retrieve(sourceUrl, sourceFile)
        except:
            print("Error: Failed to get TLE data")
            return None
        else:
            print('{} updated'.format(sourceFile))

    if sourceFile.lower().endswith('.zip'):
        print('Unzipping {}...'.format(sourceFile))
        zip = zipfile.ZipFile(sourceFile)
        zip.extractall('.')
        sourceFile = zip.namelist()[0]
        print('Extracted {}'.format(zip.namelist()))

    tempContent = []
    with open(sourceFile) as f:
        for aline in f:
            tempContent.append(aline.replace('\n', ''))
        print(len(tempContent) // 3,
              'TLEs loaded from {}'.format(sourceFile))

    return tempContent
Example #14
    def read_component_sitemap(self, sitemapindex_uri, sitemap_uri, sitemap, sitemapindex_is_file):
        """Read a component sitemap of a Resource List with index.

        Each component must be a sitemap with the 
        """
        if (sitemapindex_is_file):
            if (not self.is_file_uri(sitemap_uri)):
                # Attempt to map URI to local file
                remote_uri = sitemap_uri
                sitemap_uri = self.mapper.src_to_dst(remote_uri)
                self.logger.info("Mapped %s to local file %s" % (remote_uri, sitemap_uri))
            else:
                # The individual sitemaps should be at a URL (scheme/server/path)
                # that the sitemapindex URL can speak authoritatively about
                if (self.check_url_authority and
                    not UrlAuthority(sitemapindex_uri).has_authority_over(sitemap_uri)):
                    raise ListBaseIndexError("The sitemapindex (%s) refers to sitemap at a location it does not have authority over (%s)" % (sitemapindex_uri,sitemap_uri))
        try:
            fh = URLopener().open(sitemap_uri)
            self.num_files += 1
        except IOError as e:
            raise ListBaseIndexError("Failed to load sitemap from %s listed in sitemap index %s (%s)" % (sitemap_uri,sitemapindex_uri,str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
        except KeyError:
            # If we don't get a length then c'est la vie
            pass
        self.logger.info( "Reading sitemap from %s (%d bytes)" % (sitemap_uri,self.content_length) )
        component = sitemap.parse_xml( fh=fh, sitemapindex=False )
        # Copy resources into self, check any metadata
        for r in component:
            self.resources.add(r)
Example #15
def call_api(url, wait=1):
    time.sleep(wait)
    req = URLopener()
    req.addheader('Authorization', 'token ' + TOKEN)
    fp = req.open(url)
    data = json.load(fp)
    fp.close()
    return data
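The same authorized GET can be written with urllib.request.Request, which takes the headers up front instead of mutating an opener. A sketch, with the token passed explicitly rather than read from a module-level TOKEN:

import json
import time
from urllib.request import Request, urlopen


def call_api_sketch(url, token, wait=1):
    """Fetch a JSON endpoint using a token-based Authorization header."""
    time.sleep(wait)
    req = Request(url, headers={'Authorization': 'token ' + token})
    with urlopen(req) as fp:
        return json.load(fp)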
Example #16
    def read(self, uri=None, resources=None, index_only=False):
        """Read sitemap from a URI including handling sitemapindexes.

        If index_only is True then individual sitemaps references in a sitemapindex
        will not be read. This will result in no resources being returned and is
        useful only to read the metadata and links listed in the sitemapindex.

        Includes the subtlety that if the input URI is a local file and is a
        sitemapindex which contains URIs for the individual sitemaps, then these
        are mapped to the filesystem also.
        """
        try:
            fh = URLopener().open(uri)
            self.num_files += 1
        except IOError as e:
            raise IOError("Failed to load sitemap/sitemapindex from %s (%s)" %
                          (uri, str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
            self.logger.debug("Read %d bytes from %s" %
                              (self.content_length, uri))
        except KeyError:
            # If we don't get a length then c'est la vie
            self.logger.debug("Read ????? bytes from %s" % (uri))
            pass
        self.logger.info("Read sitemap/sitemapindex from %s" % (uri))
        s = self.new_sitemap()
        s.parse_xml(fh=fh, resources=self, capability=self.capability_name)
        # what did we read? sitemap or sitemapindex?
        if (s.parsed_index):
            # sitemapindex
            if (not self.allow_multifile):
                raise ListBaseIndexError(
                    "Got sitemapindex from %s but support for sitemapindex disabled"
                    % (uri))
            self.logger.info("Parsed as sitemapindex, %d sitemaps" %
                             (len(self.resources)))
            sitemapindex_is_file = self.is_file_uri(uri)
            if (index_only):
                # don't read the component sitemaps
                self.sitemapindex = True
                return
            # now loop over all entries to read each sitemap and add to
            # resources
            sitemaps = self.resources
            self.resources = self.resources_class()
            self.logger.info("Now reading %d sitemaps" % len(sitemaps.uris()))
            for sitemap_uri in sorted(sitemaps.uris()):
                self.read_component_sitemap(uri, sitemap_uri, s,
                                            sitemapindex_is_file)
        else:
            # sitemap
            self.logger.info("Parsed as sitemap, %d resources" %
                             (len(self.resources)))
Example #17
def tryToReadFile(filePath, urlData):
    contents = ""
    try:
        fileHandle = URLopener().open(filePath)
        contents = fileHandle.read()
    except IOError:
        contents = "mitmProxy - Resource Override: Could not open " + filePath + \
         " Came from rule: " + urlData[0] + " , " + urlData[1]

    return contents
Example #18
class HttpFetcherBasic(HttpFetcher):
    def __init__(self, url):
        super().__init__(url)
        self.urlop = URLopener()
        for hdr, val in (tuple(x.split("=", 1)) if "=" in x else (x, "")
                         for x in url.fragment.split("&") if x):
            self.urlop.addheader(hdr, val)

    def open(self, url):
        return self.urlop.open(url)
Example #19
    def download(self, entity_id: int, destination: str = None, sort: List[Sort] = None) -> str:
        """
        Download sequences from a single entity.
        """

        sort = [Sort('id', 'asc')] if sort is None else sort
        sort = list(sort_item.to_json() for sort_item in sort) if sort else []
        body = {'filter': [], 'selection': [], 'sort': sort}
        file_path = Sequences.get_filepath_for_entity_id(entity_id)
        url = '{}/entities/{}/_extract'.format(self.url, entity_id)
        print('Downloading shards from "{}" to "{}".'.format(url, file_path))

        paths = []
        with self.session.post(url, stream=True, timeout=10 * 60, json=body) as response:
            try:
                links = response.json()
                print('links', links)
                if 'statusCode' in links and links['statusCode'] != 200:
                    raise Exception(links['message'])
                elif len(links) == 0:
                    raise Exception(
                        'Sequences:download - Error; no download links for {}. Does the table exist?'.format(entity_id))

                index = 0
                for link in links:
                    testfile = URLopener()
                    path = '{}-{}.gz'.format(file_path, index)
                    paths.append(path)
                    testfile.retrieve(link, path)
                    index = index + 1

            except Exception as e:
                print('Sequences:download - error:', e)
                raise e

        sorted_paths = self.get_sorted_file_shard_list(entity_id, paths, [])

        print(f'Unzipping: entity_id={entity_id} to destination={destination}')

        skip_first = False

        with open(destination, 'wb+') as target_file:
            for file_shard in sorted_paths:
                with gzip.open(file_shard, 'rb') as g_zip_file:
                    first_line = True
                    for line in g_zip_file:
                        # We skip the first line of every file, except for the very first.
                        if not (first_line and skip_first):
                            line = Sequences.sanitize(line.decode("utf-8"))
                            target_file.write(line.encode("utf-8"))
                        first_line = False
                # We skip the first line of every file, except for the very first.
                skip_first = True

        return destination
Example #20
def save_downloaded_file(context):
    """
    Saves POEditor terms to a file in output dir

    :param context: behave context
    :return: N/A
    """
    file_path = get_poeditor_file_path(context)
    saved_file = URLopener()
    saved_file.retrieve(context.poeditor_download_url, file_path)
    context.logger.info('POEditor terms have been saved in "%s" file' %
                        file_path)
Example #21
    def run(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(URLopener):
            def open_local_file(self, url):
                return url

        warnings.simplefilter('ignore', DeprecationWarning)
        self.check_func("urlopen", urllib_request.urlopen)
        self.check_func("URLopener().open", URLopener().open)
        self.check_func("URLopener().retrieve", URLopener().retrieve)
        self.check_func("DummyURLopener().open", DummyURLopener().open)
        self.check_func("DummyURLopener().retrieve", DummyURLopener().retrieve)
        self.exit_fixed()
Example #22
def download_text_file(url, file_name):
    opener = URLopener()
    file_name = file_name.split("/")[-1]
    file_name = file_name.replace("%20", " ")
    if _is_absolute_link(file_name):
        url = file_name
        if not url.startswith("http://"):
            url = "http://" + url
        out_name = file_name.split("/")[-1]
    else:
        url = "{}{}".format(url, file_name)
        out_name = file_name
    opener.retrieve(url, file_name)
    return out_name
Example #23
def download_data():
    """This function downloads the data, extract them and remove the archive."""
    if not os.path.exists(DATA_HOME):
        print("Data are missing. Downloading them now...", end="", flush=True)
        datafile = URLopener()
        datafile.retrieve(DOWNLOAD_URL, ARCHIVE_FNAME)
        print("Ok.")
        print("Extracting now...", end="", flush=True)
        tf = tarfile.open(ARCHIVE_FNAME)
        tf.extractall()
        print("Ok.")
        print("Removing the archive...", end="", flush=True)
        os.remove(ARCHIVE_FNAME)
        print("Ok.")
Example #24
def downloadFile(linkStore):
    for imgUrl in linkStore:
        try:
            #removing double slash from the start of url
            imgUrl = urlEdit(imgUrl[2:])
            fileName = imgUrl.split("/")[-1]
            imgUrl = 'https://' + imgUrl
            print('Downloading file: ' + fileName + '\tURL: ' + imgUrl + '\n')
            image = URLopener()
            image.retrieve(imgUrl, fileName)
            # above line may create error due to 403 forbidden response
        except:
            print("Error occured while downloading file: " + imgUrl + '\n')
        continue
Example #25
def _download_file(url, destination):
    logger.info('Downloading %s to %s...', url, destination)

    response = _open_url(url)

    if not response.code == 200:
        raise WagonError("Failed to download file. Request to {0} "
                         "failed with HTTP Error: {1}".format(
                             url, response.code))
    final_url = response.geturl()
    if final_url != url and is_verbose():
        logger.debug('Redirected to %s', final_url)
    f = URLopener()
    f.retrieve(final_url, destination)
Example #26
def main():
    username = input("username: ")
    password = input("password: ")

    r = Request("http://www.loxa.edu.tw/index.php")
    with urlopen(r) as response:
        phpsessid = response.getheader("set-cookie").split("; ")[0].split("=")[1]

    cookie = "PHPSESSID={0}; Cookie_Allow=1".format(phpsessid)
    data = {"loginname": username, "loginpswd": password}
    r = Request(
        "http://www.loxa.edu.tw/check.php",
        data=urlencode(data).encode("utf8"),
        headers={"cookie": cookie},
        method="POST",
    )
    try:
        response = urlopen(r)
    except HTTPError:
        sys.exit("Invalid username or password.")

    r = Request("http://www.loxa.edu.tw/index.php?login=1&show_msg=Y", headers={"cookie": cookie})
    response = urlopen(r)

    r = Request("http://www.loxa.edu.tw/jewelbox/foldertree.php", headers={"cookie": cookie})
    with urlopen(r) as response:
        html = response.read().decode("big5")

    folder_tree_pattern = re.compile('insFld\(.+?, gFld\(".+?", "file_list.php\?dir_id=(\d+?)", "\w"\)\);')
    file_url_pattern = re.compile('<td colspan=3 nowrap>\s+?<a href="(http.+?)"')
    for i in folder_tree_pattern.finditer(html):
        dir_id = i.group(1)
        r = Request(
            "http://www.loxa.edu.tw/jewelbox/file_list.php?dir_id={0}".format(dir_id), headers={"cookie": cookie}
        )
        with urlopen(r) as response:
            html = response.read().decode("big5")

            for i in file_url_pattern.finditer(html):
                url = i.group(1)
                url_data = urlparse(url)
                file_path = url_data.path.lstrip("/")
                dir_name, base_name = os.path.split(file_path)
                if not os.path.exists(dir_name):
                    os.makedirs(dir_name)
                url_opener = URLopener()
                url_opener.addheader("cookie", cookie)
                print("Download: {0} -> {1}".format(url, file_path))
                url_opener.retrieve(url, file_path)
Example #27
    def on_update_button_click(self):
        try:
            opener = URLopener()
            opener.retrieve(self.REMOTE_UPDATE_URL,
                            "resources/parameters.json")

            # Read the new settings.
            self.data = read_settings()
            messagebox.showinfo(
                "Settings Update",
                "Settings successfully updated from the server.")
        except Exception as e:
            logging.critical(
                "Couldn't open the remote settings file: {0}".format(str(e)))
            messagebox.showerror("Couldn't Update Settings",
                                 "Couldn't open the remote settings file.")
Example #28
    def initUrllibInstance(self, server):
        printl("", self, "S")

        # we establish the connection once here
        self.urllibInstance = URLopener()

        # we add headers only in special cases
        connectionType = self.serverConfig.connectionType.value
        localAuth = self.serverConfig.localAuth.value

        if connectionType == "2" or localAuth:
            authHeader = self.plexInstance.get_hTokenForServer(server)
            self.urllibInstance.addheader("X-Plex-Token",
                                          authHeader["X-Plex-Token"])

        printl("", self, "C")
Example #29
class RemoteFile(object):
    def __init__(self, url):
        self.opener = URLopener()
        self.url = url
        self.filename = url.rstrip('/').rsplit('/', 1)[-1]
        self.offset = 0

    def seek(self, offset, whence=0):
        assert whence == 0
        self.offset = offset

    def read(self, size):
        start = self.offset
        end = start + size - 1
        assert end > start
        h = 'Range', 'bytes={}-{}'.format(start, end)
        stderr.write('Fetching {} {}\n'.format(self.filename, h[1]))
        self.opener.addheaders.append(h)
        data = self.opener.open(self.url).read()
        return data

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
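One thing to watch in this class: each read() appends another Range header to opener.addheaders, so the headers accumulate across calls. A minimal per-request variant (RemoteRange is an illustrative name, not from the original project) sends exactly one Range header per request via urllib.request:

from urllib.request import Request, urlopen


class RemoteRange:
    """Fetch byte ranges of a remote file, one Range header per request."""

    def __init__(self, url):
        self.url = url
        self.offset = 0

    def seek(self, offset, whence=0):
        assert whence == 0
        self.offset = offset

    def read(self, size):
        end = self.offset + size - 1
        req = Request(self.url,
                      headers={'Range': 'bytes=%d-%d' % (self.offset, end)})
        with urlopen(req) as resp:
            data = resp.read()
        self.offset += len(data)  # advance past what was actually returned
        return data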
Example #30
    def parse(self, uri=None, fh=None, str_data=None, **kwargs):
        """Parse a single XML document for this list.

        Accepts either a uri (uri or default if parameter not specified), 
        or a filehandle (fh) or a string (str_data). Note that this method
        does not handle the case of a sitemapindex+sitemaps.

        LEGACY SUPPORT - the parameter str may be used in place of str_data
        but is deprecated and will be removed in a later version.
        """
        if (uri is not None):
            try:
                fh = URLopener().open(uri)
            except IOError as e:
                raise Exception(
                    "Failed to load sitemap/sitemapindex from %s (%s)" %
                    (uri, str(e)))
        elif (str_data is not None):
            fh = io.StringIO(str_data)
        elif ('str' in kwargs):
            # Legacy support for str argument, see
            # https://github.com/resync/resync/pull/21
            # One test for this in tests/test_list_base.py
            self.logger.warn(
                "Legacy parse(str=...), use parse(str_data=...) instead")
            fh = io.StringIO(kwargs['str'])
        if (fh is None):
            raise Exception("Nothing to parse")
        s = self.new_sitemap()
        s.parse_xml(fh=fh,
                    resources=self,
                    capability=self.capability_name,
                    sitemapindex=False)
        self.parsed_index = s.parsed_index
Example #31
    def __init__(self, url):
        super(HttpDB, self).__init__()
        self.baseurl = url._replace(fragment="").geturl()
        self.db = urlop = URLopener()
        for hdr, val in (tuple(x.split("=", 1)) if "=" in x else (x, "")
                         for x in url.fragment.split("&") if x):
            urlop.addheader(hdr, val)
Example #32
def Download_File(name):
    """ Download UCAC4 file. """

    url_name = prefix+name
    ucac_file = URLopener()
    ucac_file.retrieve(url_name, name)

    inp = open(name, 'rb')
    bz2_file = bz2.BZ2File(name+'.bz2', 'wb', compresslevel=1)
    copyfileobj(inp, bz2_file)
    inp.close()
    bz2_file.close()

    os.remove(name)

    return 0
Example #33
def pose_video(datum):
    if not exists(datum['video']):
        try:
            URLopener().retrieve(datum["video_url"], datum["video"])
        except Exception as e:
            makedir(datum["pose_dir"]) # Empty directory

    if not exists(datum["pose_dir"]):
        gpu = get_empty_gpu()

        # Create Container
        container_id = Docker.create_container(DOCKER_NAME, "-it -v " + datum["video"] + ":/video.mp4")

        def remove_container():
            Docker.remove_container(container_id)

        try:
            # Start Container
            Docker.start_container(container_id)

            cmd = "./build/examples/openpose/openpose.bin --video /video.mp4 --model_pose BODY_25 --display 0 --render_pose 0 --write_json /out/ --hand --face --num_gpu 1 "
            cmd += " --num_gpu_start " + str(gpu)
            Docker.exec_container(container_id, "bash -c 'cd /openpose && " + cmd + "'")

            # Copy files
            Docker.cp_container_directory(container_id, datum["pose_dir"], "/out/")
        except Exception as e:
            remove_container()
            raise e
        finally:
            remove_container()

    return True
Example #34
    def stop(self):
        """Stops the server."""
        self.stop_serving = True
        try:
            # This is to force stop the server loop
            URLopener().open("http://%s:%d" % (self.host, self.port))
        except IOError:
            pass
        log.info("Shutting down the webserver")
        self.thread.join()
Example #35
    def stop(self) -> None:
        """Stops the server."""
        self.stop_serving = True
        try:
            # This is to force stop the server loop
            URLopener().open(f"http://{self.host}:{self.port}")
        except OSError:
            pass
        log.info("Shutting down the webserver")
        self.thread.join()
Example #36
    def export(self,
               entity_id: int,
               format: ExportFormat,
               destination_folder: str = None):
        entity = self.entities.get(entity_id)
        entity_name = entity['name']
        user = self.authentication.user

        path_parts = entity['path'].split('.')
        # Last path part is always the current document.
        # Any before that are ancestor folders, the first being the parent.
        parent_folder_id = int(path_parts[-2]) if len(path_parts) > 1 else None

        job_id = self.jobs.create(owner_id=user['orgs'][0]['id'],
                                  shareable_id=entity['ownerId'],
                                  job_type=JobType.ExportJob,
                                  name='Export from python client',
                                  input_entity_ids=[entity_id],
                                  params={
                                      "filter": [],
                                      "format": format,
                                      "fileName": entity_name,
                                      "selection": [],
                                      "targetFolderId": parent_folder_id,
                                  })

        # Wait for the file to be converted to Genbank.
        job = self.jobs.poll_job(job_id)

        links = job['outputLinks']

        outputs = []

        for link in links:
            testfile = URLopener()

            destination = os.path.join(destination_folder, entity_name)
            testfile.retrieve(link['url'], destination)

            outputs.append(destination)

        return outputs
Example #37
def get_imagelinks(url):
    """Given a URL, get all images linked to by the page at that URL."""
    links = []
    uo = URLopener()
    file = uo.open(url)
    soup = BeautifulSoup.BeautifulSoup(file.read())
    file.close()
    if not shown:
        tagname = "a"
    elif shown == "just":
        tagname = "img"
    else:
        tagname = ["a", "img"]

    for tag in soup.findAll(tagname):
        link = tag.get("src", tag.get("href", None))
        if link:
            ext = os.path.splitext(link)[1].lower().strip('.')
            if ext in fileformats:
                links.append(urllib.basejoin(url, link))
    return links
Example #38
def get_imagelinks(url):
    """Given a URL, get all images linked to by the page at that URL."""
    links = []
    uo = URLopener()
    file = uo.open(url)
    soup = BeautifulSoup.BeautifulSoup(file.read())
    file.close()
    if not shown:
        tagname = "a"
    elif shown == "just":
        tagname = "img"
    else:
        tagname = ["a", "img"]

    for tag in soup.findAll(tagname):
        link = tag.get("src", tag.get("href", None))
        if link:
            ext = os.path.splitext(link)[1].lower().strip('.')
            if ext in fileformats:
                links.append(urllib.basejoin(url, link))
    return links
Example #39
def online_install():
	#Set location for file download by changing working directory
	#Variable that stores the file name of the ninite file, the temp folder path, and the current directory
	dl = 'ninite.exe'
	dl_path = "c:\\Install_Wizard_Temp"
	currentDir = os.getcwd()
	##This should allow the download location to be changed so that the program can be run off locked flash drive
	#Test to see if directory exists for program already, if not, create one
	if not os.path.exists(dl_path):
		os.makedirs(dl_path)
	#Change working directory to one on customers computer
	os.chdir(dl_path)
	#Check if there is a previous ninite installer
	if os.path.isfile(dl):
		os.remove(dl)
		print('file removed')
	#Create url
	url = urlCreate()
	#Create object to open url
	ninite = URLopener()
	#Download file from url and save as installer.exe
	
	try:
		ninite.retrieve(url, dl)
	except: #Error in retrieving website
		text1.set('Ninite website could\nnot be accessed')
	#Run the file
	try:
		check_call(dl, shell=True)
	except: #Error in running file
		text1.set('Error running ninite file')
		
	#Test to see if dl file exists, if so, delete
	if os.path.isfile(dl):
		os.remove(dl)	
	#Change directory back to original working directory
	os.chdir(currentDir)
	#Check if directory that was created earlier still exists, if so remove it
	if os.path.exists(dl_path):
		rmtree(dl_path)
Example #40
def downloadAsset(uri, dirname):
    tUrl = uri
    o = urlparse(tUrl)
    contentType = ""
    # targetDir = os.path.join(CURRENT_DIRECTORY, dirname, '/'.join(o.path.split('/')[1:-1]))
    targetDir = CURRENT_DIRECTORY + '/' + dirname + '/' + '/'.join(
        o.path.split('/')[1:-1])

    # javascript or fragment-only URIs do not need to be downloaded
    if o.scheme == "javascript" or (o.netloc == '' and o.path == ''):
        return

    if o.scheme == "":
        if uri.startswith("//"):
            tUrl = f"https:{uri}"
        else:
            tUrl = f"https://{uri}"

    try:
        contentType = getContentType(tUrl)
    except Exception:
        try:
            if uri.startswith('//'):
                tUrl = f"http:{uri}"
            else:
                tUrl = f"http://{uri}"
            contentType = getContentType(tUrl)
        except Exception:
            pass
            # raise Exception("Error during connection")
    else:
        # ignore text/html
        if contentType in mimeTypes[1:]:
            if not os.path.exists(targetDir):
                path = Path(targetDir)
                path.mkdir(parents=True)

            targetFile = targetDir + '/' + o.path.split('/')[-1]
            if not os.path.exists(targetFile):
                try:
                    urlretrieve(tUrl, targetFile)
                    print(f"[Retrieved] {targetFile}")
                except Exception:
                    try:
                        opener = URLopener()
                        opener.addheader('User-Agent', 'Mozilla/5.0')
                        filename, headers = opener.retrieve(tUrl, targetFile)
                    except Exception:
                        try:
                            tUrl = tUrl.replace('www.', '')
                            tUrl = tUrl.replace('http:', 'https:')
                            filename, headers = opener.retrieve(
                                tUrl, targetFile)
                        except Exception as e:
                            print(str(e))
                            raise Exception

        else:
            pass
Example #41
def open_url(url, **kwds):
    """Opens a url or file and returns an appropriate key-value reader."""
    reader_cls = fileformat(url)

    parsed_url = urlparse(url, 'file')
    if parsed_url.scheme == 'file':
        f = open(parsed_url.path, 'rb')
    else:
        if parsed_url.scheme == 'hdfs':
            server, username, path = hdfs.urlsplit(url)
            url = hdfs.datanode_url(server, username, path)

        if reader_cls is ZipReader and sys.version_info < (3, 2):
            # In Python <3.2, the gzip module is broken because it depends on
            # the underlying file being seekable (not true for url objects).
            opener = URLopener()
            filename, _ = opener.retrieve(url)
            f = open(filename, 'rb')
            os.unlink(filename)
        else:
            f = urlopen(url)

    return reader_cls(f, **kwds)
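The temporary-file workaround in the branch above exists because, as the comment notes, older gzip code needed a seekable file object and an HTTP response is not one. Buffering the response body into io.BytesIO gives compressed-file readers the seek support they expect without touching disk; a sketch under that assumption (open_compressed_url is an illustrative helper, unrelated to the surrounding module):

import gzip
import io
import zipfile
from urllib.request import urlopen


def open_compressed_url(url):
    """Return a gzip or zip reader over an in-memory copy of the response."""
    buf = io.BytesIO(urlopen(url).read())  # whole payload is held in memory
    if url.endswith('.zip'):
        return zipfile.ZipFile(buf)
    return gzip.GzipFile(fileobj=buf)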
Example #42
def download_audio_file(url, guid):
    """Download the audio file for the lecture.

    Downloads the mp3 audio recording of the lecture. The file is stored in the
    temporary folder, named using the GUID.

    Args:
        - url (str): The lecture's base url
        - guid (str): The lecture's guid

    """
    print("\nDownloading audio file")
    URLopener().retrieve(url + "/audio.mp3",
                         os.path.join(DOWNLOAD_DIRECTORY, guid, "audio.mp3"))
Example #43
class RemoteFile(object):
    def __init__(self, url):
        self.opener = URLopener()
        self.url = url
        self.filename = url.rstrip('/').rsplit('/', 1)[-1]
        self.offset = 0

    def seek(self, offset, whence=0):
        assert whence == 0
        self.offset = offset

    def read(self, size):
        start = self.offset
        end = start + size - 1
        assert end > start
        h = 'Range', 'bytes={}-{}'.format(start, end)
        stderr.write('Fetching {} {}\n'.format(self.filename, h[1]))
        self.opener.addheaders.append(h)
        data = self.opener.open(self.url).read()
        return data
Example #44
def getImage(url):
    MAX_TITLE_DESC = 100
    MAX_TITLE = 255

    uo = URLopener()
    file = uo.open(url)
    soup = BeautifulSoup.BeautifulSoup(file.read())
    file.close()
    outImage = Image()

    imgTag = soup.find("img", { "class" : "imageWithCaption" })
    link = imgTag.get("src", imgTag.get("href", None))
    if link:
        outImage.url = urllib.basejoin(url, link)
        caption = soup.find("div", { "id" : "caption" })
        captionTxt = caption.string
        #Kuressaare linnus, vaade põhjast (SM F 3761:473 F); Saaremaa Muuseum; Faili nimi:smf_3761_473.jpg
        (capPart1, museumName, capPart3) = captionTxt.split(';')
        museumName = museumName.strip()
        matchItemRef = re.search("^(.+)\((.+?)\)$", capPart1)
        if (matchItemRef and matchItemRef.group(2)): 
            outImage.source = u'[%s %s, %s]' % ( url, museumName, matchItemRef.group(2) )
            outImage.source.strip()

        mainTable = soup.find("table", {"class" : "data highlighted"})
        outDesc = u"<table>\n"
        outDesc += getWikiTable(mainTable, outImage)
        
        mainTable = soup.find("table", {"class" : "data"})
        outDesc += getWikiTable(mainTable, outImage)

        mainTable = soup.find("table", {"class" : "data full_length"})
        outDesc += getWikiTable(mainTable, outImage)
        outDesc += u"</table>\n"

        titleStart = matchItemRef.group(1).strip()
        if ( len(titleStart) > MAX_TITLE_DESC ):
            #shorten title beginning
            titleStart = titleStart[:MAX_TITLE_DESC]
        outImage.name = titleStart + u', ' + outImage.accession_number + u'.jpg'
        outImage.name = cleanUpTitle( outImage.name )
        if ( len(outImage.name) > MAX_TITLE ):
            #shorten title
            outImage.name = outImage.name[:MAX_TITLE]
        
        outImage.description = '{{et|1=' + outDesc + '}}'
        outImage.license = '{{PD-old}}'
        
        ##add categories
        museumName = museumName.encode('utf_8')
        if museumData.get(museumName) and museumData.get(museumName).get('enName'):
            museumEnName = museumData.get(museumName).get('enName')
            outImage.institution = u'{{Institution:' + museumEnName + u'}}'
            museumCat = u'Images from the ' + museumEnName
            outImage.categories.append( museumCat )
        else:
            print "Museum enName not found for %s ! \n" % url
            return None

            
    return outImage
Example #45
    def __init__(self, url):
        self.opener = URLopener()
        self.url = url
        self.filename = url.rstrip('/').rsplit('/', 1)[-1]
        self.offset = 0