Ejemplo n.º 1
0
def site_reachable(url):
    """
    Report whether ``url`` can be opened within one second.

    Parameters
    ----------
    url : str
        Address handed to ``urlopen``.

    Returns
    -------
    bool
        ``True`` if ``urlopen`` succeeds, ``False`` if it raises
        ``URLError`` or ``socket.timeout``.
    """
    try:
        response = urlopen(url, timeout=1)
    except (URLError, socket.timeout):
        return False
    # Only reachability matters; release the connection instead of leaking it.
    response.close()
    return True
Ejemplo n.º 2
0
def url_exists(url, timeout=2):
    """
    Checks whether a url is online.

    Parameters
    ----------
    url: str
        A string containing a URL
    timeout: int or float, optional
        Seconds passed to ``urlopen`` as its timeout. Defaults to 2.

    Returns
    -------
    value: bool
        True if the URL could be opened, False if opening it failed or
        timed out.

    Examples
    --------
    >>> from sunpy.util.net import url_exists
    >>> url_exists('http://www.google.com')
    True
    >>> url_exists('http://aslkfjasdlfkjwerf.com')
    False
    """
    # Local import so this function does not rely on a module-level import.
    import socket

    try:
        response = urlopen(url, timeout=timeout)
    except (URLError, socket.timeout):
        # HTTPError is a subclass of URLError, so one clause covers both.
        # A timeout while waiting for the response can surface as
        # socket.timeout rather than URLError; treat it as "not online".
        return False
    # Only reachability matters; release the connection instead of leaking it.
    response.close()
    return True
Ejemplo n.º 3
0
def url_exists(url, timeout=2):
    """
    Checks whether a url is online.

    Parameters
    ----------
    url: `str`
        A string containing a URL
    timeout: `int` or `float`, optional
        Seconds passed to ``urlopen`` as its timeout. Defaults to 2.

    Returns
    -------
    value: `bool`
        `True` if the URL could be opened, `False` if opening it failed
        or timed out.

    Examples
    --------
    >>> from sunpy.util.net import url_exists
    >>> url_exists('http://www.google.com')  #doctest: +REMOTE_DATA
    True
    >>> url_exists('http://aslkfjasdlfkjwerf.com')  #doctest: +REMOTE_DATA
    False
    """
    # Local import so this function does not rely on a module-level import.
    import socket

    try:
        connection = urlopen(url, timeout=timeout)
    except (URLError, socket.timeout):
        # A single clause is enough: HTTPError derives from URLError.
        # socket.timeout covers a stall while waiting for the response,
        # which is not always wrapped in URLError.
        return False
    # The payload is irrelevant here, so close the connection right away.
    connection.close()
    return True
Ejemplo n.º 4
0
def site_reachable(url):
    """
    Report whether ``url`` can be opened within one second.

    Parameters
    ----------
    url : str
        Address handed to ``urlopen``.

    Returns
    -------
    bool
        ``True`` if ``urlopen`` succeeds, ``False`` if it raises
        ``URLError`` or times out.
    """
    # Local import so this function does not rely on a module-level import.
    import socket

    try:
        response = urlopen(url, timeout=1)
    except (URLError, socket.timeout):
        # A stalled response can raise socket.timeout instead of URLError;
        # both mean the site did not answer in time.
        return False
    # Only reachability matters; release the connection instead of leaking it.
    response.close()
    return True
Ejemplo n.º 5
0
def site_reachable(url):
    """
    Check that ``url`` answers an open request within one second.

    Parameters
    ----------
    url : str
        Address handed to ``urlopen``.

    Returns
    -------
    bool
        ``False`` when ``urlopen`` raises ``URLError`` or times out,
        ``True`` otherwise.
    """
    # Local import so this function does not rely on a module-level import.
    import socket

    try:
        connection = urlopen(url, timeout=1)
    except (URLError, socket.timeout):
        # socket.timeout is caught as well because a slow response is not
        # always reported as URLError.
        return False
    # The content is never read, so close the connection immediately.
    connection.close()
    return True
Ejemplo n.º 6
0
def get_base_url():
    """
    Find the first mirror which is online.

    Every entry of ``data_servers`` is tried in order with a one second
    timeout.

    Returns
    -------
    The first entry of ``data_servers`` that ``urlopen`` could open.

    Raises
    ------
    IOError
        If none of the servers could be opened.
    """
    for server in data_servers:
        try:
            response = urlopen(server, timeout=1)
        except (URLError, socket.timeout):
            # This mirror is down or too slow; try the next one.
            continue
        # Only reachability matters; release the connection instead of leaking it.
        response.close()
        return server

    raise IOError('Unable to find an online HESSI server from {0}'.format(data_servers))
Ejemplo n.º 7
0
def get_base_url():
    """
    Find the first mirror which is online.

    Every entry of ``data_servers`` is tried in order with a one second
    timeout.

    Returns
    -------
    The first entry of ``data_servers`` that ``urlopen`` could open, or
    ``None`` when no server could be opened.
    """
    for server in data_servers:
        try:
            response = urlopen(server, timeout=1)
        except (URLError, socket.timeout):
            # This mirror is down or too slow; try the next one.
            continue
        # Only reachability matters; release the connection instead of leaking it.
        response.close()
        return server

    # No mirror answered. The None result is kept for existing callers.
    return None
Ejemplo n.º 8
0
def get_base_url():
    """
    Find the first mirror which is online.

    The entries of ``data_servers`` are probed one after another, each
    with a one second timeout.

    Returns
    -------
    The first entry of ``data_servers`` that ``urlopen`` could open, or
    ``None`` when every probe failed.
    """
    for candidate in data_servers:
        try:
            connection = urlopen(candidate, timeout=1)
        except (URLError, socket.timeout):
            # Unreachable within the timeout; move on to the next mirror.
            continue
        # The response body is not needed, so close the connection now.
        connection.close()
        return candidate

    # Every mirror failed. None is returned explicitly rather than by
    # falling off the end of the function.
    return None
Ejemplo n.º 9
0
def get_base_url():
    """
    Find the first mirror which is online.

    The entries of ``data_servers`` are probed one after another, each
    with a one second timeout.

    Returns
    -------
    The first entry of ``data_servers`` that ``urlopen`` could open.

    Raises
    ------
    IOError
        If every probe failed.
    """
    for candidate in data_servers:
        try:
            connection = urlopen(candidate, timeout=1)
        except (URLError, socket.timeout):
            # Unreachable within the timeout; move on to the next mirror.
            continue
        # The response body is not needed, so close the connection now.
        connection.close()
        return candidate

    raise IOError('Unable to find an online HESSI server from {0}'.format(data_servers))
Ejemplo n.º 10
0
    def filelist(self, timerange):
        """
        Returns the list of existent files in the archive for the
        given time range.

        Parameters
        ----------

        timerange : `~sunpy.time.TimeRange`
            Time interval where to find the directories for a given
            pattern.

        Returns
        -------

        filesurls : list of strings
            List of all the files found between the time range given.
            Empty when ``self.range`` yields no directories.

        Examples
        --------
        >>> from sunpy.time import TimeRange
        >>> timerange = TimeRange('2015-01-01','2015-01-01T16:00:00')
        >>> print(solmon.filelist(timerange))
        ['http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_025423.fts.gz']

        Note
        ----

        The search is strict with the time range, so if the archive scraped
        contains daily files, but the range doesn't start from the beginning
        of the day, then the file for that day won't be selected. The end of
        the timerange will normally be OK as includes the file on such end.

        Errors raised while opening or parsing a directory listing are
        propagated to the caller.
        """
        directories = self.range(timerange)
        filesurls = []
        # Nothing to scrape; also avoids indexing into an empty list below.
        if not directories:
            return filesurls
        if directories[0][0:3] == "ftp":  # TODO use urlsplit from pr #1807
            return self._ftpfileslist(timerange)
        # The extension taken from the pattern is the same for every link.
        extension = self.pattern.split('.')[-1]
        for directory in directories:
            opn = urlopen(directory)
            try:
                soup = BeautifulSoup(opn, "lxml")
                for link in soup.find_all("a"):
                    href = link.get("href")
                    # Anchors without an href attribute give None; skip them
                    # instead of failing on ``None.endswith``.
                    if href is None or not href.endswith(extension):
                        continue
                    fullpath = directory + href
                    if not self._URL_followsPattern(fullpath):
                        continue
                    datehref = self._extractDateURL(fullpath)
                    if (datehref >= timerange.start
                            and datehref <= timerange.end):
                        filesurls.append(fullpath)
            finally:
                opn.close()
        return filesurls
Ejemplo n.º 11
0
def download_file(url, directory, default=u'file', overwrite=False):
    """
    Download the file at ``url`` into ``directory``.

    The file name is taken from the Content-Disposition header when
    available, otherwise from the path of the url, and finally from
    ``default`` if both fail. Existing files are only overwritten when
    ``overwrite`` is True.

    Returns whatever ``download_fileobj`` returns for the opened url.
    """
    response = urlopen(url)
    try:
        # The finally clause still runs before the value is handed back.
        return download_fileobj(response, directory, url, default, overwrite)
    finally:
        response.close()
Ejemplo n.º 12
0
def download_file(url, directory, default="file", overwrite=False):
    """
    Fetch ``url`` and store it inside ``directory``.

    The name of the stored file comes from the Content-Disposition
    header if present, else from the url path, else from ``default``.
    An existing file is replaced only when ``overwrite`` is True.

    Returns the result of ``download_fileobj`` for the opened url.
    """
    remote = urlopen(url)
    try:
        # Returning from inside the try keeps the close in the finally
        # clause on both the success and the failure path.
        return download_fileobj(remote, directory, url, default, overwrite)
    finally:
        remote.close()
Ejemplo n.º 13
0
    def filelist(self, timerange):
        """
        Returns the list of existent files in the archive for the
        given time range.

        Parameters
        ----------

        timerange : `~sunpy.time.TimeRange`
            Time interval where to find the directories for a given
            pattern.

        Returns
        -------

        filesurls : list of strings
            List of all the files found between the time range given.
            Directories that cannot be opened or parsed are skipped.

        Examples
        --------
        >>> from sunpy.time import TimeRange
        >>> timerange = TimeRange('2015-01-01','2015-01-01T16:00:00')
        >>> print(solmon.filelist(timerange))
        ['http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_025423.fts.gz']
        """
        directories = self.range(timerange)
        filesurls = []
        # The extension taken from the pattern is the same for every link.
        extension = self.pattern.split('.')[-1]
        for directory in directories:
            try:
                opn = urlopen(directory)
                try:
                    soup = BeautifulSoup(opn)
                    for link in soup.find_all("a"):
                        href = link.get("href")
                        # Anchors without an href attribute give None; skip
                        # them explicitly rather than relying on the handler
                        # below, which would drop the rest of the directory.
                        if href is None or not href.endswith(extension):
                            continue
                        fullpath = directory + href
                        if not self._URL_followsPattern(fullpath):
                            continue
                        datehref = self._extractDateURL(fullpath)
                        if (datehref >= timerange.start and
                                datehref <= timerange.end):
                            filesurls.append(fullpath)
                finally:
                    opn.close()
            except Exception:
                # Best effort: a directory that fails is skipped. Exception
                # (not a bare except) lets KeyboardInterrupt and SystemExit
                # propagate.
                continue
        return filesurls
Ejemplo n.º 14
0
    def filelist(self, timerange):
        """
        Returns the list of existent files in the archive for the
        given time range.

        Parameters
        ----------

        timerange : `~sunpy.time.TimeRange`
            Time interval where to find the directories for a given
            pattern.

        Returns
        -------

        filesurls : list of strings
            List of all the files found between the time range given.
            A directory whose listing cannot be fetched or parsed
            contributes no entries.

        Examples
        --------
        >>> from sunpy.time import TimeRange
        >>> timerange = TimeRange('2015-01-01','2015-01-01T16:00:00')
        >>> print(solmon.filelist(timerange))
        ['http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_025423.fts.gz']
        """
        directories = self.range(timerange)
        filesurls = []
        # Loop-invariant: computed once instead of once per link.
        suffix = self.pattern.split('.')[-1]
        for directory in directories:
            try:
                opn = urlopen(directory)
                try:
                    soup = BeautifulSoup(opn)
                    for link in soup.find_all("a"):
                        href = link.get("href")
                        # ``get`` gives None for anchors lacking an href.
                        # Skipping them here keeps the remaining links of
                        # this directory from being discarded.
                        if href is None or not href.endswith(suffix):
                            continue
                        fullpath = directory + href
                        if not self._URL_followsPattern(fullpath):
                            continue
                        datehref = self._extractDateURL(fullpath)
                        if (datehref >= timerange.start
                                and datehref <= timerange.end):
                            filesurls.append(fullpath)
                finally:
                    opn.close()
            except Exception:
                # Deliberately best effort: failing directories are skipped.
                # Catching Exception instead of everything keeps
                # KeyboardInterrupt and SystemExit working.
                continue
        return filesurls
Ejemplo n.º 15
0
    def filelist(self, timerange):
        """
        Returns the list of existent files in the archive for the
        given time range.

        Parameters
        ----------

        timerange : `~sunpy.time.TimeRange`
            Time interval where to find the directories for a given
            pattern.

        Returns
        -------

        filesurls : list of strings
            List of all the files found between the time range given.
            Empty when ``self.range`` yields no directories.

        Examples
        --------
        >>> from sunpy.util.scraper import Scraper
        >>> solmon_pattern = ('http://solarmonitor.org/data/'
        ...                   '%Y/%m/%d/fits/{instrument}/'
        ...                   '{instrument}_{wave:05d}_fd_%Y%m%d_%H%M%S.fts.gz')
        >>> solmon = Scraper(solmon_pattern, instrument = 'swap', wave = 174)
        >>> from sunpy.time import TimeRange
        >>> timerange = TimeRange('2015-01-01','2015-01-01T16:00:00')
        >>> print(solmon.filelist(timerange))  # doctest: +REMOTE_DATA
        ['http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_025423.fts.gz',
         'http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_061145.fts.gz',
         'http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_093037.fts.gz',
         'http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_124927.fts.gz']

        Note
        ----

        The search is strict with the time range, so if the archive scraped
        contains daily files, but the range doesn't start from the beginning
        of the day, then the file for that day won't be selected. The end of
        the timerange will normally be OK as includes the file on such end.

        Errors raised while opening or parsing a directory listing are
        propagated to the caller.
        """
        directories = self.range(timerange)
        filesurls = []
        # Nothing to scrape; also avoids indexing into an empty list below.
        if not directories:
            return filesurls
        if directories[0][0:3] == "ftp":  # TODO use urlsplit from pr #1807
            return self._ftpfileslist(timerange)
        # Loop-invariant: computed once instead of once per link.
        suffix = self.pattern.split('.')[-1]
        for directory in directories:
            opn = urlopen(directory)
            try:
                soup = BeautifulSoup(opn, "html.parser")
                for link in soup.find_all("a"):
                    href = link.get("href")
                    # ``get`` gives None for anchors lacking an href; skip
                    # them instead of failing on ``None.endswith``.
                    if href is None or not href.endswith(suffix):
                        continue
                    fullpath = directory + href
                    if not self._URL_followsPattern(fullpath):
                        continue
                    datehref = self._extractDateURL(fullpath)
                    if (datehref >= timerange.start and
                            datehref <= timerange.end):
                        filesurls.append(fullpath)
            finally:
                opn.close()
        return filesurls
Ejemplo n.º 16
0
def _is_url(arg):
    try:
        urlopen(arg)
    except:
        return False
    return True
Ejemplo n.º 17
0
def _is_url(arg):
    try:
        urlopen(arg)
    except:
        return False
    return True