Example #1
    def download(self, *query, **kwargs):
        """download(*query, client=sunpy.net.vso.VSOClient(), path=None, progress=False)
        Search for data using the VSO interface (see
        :meth:`sunpy.net.vso.VSOClient.query`). If querying the VSO results in
        no data, no operation is performed. Concretely, this means that no entry
        is added to the database and no file is downloaded. Otherwise, the
        retrieved search result is used to download all files that belong to
        this search result. After that, all the gathered information (the one
        from the VSO query result and the one from the downloaded FITS files)
        is added to the database in a way that each FITS header is represented
        by one database entry.

        """
        if not query:
            raise TypeError('at least one attribute required')
        client = kwargs.pop('client', None)
        path = kwargs.pop('path', None)
        progress = kwargs.pop('progress', False)
        if kwargs:
            k, v = kwargs.popitem()
            raise TypeError('unexpected keyword argument {0!r}'.format(k))
        if client is None:
            client = VSOClient()
        qr = client.query(*query)
        # don't do anything if querying the VSO results in no data
        if not qr:
            return
        entries = list(
            self._download_and_collect_entries(qr, client, path, progress))
        dump = serialize.dump_query(and_(*query))
        (dump_exists, ), = self.session.query(
            exists().where(tables.JSONDump.dump == tables.JSONDump(dump).dump))
        if dump_exists:
            # dump already exists in table jsondumps -> edit instead of add
            # update all entries with the fileid `entry.fileid`
            for entry in entries:
                old_entry = self.session.query(tables.DatabaseEntry).filter_by(
                    fileid=entry.fileid).first()
                if old_entry is not None:
                    attrs = [
                        'source', 'provider', 'physobs',
                        'observation_time_start', 'observation_time_end',
                        'instrument', 'size', 'wavemin', 'wavemax',
                        'download_time'
                    ]
                    kwargs = dict((k, getattr(entry, k)) for k in attrs)
                    cmd = commands.EditEntry(old_entry, **kwargs)
                    if self._enable_history:
                        self._command_manager.do(cmd)
                    else:
                        cmd()
        else:
            self.add_many(entries)
            # serialize the query and save the serialization in the database
            # for two reasons:
            #   1. to avoid unnecessary downloading in future calls of
            #      ``fetch``
            #   2. to know whether to add or to edit entries in future calls of
            #      ``download`` (this method)
            self.session.add(tables.JSONDump(dump))
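
A minimal usage sketch for the `download` method above, assuming a `sunpy.database.Database` instance that provides this method and the classic `vso.attrs` interface from the same sunpy era as the snippet; the time range and instrument are placeholders:

from sunpy.database import Database
from sunpy.net import vso

# in-memory database, as in the fetch doctests further down this page
database = Database('sqlite:///:memory:')

# chained attributes are combined with AND; each downloaded FITS header
# becomes one database entry
database.download(vso.attrs.Time('2012-08-05', '2012-08-05 00:00:05'),
                  vso.attrs.Instrument('AIA'))

# repeating the same query edits the existing entries instead of adding
# duplicates, thanks to the serialized-query (JSONDump) check above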
Example #2
    def _download_and_collect_entries(self, query_result, client=None,
                                      path=None, progress=False):
        if client is None:
            client = VSOClient()
        for block in query_result:
            # download the files for this query result block
            paths = client.get([block], path).wait(progress=progress)
            for file_path in paths:
                qr_entry = tables.DatabaseEntry._from_query_result_block(block)
                file_entries = list(
                    tables.entries_from_file(file_path, self.default_waveunit))
                for entry in file_entries:
                    entry.source = qr_entry.source
                    entry.provider = qr_entry.provider
                    entry.physobs = qr_entry.physobs
                    entry.fileid = qr_entry.fileid
                    entry.observation_time_start = \
                        qr_entry.observation_time_start
                    entry.observation_time_end = qr_entry.observation_time_end
                    entry.instrument = qr_entry.instrument
                    entry.size = qr_entry.size
                    entry.wavemin = qr_entry.wavemin
                    entry.wavemax = qr_entry.wavemax
                    entry.path = file_path
                    entry.download_time = datetime.utcnow()
                    yield entry
Example #3
    def _download_and_collect_entries(self,
                                      query_result,
                                      client=None,
                                      path=None,
                                      progress=False):
        if client is None:
            client = VSOClient()

        paths = client.get(query_result, path).wait(progress=progress)

        for (path, block) in zip(paths, query_result):
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)

            if os.path.isfile(path):
                entries = tables.entries_from_file(path, self.default_waveunit)
            elif os.path.isdir(path):
                entries = tables.entries_from_dir(path, self.default_waveunit)
            else:
                raise ValueError('The path is neither a file nor directory')

            for entry in entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start = qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
                yield entry
Example #4
    def _download_and_collect_entries(self, query_result, client=None,
            path=None, progress=False):
        if client is None:
            client = VSOClient()

        paths = client.get(query_result, path).wait(progress=progress)

        for (path, block) in zip(paths, query_result):
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)

            if os.path.isfile(path):
                entries = tables.entries_from_file(path, self.default_waveunit)
            elif os.path.isdir(path):
                entries = tables.entries_from_dir(path, self.default_waveunit)
            else:
                raise ValueError('The path is neither a file nor directory')

            for entry in entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start = qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
                yield entry
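
In Examples #2 to #4 the VSO metadata is copied onto each file-derived entry with a block of repeated assignments. The same copy can be written as a loop over attribute names, mirroring the `attrs` list used in Example #1; a hypothetical standalone helper (not part of sunpy) would be:

# Fields carried over from the VSO query result block to each database entry.
VSO_ATTRS = [
    'source', 'provider', 'physobs', 'fileid',
    'observation_time_start', 'observation_time_end',
    'instrument', 'size', 'wavemin', 'wavemax',
]

def copy_vso_metadata(entry, qr_entry):
    # Copy each named attribute from the query-result entry onto the file entry.
    for name in VSO_ATTRS:
        setattr(entry, name, getattr(qr_entry, name))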
Example #5
    def download(self, *query, **kwargs):
        """download(*query, client=sunpy.net.vso.VSOClient(), path=None, progress=False)
        Search for data using the VSO interface (see
        :meth:`sunpy.net.vso.VSOClient.query`). If querying the VSO results in
        no data, no operation is performed. Concretely, this means that no entry
        is added to the database and no file is downloaded. Otherwise, the
        retrieved search result is used to download all files that belong to
        this search result. After that, all the gathered information (the one
        from the VSO query result and the one from the downloaded FITS files)
        is added to the database in a way that each FITS header is represented
        by one database entry.

        """
        if not query:
            raise TypeError('at least one attribute required')
        client = kwargs.pop('client', None)
        path = kwargs.pop('path', None)
        progress = kwargs.pop('progress', False)
        if kwargs:
            k, v = kwargs.popitem()
            raise TypeError('unexpected keyword argument {0!r}'.format(k))
        if client is None:
            client = VSOClient()
        qr = client.query(*query)
        # don't do anything if querying the VSO results in no data
        if not qr:
            return
        entries = list(self._download_and_collect_entries(
            qr, client, path, progress))
        dump = serialize.dump_query(and_(*query))
        (dump_exists,), = self.session.query(
            exists().where(tables.JSONDump.dump == tables.JSONDump(dump).dump))
        if dump_exists:
            # dump already exists in table jsondumps -> edit instead of add
            # update all entries with the fileid `entry.fileid`
            for entry in entries:
                old_entry = self.session.query(
                    tables.DatabaseEntry).filter_by(fileid=entry.fileid).first()
                if old_entry is not None:
                    attrs = [
                        'source', 'provider', 'physobs',
                        'observation_time_start', 'observation_time_end',
                        'instrument', 'size', 'wavemin', 'wavemax',
                        'download_time']
                    kwargs = dict((k, getattr(entry, k)) for k in attrs)
                    cmd = commands.EditEntry(old_entry, **kwargs)
                    if self._enable_history:
                        self._command_manager.do(cmd)
                    else:
                        cmd()
        else:
            self.add_many(entries)
            # serialize the query and save the serialization in the database
            # for two reasons:
            #   1. to avoid unnecessary downloading in future calls of
            #      ``fetch``
            #   2. to know whether to add or to edit entries in future calls of
            #      ``download`` (this method)
            self.session.add(tables.JSONDump(dump))
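
The `(dump_exists, ), = self.session.query(...)` line in Examples #1 and #5 relies on SQLAlchemy returning exactly one single-column row for an `exists()` query, which the double unpacking turns into a plain boolean. A small sketch of that idiom in isolation, assuming a SQLAlchemy session and a mapped `JSONDump` table with a `dump` column as used above:

from sqlalchemy import exists

def query_already_dumped(session, JSONDump, dump):
    # exists() yields a single row holding a single boolean column
    (found, ), = session.query(exists().where(JSONDump.dump == dump))
    return found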
Example #6
    def _download_and_collect_entries(self, query_result, client=None,
                                      path=None, progress=False, methods=None,
                                      overwrite=False, **kwargs):

        if kwargs:
            k, v = kwargs.popitem()
            raise TypeError(f'unexpected keyword argument {k!r}')

        if client is None:
            client = VSOClient()

        remove_list = []
        delete_entries = []
        for qr in query_result:
            temp = tables.DatabaseEntry._from_query_result_block(qr)
            for database_entry in self:
                if database_entry.path is not None and temp._compare_attributes(
                    database_entry, ["source", "provider", "physobs", "fileid",
                                     "observation_time_start", "observation_time_end",
                                     "instrument", "size", "wavemin", "wavemax"]):
                    if not overwrite:
                        remove_list.append(qr)
                    else:
                        delete_entries.append(database_entry)

        for temp in remove_list:
            query_result = [x for x in query_result if x != temp]

        for temp in delete_entries:
            self.remove(temp)

        paths = client.fetch(query_result, path)

        for (path, block) in zip(paths, query_result):
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)

            if os.path.isfile(path):
                entries = tables.entries_from_file(path, self.default_waveunit)
            elif os.path.isdir(path):
                entries = tables.entries_from_dir(path, self.default_waveunit)
            else:
                raise ValueError('The path is neither a file nor directory')

            for entry in entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start = qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
                yield entry
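
`_compare_attributes` above is called with another entry and a list of attribute names, and the surrounding code only branches on whether all of them match. A hypothetical standalone equivalent (not the actual sunpy implementation) is simply:

def compare_attributes(first, second, attribute_list):
    # True only if both entries agree on every named attribute.
    return all(getattr(first, name) == getattr(second, name)
               for name in attribute_list)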
Example #7
    def _download_and_collect_entries(self, query_result, **kwargs):

        client = kwargs.pop('client', None)
        path = kwargs.pop('path', None)
        progress = kwargs.pop('progress', False)
        methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))

        if kwargs:
            k, v = kwargs.popitem()
            raise TypeError('unexpected keyword argument {0!r}'.format(k))

        if client is None:
            client = VSOClient()

        paths = client.get(query_result, path, methods).wait(progress=progress)

        for (path, block) in zip(paths, query_result):
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)

            if os.path.isfile(path):
                entries = tables.entries_from_file(path, self.default_waveunit)
            elif os.path.isdir(path):
                entries = tables.entries_from_dir(path, self.default_waveunit)
            else:
                raise ValueError('The path is neither a file nor directory')

            for entry in entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start = qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
                yield entry
Example #8
    def _download_and_collect_entries(self, query_result, **kwargs):

        client = kwargs.pop('client', None)
        path = kwargs.pop('path', None)
        progress = kwargs.pop('progress', False)
        methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))

        if kwargs:
            k, v = kwargs.popitem()
            raise TypeError('unexpected keyword argument {0!r}'.format(k))

        if client is None:
            client = VSOClient()

        paths = client.get(query_result, path, methods).wait(progress=progress)

        for (path, block) in zip(paths, query_result):
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)

            if os.path.isfile(path):
                entries = tables.entries_from_file(path, self.default_waveunit)
            elif os.path.isdir(path):
                entries = tables.entries_from_dir(path, self.default_waveunit)
            else:
                raise ValueError('The path is neither a file nor directory')

            for entry in entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start = qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
                yield entry
Example #9
def search_VSO(start_time, end_time):
    client = VSOClient()
    query_response = client.query_legacy(tstart=start_time,
                                         tend=end_time,
                                         instrument='HMI',
                                         physobs='intensity',
                                         sample=3600)
    results = client.fetch(query_response[:1], path='./tmp/{file}', site='rob')
    continuum_file = results.wait()

    query_response = client.query_legacy(tstart=start_time,
                                         tend=end_time,
                                         instrument='HMI',
                                         physobs='los_magnetic_field',
                                         sample=3600)
    results = client.fetch(query_response[:1], path='./tmp/{file}', site='rob')
    magnetic_file = results.wait()
    return continuum_file[0], magnetic_file[0]
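
A possible way to call `search_VSO` above, assuming `sunpy.map` is available and the VSO legacy query returns HMI results for the chosen interval; the time range is only a placeholder:

import sunpy.map

continuum_file, magnetic_file = search_VSO('2014-01-01 00:00:00',
                                           '2014-01-01 01:00:00')

# load the downloaded HMI continuum image and LOS magnetogram as maps
continuum_map = sunpy.map.Map(continuum_file)
magnetic_map = sunpy.map.Map(magnetic_file)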
Example #10
    def fetch(self, *query, **kwargs):
        """
        Check if the query has already been used to collect new data.

        If yes, query the database using the method
        :meth:`sunpy.database.Database.search` and return the result.

        Otherwise, the retrieved search result is used to download all files
        that belong to this search result. After that, all the gathered
        information (the one from the query result and the one from the
        downloaded files) is added to the database in a way that each header
        is represented by one database entry.

        It uses the
        :meth:`sunpy.database.Database._download_and_collect_entries` method
        to download files, which uses query result block level caching. This
        means that files will not be downloaded for any query result block
        that had its files downloaded previously. If files for Query A were
        already downloaded, and then Query B is made which has some result
        blocks common with Query A, then files for these common blocks will
        not be downloaded again. Files will only be downloaded for those
        blocks which are new or haven't had their files downloaded yet.

        If querying results in no data, no operation is performed. Concretely,
        this means that no entry is added to the database and no file is
        downloaded.

        Parameters
        ----------
        query : `list`
            A variable number of attributes that are chained together via the
            boolean AND operator. The | operator may be used between attributes
            to express the boolean OR operator.
        path : `str`, optional
            The directory into which files will be downloaded.
        overwrite : `bool`, optional
            If True, matching database entries from the query results will be
            deleted and replaced with new database entries, with all files
            getting downloaded.
            Otherwise, no files are downloaded for matching database entries
            and those entries are not updated.
        client : `sunpy.net.vso.VSOClient`, optional
            VSO Client instance to use for search and download.
            If not specified a new instance will be created.
        progress : `bool`, optional
            If True, displays the progress bar during file download.
        methods : `str` or iterable of `str`, optional
            Set the VSOClient download method; see `~sunpy.net.vso.VSOClient.fetch`
            for details.

        Examples
        --------
        The `~sunpy.database.Database.fetch` method can be used along with the ``overwrite=True``
        argument to overwrite and redownload files corresponding to the query, even if
        entries for them are already present in the database. Note that with ``overwrite=True``
        the old matching database entries are deleted and new database entries are
        added with information from the redownloaded files.

        >>> from sunpy.database import Database
        >>> from sunpy.database.tables import display_entries
        >>> from sunpy.net import vso, attrs as a
        >>> database = Database('sqlite:///:memory:')
        >>> database.fetch(a.Time('2012-08-05', '2012-08-05 00:00:05'),
        ...                a.Instrument.aia)  # doctest: +REMOTE_DATA
        >>> print(display_entries(database,
        ...                       ['id', 'observation_time_start', 'observation_time_end',
        ...                        'instrument', 'wavemin', 'wavemax']))  # doctest: +REMOTE_DATA
            id observation_time_start observation_time_end instrument wavemin wavemax
            --- ---------------------- -------------------- ---------- ------- -------
              1    2012-08-05 00:00:01  2012-08-05 00:00:02        AIA     9.4     9.4
              2    2012-08-05 00:00:01  2012-08-05 00:00:02        AIA     9.4     9.4
              3    2012-08-05 00:00:02  2012-08-05 00:00:03        AIA    33.5    33.5
              4    2012-08-05 00:00:02  2012-08-05 00:00:03        AIA    33.5    33.5
        >>> database.fetch(a.Time('2012-08-05', '2012-08-05 00:00:01'),
        ...                a.Instrument.aia, overwrite=True)  # doctest: +REMOTE_DATA
        >>> print(display_entries(database,
        ...                       ['id', 'observation_time_start', 'observation_time_end',
        ...                        'instrument', 'wavemin', 'wavemax']))  # doctest: +REMOTE_DATA
             id observation_time_start observation_time_end instrument wavemin wavemax
            --- ---------------------- -------------------- ---------- ------- -------
              3    2012-08-05 00:00:02  2012-08-05 00:00:03        AIA    33.5    33.5
              4    2012-08-05 00:00:02  2012-08-05 00:00:03        AIA    33.5    33.5
              5    2012-08-05 00:00:01  2012-08-05 00:00:02        AIA     9.4     9.4
              6    2012-08-05 00:00:01  2012-08-05 00:00:02        AIA     9.4     9.4

        Here the first two entries (IDs 1 and 2) were overwritten and their files were redownloaded,
        resulting in the entries with IDs 5 and 6.
        """

        if not query:
            raise TypeError('at least one attribute required')

        client = kwargs.get('client', None)
        if client is None:
            client = VSOClient()
        qr = client.search(*query, response_format="legacy")

        # don't do anything if querying results in no data
        if not qr:
            return

        entries = list(self._download_and_collect_entries(
            qr, **kwargs))

        self.add_many(entries)
Example #11
    def fetch(self, *query, **kwargs):

        """
        fetch(*query[, path, overwrite, client, progress, methods])

        Check if the query has already been used to collect new data.

        If yes, query the database using the method
        :meth:`sunpy.database.Database.search` and return the result.

        Otherwise, the retrieved search result is used to download all files
        that belong to this search result. After that, all the gathered
        information (the one from the query result and the one from the
        downloaded files) is added to the database in a way that each header
        is represented by one database entry.

        It uses the
        :meth:`sunpy.database.Database._download_and_collect_entries` method
        to download files, which uses query result block level caching. This
        means that files will not be downloaded for any query result block
        that had its files downloaded previously. If files for Query A were
        already downloaded, and then Query B is made which has some result
        blocks common with Query A, then files for these common blocks will
        not be downloaded again. Files will only be downloaded for those
        blocks which are new or haven't had their files downloaded yet.

        If querying results in no data, no operation is performed. Concretely,
        this means that no entry is added to the database and no file is
        downloaded.

        Parameters
        ----------
        query : `list`
            A variable number of attributes that are chained together via the
            boolean AND operator. The | operator may be used between attributes
            to express the boolean OR operator.
        path : `str`, optional
            The directory into which files will be downloaded.
        overwrite : `bool`, optional
            If True, matching database entries from the query results will be
            deleted and replaced with new database entries, with all files
            getting downloaded.
            Otherwise, no files are downloaded for matching database entries
            and those entries are not updated.
        client : `sunpy.net.vso.VSOClient`, optional
            VSO Client instance to use for search and download.
            If not specified a new instance will be created.
        progress : `bool`, optional
            If True, displays the progress bar during file download.
        methods : `str` or iterable of `str`, optional
            Set the VSOClient download method; see `~sunpy.net.vso.VSOClient.fetch`
            for details.

        Examples
        --------
        The `~sunpy.database.Database.fetch` method can be used along with the ``overwrite=True``
        argument to overwrite and redownload files corresponding to the query, even if
        entries for them are already present in the database. Note that with ``overwrite=True``
        the old matching database entries are deleted and new database entries are
        added with information from the redownloaded files.

        >>> from sunpy.database import Database
        >>> from sunpy.database.tables import display_entries
        >>> from sunpy.net import vso
        >>> database = Database('sqlite:///:memory:')
        >>> database.fetch(vso.attrs.Time('2012-08-05', '2012-08-05 00:00:05'),
        ...                vso.attrs.Instrument('AIA'))  # doctest: +REMOTE_DATA
        >>> print(display_entries(database,
        ...                       ['id', 'observation_time_start', 'observation_time_end',
        ...                        'instrument', 'wavemin', 'wavemax']))  # doctest: +REMOTE_DATA
            id observation_time_start observation_time_end instrument wavemin wavemax
            --- ---------------------- -------------------- ---------- ------- -------
              1    2012-08-05 00:00:01  2012-08-05 00:00:02        AIA     9.4     9.4
              2    2012-08-05 00:00:01  2012-08-05 00:00:02        AIA     9.4     9.4
              3    2012-08-05 00:00:02  2012-08-05 00:00:03        AIA    33.5    33.5
              4    2012-08-05 00:00:02  2012-08-05 00:00:03        AIA    33.5    33.5
        >>> database.fetch(vso.attrs.Time('2012-08-05', '2012-08-05 00:00:01'),
        ...                vso.attrs.Instrument('AIA'), overwrite=True)  # doctest: +REMOTE_DATA
        >>> print(display_entries(database,
        ...                       ['id', 'observation_time_start', 'observation_time_end',
        ...                        'instrument', 'wavemin', 'wavemax']))  # doctest: +REMOTE_DATA
             id observation_time_start observation_time_end instrument wavemin wavemax
            --- ---------------------- -------------------- ---------- ------- -------
              3    2012-08-05 00:00:02  2012-08-05 00:00:03        AIA    33.5    33.5
              4    2012-08-05 00:00:02  2012-08-05 00:00:03        AIA    33.5    33.5
              5    2012-08-05 00:00:01  2012-08-05 00:00:02        AIA     9.4     9.4
              6    2012-08-05 00:00:01  2012-08-05 00:00:02        AIA     9.4     9.4

        Here the first two entries (IDs 1 and 2) were overwritten and their files were redownloaded,
        resulting in the entries with IDs 5 and 6.
        """

        if not query:
            raise TypeError('at least one attribute required')

        client = kwargs.get('client', None)
        if client is None:
            client = VSOClient()
        qr = client.search(*query)

        # don't do anything if querying results in no data
        if not qr:
            return

        entries = list(self._download_and_collect_entries(
            qr, **kwargs))

        self.add_many(entries)
Example #12
    def _download_and_collect_entries(self, query_result, **kwargs):

        client = kwargs.pop('client', None)
        path = kwargs.pop('path', None)
        progress = kwargs.pop('progress', False)
        methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))
        overwrite = kwargs.pop('overwrite', False)

        if kwargs:
            k, v = kwargs.popitem()
            raise TypeError('unexpected keyword argument {0!r}'.format(k))

        if client is None:
            client = VSOClient()

        remove_list = []
        delete_entries = []
        for qr in query_result:
            temp = tables.DatabaseEntry._from_query_result_block(qr)
            for database_entry in self:
                if database_entry.path is not None and temp._compare_attributes(
                    database_entry, ["source", "provider", "physobs", "fileid",
                                     "observation_time_start", "observation_time_end",
                                     "instrument", "size", "wavemin", "wavemax"]):
                    if not overwrite:
                        remove_list.append(qr)
                    else:
                        delete_entries.append(database_entry)

        for temp in remove_list:
            query_result = [x for x in query_result if x != temp]

        for temp in delete_entries:
            self.remove(temp)

        paths = client.fetch(query_result, path).wait(progress=progress)

        for (path, block) in zip(paths, query_result):
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)

            if os.path.isfile(path):
                entries = tables.entries_from_file(path, self.default_waveunit)
            elif os.path.isdir(path):
                entries = tables.entries_from_dir(path, self.default_waveunit)
            else:
                raise ValueError('The path is neither a file nor directory')

            for entry in entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start = qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
                yield entry
Example #13
#!/usr/bin/env python

import sunpy
from sunpy.net.jsoc import JSOCClient
from sunpy.net.vso import VSOClient

print(f"Updating the attrs json files using sunpy {sunpy.__version__}...")

print("Updating VSO json...")

VSOClient.create_parse_vso_values()

print("Updating JSOC json...\nThis may take some time...")

JSOCClient.create_parse_jsoc_values()

print("Done. Don't forget to update the doctests.")
Example #14
def find_sdo_files(directory, wavelength='', time_limits=None, cadence=12, download=None, double_check='Yes'):
    """***This function no longer work. It will be updated or removed soon.***

    Checks a directory for missing files from downloading AIA images and can check/download the 
    files from the missing time.

    ***This function may need to run several times. It depends on how well the files are downloaded***
    
    Parameters
    ----------
    directory : Str
            A string of the path to the files to be checked.
    
    wavelength : Str
            The wavelength of the files to check. Only important if download = 'yes' or 'auto'. For HMI 
            files then can have 'los_magnetic_field', 'intensity', etc.
            Default: ''

    time_limits : list
            A list of two entries for the start and end time of the observation. To check if any files
            were missed before the first and after the last file you have.
            Default: None

    cadence : Int
            An integer number of seconds that should be the temporal seperation of the files.
            Default: 12

    download : Str
            Indicates whether missing files should be searched for/downloaded. If set to None then there
            will be a prompt to ask, enter 'Yes' or 'No'. Setting to 'auto' will search for the data 
            automatically without user input.
            Default: None

    double_check : Str
            After checking for more files to download check again - without downloading - to see if 
            there are any still missing, e.g. 'Yes' or 'No'. Can also have 'recursive' which keeps checking 
            all files are found in case there is a large time gap in the middle *** BEWARE INFINITIES ***.
            Default: 'Yes'
            
    Returns
    -------
    A list of the files with no friends cadence seconds after them.
    """

    # this function no longer works at the moment
    print("This function no longer work. It will be updated or removed soon.")
    return

    files_list = list(os.listdir(directory))
    files = [ f for f in files_list if f.endswith('.fits')]
    files.sort()
    
    if files == []:
        empty_but_download = input('The folder provided does not appear to have any \'.fits\' files within it. \nDo you want to download within the time range given in the form \"%Y-%m-%d %H:%M:%S\" (this will only work for SDO/AIA files at the moment)? ')
        if empty_but_download == 'Yes':
            client = VSOClient()
            query_response = client.query_legacy(tstart=time_limits[0], tend=time_limits[1], instrument='AIA', wave=wavelength)
            n = len(query_response) - 1 #will be used to index the list 'query_response' when downloading
            #Download the results from ROB to the given directory and wait for the download to complete
            results = client.get(query_response[0:n], path=directory, site='rob')
            fs = results.wait()
            still_no_friends = find_sdo_files(directory, wavelength, time_limits=time_limits, download='Yes', double_check='No')
            return
        else:
            assert files != [], f'No .fits files in {directory}.' #make sure there are files in the first place

    no_friends = [] #files that do not have a friend within the cadence time after them
    t_of_no_friends = [] #time of files that have no friends
    t_end_of_no_friends = [] #time of the next existing file after each missing-file gap

    for f in range(len(files)-1): #don't want to look at the last one as it will never have anything after it anyway
        time_0 = datetime.datetime.strptime(files[f][4:19], '%Y%m%d_%H%M%S')
        time_1 = datetime.datetime.strptime(files[f+1][4:19], '%Y%m%d_%H%M%S')
        if time_0 <= time_1 <= time_0 + timedelta(seconds=cadence): #if there is a file <=cadence seconds ahead, move on
            continue
        else: #if there is not a file <=cadence seconds ahead, add it to the no-friends list
            no_friends.append(files[f])
            t_of_no_friends.append(time_0)
            t_end_of_no_friends.append(time_1)

    if (download is None) and (len(t_of_no_friends) > 0):
        download = input('Would you like the times of the missing files checked (yea or nay)? ')

        if download in ['No', 'no', 'N', 'n', 'Nope', 'nope', 'Nay', 'nay']:
            download = 'No'
        elif download in ['Yes', 'yes', 'Y', 'y', 'Yip', 'yip', 'Yea', 'yea']:
            download = 'Yes'

    if len(t_of_no_friends) > 0:
        print('There are ', len(t_of_no_friends), ' time intervals of missing files.')

    if (download == 'Yes'):

        client = VSOClient()

        if len(t_of_no_friends) > 0:
            start_times = [t.strftime("%Y-%m-%d %H:%M:%S") for t in t_of_no_friends]
            #search up to the time of the next existing file
            end_times = [t.strftime("%Y-%m-%d %H:%M:%S") for t in t_end_of_no_friends]
            for ts, te in zip(start_times, end_times):
                if files[0][0:3] == 'aia':
                    query_response = client.query_legacy(tstart=ts, tend=te, instrument='AIA', wave=wavelength)
                elif files[0][0:3] == 'hmi':
                    query_response = client.query_legacy(tstart=ts, tend=te, instrument='HMI', physobs=wavelength)
                n = len(query_response) - 1 #will be used to index the list 'query_response' when downloading

                #Download the results from ROB to the given directory and wait for the download to complete
                results = client.get(query_response[0:n], path=directory, site='rob')
                fs = results.wait()

        if time_limits is not None:
            time_first = datetime.datetime.strptime(files[0][4:19], '%Y%m%d_%H%M%S')
            time_last = datetime.datetime.strptime(files[-1][4:19], '%Y%m%d_%H%M%S')
        
            time_limits_first = datetime.datetime.strptime(time_limits[0], "%Y-%m-%d %H:%M:%S")
            time_limits_last = datetime.datetime.strptime(time_limits[1], "%Y-%m-%d %H:%M:%S")

            if time_first - time_limits_first >= timedelta(seconds=cadence): #if the gap between the given start time and the first file's start time is at least one cadence
                print('Checking start time-gap.')
                if files[0][0:3] == 'aia':
                    query_response = client.query_legacy(tstart=time_limits[0], tend=time_first.strftime("%Y-%m-%d %H:%M:%S"), instrument='AIA', wave=wavelength)
                elif files[0][0:3] == 'hmi':
                    query_response = client.query_legacy(tstart=time_limits[0], tend=time_first.strftime("%Y-%m-%d %H:%M:%S"), instrument='HMI', physobs=wavelength)
                n = len(query_response) - 1 #will be used to index the list 'query_response' when downloading
                #Download the results from ROB to the given directory and wait for the download to complete
                results = client.get(query_response[0:n], path=directory, site='rob')
                fs = results.wait()
            if time_limits_last - time_last >= timedelta(seconds=cadence): #if the gap between the given end time and the last file's start time is at least one cadence
                print('Checking end time-gap.')
                if files[0][0:3] == 'aia':
                    query_response = client.query_legacy(tstart=time_last.strftime("%Y-%m-%d %H:%M:%S"), tend=time_limits[1], instrument='AIA', wave=wavelength)
                elif files[0][0:3] == 'hmi':
                    query_response = client.query_legacy(tstart=time_last.strftime("%Y-%m-%d %H:%M:%S"), tend=time_limits[1], instrument='HMI', physobs=wavelength)
                n = len(query_response) - 1 #will be used to index the list 'query_response' when downloading
                #Download the results from ROB to the given directory and wait for the download to complete
                results = client.get(query_response[0:n], path=directory, site='rob')
                fs = results.wait()
            
    duplicates = glob.glob(directory + '*.*.fits') #removes files that downloaded twice
    for each_file_path in duplicates:
        os.remove(each_file_path)

    if no_friends == []: #if there aren't any files to check for then don't double check or anything, just stop
        print('All files here!')
        return
    
    if double_check == 'No':
        print(f'Here are files without friends {cadence} seconds ahead of them from directory \n{directory}:')
        print(no_friends)
        print('Please wait a few minutes and try this function again, it depends on the servers sometimes.')
        return no_friends
    elif double_check == 'Yes': #double check to see if we have all the files
        still_no_friends = find_sdo_files(directory, wavelength, time_limits=time_limits, download='Yes', double_check='No')
    elif double_check == 'recursive':
        still_no_friends = find_sdo_files(directory, wavelength, time_limits=time_limits, download='Yes', double_check='recursive')
    return still_no_friends
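
The intended call pattern for `find_sdo_files`, based on its docstring (note that the function currently returns early, as stated at the top); the directory, wavelength and time limits below are placeholders:

missing = find_sdo_files('./aia_171/',
                         wavelength='171',
                         time_limits=['2017-09-10 15:00:00', '2017-09-10 16:00:00'],
                         cadence=12,
                         download='Yes',
                         double_check='No')
# `missing` lists the files that still have no follow-up file within `cadence` seconds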