Example #1
def test_get_and_create_download_dir():
    # test default config
    path = get_and_create_download_dir()
    assert path == os.path.join(USER, 'sunpy', 'data')
    # test updated config
    new_path = os.path.join(USER, 'data_here_please')
    config.set('downloads', 'download_dir', new_path)
    path = get_and_create_download_dir()
    assert path == os.path.join(USER, new_path)
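
A note on what the function under test does: a minimal sketch of get_and_create_download_dir(), assuming it simply reads the downloads/download_dir config option and creates that directory (hypothetical; the real sunpy implementation may differ):

import os
from sunpy import config

def get_and_create_download_dir_sketch():
    # Read the configured download directory and make sure it exists.
    download_dir = config.get('downloads', 'download_dir')
    os.makedirs(download_dir, exist_ok=True)  # no-op if it already exists
    return download_dir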
Example #2
def test_get_and_create_download_dir(undo_download_dir_patch):
    # test default config
    path = get_and_create_download_dir()
    assert Path(path) == Path(USER) / 'sunpy' / 'data'
    # test updated config
    new_path = os.path.join(USER, 'sunpy_data_here_please')
    config.set('downloads', 'download_dir', new_path)
    path = get_and_create_download_dir()
    assert path == os.path.join(USER, new_path)
    # Set the config back
    os.rmdir(new_path)
    config.set('downloads', 'download_dir', os.path.join(USER, 'sunpy', 'data'))
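
The undo_download_dir_patch fixture used above is not shown in these examples. A minimal sketch of what such a pytest fixture might look like, assuming it only restores the original config value afterwards (hypothetical, not the actual sunpy fixture):

import pytest
from sunpy import config

@pytest.fixture
def undo_download_dir_patch():
    # Remember the configured download directory...
    original = config.get('downloads', 'download_dir')
    yield
    # ...and restore it even if the test fails partway through.
    config.set('downloads', 'download_dir', original)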
Example #3
def test_get_and_create_download_dir(undo_download_dir_patch):
    # test default config
    path = get_and_create_download_dir()
    assert Path(path) == Path(USER) / 'sunpy' / 'data'
    # test updated config
    new_path = os.path.join(USER, 'sunpy_data_here_please')
    config.set('downloads', 'download_dir', new_path)
    path = get_and_create_download_dir()
    assert path == os.path.join(USER, new_path)
    # Set the config back
    os.rmdir(new_path)
    config.set('downloads', 'download_dir',
               os.path.join(USER, 'sunpy', 'data'))
Example #4
def test_print_config_files(tmpdir, tmp_path, undo_download_dir_patch):
    with io.StringIO() as buf, redirect_stdout(buf):
        print_config()
        printed = buf.getvalue()
    assert "time_format = %Y-%m-%d %H:%M:%S" in printed
    assert _find_config_files()[0] in printed
    assert get_and_create_download_dir() in printed
    assert get_and_create_sample_dir() in printed
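
Example #4 captures printed output with contextlib.redirect_stdout instead of swapping sys.stdout by hand (compare Examples #9 and #10). The capture idiom in isolation:

import io
from contextlib import redirect_stdout

with io.StringIO() as buf, redirect_stdout(buf):
    print("hello")
    captured = buf.getvalue()  # read before the buffer is closed
assert captured == "hello\n"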
Example #5
    def from_url(cls, url):
        """ Return object read from URL.

        Parameters
        ----------
        url : str
            URL to retrieve the data from
        """
        path = download_file(url, get_and_create_download_dir())
        return cls.read(path)
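
The snippet is a classmethod on a data container (the @classmethod decorator presumably sits just above the excerpt). A hedged sketch of the pattern with a hypothetical class; download_file and get_and_create_download_dir are the same helpers used in the excerpt:

class DataContainer:
    @classmethod
    def from_url(cls, url):
        # Download into sunpy's configured data directory, then parse.
        path = download_file(url, get_and_create_download_dir())
        return cls.read(path)

    @classmethod
    def read(cls, path):
        raise NotImplementedError  # real subclasses parse the file here

# Usage: obj = SomeSubclass.from_url("https://example.com/data.fits")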
Example #6
    def from_url(cls, url):
        """ Return object read from URL.

        Parameters
        ----------
        url : str
            URL to retrieve the data from
        """
        path = download_file(url, get_and_create_download_dir())
        return cls.read(path)
Example #7
    def get(self, qres, path=None, error_callback=None, **kwargs):
        """
        Download a set of results.

        Parameters
        ----------
        qres : `~sunpy.net.dataretriever.QueryResponse`
            Results to download.

        Returns
        -------
        Results Object
        """

        urls = []
        for qrblock in qres:
            urls.append(qrblock.url)

        filenames = []
        for url in urls:
            filenames.append(url.split('/')[-1])

        paths = []
        for i, filename in enumerate(filenames):
            if path is None:
                fname = os.path.join(get_and_create_download_dir(), '{file}')
            elif isinstance(path, six.string_types) and '{file}' not in path:
                fname = os.path.join(path, '{file}')

            temp_dict = qres[i].map_.copy()
            temp_dict['file'] = filename
            fname = fname.format(**temp_dict)
            fname = os.path.expanduser(fname)

            if os.path.exists(fname):
                fname = replacement_filename(fname)

            fname = partial(simple_path, fname)

            paths.append(fname)

        res = Results(lambda x: None, 0, lambda map_: self._link(map_))

        dobj = Downloader(max_conn=len(urls), max_total=len(urls))

        # We cast to list here in list(zip... to force execution of
        # res.require([x]) at the start of the loop.
        for aurl, ncall, fname in list(
                zip(urls, map(lambda x: res.require([x]), urls), paths)):
            dobj.download(aurl, fname, ncall, error_callback)

        return res
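
The '{file}' placeholder in the download path is filled with str.format using per-result metadata (qres[i].map_ plus the filename). A standalone sketch of that templating step, with a hypothetical metadata dict:

import os

temp_dict = {"instrument": "lyra",
             "file": "lyra_20140101-000000_lev3_std.fits"}
fname = os.path.join("/tmp/downloads", "{file}")
fname = fname.format(**temp_dict)
fname = os.path.expanduser(fname)
# -> /tmp/downloads/lyra_20140101-000000_lev3_std.fits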
Example #8
    def get(self, qres, path=None, error_callback=None, **kwargs):
        """
        Download a set of results.

        Parameters
        ----------
        qres : `~sunpy.net.dataretriever.QueryResponse`
            Results to download.

        Returns
        -------
        Results Object
        """

        urls = []
        for qrblock in qres:
            urls.append(qrblock.url)

        filenames = []
        for url in urls:
            filenames.append(url.split('/')[-1])

        paths = []
        for i, filename in enumerate(filenames):
            if path is None:
                fname = os.path.join(get_and_create_download_dir(), '{file}')
            elif isinstance(path, six.string_types) and '{file}' not in path:
                fname = os.path.join(path, '{file}')

            temp_dict = qres[i].map_.copy()
            temp_dict['file'] = filename
            fname = fname.format(**temp_dict)
            fname = os.path.expanduser(fname)

            if os.path.exists(fname):
                fname = replacement_filename(fname)

            fname = partial(simple_path, fname)

            paths.append(fname)

        res = Results(lambda x: None, 0, lambda map_: self._link(map_))

        dobj = Downloader(max_conn=len(urls), max_total=len(urls))

        # We cast to list here in list(zip... to force execution of
        # res.require([x]) at the start of the loop.
        for aurl, ncall, fname in list(zip(urls, map(lambda x: res.require([x]),
                                           urls), paths)):
            dobj.download(aurl, fname, ncall, error_callback)

        return res
Example #9
def test_print_config_files():
    # TODO: Tidy this up.
    stdout = sys.stdout
    out = io.StringIO()
    sys.stdout = out
    print_config()
    sys.stdout = stdout
    out.seek(0)
    printed = out.read()
    assert "time_format = %Y-%m-%d %H:%M:%S" in printed
    assert _find_config_files()[0] in printed
    assert get_and_create_download_dir() in printed
    assert get_and_create_sample_dir() in printed
Example #10
def test_print_config_files(undo_download_dir_patch):
    # TODO: Tidy this up.
    stdout = sys.stdout
    out = io.StringIO()
    sys.stdout = out
    print_config()
    sys.stdout = stdout
    out.seek(0)
    printed = out.read()
    assert "time_format = %Y-%m-%d %H:%M:%S" in printed
    assert _find_config_files()[0] in printed
    assert get_and_create_download_dir() in printed
    assert get_and_create_sample_dir() in printed
Example #11
def get_lytaf_event_types(lytaf_path=None, print_event_types=True):
    """Prints the different event types in the each of the LYTAF databases.

    Parameters
    ----------
    lytaf_path : `str`
        Path location where LYTAF files are stored.
        Default = Path stored in config file.

    print_event_types : `bool`
        If True, prints the artifacts in each lytaf database to screen.

    Returns
    -------
    all_event_types : `list`
        List of all event types in all lytaf databases.

    """
    # Set lytaf_path if not set by the user
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    suffixes = ["lyra", "manual", "ppt", "science"]
    all_event_types = []
    # For each database file extract the event types and print them.
    if print_event_types:
        print("\nLYTAF Event Types\n-----------------\n")
    for suffix in suffixes:
        dbname = "annotation_{0}.db".format(suffix)
        # Check database file exists, else download it.
        check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path)
        # Open SQLITE3 LYTAF files
        connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
        # Create cursor to manipulate data in annotation file
        cursor = connection.cursor()
        cursor.execute("select type from eventType;")
        event_types = cursor.fetchall()
        all_event_types.append(event_types)
        if print_event_types:
            print("----------------\n{0} database\n----------------".format(
                suffix))
            for event_type in event_types:
                print(str(event_type[0]))
            print(" ")
    # Unpack event types in all_event_types into single list
    all_event_types = [
        event_type[0] for event_types in all_event_types
        for event_type in event_types
    ]
    return all_event_types
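
Typical usage, hedged since the output depends on the downloaded LYTAF databases:

# Collect the event types quietly, then inspect the distinct values.
event_types = get_lytaf_event_types(print_event_types=False)
print(sorted(set(event_types)))  # e.g. ['LAR', ...]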
Example #12
def get_lytaf_event_types(lytaf_path=None, print_event_types=True):
    """Prints the different event types in the each of the LYTAF databases.

    Parameters
    ----------
    lytaf_path : `str`
        Path location where LYTAF files are stored.
        Default = Path stored in config file.

    print_event_types : `bool`
        If True, prints the artifacts in each lytaf database to screen.

    Returns
    -------
    all_event_types : `list`
        List of all event types in all lytaf databases.

    """
    # Set lytaf_path if not set by the user
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    suffixes = ["lyra", "manual", "ppt", "science"]
    all_event_types = []
    # For each database file extract the event types and print them.
    if print_event_types:
        print("\nLYTAF Event Types\n-----------------\n")
    for suffix in suffixes:
        dbname = "annotation_{0}.db".format(suffix)
        # Check database file exists, else download it.
        check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path)
        # Open SQLITE3 LYTAF files
        connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
        # Create cursor to manipulate data in annotation file
        cursor = connection.cursor()
        cursor.execute("select type from eventType;")
        event_types = cursor.fetchall()
        all_event_types.append(event_types)
        if print_event_types:
            print("----------------\n{0} database\n----------------"
                  .format(suffix))
            for event_type in event_types:
                print(str(event_type[0]))
            print(" ")
    # Unpack event types in all_event_types into single list
    all_event_types = [event_type[0] for event_types in all_event_types
                       for event_type in event_types]
    return all_event_types
Example #13
    def download(self, url, path=None, callback=None, errback=None):
        """Downloads a file at a specified URL.

        Parameters
        ----------
        url : str
            URL of file to download
        path : function, str
            Location to save file to. Can specify either a directory as a string
            or a function with signature: (path, url).
            Defaults to directory specified in sunpy configuration
        callback : function
            Function to call when download is successfully completed
        errback : function
            Function to call when download fails

        Returns
        -------
        out : None
        """
        # Load balancing?
        # TODO: explain

        server = self._get_server(url)

        # Create function to compute the filepath to download to if not set

        if path is None:
            path = partial(default_name, get_and_create_download_dir())
        elif isinstance(path, str):
            path = partial(default_name, path)
        elif not callable(path):
            raise ValueError("path must be: None, string or callable")

        # Use default callbacks if none were specified
        if callback is None:
            callback = self._default_callback
        if errback is None:
            errback = self._default_error_callback

        # Attempt to download file from URL
        if not self._attempt_download(url, path, callback, errback):
            # If there are too many concurrent downloads, queue for later
            self.q[server].append((url, path, callback, errback))
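
Because path may be a callable with signature (path, url), callers can compute filenames on the fly instead of relying on the configured download directory. A minimal sketch (the downloader instance dl is hypothetical):

import os
from functools import partial

def name_by_basename(directory, url):
    # Matches the (path, url) signature described in the docstring.
    return os.path.join(directory, url.split('/')[-1])

path_fn = partial(name_by_basename, "/tmp/downloads")
# dl.download("https://example.com/a.fits", path=path_fn)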
Example #14
    def download(self, url, path=None, callback=None, errback=None):
        """Downloads a file at a specified URL.

        Parameters
        ----------
        url : string
            URL of file to download
        path : function, string
            Location to save file to. Can specify either a directory as a string
            or a function with signature: (path, url).
            Defaults to directory specified in sunpy configuration
        callback : function
            Function to call when download is successfully completed
        errback : function
            Function to call when download fails

        Returns
        -------
        out : None
        """
        # Load balancing?
        # @todo: explain

        server = self._get_server(url)

        # Create function to compute the filepath to download to if not set

        if path is None:
            path = partial(default_name, get_and_create_download_dir())
        elif isinstance(path, six.string_types):
            path = partial(default_name, path)
        elif not callable(path):
            raise ValueError("path must be: None, string or callable")

        # Use default callbacks if none were specified
        if callback is None:
            callback = self._default_callback
        if errback is None:
            errback = self._default_error_callback

        # Attempt to download file from URL
        if not self._attempt_download(url, path, callback, errback):
            # If there are too many concurrent downloads, queue for later
            self.q[server].append((url, path, callback, errback))
Example #15
    def _download(uri, kwargs,
                  err='Unable to download data at specified URL'):
        """Attempts to download data at the specified URI.

        Parameters
        ----------
        uri : str
            The URL to download the data from.
        kwargs : dict
            Optional download options, e.g. "directory" and "overwrite".
        """

        _filename = os.path.basename(uri).split("?")[0]

        # user specifies a download directory
        if "directory" in kwargs:
            download_dir = os.path.expanduser(kwargs["directory"])
        else:
            download_dir = get_and_create_download_dir()

        # overwrite the existing file if the keyword is present
        if "overwrite" in kwargs:
            overwrite = kwargs["overwrite"]
        else:
            overwrite = False

        # If the file is not already there, download it
        filepath = os.path.join(download_dir, _filename)

        if not(os.path.isfile(filepath)) or (overwrite and
                                             os.path.isfile(filepath)):
            try:
                response = urllib.request.urlopen(uri)
            except (urllib.error.HTTPError, urllib.error.URLError):
                raise urllib.error.URLError(err)
            with open(filepath, 'wb') as fp:
                shutil.copyfileobj(response, fp)
        else:
            warnings.warn("Using existing file rather than downloading, use "
                          "overwrite=True to override.", RuntimeWarning)

        return filepath
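
Note that kwargs here is an ordinary dict passed positionally, not **kwargs as the original docstring suggested. A hedged usage sketch (calling the private helper directly, for illustration only):

# Download into a user-chosen directory, overwriting any existing copy.
filepath = _download("https://example.com/data.fits",
                     {"directory": "~/my_data", "overwrite": True})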
Example #16
    def _download(uri, kwargs,
                  err='Unable to download data at specified URL'):
        """Attempts to download data at the specified URI.

        Parameters
        ----------
        uri : str
            The URL to download the data from.
        kwargs : dict
            Optional download options, e.g. "directory" and "overwrite".
        """

        _filename = os.path.basename(uri).split("?")[0]

        # user specifies a download directory
        if "directory" in kwargs:
            download_dir = os.path.expanduser(kwargs["directory"])
        else:
            download_dir = get_and_create_download_dir()

        # overwrite the existing file if the keyword is present
        if "overwrite" in kwargs:
            overwrite = kwargs["overwrite"]
        else:
            overwrite = False

        # If the file is not already there, download it
        filepath = os.path.join(download_dir, _filename)

        if not(os.path.isfile(filepath)) or (overwrite and
                                             os.path.isfile(filepath)):
            try:
                response = urllib.request.urlopen(uri)
            except (urllib.error.HTTPError, urllib.error.URLError):
                raise urllib.error.URLError(err)
            with open(filepath, 'wb') as fp:
                shutil.copyfileobj(response, fp)
        else:
            warnings.warn("Using existing file rather than downloading, use "
                          "overwrite=True to override.", RuntimeWarning)

        return filepath
Example #17
def _remove_lytaf_events(time, channels=None, artifacts=None,
                         return_artifacts=False, fitsfile=None,
                         csvfile=None, filecolumns=None,
                         lytaf_path=None, force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts from a time series.

    This function removes periods corresponding to certain artifacts recorded
    in the LYRA annotation file from an array of times given by the time input.
    If a list of arrays of other properties is supplied through the channels
    kwarg, then the relevant values from these arrays are also removed.  This
    is done by assuming that each element in each array supplied corresponds to
    the time in the same index in time array.  The artifacts to be removed are
    given via the artifacts kwarg.  The default is "all", meaning that all
    artifacts will be removed.  However, a subset of artifacts can be removed
    by supplying a list of strings of the desired artifact types.

    Parameters
    ----------
    time : `numpy.ndarray` of `datetime.datetime`
        Gives the times of the timeseries.

    channels : `list` of `numpy.array` convertible to float64.
        Contains arrays of the irradiances taken at the times in the time
        variable.  Each element in the list must have the same number of
        elements as time.

    artifacts : `list` of strings
        Contain the artifact types to be removed.  For list of artifact types
        see reference [1].  For example, if user wants to remove only large
        angle rotations, listed at reference [1] as LAR, let artifacts=["LAR"].
        Default=[], i.e. no artifacts will be removed.

    return_artifacts : `bool`
        Set to True to return a numpy recarray containing the start time, end
        time and type of all artifacts removed.
        Default=False

    fitsfile : `str`
        file name (including file path and suffix, .fits) of output fits file
        which is generated if this kwarg is not None.
        Default=None, i.e. no fits file is output.

    csvfile : `str`
        file name (including file path and suffix, .csv) of output csv file
        which is generated if this kwarg is not None.
        Default=None, i.e. no csv file is output.

    filecolumns : `list` of strings
        Gives names of columns of any output files produced.  Although
        initially set to None above, the default is in fact
        ["time", "channel0", "channel1",..."channelN"]
        where N is the number of irradiance arrays in the channels input
        (assuming 0-indexed counting).

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    clean_time : `numpy.ndarray` of `datetime.datetime`
        time array with artifact periods removed.

    clean_channels : `list` of ndarrays/array-likes convertible to float64
        list of irradiance arrays with artifact periods removed.

    artifact_status : `dict`
        List of 4 variables containing information on what artifacts were
        found, removed, etc. from the time series.
        artifact_status["lytaf"] = artifacts found : `numpy.recarray`
            The full LYRA annotation file for the time series time range
            output by get_lytaf_events().
        artifact_status["removed"] = artifacts removed : `numpy.recarray`
            Artifacts which were found and removed from the time series.
        artifact_status["not_removed"] = artifacts found but not removed :
              `numpy.recarray`
            Artifacts which were found but not removed as they were not
            included when user defined artifacts kwarg.
        artifact_status["not_found"] = artifacts not found : `list` of strings
            Artifacts listed to be removed by user when defining artifacts
            kwarg which were not found in time series time range.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Example
    -------
    Sample data for example
        >>> from datetime import datetime, timedelta
        >>> from sunpy.instr.lyra import _remove_lytaf_events

        >>> time = np.array([datetime(2013, 2, 1)+timedelta(minutes=i)
        ...                 for i in range(120)])
        >>> channel_1 = np.zeros(len(time))+0.4
        >>> channel_2 = np.zeros(len(time))+0.1

    Remove LARs (Large Angle Rotations) from time series.

        >>> time_clean, channels_clean = _remove_lytaf_events(
        ...   time, channels=[channel_1, channel_2], artifacts=['LAR'])  # doctest: +REMOTE_DATA

    """
    # Check inputs
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    if channels and type(channels) is not list:
        raise TypeError("channels must be None or a list of numpy arrays "
                        "of dtype 'float64'.")
    if not artifacts:
        raise ValueError("User has supplied no artifacts to remove.")
    if type(artifacts) is str:
        artifacts = [artifacts]
    if not all(isinstance(artifact_type, str) for artifact_type in artifacts):
        raise TypeError("All elements in artifacts must in strings.")
    all_lytaf_event_types = get_lytaf_event_types(lytaf_path=lytaf_path,
                                                  print_event_types=False)
    for artifact in artifacts:
        if artifact not in all_lytaf_event_types:
            print(all_lytaf_event_types)
            raise ValueError("{0} is not a valid artifact type. See above.".format(artifact))
    # Define outputs
    clean_time = np.array([parse_time(t) for t in time])
    clean_channels = copy.deepcopy(channels)
    artifacts_not_found = []
    # Get LYTAF file for given time range
    lytaf = get_lytaf_events(time[0], time[-1], lytaf_path=lytaf_path,
                             force_use_local_lytaf=force_use_local_lytaf)

    # Find events in lytaf which are to be removed from time series.
    artifact_indices = np.empty(0, dtype="int64")
    for artifact_type in artifacts:
        indices = np.where(lytaf["event_type"] == artifact_type)[0]
        # If none of a given type of artifact is found, record this
        # type in artifact_not_found list.
        if len(indices) == 0:
            artifacts_not_found.append(artifact_type)
        else:
            # Else, record the indices of the artifacts of this type
            artifact_indices = np.concatenate((artifact_indices, indices))
    artifact_indices.sort()

    # Remove relevant artifacts from timeseries. If none of the
    # artifacts the user wanted removed were found, raise a warning and
    # continue with code.
    if not len(artifact_indices):
        warn("None of user supplied artifacts were found.")
        artifacts_not_found = artifacts
    else:
        # Remove periods corresponding to artifacts from flux and time
        # arrays.
        bad_indices = np.empty(0, dtype="int64")
        all_indices = np.arange(len(time))
        for index in artifact_indices:
            bad_period = np.logical_and(time >= lytaf["begin_time"][index],
                                        time <= lytaf["end_time"][index])
            bad_indices = np.append(bad_indices, all_indices[bad_period])
        clean_time = np.delete(clean_time, bad_indices)
        if channels:
            for i, f in enumerate(clean_channels):
                clean_channels[i] = np.delete(f, bad_indices)
    # If return_artifacts kwarg is True, return a list containing
    # information on what artifacts found, removed, etc.  See docstring.
    if return_artifacts:
        artifact_status = {"lytaf": lytaf,
                           "removed": lytaf[artifact_indices],
                           "not_removed": np.delete(lytaf, artifact_indices),
                           "not_found": artifacts_not_found}
    # Output FITS file if fits kwarg is set
    if fitsfile:
        # Create time array of time strings rather than datetime objects
        # and verify filecolumns have been correctly input.  If None,
        # generate generic filecolumns (see docstring of function called
        # below).
        string_time, filecolumns = _prep_columns(time, channels, filecolumns)
        # Prepare column objects.
        cols = [fits.Column(name=filecolumns[0], format="26A",
                            array=string_time)]
        if channels:
            for i, f in enumerate(channels):
                cols.append(fits.Column(name=filecolumns[i+1], format="D",
                                        array=f))
        coldefs = fits.ColDefs(cols)
        tbhdu = fits.new_table(coldefs)
        hdu = fits.PrimaryHDU()
        tbhdulist = fits.HDUList([hdu, tbhdu])
        # Write data to fits file.
        tbhdulist.writeto(fitsfile)
    # Output csv file if csv kwarg is set.
    if csvfile:
        # Create time array of time strings rather than datetime objects
        # and verify filecolumns have been correctly input.  If None,
        # generate generic filecolumns (see docstring of function called
        # below).
        string_time, filecolumns = _prep_columns(time, channels, filecolumns)
        # Open and write data to csv file.
        with open(csvfile, 'w') as openfile:
            csvwriter = csv.writer(openfile, delimiter=';')
            # Write header.
            csvwriter.writerow(filecolumns)
            # Write data.
            if not channels:
                for i in range(len(time)):
                    csvwriter.writerow(string_time[i])
            else:
                for i in range(len(time)):
                    row = [string_time[i]]
                    for f in channels:
                        row.append(f[i])
                    csvwriter.writerow(row)
    # Return values.
    if return_artifacts:
        if not channels:
            return clean_time, artifact_status
        else:
            return clean_time, clean_channels, artifact_status
    else:
        if not channels:
            return clean_time
        else:
            return clean_time, clean_channels
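
The shape of the return value depends on channels and return_artifacts; a sketch of the two extremes, reusing the time and channel arrays from the docstring example:

# channels given, return_artifacts=True -> three values
clean_time, clean_channels, status = _remove_lytaf_events(
    time, channels=[channel_1, channel_2], artifacts=["LAR"],
    return_artifacts=True)

# channels omitted, return_artifacts=False -> just the cleaned time array
clean_time = _remove_lytaf_events(time, artifacts=["LAR"])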
Example #18
def get_lytaf_events(start_time,
                     end_time,
                     lytaf_path=None,
                     combine_files=("lyra", "manual", "ppt", "science"),
                     csvfile=None,
                     force_use_local_lytaf=False):
    """
    Extracts combined lytaf file for given time range.

    Given a time range defined by start_time and end_time, this function
    extracts the segments of each LYRA annotation file and combines them.

    Parameters
    ----------
    start_time : `astropy.time.Time` or `str`
        Start time of period for which annotation file is required.

    end_time : `astropy.time.Time` or `str`
        End time of period for which annotation file is required.

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.

    combine_files : `tuple` of strings
        States which LYRA annotation files are to be combined.
        Default is all four, i.e. lyra, manual, ppt, science.
        See Notes section for an explanation of each.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    lytaf : `numpy.recarray`
        Containing the various parameters stored in the LYTAF files.

    Notes
    -----
    There are four LYRA annotation files which mark different types of events
    or artifacts in the data.  They are named annotation_suffix.db where
    suffix is a variable equalling either lyra, manual, ppt, or science.

    annotation_lyra.db : contains entries regarding possible effects to
        the data due to normal operation of LYRA instrument.

    annotation_manual.db : contains entries regarding possible effects
        to the data due to unusual or manually logged events.

    annotation_ppt.db : contains entries regarding possible effects to
        the data due to pointing or positioning of PROBA2.

    annotation_science.db : contains events in the data scientifically
        interesting, e.g. GOES flares.

    References
    ----------
    Further documentation: http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Get all events in the LYTAF files for January 2014
        >>> from sunpy.instr.lyra import get_lytaf_events
        >>> lytaf = get_lytaf_events('2014-01-01', '2014-02-01')  # doctest: +REMOTE_DATA

    """
    # Check inputs
    # Check lytaf path
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    # Parse start_time and end_time
    start_time = parse_time(start_time)
    end_time = parse_time(end_time)
    # Check combine_files contains correct inputs
    if not all(suffix in ["lyra", "manual", "ppt", "science"]
               for suffix in combine_files):
        raise ValueError("Elements in combine_files must be strings equalling "
                         "'lyra', 'manual', 'ppt', or 'science'.")
    # Remove any duplicates from combine_files input
    combine_files = list(set(combine_files))
    combine_files.sort()
    # Convert input times to UNIX timestamp format since this is the
    # time format in the annotation files
    start_time_uts = (start_time - Time('1970-1-1')).sec
    end_time_uts = (end_time - Time('1970-1-1')).sec

    # Define numpy record array which will hold the information from
    # the annotation file.
    lytaf = np.empty((0, ),
                     dtype=[("insertion_time", object), ("begin_time", object),
                            ("reference_time", object), ("end_time", object),
                            ("event_type", object),
                            ("event_definition", object)])
    # Access annotation files
    for suffix in combine_files:
        # Check database files are present
        dbname = "annotation_{0}.db".format(suffix)
        check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path)
        # Open SQLITE3 annotation files
        connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
        # Create cursor to manipulate data in annotation file
        cursor = connection.cursor()
        # Check if lytaf file spans the start and end times defined by
        # user.  If not, download newest version.
        # First get start time of first event and end time of last
        # event in lytaf.
        cursor.execute("select begin_time from event order by begin_time asc "
                       "limit 1;")
        db_first_begin_time = cursor.fetchone()[0]
        db_first_begin_time = datetime.datetime.fromtimestamp(
            db_first_begin_time)
        cursor.execute("select end_time from event order by end_time desc "
                       "limit 1;")
        db_last_end_time = cursor.fetchone()[0]
        db_last_end_time = datetime.datetime.fromtimestamp(db_last_end_time)
        # If lytaf does not include entire input time range...
        if not force_use_local_lytaf:
            if end_time > db_last_end_time or start_time < db_first_begin_time:
                # ...close lytaf file...
                cursor.close()
                connection.close()
                # ...Download latest lytaf file...
                check_download_file(dbname,
                                    LYTAF_REMOTE_PATH,
                                    lytaf_path,
                                    replace=True)
                # ...and open new version of lytaf database.
                connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
                cursor = connection.cursor()
        # Select and extract the data from event table within file within
        # given time range
        cursor.execute("select insertion_time, begin_time, reference_time, "
                       "end_time, eventType_id from event where end_time >= "
                       "{0} and begin_time <= "
                       "{1}".format(start_time_uts, end_time_uts))
        event_rows = cursor.fetchall()
        # Select and extract the event types from eventType table
        cursor.row_factory = sqlite3.Row
        cursor.execute("select * from eventType")
        eventType_rows = cursor.fetchall()
        eventType_id = []
        eventType_type = []
        eventType_definition = []
        for eventType_row in eventType_rows:
            eventType_id.append(eventType_row["id"])
            eventType_type.append(eventType_row["type"])
            eventType_definition.append(eventType_row["definition"])
        # Enter desired information into the lytaf numpy record array
        for event_row in event_rows:
            id_index = eventType_id.index(event_row[4])
            lytaf = np.append(
                lytaf,
                np.array(
                    (Time(datetime.datetime.utcfromtimestamp(event_row[0]),
                          format='datetime'),
                     Time(datetime.datetime.utcfromtimestamp(event_row[1]),
                          format='datetime'),
                     Time(datetime.datetime.utcfromtimestamp(event_row[2]),
                          format='datetime'),
                     Time(datetime.datetime.utcfromtimestamp(event_row[3]),
                          format='datetime'), eventType_type[id_index],
                     eventType_definition[id_index]),
                    dtype=lytaf.dtype))
        # Close file
        cursor.close()
        connection.close()
    # Sort lytaf in ascending order of begin time
    np.recarray.sort(lytaf, order="begin_time")

    # If csvfile kwarg is set, write out lytaf to csv file
    if csvfile:
        # Open and write data to csv file.
        with open(csvfile, 'w') as openfile:
            csvwriter = csv.writer(openfile, delimiter=';')
            # Write header.
            csvwriter.writerow(lytaf.dtype.names)
            # Write data.
            for row in lytaf:
                new_row = []
                new_row.append(row[0].strftime("%Y-%m-%dT%H:%M:%S"))
                new_row.append(row[1].strftime("%Y-%m-%dT%H:%M:%S"))
                new_row.append(row[2].strftime("%Y-%m-%dT%H:%M:%S"))
                new_row.append(row[3].strftime("%Y-%m-%dT%H:%M:%S"))
                new_row.append(row[4])
                new_row.append(row[5])
                csvwriter.writerow(new_row)

    return lytaf
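
Since the returned lytaf is a numpy structured array, individual artifact types can be selected with boolean indexing:

# Keep only Large Angle Rotation entries from the combined annotation data.
lars = lytaf[lytaf["event_type"] == "LAR"]
for row in lars:
    print(row["begin_time"], "->", row["end_time"])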
Example #19
def remove_lytaf_events_from_timeseries(ts,
                                        artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts defined in LYTAF from a TimeSeries.

    Parameters
    ----------
    ts : `sunpy.timeseries.TimeSeries`

    artifacts : list of strings
        Sets the artifact types to be removed.  For a list of artifact types
        see reference [1].  For example, if a user wants to remove only large
        angle rotations, listed at reference [1] as LAR, set artifacts=["LAR"].
        The default is that no artifacts will be removed.

    return_artifacts : `bool`
        Set to True to return a `numpy.recarray` containing the start time, end
        time and type of all artifacts removed.
        Default=False

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    ts_new : `sunpy.timeseries.TimeSeries`
        copy of input TimeSeries with periods corresponding to artifacts
        removed.

    artifact_status : `dict`
        List of 4 variables containing information on what artifacts were
        found, removed, etc. from the time series.
        | **artifact_status["lytaf"]** : `numpy.recarray`
        |     The full LYRA annotation file for the time series time range
        |     output by get_lytaf_events().
        | **artifact_status["removed"]** : `numpy.recarray`
        |     Artifacts which were found and removed from the time series.
        | **artifact_status["not_removed"]** : `numpy.recarray`
        |     Artifacts which were found but not removed as they were not
        |     included when user defined artifacts kwarg.
        | **artifact_status["not_found"]** : `list` of strings
        |     Artifacts listed to be removed by user when defining
        |     artifacts kwarg which were not found in time series time range.

    Notes
    -----
    This function is intended to take TimeSeries objects as input, but the
    deprecated LightCurve is still supported here.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Remove LARs (Large Angle Rotations) from TimeSeries for 4-Dec-2014:

        >>> import sunpy.timeseries as ts
        >>> import sunpy.data.sample  # doctest: +REMOTE_DATA
        >>> from sunpy.instr.lyra import remove_lytaf_events_from_timeseries
        >>> lyrats = ts.TimeSeries(sunpy.data.sample.LYRA_LEVEL3_TIMESERIES, source='LYRA')  # doctest: +REMOTE_DATA
        >>> ts_nolars = remove_lytaf_events_from_timeseries(lyrats, artifacts=["LAR"])  # doctest: +REMOTE_DATA

    To also retrieve information on the artifacts during that day:
        >>> ts_nolars, artifact_status = remove_lytaf_events_from_timeseries(
        ...        lyrats, artifacts=["LAR"], return_artifacts=True)  # doctest: +REMOTE_DATA

    """
    # Check that input argument is of correct type
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    # Remove artifacts from time series
    data_columns = ts.data.columns
    time, channels, artifact_status = _remove_lytaf_events(
        ts.data.index,
        channels=[np.asanyarray(ts.data[col]) for col in data_columns],
        artifacts=artifacts,
        return_artifacts=True,
        lytaf_path=lytaf_path,
        force_use_local_lytaf=force_use_local_lytaf)
    # Create a new copy of the timeseries and replace its data with the
    # artifact-free time series.
    ts_new = copy.deepcopy(ts)
    ts_new.data = pandas.DataFrame(index=time,
                                   data=dict(
                                       (col, channels[i])
                                       for i, col in enumerate(data_columns)))
    if return_artifacts:
        return ts_new, artifact_status
    else:
        return ts_new
Example #20
def _remove_lytaf_events(time,
                         channels=None,
                         artifacts=None,
                         return_artifacts=False,
                         fitsfile=None,
                         csvfile=None,
                         filecolumns=None,
                         lytaf_path=None,
                         force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts from a time series.

    This function removes periods corresponding to certain artifacts recorded
    in the LYRA annotation file from an array of times given by the time input.
    If a list of arrays of other properties is supplied through the channels
    kwarg, then the relevant values from these arrays are also removed.  This
    is done by assuming that each element in each array supplied corresponds to
    the time in the same index in time array.  The artifacts to be removed are
    given via the artifacts kwarg.  The default is "all", meaning that all
    artifacts will be removed.  However, a subset of artifacts can be removed
    by supplying a list of strings of the desired artifact types.

    Parameters
    ----------
    time : `numpy.ndarray` of `astropy.time.Time`
        Gives the times of the timeseries.

    channels : `list` of `numpy.array` convertible to float64.
        Contains arrays of the irradiances taken at the times in the time
        variable.  Each element in the list must have the same number of
        elements as time.

    artifacts : `list` of strings
        Contain the artifact types to be removed.  For list of artifact types
        see reference [1].  For example, if user wants to remove only large
        angle rotations, listed at reference [1] as LAR, let artifacts=["LAR"].
        Default=[], i.e. no artifacts will be removed.

    return_artifacts : `bool`
        Set to True to return a numpy recarray containing the start time, end
        time and type of all artifacts removed.
        Default=False

    fitsfile : `str`
        file name (including file path and suffix, .fits) of output fits file
        which is generated if this kwarg is not None.
        Default=None, i.e. no fits file is output.

    csvfile : `str`
        file name (including file path and suffix, .csv) of output csv file
        which is generated if this kwarg is not None.
        Default=None, i.e. no csv file is output.

    filecolumns : `list` of strings
        Gives names of columns of any output files produced.  Although
        initially set to None above, the default is in fact
        ["time", "channel0", "channel1",..."channelN"]
        where N is the number of irradiance arrays in the channels input
        (assuming 0-indexed counting).

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    clean_time : `numpy.ndarray` of `astropy.time.Time`
        time array with artifact periods removed.

    clean_channels : `list` of ndarrays/array-likes convertible to float64
        list of irradiance arrays with artifact periods removed.

    artifact_status : `dict`
        List of 4 variables containing information on what artifacts were
        found, removed, etc. from the time series.
        artifact_status["lytaf"] = artifacts found : `numpy.recarray`
            The full LYRA annotation file for the time series time range
            output by get_lytaf_events().
        artifact_status["removed"] = artifacts removed : `numpy.recarray`
            Artifacts which were found and removed from the time series.
        artifact_status["not_removed"] = artifacts found but not removed :
              `numpy.recarray`
            Artifacts which were found but not removed as they were not
            included when user defined artifacts kwarg.
        artifact_status["not_found"] = artifacts not found : `list` of strings
            Artifacts listed to be removed by user when defining artifacts
            kwarg which were not found in time series time range.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Example
    -------
    Sample data for example
        >>> from sunpy.time import parse_time
        >>> from sunpy.instr.lyra import _remove_lytaf_events

        >>> time = parse_time(np.arange('2005-02-01T00:00:00', '2005-02-01T02:00:00',
        ...                   dtype='datetime64[m]'))
        >>> channel_1 = np.zeros(len(time))+0.4
        >>> channel_2 = np.zeros(len(time))+0.1

    Remove LARs (Large Angle Rotations) from time series.

        >>> time_clean, channels_clean = _remove_lytaf_events(
        ...   time, channels=[channel_1, channel_2], artifacts=['LAR'])  # doctest: +REMOTE_DATA

    """
    # Check inputs
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    if channels and type(channels) is not list:
        raise TypeError("channels must be None or a list of numpy arrays "
                        "of dtype 'float64'.")
    if not artifacts:
        raise ValueError("User has supplied no artifacts to remove.")
    if type(artifacts) is str:
        artifacts = [artifacts]
    if not all(isinstance(artifact_type, str) for artifact_type in artifacts):
        raise TypeError("All elements in artifacts must in strings.")
    all_lytaf_event_types = get_lytaf_event_types(lytaf_path=lytaf_path,
                                                  print_event_types=False)
    for artifact in artifacts:
        if artifact not in all_lytaf_event_types:
            print(all_lytaf_event_types)
            raise ValueError(
                "{0} is not a valid artifact type. See above.".format(
                    artifact))
    # Define outputs
    clean_time = parse_time(time)
    clean_channels = copy.deepcopy(channels)
    artifacts_not_found = []
    # Get LYTAF file for given time range
    lytaf = get_lytaf_events(time[0],
                             time[-1],
                             lytaf_path=lytaf_path,
                             force_use_local_lytaf=force_use_local_lytaf)

    # Find events in lytaf which are to be removed from time series.
    artifact_indices = np.empty(0, dtype="int64")
    for artifact_type in artifacts:
        indices = np.where(lytaf["event_type"] == artifact_type)[0]
        # If none of a given type of artifact is found, record this
        # type in artifact_not_found list.
        if len(indices) == 0:
            artifacts_not_found.append(artifact_type)
        else:
            # Else, record the indices of the artifacts of this type
            artifact_indices = np.concatenate((artifact_indices, indices))
    artifact_indices.sort()

    # Remove relevant artifacts from timeseries. If none of the
    # artifacts the user wanted removed were found, raise a warning and
    # continue with code.
    if not len(artifact_indices):
        warn("None of user supplied artifacts were found.")
        artifacts_not_found = artifacts
    else:
        # Remove periods corresponding to artifacts from flux and time
        # arrays.
        bad_indices = np.empty(0, dtype="int64")
        all_indices = np.arange(len(time))
        for index in artifact_indices:
            bad_period = np.logical_and(
                time >= lytaf["begin_time"][index].datetime,
                time <= lytaf["end_time"][index].datetime)
            bad_indices = np.append(bad_indices, all_indices[bad_period])
        clean_time = np.delete(clean_time, bad_indices)
        if channels:
            for i, f in enumerate(clean_channels):
                clean_channels[i] = np.delete(f, bad_indices)
    # If return_artifacts kwarg is True, return a list containing
    # information on what artifacts found, removed, etc.  See docstring.
    if return_artifacts:
        artifact_status = {
            "lytaf": lytaf,
            "removed": lytaf[artifact_indices],
            "not_removed": np.delete(lytaf, artifact_indices),
            "not_found": artifacts_not_found
        }
    # Output FITS file if fits kwarg is set
    if fitsfile:
        # Create time array of time strings rather than Time objects
        # and verify filecolumns have been correctly input.  If None,
        # generate generic filecolumns (see docstring of function called
        # below).
        string_time, filecolumns = _prep_columns(time, channels, filecolumns)
        # Prepare column objects.
        cols = [
            fits.Column(name=filecolumns[0], format="26A", array=string_time)
        ]
        if channels:
            for i, f in enumerate(channels):
                cols.append(
                    fits.Column(name=filecolumns[i + 1], format="D", array=f))
        coldefs = fits.ColDefs(cols)
        tbhdu = fits.new_table(coldefs)
        hdu = fits.PrimaryHDU()
        tbhdulist = fits.HDUList([hdu, tbhdu])
        # Write data to fits file.
        tbhdulist.writeto(fitsfile)
    # Output csv file if csv kwarg is set.
    if csvfile:
        # Create time array of time strings rather than Time objects
        # and verify filecolumns have been correctly input.  If None,
        # generate generic filecolumns (see docstring of function called
        # below).
        string_time, filecolumns = _prep_columns(time, channels, filecolumns)
        # Open and write data to csv file.
        with open(csvfile, 'w') as openfile:
            csvwriter = csv.writer(openfile, delimiter=';')
            # Write header.
            csvwriter.writerow(filecolumns)
            # Write data.
            if not channels:
                for i in range(len(time)):
                    csvwriter.writerow(string_time[i])
            else:
                for i in range(len(time)):
                    row = [string_time[i]]
                    for f in channels:
                        row.append(f[i])
                    csvwriter.writerow(row)
    # Return values.
    if return_artifacts:
        if not channels:
            return clean_time, artifact_status
        else:
            return clean_time, clean_channels, artifact_status
    else:
        if not channels:
            return clean_time
        else:
            return clean_time, clean_channels
Example #21
def remove_lytaf_events_from_lightcurve(lc, artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts defined in LYTAF from a LYRALightCurve.

    Parameters
    ----------
    lc : `sunpy.lightcurve.LightCurve`

    artifacts : list of strings
        Contain the artifact types to be removed.  For list of artifact types
        see reference [1].  For example, if user wants to remove only large
        angle rotations, listed at reference [1] as LAR, let artifacts=["LAR"].
        Default=[], i.e. no artifacts will be removed.

    return_artifacts : `bool`
        Set to True to return a `numpy.recarray` containing the start time, end
        time and type of all artifacts removed.
        Default=False

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    lc_new : `sunpy.lightcurve.LightCurve`
        copy of input LYRALightCurve with periods corresponding to artifacts
        removed.

    artifact_status : `dict`
        List of 4 variables containing information on what artifacts were
        found, removed, etc. from the time series.
        artifact_status["lytaf"] = artifacts found : `numpy.recarray`
            The full LYRA annotation file for the time series time range
            output by get_lytaf_events().
        artifact_status["removed"] = artifacts removed : `numpy.recarray`
            Artifacts which were found and removed from the time series.
        artifact_status["not_removed"] = artifacts found but not removed :
              `numpy.recarray`
            Artifacts which were found but not removed as they were not
            included when user defined artifacts kwarg.
        artifact_status["not_found"] = artifacts not found : `list` of strings
            Artifacts listed to be removed by user when defining
            artifacts kwarg which were not found in time series time range.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Remove LARs (Large Angle Rotations) from LYRALightCurve for 4-Dec-2014:

        >>> import sunpy.lightcurve
        >>> from sunpy.instr.lyra import remove_lytaf_events_from_lightcurve
        >>> lc = sunpy.lightcurve.LYRALightCurve.create("2014-12-02")
        >>> lc_nolars = remove_lytaf_events_from_lightcurve(lc, artifacts=["LAR"])

    To also retrieve information on the artifacts during that day:
        >>> lc_nolars, artifact_status = remove_lytaf_events_from_lightcurve(
        ...     lc, artifacts=["LAR"], return_artifacts=True)

    """
    # Check that input argument is of correct type
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    if not isinstance(lc, lightcurve.LightCurve):
        raise TypeError("lc must be a LightCurve object.")
    # Remove artifacts from time series
    data_columns = lc.data.columns
    time, channels, artifact_status = _remove_lytaf_events(
        lc.data.index,
        channels=[np.asanyarray(lc.data[col]) for col in data_columns],
        artifacts=artifacts, return_artifacts=True, lytaf_path=lytaf_path,
        force_use_local_lytaf=force_use_local_lytaf)
    # Create a new copy of the lightcurve and replace its data with the
    # artifact-free time series.
    lc_new = copy.deepcopy(lc)
    lc_new.data = pandas.DataFrame(
        index=time, data=dict((col, channels[i])
                              for i, col in enumerate(data_columns)))
    if return_artifacts:
        return lc_new, artifact_status
    else:
        return lc_new
Example #22
    def _parse_url(self, request, **kwargs):
        path = download_file(request.full_url, get_and_create_download_dir())
        return self._parse_path(pathlib.Path(path), **kwargs)
Example #23
def remove_lytaf_events_from_timeseries(ts, artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts defined in LYTAF from a TimeSeries.

    Parameters
    ----------
    ts : `sunpy.timeseries.TimeSeries`

    artifacts : list of strings
        Sets the artifact types to be removed.  For a list of artifact types
        see reference [1].  For example, if a user wants to remove only large
        angle rotations, listed at reference [1] as LAR, set artifacts=["LAR"].
        The default is that no artifacts will be removed.

    return_artifacts : `bool`
        Set to True to return a `numpy.recarray` containing the start time, end
        time and type of all artifacts removed.
        Default=False

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    ts_new : `sunpy.timeseries.TimeSeries`
        copy of input TimeSeries with periods corresponding to artifacts
        removed.

    artifact_status : `dict`
        List of 4 variables containing information on what artifacts were
        found, removed, etc. from the time series.
        | **artifact_status["lytaf"]** : `numpy.recarray`
        |     The full LYRA annotation file for the time series time range
        |     output by get_lytaf_events().
        | **artifact_status["removed"]** : `numpy.recarray`
        |     Artifacts which were found and removed from the time series.
        | **artifact_status["not_removed"]** : `numpy.recarray`
        |     Artifacts which were found but not removed as they were not
        |     included when user defined artifacts kwarg.
        | **artifact_status["not_found"]** : `list` of strings
        |     Artifacts listed to be removed by user when defining
        |     artifacts kwarg which were not found in time series time range.

    Notes
    -----
    This function is intended to take TimeSeries objects as input, but the
    deprecated LightCurve is still supported here.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Remove LARs (Large Angle Rotations) from TimeSeries for 4-Dec-2014:

        >>> import sunpy.timeseries as ts
        >>> import sunpy.data.sample  # doctest: +REMOTE_DATA
        >>> from sunpy.instr.lyra import remove_lytaf_events_from_timeseries
        >>> lyrats = ts.TimeSeries(sunpy.data.sample.LYRA_LEVEL3_TIMESERIES, source='LYRA')  # doctest: +REMOTE_DATA
        >>> ts_nolars = remove_lytaf_events_from_timeseries(lyrats, artifacts=["LAR"])  # doctest: +REMOTE_DATA

    To also retrieve information on the artifacts during that day:

        >>> ts_nolars, artifact_status = remove_lytaf_events_from_timeseries(
        ...        lyrats, artifacts=["LAR"], return_artifacts=True)  # doctest: +REMOTE_DATA

    """
    # Check that input argument is of correct type
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    # Remove artifacts from time series
    data_columns = ts.data.columns
    time, channels, artifact_status = _remove_lytaf_events(
        ts.data.index,
        channels=[np.asanyarray(ts.data[col]) for col in data_columns],
        artifacts=artifacts, return_artifacts=True, lytaf_path=lytaf_path,
        force_use_local_lytaf=force_use_local_lytaf)
    # Create a new copy of the timeseries and replace its data with the
    # artifact-free time series.
    ts_new = copy.deepcopy(ts)
    ts_new.data = pandas.DataFrame(
        index=time, data=dict((col, channels[i])
                              for i, col in enumerate(data_columns)))
    if return_artifacts:
        return ts_new, artifact_status
    else:
        return ts_new
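
When `return_artifacts=True`, the second return value is a dict of recarrays and can be inspected directly. A short sketch, reusing the `lyrats` object from the docstring example and the field names of the documented recarray:

from sunpy.instr.lyra import remove_lytaf_events_from_timeseries

ts_new, status = remove_lytaf_events_from_timeseries(
    lyrats, artifacts=["LAR"], return_artifacts=True)
# "lytaf" holds every annotation in the time range, "removed" only those
# actually excised, "not_found" the requested types that never occurred.
print("events in LYTAF for this range:", len(status["lytaf"]))
print("removed:", [row["event_type"] for row in status["removed"]])
print("requested but not found:", status["not_found"])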
Exemple #24
0
    def _parse_args(self, *args, **kwargs):
        """
        Parses an args list for data-header pairs.  args can contain any
        mixture of the following entries:
        * tuples of data,header
        * data, header not in a tuple
        * data, wcs object in a tuple
        * data, wcs object not in a tuple
        * filename, which will be read
        * directory, from which all files will be read
        * glob, from which all files will be read
        * url, which will be downloaded and read
        * lists containing any of the above.

        Examples
        --------
        self._parse_args(data, header,
                         (data, header),
                         ['file1', 'file2', 'file3'],
                         'file4',
                         'directory1',
                         '*.fits')

        """

        data_header_pairs = list()
        already_maps = list()

        # Account for nested lists of items
        args = expand_list(args)

        # For each of the arguments, handle each of the cases
        i = 0
        while i < len(args):

            arg = args[i]

            # Data-header or data-WCS pair
            if isinstance(arg, SUPPORTED_ARRAY_TYPES):
                arg_header = args[i+1]
                if isinstance(arg_header, WCS):
                    arg_header = args[i+1].to_header()

                if self._validate_meta(arg_header):
                    pair = (args[i], OrderedDict(arg_header))
                    data_header_pairs.append(pair)
                    i += 1    # an extra increment to account for the data-header pairing

            # File name
            elif (isinstance(arg, str) and
                  os.path.isfile(os.path.expanduser(arg))):
                path = os.path.expanduser(arg)
                pairs = self._read_file(path, **kwargs)
                data_header_pairs += pairs

            # Directory
            elif (isinstance(arg, str) and
                  os.path.isdir(os.path.expanduser(arg))):
                path = os.path.expanduser(arg)
                files = [os.path.join(path, elem) for elem in os.listdir(path)]
                for afile in files:
                    data_header_pairs += self._read_file(afile, **kwargs)

            # Glob
            elif (isinstance(arg, str) and '*' in arg):
                files = glob.glob(os.path.expanduser(arg))
                for afile in files:
                    data_header_pairs += self._read_file(afile, **kwargs)

            # Already a Map
            elif isinstance(arg, GenericMap):
                already_maps.append(arg)

            # A URL
            elif (isinstance(arg, str) and
                  _is_url(arg)):
                url = arg
                path = download_file(url, get_and_create_download_dir())
                pairs = self._read_file(path, **kwargs)
                data_header_pairs += pairs

            # A database Entry
            elif isinstance(arg, DatabaseEntry):
                data_header_pairs += self._read_file(arg.path, **kwargs)

            else:
                raise ValueError("File not found or invalid input")

            i += 1

        # TODO:
        # In the end, if there are already maps it should be put in the same
        # order as the input, currently they are not.
        return data_header_pairs, already_maps
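
The `_is_url` helper used by the factory is not shown in this excerpt. A plausible sketch (not necessarily sunpy's actual implementation) treats a string as a URL when it parses with both a scheme and a network location:

from urllib.parse import urlparse

def _is_url(arg):
    # A string counts as a URL if urlparse finds both a scheme
    # (http, ftp, ...) and a network location.
    try:
        parts = urlparse(arg)
    except ValueError:
        return False
    return bool(parts.scheme and parts.netloc)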
Exemple #25
0
    def _parse_args(self, *args, **kwargs):
        """
        Parses an `args` list for data-header pairs. `args` can contain any mixture of the following
        entries:

        * tuples of (data, header, unit) (1)
        * data, header not in a tuple (1)
        * filename, which will be read
        * directory, from which all files will be read
        * glob, from which all files will be read
        * url, which will be downloaded and read
        * lists containing any of the above.

        (1) header/unit are optional and in either order, but data should be the first entry in each group.

        Examples
        --------
        self._parse_args(data, header,
                         (data, header),
                         ['file1', 'file2', 'file3'],
                         'file4',
                         'directory1',
                         '*.fits')
        """
        data_header_unit_tuples = list()
        data_header_pairs = list()
        already_timeseries = list()
        filepaths = list()

        # Account for nested lists of items. Simply outputs a single list of
        # items, nested lists are expanded to element level.
        args = expand_list(args)

        # For each of the arguments, handle each of the cases
        i = 0
        while i < len(args):
            arg = args[i]

            # Data-header pair in a tuple
            if isinstance(arg, (np.ndarray, Table, pd.DataFrame)):
                # Assume a pandas DataFrame is given
                data = arg
                units = OrderedDict()
                meta = MetaDict()

                # Convert the data argument into a Pandas DataFrame if needed.
                if isinstance(data, Table):
                    # We have an Astropy Table:
                    data, meta, units = self._from_table(data)
                elif isinstance(data, np.ndarray):
                    # We have a numpy ndarray. We assume the first column is a datetime index
                    data = pd.DataFrame(data=data[:, 1:], index=Time(data[:, 0]))

                # If there are 1 or 2 more arguments:
                for _ in range(2):
                    if (len(args) > i+1):
                        # If the next argument isn't data but is metadata or units:
                        if not isinstance(args[i+1], (np.ndarray, Table, pd.DataFrame)):
                            if self._validate_units(args[i+1]):
                                units.update(args[i+1])
                                i += 1  # an extra increment to account for the units
                            elif self._validate_meta(args[i+1]):
                                # if we have an astropy.io FITS header then convert
                                # to preserve multi-line comments
                                if isinstance(args[i+1], astropy.io.fits.header.Header):
                                    args[i+1] = MetaDict(sunpy.io.header.FileHeader(args[i+1]))
                                meta.update(args[i+1])
                                i += 1  # an extra increment to account for the meta

                # Add a 3-tuple for this TimeSeries.
                data_header_unit_tuples.append((data, meta, units))

            # Filepath
            elif (isinstance(arg, str) and
                  os.path.isfile(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)
                result = self._read_file(path, **kwargs)
                data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)

            # Directory
            elif (isinstance(arg, str) and
                  os.path.isdir(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)
                files = [os.path.join(path, elem) for elem in os.listdir(path)]
                for afile in files:
                    # _read_file returns a boolean telling us whether the file
                    # was read, and either a tuple or the original filepath
                    # for reading by a source class
                    result = self._read_file(afile, **kwargs)
                    data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths,
                                                                 result)

            # Glob
            elif isinstance(arg, str) and '*' in arg:

                files = glob.glob(os.path.expanduser(arg))
                for afile in files:
                    # _read_file returns a boolean telling us whether the file
                    # was read, and either a tuple or the original filepath
                    # for reading by a source class
                    result = self._read_file(afile, **kwargs)
                    data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths,
                                                                 result)

            # Already a TimeSeries
            elif isinstance(arg, GenericTimeSeries):
                already_timeseries.append(arg)

            # A URL
            elif (isinstance(arg, str) and
                  _is_url(arg)):
                url = arg
                path = download_file(url, get_and_create_download_dir())
                result = self._read_file(path, **kwargs)
                data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)
            else:
                raise NoMatchError("File not found or invalid input")
            i += 1

        # TODO:
        # In the end, if there are already TimeSeries it should be put in the
        # same order as the input, currently they are not.
        return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
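
`_apply_result` is referenced but not defined in this excerpt. Based on the inline comments ("returns a boolean telling us whether the file was read, and either a tuple or the original filepath"), a sketch of what it plausibly does:

def _apply_result(data_header_pairs, filepaths, result):
    # Inferred helper: `result` is a (read, payload) pair. If the file
    # was read, `payload` is a list of (data, header) tuples; otherwise
    # it is the original filepath, kept for an instrument source class
    # to read later.
    read, payload = result
    if read:
        data_header_pairs += payload
    else:
        filepaths.append(payload)
    return data_header_pairs, filepaths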
Exemple #26
0
    def _parse_args(self, *args, **kwargs):
        """
        Parses an args list for data-header pairs.  args can contain any
        mixture of the following entries:
        * tuples of (data, header, unit) (1)
        * data, header not in a tuple (1)
        * filename, which will be read
        * directory, from which all files will be read
        * glob, from which all files will be read
        * url, which will be downloaded and read
        * lists containing any of the above.

        (1) Note that header/unit are optional and may appear in either order,
        but data must be the first entry in each group.

        Examples
        --------
        self._parse_args(data, header,
                         (data, header),
                         ['file1', 'file2', 'file3'],
                         'file4',
                         'directory1',
                         '*.fits')

        """

        data_header_unit_tuples = list()
        data_header_pairs = list()
        already_timeseries = list()
        filepaths = list()

        # Account for nested lists of items. Simply outputs a single list of
        # items, nested lists are expanded to element level.
        args = expand_list(args)

        # For each of the arguments, handle each of the cases
        i = 0
        while i < len(args):
            arg = args[i]

            # Data-header pair in a tuple
            if isinstance(arg, (np.ndarray, Table, pd.DataFrame)):
                # Assume a pandas DataFrame is given
                data = arg
                units = OrderedDict()
                meta = MetaDict()

                # Convert the data argument into a Pandas DataFrame if needed.
                if isinstance(data, Table):
                    # We have an Astropy Table:
                    data, meta, units = self._from_table(data)
                elif isinstance(data, np.ndarray):
                    # We have a numpy ndarray. We assume the first column is a datetime index
                    data = pd.DataFrame(data=data[:, 1:], index=Time(data[:, 0]))

                # If there are 1 or 2 more arguments:
                for _ in range(2):
                    if len(args) > i+1:
                        # If the next argument isn't data but is metadata or units:
                        if not isinstance(args[i+1], (np.ndarray, Table, pd.DataFrame)):
                            if self._validate_units(args[i+1]):
                                units.update(args[i+1])
                                i += 1  # an extra increment to account for the units
                            elif self._validate_meta(args[i+1]):
                                # if we have an astropy.io FITS header then convert
                                # to preserve multi-line comments
                                if isinstance(args[i+1], astropy.io.fits.header.Header):
                                    args[i+1] = MetaDict(sunpy.io.header.FileHeader(args[i+1]))
                                meta.update(args[i+1])
                                i += 1  # an extra increment to account for the meta

                # Add a 3-tuple for this TimeSeries.
                data_header_unit_tuples.append((data, meta, units))

            # Filepath
            elif (isinstance(arg, str) and
                  os.path.isfile(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)
                result = self._read_file(path, **kwargs)
                data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)

            # Directory
            elif (isinstance(arg, str) and
                  os.path.isdir(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)
                files = [os.path.join(path, elem) for elem in os.listdir(path)]
                for afile in files:
                    # _read_file returns a boolean telling us whether the file
                    # was read, and either a tuple or the original filepath
                    # for reading by a source class
                    result = self._read_file(afile, **kwargs)
                    data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths,
                                                                 result)

            # Glob
            elif isinstance(arg, str) and '*' in arg:

                files = glob.glob(os.path.expanduser(arg))
                for afile in files:
                    # _read_file returns a boolean telling us whether the file
                    # was read, and either a tuple or the original filepath
                    # for reading by a source class
                    result = self._read_file(afile, **kwargs)
                    data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths,
                                                                 result)

            # Already a TimeSeries
            elif isinstance(arg, GenericTimeSeries):
                already_timeseries.append(arg)

            # A URL
            elif (isinstance(arg, str) and
                  _is_url(arg)):
                url = arg
                path = download_file(url, get_and_create_download_dir())
                result = self._read_file(path, **kwargs)
                data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)
            else:
                raise NoMatchError("File not found or invalid input")
            i += 1

        # TODO:
        # In the end, if there are already TimeSeries it should be put in the
        # same order as the input, currently they are not.
        return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
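
To make the (data, header, unit) grouping concrete, here is a hedged sketch of the kinds of arguments this parser is documented to accept. `factory` stands for the TimeSeries factory instance; all names are illustrative:

from collections import OrderedDict

import astropy.units as u
import numpy as np
import pandas as pd

times = pd.date_range("2014-12-04", periods=4, freq="min")
frame = pd.DataFrame({"flux": np.arange(4.0)}, index=times)
meta = {"instrument": "example"}        # anything _validate_meta accepts
units = OrderedDict(flux=u.W / u.m**2)  # anything _validate_units accepts

# Per the docstring, either call groups the arguments into a single
# (data, meta, units) triple:
# factory._parse_args(frame, meta, units)
# factory._parse_args((frame, meta, units))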
Exemple #27
0
def get_goes_data(t=None, sat_num=None):
    ''' Reads GOES data from the https://umbra.nascom.nasa.gov/ repository for
        the date and satellite number provided.  If sat_num is None, data for
        all available satellites are downloaded, and a simple sanity check is
        used to decide the best.  If the Time() object t is None, data for the
        day before the current date are read (since the data become available
        with a one-day delay).

        Returns:
           goes_t    GOES time array in plot_date format
           goes_data GOES 1-8 A lightcurve
        '''
    # Set the default time first, since t is used immediately below.
    if t is None:
        t = Time(Time.now().mjd - 1, format='mjd')
    # Can short-circuit the entire code below this block by using my goes.get_goes() routine
    lo, hi, goes_t = get_goes()
    if len(goes_t) != 0:
        # Got the data, now isolate the requested day
        good, = np.where(np.floor(goes_t.mjd) == np.floor(t.mjd))
        if len(good) != 0:
            return goes_t.plot_date, lo

    from sunpy.util.config import get_and_create_download_dir
    import shutil
    from astropy.io import fits
    from urllib.request import urlopen
    yr = t.iso[:4]
    datstr = t.iso[:10].replace('-', '')
    try:
        if sat_num is None:
            try:
                f = urlopen(
                    'https://umbra.nascom.nasa.gov/goes/fits/' + yr, timeout=3)
            except Exception:
                f = urlopen('https://hesperia.gsfc.nasa.gov/goes/' + yr,
                            timeout=3)
            # Decode the directory listing and collect the satellite numbers
            # of the files available for this date.
            lines = f.read().decode('utf-8', errors='replace').splitlines()
            sat_num = []
            for line in lines:
                idx = line.find(datstr)
                if idx != -1:
                    sat_num.append(line[idx - 2:idx])
        if isinstance(sat_num, int):
            sat_num = [str(sat_num)]
        filenames = []
        for sat in sat_num:
            filename = 'go' + sat + datstr + '.fits'
            try:
                url = 'https://umbra.nascom.nasa.gov/goes/fits/' + yr + '/' + filename
                f = urlopen(url, timeout=3)
            except Exception:
                url = 'https://hesperia.gsfc.nasa.gov/goes/' + yr + '/' + filename
                f = urlopen(url, timeout=3)
            with open(get_and_create_download_dir() + '/' + filename,
                      'wb') as g:
                shutil.copyfileobj(f, g)
            filenames.append(get_and_create_download_dir() + '/' + filename)
        pmerit = 0
        for file in filenames:
            gfits = fits.open(file)
            data = gfits[2].data['FLUX'][0][:, 0]
            good, = np.where(data > 1.e-8)
            tsecs = gfits[2].data['TIME'][0]
            merit = len(good)
            date_elements = gfits[0].header['DATE-OBS'].split('/')
            if merit > pmerit:
                print('File:', file, 'is best')
                pmerit = merit
                goes_data = data
                goes_t = Time(date_elements[2] + '-' + date_elements[1] + '-' +
                              date_elements[0]).plot_date + tsecs / 86400.
        try:
            return goes_t, goes_data
        except NameError:
            # goes_data was never assigned, i.e. no file passed the check.
            print('No good GOES data for', datstr)
            return None, None
    except Exception:
        print('GOES site unreachable?')
        return None, None
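
A typical call, sketched under the assumption that GOES 15 FITS files exist for the requested date:

from astropy.time import Time

goes_t, goes_data = get_goes_data(Time('2014-12-04'), sat_num=15)
if goes_t is not None:
    # goes_t is in matplotlib plot_date format, ready for plot_date().
    print(len(goes_data), 'samples of GOES 1-8 A flux')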
Exemple #28
0
def get_lytaf_events(start_time, end_time, lytaf_path=None,
                     combine_files=("lyra", "manual", "ppt", "science"),
                     csvfile=None, force_use_local_lytaf=False):
    """
    Extracts combined lytaf file for given time range.

    Given a time range defined by start_time and end_time, this function
    extracts the segments of each LYRA annotation file and combines them.

    Parameters
    ----------
    start_time : `datetime.datetime` or `str`
        Start time of period for which annotation file is required.

    end_time : `datetime.datetime` or `str`
        End time of period for which annotation file is required.

    lytaf_path : `str`
        Directory path where the LYRA annotation files are stored.

    combine_files : `tuple` of strings
        States which LYRA annotation files are to be combined.
        Default is all four, i.e. lyra, manual, ppt, science.
        See the Notes section for an explanation of each.

    csvfile : `str`
        If set, the combined LYTAF events are also written to this file as
        semicolon-delimited CSV.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    lytaf : `numpy.recarray`
        Containing the various parameters stored in the LYTAF files.

    Notes
    -----
    There are four LYRA annotation files which mark different types of events
    or artifacts in the data.  They are named annotation_suffix.db, where
    suffix is one of lyra, manual, ppt, or science.

    annotation_lyra.db : contains entries regarding possible effects to
        the data due to normal operation of LYRA instrument.

    annotation_manual.db : contains entries regarding possible effects
        to the data due to unusual or manually logged events.

    annotation_ppt.db : contains entries regarding possible effects to
        the data due to pointing or positioning of PROBA2.

    annotation_science.db : contains scientifically interesting events in
        the data, e.g. GOES flares.

    References
    ----------
    Further documentation: http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Get all events in the LYTAF files for January 2014:

        >>> from sunpy.instr.lyra import get_lytaf_events
        >>> lytaf = get_lytaf_events('2014-01-01', '2014-02-01')  # doctest: +REMOTE_DATA

    """
    # Check inputs
    # Check lytaf path
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    # Check start_time and end_time is a date string or datetime object
    start_time = parse_time(start_time)
    end_time = parse_time(end_time)
    # Check combine_files contains correct inputs
    if not all(suffix in ["lyra", "manual", "ppt", "science"]
               for suffix in combine_files):
        raise ValueError("Elements in combine_files must be strings equalling "
                         "'lyra', 'manual', 'ppt', or 'science'.")
    # Remove any duplicates from combine_files input
    combine_files = list(set(combine_files))
    combine_files.sort()
    # Convert input times to UNIX timestamp format since this is the
    # time format in the annotation files
    start_time_uts = (start_time - datetime.datetime(1970, 1, 1)).total_seconds()
    end_time_uts = (end_time - datetime.datetime(1970, 1, 1)).total_seconds()

    # Define numpy record array which will hold the information from
    # the annotation file.
    lytaf = np.empty((0,), dtype=[("insertion_time", object),
                                  ("begin_time", object),
                                  ("reference_time", object),
                                  ("end_time", object),
                                  ("event_type", object),
                                  ("event_definition", object)])
    # Access annotation files
    for suffix in combine_files:
        # Check database files are present
        dbname = "annotation_{0}.db".format(suffix)
        check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path)
        # Open SQLITE3 annotation files
        connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
        # Create cursor to manipulate data in annotation file
        cursor = connection.cursor()
        # Check if lytaf file spans the start and end times defined by
        # user.  If not, download newest version.
        # First get start time of first event and end time of last
        # event in lytaf.
        cursor.execute("select begin_time from event order by begin_time asc "
                       "limit 1;")
        db_first_begin_time = cursor.fetchone()[0]
        db_first_begin_time = datetime.datetime.fromtimestamp(db_first_begin_time)
        cursor.execute("select end_time from event order by end_time desc "
                       "limit 1;")
        db_last_end_time = cursor.fetchone()[0]
        db_last_end_time = datetime.datetime.fromtimestamp(db_last_end_time)
        # If lytaf does not include entire input time range...
        if not force_use_local_lytaf:
            if end_time > db_last_end_time or start_time < db_first_begin_time:
                # ...close lytaf file...
                cursor.close()
                connection.close()
                # ...Download latest lytaf file...
                check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path,
                                    replace=True)
                # ...and open new version of lytaf database.
                connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
                cursor = connection.cursor()
        # Select and extract the data from event table within file within
        # given time range
        cursor.execute("select insertion_time, begin_time, reference_time, "
                       "end_time, eventType_id from event where end_time >= "
                       "{0} and begin_time <= "
                       "{1}".format(start_time_uts, end_time_uts))
        event_rows = cursor.fetchall()
        # Select and extract the event types from eventType table
        cursor.row_factory = sqlite3.Row
        cursor.execute("select * from eventType")
        eventType_rows = cursor.fetchall()
        eventType_id = []
        eventType_type = []
        eventType_definition = []
        for eventType_row in eventType_rows:
            eventType_id.append(eventType_row["id"])
            eventType_type.append(eventType_row["type"])
            eventType_definition.append(eventType_row["definition"])
        # Enter desired information into the lytaf numpy record array
        for event_row in event_rows:
            id_index = eventType_id.index(event_row[4])
            lytaf = np.append(lytaf,
                              np.array((datetime.datetime.utcfromtimestamp(event_row[0]),
                                        datetime.datetime.utcfromtimestamp(event_row[1]),
                                        datetime.datetime.utcfromtimestamp(event_row[2]),
                                        datetime.datetime.utcfromtimestamp(event_row[3]),
                                        eventType_type[id_index],
                                        eventType_definition[id_index]), dtype=lytaf.dtype))
        # Close file
        cursor.close()
        connection.close()
    # Sort lytaf in ascending order of begin time
    np.recarray.sort(lytaf, order="begin_time")

    # If csvfile kwarg is set, write out lytaf to csv file
    if csvfile:
        # Open and write data to csv file.
        with open(csvfile, 'w') as openfile:
            csvwriter = csv.writer(openfile, delimiter=';')
            # Write header.
            csvwriter.writerow(lytaf.dtype.names)
            # Write data.
            for row in lytaf:
                new_row = []
                new_row.append(row[0].strftime("%Y-%m-%dT%H:%M:%S"))
                new_row.append(row[1].strftime("%Y-%m-%dT%H:%M:%S"))
                new_row.append(row[2].strftime("%Y-%m-%dT%H:%M:%S"))
                new_row.append(row[3].strftime("%Y-%m-%dT%H:%M:%S"))
                new_row.append(row[4])
                new_row.append(row[5])
                csvwriter.writerow(new_row)

    return lytaf
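
The returned recarray can be filtered by field. For example, keeping only the LAR entries (field names come from the dtype defined in the function body):

lytaf = get_lytaf_events('2014-01-01', '2014-02-01')
# Boolean mask over the object-dtype "event_type" field.
lars = lytaf[lytaf['event_type'] == 'LAR']
for event in lars:
    print(event['begin_time'], '->', event['end_time'])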
Exemple #29
0
    def _parse_args(self, *args, **kwargs):
        """
        Parses an args list for data-header pairs.  args can contain any
        mixture of the following entries:
        * tuples of data,header
        * data, header not in a tuple
        * data, wcs object in a tuple
        * data, wcs object not in a tuple
        * filename, which will be read
        * directory, from which all files will be read
        * glob, from which all files will be read
        * url, which will be downloaded and read
        * lists containing any of the above.

        Examples
        --------
        self._parse_args(data, header,
                         (data, header),
                         ['file1', 'file2', 'file3'],
                         'file4',
                         'directory1',
                         '*.fits')

        """

        data_header_pairs = list()
        already_maps = list()

        # Account for nested lists of items
        args = expand_list(args)

        # For each of the arguments, handle each of the cases
        i = 0
        while i < len(args):

            arg = args[i]

            # Data-header or data-WCS pair
            if isinstance(arg, SUPPORTED_ARRAY_TYPES):
                arg_header = args[i+1]
                if isinstance(arg_header, WCS):
                    arg_header = args[i+1].to_header()

                if self._validate_meta(arg_header):
                    pair = (args[i], OrderedDict(arg_header))
                    data_header_pairs.append(pair)
                    i += 1    # an extra increment to account for the data-header pairing

            # File name
            elif (isinstance(arg, six.string_types) and
                  os.path.isfile(os.path.expanduser(arg))):
                path = os.path.expanduser(arg)
                pairs = self._read_file(path, **kwargs)
                data_header_pairs += pairs

            # Directory
            elif (isinstance(arg, six.string_types) and
                  os.path.isdir(os.path.expanduser(arg))):
                path = os.path.expanduser(arg)
                files = [os.path.join(path, elem) for elem in os.listdir(path)]
                for afile in files:
                    data_header_pairs += self._read_file(afile, **kwargs)

            # Glob
            elif (isinstance(arg, six.string_types) and '*' in arg):
                files = glob.glob(os.path.expanduser(arg))
                for afile in files:
                    data_header_pairs += self._read_file(afile, **kwargs)

            # Already a Map
            elif isinstance(arg, GenericMap):
                already_maps.append(arg)

            # A URL
            elif (isinstance(arg, six.string_types) and
                  _is_url(arg)):
                url = arg
                path = download_file(url, get_and_create_download_dir())
                pairs = self._read_file(path, **kwargs)
                data_header_pairs += pairs

            # A database Entry
            elif isinstance(arg, DatabaseEntry):
                data_header_pairs += self._read_file(arg.path, **kwargs)

            else:
                raise ValueError("File not found or invalid input")

            i += 1

        # TODO:
        # In the end, if there are already maps it should be put in the same
        # order as the input, currently they are not.
        return data_header_pairs, already_maps
Exemple #30
0
def remove_lytaf_events_from_lightcurve(lc,
                                        artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts defined in LYTAF from a LYRALightCurve.

    Parameters
    ----------
    lc : `sunpy.lightcurve.LightCurve`
        Light curve from which LYTAF artifacts are to be removed.

    artifacts : list of strings
        Contains the artifact types to be removed.  For a list of artifact
        types see reference [1].  For example, to remove only large angle
        rotations, listed in reference [1] as LAR, set artifacts=["LAR"].
        Default=[], i.e. no artifacts will be removed.

    return_artifacts : `bool`
        Set to True to return a `numpy.recarray` containing the start time, end
        time and type of all artifacts removed.
        Default=False

    lytaf_path : `str`
        Directory path where the LYRA annotation files are stored.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    lc_new : `sunpy.lightcurve.LightCurve`
        copy of input LYRALightCurve with periods corresponding to artifacts
        removed.

    artifact_status : `dict`
        Dictionary with four entries containing information on what artifacts
        were found, removed, etc. from the time series.
        artifact_status["lytaf"] = artifacts found : `numpy.recarray`
            The full LYRA annotation file for the time series time range
            output by get_lytaf_events().
        artifact_status["removed"] = artifacts removed : `numpy.recarray`
            Artifacts which were found and removed from from time series.
        artifact_status["not_removed"] = artifacts found but not removed :
              `numpy.recarray`
            Artifacts which were found but not removed as they were not
            included when user defined artifacts kwarg.
        artifact_status["not_found"] = artifacts not found : `list` of strings
            Artifacts listed to be removed by user when defining
            artifacts kwarg which were not found in time series time range.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Remove LARs (Large Angle Rotations) from a LYRALightCurve for 4-Dec-2014:

        >>> import sunpy.lightcurve
        >>> from sunpy.instr.lyra import remove_lytaf_events_from_lightcurve
        >>> lc = sunpy.lightcurve.LYRALightCurve.create("2014-12-04")
        >>> lc_nolars = remove_lytaf_events_from_lightcurve(lc, artifacts=["LAR"])

    To also retrieve information on the artifacts during that day:

        >>> lc_nolars, artifact_status = remove_lytaf_events_from_lightcurve(
        ...     lc, artifacts=["LAR"], return_artifacts=True)

    """
    # Check that input argument is of correct type
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    if not isinstance(lc, lightcurve.LightCurve):
        raise TypeError("lc must be a LightCurve object.")
    # Remove artifacts from time series
    data_columns = lc.data.columns
    time, channels, artifact_status = _remove_lytaf_events(
        lc.data.index,
        channels=[np.asanyarray(lc.data[col]) for col in data_columns],
        artifacts=artifacts,
        return_artifacts=True,
        lytaf_path=lytaf_path,
        force_use_local_lytaf=force_use_local_lytaf)
    # Create a new copy of the lightcurve and replace its data with the
    # artifact-free time series.
    lc_new = copy.deepcopy(lc)
    lc_new.data = pandas.DataFrame(index=time,
                                   data=dict(
                                       (col, channels[i])
                                       for i, col in enumerate(data_columns)))
    if return_artifacts:
        return lc_new, artifact_status
    else:
        return lc_new