def start_gathering(topics, filenames, seconds_between_calls=30):
    # Index for topics/filenames:
    # 0: news articles
    # 1: tweets

    articles_df = read_last_record(filenames[0], articles_export_columns)
    tweets_df = read_last_record(filenames[1], twitter_export_columns)

    # Publish date of last checked article
    if articles_df is None or len(articles_df) < 1:
        last_article_date = datetime.datetime.today() - tdelta(weeks=1)
    else:
        last_article_date = articles_df.loc[0, "date"]

    # ID of last checked tweet
    if tweets_df is None or len(tweets_df) < 1:
        last_tweet_id = 0
    else:
        last_tweet_id = tweets_df.loc[0, "id"]

    while True:
        now = datetime.datetime.today()

        new_article_date = gather_new_articles(topics[0], last_article_date,
                                               filenames[0])
        new_tweet_id = gather_new_tweets(topics[1], last_tweet_id,
                                         filenames[1])

        # Update tracking variables if newer records have been found
        if new_article_date is not None:
            last_article_date = new_article_date
        if new_tweet_id is not None:
            last_tweet_id = new_tweet_id

        sleep_until(now + tdelta(seconds=seconds_between_calls))
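
The loop above leans on helpers that this excerpt does not define. A minimal sketch of what read_last_record and sleep_until might look like, assuming tdelta aliases datetime.timedelta and that records are exported as CSV rows in chronological order (the CSV layout is an assumption, not taken from the source):

import datetime
import time

import pandas as pd


def read_last_record(filename, columns):
    # Hypothetical helper: return the most recent exported row as a
    # one-row DataFrame, or None if nothing has been exported yet.
    try:
        df = pd.read_csv(filename, names=columns)
    except FileNotFoundError:
        return None
    # Assumes rows are appended chronologically; reset the index so
    # callers can read .loc[0, ...].
    return df.tail(1).reset_index(drop=True)


def sleep_until(wake_time):
    # Sleep until the given datetime; skip the pause entirely if the
    # API calls already overran the polling interval.
    remaining = (wake_time - datetime.datetime.today()).total_seconds()
    if remaining > 0:
        time.sleep(remaining)
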
def harvest_articles(topic, articles_filename, seconds_between_calls):
    articles_per_call = 100

    articles_df = read_last_article(articles_filename)
    if articles_df is None or len(articles_df) < 1:
        last_checked = datetime.datetime.today() - tdelta(weeks=1)
    else:
        last_checked = articles_df.loc[0, "date"]

    while True:
        now = datetime.datetime.today()

        # Get new articles; copy the slice so the column assignments
        # below do not modify a view of tmp_articles.
        tmp_articles = search_articles(topic, articles_per_call)
        new_articles = tmp_articles[tmp_articles["date"] > last_checked].copy()
        print("Found {} new articles.".format(len(new_articles)))

        # Skip if no new articles found
        if len(new_articles) == 0:
            sleep_until(now + tdelta(seconds=seconds_between_calls))
            continue

        # Sentiment and topic statistics. The multi-column assignments
        # require get_sentiment and get_topic_freq to return a pandas
        # Series with one value per target column.
        new_articles["content"] = new_articles["url"].apply(get_content)
        new_articles[["sent_neg", "sent_neu", "sent_pos", "sent_comp"]] = \
            new_articles["content"].apply(get_sentiment)
        new_articles[["topic_freq", "topic_density"]] = \
            new_articles["content"].apply(get_topic_freq, args=(topic,))

        # Save articles
        export_articles(new_articles, articles_filename)

        print(new_articles[["date", "id", "sent_comp"]])

        # Track the newest publish date seen, mirroring the tweet
        # harvester's use of the maximum id; using the wall clock here
        # could skip articles that are indexed with a delay.
        last_checked = new_articles["date"].max()
        sleep_until(now + tdelta(seconds=seconds_between_calls))
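
Both harvesters expand a single apply() into several columns, which only works if the applied function returns a pandas Series. A sketch of a compatible get_sentiment built on NLTK's VADER analyzer (the choice of VADER is an assumption; only the four-score Series shape is dictated by the code above):

import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Requires a one-time nltk.download('vader_lexicon').
_analyzer = SentimentIntensityAnalyzer()


def get_sentiment(text):
    # Score the text and return one value per target column.
    scores = _analyzer.polarity_scores(text)
    return pd.Series(
        [scores["neg"], scores["neu"], scores["pos"], scores["compound"]],
        index=["sent_neg", "sent_neu", "sent_pos", "sent_comp"])
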
def harvest_tweets(topic, tweets_filename, seconds_between_calls):
    tweets_per_call = 100

    tweets_df = read_last_tweet(tweets_filename)
    if tweets_df is None or len(tweets_df) < 1:
        last_id = 0
    else:
        last_id = tweets_df.loc[0, "id"]

    while True:
        now = datetime.datetime.today()

        # Get new tweets; copy the slice so the column assignments
        # below do not modify a view of tmp_tweets.
        tmp_tweets = search_tweets(topic, tweets_per_call)
        new_tweets = tmp_tweets[tmp_tweets["id"] > last_id].copy()
        print("Found {} new tweets.".format(len(new_tweets)))

        # Skip if no new tweets found
        if len(new_tweets) == 0:
            sleep_until(now + tdelta(seconds=seconds_between_calls))
            continue

        # Sentiment. As above, get_sentiment must return a pandas Series
        # with one value per target column.
        new_tweets[["sent_neg", "sent_neu", "sent_pos", "sent_comp"]] = \
            new_tweets["text"].apply(get_sentiment)

        # Save tweets
        export_tweets(new_tweets, tweets_filename)

        print(new_tweets[["date", "id", "sent_comp"]])

        last_id = new_tweets["id"].max()
        sleep_until(now + tdelta(seconds=seconds_between_calls))
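
Each harvester blocks in its own infinite loop, so running both at once takes separate threads (or processes). A hedged runner sketch; the topic strings and filenames are placeholders:

import threading

if __name__ == "__main__":
    # Hypothetical topics and output files; adjust to the data tracked.
    article_worker = threading.Thread(
        target=harvest_articles, args=("bitcoin", "articles.csv", 30),
        daemon=True)
    tweet_worker = threading.Thread(
        target=harvest_tweets, args=("#bitcoin", "tweets.csv", 30),
        daemon=True)
    article_worker.start()
    tweet_worker.start()
    article_worker.join()
    tweet_worker.join()
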
def get_uframe_array(array_id,
                     out_dir=None,
                     exec_dpa=True,
                     urlonly=False,
                     alltimes=False,
                     deltatype='days',
                     deltaval=1,
                     provenance=False,
                     limit=True,
                     uframe_base=UFrame(),
                     file_format='netcdf'):
    """
    Download NetCDF / JSON files for the most recent deltaval deltatype(s) of
    data (default: one day) for telemetered and recovered data streams for the
    specified array_id.

    Args:
        array_id: name of the array
        out_dir: top-level directory destination for writing NetCDF / JSON files.
            Defaults to the current working directory.
        exec_dpa: set to False to NOT execute L1/L2 data product algorithms prior
            to download.  Defaults to True

    Returns:
        urls: array of dictionaries containing the url, response code and reason
    """

    fetched_urls = []

    if deltatype not in _valid_relativedeltatypes:
        sys.stderr.write(
            'Invalid dateutil.relativedelta type: {:s}\n'.format(deltatype))
        sys.stderr.flush()
        return fetched_urls

    if not array_id:
        sys.stderr.write('Invalid array id specified\n')
        sys.stderr.flush()
        return fetched_urls
    if not urlonly and not out_dir:
        out_dir = os.path.realpath(os.curdir)

    if not urlonly and not os.path.exists(out_dir):
        sys.stdout.write('Creating output directory: {:s}\n'.format(out_dir))
        sys.stdout.flush()
        try:
            os.makedirs(out_dir)
        except OSError as e:
            sys.stderr.write(str(e))
            sys.stderr.flush()
            return fetched_urls

    # Make sure the array is in uFrame
    if not urlonly:
        sys.stdout.write('Fetching arrays ({:s})\n'.format(uframe_base))
        sys.stdout.flush()

    arrays = get_arrays(array_id=array_id, uframe_base=uframe_base)
    if not arrays:
        sys.stderr.write(
            'Array {:s} does not exist in uFrame\n'.format(array_id))
        sys.stderr.flush()
        return fetched_urls

    array = arrays[0]
    if not urlonly:
        sys.stdout.write('{:s}: Array exists...\n'.format(array))
        sys.stdout.flush()

    # Fetch the platforms on the array
    if not urlonly:
        sys.stdout.write(
            'Fetching array platforms ({:s})\n'.format(uframe_base))
        sys.stdout.flush()

    platforms = get_platforms(array, uframe_base=uframe_base)
    if not platforms:
        sys.stderr.write(
            '{:s}: No platforms found for specified array\n'.format(array))
        sys.stderr.flush()
        return fetched_urls

    if limit:
        limit = 10000  # limit to 10000 points
    else:
        limit = -1  # no limit

    for platform in platforms:

        p_name = '{:s}-{:s}'.format(array, platform)
        if not urlonly:
            sys.stdout.write(
                '{:s}: Fetching platform data sensors ({:s})\n'.format(
                    p_name, uframe_base))
            sys.stdout.flush()

        sensors = get_platform_sensors(array,
                                       platform,
                                       uframe_base=uframe_base)
        if not sensors:
            sys.stderr.write(
                '{:s}: No data sensors found for this platform\n'.format(
                    p_name))
            sys.stderr.flush()
            continue

        if not urlonly:
            sys.stdout.write('{:s}: {:d} sensors fetched\n'.format(
                p_name, len(sensors)))
            sys.stdout.flush()

        if not urlonly:
            sys.stdout.write(
                'Fetching platform sensors ({:s})\n'.format(uframe_base))
            sys.stdout.flush()
        for sensor in sensors:
            # Fetch sensor metadata

            meta = get_sensor_metadata(array,
                                       platform,
                                       sensor,
                                       uframe_base=uframe_base)
            if not meta:
                sys.stderr.write(
                    '{:s}: No metadata found for sensor: {:s}\n'.format(
                        p_name, sensor))
                sys.stderr.flush()
                continue

            for metadata in meta['times']:
                if alltimes:
                    ts0 = metadata['beginTime']
                    ts1 = metadata['endTime']
                else:
                    dt1 = parser.parse(metadata['endTime'])
                    if dt1.year < 2000:
                        sys.stderr.write(
                            '{:s}: Invalid metadata endTime: {:s}\n'.format(
                                p_name, metadata['endTime']))
                        sys.stderr.flush()
                        continue

                    dt0 = dt1 - tdelta(**{deltatype: deltaval})
                    ts1 = metadata['endTime']
                    ts0 = '{:s}Z'.format(
                        dt0.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3])

                stream = metadata['stream']
                method = metadata['method']
                dest_dir = os.path.join(out_dir, p_name,
                                        method) if not urlonly else None

                fetched_url = fetch_uframe_time_bound_stream(
                    uframe_base=uframe_base,
                    subsite=array,
                    node=platform,
                    sensor=sensor,
                    method=method,
                    stream=stream,
                    begin_datetime=ts0,
                    end_datetime=ts1,
                    file_format=file_format,
                    exec_dpa=exec_dpa,
                    urlonly=urlonly,
                    dest_dir=dest_dir,
                    provenance=provenance,
                    limit=str(limit))
                fetched_urls.append(fetched_url)

    return fetched_urls
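
A typical call, sketched under the assumption that UFrame() points at a reachable uFrame server; the array id and output directory are placeholders:

# Fetch the most recent week of data for one array, writing NetCDF
# files under ./data. Each entry in the returned list describes one
# request (url, response code, reason).
urls = get_uframe_array('CP01CNSM',
                        out_dir='./data',
                        deltatype='weeks',
                        deltaval=1,
                        file_format='netcdf')
for u in urls:
    print(u)
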
Example 5
    def instrument_to_query(self,
                            ref_des,
                            telemetry=None,
                            time_delta_type=None,
                            time_delta_value=None,
                            begin_ts=None,
                            end_ts=None,
                            time_check=True,
                            exec_dpa=True,
                            application_type='netcdf',
                            provenance=True,
                            limit=-1,
                            annotations=False,
                            user=None,
                            email=None):
        '''Return the list of request urls that conform to the UFrame API for the specified
        reference_designator.

        Parameters:
            ref_des: partial or fully-qualified reference designator
            telemetry: telemetry type (Default is all telemetry types)
            time_delta_type: type for calculating the subset start time, i.e.: years, months, weeks, days.
                Must be a type kwarg accepted by dateutil.relativedelta
            time_delta_value: positive integer value to subtract from the end time to get the start time for subsetting
            begin_ts: ISO-8601 formatted datestring specifying the dataset start time
            end_ts: ISO-8601 formatted datestring specifying the dataset end time
            time_check: set to True (default) to ensure the request times fall within the stream data availability
            exec_dpa: boolean value specifying whether to execute all data product algorithms to return L1/L2 parameters (Default is True)
            application_type: 'netcdf' or 'json' (Default is 'netcdf')
            provenance: boolean value specifying whether provenance information should be included in the data set (Default is True)
            limit: integer value ranging from -1 to 10000.  A value of -1 (default) results in a non-decimated dataset
            annotations: boolean value (True or False) specifying whether to include all dataset annotations
        '''
        
        urls = []
        
        instruments = self.search_instruments(ref_des)
        if not instruments:
            return urls
        
        self._port = 12576
        self._url = '{:s}:{:d}/sensor/inv'.format(self._base_url, self._port)    
        
        if time_delta_type and time_delta_value:
            if time_delta_type not in _valid_relativedeltatypes:
                sys.stderr.write('Invalid dateutil.relativedelta type: {:s}\n'.format(time_delta_type))
                sys.stderr.flush()
                return urls
        
        begin_dt = None
        end_dt = None
        if begin_ts:
            try:
                begin_dt = parser.parse(begin_ts)
            except ValueError as e:
                sys.stderr.write('Invalid begin_ts: {:s} ({:s})\n'.format(begin_ts, str(e)))
                sys.stderr.flush()
                return urls    
                
        if end_ts:
            try:
                end_dt = parser.parse(end_ts)
            except ValueError as e:
                sys.stderr.write('Invalid end_ts: {:s} ({:s})\n'.format(end_ts, str(e)))
                sys.stderr.flush()
                return urls
                
        for instrument in instruments:
            
            # Validate the reference designator format
            if not self.validate_reference_designator(instrument):
                sys.stderr.write('Invalid format for reference designator: {:s}\n'.format(instrument))
                sys.stderr.flush()
                continue
                
            #sys.stdout.write('Instrument: {:s}\n'.format(instrument))
                
            # Store the metadata for this instrument
            meta = self.toc[instrument]
            
            # Break the reference designator up
            r_tokens = instrument.split('-')
            
            for stream in meta['streams']:
                
                #sys.stdout.write('Stream: {:s}\n'.format(stream['stream']))
                
                if telemetry and stream['method'].find(telemetry) == -1:
                    continue
                    
                #Figure out what we're doing for time
                dt0 = None
                dt1 = None
                
                stream_dt0 = parser.parse(stream['beginTime'])
                stream_dt1 = parser.parse(stream['endTime'])
                
                if time_delta_type and time_delta_value:
                    dt1 = stream_dt1
                    dt0 = dt1 - tdelta(**{time_delta_type: time_delta_value})
                else:
                    if begin_dt:
                        dt0 = begin_dt
                    else:
                        dt0 = stream_dt0
                        
                    if end_dt:
                        dt1 = end_dt
                    else:
                        dt1 = stream_dt1
                
                # Format the endDT and beginDT values for the query
                try:
                    ts1 = dt1.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
                except ValueError as e:
                    sys.stderr.write('{:s}-{:s}: {:s}\n'.format(instrument, stream['stream'], str(e)))
                    continue

                try:
                    ts0 = dt0.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
                except ValueError as e:
                    sys.stderr.write('{:s}-{:s}: {:s}\n'.format(instrument, stream['stream'], str(e)))
                    continue
                        
                # Make sure the specified or calculated start and end time are within
                # the stream metadata times if time_check=True
                if time_check:
                    if dt1 > stream_dt1:
                        sys.stderr.write('time_check ({:s}): End time exceeds stream endTime ({:s} > {:s})\n'.format(stream['stream'], ts1, stream['endTime']))
                        sys.stderr.write('time_check ({:s}): Setting request end time to stream endTime\n'.format(stream['stream']))
                        sys.stderr.flush()
                        ts1 = stream['endTime']
                    
                    if dt0 < stream_dt0:
                        sys.stderr.write('time_check ({:s}): Start time is earlier than stream beginTime ({:s} < {:s})\n'.format(stream['stream'], ts0, stream['beginTime']))
                        sys.stderr.write('time_check ({:s}): Setting request begin time to stream beginTime\n'.format(stream['stream']))
                        ts0 = stream['beginTime']
                       
                # Check that ts0 < ts1
                dt0 = parser.parse(ts0)
                dt1 = parser.parse(ts1)
                if dt0 >= dt1:
                    sys.stderr.write('{:s}: Invalid time range specified ({:s} >= {:s})\n'.format(stream['stream'], ts0, ts1))
                    continue

                # Create the url
                stream_url = '{:s}/{:s}/{:s}/{:s}-{:s}/{:s}/{:s}?beginDT={:s}&endDT={:s}&format=application/{:s}&limit={:d}&execDPA={:s}&include_provenance={:s}'.format(
                    self.url,
                    r_tokens[0],
                    r_tokens[1],
                    r_tokens[2],
                    r_tokens[3],
                    stream['method'],
                    stream['stream'],
                    ts0,
                    ts1,
                    application_type,
                    limit,
                    str(exec_dpa).lower(),
                    str(provenance).lower())
                    
                if user:
                    stream_url = '{:s}&user={:s}'.format(stream_url, user)
                    
                if email:
                    stream_url = '{:s}&email={:s}'.format(stream_url, email)
                    
                urls.append(stream_url)
                            
        return urls
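
A hedged usage sketch for the method above. The enclosing client class is not shown in this excerpt; UFrameClient and its no-argument constructor are assumptions, and the reference designator is a placeholder:

# Build request urls for the last 7 days of data from every instrument
# matching a partial reference designator.
client = UFrameClient()
urls = client.instrument_to_query('CP01CNSM-SBD11',
                                  time_delta_type='days',
                                  time_delta_value=7,
                                  application_type='netcdf')
for url in urls:
    print(url)
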
def get_uframe_array(array_id,
                     out_dir=None,
                     exec_dpa=True,
                     urlonly=False,
                     deltatype='days',
                     deltaval=1,
                     provenance=False,
                     limit=True,
                     uframe_base=UFrame(),
                     file_format='netcdf'):
    """
    Download NetCDF / JSON files for the most recent deltaval deltatype(s) of
    data (default: one day) for telemetered and recovered data streams for the
    specified array_id.

    Args:
        array_id: name of the array
        out_dir: top-level directory destination for writing NetCDF / JSON files.
            Defaults to the current working directory.
        exec_dpa: set to False to NOT execute L1/L2 data product algorithms prior
            to download.  Defaults to True

    Returns:
        urls: array of dictionaries containing the url, response code and reason
    """

    fetched_urls = []

    if deltatype not in _valid_relativedeltatypes:
        sys.stderr.write('Invalid dateutil.relativedelta type: {:s}\n'.format(deltatype))
        sys.stderr.flush()
        return fetched_urls

    if not array_id:
        sys.stderr.write('Invalid array id specified\n')
        sys.stderr.flush()
        return fetched_urls
    if not urlonly and not out_dir:
        out_dir = os.path.realpath(os.curdir)

    if not urlonly and not os.path.exists(out_dir):
        sys.stdout.write('Creating output directory: {:s}\n'.format(out_dir))
        sys.stdout.flush()
        try:
            os.makedirs(out_dir)
        except OSError as e:
            sys.stderr.write(str(e))
            sys.stderr.flush()
            return fetched_urls

    # Make sure the array is in uFrame
    if not urlonly:
        sys.stdout.write('Fetching arrays ({:s})\n'.format(uframe_base))
        sys.stdout.flush()

    arrays = get_arrays(array_id=array_id, uframe_base=uframe_base)
    if not arrays:
        sys.stderr.write('Array {:s} does not exist in uFrame\n'.format(array_id))
        sys.stderr.flush()
        return fetched_urls

    array = arrays[0]
    if not urlonly:
        sys.stdout.write('{:s}: Array exists...\n'.format(array))
        sys.stdout.flush()

    # Fetch the platforms on the array
    if not urlonly:
        sys.stdout.write('Fetching array platforms ({:s})\n'.format(uframe_base))
        sys.stdout.flush()

    platforms = get_platforms(array, uframe_base=uframe_base)
    if not platforms:
        sys.stderr.write('{:s}: No platforms found for specified array\n'.format(array))
        sys.stderr.flush()
        return fetched_urls
    
    if limit:
        limit = 10000  # limit to 10000 points
    else:
        limit = -1  # no limit

    for platform in platforms:

        p_name = '{:s}-{:s}'.format(array, platform)
        if not urlonly:
            sys.stdout.write('{:s}: Fetching platform data sensors ({:s})\n'.format(p_name, uframe_base))
            sys.stdout.flush()

        sensors = get_platform_sensors(array, platform, uframe_base=uframe_base)
        if not sensors:
            sys.stderr.write('{:s}: No data sensors found for this platform\n'.format(p_name))
            sys.stderr.flush()
            continue

        if not urlonly:
            sys.stdout.write('{:s}: {:d} sensors fetched\n'.format(p_name, len(sensors)))
            sys.stdout.flush()

        if not urlonly:
            sys.stdout.write('Fetching platform sensors ({:s})\n'.format(uframe_base))
            sys.stdout.flush()
        for sensor in sensors:
            # Fetch sensor metadata

            meta = get_sensor_metadata(array, platform, sensor, uframe_base=uframe_base)
            if not meta:
                sys.stderr.write('{:s}: No metadata found for sensor: {:s}\n'.format(p_name, sensor))
                sys.stderr.flush()
                continue

            for metadata in meta['times']:
                dt1 = parser.parse(metadata['endTime'])
                if dt1.year < 2000:
                    sys.stderr.write('{:s}: Invalid metadata endTime: {:s}\n'.format(p_name, metadata['endTime']))
                    sys.stderr.flush()
                    continue

                dt0 = dt1 - tdelta(**{deltatype: deltaval})
                ts1 = metadata['endTime']
                ts0 = dt0.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
                stream = metadata['stream']
                method = metadata['method']
                dest_dir = os.path.join(out_dir, p_name, method) if not urlonly else None

                fetched_url = fetch_uframe_time_bound_stream(
                    uframe_base=uframe_base,
                    subsite=array,
                    node=platform,
                    sensor=sensor,
                    method=method,
                    stream=stream,
                    begin_datetime=ts0,
                    end_datetime=ts1,
                    file_format=file_format,
                    exec_dpa=exec_dpa,
                    urlonly=urlonly,
                    dest_dir=dest_dir,
                    provenance=provenance,
                    limit=str(limit))
                fetched_urls.append(fetched_url)

    return fetched_urls
Example 7
    def instrument_to_query(self,
                            ref_des,
                            user,
                            stream=None,
                            telemetry=None,
                            time_delta_type=None,
                            time_delta_value=None,
                            begin_ts=None,
                            end_ts=None,
                            time_check=True,
                            exec_dpa=True,
                            application_type='netcdf',
                            provenance=True,
                            limit=-1,
                            annotations=False,
                            email=None):
        """Return the list of request urls that conform to the UFrame API for the specified
        fully or partially-qualified reference_designator.  Request urls are formatted
        for either the UFrame m2m API (default) or direct UFrame access, depending
        on the is_m2m property of the UFrameClient instance.
        
        Arguments:
            ref_des: partial or fully-qualified reference designator
            stream: restrict urls to the specified stream
            user: user name for the query
            
        Optional kwargs:
            telemetry: telemetry type (Default is all telemetry types)
            time_delta_type: Type for calculating the subset start time, i.e.: years, months, weeks, days.  Must be a
                type kwarg accepted by dateutil.relativedelta
            time_delta_value: Positive integer value to subtract from the end time to get the start time for subsetting.
            begin_ts: ISO-8601 formatted datestring specifying the dataset start time
            end_ts: ISO-8601 formatted datestring specifying the dataset end time
            time_check: set to true (default) to ensure the request times fall within the stream data availability
            exec_dpa: boolean value specifying whether to execute all data product algorithms to return L1/L2 parameters
                (Default is True)
            application_type: 'netcdf' or 'json' (Default is 'netcdf')
            provenance: boolean value specifying whether provenance information should be included in the data set
                (Default is True)
            limit: integer value ranging from -1 to 10000.  A value of -1 (default) results in a non-decimated dataset
            annotations: boolean value (True or False) specifying whether to include all dataset annotations
        """

        urls = []

        instruments = self.search_instruments(ref_des)
        if not instruments:
            return urls

        if time_delta_type and time_delta_value:
            if time_delta_type not in _valid_relativedeltatypes:
                self._logger.error(
                    'Invalid dateutil.relativedelta type: {:s}'.format(
                        time_delta_type))
                return urls

        begin_dt = None
        end_dt = None
        if begin_ts:
            try:
                begin_dt = parser.parse(begin_ts).replace(tzinfo=pytz.UTC)
            except ValueError as e:
                self._logger.error('Invalid begin_ts: {:s} ({:s})'.format(
                    begin_ts, str(e)))
                return urls

        if end_ts:
            try:
                end_dt = parser.parse(end_ts).replace(tzinfo=pytz.UTC)
            except ValueError as e:
                self._logger.error('Invalid end_ts: {:s} ({:s})'.format(
                    end_ts, str(e)))
                return urls

        for instrument in instruments:

            # Get the streams produced by this instrument
            instrument_streams = self.fetch_instrument_streams(instrument)
            if not instrument_streams:
                self._logger.info(
                    'No streams found for {:s}'.format(instrument))
                continue

            if stream:
                stream_names = [s['stream'] for s in instrument_streams]
                if stream not in stream_names:
                    self._logger.warning('Invalid stream: {:s}-{:s}'.format(
                        instrument, stream))
                    continue

                instrument_streams = [
                    s for s in instrument_streams if s['stream'] == stream
                ]
#                i = stream_names.index(stream)
#                instrument_streams = [instrument_streams[i]]

            if not instrument_streams:
                self._logger.info('{:s}: No streams found'.format(instrument))
                continue

            # Break the reference designator up
            r_tokens = instrument.split('-')

            for instrument_stream in instrument_streams:

                if telemetry and not instrument_stream['method'].startswith(
                        telemetry):
                    continue

                # Figure out what we're doing for time
                try:
                    stream_dt0 = parser.parse(instrument_stream['beginTime'])
                except ValueError:
                    self._logger.error(
                        '{:s}-{:s}: Invalid beginTime ({:s})'.format(
                            instrument, instrument_stream['stream'],
                            instrument_stream['beginTime']))
                    continue

                try:
                    stream_dt1 = parser.parse(instrument_stream['endTime'])
                    # Add 1 second to stream end time to account for milliseconds
                    stream_dt1 = stream_dt1 + tdelta(seconds=1)
                except ValueError:
                    self._logger.error(
                        '{:s}-{:s}: Invalid endTime ({:s})'.format(
                            instrument, instrument_stream['stream'],
                            instrument_stream['endTime']))
                    continue

                if time_delta_type and time_delta_value:
                    dt1 = stream_dt1
                    dt0 = dt1 - tdelta(
                        **{time_delta_type: time_delta_value})
                else:
                    if begin_dt:
                        dt0 = begin_dt
                    else:
                        dt0 = stream_dt0

                    if end_dt:
                        dt1 = end_dt
                    else:
                        dt1 = stream_dt1

                # Format the endDT and beginDT values for the query
                try:
                    ts1 = dt1.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
                except ValueError as e:
                    self._logger.error('{:s}-{:s}: {:s}'.format(
                        instrument, instrument_stream['stream'], str(e)))
                    continue

                try:
                    ts0 = dt0.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
                except ValueError as e:
                    self._logger.error('{:s}-{:s}: {:s}'.format(
                        instrument, instrument_stream['stream'], str(e)))
                    continue

                # Make sure the specified or calculated start and end time are within
                # the stream metadata times if time_check=True
                if time_check:
                    if dt1 > stream_dt1:
                        self._logger.warning(
                            '{:s}-{:s} time check - End time exceeds stream endTime'
                            .format(ref_des, instrument_stream['stream']))
                        self._logger.warning(
                            '{:s}-{:s} time check - Setting request end time to stream endTime'
                            .format(ref_des, instrument_stream['stream']))
                        ts1 = instrument_stream['endTime']

                    if dt0 < stream_dt0:
                        self._logger.warning(
                            '{:s}-{:s} time check - Start time is earlier than stream beginTime'
                            .format(ref_des, instrument_stream['stream']))
                        self._logger.warning(
                            '{:s}-{:s} time check - Setting request begin time to stream beginTime'
                            .format(ref_des, instrument_stream['stream']))
                        ts0 = instrument_stream['beginTime']

                    # Check that ts0 < ts1
                    dt0 = parser.parse(ts0)
                    dt1 = parser.parse(ts1)
                    if dt0 >= dt1:
                        self._logger.warning(
                            '{:s}-{:s} - Invalid time range specified'.format(
                                instrument, instrument_stream['stream']))
                        continue

                # Create the url
                end_point = 'sensor/inv/{:s}/{:s}/{:s}-{:s}/{:s}/{:s}?beginDT={:s}&endDT={:s}&format=application/{:s}&limit={:d}&execDPA={:s}&include_provenance={:s}&user={:s}'.format(
                    r_tokens[0], r_tokens[1], r_tokens[2], r_tokens[3],
                    instrument_stream['method'], instrument_stream['stream'],
                    ts0, ts1, application_type, limit,
                    str(exec_dpa).lower(),
                    str(provenance).lower(), user)

                if email:
                    end_point = '{:s}&email={:s}'.format(end_point, email)

                urls.append(self.build_request(12576, end_point))

        return urls
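
A hedged usage sketch for this m2m variant. UFrameClient and its constructor argument are assumptions (only instrument_to_query appears in this excerpt); the reference designator, user name, and base url are placeholders:

# Build m2m request urls for the most recent day of a single instrument.
client = UFrameClient('https://ooinet.oceanobservatories.org')
urls = client.instrument_to_query('GP03FLMA-RIM01-02-CTDMOG040',
                                  user='jdoe',
                                  time_delta_type='days',
                                  time_delta_value=1)
for url in urls:
    print(url)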