Example #1
0
    def compute_af_manifest(self, geo_list):
        """
        Build the list of active fire (MYD14) file names matching a set of
        geolocation files.

        :param geo_list: list of geolocation file names
        :return: list of active fire file names, one per geolocation file
        """
        last_prefix = ''
        candidates = []

        # Fetch the directory listing once per distinct year/day prefix; the
        # listing is a superset of the files we actually want.
        for geo_name in geo_list:
            current = geo_name[:19]
            if current != last_prefix:
                last_prefix = current
                candidates.extend(
                    get_dList('%s/%s/%s/%s' %
                              (self.url_base_hdf, self.filepath_af,
                               current[7:11], current[11:14])))

        # For each geo file, binary-search for a fabricated name that sorts
        # just after any real file with the same time signature; the entry
        # right before the insertion point is the file we want.
        return [
            candidates[bisect(candidates,
                              'MYD14' + geo_name[5:24] + '99999999999999.hdf') - 1]
            for geo_name in geo_list
        ]
Example #2
0
def laads_range_manifest(gran, from_utc, to_utc):
    """
    Build the manifest of files for a granule over the given UTC time range.

    :param gran: dict with at least 'rel_path' and 'name' keys
    :param from_utc: start of the range (datetime)
    :param to_utc: end of the range (datetime)
    :return: list of file names covering the range
    """
    year = from_utc.year

    # The remote layout is per-year, so split a multi-year query into two
    # recursive calls that each stay inside a single year.
    if year != to_utc.year:
        year_end = datetime(year=year, month=12, day=31, hour=23, minute=59)
        next_start = datetime(year=year + 1, month=1, day=1, hour=0, minute=0)
        return (laads_range_manifest(gran, from_utc, year_end) +
                laads_range_manifest(gran, next_start, to_utc))

    # Files live in per-day folders named by 1-based day-of-year.
    first_day = (from_utc - datetime(year, 1, 1)).days + 1
    last_day = (to_utc - datetime(year, 1, 1)).days + 1

    url = ('ftp://ladsweb.nascom.nasa.gov/' + gran['rel_path'] +
           gran['name'] + '/%s/%s')
    file_list = []
    for day in range(first_day, last_day + 1):
        file_list.extend(get_dList(url % (str(year), str(day))))

    # Fabricate sample names like MOD03.AYYYYDDD.HHMM.006.#############.hdf
    # that sort just after every real file with the same time signature, then
    # bisect to trim the listing to the requested window.
    sample = '%s.A%04d%03d.%02d%02d.006.9999999999999.hdf'
    start_name = sample % (gran['name'], year, first_day,
                           from_utc.hour, from_utc.minute)
    end_name = sample % (gran['name'], year, last_day,
                         to_utc.hour, to_utc.minute)

    # Start one entry before the insertion point so the file spanning
    # from_utc is included.
    lo = bisect(file_list, start_name) - 1
    hi = bisect(file_list, end_name)
    return file_list[lo:hi]
Example #3
0
    def manifest_from_geo(self, geo_list, granule_name):
        """
        Build the manifest of <granule_name> files matching the time
        signatures of the given geolocation files.

        :param geo_list: list of geolocation file names
        :param granule_name: granule prefix, e.g. 'MYD14'
        :return: list of matching file names, one per geolocation file
        """
        seen_prefix = ''
        listing = []

        # Pull the directory listing for each distinct year/day prefix; this
        # yields a superset of the files we care about, refined below.
        for name in geo_list:
            if name[:19] != seen_prefix:
                seen_prefix = name[:19]
                listing.extend(get_dList(
                    self.url_base_hdf + '/' + self.filepath_af + '/' +
                    str(seen_prefix[7:11]) + '/' + str(seen_prefix[11:14])))

        # Binary-search for a fabricated name that sorts just after the real
        # file with the same time signature; the entry right before the
        # insertion point is the one we want.
        return [
            listing[bisect(listing,
                           granule_name + name[5:24] + '99999999999999.hdf') - 1]
            for name in geo_list
        ]
Example #4
0
    def compute_l0_manifest(self, from_utc, to_utc):
        """
        Compute list of files in the source for the given time frame.

        :param from_utc: time UTC format
        :param to_utc: time UTC format
        :return: list of file names as strings
        """
        # Retrieve the directory listing of the level-0 folder.
        dList = get_dList(self.url_base_l0 + '/' + self.filepath_l0)
        if not dList:
            # Nothing published for this source; avoid indexing an empty list.
            return []

        # Fabricate the name a file starting exactly at from_utc would have:
        # RNSCA-RVIRS_npp_dYYYYMMdd_thhmmssS_ehhmmssS_bnnnnn_cnn..._aaaa_aaa.h5
        filename = 'RNSCA-RVIRS_npp_d%04d%02d%02d_t%02d00000_e000000_b00000_c00000000000000000000_aaaa_aaa.h5' % (
            from_utc.year, from_utc.month, from_utc.day, from_utc.hour)

        # Binary search for where that name would be inserted.  Unless the
        # fabricated name is actually present, step back one entry so the file
        # covering from_utc is included.  Clamp at 0 so a query earlier than
        # every listed file starts at the first file -- previously index could
        # become -1 and dList[-1] silently wrapped to the LAST file.
        index = bisect(dList, filename)
        if index == len(dList) or dList[index] != filename:
            index -= 1
        index = max(index, 0)

        current_time = from_utc
        level0manifest = []

        # There are odd gaps between file times, so simply adopt the start
        # time of the next file as current_time and walk forward until it
        # passes to_utc.
        while current_time < to_utc:
            level0manifest.append(dList[index])

            index += 1
            if index >= len(dList):
                break

            current_file = dList[index]
            # Decode the dYYYYMMdd_thhmmss portion of the next file name; if
            # it exceeds to_utc we already have every file we care about.
            current_time = current_time.replace(
                year=int(current_file[17:21]),
                month=int(current_file[21:23]),
                day=int(current_file[23:25]),
                hour=int(current_file[27:29]),
                minute=int(current_file[29:31]),
                second=int(current_file[31:33]))

        return level0manifest
Example #5
0
def laads_list_manifest(gran, gran_list):
    """
    Build the manifest of <gran> files matching the time signatures of the
    given granule file names.

    :param gran: dict with at least 'rel_path' and 'name' keys
    :param gran_list: list of granule file names to match against
    :return: list of matching file names, one per entry of gran_list
    """
    # Guard the empty query explicitly; gran_list[0] below would raise.
    if not gran_list:
        return []

    url = ('ftp://ladsweb.nascom.nasa.gov/' + gran['rel_path'] +
           gran['name'] + '/%s/%s')
    prefix = ''
    file_list = []

    # Offset of the '.AYYYYDDD' time signature inside the file names.
    i = gran_list[0].find('.')

    # Fetch the per-day directory listing once per distinct year/day prefix;
    # this is a superset of the files we actually want.
    for g in gran_list:
        if g[:i + 10] != prefix:
            prefix = g[:i + 10]
            file_list.extend(
                get_dList(url % (prefix[i + 2:i + 6], prefix[i + 6:i + 9])))

    # Fabricate a name that sorts just after the real file with the same time
    # signature and bisect for it; the entry before the insertion point is
    # the file we want.  A list comprehension (rather than map()) keeps the
    # return type a list on Python 3, consistent with the other *_manifest
    # helpers.
    search_string = gran['name'] + '%s.9999999999999.hdf'
    return [file_list[bisect(file_list, search_string % g[i:i + 18]) - 1]
            for g in gran_list]
Example #6
0
    def compute_geo_manifest(self, from_utc, to_utc):
        """
        Get list of geolocation file names for the given time frame.

        :param from_utc: start time UTC
        :param to_utc: end time UTC
        :return: list of file names as strings
        """
        # The remote layout is per-year, so split a multi-year query into two
        # recursive calls that each stay within a single year.
        start_year = from_utc.year
        if start_year != to_utc.year:
            # Recurse through self: the original bare compute_geo_manifest(...)
            # call raised NameError because this is a method, not a module
            # function.
            return (self.compute_geo_manifest(
                        from_utc,
                        datetime(year=start_year, month=12, day=31,
                                 hour=23, minute=59)) +
                    self.compute_geo_manifest(
                        datetime(year=start_year + 1, month=1, day=1,
                                 hour=0, minute=0),
                        to_utc))

        # The source stores data in per-day folders named by 1-based
        # day-of-year, so pull the listing for each day in the window.
        start_day = (from_utc - datetime(start_year, 1, 1)).days + 1
        end_day = (to_utc - datetime(start_year, 1, 1)).days + 1

        file_list = []
        for day in range(start_day, end_day + 1):
            file_list.extend(
                get_dList(self.url_base_hdf + '/' + self.filepath_geo + '/' +
                          str(start_year) + '/' + str(day)))

        # Trim the listing to the requested window: fabricate sample names
        # like MOD03.AYYYYDDD.HHMM.006.#############.hdf that sort just after
        # any real file with the same time signature.
        start_filename = 'MOD03.A%04d%03d.%02d%02d.006.9999999999999.hdf' % (
            start_year, start_day, from_utc.hour, from_utc.minute)

        # Start one entry before the insertion point so the file spanning
        # from_utc is included.
        start_index = bisect(file_list, start_filename) - 1

        end_filename = 'MOD03.A%04d%03d.%02d%02d.006.9999999999999.hdf' % (
            start_year, end_day, to_utc.hour, to_utc.minute)
        end_index = bisect(file_list, end_filename)

        return file_list[start_index:end_index]
Example #7
0
def manifest_from_geo(geo_list, granule_name):
    """
    Build the manifest of <granule_name> file names matching the time
    signatures of the given geolocation files.

    :param geo_list: list of geolocation file names
    :param granule_name: granule prefix, e.g. 'MYD14'
    :return: list of matching file names, one per entry of geo_list

    NOTE(review): this is a module-level function, yet it references
    ``self.url_base_hdf`` / ``self.filepath_af`` below; ``self`` is undefined
    here, so any call with a non-empty geo_list raises NameError.  It looks
    like a method that lost its class (compare the identically named method
    variant elsewhere) -- confirm and either move it back into the class or
    pass the source object in explicitly.
    """

    # prefix tracks the 19-char year/day prefix of the last fetched listing.
    prefix = ''
    file_list = []

    # Pull the directory listing once per distinct prefix; the listing is a
    # superset of the files we care about, refined below.
    for g in geo_list:
        if g[:19] != prefix:
            prefix = g[:19]
            file_list.extend(
                get_dList(self.url_base_hdf + '/' + self.filepath_af + '/' +
                          str(prefix[7:11]) + '/' + str(prefix[11:14])))

    manifest = []

    # Search for what the name should look like and use that index to add that name to the manifest
    # this takes n*log(n) time, which I think is pretty good
    for g in geo_list:
        manifest.append(
            file_list[bisect(file_list, granule_name + g[5:24] +
                             '99999999999999.hdf') - 1])

    return manifest
Example #8
0
    def compute_l0_manifest_g(self, from_utc, to_utc):
        """
        Compute list of GBAD files (AQUA specific) in the source for the
        given time frame.

        :param from_utc: time UTC format
        :param to_utc: time UTC format
        :return: list of file names as strings
        """
        # Retrieve the directory listing of the GBAD level-0 folder.
        dList = get_dList(self.url_base_l0 + '/' + self.filepath_l0_g)
        if not dList:
            # Nothing published; avoid indexing into an empty list below.
            return []

        # Fabricate the name a file starting exactly at from_utc would have.
        # Filenames have this pattern: P1540064AAAAAAAAAAAAAAyyDDDhhmmss000.PDS
        current_time = from_utc

        days = (current_time - datetime(current_time.year, 1, 1)).days + 1
        year = current_time.year % 100

        filename = 'P1540957AAAAAAAAAAAAAA%02d%03d%02d%02d%02d000.PDS' % (
            year, days, current_time.hour, current_time.minute,
            current_time.second)

        # Binary search for where that name would be inserted.  Unless the
        # fabricated name is actually present, step back one group of 4 (each
        # time has 4 GBAD files; only the first 2 matter) so the files
        # covering from_utc are included.  Clamp at 0 so a query earlier than
        # every listed file starts at the first group -- previously the index
        # could go negative and wrap to the end of the list.  (Clamping
        # assumes the listing starts on a 4-file group boundary -- TODO
        # confirm against the FTP layout.)
        index = bisect(dList, filename)
        if index == len(dList) or dList[index] != filename:
            index -= 4
        index = max(index, 0)

        level0manifest = []

        while current_time < to_utc:
            # Keep the 000.PDS member and its 001.PDS partner; guard the
            # partner in case the listing ends mid-group.
            level0manifest.append(dList[index])
            if index + 1 < len(dList):
                level0manifest.append(dList[index + 1])

            # Move to the next group of 4; running out of names before
            # reaching to_utc is fine, just stop.
            index = index + 4
            if index >= len(dList):
                break

            current_file = dList[index]

            # Decode yyDDDhhmmss from the next file name (the 2-digit year is
            # assumed to be 20xx); if it exceeds to_utc we have all the files
            # we care about.
            current_time = current_time.replace(year=2000 +
                                                int(current_file[22:24]))
            current_time = current_time.replace(day=1, month=1)
            current_time = current_time + timedelta(
                days=int(current_file[24:27]) - 1)
            current_time = current_time.replace(
                hour=int(current_file[27:29]),
                minute=int(current_file[29:31]),
                second=int(current_file[31:33]))

        return level0manifest
Example #9
0
    def compute_geo_manifest(self, from_utc, to_utc):
        """
        Get list of geolocation file names for the given time frame.

        :param from_utc: start time UTC
        :param to_utc: end time UTC
        :return: list of file names as strings
        """
        # The remote layout is per-year, so split a multi-year query into two
        # recursive calls that each stay within a single year.
        start_year = from_utc.year
        if start_year != to_utc.year:
            # Recurse through self: the original bare compute_geo_manifest(...)
            # call raised NameError because this is a method, not a module
            # function.
            return (self.compute_geo_manifest(
                        from_utc,
                        datetime(year=start_year, month=12, day=31,
                                 hour=23, minute=59)) +
                    self.compute_geo_manifest(
                        datetime(year=start_year + 1, month=1, day=1,
                                 hour=0, minute=0),
                        to_utc))

        # The source stores data in per-day folders named by 1-based
        # day-of-year, so pull the listing for each day in the window.
        start_day = (from_utc - datetime(start_year, 1, 1)).days + 1
        end_day = (to_utc - datetime(start_year, 1, 1)).days + 1

        file_list = []
        for day in range(start_day, end_day + 1):
            file_list.extend(
                get_dList(self.url_base_hdf + '/' + self.filepath_geo + '/' +
                          str(start_year) + '/' + str(day)))

        # NOTE(review): geoMeta below is built but never used or returned --
        # it looks like the start of geoMeta-based matching that was never
        # finished.  Kept as-is since it has no side effects; confirm whether
        # it can be removed or was meant to be returned.
        geoMeta = []

        i = from_utc.replace(hour=0, minute=0, second=0, microsecond=0)
        end_date = to_utc.replace(hour=0, minute=0, second=0, microsecond=0)
        gran = 'MYD03'
        url = 'ftp://ladsweb.nascom.nasa.gov'
        path = 'geoMeta/6/AQUA'

        while i <= end_date:
            geoMeta.append('%s/%s/%04d/%s_%04d-%02d-%02d.txt' %
                           (url, path, i.year, gran, i.year, i.month, i.day))
            i = i + timedelta(days=1)

        # Trim the listing to the requested window: fabricate sample names
        # like MYD03.AYYYYDDD.HHMM.006.#############.hdf that sort just after
        # any real file with the same time signature.
        start_filename = 'MYD03.A%04d%03d.%02d%02d.006.9999999999999.hdf' % (
            start_year, start_day, from_utc.hour, from_utc.minute)

        # Start one entry before the insertion point so the file spanning
        # from_utc is included.
        start_index = bisect(file_list, start_filename) - 1

        end_filename = 'MYD03.A%04d%03d.%02d%02d.006.9999999999999.hdf' % (
            start_year, end_day, to_utc.hour, to_utc.minute)
        end_index = bisect(file_list, end_filename)

        return file_list[start_index:end_index]
Example #10
0
    def compute_l0_manifest(self, from_utc, to_utc):
        """
        Compute list of files in the source for the given time frame.

        :param from_utc: time UTC format
        :param to_utc: time UTC format
        :return: list of file names as strings
        """
        # Retrieve the directory listing of the level-0 folder.
        dList = get_dList(self.url_base_l0 + '/' + self.filepath_l0)
        if not dList:
            # Nothing published; avoid indexing into an empty list below.
            return []

        # Fabricate the name a file starting exactly at from_utc would have.
        # Filenames have this pattern: P0420064AAAAAAAAAAAAAAyyDDDhhmmss000.PDS
        current_time = from_utc

        days = (current_time - datetime(current_time.year, 1, 1)).days + 1
        year = current_time.year % 100

        filename = 'P0420064AAAAAAAAAAAAAA%02d%03d%02d%02d%02d000.PDS' % (
            year, days, current_time.hour, current_time.minute,
            current_time.second)

        # Binary search for where that name would be inserted.  Unless the
        # fabricated name is actually present, step back one pair (files come
        # in 000.PDS/001.PDS pairs) so the pair covering from_utc is included.
        # Clamp at 0 so a query earlier than every listed file starts at the
        # first pair -- previously the index could go negative and wrap to
        # the end of the list.  (Clamping assumes the listing starts on a
        # pair boundary -- TODO confirm against the FTP layout.)
        index = bisect(dList, filename)
        if index == len(dList) or dList[index] != filename:
            index -= 2
        index = max(index, 0)

        level0manifest = []

        # Fill the manifest pair by pair until the next pair starts after
        # to_utc.
        while current_time < to_utc:
            # Add the 000.PDS file, then its 001.PDS partner; guard the
            # partner in case the listing ends mid-pair.
            level0manifest.append(dList[index])
            if index + 1 < len(dList):
                level0manifest.append(dList[index + 1])

            # Move to the next pair; running out of files is fine, just stop.
            index = index + 2
            if index >= len(dList):
                break

            current_file = dList[index]

            # Decode yyDDDhhmmss from the next file name (the 2-digit year is
            # assumed to be 20xx); if it exceeds to_utc we have all the data
            # we want.
            current_time = current_time.replace(year=2000 +
                                                int(current_file[22:24]))
            current_time = current_time.replace(day=1, month=1)
            current_time = current_time + timedelta(
                days=int(current_file[24:27]) - 1)
            current_time = current_time.replace(
                hour=int(current_file[27:29]),
                minute=int(current_file[29:31]),
                second=int(current_file[31:33]))

        return level0manifest