def compute_af_manifest(self, geo_list):
    """
    Get list of active fire file names from a set of geolocation files

    :param geo_list: list containing geolocation file names; the names are sliced
                     at fixed offsets, so they are assumed to look like
                     MYD03.AYYYYDDD.HHMM.006.#############.hdf
    :return: list with one active fire file name per entry of geo_list
    """
    file_list = []
    listed_days = set()
    for geo_name in geo_list:
        # Characters 7..13 hold YYYYDDD, which is everything the directory path
        # depends on, so each year/day directory is listed once rather than
        # once per granule time.
        day_key = geo_name[7:14]
        if day_key in listed_days:
            continue
        listed_days.add(day_key)
        file_list.extend(
            get_dList(self.url_base_hdf + '/' + self.filepath_af + '/' +
                      str(geo_name[7:11]) + '/' + str(geo_name[11:14])))

    # bisect needs sorted input, whatever order the days were requested in
    file_list.sort()

    manifest = []
    for geo_name in geo_list:
        # Made-up name carrying the geolocation file's date/time fields and a
        # production stamp of all nines; the entry just before its insertion
        # point is taken as the matching active fire file.
        probe = 'MYD14' + geo_name[5:24] + '99999999999999.hdf'
        # NOTE(review): if nothing sorts before the probe the index is -1 and
        # the last listed file is picked; an empty listing raises IndexError.
        manifest.append(file_list[bisect(file_list, probe) - 1])
    return manifest
def laads_range_manifest(gran, from_utc, to_utc):
    """
    Create the manifest of files of one granule type for a given time range

    :param gran: dictionary; gran['rel_path'] and gran['name'] are used to build
                 the directory url and the sample file names
    :param from_utc: start of the time range (datetime)
    :param to_utc: end of the time range (datetime)
    :return: list of file names from the directory listings that fall in the range
    """
    start_year = from_utc.year
    if start_year != to_utc.year:
        # Split on the year boundary and recurse, so that below both ends can be
        # assumed to lie in the same year.
        year_end = datetime(year=start_year, month=12, day=31, hour=23, minute=59)
        next_year_start = datetime(year=start_year + 1, month=1, day=1, hour=0, minute=0)
        return (laads_range_manifest(gran, from_utc, year_end) +
                laads_range_manifest(gran, next_year_start, to_utc))

    # The source keeps each day in its own folder, addressed by day of year
    year_start = datetime(start_year, 1, 1)
    start_day = (from_utc - year_start).days + 1
    end_day = (to_utc - year_start).days + 1

    url = 'ftp://ladsweb.nascom.nasa.gov/' + gran['rel_path'] + gran['name'] + '/%s/%s'
    file_list = []
    for day in range(start_day, end_day + 1):
        # Day folders are written with three digits, the same width as the DDD
        # field of the file names (NOTE(review): confirm against the server).
        file_list.extend(get_dList(url % (str(start_year), '%03d' % day)))

    # Sample names look like MOD03.AYYYYDDD.HHMM.006.#############.hdf; the
    # all-nines production stamp makes them sort after a real file of that time.
    name_format = '%s.A%04d%03d.%02d%02d.006.9999999999999.hdf'
    start_filename = name_format % (
        gran['name'], start_year, start_day, from_utc.hour, from_utc.minute)
    end_filename = name_format % (
        gran['name'], start_year, end_day, to_utc.hour, to_utc.minute)

    # Start one file before the insertion point so the file covering from_utc is
    # kept; clamp at 0 so a request starting before the first listed file does
    # not turn into a negative (from-the-end) slice index.
    start_index = max(bisect(file_list, start_filename) - 1, 0)
    end_index = bisect(file_list, end_filename)
    return file_list[start_index:end_index]
def manifest_from_geo(self, geo_list, granule_name):
    """
    Get list of file names of another product matching a set of geolocation files

    :param geo_list: list containing geolocation file names; the names are sliced
                     at fixed offsets, so they are assumed to look like
                     MYD03.AYYYYDDD.HHMM.006.#############.hdf
    :param granule_name: product prefix put in front of the date/time fields when
                         building the name to search for
    :return: list with one file name per entry of geo_list
    """
    # Pull the directory listing of every relevant year/day page; this yields a
    # superset of the files we care about, refined in the second loop.
    file_list = []
    listed_days = set()
    for geo_name in geo_list:
        # Characters 7..13 hold YYYYDDD, which is everything the directory path
        # depends on, so each page is listed once rather than once per granule.
        day_key = geo_name[7:14]
        if day_key in listed_days:
            continue
        listed_days.add(day_key)
        file_list.extend(
            get_dList(self.url_base_hdf + '/' + self.filepath_af + '/' +
                      str(geo_name[7:11]) + '/' + str(geo_name[11:14])))

    # bisect needs sorted input, whatever order the days were requested in
    file_list.sort()

    # manifest contains the final set of exact filenames we care about
    manifest = []
    for geo_name in geo_list:
        # Made-up name carrying the geolocation file's date/time fields and a
        # production stamp of all nines; the entry just before its insertion
        # point is taken as the match.
        probe = granule_name + geo_name[5:24] + '99999999999999.hdf'
        # NOTE(review): if nothing sorts before the probe the index is -1 and
        # the last listed file is picked; an empty listing raises IndexError.
        manifest.append(file_list[bisect(file_list, probe) - 1])
    return manifest
def compute_l0_manifest(self, from_utc, to_utc):
    """
    Compute list of files in the source for the given time frame

    :param from_utc: time UTC format
    :param to_utc: time UTC format
    :return: list of file names as strings
    """
    # Retrieve the directory listing
    dList = get_dList(self.url_base_l0 + '/' + self.filepath_l0)
    if not dList:
        # nothing listed, nothing to put in the manifest
        return []

    # What would a file that starts at the hour of from_utc look like?
    # format: RNSCA-RVIRS_npp_dYYYYMMdd_thhmmssS_ehhmmssS_bnnnnn_cnnnnnnnnnnnnnnnnnnnn_aaaa_aaa.h5
    filename = 'RNSCA-RVIRS_npp_d%04d%02d%02d_t%02d00000_e000000_b00000_c00000000000000000000_aaaa_aaa.h5' % (
        from_utc.year, from_utc.month, from_utc.day, from_utc.hour)

    # Find where that name would be inserted, then step back to the file before
    # it so the data covering the start of the period is included. Clamp at 0:
    # a negative index would silently wrap around to the end of the listing.
    index = bisect(dList, filename)
    if index == len(dList) or dList[index] != filename:
        index = max(index - 1, 0)

    current_time = from_utc
    level0manifest = []
    # The gaps between files are irregular, so the start time of the next file
    # is taken as current_time instead of adding a fixed step.
    while current_time < to_utc:
        level0manifest.append(dList[index])
        index += 1
        if index >= len(dList):
            break
        current_file = dList[index]
        # Date and time are read from fixed offsets of the dYYYYMMdd_thhmmss
        # part of the name; once it passes to_utc the manifest is complete.
        current_time = current_time.replace(
            year=int(current_file[17:21]),
            month=int(current_file[21:23]),
            day=int(current_file[23:25]),
            hour=int(current_file[27:29]),
            minute=int(current_file[29:31]),
            second=int(current_file[31:33]))
    return level0manifest
def laads_list_manifest(gran, gran_list):
    """
    Create the manifest for a given granule type matching the time signatures
    of the given granules

    :param gran: dictionary; gran['rel_path'] and gran['name'] are used to build
                 the directory url and the names to search for
    :param gran_list: list of granule file names whose date/time fields are matched
    :return: list with one file name per entry of gran_list (empty for empty input)
    """
    if not gran_list:
        return []

    url = 'ftp://ladsweb.nascom.nasa.gov/' + gran['rel_path'] + gran['name'] + '/%s/%s'
    # All offsets are taken relative to the first dot of the first name, i.e.
    # the end of the product prefix.
    dot = gran_list[0].find('.')

    prefix = ''
    file_list = []
    for name in gran_list:
        # the prefix runs through the YYYYDDD field: one listing per year/day
        if name[:dot + 10] != prefix:
            prefix = name[:dot + 10]
            file_list.extend(
                get_dList(url % (prefix[dot + 2:dot + 6], prefix[dot + 6:dot + 9])))

    # Made-up name with the same date/time/collection fields and an all-nines
    # production stamp; the entry just before its insertion point is the match.
    search_string = gran['name'] + '%s.9999999999999.hdf'
    # Built eagerly so a real list is returned on Python 3 as well
    return [file_list[bisect(file_list, search_string % name[dot:dot + 18]) - 1]
            for name in gran_list]
def compute_geo_manifest(self, from_utc, to_utc):
    """
    Get list of geolocation file names for the given time frame

    :param from_utc: start time UTC
    :param to_utc: end time UTC
    :return: list of file names as strings
    """
    start_year = from_utc.year
    if start_year != to_utc.year:
        # Split on the year boundary and recurse (through self, this is a
        # method), so that below both ends can be assumed to be in one year.
        year_end = datetime(year=start_year, month=12, day=31, hour=23, minute=59)
        next_year_start = datetime(year=start_year + 1, month=1, day=1, hour=0, minute=0)
        return (self.compute_geo_manifest(from_utc, year_end) +
                self.compute_geo_manifest(next_year_start, to_utc))

    # The source keeps each day in its own folder, addressed by day of year
    year_start = datetime(start_year, 1, 1)
    start_day = (from_utc - year_start).days + 1
    end_day = (to_utc - year_start).days + 1

    file_list = []
    for day in range(start_day, end_day + 1):
        # Day folders are written with three digits, the same width as the DDD
        # field of the file names (NOTE(review): confirm against the server).
        file_list.extend(
            get_dList(self.url_base_hdf + '/' + self.filepath_geo + '/' +
                      str(start_year) + '/' + '%03d' % day))

    # Sample names look like MOD03.AYYYYDDD.HHMM.006.#############.hdf; the
    # all-nines production stamp makes them sort after a real file of that time.
    name_format = 'MOD03.A%04d%03d.%02d%02d.006.9999999999999.hdf'
    start_filename = name_format % (
        start_year, start_day, from_utc.hour, from_utc.minute)
    end_filename = name_format % (
        start_year, end_day, to_utc.hour, to_utc.minute)

    # Start one file before the insertion point so the file covering from_utc is
    # kept; clamp at 0 so a request starting before the first listed file does
    # not turn into a negative (from-the-end) slice index.
    start_index = max(bisect(file_list, start_filename) - 1, 0)
    end_index = bisect(file_list, end_filename)
    return file_list[start_index:end_index]
def manifest_from_geo(self, geo_list, granule_name):
    """
    Get list of file names of another product matching a set of geolocation files

    :param geo_list: list containing geolocation file names; the names are sliced
                     at fixed offsets, so they are assumed to look like
                     MYD03.AYYYYDDD.HHMM.006.#############.hdf
    :param granule_name: product prefix put in front of the date/time fields when
                         building the name to search for
    :return: list with one file name per entry of geo_list
    """
    # The body reads self.url_base_hdf and self.filepath_af, so this has to be
    # a method taking self.
    file_list = []
    listed_days = set()
    for geo_name in geo_list:
        # Characters 7..13 hold YYYYDDD, which is everything the directory path
        # depends on, so each page is listed once rather than once per granule.
        day_key = geo_name[7:14]
        if day_key in listed_days:
            continue
        listed_days.add(day_key)
        file_list.extend(
            get_dList(self.url_base_hdf + '/' + self.filepath_af + '/' +
                      str(geo_name[7:11]) + '/' + str(geo_name[11:14])))

    # bisect needs sorted input, whatever order the days were requested in
    file_list.sort()

    manifest = []
    for geo_name in geo_list:
        # Made-up name carrying the geolocation file's date/time fields and a
        # production stamp of all nines; the entry just before its insertion
        # point is taken as the match.
        probe = granule_name + geo_name[5:24] + '99999999999999.hdf'
        # NOTE(review): if nothing sorts before the probe the index is -1 and
        # the last listed file is picked; an empty listing raises IndexError.
        manifest.append(file_list[bisect(file_list, probe) - 1])
    return manifest
def compute_l0_manifest_g(self, from_utc, to_utc):
    """
    Compute list of GBAD files (AQUA specific) in the source for the given time frame

    :param from_utc: time UTC format
    :param to_utc: time UTC format
    :return: list of file names as strings
    """
    # Local import: an exact-name hit must return the index of the hit itself,
    # which is what bisect_left does (plain bisect returns the slot after it).
    from bisect import bisect_left

    # Retrieve the directory listing
    dList = get_dList(self.url_base_l0 + '/' + self.filepath_l0_g)
    if not dList:
        # nothing listed, nothing to put in the manifest
        return []

    # What would a file that starts exactly at from_utc look like?
    # The name built here has the form P1540957AAAAAAAAAAAAAAyyDDDhhmmss000.PDS
    current_time = from_utc
    days = (current_time - datetime(current_time.year, 1, 1)).days + 1
    year = current_time.year % 100
    filename = 'P1540957AAAAAAAAAAAAAA%02d%03d%02d%02d%02d000.PDS' % (
        year, days, current_time.hour, current_time.minute, current_time.second)

    # Index of the made-up name if it is listed, else of the first larger name.
    index = bisect_left(dList, filename)
    # When it is not listed (very likely) step back one group so the data
    # covering from_utc is included: -4 because there are 4 GBAD files per time,
    # of which only the first 2 are wanted. Clamp at 0, a negative index would
    # silently wrap around to the end of the listing.
    if index == len(dList) or dList[index] != filename:
        index = max(index - 4, 0)

    level0manifest = []
    # Stop as well when the second file of the pair would be past the listing.
    while current_time < to_utc and index + 1 < len(dList):
        # Add the 000.PDS and 001.PDS files
        level0manifest.append(dList[index])
        level0manifest.append(dList[index + 1])

        # Move to the next group of 4; running out of names before reaching
        # to_utc is fine, just stop.
        index += 4
        if index >= len(dList):
            break

        current_file = dList[index]
        # Take the start time of the next file (yy, DDD, hhmmss at fixed
        # offsets) as current_time. Year, month and day are replaced in one
        # call so a Feb 29 current_time cannot hit an invalid date on the way.
        current_time = current_time.replace(
            year=2000 + int(current_file[22:24]), month=1, day=1,
            hour=int(current_file[27:29]),
            minute=int(current_file[29:31]),
            second=int(current_file[31:33])) + timedelta(days=int(current_file[24:27]) - 1)
    return level0manifest
def compute_geo_manifest(self, from_utc, to_utc):
    """
    Get list of geolocation file names for the given time frame

    :param from_utc: start time UTC
    :param to_utc: end time UTC
    :return: list of file names as strings
    """
    start_year = from_utc.year
    if start_year != to_utc.year:
        # Split on the year boundary and recurse (through self, this is a
        # method), so that below both ends can be assumed to be in one year.
        year_end = datetime(year=start_year, month=12, day=31, hour=23, minute=59)
        next_year_start = datetime(year=start_year + 1, month=1, day=1, hour=0, minute=0)
        return (self.compute_geo_manifest(from_utc, year_end) +
                self.compute_geo_manifest(next_year_start, to_utc))

    # The source keeps each day in its own folder, addressed by day of year
    year_start = datetime(start_year, 1, 1)
    start_day = (from_utc - year_start).days + 1
    end_day = (to_utc - year_start).days + 1

    file_list = []
    for day in range(start_day, end_day + 1):
        # Day folders are written with three digits, the same width as the DDD
        # field of the file names (NOTE(review): confirm against the server).
        file_list.extend(
            get_dList(self.url_base_hdf + '/' + self.filepath_geo + '/' +
                      str(start_year) + '/' + '%03d' % day))

    # We now have every filename of the requested days; trim what is not needed
    # at the front and the back.
    # Sample names look like MYD03.AYYYYDDD.HHMM.006.#############.hdf; the
    # all-nines production stamp makes them sort after a real file of that time.
    name_format = 'MYD03.A%04d%03d.%02d%02d.006.9999999999999.hdf'
    start_filename = name_format % (
        start_year, start_day, from_utc.hour, from_utc.minute)
    end_filename = name_format % (
        start_year, end_day, to_utc.hour, to_utc.minute)

    # Start one file before the insertion point so the file covering from_utc is
    # kept; clamp at 0 so a request starting before the first listed file does
    # not turn into a negative (from-the-end) slice index.
    start_index = max(bisect(file_list, start_filename) - 1, 0)
    end_index = bisect(file_list, end_filename)
    return file_list[start_index:end_index]
def compute_l0_manifest(self, from_utc, to_utc):
    """
    Compute list of files in the source for the given time frame

    :param from_utc: time UTC format
    :param to_utc: time UTC format
    :return: list of file names as strings
    """
    # Local import: an exact-name hit must return the index of the hit itself,
    # which is what bisect_left does (plain bisect returns the slot after it).
    from bisect import bisect_left

    # Retrieve the directory listing
    dList = get_dList(self.url_base_l0 + '/' + self.filepath_l0)
    if not dList:
        # nothing listed, nothing to put in the manifest
        return []

    # What would a file that starts exactly at from_utc look like?
    # Filenames have this pattern: P0420064AAAAAAAAAAAAAAyyDDDhhmmss000.PDS
    current_time = from_utc
    days = (current_time - datetime(current_time.year, 1, 1)).days + 1
    year = current_time.year % 100
    filename = 'P0420064AAAAAAAAAAAAAA%02d%03d%02d%02d%02d000.PDS' % (
        year, days, current_time.hour, current_time.minute, current_time.second)

    # Index of the made-up name if it is listed, else of the first larger name.
    index = bisect_left(dList, filename)
    # When it is not listed (very likely) step back one pair so the data
    # covering from_utc is included: -2 because the files come in pairs, one
    # ending in 000.PDS and one in 001.PDS. Clamp at 0, a negative index would
    # silently wrap around to the end of the listing.
    if index == len(dList) or dList[index] != filename:
        index = max(index - 2, 0)

    level0manifest = []
    # Fill the manifest pair by pair; stop as well when the second file of the
    # pair would be past the listing.
    while current_time < to_utc and index + 1 < len(dList):
        # Add the 000.PDS and 001.PDS files
        level0manifest.append(dList[index])
        level0manifest.append(dList[index + 1])

        # Move to the next pair, if we run out of files just stop
        index += 2
        if index >= len(dList):
            break

        current_file = dList[index]
        # Take the start time of the next file (yy, DDD, hhmmss at fixed
        # offsets) as current_time. Year, month and day are replaced in one
        # call so a Feb 29 current_time cannot hit an invalid date on the way.
        current_time = current_time.replace(
            year=2000 + int(current_file[22:24]), month=1, day=1,
            hour=int(current_file[27:29]),
            minute=int(current_file[29:31]),
            second=int(current_file[31:33])) + timedelta(days=int(current_file[24:27]) - 1)
    return level0manifest