Example #1
def fetch_TRMM(start_dto, end_dto, outdir, product_string):
    """
    Fetches TRMM data from an FTP server.

       ftp://trmmopen.gsfc.nasa.gov/trmmdata/ByDate/V07/

    :param start_dto:        datetime object for start date of desired range
    :param end_dto:          datetime object for end date of desired range
    :param outdir:           output directory where files should be saved (str)
    :param product_string:   the string for the desired product, options include
                             1B11, 1B21, 1CTMI, 2A12, 2A21, 2A23, 2A25, 2B31, 3B42,
                             3G25, 3G31. The usual precip product of interest is the
                             well known 3B42 data product.

    :return output_files:    a list of new filepaths created by this function
    """

    # set up empty structure
    dates = []
    output_files = []
    ftpsite = "ftp://pps.gsfc.nasa.gov"
    un = "*****@*****.**"

    date_delta = end_dto - start_dto

    for i in range(date_delta.days + 1):
        dates.append(start_dto + timedelta(days=i))

    for date in dates:

        # navigate to path of desired year/month/day
        workdir = '/'.join([
            'trmmdata', 'ByDate', 'V07',
            str(date.year),
            str(date.month).zfill(2),
            str(date.day).zfill(2)
        ])

        filenames, filepaths = list_ftp(site=ftpsite,
                                        dir=workdir,
                                        username=un,
                                        password=un)

        for filename in filenames:

            if product_string in filename:
                try:
                    outname = os.path.join(outdir, os.path.basename(filename))
                    download_url(ftpsite + filename,
                                 outname,
                                 username=un,
                                 password=un)

                    # extract the download out of its GZ format, then remove the archive
                    extracted = outname.replace(".gz", "")
                    with gzip.open(outname, 'rb') as gz:
                        with open(extracted, 'wb') as f:
                            f.write(gz.read())

                    os.remove(outname)
                    output_files.append(extracted)

                    print("downloaded and extracted {0}".format(
                        os.path.basename(filename)))
                except Exception:
                    print("failed to download {0}".format(
                        os.path.basename(filename)))

    print("Finished downloading TRMM files!")

    return output_files
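
A minimal usage sketch for fetch_TRMM, assuming the function and the list_ftp / download_url helpers it calls are importable from the same module (the module name "download" below is a placeholder, as is the output path):

from datetime import datetime
from download import fetch_TRMM     # hypothetical import path

# grab the well-known 3B42 precipitation product for the first week of June 2014
trmm_files = fetch_TRMM(start_dto=datetime(2014, 6, 1),
                        end_dto=datetime(2014, 6, 7),
                        outdir=r"C:\data\TRMM",
                        product_string="3B42")

for path in trmm_files:
    print(path)
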
Example #2
def fetch_GPM_IMERG(start_dto, end_dto, outdir, product = "gis", time_res = "1day"):

    """
    Fetches 30-minute resolution GPM IMERG data from an FTP server. Several restrictions exist
    for this relatively new dataset, so please read the input section carefully.

       http://pps.gsfc.nasa.gov/Documents/GPM_Data_Info_140616.pdf

     Input:
       start_dto    datetime object for starting time of study boundary
       end_dto      datetime object for ending time of study boundary
       outdir       output directory to save the data
       product      either "early", "late", or "final" for full HDF5 data stacks of the respective
                    runs, which are all at 30-minute resolution. Alternatively, product can be set
                    to "gis" (default) to find only tif averages of the precipitation estimates.
                    This gis tif data is only provided for data less than one year old.
       time_res     if product is set to "gis", specify the time averaging period you want. Options
                    are "30min", "3hr", "1day", "3day", "7day". Defaults to "1day".

       learn more at the link below
       [http://pmm.nasa.gov/data-access/downloads/gpm]
    """

    # set up empty list of downloaded filepaths on local dir
    download_list = []

    # username and password info, should eventually be some DEVELOP credential.
    # this information is not at all sensitive.
    login = "******"


    # special filtering for gis type tif data to minimize data representation overlap.
    if product == "gis":
        if time_res == "30min":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 30)]
        elif time_res == "3hr":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 180)]
        else:
            ok_minutes = ["0000"]

    # assemble address information
    pps_server  = r"ftp://jsimpson.pps.eosdis.nasa.gov"

    # set product directory
    prod_server = "/".join(["NRTPUB/imerg", product])

    # log in and list available month folders.
    foldnames, foldpaths = list_ftp(site = pps_server,
                                    dir = prod_server,
                                    username = login,
                                    password = login)

    # quick filter: keep only numeric folder names, which are the ones that could hold data we want
    # (build a new list instead of removing items from the list being iterated over)
    foldnames = [foldname for foldname in foldnames if foldname.isdigit()]

    for foldname in foldnames:
        print("exploring directory '{0}'".format(foldname))
        subdir = "/".join([prod_server, foldname])
        filenames, filepaths = list_ftp(site = pps_server,
                                        dir = subdir,
                                        username = login,
                                        password = login)

        for filepath in filepaths:
            filename = os.path.basename(filepath)
            finfo = filename.split(".")
            prod       = finfo[3]
            date_cords = finfo[4]
            minutes    = finfo[5]
            time       = finfo[7]

            date_str = date_cords.split("-")[0]
            date = datetime.strptime(date_str, "%Y%m%d") + timedelta(minutes = int(minutes))

            # see if this file meets criteria for download
            good_date = start_dto <=  date  <= end_dto

            if product == "gis":
                good_minutes = minutes in ok_minutes
                good_time    = time_res == time
            else:
                good_minutes = True
                good_time    = True

            # download the files
            if good_date and good_time and good_minutes:
                outname = os.path.join(outdir, date.strftime("%Y-%m-%d"), filename)
                download_url(filepath, outname, username = login, password = login)
                print("saved '{0}' in '{1}'".format(filename, outdir))
                download_list.append(outname)

    return download_list
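
A short usage sketch for fetch_GPM_IMERG under the same assumptions (the import path is a placeholder); because the gis tif data is only kept for roughly a year, the example asks for a recent three-day window rather than a fixed historical date:

from datetime import datetime, timedelta
from download import fetch_GPM_IMERG     # hypothetical import path

# daily GeoTIFF precipitation averages for the last three days
downloads = fetch_GPM_IMERG(start_dto=datetime.now() - timedelta(days=3),
                            end_dto=datetime.now(),
                            outdir=r"C:\data\GPM",
                            product="gis",
                            time_res="1day")
print("{0} files downloaded".format(len(downloads)))
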
Example #3
def fetch_MODIS(product, version, tiles, outdir, years, j_days = False,
                                                force_overwrite = False):
    """
    Fetch MODIS Land products from one of two servers.

       http://e4ftl01.cr.usgs.gov
       ftp://n5eil01u.ecs.nsidc.org

    Inputs:
        product         MODIS product to download such as 'MOD10A1' or 'MYD11A1'
        version         version number, usually '004' or '041' or '005'
        tiles           list of tiles to grab such as ['h11v12','h11v11']
        outdir          output directory to save downloaded files
        years           list of years to grab such as range(2001,2014)
        j_days          list of julian days to grab such as range(31, 60).
                        Defaults to all days in the year.
        force_overwrite will re-download files even if they already exist
    """

    def Find_MODIS_Product(product, version):
        """
        Subfunction to determine server properties for a MODIS data product.
        Returns http/ftp handles.

        the two current servers where aqua/terra MODIS data can be downloaded are
            site1='http://e4ftl01.cr.usgs.gov'
            site2='n5eil01u.ecs.nsidc.org'

        Inputs:
           product     modis product such as 'MOD10A1'
           version     modis version, usually '005', '004', or '041'

        Outputs:
           site        server address where data can be found
           ftp         ftp handle for open ftp session
           Dir         subdirectory of server to further search for files of input product.
        """

        sat_designation = product[0:3]
        prod_ID = product[3:]

        site1 = 'http://e4ftl01.cr.usgs.gov/'
        site2 = 'n5eil01u.ecs.nsidc.org'

        isftp = False
        Dir   = False

        # refine the address of the desired data product
        if '10' in prod_ID:
            isftp = True
            site  = site2

        if sat_designation == 'MOD':
            if isftp:
                Dir = 'MOST/' + product + '.' + version
            else:
                site = site1+'MOLT/' + product + '.' + version

        elif sat_designation == 'MYD':
            if isftp:
                Dir = 'DP1/MOSA/' + product + '.' + version
            else:
                site = site1+'MOLA/' + product+'.' + version

        elif sat_designation == 'MCD':
            site = site1+'MOTA/' + product + '.' + version

        else:
            print('No such MODIS product is available for download with this script!')
            site = "None"

        return site, isftp, Dir



    # check formats
    tiles = core.enf_list(tiles)
    years = core.enf_list(years)
    years = [str(year) for year in years]

    if isinstance(j_days, list):
        js = [str(j_day).zfill(3) for j_day in j_days]
    elif isinstance(j_days, int) and j_days != False:
        js = [str(j_days).zfill(3)]
    else:
        js = [str(x).zfill(3) for x in range(1, 367)]

    # do a quick input tile check for 6 characters.
    for tile in tiles:
        if len(tile) != 6:
            print("Warning! your tiles appear to be invalid!")
            print("Warning! make sure they are in the format 'h##v##'")

    # create output directories
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print("Connecting to servers!")

    # obtain the web address, protocol information, and subdirectory where
    # this type of MODIS data can be found.
    site, isftp, Dir = Find_MODIS_Product(product, version)

    if Dir:
        print("Connected to {0}/{1}".format(site, Dir))
    else:
        print("Connected to {0}".format(site))

    # Depending on the type of connection (ftp vs http) populate the file list
    try:
        if isftp:
            dates,_ = list_ftp(site, False, False, Dir)
        else:
            dates   = list_http(site)
    except Exception:
        raise ValueError("Could not connect to {0}/{1}".format(site,Dir))

    # refine contents down to just addresses of valid year and j_day
    good_dates=[]
    for date in dates:

        try:
            dto   = datetime.strptime(date, "%Y.%m.%d")
            j_day = dto.strftime("%j")
            year  = dto.strftime("%Y")

            if year in years:
                good_dates.append(date)

                if j_days:
                    if j_day not in js:
                        good_dates.remove(date)
        except ValueError:
            print("skipping non date folder name {0}".format(date))


    print('Found {0} days within range'.format(len(good_dates)))

    # for all folders within the desired date range,  map the subfolder contents.
    for good_date in good_dates:

        if isftp:
            files,_ = list_ftp(site, False, False, Dir + '/' + good_date)

        else:
            files   = list_http(site + '/' + good_date)

        for afile in files:

            # only list files with desired tile names and not preview jpgs
            if '.jpg' not in afile:
                for tile in tiles:
                    if tile in afile:

                        # assemble the address
                        if isftp:
                            address='/'.join(['ftp://'+site, Dir, good_date, afile])
                        else:
                            address='/'.join([site, good_date, afile])

                        # download the file unless it already exists, or overwrite is forced
                        outname = os.path.join(outdir, afile)
                        if force_overwrite or not os.path.isfile(outname):
                            download_url(address, outname)
                            print('Downloaded {0}'.format(address))

    print('Finished retrieving MODIS - {0} data!'.format(product))
    return
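
A usage sketch for this year/julian-day variant of fetch_MODIS, again assuming a placeholder import path; the product, version, and tile values are taken from the docstring examples above:

from download import fetch_MODIS     # hypothetical import path

# daily Terra snow cover (MOD10A1, version 005) for two tiles,
# January of 2013 and 2014 only
fetch_MODIS(product="MOD10A1",
            version="005",
            tiles=["h11v12", "h11v11"],
            outdir=r"C:\data\MODIS",
            years=[2013, 2014],
            j_days=range(1, 32))
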
Example #4
def fetch_GPM_IMERG(start_dto,
                    end_dto,
                    outdir,
                    product="gis",
                    time_res="1day"):
    """
    Fetches 30-minute resolution GPM IMERG data from an FTP server. Several restrictions exist
    for this relatively new dataset, so please read the input section carefully.

       http://pps.gsfc.nasa.gov/Documents/GPM_Data_Info_140616.pdf

    :param start_dto:   datetime object for starting time of study boundary
    :param end_dto:     datetime object for ending time of study boundary
    :param outdir:      output directory to save the data
    :param product:     either "early", "late", or "final" for full HDF5 data stacks of the respective
                        runs, which are all at 30-minute resolution. Alternatively, product can be set
                        to "gis" (default) to find only tif averages of the precipitation estimates.
                        This gis tif data is ONLY provided for data less than one year old.
    :param time_res:    if product is set to "gis", specify the time averaging period you want.
                        Options are "30min", "3hr", "1day", "3day", "7day". Defaults to "1day".

    :return:            Returns a list of filepaths to freshly downloaded files

    learn more at [http://pmm.nasa.gov/data-access/downloads/gpm]
    """

    # set up empty list of downloaded filepaths on local dir
    download_list = []

    # username and password info, should eventually be some DEVELOP credential.
    # this information is not at all sensitive.
    login = "******"

    # special filtering for gis type tif data to minimize data representation overlap.
    if product == "gis":
        if time_res == "30min":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 30)]
        elif time_res == "3hr":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 180)]
        else:
            ok_minutes = ["0000"]

    # assemble address information
    pps_server = r"ftp://jsimpson.pps.eosdis.nasa.gov"

    # set product directory
    prod_server = "/".join(["NRTPUB/imerg", product])

    # log in and list available month folders.
    foldnames, foldpaths = list_ftp(site=pps_server,
                                    dir=prod_server,
                                    username=login,
                                    password=login)

    # quick filter: keep only numeric folder names, which are the ones that could hold data we want
    # (build a new list instead of removing items from the list being iterated over)
    foldnames = [foldname for foldname in foldnames if foldname.isdigit()]

    for foldname in foldnames:
        print("exploring directory '{0}'".format(foldname))
        subdir = "/".join([prod_server, foldname])
        filenames, filepaths = list_ftp(site=pps_server,
                                        dir=subdir,
                                        username=login,
                                        password=login)

        for filepath in filepaths:
            filename = os.path.basename(filepath)
            finfo = filename.split(".")
            prod = finfo[3]
            date_cords = finfo[4]
            minutes = finfo[5]
            time = finfo[7]

            date_str = date_cords.split("-")[0]
            date = datetime.strptime(
                date_str, "%Y%m%d") + timedelta(minutes=int(minutes))

            # see if this file meets criteria for download
            good_date = start_dto <= date <= end_dto

            if product == "gis":
                good_minutes = minutes in ok_minutes
                good_time = time_res == time
            else:
                good_minutes = True
                good_time = True

            # download the files
            if good_date and good_time and good_minutes:
                outname = os.path.join(outdir, date.strftime("%Y-%m-%d"),
                                       filename)
                download_url(filepath, outname, username=login, password=login)
                print("saved '{0}' in '{1}'".format(filename, outdir))
                download_list.append(outname)

    return download_list
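
The date handling inside the loop above is compact, so the standalone sketch below replays just that step with made-up field values: the "%Y%m%d" date string and the minutes-of-day counter combine into one datetime, which is then tested against the inclusive start/end window.

from datetime import datetime, timedelta

# made-up values of the kind parsed out of an IMERG filename
date_str = "20150630"     # finfo[4] before the "-", a %Y%m%d date
minutes  = "0930"         # finfo[5], minutes elapsed since midnight

stamp = datetime.strptime(date_str, "%Y%m%d") + timedelta(minutes=int(minutes))
print(stamp)                              # 2015-06-30 15:30:00

start_dto = datetime(2015, 6, 1)
end_dto   = datetime(2015, 7, 1)
print(start_dto <= stamp <= end_dto)      # True, so this file would be downloaded
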
Example #5
def fetch_TRMM(start_dto, end_dto, outdir, product_string):
    """
    Fetches TRMM data from an FTP server.

       ftp://trmmopen.gsfc.nasa.gov/trmmdata/ByDate/V07/

    Input:
        start_dto        datetime object for start date of desired range
        end_dto          datetime object for end date of desired range
        outdir           output directory where files should be saved (str)
        product_string   the string for the desired product, options include
                            1B11, 1B21, 1CTMI, 2A12, 2A21, 2A23, 2A25, 2B31, 3B42,
                            3G25, 3G31. The usual precip product of interest is the
                            famous 3B42 data product.

    Outputs:
        output_files    a list of new filepaths created by this function
    """

    # set up empty structure
    dates = []
    output_files = []
    ftpsite =  "ftp://pps.gsfc.nasa.gov"
    un      =  "*****@*****.**"

    date_delta = end_dto - start_dto

    for i in range(date_delta.days +1):
        dates.append(start_dto + timedelta(days = i))

    for date in dates:

        # navigate to path of desired year/month/day
        workdir = '/'.join(['trmmdata','ByDate','V07',
                            str(date.year),
                            str(date.month).zfill(2),
                            str(date.day).zfill(2)])

        filenames, filepaths = list_ftp(site = ftpsite,
                                        dir = workdir,
                                        username = un,
                                        password = un)

        for filename in filenames:

            if product_string in filename:
                try:
                    outname = os.path.join(outdir, os.path.basename(filename))
                    download_url(ftpsite + filename, outname, username = un, password = un)

                    # extract the download out of its GZ format, then remove the archive
                    extracted = outname.replace(".gz", "")
                    with gzip.open(outname, 'rb') as gz:
                        with open(extracted, 'wb') as f:
                            f.write(gz.read())

                    os.remove(outname)
                    output_files.append(extracted)


                    print("downloaded and extracted {0}".format(os.path.basename(filename)))
                except Exception:
                    print("failed to download {0}".format(os.path.basename(filename)))

    print("Finished downloading TRMM files!")

    return output_files
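
The extraction step above reads each archive fully into memory before writing it back out; for large files a streaming copy does the same job in constant memory. A sketch of that variant, standard library only:

import gzip
import os
import shutil

def gunzip_and_remove(gz_path):
    """Extract a .gz archive next to itself, delete the archive, return the extracted path."""
    extracted = gz_path.replace(".gz", "")
    with gzip.open(gz_path, 'rb') as gz, open(extracted, 'wb') as out:
        shutil.copyfileobj(gz, out)       # copy in chunks instead of gz.read()
    os.remove(gz_path)
    return extracted
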
Example #6
def fetch_MODIS(product,
                version,
                tiles,
                outdir,
                start_dto,
                end_dto,
                force_overwrite=False):
    """
    Fetch MODIS Land products from one of two servers. If this function
    runs and downloads 0 files, check that your inputs are consistent
    with the naming convention at the appropriate server address.

       http://e4ftl01.cr.usgs.gov
       ftp://n5eil01u.ecs.nsidc.org

    :param product:         MODIS product to download such as 'MOD10A1' or 'MYD11A1'
    :param version:         version number, usually '004' or '041' or '005'
    :param tiles:           list of tiles to grab such as ['h11v12','h11v11']
                            NOTE: for some MODIS products, the h and v are omitted.

    :param outdir:          output directory to save downloaded files
    :param start_dto:       datetime object, the starting date of the range of data to download
    :param end_dto:         datetime object, the ending date of the range of data to download
    :param force_overwrite: will re-download files even if they already exist

    :return out_filepaths:  a list of filepaths to all files created by this function
    """

    out_filepaths = []

    # check formats
    tiles = core.enf_list(tiles)

    # create output directories
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print("Connecting to servers!")

    # obtain the web address, protocol information, and subdirectory where
    # this type of MODIS data can be found.
    site, isftp, Dir = _find_modis_product(product, version)

    if Dir:
        print("Connected to {0}/{1}".format(site, Dir))
    else:
        print("Connected to {0}".format(site))

    # Depending on the type of connection (ftp vs http) populate the file list
    try:
        if isftp:
            dates, _ = list_ftp(site, False, False, Dir)
        else:
            dates = list_http_e4ftl01(site)
    except Exception:
        raise ValueError("Could not connect to {0}/{1}".format(site, Dir))

    # refine contents down to just addresses of valid year and j_day
    good_dates = []
    for date in dates:
        try:
            date_dto = datetime.strptime(date, "%Y.%m.%d")
            if start_dto <= date_dto <= end_dto:
                good_dates.append(date)

        except ValueError:
            print("skipping non-date folder name {0}".format(date))

    print('Found {0} days within range'.format(len(good_dates)))

    # for all folders within the desired date range,  map the subfolder contents.
    for good_date in good_dates:

        if isftp:
            files, _ = list_ftp(site, False, False, Dir + '/' + good_date)

        else:
            files = list_http_e4ftl01(site + '/' + good_date)

        for afile in files:

            # only list files with desired tile names and not preview jpgs
            if '.jpg' not in afile:
                for tile in tiles:
                    if tile in afile:

                        # assemble the address
                        if isftp:
                            address = '/'.join(
                                ['ftp://' + site, Dir, good_date, afile])
                        else:
                            address = '/'.join([site, good_date, afile])

                        # download the file unless it already exists, or overwrite is forced
                        outname = os.path.join(outdir, afile)
                        out_filepaths.append(outname)
                        if force_overwrite or not os.path.isfile(outname):
                            download_url(address, outname)
                            print('Downloaded {0}'.format(address))

    print("Finished retrieving MODIS - {0} data!".format(product))
    print("Downloaded {0} files".format(len(out_filepaths)))

    return out_filepaths
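
A usage sketch for this date-range variant of fetch_MODIS, with a placeholder import path; the product and version values come from the docstring above, and the tile is illustrative:

from datetime import datetime
from download import fetch_MODIS     # hypothetical import path

# Aqua land surface temperature (MYD11A1, version 005) for one tile,
# first half of March 2015
paths = fetch_MODIS(product="MYD11A1",
                    version="005",
                    tiles=["h11v05"],
                    outdir=r"C:\data\MODIS",
                    start_dto=datetime(2015, 3, 1),
                    end_dto=datetime(2015, 3, 15))
print("{0} files downloaded".format(len(paths)))
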
Example #7
def fetch_MODIS(product, version, tiles, outdir, start_dto, end_dto,
                                                force_overwrite = False):
    """
    Fetch MODIS Land products from one of two servers. If this function
    runs and downloads 0 files, check that your inputs are consistent
    with the naming convention at the appropriate server address.

       http://e4ftl01.cr.usgs.gov
       ftp://n5eil01u.ecs.nsidc.org

    :param product:         MODIS product to download such as 'MOD10A1' or 'MYD11A1'
    :param version:         version number, usually '004' or '041' or '005'
    :param tiles:           list of tiles to grab such as ['h11v12','h11v11']
                            NOTE: for some MODIS products, the h and v are omitted.

    :param outdir:          output directory to save downloaded files
    :param start_dto:       datetime object, the starting date of the range of data to download
    :param end_dto:         datetime object, the ending date of the range of data to download
    :param force_overwrite: will re-download files even if they already exist

    :return out_filepaths:  a list of filepaths to all files created by this function
    """

    out_filepaths = []

    # check formats
    tiles = core.enf_list(tiles)

    # create output directories
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print("Connecting to servers!")

    # obtain the web address, protocol information, and subdirectory where
    # this type of MODIS data can be found.
    site, isftp, Dir = _find_modis_product(product, version)

    if Dir:
        print("Connected to {0}/{1}".format(site, Dir))
    else:
        print("Connected to {0}".format(site))

    # Depending on the type of connection (ftp vs http) populate the file list
    try:
        if isftp:
            dates,_ = list_ftp(site, False, False, Dir)
        else:
            dates   = list_http_e4ftl01(site)
    except Exception:
        raise ValueError("Could not connect to {0}/{1}".format(site,Dir))

    # refine contents down to just addresses of valid year and j_day
    good_dates = []
    for date in dates:
        try:
            date_dto = datetime.strptime(date, "%Y.%m.%d")
            if start_dto <= date_dto <= end_dto:
                good_dates.append(date)

        except ValueError:
            print("skipping non-date folder name {0}".format(date))


    print('Found {0} days within range'.format(len(good_dates)))

    # for all folders within the desired date range,  map the subfolder contents.
    for good_date in good_dates:

        if isftp:
            files,_ = list_ftp(site, False, False, Dir + '/' + good_date)

        else:
            files   = list_http_e4ftl01(site + '/' + good_date)

        for afile in files:

            # only list files with desired tile names and not preview jpgs
            if '.jpg' not in afile:
                for tile in tiles:
                    if tile in afile:

                        # assemble the address
                        if isftp:
                            address='/'.join(['ftp://'+site, Dir, good_date, afile])
                        else:
                            address='/'.join([site, good_date, afile])

                        # download the file unless it already exists, or overwrite is forced
                        outname = os.path.join(outdir, afile)
                        out_filepaths.append(outname)
                        if force_overwrite or not os.path.isfile(outname):
                            download_url(address, outname)
                            print('Downloaded {0}'.format(address))

    print("Finished retrieving MODIS - {0} data!".format(product))
    print("Downloaded {0} files".format(len(out_filepaths)))

    return out_filepaths
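
The skip-or-overwrite test near the bottom of the loop is easy to get backwards, so here is the intended decision spelled out as a tiny standalone helper: download when the file is missing on disk, or whenever force_overwrite is set.

import os

def should_download(outname, force_overwrite=False):
    """Return True when a file should be fetched: it is absent, or overwriting is forced."""
    return force_overwrite or not os.path.isfile(outname)

# intended behaviour:
#   file not on disk yet                      -> True  (download it)
#   file already on disk, force_overwrite off -> False (skip it)
#   file already on disk, force_overwrite on  -> True  (re-download it)
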