Example #1
0
    def download(self, engine=None, debug=False):
        """Fetch the BAAD archive and load its data and methods tables."""
        Script.download(self, engine, debug)
        engine = self.engine

        # Both files of interest live inside a nested baad_data/ folder
        # in the archive; additional files can be listed here the same way.
        archive_members = ["baad_data/baad_data.csv",
                           "baad_data/baad_methods.csv"]
        engine.download_files_from_archive(self.urls["BAAD"], archive_members)

        # Create and populate one table per extracted CSV, data first,
        # then methods, mirroring the archive contents above.
        for table_name, csv_name in (("data", "baad_data.csv"),
                                     ("methods", "baad_methods.csv")):
            na_cleanup = Cleanup(correct_invalid_value, nulls=['NA'])
            engine.auto_create_table(Table(table_name, cleanup=na_cleanup),
                                     filename=csv_name)
            engine.insert_data_from_file(engine.format_filename(csv_name))
Example #2
0
    def __init__(self, **kwargs):
        """Set metadata for the Gulf of Maine intertidal density/cover
        dataset (Petraitis et al. 2008) and pick version-appropriate
        cleanup settings.
        """
        Script.__init__(self, **kwargs)
        self.title = "Gulf of Maine intertidal density/cover (Petraitis et al. 2008)"
        self.citation = "Peter S. Petraitis, Harrison Liu, and " \
                        "Erika C. Rhile. 2008. Densities and cover " \
                        "data for intertidal organisms in the Gulf of " \
                        "Maine, USA, from 2003 to 2007. Ecology 89:588."
        self.name = "intertidal-abund-me"
        self.ref = "https://figshare.com/collections/DENSITIES_AND_COVER_DATA_FOR_INTERTIDAL_ORGANISMS_IN_THE_GULF_OF_MAINE_USA_FROM_2003_TO_2007/3300200"
        self.description = "The data on densities and percent cover in the " \
                           "60 experimental plots from 2003 to 2007 and to " \
                           "update data from 1996 to 2002 that are already " \
                           "published in Ecological Archives." \
                           "Includes densities of mussels, " \
                           "herbivorous limpet, herbivorous snails, " \
                           "predatory snail, barnacle , fucoid algae and " \
                           "percent cover by mussels, barnacles, fucoids, " \
                           "and other sessile organisms."
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.5.3'
        self.urls = {"main": "https://ndownloader.figshare.com/files/5600831"}
        # -999.9 marks missing values in the source file.
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=[-999.9])

        # Retriever <= 2.0.0 expected shortname/name attributes and the
        # older nulls= keyword for Cleanup instead of missing_values=.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              nulls=[-999.9])
        self.tables = {"main": Table("main", cleanup=self.cleanup_func_table)}
    def __init__(self, **kwargs):
        """Set metadata and the import/export/re-export query URLs for the
        NOAA commercial fisheries monthly trade dataset.
        """
        Script.__init__(self, **kwargs)
        self.title = "Commercial Fisheries Monthly Trade Data by Product, Country/Association"
        self.name = "noaa-fisheries-trade"
        self.retriever_minimum_version = '2.1.dev'
        # Each URL is a pre-built query against the NOAA web form covering
        # all months of 1975-2018, returned as an ASCII file.
        self.urls = {
            "imports": "https://www.st.nmfs.noaa.gov/pls/webpls/trade_prdct_cntry_ind_mth.results?"
                       "qtype=IMP&qmonthfrom=01&qmonthto=12&qyearfrom=1975&qyearto=2018"
                       "&qprod_name=%25&qcountry=%25&qsort=COUNTRY&qoutput=ASCII+FILE",

            "exports": "https://www.st.nmfs.noaa.gov/pls/webpls/trade_prdct_cntry_ind_mth.results?"
                       "qtype=EXP&qmonthfrom=01&qmonthto=12&qyearfrom=1975&qyearto=2018"
                       "&qprod_name=%25&qcountry=%25&qsort=COUNTRY&qoutput=ASCII+FILE",

            "rexport": "https://www.st.nmfs.noaa.gov/pls/webpls/trade_prdct_cntry_ind_mth.results?"
                       "qtype=REX&qmonthfrom=01&qmonthto=12&qyearfrom=1975&qyearto=2018"
                       "&qprod_name=%25&qcountry=%25&qsort=COUNTRY&qoutput=ASCII+FILE"
        }
        self.version = '1.0.0'
        self.ref = "https://www.st.nmfs.noaa.gov/commercial-fisheries/foreign-trade/" \
                   "applications/monthly-product-by-countryassociation"
        self.citation = "No known Citation"
        self.description = "Commercial Fisheries statistics provides a summary of " \
                           "commercial fisheries product data by individual country."
        self.keywords = ["Fish", "Fisheries"]
Example #4
0
 def __init__(self, **kwargs):
     """Set metadata and the 4km-resolution download URL for the PRISM
     climate dataset.
     """
     Script.__init__(self, **kwargs)
     self.name = "PRISM Climate Data"
     self.shortname = "PRISM"
     self.ref = "http://prism.oregonstate.edu/"
     self.urls = {"climate": "http://services.nacse.org/prism/data/public/4km/"}
     self.description = "The PRISM data set represents climate observations from a wide range of monitoring networks, applies sophisticated quality control measures, and develops spatial climate datasets to reveal short- and long-term climate patterns. "
    def download(self, engine=None, debug=False):
        """Fetch each NOAA trade file and load it as a pipe-delimited table.

        For every key in ``self.urls`` a raw .txt file is downloaded,
        rewritten as a single-"|"-delimited .csv, and inserted into a
        table named after the key.
        """
        Script.download(self, engine, debug)
        engine = self.engine

        for key in self.urls:
            original_file_name = "trade_prdct_{}.txt".format(key)
            new_file_name = "trade_prdct_{}.csv".format(key)

            engine.download_file(self.urls[key], original_file_name)

            old_path = self.engine.format_filename(original_file_name)
            new_path = self.engine.format_filename(new_file_name)

            # Re-write the file with one delimiter
            old_data = open_fr(old_path)
            new_data = open_fw(new_path)

            # Read header line and convert "," to "|"
            line1 = old_data.readline().strip().replace(",", "|")
            new_data.write(line1 + "\n")
            for line in old_data:
                # Remove the leading "|" from the data; the trailing "\n"
                # keeps str.strip("|") from touching the right-hand end.
                new_data.write(line.strip("|"))
            new_data.close()
            old_data.close()
            table = Table(key, delimiter="|")
            engine.auto_create_table(table, filename=new_file_name)
            engine.insert_data_from_file(new_path)
    def __init__(self, **kwargs):
        """Set metadata for the Flensburg Fjord food-web dataset (Zander
        et al. 2011) and pick version-appropriate cleanup settings.
        """
        Script.__init__(self, **kwargs)
        self.title = "Food web including metazoan parasites for a " \
                     "brackish shallow water ecosystem in Germany and Denmark"
        self.citation = "C. Dieter Zander, Neri Josten, Kim C. Detloff, " \
                        "Robert Poulin, John P. McLaughlin, and David W. " \
                        "Thieltges. 2011. Food web including metazoan " \
                        "parasites for a brackish shallow water ecosystem " \
                        "in Germany and Denmark. Ecology 92:2007."
        self.name = "flensburg-food-web"
        self.ref = "https://figshare.com/articles/Full_Archive/3552066"
        self.description = "This data is of a food web for the Flensburg " \
                           "Fjord, a brackish shallow water inlet on the " \
                           "Baltic Sea, between Germany and Denmark."
        self.keywords = []
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.0.4'
        self.urls = {"zip": "https://ndownloader.figshare.com/files/5620326"}
        # Empty strings mark missing values in the source file.
        self.cleanup_func_table = Cleanup(
            correct_invalid_value, missing_values=[''])

        # Retriever <= 2.0.0 expected shortname/name/tags attributes and
        # the older nulls= keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(
                correct_invalid_value, nulls=['', 'unknown'])
Example #7
0
    def download(self, engine=None, debug=False):
        """Download per-state FIA archives, filter each table to rows from
        the year annual inventory began in that state, and load the results.

        Returns the engine so callers can chain further operations.
        """
        Script.download(self, engine, debug)
        engine = self.engine

        # download and create species table
        table = Table('species')
        self.engine.auto_create_table(table, url=self.urls['species'])
        self.engine.insert_data_from_url(self.urls['species'])

        # State abbreviations with the year annual inventory began for
        # that state
        stateslist = [('AL', 2001), ('AK', 2004), ('AZ', 2001), ('AR', 2000),
                      ('CA', 2001), ('CO', 2002), ('CT', 2003), ('DE', 2004),
                      ('FL', 2003), ('GA', 1998), ('ID', 2004), ('IL', 2001),
                      ('IN', 1999), ('IA', 1999), ('KS', 2001), ('KY', 1999),
                      ('LA', 2001), ('ME', 1999), ('MD', 2004), ('MA', 2003),
                      ('MI', 2000), ('MN', 1999), ('MO', 1999), ('MT', 2003),
                      ('NE', 2001), ('NV', 2004), ('NH', 2002), ('NJ', 2004),
                      ('NY', 2002), ('NC', 2003), ('ND', 2001), ('OH', 2001),
                      ('OK', 2008), ('OR', 2001), ('PA', 2000), ('RI', 2003),
                      ('SC', 1999), ('SD', 2001), ('TN', 2000), ('TX', 2001),
                      ('UT', 2000), ('VT', 2003), ('VA', 1998), ('WA', 2002),
                      ('WV', 2004), ('WI', 2000), ('PR', 2001)]

        tablelist = ["SURVEY", "PLOT", "COND", "SUBPLOT", "SUBP_COND",
                     "TREE", "SEEDLING"]

        # Fetch every state's CSV for every table before any filtering.
        for table in tablelist:
            for state, year in stateslist:
                engine.download_files_from_archive(
                    self.urls["main"] + state + "_" + table + ".ZIP",
                    [state + "_" + table + ".CSV"])

        for table in tablelist:
            # print was a Python 2 statement here; use the function form.
            print("Scanning data for table %s..." % table)
            prep_file_name = "%s.csv" % table
            # Text mode ("w"/"r") instead of the original "wb"/"rb": the
            # filtering below works on str, which broke under Python 3.
            prep_file = open(engine.format_filename(prep_file_name), "w")
            # Read the header from the first state's file to locate the
            # inventory-year column.
            this_file = open(engine.format_filename(
                stateslist[0][0] + "_" + table + ".CSV"), "r")
            col_names = this_file.readline()
            prep_file.write(col_names)
            column_names = [col.strip('"') for col in col_names.split(',')]
            year_column = column_names.index("INVYR")
            this_file.close()

            # Keep only rows at or after each state's inventory start year.
            for state, year in stateslist:
                this_file = open(engine.format_filename(
                    state + "_" + table + ".CSV"), "r")
                this_file.readline()  # skip the per-state header row
                for line in this_file:
                    values = line.split(',')
                    this_year = values[year_column]
                    if int(this_year) >= year:
                        prep_file.write(line)
                # Close each state's file (the original leaked them all).
                this_file.close()
            prep_file.close()
            engine.auto_create_table(Table(table), filename=prep_file_name)

            engine.insert_data_from_file(engine.format_filename(prep_file_name))

            # Best-effort cleanup of the temporary combined file; only
            # filesystem errors are expected and safe to ignore here.
            try:
                os.remove(engine.format_filename(prep_file_name))
            except OSError:
                pass

        return engine
    def __init__(self, **kwargs):
        """Set metadata for the global aquatic animal excretion-rate
        database (Vanni et al. 2017).
        """
        Script.__init__(self, **kwargs)
        self.title = "Aquatic Animal Excretion"
        self.name = "aquatic-animal-excretion"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.1.0'
        self.ref = "http://onlinelibrary.wiley.com/doi/10.1002/ecy.1792/abstract"
        self.urls = {
            'aquatic_animals': 'http://onlinelibrary.wiley.com/store/10.1002/ecy.1792/asset/supinfo/ecy1792-sup-0001-DataS1.zip?v=1&s=3a9094a807bbc2d03ba43045d2b72782bfb348ef'
        }
        self.citation = "Vanni, M. J., McIntyre, P. B., Allen, D., Arnott, D. L., Benstead, J. P., Berg, D. J., " \
                        "Brabrand, Å., Brosse, S., Bukaveckas, P. A., Caliman, A., Capps, K. A., Carneiro, L. S., " \
                        "Chadwick, N. E., Christian, A. D., Clarke, A., Conroy, J. D., Cross, W. F., Culver, D. A., " \
                        "Dalton, C. M., Devine, J. A., Domine, L. M., Evans-White, M. A., Faafeng, B. A., " \
                        "Flecker, A. S., Gido, K. B., Godinot, C., Guariento, R. D., Haertel-Borer, S., Hall, " \
                        "R. O., Henry, R., Herwig, B. R., Hicks, B. J., Higgins, K. A., Hood, J. M., Hopton, M. E., " \
                        "Ikeda, T., James, W. F., Jansen, H. M., Johnson, C. R., Koch, B. J., Lamberti, G. A., " \
                        "Lessard-Pilon, S., Maerz, J. C., Mather, M. E., McManamay, R. A., Milanovich, J. R., " \
                        "Morgan, D. K. J., Moslemi, J. M., Naddafi, R., Nilssen, J. P., Pagano, M., Pilati, A., " \
                        "Post, D. M., Roopin, M., Rugenski, A. T., Schaus, M. H., Shostell, J., Small, G. E., " \
                        "Solomon, C. T., Sterrett, S. C., Strand, O., Tarvainen, M., Taylor, J. M., Torres-Gerald, " \
                        "L. E., Turner, C. B., Urabe, J., Uye, S.-I., Ventelä, A.-M., Villeger, S., Whiles, M. R., " \
                        "Wilhelm, F. M., Wilson, H. F., Xenopoulos, M. A. and Zimmer, K. D. (2017), " \
                        "A global database of nitrogen and phosphorus excretion rates of aquatic animals. " \
                        "Ecology. Accepted Author Manuscript. doi:10.1002/ecy.1792"
        self.description = "Dataset containing the nutrient cycling rates of individual animals."
        self.keywords = ['Aquatic']

        # Retriever <= 2.0.0 expected shortname/name/tags attributes.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
Example #9
0
    def download(self, engine=None, debug=False):
        """Download the succession sampling data, merge its split header
        into a single tab-joined line, and load the repaired file.
        """
        Script.download(self, engine, debug)

        self.engine.download_file(
            self.urls["main"], "Succession_sampling_03-07_data_original.txt")
        data_path = self.engine.format_filename(
            "Succession_sampling_03-07_data.txt")
        old_data = open_fr(
            self.engine.find_file(
                "Succession_sampling_03-07_data_original.txt"))
        new_data = open_fw(data_path)
        # original file's header contains an end of line character in the middle hence creating two lines
        # Read in the two lines and create the full header
        line1 = old_data.readline().strip()
        line2 = old_data.readline()
        newline = line1 + "\t" + line2
        new_data.write(newline)
        # Remaining lines are copied through unchanged.
        for line in old_data:
            new_data.write(line)
        new_data.close()
        old_data.close()

        self.engine.auto_create_table(
            self.tables["main"], filename="Succession_sampling_03-07_data.txt")
        self.engine.insert_data_from_file(data_path)
Example #10
0
    def download(self, engine=None, debug=False):
        """Fetch each NOAA trade file and load it as a pipe-delimited table."""
        Script.download(self, engine, debug)
        engine = self.engine

        for key in self.urls:
            raw_name = "trade_prdct_{}.txt".format(key)
            clean_name = "trade_prdct_{}.csv".format(key)

            engine.download_file(self.urls[key], raw_name)

            raw_path = self.engine.format_filename(raw_name)
            clean_path = self.engine.format_filename(clean_name)

            # Rewrite the raw file so it uses a single "|" delimiter.
            source = open_fr(raw_path)
            sink = open_fw(clean_path)

            # The header uses commas; normalize them to "|" as well.
            header = source.readline().strip().replace(",", "|")
            sink.write(header + "\n")
            for record in source:
                # Each data row carries a leading "|" that must go.
                sink.write(record.strip("|"))
            sink.close()
            source.close()

            engine.auto_create_table(Table(key, delimiter="|"),
                                     filename=clean_name)
            engine.insert_data_from_file(clean_path)
    def __init__(self, **kwargs):
        """Set metadata for the BAAD woody-plant biomass/allometry database
        and pick version-appropriate cleanup settings.
        """
        Script.__init__(self, **kwargs)
        self.title = "BAAD: a Biomass And Allometry Database for woody plants"
        self.name = "biomass-allometry-db"
        self.ref = "https://doi.org/10.6084/m9.figshare.c.3307692.v1"
        self.urls = {"BAAD": "https://ndownloader.figshare.com/files/5634309"}
        self.citation = "Falster, D.S., Duursma, R.A., Ishihara, M.I., " \
                        "Barneche, D.R., FitzJohn, R.G., Varhammar, A., Aiba, " \
                        "M., Ando, M., Anten, N., Aspinwall, M.J. and " \
                        "Baltzer, J.L., 2015. BAAD: a Biomass And Allometry " \
                        "Database for woody plants."
        self.licenses = [{"name": "CC0-1.0"}]
        self.keywords = ['plants', 'observational']
        self.retriever_minimum_version = "2.0.dev"
        self.version = "1.4.4"
        self.description = "The data set is a Biomass and allometry database " \
                           "(BAAD) for woody plants containing 259634 " \
                           "measurements collected in 176 different studies " \
                           "from 21084 individuals across 678 species."

        # Retriever <= 2.0.0 expected shortname/name/tags attributes and
        # the older nulls= keyword for Cleanup; 'NA' marks missing values.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              nulls=['NA'])
        else:
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              missing_values=['NA'])
    def __init__(self, **kwargs):
        """Set metadata for the Alwyn H. Gentry forest transect dataset."""
        Script.__init__(self, **kwargs)
        self.title = "Alwyn H. Gentry Forest Transect Dataset"
        self.name = "gentry-forest-transects"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.4.1'
        # species/counts are derived during processing, so their URLs
        # are intentionally empty placeholders.
        self.urls = {"stems": "http://www.mobot.org/mobot/gentry/123/all_Excel.zip",
                     "sites": "https://ndownloader.figshare.com/files/5515373",
                     "species": "",
                     "counts": ""}
        self.keywords = ["plants", "global-scale", "observational"]
        self.ref = "http://www.mobot.org/mobot/research/gentry/welcome.shtml"
        self.citation = "Phillips, O. and Miller, J.S., 2002. Global patterns of plant diversity: Alwyn H. Gentry's forest transect data set. Missouri Botanical Press."
        self.addendum = """Researchers who make use of the data in publications are requested to acknowledge Alwyn H. Gentry, the Missouri Botanical Garden, and collectors who assisted Gentry or contributed data for specific sites. It is also requested that a reprint of any publication making use of the Gentry Forest Transect Data be sent to:

Bruce E. Ponman
Missouri Botanical Garden
P.O. Box 299
St. Louis, MO 63166-0299
U.S.A. """

        # Retriever <= 2.0.0 expected shortname/name/tags attributes.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
Example #13
0
 def __init__(self, **kwargs):
     """Set metadata and the import/export/re-export query URLs for the
     NOAA commercial fisheries monthly trade dataset.
     """
     Script.__init__(self, **kwargs)
     self.title = "Commercial Fisheries Monthly Trade Data by Product, Country/Association"
     self.name = "noaa-fisheries-trade"
     self.retriever_minimum_version = '2.1.dev'
     # Each URL is a pre-built query against the NOAA web form covering
     # all months of 1975-2018, returned as an ASCII file.
     self.urls = {
         "imports":
         "https://www.st.nmfs.noaa.gov/pls/webpls/trade_prdct_cntry_ind_mth.results?"
         "qtype=IMP&qmonthfrom=01&qmonthto=12&qyearfrom=1975&qyearto=2018"
         "&qprod_name=%25&qcountry=%25&qsort=COUNTRY&qoutput=ASCII+FILE",
         "exports":
         "https://www.st.nmfs.noaa.gov/pls/webpls/trade_prdct_cntry_ind_mth.results?"
         "qtype=EXP&qmonthfrom=01&qmonthto=12&qyearfrom=1975&qyearto=2018"
         "&qprod_name=%25&qcountry=%25&qsort=COUNTRY&qoutput=ASCII+FILE",
         "rexport":
         "https://www.st.nmfs.noaa.gov/pls/webpls/trade_prdct_cntry_ind_mth.results?"
         "qtype=REX&qmonthfrom=01&qmonthto=12&qyearfrom=1975&qyearto=2018"
         "&qprod_name=%25&qcountry=%25&qsort=COUNTRY&qoutput=ASCII+FILE"
     }
     self.version = '1.0.0'
     self.ref = "https://www.st.nmfs.noaa.gov/commercial-fisheries/foreign-trade/" \
                "applications/monthly-product-by-countryassociation"
     self.citation = "No known Citation"
     self.description = "Commercial Fisheries statistics provides a summary of " \
                        "commercial fisheries product data by individual country."
     self.keywords = ["Fish", "Fisheries"]
Example #14
0
    def __init__(self, **kwargs):
        """Set metadata and the grid-archive URL for the Bioclim 2.5-minute
        climate dataset.
        """
        Script.__init__(self, **kwargs)
        self.title = "Bioclim 2.5 Minute Climate Data"
        self.name = "bioclim"
        self.ref = "http://worldclim.org/bioclim"
        self.urls = {"climate": "http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/bio_2-5m_bil.zip"}

        self.description = "Bioclimatic variables that are derived " \
                           "from the monthly temperature and rainfall " \
                           "values in order to generate more biologically " \
                           "meaningful variables."
        self.citation = "Hijmans, R.J., S.E. Cameron, J.L. Parra, " \
                        "P.G. Jones and A. Jarvis, 2005. Very high " \
                        "resolution interpolated climate surfaces for " \
                        "global land areas. International Journal of " \
                        "Climatology 25: 1965-1978."

        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.2.2'
        self.keywords = ["climate"]

        # Retriever <= 2.0.0 expected shortname/name/tags attributes.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
 def download(self, engine=None, debug=False):
     """Download the eBird 2013 observation archive and load it into a
     typed "main" table.

     Returns the engine so callers can chain further operations.
     """
     data_file_name = "eBird_Observation_Dataset_2013.csv"
     Script.download(self, engine, debug)
     # Rebind to the engine prepared by Script.download: the raw
     # ``engine`` argument defaults to None, and the original code
     # crashed on engine.table below when called with the default.
     engine = self.engine
     self.engine.download_files_from_archive(self.urls["main"],
                                             [data_file_name],
                                             filetype='gz')
     # Column types are declared explicitly; the source CSV uses
     # Darwin-Core-style upper-case column names.
     table = Table("main", delimiter=",")
     table.columns = [("BASISOFRECORD", ("char", )),
                      ("INSTITUTIONCODE", ("char", )),
                      ("COLLECTIONCODE", ("char", )),
                      ("CATALOGNUMBER", ("char", )),
                      ("OCCURRENCEID", ("char", )),
                      ("RECORDEDBY", ("char", )), ("YEAR", ("int", )),
                      ("MONTH", ("int", )), ("DAY", ("int", )),
                      ("COUNTRY", ("char", )),
                      ("STATEPROVINCE", ("char", )), ("COUNTY", ("char", )),
                      ("DECIMALLATITUDE", ("double", )),
                      ("DECIMALLONGITUDE", ("double", )),
                      ("LOCALITY", ("char", )), ("KINGDOM", ("char", )),
                      ("PHYLUM", ("char", )), ("CLASS", ("char", )),
                      ("SPORDER", ("char", )), ("FAMILY", ("char", )),
                      ("GENUS", ("char", )),
                      ("SPECIFICEPITHET", ("char", )),
                      ("SCIENTIFICNAME", ("char", )),
                      ("VERNACULARNAME", ("char", )),
                      ("INDIVIDUALCOUNT", ("int", ))]
     engine.table = table
     engine.create_table()
     engine.insert_data_from_file(engine.format_filename(data_file_name))
     return engine
    def __init__(self, **kwargs):
        """Set metadata for the Flensburg Fjord food-web dataset (Zander
        et al. 2011) and pick version-appropriate cleanup settings.
        """
        Script.__init__(self, **kwargs)
        self.title = "Food web including metazoan parasites for a " \
                     "brackish shallow water ecosystem in Germany and Denmark"
        self.citation = "C. Dieter Zander, Neri Josten, Kim C. Detloff, " \
                        "Robert Poulin, John P. McLaughlin, and David W. " \
                        "Thieltges. 2011. Food web including metazoan " \
                        "parasites for a brackish shallow water ecosystem " \
                        "in Germany and Denmark. Ecology 92:2007."
        self.name = "flensburg-food-web"
        self.ref = "https://figshare.com/articles/Full_Archive/3552066"
        self.description = "This data is of a food web for the Flensburg " \
                           "Fjord, a brackish shallow water inlet on the " \
                           "Baltic Sea, between Germany and Denmark."
        self.keywords = []
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.0.4'
        self.urls = {"zip": "https://ndownloader.figshare.com/files/5620326"}
        # Empty strings mark missing values in the source file.
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=[''])

        # Retriever <= 2.0.0 expected shortname/name/tags attributes and
        # the older nulls= keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              nulls=['', 'unknown'])
Example #17
0
    def __init__(self, **kwargs):
        """Set metadata for the PanTHERIA mammal trait database (Jones et
        al. 2009) and pick version-appropriate cleanup settings.
        """
        Script.__init__(self, **kwargs)
        self.title = "Pantheria (Jones et al. 2009)"
        self.name = "pantheria"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.3.0'
        self.ref = "https://figshare.com/collections/PanTHERIA_a_species-level_database_of_life_history_ecology_" \
                   "and_geography_of_extant_and_recently_extinct_mammals/3301274"
        self.urls = {"data": "https://ndownloader.figshare.com/files/5604752"}
        self.citation = "Kate E. Jones, Jon Bielby, Marcel Cardillo, Susanne A. Fritz, Justin O'Dell, C. David L. " \
                        "Orme, Kamran Safi, Wes Sechrest, Elizabeth H. Boakes, Chris Carbone, Christina Connolly, " \
                        "Michael J. Cutts, Janine K. Foster, Richard Grenyer, Michael Habib, Christopher A. " \
                        "Plaster, Samantha A. Price, Elizabeth A. Rigby, Janna Rist, Amber Teacher, Olaf R. P. " \
                        "Bininda-Emonds, John L. Gittleman, Georgina M. Mace, and Andy Purvis. 2009. PanTHERIA:a " \
                        "species-level database of life history, ecology, and geography of extant and recently " \
                        "extinct mammals. Ecology 90:2648."
        self.description = "PanTHERIA is a data set of multispecies trait data from diverse literature sources " \
                           "and also includes spatial databases of mammalian geographic ranges and global climatic " \
                           "and anthropogenic variables."
        self.keywords = ["mammals", "literature-compilation", "life-history"]

        # Retriever <= 2.0.0 expected shortname/name/tags attributes and
        # the older nulls= keyword for Cleanup; 'NA' marks missing values.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(correct_invalid_value, nulls=['NA'])
        else:
            self.cleanup_func_table = Cleanup(correct_invalid_value, missing_values=['NA'])
Example #18
0
 def download(self, engine=None, debug=False):
     """Fetch the 1983-2000 tree survey file and load the trees table."""
     Script.download(self, engine, debug)
     file_name = "LS_trees_1983_2000.txt"
     self.engine.download_file(self.urls["trees"], file_name)
     self.engine.auto_create_table(self.tables["trees"],
                                   filename=file_name)
     self.engine.insert_data_from_file(
         self.engine.format_filename(file_name))
    def __init__(self, **kwargs):
        """Set metadata for the Western Ghats tree demography dataset
        (Pelissier et al. 2011) and pick version-appropriate cleanup.
        """
        Script.__init__(self, **kwargs)
        self.title = "Tree demography in Western Ghats, India - Pelissier et al. 2011"
        self.name = "tree-demog-wghats"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.3.1'
        self.ref = "https://figshare.com/collections/Tree_demography_in_an_undisturbed_" \
                   "Dipterocarp_permanent_sample_plot_at_Uppangala_Western_Ghats_of_India/3304026"
        self.urls = {"data": "https://ndownloader.figshare.com/files/5619033"}
        self.citation = "Raphael Pelissier, Jean-Pierre Pascal, N. Ayyappan, B. R. Ramesh, " \
                        "S. Aravajy, and S. R. Ramalingam. 2011. Twenty years tree demography " \
                        "in an undisturbed Dipterocarp permanent sample plot at Uppangala, " \
                        "Western Ghats of India. Ecology 92:1376."
        self.description = "A data set on demography of trees monitored over 20 years in " \
                           "Uppangala permanent sample plot (UPSP)."
        self.keywords = ["plants", "time-series", "observational"]

        # Retriever <= 2.0.0 expected shortname/name/tags attributes and
        # the older nulls= keyword for Cleanup; 'NA' marks missing values.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              nulls=['NA'])
        else:
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              missing_values=['NA'])
Example #20
0
    def __init__(self, **kwargs):
        """Set metadata for the Alwyn H. Gentry forest transect dataset."""
        Script.__init__(self, **kwargs)
        self.title = "Alwyn H. Gentry Forest Transect Dataset"
        self.name = "gentry-forest-transects"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.4.1'
        # species/counts are derived during processing, so their URLs
        # are intentionally empty placeholders.
        self.urls = {
            "stems": "http://www.mobot.org/mobot/gentry/123/all_Excel.zip",
            "sites": "https://ndownloader.figshare.com/files/5515373",
            "species": "",
            "counts": ""
        }
        self.keywords = ["plants", "global-scale", "observational"]
        self.ref = "http://www.mobot.org/mobot/research/gentry/welcome.shtml"
        self.citation = "Phillips, O. and Miller, J.S., 2002. Global patterns of plant diversity: Alwyn H. Gentry's forest transect data set. Missouri Botanical Press."
        self.addendum = """Researchers who make use of the data in publications are requested to acknowledge Alwyn H. Gentry, the Missouri Botanical Garden, and collectors who assisted Gentry or contributed data for specific sites. It is also requested that a reprint of any publication making use of the Gentry Forest Transect Data be sent to:

Bruce E. Ponman
Missouri Botanical Garden
P.O. Box 299
St. Louis, MO 63166-0299
U.S.A. """

        # Retriever <= 2.0.0 expected shortname/name/tags attributes.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
    def __init__(self, **kwargs):
        """Set metadata and the download URL for the 2016 PREDICTS
        biodiversity database release.
        """
        Script.__init__(self, **kwargs)
        self.title = "PREDICTS Database"
        self.name = "predicts"
        self.ref = "http://data.nhm.ac.uk/dataset/902f084d-ce3f-429f-a6a5-23162c73fdf7"
        self.urls = {
            "PREDICTS":
            "http://data.nhm.ac.uk/dataset/the-2016-release-of-the-predicts-database/"
            "resource/78dac1a9-6aa0-4dcb-9750-50df04f8ca6e/download"
        }
        self.citation = "Lawrence N Hudson; Tim Newbold; Sara Contu; " \
                        "Samantha L L Hill et al. (2016). Dataset: " \
                        "The 2016 release of the PREDICTS database. " \
                        "http://dx.doi.org/10.5519/0066354"
        self.keywords = ['biodiversity', 'anthropogenic pressures']
        self.retriever_minimum_version = "2.0.dev"
        self.version = "1.0.4"
        self.description = "A dataset of 3,250,404 measurements, " \
                           "collated from 26,114 sampling locations in 94 " \
                           "countries and representing 47,044 species."

        # Retriever <= 2.0.0 expected shortname/name/tags attributes.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
Example #22
0
 def download(self, engine=None, debug=False):
     """Download the 19 Bioclim .bil raster files (download-only engine)."""
     # This dataset is raster data, so reject any tabular engine up front.
     if engine.name != "Download Only":
         raise Exception("The Bioclim dataset contains only non-tabular data files, and can only be used with the 'download only' engine.")
     Script.download(self, engine, debug)
     rasters = []
     for num in range(1, 20):
         rasters.append("bio%s.bil" % num)
     self.engine.download_files_from_archive(self.urls["climate"], rasters)
     self.engine.register_files(rasters)
Beispiel #23
0
    def download(self, engine=None, debug=False):
        """Download the succession sampling data and repair its header.

        The raw file's header is split across the first two lines; the
        first newline is converted to a tab so the output file starts with
        a single header row, which is then used to create and load the
        main table.
        """
        Script.download(self, engine, debug)

        self.engine.download_file(
            self.urls["main"], "Succession_sampling_03-07_data_original.txt")
        data_path = self.engine.format_filename(
            "Succession_sampling_03-07_data.txt")

        # Files are opened in binary mode, so the replace() arguments must
        # be bytes: str arguments raise TypeError on Python 3.
        with open(self.engine.find_file(
                "Succession_sampling_03-07_data_original.txt"), 'rb') as old_data, \
                open(data_path, 'wb') as new_data:
            line1 = old_data.readline()
            line2 = old_data.readline()
            newline = line1.replace(b"\n", b"\t") + line2
            new_data.write(newline)
            for line in old_data:
                new_data.write(line)

        self.engine.auto_create_table(
            self.tables["main"], filename="Succession_sampling_03-07_data.txt")
        self.engine.insert_data_from_file(data_path)
 def __init__(self, **kwargs):
     """Register dataset metadata for the eBird Observation Dataset."""
     Script.__init__(self, **kwargs)
     self.name = "eBird Observation Dataset"
     self.shortname = "eBirdOD"
     self.ref = "http://ebird.org/content/ebird/news/gbif/"
     self.urls = {
         "main": "https://dataone.ornith.cornell.edu/metacat/d1/mn/v1/object/CLOEODDATA.05192014.1"
     }
     self.description = ("A collection of observations from birders "
                         "through portals managed and maintained by "
                         "local partner conservation organizations")
Beispiel #25
0
    def __init__(self, **kwargs):
        """Register metadata and archive URLs for the VertNet datasets."""
        Script.__init__(self, **kwargs)
        self.title = "vertnet:"
        self.name = "vertnet"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.4.2'
        self.ref = "http://vertnet.org/resources/datatoolscode.html"
        # Every taxon archive lives under the same iPlant repository path.
        base = ('https://de.iplantcollaborative.org/anon-files//iplant/home/'
                'shared/commons_repo/curated/')
        self.urls = {
            'amphibians': base + 'Vertnet_Amphibia_Sep2016/VertNet_Amphibia_Sept2016.zip',
            'birds': base + 'Vertnet_Aves_Sep2016/VertNet_Aves_Sept2016.zip',
            'fishes': base + 'Vertnet_Fishes_Sep2016/VertNet_Fishes_Sept2016.zip',
            'mammals': base + 'Vertnet_Mammalia_Sep2016/VertNet_Mammalia_Sept2016.zip',
            'reptiles': base + 'Vertnet_Reptilia_Sep2016/VertNet_Reptilia_Sept2016.zip',
        }
        self.description = " "
        self.keywords = ['Taxon > animals']

        # Retriever <= 2.0.0 expects the legacy attribute names.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
Beispiel #26
0
    def __init__(self, **kwargs):
        """Register metadata for the Bioclim 2.5-minute climate rasters."""
        Script.__init__(self, **kwargs)
        self.title = "Bioclim 2.5 Minute Climate Data"
        self.name = "bioclim"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.2.2'
        self.ref = "http://worldclim.org/bioclim"
        self.urls = {
            "climate": ("http://biogeo.ucdavis.edu/data/climate/worldclim/"
                        "1_4/grid/cur/bio_2-5m_bil.zip")
        }
        self.keywords = ["climate"]
        self.description = ("Bioclimatic variables that are derived "
                            "from the monthly temperature and rainfall "
                            "values in order to generate more biologically "
                            "meaningful variables.")
        self.citation = ("Hijmans, R.J., S.E. Cameron, J.L. Parra, "
                         "P.G. Jones and A. Jarvis, 2005. Very high "
                         "resolution interpolated climate surfaces for "
                         "global land areas. International Journal of "
                         "Climatology 25: 1965-1978.")

        # Retriever <= 2.0.0 expects the legacy attribute names.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
Beispiel #27
0
    def download(self, engine=None, debug=False):
        """Fetch the PRISM monthly rasters for 1981-2014 (download-only)."""
        # The rasters are non-tabular, so only the download-only engine works.
        if engine.name != "Download Only":
            raise Exception(
                "The PRISM dataset contains only non-tabular data files, and can only be used with the 'download only' engine."
            )
        Script.download(self, engine, debug)

        months = ["{:02d}".format(m) for m in range(1, 13)]
        for variable in ['ppt', 'tmax', 'tmean', 'tmin']:
            # Precipitation archives carry the M3 revision tag, temperature M2.
            revision = "M3" if variable == 'ppt' else "M2"
            for year in range(1981, 2015):
                for month in months:
                    names = self.get_file_names(variable, revision, year,
                                                month)
                    url = urllib.parse.urljoin(
                        self.urls["climate"],
                        "{}/{}{}".format(variable, year, month))
                    archive = "PRISM_{}_stable_4km{}_{}{}_bil.zip".format(
                        variable, revision, year, month)
                    self.engine.download_files_from_archive(
                        url,
                        names,
                        archivename=archive,
                        keep_in_dir=True)
                    self.engine.register_files(names)
    def __init__(self, **kwargs):
        """Register metadata for the BAAD biomass/allometry database."""
        Script.__init__(self, **kwargs)
        self.title = "BAAD: a Biomass And Allometry Database for woody plants"
        self.name = "biomass-allometry-db"
        self.retriever_minimum_version = "2.0.dev"
        self.version = "1.4.5"
        self.ref = "https://doi.org/10.6084/m9.figshare.c.3307692.v1"
        self.urls = {"BAAD": "https://ndownloader.figshare.com/files/5634309"}
        self.licenses = [{"name": "CC0-1.0"}]
        self.keywords = ['plants', 'observational']
        self.citation = ("Falster, D.S., Duursma, R.A., Ishihara, M.I., "
                         "Barneche, D.R., FitzJohn, R.G., Varhammar, A., Aiba, "
                         "M., Ando, M., Anten, N., Aspinwall, M.J. and "
                         "Baltzer, J.L., 2015. BAAD: a Biomass And Allometry "
                         "Database for woody plants.")
        self.description = ("The data set is a Biomass and allometry database "
                            "(BAAD) for woody plants containing 259634 "
                            "measurements collected in 176 different studies "
                            "from 21084 individuals across 678 species.")

        # Retriever <= 2.0.0 expects legacy attribute names and the old
        # ``nulls`` keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              nulls=['NA'])
        else:
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              missing_values=['NA'])
            self.encoding = "latin-1"
    def __init__(self, **kwargs):
        """Register metadata for the Southern Ocean dietary database."""
        Script.__init__(self, **kwargs)
        self.title = "A Southern Ocean dietary database"
        self.name = "socean-diet-data"
        self.shortname = "socean-diet-data"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.0.3'
        self.ref = "https://figshare.com/articles/Full_Archive/3551304"
        self.urls = {"zip": "https://ndownloader.figshare.com/files/5618823"}
        self.keywords = []
        self.citation = ("Ben Raymond, Michelle Marshall, Gabrielle Nevitt, "
                         "Chris L. Gillies, John van den Hoff, Jonathan "
                         "S. Stark, Marcel Losekoot, Eric J. Woehler, and "
                         "Andrew J. Constable. 2011. "
                         "A Southern Ocean dietary database. Ecology 92:1188.")
        self.description = ("Diet-related data from published"
                            " and unpublished data sets and studies")
        self.cleanup_func_table = Cleanup(
            correct_invalid_value, missing_values=['', 'unknown'])

        # Retriever <= 2.0.0 expects legacy attribute names and the old
        # ``nulls`` keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(
                correct_invalid_value, nulls=['', 'unknown'])
Beispiel #30
0
    def __init__(self, **kwargs):
        """Register metadata for the global wood density database (GWDD)."""
        Script.__init__(self, **kwargs)
        self.title = "Global wood density database - Zanne et al. 2009"
        self.name = "wood-density"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.3.0'
        self.ref = "http://datadryad.org/resource/doi:10.5061/dryad.234"
        self.urls = {"GWDD": "http://datadryad.org/bitstream/handle/10255/dryad.235/GlobalWoodDensityDatabase.xls?sequence=1"}
        self.keywords = [
            "Taxon > Plants",
            "Spatial Scale > Global",
            "Data Type > Observational",
        ]
        self.description = ("A collection  and collation of data on the major "
                            "wood functional traits, including the largest "
                            "wood density database to date (8412 taxa), "
                            "mechanical strength measures and anatomical "
                            "features, as well as clade-specific features "
                            "such as secondary chemistry.")
        self.citation = ("Chave J, Coomes DA, Jansen S, Lewis SL, Swenson NG, "
                         "Zanne AE (2009) Towards a worldwide wood economics "
                         "spectrum. Ecology Letters 12(4): 351-366. "
                         "http://dx.doi.org/10.1111/j.1461-0248.2009.01285.x "
                         "and Zanne AE, Lopez-Gonzalez G, Coomes DA, Ilic J, "
                         "Jansen S, Lewis SL, Miller RB, Swenson NG, Wiemann "
                         "MC, Chave J (2009) Data from: Towards a worldwide "
                         "wood economics spectrum. Dryad Digital Repository. "
                         "http://dx.doi.org/10.5061/dryad.234")
        self.addendum = """ *Correspondence for updates to the database: [email protected]
        For descriptions of the database, see Chave et al. 2009. Towards a worldwide wood economics spectrum. Ecology Letters. Identifier: http://hdl.handle.net/10255/dryad.234

        Below we list the rules of use for the Global wood density database.
        These are developed based on the rules of use for the Glopnet dataset (www.nature.com/nature/journal/v428/n6985/full/nature02403.html) and Cedar Creek LTER and Related Data (http://www.lter.umn.edu/cgi-bin/register).
        If you would like to use the Global wood density database, we request that you:
        1. Notify the main address of correspondence (Gaby Lopez-Gonzalo) if you plan to use the database in a publication.
        2. Provide recognition of the efforts of this group in the assembly of the data by using the citation for the database above.
        3. Recognize that these data were assembled by the group for various analyses and research questions. If any of these uses overlap with your interests, you recognize that group has precedence in addressing these questions."""

        # Retriever <= 2.0.0 expects the legacy attribute names.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
    def __init__(self, **kwargs):
        """Register metadata for the BBS 50-stop dataset (v1.4.1)."""
        Script.__init__(self, **kwargs)
        self.title = "USGS North American Breeding Bird Survey 50 stop"
        self.name = "breed-bird-survey-50stop"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.4.1'
        self.ref = "http://www.pwrc.usgs.gov/BBS/"
        self.keywords = ["birds", "continental-scale"]
        self.description = ("A Cooperative effort between the U.S. "
                            "Geological Survey's Patuxent Wildlife Research "
                            "Center and Environment Canada's Canadian "
                            "Wildlife Service to monitor the status and "
                            "trends of North American bird populations.")
        self.citation = ("Pardieck, K.L., D.J. Ziolkowski Jr., M.-A.R. "
                         "Hudson. 2015. North American Breeding Bird Survey "
                         "Dataset 1966 - 2014, version 2014.0. U.S. "
                         "Geological Survey, Patuxent Wildlife Research "
                         "Center.")
        # All files are served from the same USGS FTP directory.
        base = "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/"
        self.urls = {
            "counts": base + "50-StopData/1997ToPresent_SurveyWide/",
            "routes": base + "Routes.zip",
            "weather": base + "Weather.zip",
            "region_codes": base + "RegionCodes.txt",
            "species": base + "SpeciesList.txt",
        }

        # Retriever <= 2.0.0 expects legacy attribute names and the old
        # ``nulls`` keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(correct_invalid_value, nulls=['NULL'])
            self.cleanup_func_clean = Cleanup(correct_invalid_value, nulls=['*'])
        else:
            self.cleanup_func_table = Cleanup(correct_invalid_value, missing_values=['NULL'])
            self.cleanup_func_clean = Cleanup(correct_invalid_value, missing_values=['*'])
    def __init__(self, **kwargs):
        """Register metadata for the Western Ghats tree demography data."""
        Script.__init__(self, **kwargs)
        self.title = "Tree demography in Western Ghats, India - Pelissier et al. 2011"
        self.name = "tree-demog-wghats"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.3.2'
        self.ref = ("https://figshare.com/collections/Tree_demography_in_an_undisturbed_"
                    "Dipterocarp_permanent_sample_plot_at_Uppangala_Western_Ghats_of_India/3304026")
        self.urls = {"data": "https://ndownloader.figshare.com/files/5619033"}
        self.keywords = ["plants", "time-series", "observational"]
        self.citation = ("Raphael Pelissier, Jean-Pierre Pascal, N. Ayyappan, B. R. Ramesh, "
                         "S. Aravajy, and S. R. Ramalingam. 2011. Twenty years tree demography "
                         "in an undisturbed Dipterocarp permanent sample plot at Uppangala, "
                         "Western Ghats of India. Ecology 92:1376.")
        self.description = ("A data set on demography of trees monitored over 20 years in "
                            "Uppangala permanent sample plot (UPSP).")

        # Retriever <= 2.0.0 expects legacy attribute names and the old
        # ``nulls`` keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(
                correct_invalid_value, nulls=['NA'])
        else:
            self.cleanup_func_table = Cleanup(
                correct_invalid_value, missing_values=['NA'])
    def __init__(self, **kwargs):
        """Register metadata for the Gulf of Maine intertidal data."""
        Script.__init__(self, **kwargs)
        self.title = "Gulf of Maine intertidal density/cover (Petraitis et al. 2008)"
        self.name = "intertidal-abund-me"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.5.3'
        self.ref = "https://figshare.com/collections/DENSITIES_AND_COVER_DATA_FOR_INTERTIDAL_ORGANISMS_IN_THE_GULF_OF_MAINE_USA_FROM_2003_TO_2007/3300200"
        self.urls = {"main": "https://ndownloader.figshare.com/files/5600831"}
        self.citation = ("Peter S. Petraitis, Harrison Liu, and "
                         "Erika C. Rhile. 2008. Densities and cover "
                         "data for intertidal organisms in the Gulf of "
                         "Maine, USA, from 2003 to 2007. Ecology 89:588.")
        self.description = ("The data on densities and percent cover in the "
                            "60 experimental plots from 2003 to 2007 and to "
                            "update data from 1996 to 2002 that are already "
                            "published in Ecological Archives."
                            "Includes densities of mussels, "
                            "herbivorous limpet, herbivorous snails, "
                            "predatory snail, barnacle , fucoid algae and "
                            "percent cover by mussels, barnacles, fucoids, "
                            "and other sessile organisms.")
        self.cleanup_func_table = Cleanup(correct_invalid_value, missing_values=[-999.9])

        # Retriever <= 2.0.0 expects legacy attribute names and the old
        # ``nulls`` keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.cleanup_func_table = Cleanup(correct_invalid_value, nulls=[-999.9])
        self.tables = {"main": Table("main", cleanup=self.cleanup_func_table)}
    def __init__(self, **kwargs):
        """Register metadata for the BBS 50-stop dataset (v2.0.0)."""
        Script.__init__(self, **kwargs)
        self.title = "USGS North American Breeding Bird Survey 50 stop"
        self.name = "breed-bird-survey-50stop"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '2.0.0'
        self.ref = "http://www.pwrc.usgs.gov/BBS/"
        self.keywords = ["birds", "continental-scale"]
        self.description = ("A Cooperative effort between the U.S. "
                            "Geological Survey's Patuxent Wildlife Research "
                            "Center and Environment Canada's Canadian "
                            "Wildlife Service to monitor the status and "
                            "trends of North American bird populations.")
        self.citation = ("Pardieck, K.L., D.J. Ziolkowski Jr., M.-A.R. "
                         "Hudson. 2015. North American Breeding Bird Survey "
                         "Dataset 1966 - 2014, version 2014.0. U.S. "
                         "Geological Survey, Patuxent Wildlife Research "
                         "Center.")
        # All files are served from the same USGS FTP directory.
        base = "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/"
        self.urls = {
            "counts": base + "50-StopData/1997ToPresent_SurveyWide/",
            "routes": base + "routes.zip",
            "weather": base + "Weather.zip",
            "region_codes": base + "RegionCodes.txt",
            "species": base + "SpeciesList.txt",
        }

        # Retriever <= 2.0.0 expects legacy attribute names and the old
        # ``nulls`` keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(
                correct_invalid_value, nulls=['NULL'])
            self.cleanup_func_clean = Cleanup(
                correct_invalid_value, nulls=['*'])
        else:
            self.encoding = "latin-1"
            self.cleanup_func_table = Cleanup(
                correct_invalid_value, missing_values=['NULL'])
            self.cleanup_func_clean = Cleanup(
                correct_invalid_value, missing_values=['*'])
Beispiel #35
0
    def __init__(self, **kwargs):
        """Register metadata for the PanTHERIA mammal trait database."""
        Script.__init__(self, **kwargs)
        self.title = "Pantheria (Jones et al. 2009)"
        self.name = "pantheria"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.3.1'
        self.ref = ("https://figshare.com/collections/PanTHERIA_a_species-level_database_of_life_history_ecology_"
                    "and_geography_of_extant_and_recently_extinct_mammals/3301274")
        self.urls = {"data": "https://ndownloader.figshare.com/files/5604752"}
        self.keywords = ["mammals", "literature-compilation", "life-history"]
        self.citation = ("Kate E. Jones, Jon Bielby, Marcel Cardillo, Susanne A. Fritz, Justin O'Dell, C. David L. "
                         "Orme, Kamran Safi, Wes Sechrest, Elizabeth H. Boakes, Chris Carbone, Christina Connolly, "
                         "Michael J. Cutts, Janine K. Foster, Richard Grenyer, Michael Habib, Christopher A. "
                         "Plaster, Samantha A. Price, Elizabeth A. Rigby, Janna Rist, Amber Teacher, Olaf R. P. "
                         "Bininda-Emonds, John L. Gittleman, Georgina M. Mace, and Andy Purvis. 2009. PanTHERIA:a "
                         "species-level database of life history, ecology, and geography of extant and recently "
                         "extinct mammals. Ecology 90:2648.")
        self.description = ("PanTHERIA is a data set of multispecies trait data from diverse literature sources "
                            "and also includes spatial databases of mammalian geographic ranges and global climatic "
                            "and anthropogenic variables.")

        # Retriever <= 2.0.0 expects legacy attribute names and the old
        # ``nulls`` keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              nulls=['NA'])
        else:
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              missing_values=['NA'])
Beispiel #36
0
    def __init__(self, **kwargs):
        """Register metadata for the Western Ghats forest plot data."""
        Script.__init__(self, **kwargs)
        self.title = "Indian Forest Stand Structure and Composition (Ramesh et al. 2010)"
        self.name = "forest-plots-wghats"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.3.2'
        self.ref = ("https://figshare.com/collections/Forest_stand_structure_and_composition_in_96_sites_"
                    "along_environmental_gradients_in_the_central_Western_Ghats_of_India/3303531")
        self.urls = {'data': 'https://ndownloader.figshare.com/files/5617140'}
        self.keywords = ['plants', 'regional-scale', 'observational']
        self.citation = ("B. R. Ramesh, M. H. Swaminath, Santoshgouda V. Patil, Dasappa, Raphael Pelissier, P. "
                         "Dilip Venugopal, S. Aravajy, Claire Elouard, and S. Ramalingam. 2010. Forest stand "
                         "structure and composition in 96 sites along environmental gradients in the central "
                         "Western Ghats of India. Ecology 91:3118.")
        self.description = ("This data set reports woody plant species abundances in a network of 96 sampling "
                            "sites spread across 22000 km2 in central Western Ghats region, Karnataka, India.")

        # Retriever <= 2.0.0 expects legacy attribute names and the old
        # ``nulls`` keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              nulls=['NA'])
        else:
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              missing_values=['NA'])
    def download(self, engine=None, debug=False):
        """Download and load the aquatic animal excretion data tables.

        Builds two tables: ``aquatic_animals`` (the measurements) and
        ``variable_descriptions`` (the column documentation shipped with
        the archive). The zip archive is fetched once if any expected CSV
        is missing locally, instead of re-checking and potentially
        re-downloading per file.
        """
        Script.download(self, engine, debug)
        engine = self.engine
        filenames = [
            'Aquatic_animal_excretion_data.csv',
            'Aquatic_animal_excretion_variable_descriptions.csv'
        ]
        # One archive download covers all files; a single guard avoids the
        # per-file loop that could trigger the download more than once.
        if not all(os.path.isfile(engine.format_filename(name))
                   for name in filenames):
            engine.download_files_from_archive(
                self.urls["aquatic_animals"], filenames, "zip")

        # Aquatic_animal_excretion_data.csv -> aquatic_animals
        table = Table('aquatic_animals', delimiter=',')
        table.columns = [("index", ("pk-int", )), ("sourcenumber", ("int", )),
                         ("sourcename", ("char", )),
                         ("speciesname", ("char", )),
                         ("speciescode", ("char", )),
                         ("invert/vert", ("char", )), ("phylum", ("char", )),
                         ("class", ("char", )), ("order", ("char", )),
                         ("family", ("char", )), ("trophicgild", ("char", )),
                         ("drymass", ("double", )),
                         ("logdrymass", ("double", )),
                         ("ecosystemtype", ("char", )),
                         ("energysource", ("char", )), ("habitat", ("char", )),
                         ("residentecosystem", ("char", )),
                         ("temperature", ("double", )),
                         ("nexcretionrate", ("double", )),
                         ("pexcretionrate", ("double", )),
                         ("lognexcretionrate", ("double", )),
                         ("logpexcretionrate", ("double", )),
                         ("incubationtime", ("double", )),
                         ("nform", ("char", )), ("pform", ("char", )),
                         ("bodyc", ("double", )), ("bodyn", ("double", )),
                         ("bodyp", ("double", )), ("bodyc:n", ("double", )),
                         ("bodyc:p", ("double", )), ("bodyn:p", ("double", )),
                         ("bodydatasource", ("char", )),
                         ("datasource", ("char", )),
                         ("dataproviders", ("char", ))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(
            engine.format_filename('Aquatic_animal_excretion_data.csv'))

        # Aquatic_animal_excretion_variable_descriptions.csv -> variable_descriptions
        table = Table('variable_descriptions', delimiter=',')
        table.columns = [("Column", ("char", )), ("Variable", ("char", )),
                         ("Description", ("char", )),
                         ("Data Class", ("char", )), ("Units", ("char", )),
                         ("Minimum_value", ("char", )),
                         ("Maximum_value", ("char", )),
                         ("Possible_values", ("char", )),
                         ("Missing_data_symbol", ("char", )),
                         ("Notes", ("char", ))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(engine.format_filename(
            'Aquatic_animal_excretion_variable_descriptions.csv'))
 def __init__(self, **kwargs):
     """Register dataset metadata for the PRISM climate data."""
     Script.__init__(self, **kwargs)
     self.name = "PRISM Climate Data"
     self.shortname = "PRISM"
     self.ref = "http://prism.oregonstate.edu/"
     self.urls = {
         "climate": "http://services.nacse.org/prism/data/public/4km/"
     }
     self.description = ("The PRISM data set represents climate observations "
                         "from a wide range of monitoring networks, applies "
                         "sophisticated quality control measures, and "
                         "develops spatial climate datasets to reveal short- "
                         "and long-term climate patterns. ")
    def __init__(self, **kwargs):
        """Register metadata for the aquatic animal excretion dataset."""
        Script.__init__(self, **kwargs)
        self.title = "Aquatic Animal Excretion"
        self.name = "aquatic-animal-excretion"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.1.6'
        self.encoding = "latin-1"
        self.ref = "http://onlinelibrary.wiley.com/doi/10.1002/ecy.1792/abstract"
        self.urls = {
            'aquatic_animals': (
                'https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?'
                'doi=10.1002%2Fecy.1792&file=ecy1792-sup-0001-DataS1.zip')
        }
        self.keywords = ['Aquatic']
        self.description = "Dataset containing the nutrient cycling rates of individual animals."
        self.citation = ("Vanni, M. J., McIntyre, P. B., Allen, D., Arnott, D. L., Benstead, J. P., Berg, D. J., "
                         "Brabrand, Å., Brosse, S., Bukaveckas, P. A., Caliman, A., Capps, K. A., Carneiro, L. S., "
                         "Chadwick, N. E., Christian, A. D., Clarke, A., Conroy, J. D., Cross, W. F., Culver, D. A., "
                         "Dalton, C. M., Devine, J. A., Domine, L. M., Evans-White, M. A., Faafeng, B. A., "
                         "Flecker, A. S., Gido, K. B., Godinot, C., Guariento, R. D., Haertel-Borer, S., Hall, "
                         "R. O., Henry, R., Herwig, B. R., Hicks, B. J., Higgins, K. A., Hood, J. M., Hopton, M. E., "
                         "Ikeda, T., James, W. F., Jansen, H. M., Johnson, C. R., Koch, B. J., Lamberti, G. A., "
                         "Lessard-Pilon, S., Maerz, J. C., Mather, M. E., McManamay, R. A., Milanovich, J. R., "
                         "Morgan, D. K. J., Moslemi, J. M., Naddafi, R., Nilssen, J. P., Pagano, M., Pilati, A., "
                         "Post, D. M., Roopin, M., Rugenski, A. T., Schaus, M. H., Shostell, J., Small, G. E., "
                         "Solomon, C. T., Sterrett, S. C., Strand, O., Tarvainen, M., Taylor, J. M., Torres-Gerald, "
                         "L. E., Turner, C. B., Urabe, J., Uye, S.-I., Ventelä, A.-M., Villeger, S., Whiles, M. R., "
                         "Wilhelm, F. M., Wilson, H. F., Xenopoulos, M. A. and Zimmer, K. D. (2017), "
                         "A global database of nitrogen and phosphorus excretion rates of aquatic animals. "
                         "Ecology. Accepted Author Manuscript. doi:10.1002/ecy.1792")

        # Retriever <= 2.0.0 expects the legacy attribute names.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
Beispiel #40
0
    def __init__(self, **kwargs):
        """Register metadata for the Southern Ocean dietary database."""
        Script.__init__(self, **kwargs)
        self.title = "A Southern Ocean dietary database"
        self.name = "socean-diet-data"
        self.shortname = "socean-diet-data"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.0.4'
        self.ref = "https://figshare.com/articles/Full_Archive/3551304"
        self.urls = {"zip": "https://ndownloader.figshare.com/files/5618823"}
        self.keywords = []
        self.citation = ("Ben Raymond, Michelle Marshall, Gabrielle Nevitt, "
                         "Chris L. Gillies, John van den Hoff, Jonathan "
                         "S. Stark, Marcel Losekoot, Eric J. Woehler, and "
                         "Andrew J. Constable. 2011. "
                         "A Southern Ocean dietary database. Ecology 92:1188.")
        self.description = ("Diet-related data from published"
                            " and unpublished data sets and studies")
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['', 'unknown'])

        # Retriever <= 2.0.0 expects legacy attribute names and the old
        # ``nulls`` keyword for Cleanup.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
            self.cleanup_func_table = Cleanup(correct_invalid_value,
                                              nulls=['', 'unknown'])
    def download(self, engine=None, debug=False):
        """Download each data file, strip its metadata preamble, and load it.

        Header/metadata lines either contain no ";" separator or are runs
        of empty fields (";;;;"); everything from the first real data line
        onward is copied into a new file which is then ingested.
        """
        Script.download(self, engine, debug)
        for key in self.urls:
            original_name = self.urls[key].rpartition('/')[-1]
            self.engine.download_file(self.urls[key], original_name)
            new_file_path = self.engine.format_filename("new" + key)
            # Context-manage both handles; the original closed the input
            # twice (once via `with`, once explicitly) and left the output
            # outside any `with`.
            with open_fr(self.engine.find_file(original_name)) as old_data, \
                    open_fw(new_file_path) as new_data:
                # after the metadata lines, set in_data to True
                in_data = False
                for line in old_data:
                    # metadata contains lines with no ";" and may have
                    # "(;;;;)+" or empty lines
                    if in_data or (";" in line and ";;;;" not in line):
                        in_data = True
                        new_data.write(line)
            self.engine.auto_create_table(
                Table(key, cleanup=self.cleanup_func_table),
                filename="new" + key)
            self.engine.insert_data_from_file(new_file_path)
Beispiel #42
0
    def __init__(self, **kwargs):
        """Describe the Zanne et al. 2009 global wood density dataset."""
        Script.__init__(self, **kwargs)
        self.name = "wood-density"
        self.title = "Global wood density database - Zanne et al. 2009"
        self.retriever_minimum_version = '2.0.dev'
        self.version = '1.3.1'
        self.ref = "http://datadryad.org/resource/doi:10.5061/dryad.234"
        # Single source file: an Excel workbook hosted on Dryad.
        self.urls = {
            "GWDD":
            "http://datadryad.org/bitstream/handle/10255/dryad.235/GlobalWoodDensityDatabase.xls?sequence=1"
        }
        self.keywords = ["Taxon > Plants",
                         "Spatial Scale > Global",
                         "Data Type > Observational"]
        self.description = "A collection  and collation of data on the major wood functional traits, including the largest wood density database to date (8412 taxa), mechanical strength measures and anatomical features, as well as clade-specific features such as secondary chemistry."
        self.citation = "Chave J, Coomes DA, Jansen S, Lewis SL, Swenson NG, Zanne AE (2009) Towards a worldwide wood economics spectrum. Ecology Letters 12(4): 351-366. http://dx.doi.org/10.1111/j.1461-0248.2009.01285.x and Zanne AE, Lopez-Gonzalez G, Coomes DA, Ilic J, Jansen S, Lewis SL, Miller RB, Swenson NG, Wiemann MC, Chave J (2009) Data from: Towards a worldwide wood economics spectrum. Dryad Digital Repository. http://dx.doi.org/10.5061/dryad.234"
        self.addendum = """ *Correspondence for updates to the database: [email protected]
        For descriptions of the database, see Chave et al. 2009. Towards a worldwide wood economics spectrum. Ecology Letters. Identifier: http://hdl.handle.net/10255/dryad.234

        Below we list the rules of use for the Global wood density database.
        These are developed based on the rules of use for the Glopnet dataset (www.nature.com/nature/journal/v428/n6985/full/nature02403.html) and Cedar Creek LTER and Related Data (http://www.lter.umn.edu/cgi-bin/register).
        If you would like to use the Global wood density database, we request that you:
        1. Notify the main address of correspondence (Gaby Lopez-Gonzalo) if you plan to use the database in a publication.
        2. Provide recognition of the efforts of this group in the assembly of the data by using the citation for the database above.
        3. Recognize that these data were assembled by the group for various analyses and research questions. If any of these uses overlap with your interests, you recognize that group has precedence in addressing these questions."""

        # Backwards compatibility with retriever < 2.0: map the old
        # attribute names (shortname/name/tags) onto the new ones.
        if parse_version(VERSION) <= parse_version("2.0.0"):
            self.shortname = self.name
            self.name = self.title
            self.tags = self.keywords
Beispiel #43
0
 def __init__(self):
     """Register the Clark 2006 La Selva tree dataset with the retriever."""
     # -999 is the missing-value sentinel used by the source file.
     tree_table = Table('trees',
                        cleanup=Cleanup(correct_invalid_value, nulls=[-999]))
     Script.__init__(self,
                     name="Tree growth, mortality, physical condition - Clark, 2006",
                     shortname="Clark2006",
                     tables={'trees': tree_table},
                     urls={'trees': 'http://esapubs.org/archive/ecol/E087/132/LS_trees_1983_2000.txt'},
                     tags=['Taxon > Plants'],
                     description="David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132.")
 def __init__(self, **kwargs):
     """Describe the PRISM 4km gridded climate data source."""
     Script.__init__(self, **kwargs)
     self.shortname = "PRISM"
     self.name = "PRISM Climate Data"
     self.ref = "http://prism.oregonstate.edu/"
     # Public 4km-resolution grids served by NACSE.
     self.urls = {"climate": "http://services.nacse.org/prism/data/public/4km/"}
Beispiel #45
0
 def __init__(self, **kwargs):
     """Describe the BAAD woody-plant biomass and allometry dataset."""
     Script.__init__(self, **kwargs)
     self.shortname = "BAAD"
     self.name = "BAAD: a Biomass And Allometry Database for woody plants"
     self.ref = "http://esapubs.org/archive/ecol/E096/128/"
     self.tags = ['Taxon > Plants', 'Data Type > Observational']
     # Single zip archive containing the data and methods files.
     self.urls = {
         "BAAD": "http://www.esapubs.org/archive/ecol/E096/128/baad_data.zip"}
     self.description = "Falster, D.S., Duursma, R.A., Ishihara, M.I., Barneche, D.R., FitzJohn, R.G., Varhammar, A., Aiba, M., Ando, M., Anten, N., Aspinwall, M.J. and Baltzer, J.L., 2015. BAAD: a Biomass And Allometry Database for woody plants."
Beispiel #46
0
 def __init__(self):
     """Register the Clark 2006 tree growth/mortality dataset."""
     settings = dict(
         name="Tree growth, mortality, physical condition - Clark, 2006",
         shortname="Clark2006",
         # -999 marks missing values in the source file.
         tables={'trees': Table('trees',
                                cleanup=Cleanup(correct_invalid_value,
                                                nulls=[-999]))},
         urls={'trees': 'http://esapubs.org/archive/ecol/E087/132/LS_trees_1983_2000.txt'},
         tags=['Taxon > Plants'],
         description="David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132.")
     Script.__init__(self, **settings)
Beispiel #47
0
 def __init__(self, **kwargs):
     """Describe the USA National Phenology Network dataset."""
     Script.__init__(self, **kwargs)
     self.shortname = "NPN"
     self.name = "USA National Phenology Network"
     self.ref = "http://www.usanpn.org/results/data"
     self.tags = ["Data Type > Phenology", "Spatial Scale > Continental"]
     self.description = "The data set was collected via Nature's Notebook phenology observation program (2009-present), and (2) Lilac and honeysuckle data (1955-present)"
     self.citation = "Schwartz, M. D., Ault, T. R., & J. L. Betancourt, 2012: Spring Onset Variations and Trends in the Continental USA: Past and Regional Assessment Using Temperature-Based Indices. International Journal of Climatology (published online, DOI: 10.1002/joc.3625)."
Beispiel #48
0
 def __init__(self, **kwargs):
     """Describe the WorldClim Bioclim 2.5-minute climate grids."""
     Script.__init__(self, **kwargs)
     self.shortname = "Bioclim"
     self.name = "Bioclim 2.5 Minute Climate Data"
     self.ref = "http://worldclim.org/bioclim"
     self.tags = ["Data Type > Compilation"]
     # One zip archive of gridded (BIL) climate layers.
     self.urls = {
         "climate": "http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/bio_2-5m_bil.zip"}
     self.description = "Bioclimatic variables that are derived from the monthly temperature and rainfall values in order to generate more biologically meaningful variables."
     self.citation = "Hijmans, R.J., S.E. Cameron, J.L. Parra, P.G. Jones and A. Jarvis, 2005. Very high resolution interpolated climate surfaces for global land areas. International Journal of Climatology 25: 1965-1978."
 def __init__(self, **kwargs):
     """Describe the CRC avian body mass compilation (non-public data)."""
     Script.__init__(self, **kwargs)
     self.shortname = "AvianBodyMass"
     self.name = "CRC Avian Body Masses"
     # Licensed book content: flagged non-public, with no download URL.
     self.public = False
     self.urls = {"mass": ""}
     self.ref = "http://www.crcpress.com/ecommerce_product/product_detail.jsf?isbn=1420064444"
     # The source file uses '~' as its field delimiter.
     self.tables = {"mass": Table("mass", delimiter="~")}
     self.tags = ["Taxon > Birds", "Data Type > Compilation"]
Beispiel #50
0
 def __init__(self, **kwargs):
     """Describe the Zanne et al. global wood density database (GWDD)."""
     Script.__init__(self, **kwargs)
     self.name = "Zanne et al. Global wood density database."
     self.shortname = "GWDD"
     # Single Excel workbook hosted on the Dryad repository.
     self.urls = {"GWDD": "http://datadryad.org/bitstream/handle/10255/dryad.235/GlobalWoodDensityDatabase.xls?sequence=1"}
     self.tags = ["Taxon > Plants", "Spatial Scale > Global",
                  "Data Type > Observational"]
     self.ref = "http://datadryad.org/resource/doi:10.5061/dryad.234"
     # NOTE(review): the addendum literal below is unterminated in this
     # excerpt — the closing triple quote lies outside the visible text.
     self.addendum = """Citation for the database: Zanne, A.E., Lopez-Gonzalez, G.*, Coomes, D.A., Ilic, J., Jansen, S., Lewis, S.L., Miller, R.B., Swenson, N.G., Wiemann, M.C., and Chave, J. 2009. Global wood density database. Dryad. Identifier: http://hdl.handle.net/10255/dryad.235. 
Beispiel #51
0
 def __init__(self, **kwargs):
     """Describe the BAAD biomass and allometry database for woody plants."""
     Script.__init__(self, **kwargs)
     self.shortname = "BAAD"
     self.name = "BAAD: a Biomass And Allometry Database for woody plants"
     self.ref = "http://esapubs.org/archive/ecol/E096/128/"
     self.tags = ['Taxon > Plants', 'Data Type > Observational']
     # Single zip archive with the data and methods files.
     self.urls = {
         "BAAD": "http://www.esapubs.org/archive/ecol/E096/128/baad_data.zip"}
     self.citation = "Falster, D.S., Duursma, R.A., Ishihara, M.I., Barneche, D.R., FitzJohn, R.G., Varhammar, A., Aiba, M., Ando, M., Anten, N., Aspinwall, M.J. and Baltzer, J.L., 2015. BAAD: a Biomass And Allometry Database for woody plants."
     self.description = "The data set is a Biomass and allometry database (BAAD) for woody plants containing 259634 measurements collected in 176 different studies from 21084 individuals across 678 species."
Beispiel #52
0
 def __init__(self):
     """Register the Clark 2006 La Selva tree dataset (trees table only)."""
     # -999 is the missing-value code used by the source file.
     invalid_cleanup = Cleanup(correct_invalid_value, nulls=[-999])
     Script.__init__(self,
                     name="Tree growth, mortality, physical condition - Clark, 2006",
                     shortname="Clark2006",
                     tables={'trees': Table('trees', cleanup=invalid_cleanup)},
                     urls={'trees': 'http://esapubs.org/archive/ecol/E087/132/LS_trees_1983_2000.txt'},
                     tags=['Taxon > Plants'],
                     ref="http://esapubs.org/archive/ecol/E087/132/",
                     description="The data set helps to examine the post-establishment ecology of 10 species of tropical wet forest trees selected to span a range of predicted life history patterns at the La Selva Biological Station in Costa Rica.",
                     citation="David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132.")
Beispiel #53
0
    def download(self, engine=None, debug=False):
        """Fetch the PanTHERIA archive and load its data file as 'species'."""
        Script.download(self, engine, debug)
        engine = self.engine
        data_file = "PanTHERIA_1-0_WR05_Aug2008.txt"
        # Extract just the one data file from the zip archive.
        engine.download_files_from_archive(self.urls["data"], [data_file],
                                           filetype="zip")
        # Create the 'species' table and bulk-load the extracted file.
        species_table = Table('species', cleanup=self.cleanup_func_table)
        engine.auto_create_table(species_table, filename=data_file)
        engine.insert_data_from_file(engine.format_filename(data_file))
 def __init__(self, **kwargs):
     """Describe the CRC handbook of avian body masses (non-public data)."""
     Script.__init__(self, **kwargs)
     self.shortname = "AvianBodyMass"
     self.name = "CRC Avian Body Masses"
     # Licensed book content: flagged non-public, with no download URL.
     self.public = False
     self.urls = {"mass": ""}
     self.ref = "http://www.crcnetbase.com/isbn/9781420064452"
     self.citation = "Robert B. Payne, CRC Handbook of Avian Body Masses. Second Edition. The Wilson Journal of Ornithology Sep 2009 : Vol. 121, Issue 3, pg(s) 661-662 doi: 10.1676/1559-4491-121.3.661."
     self.description = "Body masses of birds of the world."
     # The source file uses '~' as its field delimiter.
     self.tables = {"mass": Table("mass", delimiter="~")}
     self.tags = ["Taxon > Birds", "Data Type > Compilation"]
Beispiel #55
0
 def __init__(self, **kwargs):
     """Describe the USDA Forest Inventory and Analysis database."""
     Script.__init__(self, **kwargs)
     self.shortname = "FIA"
     self.name = "Forest Inventory and Analysis"
     self.ref = "http://fia.fs.fed.us/"
     self.urls = {
         "main": "http://apps.fs.fed.us/fiadb-downloads/",
         'species': 'http://apps.fs.fed.us/fiadb-downloads/REF_SPECIES.CSV',
     }
     self.tags = ["Taxon > Plants", "Spatial Scale > Continental",
                  "Data Type > Observational"]
     self.description = """WARNING: This dataset requires downloading many large files and will probably take several hours to finish installing."""
     self.addendum = """This dataset requires downloading many large files - please be patient."""
    def download(self, engine=None, debug=False):
        """Fetch the marine predator-prey archive and load its data table."""
        Script.download(self, engine, debug)
        engine = self.engine
        data_file = "Predator_and_prey_body_sizes_in_marine_food_webs_vsn4.txt"
        # Extract only the single data file from the zip archive.
        engine.download_files_from_archive(self.urls["data"], [data_file],
                                           filetype="zip")
        # Create the 'main' table and bulk-load the extracted file.
        engine.auto_create_table(
            Table('main', cleanup=self.cleanup_func_table),
            filename=data_file)
        engine.insert_data_from_file(engine.format_filename(data_file))
Beispiel #57
0
 def __init__(self, **kwargs):
     Script.__init__(self, **kwargs)
     self.name = "Alwyn H. Gentry Forest Transect Dataset"
     self.shortname = "Gentry"
     self.urls = {"stems": "http://www.mobot.org/mobot/gentry/123/all_Excel.zip",
                  "sites": "http://www.ecologicaldata.org/sites/default/files/gentry_sites_data.txt",
                  "species": "",
                  "counts": ""}
     self.tags = ["Taxon > Plants", "Spatial Scale > Global",
                  "Data Type > Observational"]
     self.ref = "http://www.wlbcenter.org/gentry_data.htm"
     self.addendum = """Researchers who make use of the data in publications are requested to acknowledge Alwyn H. Gentry, the Missouri Botanical Garden, and collectors who assisted Gentry or contributed data for specific sites. It is also requested that a reprint of any publication making use of the Gentry Forest Transect Data be sent to: