def __init__(self, **kwargs):
    # Dataset script for the Flensburg Fjord food-web data
    # (Zander et al. 2011, Ecology 92:2007).
    Script.__init__(self, **kwargs)
    self.title = "Food web including metazoan parasites for a " \
                 "brackish shallow water ecosystem in Germany and Denmark"
    self.citation = "C. Dieter Zander, Neri Josten, Kim C. Detloff, " \
                    "Robert Poulin, John P. McLaughlin, and David W. " \
                    "Thieltges. 2011. Food web including metazoan " \
                    "parasites for a brackish shallow water ecosystem " \
                    "in Germany and Denmark. Ecology 92:2007."
    self.name = "flensburg-food-web"
    self.ref = "https://figshare.com/articles/Full_Archive/3552066"
    self.description = "This data is of a food web for the Flensburg " \
                       "Fjord, a brackish shallow water inlet on the " \
                       "Baltic Sea, between Germany and Denmark."
    self.keywords = []
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.0.4'
    self.urls = {"zip": "https://ndownloader.figshare.com/files/5620326"}
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=[''])
    # Backwards compatibility with retriever <= 2.0.0, which used the
    # older attribute names (shortname/tags) and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        # NOTE(review): the legacy branch also treats 'unknown' as missing
        # but the modern branch above does not — confirm this is intended.
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['', 'unknown'])
def __init__(self, **kwargs):
    """Set up metadata for the Southern Ocean dietary database script."""
    Script.__init__(self, **kwargs)
    self.name = "socean-diet-data"
    self.shortname = "socean-diet-data"
    self.title = "A Southern Ocean dietary database"
    self.description = ("Diet-related data from published"
                        " and unpublished data sets and studies")
    self.citation = ("Ben Raymond, Michelle Marshall, Gabrielle Nevitt, "
                     "Chris L. Gillies, John van den Hoff, Jonathan "
                     "S. Stark, Marcel Losekoot, Eric J. Woehler, and "
                     "Andrew J. Constable. 2011. "
                     "A Southern Ocean dietary database. Ecology 92:1188.")
    self.ref = "https://figshare.com/articles/Full_Archive/3551304"
    self.keywords = []
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.0.4'
    self.urls = {"zip": "https://ndownloader.figshare.com/files/5618823"}
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=['', 'unknown'])
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['', 'unknown'])
def __init__(self, **kwargs):
    # Dataset script for BAAD, the Biomass And Allometry Database for
    # woody plants (Falster et al. 2015).
    Script.__init__(self, **kwargs)
    self.title = "BAAD: a Biomass And Allometry Database for woody plants"
    self.name = "biomass-allometry-db"
    self.ref = "https://doi.org/10.6084/m9.figshare.c.3307692.v1"
    self.urls = {"BAAD": "https://ndownloader.figshare.com/files/5634309"}
    self.citation = "Falster, D.S., Duursma, R.A., Ishihara, M.I., " \
                    "Barneche, D.R., FitzJohn, R.G., Varhammar, A., Aiba, " \
                    "M., Ando, M., Anten, N., Aspinwall, M.J. and " \
                    "Baltzer, J.L., 2015. BAAD: a Biomass And Allometry " \
                    "Database for woody plants."
    self.licenses = [{"name": "CC0-1.0"}]
    self.keywords = ['plants', 'observational']
    self.retriever_minimum_version = "2.0.dev"
    self.version = "1.4.5"
    self.description = "The data set is a Biomass and allometry database " \
                       "(BAAD) for woody plants containing 259634 " \
                       "measurements collected in 176 different studies " \
                       "from 21084 individuals across 678 species."
    # retriever <= 2.0.0 used shortname/tags and `nulls`; newer versions
    # use `missing_values` and set the source file encoding.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
        self.encoding = "latin-1"
def __init__(self, **kwargs):
    # Dataset script for PanTHERIA, a species-level database of mammal
    # life history, ecology, and geography (Jones et al. 2009).
    Script.__init__(self, **kwargs)
    self.title = "Pantheria (Jones et al. 2009)"
    self.name = "pantheria"
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.3.1'
    self.ref = "https://figshare.com/collections/PanTHERIA_a_species-level_database_of_life_history_ecology_" \
               "and_geography_of_extant_and_recently_extinct_mammals/3301274"
    self.urls = {"data": "https://ndownloader.figshare.com/files/5604752"}
    self.citation = "Kate E. Jones, Jon Bielby, Marcel Cardillo, Susanne A. Fritz, Justin O'Dell, C. David L. " \
                    "Orme, Kamran Safi, Wes Sechrest, Elizabeth H. Boakes, Chris Carbone, Christina Connolly, " \
                    "Michael J. Cutts, Janine K. Foster, Richard Grenyer, Michael Habib, Christopher A. " \
                    "Plaster, Samantha A. Price, Elizabeth A. Rigby, Janna Rist, Amber Teacher, Olaf R. P. " \
                    "Bininda-Emonds, John L. Gittleman, Georgina M. Mace, and Andy Purvis. 2009. PanTHERIA:a " \
                    "species-level database of life history, ecology, and geography of extant and recently " \
                    "extinct mammals. Ecology 90:2648."
    self.description = "PanTHERIA is a data set of multispecies trait data from diverse literature sources " \
                       "and also includes spatial databases of mammalian geographic ranges and global climatic " \
                       "and anthropogenic variables."
    self.keywords = ["mammals", "literature-compilation", "life-history"]
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
def __init__(self, **kwargs):
    """Set up metadata for the central Western Ghats forest-plot dataset."""
    Script.__init__(self, **kwargs)
    self.name = "forest-plots-wghats"
    self.title = ("Indian Forest Stand Structure and Composition "
                  "(Ramesh et al. 2010)")
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.3.2'
    self.ref = ("https://figshare.com/collections/Forest_stand_structure_and_composition_in_96_sites_"
                "along_environmental_gradients_in_the_central_Western_Ghats_of_India/3303531")
    self.urls = {'data': 'https://ndownloader.figshare.com/files/5617140'}
    self.citation = ("B. R. Ramesh, M. H. Swaminath, Santoshgouda V. Patil, Dasappa, Raphael Pelissier, P. "
                     "Dilip Venugopal, S. Aravajy, Claire Elouard, and S. Ramalingam. 2010. Forest stand "
                     "structure and composition in 96 sites along environmental gradients in the central "
                     "Western Ghats of India. Ecology 91:3118.")
    self.description = ("This data set reports woody plant species abundances in a network of 96 sampling "
                        "sites spread across 22000 km2 in central Western Ghats region, Karnataka, India.")
    self.keywords = ['plants', 'regional-scale', 'observational']
    # Old retriever versions (<= 2.0.0) expect shortname/tags and `nulls`.
    legacy = parse_version(VERSION) <= parse_version("2.0.0")
    if legacy:
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value, nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
def __init__(self, **kwargs):
    # Dataset script for the USGS Breeding Bird Survey 50-stop counts
    # (Pardieck et al. 2015).
    Script.__init__(self, **kwargs)
    self.title = "USGS North American Breeding Bird Survey 50 stop"
    self.name = "breed-bird-survey-50stop"
    self.description = "A Cooperative effort between the U.S. Geological Survey's Patuxent Wildlife Research Center and Environment Canada's Canadian Wildlife Service to monitor the status and trends of North American bird populations."
    self.citation = "Pardieck, K.L., D.J. Ziolkowski Jr., M.-A.R. Hudson. 2015. North American Breeding Bird Survey Dataset 1966 - 2014, version 2014.0. U.S. Geological Survey, Patuxent Wildlife Research Center."
    self.ref = "http://www.pwrc.usgs.gov/BBS/"
    self.keywords = ["birds", "continental-scale"]
    self.retriever_minimum_version = '2.0.dev'
    self.version = '2.0.0'
    # FTP locations of the survey-wide data files.
    self.urls = {
        "counts": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/50-StopData/1997ToPresent_SurveyWide/",
        "routes": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/routes.zip",
        "weather": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/Weather.zip",
        "region_codes": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/RegionCodes.txt",
        "species": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/SpeciesList.txt"}
    # retriever <= 2.0.0 used shortname/tags and `nulls`; newer versions
    # use `missing_values` and must read the latin-1 encoded source files.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        # 'NULL' marks missing values in the raw tables; '*' in cleaned data.
        self.cleanup_func_table = Cleanup(
            correct_invalid_value, nulls=['NULL'])
        self.cleanup_func_clean = Cleanup(
            correct_invalid_value, nulls=['*'])
    else:
        self.encoding = "latin-1"
        self.cleanup_func_table = Cleanup(
            correct_invalid_value, missing_values=['NULL'])
        self.cleanup_func_clean = Cleanup(
            correct_invalid_value, missing_values=['*'])
def __init__(self, **kwargs):
    """Set up metadata for the Uppangala (Western Ghats) tree demography data."""
    Script.__init__(self, **kwargs)
    self.name = "tree-demog-wghats"
    self.title = ("Tree demography in Western Ghats, India "
                  "- Pelissier et al. 2011")
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.3.1'
    self.ref = ("https://figshare.com/collections/Tree_demography_in_an_undisturbed_"
                "Dipterocarp_permanent_sample_plot_at_Uppangala_Western_Ghats_of_India/3304026")
    self.urls = {"data": "https://ndownloader.figshare.com/files/5619033"}
    self.citation = ("Raphael Pelissier, Jean-Pierre Pascal, N. Ayyappan, B. R. Ramesh, "
                     "S. Aravajy, and S. R. Ramalingam. 2011. Twenty years tree demography "
                     "in an undisturbed Dipterocarp permanent sample plot at Uppangala, "
                     "Western Ghats of India. Ecology 92:1376.")
    self.description = ("A data set on demography of trees monitored over 20 years in "
                        "Uppangala permanent sample plot (UPSP).")
    self.keywords = ["plants", "time-series", "observational"]
    # Old retriever versions (<= 2.0.0) expect shortname/tags and `nulls`.
    legacy = parse_version(VERSION) <= parse_version("2.0.0")
    if legacy:
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value, nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
def __init__(self, **kwargs):
    # Dataset script for Gulf of Maine intertidal densities and cover
    # (Petraitis et al. 2008).
    Script.__init__(self, **kwargs)
    self.title = "Gulf of Maine intertidal density/cover (Petraitis et al. 2008)"
    self.citation = "Peter S. Petraitis, Harrison Liu, and " \
                    "Erika C. Rhile. 2008. Densities and cover " \
                    "data for intertidal organisms in the Gulf of " \
                    "Maine, USA, from 2003 to 2007. Ecology 89:588."
    self.name = "intertidal-abund-me"
    self.ref = "https://figshare.com/collections/DENSITIES_AND_COVER_DATA_FOR_INTERTIDAL_ORGANISMS_IN_THE_GULF_OF_MAINE_USA_FROM_2003_TO_2007/3300200"
    self.description = "The data on densities and percent cover in the " \
                       "60 experimental plots from 2003 to 2007 and to " \
                       "update data from 1996 to 2002 that are already " \
                       "published in Ecological Archives." \
                       "Includes densities of mussels, " \
                       "herbivorous limpet, herbivorous snails, " \
                       "predatory snail, barnacle , fucoid algae and " \
                       "percent cover by mussels, barnacles, fucoids, " \
                       "and other sessile organisms."
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.5.3'
    self.urls = {"main": "https://ndownloader.figshare.com/files/5600831"}
    # -999.9 is the sentinel for missing values in the source file.
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=[-999.9])
    # retriever <= 2.0.0 used shortname and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=[-999.9])
    # Single "main" table using whichever cleanup function was selected.
    self.tables = {"main": Table("main", cleanup=self.cleanup_func_table)}
def download(self, engine=None, debug=False):
    """Fetch the BAAD archive and load its data and methods CSVs as tables."""
    Script.download(self, engine, debug)
    engine = self.engine

    # The archive nests its files under a baad_data/ folder; only the two
    # CSVs of interest are extracted. Add more entries here as needed.
    engine.download_files_from_archive(
        self.urls["BAAD"],
        ["baad_data/baad_data.csv", "baad_data/baad_methods.csv"])

    # Each extracted CSV becomes one table; 'NA' marks missing values.
    for table_name, csv_name in (("data", "baad_data.csv"),
                                 ("methods", "baad_methods.csv")):
        na_cleanup = Cleanup(correct_invalid_value, nulls=['NA'])
        engine.auto_create_table(Table(table_name, cleanup=na_cleanup),
                                 filename=csv_name)
        engine.insert_data_from_file(engine.format_filename(csv_name))
def __init__(self, **kwargs):
    # Dataset script for the USGS North American Breeding Bird Survey
    # (route-level counts, v4 of this script).
    Script.__init__(self, **kwargs)
    self.title = "USGS North American Breeding Bird Survey"
    self.name = "breed-bird-survey"
    self.description = "A Cooperative effort between the U.S. " \
                       "Geological Survey's Patuxent Wildlife Research " \
                       "Center and Environment Canada's Canadian " \
                       "Wildlife Service to monitor the status " \
                       "and trends of North American bird populations."
    self.citation = "Pardieck, K.L., D.J. Ziolkowski Jr., M.-A.R. Hudson. " \
                    "2015. North American Breeding Bird Survey Dataset 1966 - " \
                    "2014, version 2014.0. U.S. Geological Survey, Patuxent " \
                    "Wildlife Research Center"
    self.ref = "http://www.pwrc.usgs.gov/BBS/"
    self.keywords = ["birds", "continental-scale"]
    self.retriever_minimum_version = '2.0.dev'
    self.version = '4.0.0'
    # All files are served from ScienceBase; each entry below is the
    # opaque per-file path appended to the common catalog URL.
    base_url = "https://www.sciencebase.gov/catalog/file/get/5ea04e9a82cefae35a129d65?f=__disk__"
    self.urls = {
        "counts": base_url + "38%2F0e%2F1d%2F380e1dd98a48aa48b9cf2efa25f74e07ebc797f8",
        "routes": base_url + "5d%2Fca%2F74%2F5dca74b1e3e1c21f18443e8f27c38bf0e2b2a234&allowOpen=true",
        "weather": base_url + "87%2Fb5%2F1d%2F87b51d999ae1ad18838aa60851e9bcff4498ac8d",
        "migrants": base_url + "bf%2Fe5%2Ff6%2Fbfe5f6834f85cc1e31edf67b5eb825b9abff5806",
        "Vehicledata": base_url + "a9%2F97%2F2b%2Fa9972b26aaeb48bf9425ed21681312b4cc063a7c",
        "species": base_url + "6f%2F16%2F1f%2F6f161fc7c7db1dcaf1259deb02d824700f280460&allowOpen=true",
    }
    # retriever <= 2.0.0 used shortname/tags and `nulls`; newer versions
    # use `missing_values` and must read the latin-1 encoded source files.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['NULL'])
        self.cleanup_func_clean = Cleanup(correct_invalid_value,
                                          nulls=['*'])
    else:
        self.encoding = "latin-1"
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NULL'])
        self.cleanup_func_clean = Cleanup(correct_invalid_value,
                                          missing_values=['*'])
def download(self, engine=None, debug=False):
    """Download each trait file, strip its metadata header, and load it.

    Each LEDA text file begins with free-form metadata lines; the real
    data rows are semicolon-delimited. Lines before the first data row
    that contain no ";" (or are ";;;;" filler / blank lines) are dropped,
    and the cleaned copy ("new<key>") is loaded as table <key>.
    """
    Script.download(self, engine, debug)
    for key in self.urls:
        original_name = self.urls[key].rpartition('/')[-1]
        self.engine.download_file(self.urls[key], original_name)
        new_file_path = self.engine.format_filename("new" + key)
        # BUG FIX: the files are opened in binary mode, so the header
        # filter must compare against bytes literals; comparing str
        # literals against bytes lines raises TypeError on Python 3.
        with open(self.engine.find_file(original_name), "rb") as old_data, \
                open(new_file_path, "wb") as new_data:
            # data flips to True once the metadata header has been passed
            data = False
            for line in old_data:
                # metadata contains lines with no ";" and may have ";;;;"
                # runs or empty lines
                if not data and (b";" not in line or b";;;;" in line):
                    continue
                data = True
                new_data.write(line)
        self.engine.auto_create_table(
            Table(key,
                  cleanup=Cleanup(correct_invalid_value, nulls=[-999.9])),
            filename=str("new" + key))
        self.engine.insert_data_from_file(new_file_path)
def __init__(self):
    """Legacy-style constructor: all dataset metadata goes straight to Script."""
    trees_cleanup = Cleanup(correct_invalid_value, nulls=[-999])
    Script.__init__(
        self,
        name="Tree growth, mortality, physical condition - Clark, 2006",
        shortname="Clark2006",
        tags=['Taxon > Plants'],
        urls={'trees': 'http://esapubs.org/archive/ecol/E087/132/LS_trees_1983_2000.txt'},
        tables={'trees': Table('trees', cleanup=trees_cleanup)},
        description="David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132.")
def __init__(self, **kwargs):
    # Dataset script for the LEDA Traitbase (Kleyer et al. 2008): one URL
    # per plant trait file; download() loads each as its own table.
    Script.__init__(self, **kwargs)
    self.title = "A database on the life history traits of the Northwest European flora"
    self.name = "plant-life-hist-eu"
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.4.3'
    self.ref = "http://www.uni-oldenburg.de/en/biology/landeco/research/projects/leda/"
    self.urls = {
        "Age_of_first_flowering": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/age_of_first_flowering.txt",
        "Branching": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/branching.txt",
        "Bud_bank_seasonality": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/buds_seasonality.txt",
        "Bud_vertical_distribution": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/buds_vertical_dist.txt",
        "Buoyancy": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/buoyancy.txt",
        "Canopy_height": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/canopy_height.txt",
        "Clonal_growth_organs": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/CGO.txt",
        "Dispersal_type": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/dispersal_type.txt",
        "Leaf_distribution_along_the_stem": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/leaf_distribution.txt",
        "Leaf_dry_matter_content": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/LDMC_und_Geo.txt",
        "Leaf_mass": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/leaf_mass.txt",
        "Leaf_size": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/leaf_size.txt",
        "Morphology_of_dispersal_unit": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/morphology_dispersal_unit.txt",
        "Plant_growth_form": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/plant_growth_form.txt",
        "Plant_life_span": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/plant_life_span.txt",
        "Releasing_height": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/releasing_height.txt",
        "Seed_bank": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_bank.txt",
        "Seed_longevity": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_longevity.txt",
        "Seed_mass": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_mass.txt",
        "Seed_number": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_number.txt",
        "Seed_shape": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_shape.txt",
        "Shoot_growth_form": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/shoot_growth_form.txt",
        "Specific_leaf_area": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/SLA_und_geo_neu.txt",
        "Seed_number_per_shoot": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/SNP.txt",
        # NOTE(review): "Woodiness" points at ssd.txt, which elsewhere in
        # LEDA denotes stem specific density — confirm the mapping.
        "Woodiness": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/ssd.txt",
        "Terminal_velocity": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/TV.txt",
    }
    self.citation = "KLEYER, M., BEKKER, R.M., KNEVEL, I.C., BAKKER, J.P, THOMPSON, K., SONNENSCHEIN, M., POSCHLOD, P., VAN " \
                    "GROENENDAEL, J.M., KLIMES, L., KLIMESOVA, J., KLOTZ, S., RUSCH, G.M., HERMY, M., ADRIAENS, D., BOEDELTJE, G., BOSSUYT, B., DANNEMANN, A., ENDELS, P., GoeTZENBERGER, L., HODGSON, J.G., JACKEL, A-K., KueHN, I., KUNZMANN, D., OZINGA, W.A., RoeMERMANN, C., STADLER, M., SCHLEGELMILCH, J., STEENDAM, H.J., TACKENBERG, O., WILMANN, B., CORNELISSEN, J.H.C., ERIKSSON, O., GARNIER, E., PECO, B. (2008): The LEDA Traitbase: A database of life-history traits of Northwest European flora. Journal of Ecology 96: 1266-1274"
    self.keywords = ['plants', 'observational']
    self.description = "The LEDA Traitbase provides information on plant traits that describe three key features of plant dynamics: persistence, regeneration and dispersal. "
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
def download(self, engine=None, debug=False):
    # Download the archive and load the species, sites, microplot and
    # macroplot tables for the Western Ghats forest-plot dataset.
    Script.download(self, engine, debug)
    engine = self.engine
    files = ["Macroplot_data_Rev.txt", "Microplot_data.txt",
             "Site_variables.txt", "Species_list.txt"]
    engine.download_files_from_archive(self.urls["data"], files,
                                       filetype="zip")

    # Create table species
    engine.auto_create_table(
        Table('species',
              cleanup=Cleanup(correct_invalid_value, nulls=['NA'])),
        filename="Species_list.txt")
    engine.insert_data_from_file(engine.format_filename("Species_list.txt"))

    # Create table sites
    engine.auto_create_table(
        Table('sites',
              cleanup=Cleanup(correct_invalid_value, nulls=['NA'])),
        filename="Site_variables.txt")
    engine.insert_data_from_file(engine.format_filename("Site_variables.txt"))

    # Create table microplots (cross-tab layout: one source column per
    # BSP plot, folded into (SpCode, PlotID, Count) rows; note the plot
    # numbering has gaps, e.g. no BSP19, BSP32, BSP103).
    table = Table('microplots')
    table.columns = [('record_id', ('pk-auto',)),
                     ('SpCode', ('char', '30')),
                     ('Count', ('ct-int',))]
    table.ct_names = ['BSP1', 'BSP2', 'BSP3', 'BSP4', 'BSP5', 'BSP6',
                      'BSP7', 'BSP8', 'BSP9', 'BSP10', 'BSP11', 'BSP12',
                      'BSP13', 'BSP14', 'BSP15', 'BSP16', 'BSP17', 'BSP18',
                      'BSP20', 'BSP21', 'BSP22', 'BSP23', 'BSP24', 'BSP25',
                      'BSP26', 'BSP27', 'BSP28', 'BSP29', 'BSP30', 'BSP31',
                      'BSP33', 'BSP34', 'BSP35', 'BSP36', 'BSP37', 'BSP41',
                      'BSP42', 'BSP43', 'BSP44', 'BSP45', 'BSP46', 'BSP47',
                      'BSP48', 'BSP49', 'BSP50', 'BSP51', 'BSP52', 'BSP53',
                      'BSP54', 'BSP55', 'BSP56', 'BSP57', 'BSP58', 'BSP59',
                      'BSP60', 'BSP61', 'BSP62', 'BSP63', 'BSP64', 'BSP65',
                      'BSP66', 'BSP67', 'BSP68', 'BSP69', 'BSP70', 'BSP71',
                      'BSP72', 'BSP73', 'BSP74', 'BSP75', 'BSP76', 'BSP78',
                      'BSP79', 'BSP80', 'BSP82', 'BSP83', 'BSP84', 'BSP85',
                      'BSP86', 'BSP87', 'BSP88', 'BSP89', 'BSP90', 'BSP91',
                      'BSP92', 'BSP93', 'BSP94', 'BSP95', 'BSP96', 'BSP97',
                      'BSP98', 'BSP99', 'BSP100', 'BSP101', 'BSP102',
                      'BSP104']
    table.ct_column = 'PlotID'
    engine.auto_create_table(table, filename="Microplot_data.txt")
    engine.insert_data_from_file(engine.format_filename("Microplot_data.txt"))

    # Create table macroplots (cross-tab layout: five girth measurements
    # per tree folded into (PlotID, SpCode, Tree, Girth) rows).
    table = Table('macroplots')
    table.ct_names = ['TreeGirth1', 'TreeGirth2', 'TreeGirth3',
                      'TreeGirth4', 'TreeGirth5']
    table.ct_column = 'Tree'
    table.columns = [('record_id', ('pk-auto',)),
                     ('PlotID', ('char', '20')),
                     ('SpCode', ('char', '30')),
                     ('Girth', ('ct-int',))]
    engine.auto_create_table(table, filename="Macroplot_data_Rev.txt")
    engine.insert_data_from_file(engine.format_filename("Macroplot_data_Rev.txt"))
def __init__(self):
    """Legacy constructor: passes all La Selva tree metadata straight to Script."""
    trees_table = Table('trees',
                        cleanup=Cleanup(correct_invalid_value, nulls=[-999]))
    Script.__init__(
        self,
        name="Tree growth, mortality, physical condition - Clark, 2006",
        shortname="Clark2006",
        tags=['Taxon > Plants'],
        ref="http://esapubs.org/archive/ecol/E087/132/",
        urls={'trees': 'http://esapubs.org/archive/ecol/E087/132/LS_trees_1983_2000.txt'},
        tables={'trees': trees_table},
        description="The data set helps to examine the post-establishment ecology of 10 species of tropical wet forest trees selected to span a range of predicted life history patterns at the La Selva Biological Station in Costa Rica.",
        citation="David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132.")
def __init__(self):
    # Legacy-style constructor for the La Selva trees dataset: all
    # metadata is passed directly to Script as keyword arguments.
    Script.__init__(self,
                    # -999 is the missing-value sentinel in the source file
                    tables={'trees': Table('trees', cleanup=Cleanup(correct_invalid_value, nulls=[-999]))},
                    name="Tree growth, mortality, physical condition - Clark, 2006",
                    tags=['plants', 'time-series'],
                    urls={'trees': 'https://ndownloader.figshare.com/files/5597693'},
                    shortname="la-selva-trees",
                    description="The data set helps to examine the post-establishment ecology of 10 species of tropical wet forest trees selected to span a range of predicted life history patterns at the La Selva Biological Station in Costa Rica.",
                    ref="https://doi.org/10.6084/m9.figshare.c.3299324.v1",
                    retriever_minimum_version="2.0.dev",
                    version='1.3.0',
                    citation="David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132.")
def __init__(self, **kwargs):
    """Set up metadata for the amniote life-history database (Myhrvold et al. 2015)."""
    Script.__init__(self, **kwargs)
    self.name = "amniote-life-hist"
    self.title = "Amniote life History database"
    self.retriever_minimum_version = '2.0.dev'
    self.version = '2.0.2'
    self.ref = ("https://figshare.com/collections/An_amniote_life-history_database_to_perform_comparative_"
                "analyses_with_birds_mammals_and_reptiles/3308127")
    self.urls = {"data": "https://ndownloader.figshare.com/files/8067269"}
    self.citation = ("Myhrvold, N.P., Baldridge, E., Chan, B., Sivam, D., Freeman, D.L. and Ernest, S.M., 2015. "
                     "An amniote life-history database to perform comparative analyses with birds, mammals, "
                     "and reptiles:Ecological Archives E096-269. Ecology, 96(11), pp.3109-000.")
    self.licenses = [{"name": "CC0-1.0"}]
    self.description = "Compilation of life history traits for birds, mammals, and reptiles."
    self.keywords = ["mammals", "literature-compilation"]
    # '-999' is the missing-value sentinel in the source file.
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=['-999'])
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['-999'])
def download(self, engine=None, debug=False):
    """Extract the PanTHERIA trait file from the archive and load it."""
    Script.download(self, engine, debug)
    engine = self.engine
    data_file = "PanTHERIA_1-0_WR05_Aug2008.txt"
    engine.download_files_from_archive(self.urls["data"], [data_file],
                                       filetype="zip")
    # The whole trait file becomes a single "species" table; 'NA' marks
    # missing values.
    species_table = Table('species',
                          cleanup=Cleanup(correct_invalid_value,
                                          nulls=['NA']))
    engine.auto_create_table(species_table, filename=data_file)
    engine.insert_data_from_file(engine.format_filename(data_file))
def __init__(self, **kwargs):
    """Set up metadata for the marine predator-prey body size dataset (Barnes et al. 2008)."""
    Script.__init__(self, **kwargs)
    self.title = "Marine Predator and Prey Body Sizes - Barnes et al. 2008"
    self.name = "predator-prey-size-marine"
    self.retriever_minimum_version = '2.0.dev'
    self.version = '2.0.3'
    # BUG FIX: previously assigned the builtin `zip` function object;
    # the archive type must be the string "zip".
    self.archived = "zip"
    self.ref = "https://figshare.com/collections/PREDATOR_AND_PREY_BODY_SIZES_IN_MARINE_FOOD_WEBS/3300257"
    self.urls = {"data": "https://ndownloader.figshare.com/files/5601029"}
    self.citation = "C. Barnes, D. M. Bethea, R. D. Brodeur, J. Spitz, V. Ridoux, C. Pusineri, B. C. Chase, " \
                    "M. E. Hunsicker, F. Juanes, A. Kellermann, J. Lancaster, F. Menard, F.-X. Bard, P. Munk, " \
                    "J. K. Pinnegar, F. S. Scharf, R. A. Rountree, K. I. Stergiou, C. Sassa, A. Sabates, and S. " \
                    "Jennings. 2008. Predator and prey body sizes in marine food webs. Ecology 89:881."
    self.description = "The data set contains relationships between predator and prey size which are needed to " \
                       "describe interactions of species and size classes in food webs."
    self.keywords = ["fish", "literature-compilation", "size"]
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=['n/a', '0.0000E+00'])
    self.encoding = "latin-1"
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['n/a', '0.0000E+00'])
def __init__(self, **kwargs):
    # Dataset script for the La Selva tree growth/mortality data
    # (Clark & Clark 2006).
    Script.__init__(self, **kwargs)
    # -999 is the missing-value sentinel in the source file.
    self.cleanup_func_table = Cleanup(correct_invalid_value, missing_values=[-999])
    self.title = "Tree growth, mortality, physical condition - Clark, 2006"
    self.keywords = ['plants', 'time-series']
    self.urls = {'trees': 'https://ndownloader.figshare.com/files/5597693'}
    self.name = "la-selva-trees"
    self.description = "The data set helps to examine the post-establishment ecology of 10 species of tropical wet forest trees selected to span a range of predicted life history patterns at the La Selva Biological Station in Costa Rica."
    self.ref = "https://doi.org/10.6084/m9.figshare.c.3299324.v1"
    self.retriever_minimum_version = "2.0.dev"
    self.version = '1.4.1'
    self.citation = "David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132."
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value, nulls=[-999])
    # Single 'trees' table using whichever cleanup function was selected.
    self.tables = {
        'trees': Table('trees', cleanup=self.cleanup_func_table)
    }
def __init__(self):
    """Legacy constructor: Gulf of Maine intertidal metadata passed straight to Script."""
    main_table = Table("main",
                       cleanup=Cleanup(correct_invalid_value, nulls=[-999.9]))
    Script.__init__(
        self,
        name="Gulf of Maine intertidal density/cover (Ecological Archives 2008)",
        shortname="Petraitis2008",
        ref="http://www.esapubs.org/archive/ecol/E089/032/",
        description="Peter S. Petraitis, Harrison Liu, and Erika C. Rhile. 2008. Densities and cover data for intertidal organisms in the Gulf of Maine, USA, from 2003 to 2007. Ecology 89:588.",
        urls={"main": "http://www.esapubs.org/archive/ecol/E089/032/Succession_sampling_03-07_data.txt"},
        tables={"main": main_table},
    )
def __init__(self):
    # Legacy-style constructor for the Gulf of Maine intertidal dataset:
    # all metadata is passed directly to Script as keyword arguments.
    Script.__init__(self,
                    name="Gulf of Maine intertidal density/cover (Ecological Archives 2008)",
                    citation="Peter S. Petraitis, Harrison Liu, and Erika C. Rhile. 2008. Densities and cover data for intertidal organisms in the Gulf of Maine, USA, from 2003 to 2007. Ecology 89:588.",
                    shortname="Petraitis2008",
                    ref="http://www.esapubs.org/archive/ecol/E089/032/",
                    description="The data set provides access to data on densities and percent cover in the 60 experimental plots from 2003 to 2007 and to update data from 1996 to 2002 that are already published in Ecological Archives.It includes densities of mussels, an herbivorous limpet, herbivorous snails, a predatory snail, a barnacle , and fucoid algae and percent cover by mussels, barnacles, fucoids, and other sessile organisms.",
                    urls={
                        "main": "http://www.esapubs.org/archive/ecol/E089/032/Succession_sampling_03-07_data.txt",
                    },
                    # -999.9 is the missing-value sentinel in the source file
                    tables={
                        "main": Table("main", cleanup=Cleanup(correct_invalid_value, nulls=[-999.9])),
                    }
                    )
def __init__(self, **kwargs):
    """Set up metadata for the FAO global capture production dataset."""
    Script.__init__(self, **kwargs)
    self.title = "Commercial Fisheries Monthly Trade Data by Product, Country/Association"
    self.name = "fao-global-capture-product"
    self.retriever_minimum_version = '2.1.dev'
    self.urls = {
        "capture": "http://www.fao.org/fishery/static/Data/Capture_2018.1.2.zip"}
    self.version = '1.0.0'
    self.ref = "http://www.fao.org/fishery/statistics/global-capture-production/"
    self.citation = "FAO. 2018. FAO yearbook. Fishery and Aquaculture Statistics " \
                    "2016/FAO annuaire. Statistiques des pêches et de l'aquaculture " \
                    "2016/FAO anuario. Estadísticas de pesca y acuicultura 2016. " \
                    "Rome/Roma. 104pp."
    self.description = "Commercial Fisheries statistics provides a summary of " \
                       "commercial fisheries product data by individual country."
    self.keywords = ["Fish", "Fisheries"]
    # BUG FIX: the keyword was misspelled `missingValues`, which Cleanup
    # does not accept (every other script here uses `missing_values`),
    # so the '-' placeholder was never treated as a missing value.
    self.cleanup_func_table = Cleanup(
        correct_invalid_value, missing_values=['-'])
    self.encoding = "utf-8"
def __init__(self):
    # Legacy-style constructor for the Gulf of Maine intertidal dataset
    # (figshare-hosted revision): all metadata passed directly to Script.
    Script.__init__(
        self,
        name="Gulf of Maine intertidal density/cover (Petraitis et al. 2008)",
        citation="Peter S. Petraitis, Harrison Liu, and Erika C. Rhile. 2008. Densities and cover data for intertidal organisms in the Gulf of Maine, USA, from 2003 to 2007. Ecology 89:588.",
        shortname="intertidal-abund-me",
        ref="https://figshare.com/collections/DENSITIES_AND_COVER_DATA_FOR_INTERTIDAL_ORGANISMS_IN_THE_GULF_OF_MAINE_USA_FROM_2003_TO_2007/3300200",
        description="The data set provides access to data on densities and percent cover in the 60 experimental plots from 2003 to 2007 and to update data from 1996 to 2002 that are already published in Ecological Archives.It includes densities of mussels, an herbivorous limpet, herbivorous snails, a predatory snail, a barnacle , and fucoid algae and percent cover by mussels, barnacles, fucoids, and other sessile organisms.",
        retriever_minimum_version='2.0.dev',
        version='1.4.0',
        urls={"main": "https://ndownloader.figshare.com/files/5600831"},
        # -999.9 is the missing-value sentinel in the source file
        tables={
            "main": Table("main",
                          cleanup=Cleanup(correct_invalid_value,
                                          nulls=[-999.9]))
        })
def download(self, engine=None, debug=False):
    """Download and load the Breeding Bird Survey tables.

    Creates and populates the species, routes, weather, region_codes
    and counts tables through the active engine, then returns the
    engine.  `engine` and `debug` are forwarded to Script.download().
    """
    try:
        Script.download(self, engine, debug)
        engine = self.engine

        # Species table: fixed-width file with a 9-row header.
        table = Table("species",
                      cleanup=Cleanup(),
                      contains_pk=True,
                      header_rows=9)
        table.columns = [
            ("species_id", ("pk-int", )),
            ("AOU", ("int", )),
            ("english_common_name", ("char", 50)),
            ("french_common_name", ("char", 50)),
            ("spanish_common_name", ("char", 50)),
            ("sporder", ("char", 30)),
            ("family", ("char", 30)),
            ("genus", ("char", 30)),
            ("species", ("char", 50)),
        ]
        table.fixed_width = [7, 6, 51, 51, 51, 51, 51, 51, 50]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["species"])

        # Routes table: pulled straight from the routes archive.
        engine.download_files_from_archive(self.urls["routes"],
                                           ["routes.csv"])
        engine.auto_create_table(Table("routes", cleanup=Cleanup()),
                                 filename="routes.csv")
        engine.insert_data_from_file(engine.format_filename("routes.csv"))

        # Weather table: pre-clean weather.csv into weather_new.csv
        # (skip if a previous run already produced the cleaned file).
        if not os.path.isfile(engine.format_filename("weather_new.csv")):
            engine.download_files_from_archive(self.urls["weather"],
                                               ["weather.csv"])
            read = open_fr(engine.format_filename("weather.csv"))
            write = open_fw(engine.format_filename("weather_new.csv"))
            print("Cleaning weather data...")
            for line in read:
                values = line.split(',')
                newvalues = []
                for value in values:
                    if ':' in value:
                        # Strip colons from time-like values.
                        newvalues.append(value.replace(':', ''))
                    elif value == "N":
                        # NOTE(review): str(None) below writes the literal
                        # string "None" into the csv — presumably treated
                        # as a null marker downstream; confirm the weather
                        # cleanup function handles it.
                        newvalues.append(None)
                    else:
                        newvalues.append(value)
                write.write(','.join(str(value) for value in newvalues))
            write.close()
            read.close()
        engine.auto_create_table(Table("weather",
                                       pk="RouteDataId",
                                       cleanup=self.cleanup_func_table),
                                 filename="weather_new.csv")
        engine.insert_data_from_file(
            engine.format_filename("weather_new.csv"))

        # Region_codes table: fixed-width file with an 11-row header.
        table = Table("region_codes",
                      pk=False,
                      header_rows=11,
                      fixed_width=[11, 11, 30])

        def regioncodes_cleanup(value, engine):
            # Replace accented latin-1 vowels (á é í ó) with their
            # unaccented ASCII equivalents.
            replace = {
                chr(225): "a",
                chr(233): "e",
                chr(237): "i",
                chr(243): "o"
            }
            newvalue = str(value)
            for key in list(replace.keys()):
                if key in newvalue:
                    newvalue = newvalue.replace(key, replace[key])
            return newvalue

        table.cleanup = Cleanup(regioncodes_cleanup)
        table.columns = [("countrynum", ("int", )),
                         ("regioncode", ("int", )),
                         ("regionname", ("char", 30))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["region_codes"])

        # Counts table: one zip archive per state/province.
        table = Table("counts", delimiter=',')
        table.columns = [("record_id", ("pk-auto", )),
                         ("countrynum", ("int", )),
                         ("statenum", ("int", )),
                         ("Route", ("int", )),
                         ("RPID", ("int", )),
                         ("Year", ("int", )),
                         ("Aou", ("int", )),
                         ("Count10", ("int", )),
                         ("Count20", ("int", )),
                         ("Count30", ("int", )),
                         ("Count40", ("int", )),
                         ("Count50", ("int", )),
                         ("StopTotal", ("int", )),
                         ("SpeciesTotal", ("int", ))]
        # Entries are either a plain name (archive name = first 7 chars)
        # or a [full name, archive short name] pair.
        stateslist = [
            "Alabama", "Alaska", "Arizona", "Arkansas", "California",
            "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
            "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
            "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan",
            "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska",
            "Nevada", ["New Hampshire", "NHampsh"], ["New Jersey", "NJersey"],
            ["New Mexico", "NMexico"], ["New York", "NYork"],
            ["North Carolina", "NCaroli"], ["North Dakota", "NDakota"],
            "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
            ["Rhode Island", "RhodeIs"], ["South Carolina", "SCaroli"],
            ["South Dakota", "SDakota"], "Tennessee", "Texas", "Utah",
            "Vermont", "Virginia", "Washington",
            ["West Virginia", "W_Virgi"], "Wisconsin", "Wyoming", "Alberta",
            ["British Columbia", "BritCol"], "Manitoba",
            ["New Brunswick", "NBrunsw"], ["Northwest Territories", "NWTerri"],
            "Newfoundland", ["Nova Scotia", "NovaSco"], "Nunavut", "Ontario",
            ["Prince Edward Island", "PEI"], "Quebec", "Saskatchewan",
            "Yukon"
        ]
        state = ""
        shortstate = ""
        engine.table = table
        engine.create_table()
        for state in stateslist:
            try:
                # NOTE(review): len(state) > 2 distinguishes plain string
                # names (all longer than 2 chars) from 2-element
                # [name, shortname] lists; fragile but correct for the
                # list above.
                if len(state) > 2:
                    shortstate = state[0:7]
                else:
                    state, shortstate = state[0], state[1]
                print("Inserting data from " + state + "...")
                try:
                    # First attempt: bulk insert with no value cleanup.
                    engine.table.cleanup = Cleanup()
                    engine.insert_data_from_archive(
                        self.urls["counts"] + shortstate + ".zip",
                        [shortstate + ".csv"])
                except:
                    # Fallback: retry row-by-row with the cleanup function.
                    print("Failed bulk insert on " + state +
                          ", inserting manually.")
                    engine.connection.rollback()
                    engine.table.cleanup = self.cleanup_func_clean
                    engine.insert_data_from_archive(
                        self.urls["counts"] + shortstate + ".zip",
                        [shortstate + ".csv"])
            except:
                print("There was an error in " + state + ".")
                raise
    except zipfile.BadZipfile:
        print(
            "There was an unexpected error in the Breeding Bird Survey archives."
        )
        raise
    return engine
def download(self, engine=None, debug=False):
    """Download and load the Breeding Bird Survey tables.

    Populates the species, routes, weather, migrants, migrantsummary,
    vehicledata and counts tables through the active engine and returns
    the engine.  `engine` and `debug` are forwarded to Script.download().
    """
    try:
        Script.download(self, engine, debug)
        engine = self.engine

        # Species table: fixed-width file with an 11-row header.
        table = Table("species",
                      cleanup=Cleanup(),
                      contains_pk=True,
                      header_rows=11)
        table.columns = [
            ("species_id", ("pk-int", )),
            ("AOU", ("int", )),
            ("english_common_name", ("char", 50)),
            ("french_common_name", ("char", 50)),
            ("spanish_common_name", ("char", 50)),
            ("sporder", ("char", 30)),
            ("family", ("char", 30)),
            ("genus", ("char", 30)),
            ("species", ("char", 50)),
        ]
        table.fixed_width = [7, 6, 51, 51, 51, 51, 51, 51, 50]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["species"])

        # Routes table.
        engine.download_files_from_archive(self.urls["routes"],
                                           ["routes.csv"],
                                           archive_name="routes.zip")
        engine.auto_create_table(Table("routes", cleanup=Cleanup()),
                                 filename="routes.csv")
        engine.insert_data_from_file(engine.format_filename("routes.csv"))

        # Weather table.
        engine.download_files_from_archive(self.urls["weather"],
                                           ["weather.csv"],
                                           archive_name="weather.zip")
        engine.auto_create_table(Table("weather",
                                       pk="RouteDataId",
                                       cleanup=self.cleanup_func_table),
                                 filename="weather.csv")
        engine.insert_data_from_file(engine.format_filename("weather.csv"))

        # Migrations data: nested zips inside MigrantNonBreeder.zip.
        engine.download_files_from_archive(
            self.urls["migrants"], archive_name="MigrantNonBreeder.zip")
        engine.extract_zip(
            engine.format_filename("MigrantNonBreeder/Migrants.zip"),
            engine.format_filename("Migrant"),
        )
        engine.extract_zip(
            engine.format_filename("MigrantNonBreeder/MigrantSummary.zip"),
            engine.format_filename("MigrantSummary"),
        )

        # migrants: per-stop counts (stop1..stop50).
        table = Table("migrants", cleanup=Cleanup())
        table.columns = [('routedataid', ('int', )), ('countrynum', ('int', )),
                         ('statenum', ('int', )), ('route', ('int', )),
                         ('rpid', ('int', )), ('year', ('int', )),
                         ('aou', ('int', )), ('stop1', ('int', )),
                         ('stop2', ('int', )), ('stop3', ('int', )),
                         ('stop4', ('int', )), ('stop5', ('int', )),
                         ('stop6', ('int', )), ('stop7', ('int', )),
                         ('stop8', ('int', )), ('stop9', ('int', )),
                         ('stop10', ('int', )), ('stop11', ('int', )),
                         ('stop12', ('int', )), ('stop13', ('int', )),
                         ('stop14', ('int', )), ('stop15', ('int', )),
                         ('stop16', ('int', )), ('stop17', ('int', )),
                         ('stop18', ('int', )), ('stop19', ('int', )),
                         ('stop20', ('int', )), ('stop21', ('int', )),
                         ('stop22', ('int', )), ('stop23', ('int', )),
                         ('stop24', ('int', )), ('stop25', ('int', )),
                         ('stop26', ('int', )), ('stop27', ('int', )),
                         ('stop28', ('int', )), ('stop29', ('int', )),
                         ('stop30', ('int', )), ('stop31', ('int', )),
                         ('stop32', ('int', )), ('stop33', ('int', )),
                         ('stop34', ('int', )), ('stop35', ('int', )),
                         ('stop36', ('int', )), ('stop37', ('int', )),
                         ('stop38', ('int', )), ('stop39', ('int', )),
                         ('stop40', ('int', )), ('stop41', ('int', )),
                         ('stop42', ('int', )), ('stop43', ('int', )),
                         ('stop44', ('int', )), ('stop45', ('int', )),
                         ('stop46', ('int', )), ('stop47', ('int', )),
                         ('stop48', ('int', )), ('stop49', ('int', )),
                         ('stop50', ('int', ))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(
            engine.format_filename("Migrant/Migrants.csv"))

        # migrantsummary: per-route 10-stop aggregates.
        table = Table("migrantsummary", cleanup=Cleanup())
        table.columns = [('routedataid', ('int', )), ('countrynum',
                                                      ('int', )),
                         ('statenum', ('int', )), ('route', ('int', )),
                         ('rpid', ('int', )), ('year', ('int', )),
                         ('aou', ('int', )), ('count10', ('int', )),
                         ('count20', ('int', )), ('count30', ('int', )),
                         ('count40', ('int', )), ('count50', ('int', )),
                         ('stoptotal', ('int', )), ('speciestotal',
                                                    ('int', ))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(
            engine.format_filename("MigrantSummary/MigrantSummary.csv"))

        # vehicledata: per-stop car and noise counts (car1..50, noise1..50).
        table = Table("vehicledata", cleanup=Cleanup())
        table.columns = [('routedataid', ('int', )), ('countrynum',
                                                      ('int', )),
                         ('statenum', ('int', )), ('route', ('int', )),
                         ('rpid', ('int', )), ('year', ('int', )),
                         ('recordedcar', ('char', )), ('car1', ('int', )),
                         ('car2', ('int', )), ('car3', ('int', )),
                         ('car4', ('int', )), ('car5', ('int', )),
                         ('car6', ('int', )), ('car7', ('int', )),
                         ('car8', ('int', )), ('car9', ('int', )),
                         ('car10', ('int', )), ('car11', ('int', )),
                         ('car12', ('int', )), ('car13', ('int', )),
                         ('car14', ('int', )), ('car15', ('int', )),
                         ('car16', ('int', )), ('car17', ('int', )),
                         ('car18', ('int', )), ('car19', ('int', )),
                         ('car20', ('int', )), ('car21', ('int', )),
                         ('car22', ('int', )), ('car23', ('int', )),
                         ('car24', ('int', )), ('car25', ('int', )),
                         ('car26', ('int', )), ('car27', ('int', )),
                         ('car28', ('int', )), ('car29', ('int', )),
                         ('car30', ('int', )), ('car31', ('int', )),
                         ('car32', ('int', )), ('car33', ('int', )),
                         ('car34', ('int', )), ('car35', ('int', )),
                         ('car36', ('int', )), ('car37', ('int', )),
                         ('car38', ('int', )), ('car39', ('int', )),
                         ('car40', ('int', )), ('car41', ('int', )),
                         ('car42', ('int', )), ('car43', ('int', )),
                         ('car44', ('int', )), ('car45', ('int', )),
                         ('car46', ('int', )), ('car47', ('int', )),
                         ('car48', ('int', )), ('car49', ('int', )),
                         ('car50', ('int', )), ('noise1', ('int', )),
                         ('noise2', ('int', )), ('noise3', ('int', )),
                         ('noise4', ('int', )), ('noise5', ('int', )),
                         ('noise6', ('int', )), ('noise7', ('int', )),
                         ('noise8', ('int', )), ('noise9', ('int', )),
                         ('noise10', ('int', )), ('noise11', ('int', )),
                         ('noise12', ('int', )), ('noise13', ('int', )),
                         ('noise14', ('int', )), ('noise15', ('int', )),
                         ('noise16', ('int', )), ('noise17', ('int', )),
                         ('noise18', ('int', )), ('noise19', ('int', )),
                         ('noise20', ('int', )), ('noise21', ('int', )),
                         ('noise22', ('int', )), ('noise23', ('int', )),
                         ('noise24', ('int', )), ('noise25', ('int', )),
                         ('noise26', ('int', )), ('noise27', ('int', )),
                         ('noise28', ('int', )), ('noise29', ('int', )),
                         ('noise30', ('int', )), ('noise31', ('int', )),
                         ('noise32', ('int', )), ('noise33', ('int', )),
                         ('noise34', ('int', )), ('noise35', ('int', )),
                         ('noise36', ('int', )), ('noise37', ('int', )),
                         ('noise38', ('int', )), ('noise39', ('int', )),
                         ('noise40', ('int', )), ('noise41', ('int', )),
                         ('noise42', ('int', )), ('noise43', ('int', )),
                         ('noise44', ('int', )), ('noise45', ('int', )),
                         ('noise46', ('int', )), ('noise47', ('int', )),
                         ('noise48', ('int', )), ('noise49', ('int', )),
                         ('noise50', ('int', ))]
        engine.table = table
        engine.create_table()
        engine.download_files_from_archive(self.urls["Vehicledata"],
                                           archive_name="VehicleData.zip")
        engine.extract_zip(
            engine.format_filename("VehicleData/VehicleData.zip"),
            engine.format_filename("VehicleData"),
        )
        engine.insert_data_from_file(
            engine.format_filename("VehicleData/VehicleData.csv"))

        # Counts table: one nested zip per state/province in States.zip.
        table = Table("counts", delimiter=",")
        engine.download_files_from_archive(self.urls["counts"],
                                           archive_name="States.zip")
        table.columns = [("record_id", ("pk-auto", )),
                         ("RouteDataID", ("int", )),
                         ("countrynum", ("int", )),
                         ("statenum", ("int", )),
                         ("Route", ("int", )),
                         ("RPID", ("int", )),
                         ("Year", ("int", )),
                         ("Aou", ("int", )),
                         ("Count10", ("int", )),
                         ("Count20", ("int", )),
                         ("Count30", ("int", )),
                         ("Count40", ("int", )),
                         ("Count50", ("int", )),
                         ("StopTotal", ("int", )),
                         ("SpeciesTotal", ("int", ))]
        # Entries are either a plain name (archive name = first 7 chars)
        # or a [full name, archive short name] pair.
        stateslist = [
            "Alabama", "Alaska", "Arizona", "Arkansas", "California",
            "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
            "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
            "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan",
            "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska",
            "Nevada", ["New Hampshire", "NHampsh"], ["New Jersey", "NJersey"],
            ["New Mexico", "NMexico"], ["New York", "NYork"],
            ["North Carolina", "NCaroli"], ["North Dakota", "NDakota"],
            "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
            ["Rhode Island", "RhodeIs"], ["South Carolina", "SCaroli"],
            ["South Dakota", "SDakota"], "Tennessee", "Texas", "Utah",
            "Vermont", "Virginia", "Washington",
            ["West Virginia", "W_Virgi"], "Wisconsin", "Wyoming", "Alberta",
            ["British Columbia", "BritCol"], "Manitoba",
            ["New Brunswick", "NBrunsw"], ["Northwest Territories", "NWTerri"],
            "Newfoundland", ["Nova Scotia", "NovaSco"], "Nunavut", "Ontario",
            ["Prince Edward Island", "PEI"], "Quebec", "Saskatchewan",
            "Yukon"
        ]
        state = ""
        shortstate = ""
        engine.table = table
        engine.create_table()
        for state in stateslist:
            try:
                if isinstance(state, (list, )):
                    state, shortstate = state[0], state[1]
                else:
                    shortstate = state[0:7]
                print("Inserting data from " + state + "...")
                try:
                    # First attempt: bulk insert with no value cleanup.
                    engine.table.cleanup = Cleanup()
                    engine.extract_zip(
                        engine.format_filename("States/" + shortstate +
                                               ".zip"),
                        engine.format_filename("States/" + shortstate),
                    )
                    file_path = "{states}/{shortstate}/{shortstate}.csv".format(
                        states="States", shortstate=shortstate)
                    engine.insert_data_from_file(
                        engine.format_filename(file_path))
                except:
                    # Fallback: retry row-by-row with the cleanup function.
                    print(state,
                          ": Failed bulk insert on, inserting manually.")
                    engine.connection.rollback()
                    engine.table.cleanup = self.cleanup_func_clean
                    engine.insert_data_from_file(
                        engine.format_filename(file_path))
            except:
                print("There was an error in " + state + ".")
                raise
    except zipfile.BadZipfile:
        print(
            "There was an unexpected error in the Breeding Bird Survey archives."
        )
        raise
    return engine
def download(self, engine=None, debug=False):
    # NOTE(review): this is Python 2 code (print statements, binary
    # file handles for text) — presumably a legacy version of the
    # 50-stop script kept for old retriever releases; confirm before
    # modernizing.
    """Download and load the 50-stop Breeding Bird Survey tables.

    Populates the species, routes, weather, region_codes and counts
    tables through the active engine and returns the engine.
    """
    try:
        Script.download(self, engine, debug)
        engine = self.engine

        # Species table: fixed-width file with a 6-row header.
        table = Table("species", cleanup=Cleanup(), contains_pk=True,
                      header_rows=6)
        table.columns=[("species_id", ("pk-int",) ),
                       ("AOU", ("int",) ),
                       ("english_common_name", ("char",50) ),
                       ("french_common_name", ("char",50) ),
                       ("spanish_common_name", ("char",50) ),
                       ("sporder", ("char",30) ),
                       ("family", ("char",30) ),
                       ("genus", ("char",30) ),
                       ("species", ("char",50) ),
                       ]
        table.fixed_width = [7,6,51,51,51,51,51,51,50]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["species"])

        # Routes table: pre-clean into routes_new.csv, flipping the
        # sign of column 5 (longitude) to negative where positive.
        # Skipped if a previous run already produced the cleaned file.
        if not os.path.isfile(engine.format_filename("routes_new.csv")):
            engine.download_files_from_archive(self.urls["routes"],
                                               ["routes.csv"])
            read = open(engine.format_filename("routes.csv"), "rb")
            write = open(engine.format_filename("routes_new.csv"), "wb")
            print "Cleaning routes data..."
            write.write(read.readline())
            for line in read:
                values = line.split(',')
                v = Decimal(values[5])
                if v > 0:
                    values[5] = str(v * Decimal("-1"))
                write.write(','.join(str(value) for value in values))
            write.close()
            read.close()

        engine.auto_create_table(Table("routes", cleanup=Cleanup()),
                                 filename="routes_new.csv")
        engine.insert_data_from_file(engine.format_filename("routes_new.csv"))

        # Weather table: pre-clean weather.csv into weather_new.csv.
        if not os.path.isfile(engine.format_filename("weather_new.csv")):
            engine.download_files_from_archive(self.urls["weather"],
                                               ["weather.csv"])
            read = open(engine.format_filename("weather.csv"), "rb")
            write = open(engine.format_filename("weather_new.csv"), "wb")
            print "Cleaning weather data..."
            for line in read:
                values = line.split(',')
                newvalues = []
                for value in values:
                    if ':' in value:
                        # Strip colons from time-like values.
                        newvalues.append(value.replace(':', ''))
                    elif value == "N":
                        # NOTE(review): str(None) below writes the literal
                        # string "None" into the csv — presumably a null
                        # marker; the cleanup only maps 'NULL', so verify.
                        newvalues.append(None)
                    else:
                        newvalues.append(value)
                write.write(','.join(str(value) for value in newvalues))
            write.close()
            read.close()

        engine.auto_create_table(Table("weather", pk="RouteDataId",
                                       cleanup=Cleanup(correct_invalid_value,
                                                       nulls=['NULL'])),
                                 filename="weather_new.csv")
        engine.insert_data_from_file(engine.format_filename("weather_new.csv"))

        # Region_codes table: fixed-width file with an 11-row header.
        table = Table("region_codes", pk=False, header_rows=11,
                      fixed_width=[11, 11, 30])

        def regioncodes_cleanup(value, engine):
            # Replace accented latin-1 vowels (á é í ó) with their
            # unaccented ASCII equivalents.
            replace = {chr(225):"a", chr(233):"e", chr(237):"i", chr(243):"o"}
            newvalue = str(value)
            for key in replace.keys():
                if key in newvalue:
                    newvalue = newvalue.replace(key, replace[key])
            return newvalue
        table.cleanup = Cleanup(regioncodes_cleanup)
        table.columns=[("countrynum"            ,   ("int",)        ),
                       ("regioncode"            ,   ("int",)        ),
                       ("regionname"            ,   ("char",30)     )]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["region_codes"])

        # Counts table: per-stop counts (Stop1..Stop50), one archive
        # per part (Fifty1.zip .. Fifty10.zip).
        table = Table("counts", pk=False, delimiter=',')
        table.columns=[("RouteDataID"           ,   ("int",)        ),
                       ("countrynum"            ,   ("int",)        ),
                       ("statenum"              ,   ("int",)        ),
                       ("Route"                 ,   ("int",)        ),
                       ("RPID"                  ,   ("int",)        ),
                       ("year"                  ,   ("int",)        ),
                       ("AOU"                   ,   ("int",)        ),
                       ("Stop1"                 ,   ("int",)        ),
                       ("Stop2"                 ,   ("int",)        ),
                       ("Stop3"                 ,   ("int",)        ),
                       ("Stop4"                 ,   ("int",)        ),
                       ("Stop5"                 ,   ("int",)        ),
                       ("Stop6"                 ,   ("int",)        ),
                       ("Stop7"                 ,   ("int",)        ),
                       ("Stop8"                 ,   ("int",)        ),
                       ("Stop9"                 ,   ("int",)        ),
                       ("Stop10"                ,   ("int",)        ),
                       ("Stop11"                ,   ("int",)        ),
                       ("Stop12"                ,   ("int",)        ),
                       ("Stop13"                ,   ("int",)        ),
                       ("Stop14"                ,   ("int",)        ),
                       ("Stop15"                ,   ("int",)        ),
                       ("Stop16"                ,   ("int",)        ),
                       ("Stop17"                ,   ("int",)        ),
                       ("Stop18"                ,   ("int",)        ),
                       ("Stop19"                ,   ("int",)        ),
                       ("Stop20"                ,   ("int",)        ),
                       ("Stop21"                ,   ("int",)        ),
                       ("Stop22"                ,   ("int",)        ),
                       ("Stop23"                ,   ("int",)        ),
                       ("Stop24"                ,   ("int",)        ),
                       ("Stop25"                ,   ("int",)        ),
                       ("Stop26"                ,   ("int",)        ),
                       ("Stop27"                ,   ("int",)        ),
                       ("Stop28"                ,   ("int",)        ),
                       ("Stop29"                ,   ("int",)        ),
                       ("Stop30"                ,   ("int",)        ),
                       ("Stop31"                ,   ("int",)        ),
                       ("Stop32"                ,   ("int",)        ),
                       ("Stop33"                ,   ("int",)        ),
                       ("Stop34"                ,   ("int",)        ),
                       ("Stop35"                ,   ("int",)        ),
                       ("Stop36"                ,   ("int",)        ),
                       ("Stop37"                ,   ("int",)        ),
                       ("Stop38"                ,   ("int",)        ),
                       ("Stop39"                ,   ("int",)        ),
                       ("Stop40"                ,   ("int",)        ),
                       ("Stop41"                ,   ("int",)        ),
                       ("Stop42"                ,   ("int",)        ),
                       ("Stop43"                ,   ("int",)        ),
                       ("Stop44"                ,   ("int",)        ),
                       ("Stop45"                ,   ("int",)        ),
                       ("Stop46"                ,   ("int",)        ),
                       ("Stop47"                ,   ("int",)        ),
                       ("Stop48"                ,   ("int",)        ),
                       ("Stop49"                ,   ("int",)        ),
                       ("Stop50"                ,   ("int",)        )]
        part = ""
        engine.table = table
        engine.create_table()
        for part in range(1,11):
            part = str(part)
            try:
                print "Inserting data from part " + part + "..."
                try:
                    # First attempt: bulk insert with no value cleanup.
                    engine.table.cleanup = Cleanup()
                    engine.insert_data_from_archive(self.urls["counts"] + "Fifty" + part + ".zip", ["fifty" + part + ".csv"])
                except:
                    # Fallback: retry row-by-row, treating '*' as null.
                    print "Failed bulk insert on " + part + ", inserting manually."
                    engine.connection.rollback()
                    engine.table.cleanup = Cleanup(correct_invalid_value,
                                                   nulls=['*'])
                    engine.insert_data_from_archive(self.urls["counts"] + "Fifty" + part + ".zip", ["fifty" + part + ".csv"])
            except:
                print "There was an error in part " + part + "."
                raise
    except zipfile.BadZipfile:
        print "There was an unexpected error in the Breeding Bird Survey archives."
        raise
    return engine
def download(self, engine=None, debug=False):
    """Download and load the 50-stop Breeding Bird Survey tables.

    Populates the species, routes, weather, migrants, migrantsummary,
    vehicledata and counts tables through the active engine and returns
    the engine.  `engine` and `debug` are forwarded to Script.download().
    """
    try:
        Script.download(self, engine, debug)
        engine = self.engine

        # Species table: fixed-width file with an 11-row header.
        table = Table("species",
                      cleanup=Cleanup(),
                      contains_pk=True,
                      header_rows=11)
        table.columns = [
            ("species_id", ("pk-int", )),
            ("AOU", ("int", )),
            ("english_common_name", ("char", 50)),
            ("french_common_name", ("char", 50)),
            ("spanish_common_name", ("char", 50)),
            ("sporder", ("char", 30)),
            ("family", ("char", 30)),
            ("genus", ("char", 30)),
            ("species", ("char", 50)),
        ]
        table.fixed_width = [7, 6, 51, 51, 51, 51, 51, 51, 50]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["species"])

        # Routes table.
        engine.download_files_from_archive(self.urls["routes"],
                                           ["routes.csv"],
                                           archive_name="routes.zip")
        engine.auto_create_table(Table("routes", cleanup=Cleanup()),
                                 filename="routes.csv")
        engine.insert_data_from_file(engine.format_filename("routes.csv"))

        # Weather table.
        engine.download_files_from_archive(self.urls["weather"],
                                           ["weather.csv"],
                                           archive_name="weather.zip")
        engine.auto_create_table(Table("weather",
                                       pk="RouteDataId",
                                       cleanup=self.cleanup_func_table),
                                 filename="weather.csv")
        engine.insert_data_from_file(engine.format_filename("weather.csv"))

        # Migrations data: nested zips inside MigrantNonBreeder.zip.
        engine.download_files_from_archive(
            self.urls["migrants"], archive_name="MigrantNonBreeder.zip")
        engine.extract_zip(
            engine.format_filename("MigrantNonBreeder/Migrants.zip"),
            engine.format_filename("Migrant"),
        )
        engine.extract_zip(
            engine.format_filename("MigrantNonBreeder/MigrantSummary.zip"),
            engine.format_filename("MigrantSummary"),
        )

        # migrants: per-stop counts (stop1..stop50).
        table = Table("migrants", cleanup=Cleanup())
        table.columns = ([('routedataid', ('int', )),
                          ('countrynum', ('int', )),
                          ('statenum', ('int', )),
                          ('route', ('int', )),
                          ('rpid', ('int', )),
                          ('year', ('int', )),
                          ('aou', ('int', ))] +
                         [('stop' + str(i), ('int', ))
                          for i in range(1, 51)])
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(
            engine.format_filename("Migrant/Migrants.csv"))

        # migrantsummary: per-route 10-stop aggregates.
        table = Table("migrantsummary", cleanup=Cleanup())
        table.columns = [('routedataid', ('int', )),
                         ('countrynum', ('int', )),
                         ('statenum', ('int', )),
                         ('route', ('int', )),
                         ('rpid', ('int', )),
                         ('year', ('int', )),
                         ('aou', ('int', )),
                         ('count10', ('int', )),
                         ('count20', ('int', )),
                         ('count30', ('int', )),
                         ('count40', ('int', )),
                         ('count50', ('int', )),
                         ('stoptotal', ('int', )),
                         ('speciestotal', ('int', ))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(
            engine.format_filename("MigrantSummary/MigrantSummary.csv"))

        # vehicledata: per-stop car and noise counts (car1..50,
        # noise1..50).
        table = Table("vehicledata", cleanup=Cleanup())
        table.columns = ([('routedataid', ('int', )),
                          ('countrynum', ('int', )),
                          ('statenum', ('int', )),
                          ('route', ('int', )),
                          ('rpid', ('int', )),
                          ('year', ('int', )),
                          ('recordedcar', ('char', ))] +
                         [('car' + str(i), ('int', ))
                          for i in range(1, 51)] +
                         [('noise' + str(i), ('int', ))
                          for i in range(1, 51)])
        engine.table = table
        engine.create_table()
        engine.download_files_from_archive(self.urls["Vehicledata"],
                                           archive_name="VehicleData.zip")
        engine.extract_zip(
            engine.format_filename("VehicleData/VehicleData.zip"),
            engine.format_filename("VehicleData"),
        )
        engine.insert_data_from_file(
            engine.format_filename("VehicleData/VehicleData.csv"))

        # Counts table: per-stop counts split into 10 "Fifty" parts
        # inside 50-StopData.zip.
        table = Table("counts", pk=False, delimiter=',')
        engine.download_files_from_archive(self.urls["counts"],
                                           archive_name="50-StopData.zip")
        table.columns = ([("RouteDataID", ("int", )),
                          ("countrynum", ("int", )),
                          ("statenum", ("int", )),
                          ("Route", ("int", )),
                          ("RPID", ("int", )),
                          ("year", ("int", )),
                          ("AOU", ("int", ))] +
                         [("Stop" + str(i), ("int", ))
                          for i in range(1, 51)])
        part = ""
        engine.table = table
        engine.create_table()
        for part in range(1, 11):
            part = str(part)
            try:
                print("Inserting data from part " + part + "...")
                try:
                    # First attempt: bulk insert with no value cleanup.
                    engine.table.cleanup = Cleanup()
                    engine.extract_zip(
                        engine.format_filename(
                            "50-StopData/1997ToPresent_SurveyWide/Fifty" +
                            part + ".zip"),
                        engine.format_filename("fifty" + part),
                    )
                    # Fix: the extracted csv was never inserted, so the
                    # success path loaded no data; insert it, mirroring
                    # the States loop of the sibling BBS script.
                    file_path = "fifty{part}/fifty{part}.csv".format(
                        part=part)
                    engine.insert_data_from_file(
                        engine.format_filename(file_path))
                except:
                    # Fallback: re-download the part and retry row-by-row
                    # with the cleanup function.
                    print(
                        "fifty{}: Failed bulk insert on, inserting manually."
                        .format(part))
                    engine.connection.rollback()
                    engine.table.cleanup = self.cleanup_func_clean
                    engine.insert_data_from_archive(
                        self.urls["counts"] + "Fifty" + part + ".zip",
                        ["fifty" + part + ".csv"])
            except:
                print("There was an error in part " + part + ".")
                raise
    except zipfile.BadZipfile:
        print(
            "There was an unexpected error in the Breeding Bird Survey archives."
        )
        raise
    return engine