def __init__(self, **kwargs):
    # Dataset script for the Flensburg Fjord food-web data
    # (Zander et al. 2011, Ecology 92:2007).
    Script.__init__(self, **kwargs)
    self.title = "Food web including metazoan parasites for a " \
                 "brackish shallow water ecosystem in Germany and Denmark"
    self.citation = "C. Dieter Zander, Neri Josten, Kim C. Detloff, " \
                    "Robert Poulin, John P. McLaughlin, and David W. " \
                    "Thieltges. 2011. Food web including metazoan " \
                    "parasites for a brackish shallow water ecosystem " \
                    "in Germany and Denmark. Ecology 92:2007."
    self.name = "flensburg-food-web"
    self.ref = "https://figshare.com/articles/Full_Archive/3552066"
    self.description = "This data is of a food web for the Flensburg " \
                       "Fjord, a brackish shallow water inlet on the " \
                       "Baltic Sea, between Germany and Denmark."
    self.keywords = []
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.0.4'
    self.urls = {"zip": "https://ndownloader.figshare.com/files/5620326"}
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=[''])
    # Backwards compatibility with retriever <= 2.0.0, which used the
    # older attribute names (shortname/tags) and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        # NOTE(review): the legacy branch also treats 'unknown' as missing
        # but the modern branch above does not — confirm this is intended.
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['', 'unknown'])
def __init__(self, **kwargs):
    """Set up metadata for the Southern Ocean dietary database script."""
    Script.__init__(self, **kwargs)
    self.name = "socean-diet-data"
    self.shortname = "socean-diet-data"
    self.title = "A Southern Ocean dietary database"
    self.description = ("Diet-related data from published"
                        " and unpublished data sets and studies")
    self.citation = ("Ben Raymond, Michelle Marshall, Gabrielle Nevitt, "
                     "Chris L. Gillies, John van den Hoff, Jonathan "
                     "S. Stark, Marcel Losekoot, Eric J. Woehler, and "
                     "Andrew J. Constable. 2011. "
                     "A Southern Ocean dietary database. Ecology 92:1188.")
    self.ref = "https://figshare.com/articles/Full_Archive/3551304"
    self.keywords = []
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.0.4'
    self.urls = {"zip": "https://ndownloader.figshare.com/files/5618823"}
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=['', 'unknown'])
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['', 'unknown'])
def __init__(self, **kwargs):
    # Dataset script for BAAD, the Biomass And Allometry Database for
    # woody plants (Falster et al. 2015).
    Script.__init__(self, **kwargs)
    self.title = "BAAD: a Biomass And Allometry Database for woody plants"
    self.name = "biomass-allometry-db"
    self.ref = "https://doi.org/10.6084/m9.figshare.c.3307692.v1"
    self.urls = {"BAAD": "https://ndownloader.figshare.com/files/5634309"}
    self.citation = "Falster, D.S., Duursma, R.A., Ishihara, M.I., " \
                    "Barneche, D.R., FitzJohn, R.G., Varhammar, A., Aiba, " \
                    "M., Ando, M., Anten, N., Aspinwall, M.J. and " \
                    "Baltzer, J.L., 2015. BAAD: a Biomass And Allometry " \
                    "Database for woody plants."
    self.licenses = [{"name": "CC0-1.0"}]
    self.keywords = ['plants', 'observational']
    self.retriever_minimum_version = "2.0.dev"
    self.version = "1.4.5"
    self.description = "The data set is a Biomass and allometry database " \
                       "(BAAD) for woody plants containing 259634 " \
                       "measurements collected in 176 different studies " \
                       "from 21084 individuals across 678 species."
    # retriever <= 2.0.0 used shortname/tags and `nulls`; newer versions
    # use `missing_values` and set the source file encoding.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
        self.encoding = "latin-1"
def __init__(self, **kwargs):
    # Dataset script for PanTHERIA, a species-level database of mammal
    # life history, ecology, and geography (Jones et al. 2009).
    Script.__init__(self, **kwargs)
    self.title = "Pantheria (Jones et al. 2009)"
    self.name = "pantheria"
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.3.1'
    self.ref = "https://figshare.com/collections/PanTHERIA_a_species-level_database_of_life_history_ecology_" \
               "and_geography_of_extant_and_recently_extinct_mammals/3301274"
    self.urls = {"data": "https://ndownloader.figshare.com/files/5604752"}
    self.citation = "Kate E. Jones, Jon Bielby, Marcel Cardillo, Susanne A. Fritz, Justin O'Dell, C. David L. " \
                    "Orme, Kamran Safi, Wes Sechrest, Elizabeth H. Boakes, Chris Carbone, Christina Connolly, " \
                    "Michael J. Cutts, Janine K. Foster, Richard Grenyer, Michael Habib, Christopher A. " \
                    "Plaster, Samantha A. Price, Elizabeth A. Rigby, Janna Rist, Amber Teacher, Olaf R. P. " \
                    "Bininda-Emonds, John L. Gittleman, Georgina M. Mace, and Andy Purvis. 2009. PanTHERIA:a " \
                    "species-level database of life history, ecology, and geography of extant and recently " \
                    "extinct mammals. Ecology 90:2648."
    self.description = "PanTHERIA is a data set of multispecies trait data from diverse literature sources " \
                       "and also includes spatial databases of mammalian geographic ranges and global climatic " \
                       "and anthropogenic variables."
    self.keywords = ["mammals", "literature-compilation", "life-history"]
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
def __init__(self, **kwargs):
    """Set up metadata for the central Western Ghats forest-plot dataset."""
    Script.__init__(self, **kwargs)
    self.name = "forest-plots-wghats"
    self.title = ("Indian Forest Stand Structure and Composition "
                  "(Ramesh et al. 2010)")
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.3.2'
    self.ref = ("https://figshare.com/collections/Forest_stand_structure_and_composition_in_96_sites_"
                "along_environmental_gradients_in_the_central_Western_Ghats_of_India/3303531")
    self.urls = {'data': 'https://ndownloader.figshare.com/files/5617140'}
    self.citation = ("B. R. Ramesh, M. H. Swaminath, Santoshgouda V. Patil, Dasappa, Raphael Pelissier, P. "
                     "Dilip Venugopal, S. Aravajy, Claire Elouard, and S. Ramalingam. 2010. Forest stand "
                     "structure and composition in 96 sites along environmental gradients in the central "
                     "Western Ghats of India. Ecology 91:3118.")
    self.description = ("This data set reports woody plant species abundances in a network of 96 sampling "
                        "sites spread across 22000 km2 in central Western Ghats region, Karnataka, India.")
    self.keywords = ['plants', 'regional-scale', 'observational']
    # Old retriever versions (<= 2.0.0) expect shortname/tags and `nulls`.
    legacy = parse_version(VERSION) <= parse_version("2.0.0")
    if legacy:
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value, nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
def __init__(self, **kwargs):
    # Dataset script for the USGS Breeding Bird Survey 50-stop counts
    # (Pardieck et al. 2015).
    Script.__init__(self, **kwargs)
    self.title = "USGS North American Breeding Bird Survey 50 stop"
    self.name = "breed-bird-survey-50stop"
    self.description = "A Cooperative effort between the U.S. Geological Survey's Patuxent Wildlife Research Center and Environment Canada's Canadian Wildlife Service to monitor the status and trends of North American bird populations."
    self.citation = "Pardieck, K.L., D.J. Ziolkowski Jr., M.-A.R. Hudson. 2015. North American Breeding Bird Survey Dataset 1966 - 2014, version 2014.0. U.S. Geological Survey, Patuxent Wildlife Research Center."
    self.ref = "http://www.pwrc.usgs.gov/BBS/"
    self.keywords = ["birds", "continental-scale"]
    self.retriever_minimum_version = '2.0.dev'
    self.version = '2.0.0'
    # FTP locations of the survey-wide data files.
    self.urls = {
        "counts": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/50-StopData/1997ToPresent_SurveyWide/",
        "routes": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/routes.zip",
        "weather": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/Weather.zip",
        "region_codes": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/RegionCodes.txt",
        "species": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/SpeciesList.txt"}
    # retriever <= 2.0.0 used shortname/tags and `nulls`; newer versions
    # use `missing_values` and must read the latin-1 encoded source files.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        # 'NULL' marks missing values in the raw tables; '*' in cleaned data.
        self.cleanup_func_table = Cleanup(
            correct_invalid_value, nulls=['NULL'])
        self.cleanup_func_clean = Cleanup(
            correct_invalid_value, nulls=['*'])
    else:
        self.encoding = "latin-1"
        self.cleanup_func_table = Cleanup(
            correct_invalid_value, missing_values=['NULL'])
        self.cleanup_func_clean = Cleanup(
            correct_invalid_value, missing_values=['*'])
def __init__(self, **kwargs):
    """Set up metadata for the Uppangala (Western Ghats) tree demography data."""
    Script.__init__(self, **kwargs)
    self.name = "tree-demog-wghats"
    self.title = ("Tree demography in Western Ghats, India "
                  "- Pelissier et al. 2011")
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.3.1'
    self.ref = ("https://figshare.com/collections/Tree_demography_in_an_undisturbed_"
                "Dipterocarp_permanent_sample_plot_at_Uppangala_Western_Ghats_of_India/3304026")
    self.urls = {"data": "https://ndownloader.figshare.com/files/5619033"}
    self.citation = ("Raphael Pelissier, Jean-Pierre Pascal, N. Ayyappan, B. R. Ramesh, "
                     "S. Aravajy, and S. R. Ramalingam. 2011. Twenty years tree demography "
                     "in an undisturbed Dipterocarp permanent sample plot at Uppangala, "
                     "Western Ghats of India. Ecology 92:1376.")
    self.description = ("A data set on demography of trees monitored over 20 years in "
                        "Uppangala permanent sample plot (UPSP).")
    self.keywords = ["plants", "time-series", "observational"]
    # Old retriever versions (<= 2.0.0) expect shortname/tags and `nulls`.
    legacy = parse_version(VERSION) <= parse_version("2.0.0")
    if legacy:
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value, nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
def __init__(self, **kwargs):
    # Dataset script for Gulf of Maine intertidal densities and cover
    # (Petraitis et al. 2008).
    Script.__init__(self, **kwargs)
    self.title = "Gulf of Maine intertidal density/cover (Petraitis et al. 2008)"
    self.citation = "Peter S. Petraitis, Harrison Liu, and " \
                    "Erika C. Rhile. 2008. Densities and cover " \
                    "data for intertidal organisms in the Gulf of " \
                    "Maine, USA, from 2003 to 2007. Ecology 89:588."
    self.name = "intertidal-abund-me"
    self.ref = "https://figshare.com/collections/DENSITIES_AND_COVER_DATA_FOR_INTERTIDAL_ORGANISMS_IN_THE_GULF_OF_MAINE_USA_FROM_2003_TO_2007/3300200"
    self.description = "The data on densities and percent cover in the " \
                       "60 experimental plots from 2003 to 2007 and to " \
                       "update data from 1996 to 2002 that are already " \
                       "published in Ecological Archives." \
                       "Includes densities of mussels, " \
                       "herbivorous limpet, herbivorous snails, " \
                       "predatory snail, barnacle , fucoid algae and " \
                       "percent cover by mussels, barnacles, fucoids, " \
                       "and other sessile organisms."
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.5.3'
    self.urls = {"main": "https://ndownloader.figshare.com/files/5600831"}
    # -999.9 is the sentinel for missing values in the source file.
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=[-999.9])
    # retriever <= 2.0.0 used shortname and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=[-999.9])
    # Single "main" table using whichever cleanup function was selected.
    self.tables = {"main": Table("main", cleanup=self.cleanup_func_table)}
def download(self, engine=None, debug=False):
    """Fetch the BAAD archive and load its data and methods CSVs as tables."""
    Script.download(self, engine, debug)
    engine = self.engine

    # The archive nests its files under a baad_data/ folder; only the two
    # CSVs of interest are extracted. Add more entries here as needed.
    engine.download_files_from_archive(
        self.urls["BAAD"],
        ["baad_data/baad_data.csv", "baad_data/baad_methods.csv"])

    # Each extracted CSV becomes one table; 'NA' marks missing values.
    for table_name, csv_name in (("data", "baad_data.csv"),
                                 ("methods", "baad_methods.csv")):
        na_cleanup = Cleanup(correct_invalid_value, nulls=['NA'])
        engine.auto_create_table(Table(table_name, cleanup=na_cleanup),
                                 filename=csv_name)
        engine.insert_data_from_file(engine.format_filename(csv_name))
def __init__(self, **kwargs):
    # Dataset script for the USGS North American Breeding Bird Survey
    # (route-level counts, v4 of this script).
    Script.__init__(self, **kwargs)
    self.title = "USGS North American Breeding Bird Survey"
    self.name = "breed-bird-survey"
    self.description = "A Cooperative effort between the U.S. " \
                       "Geological Survey's Patuxent Wildlife Research " \
                       "Center and Environment Canada's Canadian " \
                       "Wildlife Service to monitor the status " \
                       "and trends of North American bird populations."
    self.citation = "Pardieck, K.L., D.J. Ziolkowski Jr., M.-A.R. Hudson. " \
                    "2015. North American Breeding Bird Survey Dataset 1966 - " \
                    "2014, version 2014.0. U.S. Geological Survey, Patuxent " \
                    "Wildlife Research Center"
    self.ref = "http://www.pwrc.usgs.gov/BBS/"
    self.keywords = ["birds", "continental-scale"]
    self.retriever_minimum_version = '2.0.dev'
    self.version = '4.0.0'
    # All files are served from ScienceBase; each entry below is the
    # opaque per-file path appended to the common catalog URL.
    base_url = "https://www.sciencebase.gov/catalog/file/get/5ea04e9a82cefae35a129d65?f=__disk__"
    self.urls = {
        "counts": base_url + "38%2F0e%2F1d%2F380e1dd98a48aa48b9cf2efa25f74e07ebc797f8",
        "routes": base_url + "5d%2Fca%2F74%2F5dca74b1e3e1c21f18443e8f27c38bf0e2b2a234&allowOpen=true",
        "weather": base_url + "87%2Fb5%2F1d%2F87b51d999ae1ad18838aa60851e9bcff4498ac8d",
        "migrants": base_url + "bf%2Fe5%2Ff6%2Fbfe5f6834f85cc1e31edf67b5eb825b9abff5806",
        "Vehicledata": base_url + "a9%2F97%2F2b%2Fa9972b26aaeb48bf9425ed21681312b4cc063a7c",
        "species": base_url + "6f%2F16%2F1f%2F6f161fc7c7db1dcaf1259deb02d824700f280460&allowOpen=true",
    }
    # retriever <= 2.0.0 used shortname/tags and `nulls`; newer versions
    # use `missing_values` and must read the latin-1 encoded source files.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['NULL'])
        self.cleanup_func_clean = Cleanup(correct_invalid_value,
                                          nulls=['*'])
    else:
        self.encoding = "latin-1"
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NULL'])
        self.cleanup_func_clean = Cleanup(correct_invalid_value,
                                          missing_values=['*'])
def download(self, engine=None, debug=False):
    """Download each trait file, strip its metadata header, and load it.

    Each LEDA text file begins with free-form metadata lines; the real
    data rows are semicolon-delimited. Lines before the first data row
    that contain no ";" (or are ";;;;" filler / blank lines) are dropped,
    and the cleaned copy ("new<key>") is loaded as table <key>.
    """
    Script.download(self, engine, debug)
    for key in self.urls:
        original_name = self.urls[key].rpartition('/')[-1]
        self.engine.download_file(self.urls[key], original_name)
        new_file_path = self.engine.format_filename("new" + key)
        # BUG FIX: the files are opened in binary mode, so the header
        # filter must compare against bytes literals; comparing str
        # literals against bytes lines raises TypeError on Python 3.
        with open(self.engine.find_file(original_name), "rb") as old_data, \
                open(new_file_path, "wb") as new_data:
            # data flips to True once the metadata header has been passed
            data = False
            for line in old_data:
                # metadata contains lines with no ";" and may have ";;;;"
                # runs or empty lines
                if not data and (b";" not in line or b";;;;" in line):
                    continue
                data = True
                new_data.write(line)
        self.engine.auto_create_table(
            Table(key,
                  cleanup=Cleanup(correct_invalid_value, nulls=[-999.9])),
            filename=str("new" + key))
        self.engine.insert_data_from_file(new_file_path)
def __init__(self):
    """Legacy-style constructor: all dataset metadata goes straight to Script."""
    trees_cleanup = Cleanup(correct_invalid_value, nulls=[-999])
    Script.__init__(
        self,
        name="Tree growth, mortality, physical condition - Clark, 2006",
        shortname="Clark2006",
        tags=['Taxon > Plants'],
        urls={'trees': 'http://esapubs.org/archive/ecol/E087/132/LS_trees_1983_2000.txt'},
        tables={'trees': Table('trees', cleanup=trees_cleanup)},
        description="David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132.")
def __init__(self, **kwargs):
    # Dataset script for the LEDA Traitbase (Kleyer et al. 2008): one URL
    # per plant trait file; download() loads each as its own table.
    Script.__init__(self, **kwargs)
    self.title = "A database on the life history traits of the Northwest European flora"
    self.name = "plant-life-hist-eu"
    self.retriever_minimum_version = '2.0.dev'
    self.version = '1.4.3'
    self.ref = "http://www.uni-oldenburg.de/en/biology/landeco/research/projects/leda/"
    self.urls = {
        "Age_of_first_flowering": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/age_of_first_flowering.txt",
        "Branching": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/branching.txt",
        "Bud_bank_seasonality": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/buds_seasonality.txt",
        "Bud_vertical_distribution": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/buds_vertical_dist.txt",
        "Buoyancy": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/buoyancy.txt",
        "Canopy_height": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/canopy_height.txt",
        "Clonal_growth_organs": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/CGO.txt",
        "Dispersal_type": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/dispersal_type.txt",
        "Leaf_distribution_along_the_stem": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/leaf_distribution.txt",
        "Leaf_dry_matter_content": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/LDMC_und_Geo.txt",
        "Leaf_mass": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/leaf_mass.txt",
        "Leaf_size": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/leaf_size.txt",
        "Morphology_of_dispersal_unit": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/morphology_dispersal_unit.txt",
        "Plant_growth_form": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/plant_growth_form.txt",
        "Plant_life_span": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/plant_life_span.txt",
        "Releasing_height": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/releasing_height.txt",
        "Seed_bank": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_bank.txt",
        "Seed_longevity": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_longevity.txt",
        "Seed_mass": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_mass.txt",
        "Seed_number": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_number.txt",
        "Seed_shape": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/seed_shape.txt",
        "Shoot_growth_form": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/shoot_growth_form.txt",
        "Specific_leaf_area": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/SLA_und_geo_neu.txt",
        "Seed_number_per_shoot": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/SNP.txt",
        # NOTE(review): "Woodiness" points at ssd.txt, which elsewhere in
        # LEDA denotes stem specific density — confirm the mapping.
        "Woodiness": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/ssd.txt",
        "Terminal_velocity": "http://www.uni-oldenburg.de/fileadmin/user_upload/biologie/ag/landeco/download/LEDA/Data_files/TV.txt",
    }
    self.citation = "KLEYER, M., BEKKER, R.M., KNEVEL, I.C., BAKKER, J.P, THOMPSON, K., SONNENSCHEIN, M., POSCHLOD, P., VAN " \
                    "GROENENDAEL, J.M., KLIMES, L., KLIMESOVA, J., KLOTZ, S., RUSCH, G.M., HERMY, M., ADRIAENS, D., BOEDELTJE, G., BOSSUYT, B., DANNEMANN, A., ENDELS, P., GoeTZENBERGER, L., HODGSON, J.G., JACKEL, A-K., KueHN, I., KUNZMANN, D., OZINGA, W.A., RoeMERMANN, C., STADLER, M., SCHLEGELMILCH, J., STEENDAM, H.J., TACKENBERG, O., WILMANN, B., CORNELISSEN, J.H.C., ERIKSSON, O., GARNIER, E., PECO, B. (2008): The LEDA Traitbase: A database of life-history traits of Northwest European flora. Journal of Ecology 96: 1266-1274"
    self.keywords = ['plants', 'observational']
    self.description = "The LEDA Traitbase provides information on plant traits that describe three key features of plant dynamics: persistence, regeneration and dispersal. "
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['NA'])
    else:
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          missing_values=['NA'])
def download(self, engine=None, debug=False):
    # Download the archive and load the species, sites, microplot and
    # macroplot tables for the Western Ghats forest-plot dataset.
    Script.download(self, engine, debug)
    engine = self.engine
    files = ["Macroplot_data_Rev.txt", "Microplot_data.txt",
             "Site_variables.txt", "Species_list.txt"]
    engine.download_files_from_archive(self.urls["data"], files,
                                       filetype="zip")

    # Create table species
    engine.auto_create_table(
        Table('species',
              cleanup=Cleanup(correct_invalid_value, nulls=['NA'])),
        filename="Species_list.txt")
    engine.insert_data_from_file(engine.format_filename("Species_list.txt"))

    # Create table sites
    engine.auto_create_table(
        Table('sites',
              cleanup=Cleanup(correct_invalid_value, nulls=['NA'])),
        filename="Site_variables.txt")
    engine.insert_data_from_file(engine.format_filename("Site_variables.txt"))

    # Create table microplots (cross-tab layout: one source column per
    # BSP plot, folded into (SpCode, PlotID, Count) rows; note the plot
    # numbering has gaps, e.g. no BSP19, BSP32, BSP103).
    table = Table('microplots')
    table.columns = [('record_id', ('pk-auto',)),
                     ('SpCode', ('char', '30')),
                     ('Count', ('ct-int',))]
    table.ct_names = ['BSP1', 'BSP2', 'BSP3', 'BSP4', 'BSP5', 'BSP6',
                      'BSP7', 'BSP8', 'BSP9', 'BSP10', 'BSP11', 'BSP12',
                      'BSP13', 'BSP14', 'BSP15', 'BSP16', 'BSP17', 'BSP18',
                      'BSP20', 'BSP21', 'BSP22', 'BSP23', 'BSP24', 'BSP25',
                      'BSP26', 'BSP27', 'BSP28', 'BSP29', 'BSP30', 'BSP31',
                      'BSP33', 'BSP34', 'BSP35', 'BSP36', 'BSP37', 'BSP41',
                      'BSP42', 'BSP43', 'BSP44', 'BSP45', 'BSP46', 'BSP47',
                      'BSP48', 'BSP49', 'BSP50', 'BSP51', 'BSP52', 'BSP53',
                      'BSP54', 'BSP55', 'BSP56', 'BSP57', 'BSP58', 'BSP59',
                      'BSP60', 'BSP61', 'BSP62', 'BSP63', 'BSP64', 'BSP65',
                      'BSP66', 'BSP67', 'BSP68', 'BSP69', 'BSP70', 'BSP71',
                      'BSP72', 'BSP73', 'BSP74', 'BSP75', 'BSP76', 'BSP78',
                      'BSP79', 'BSP80', 'BSP82', 'BSP83', 'BSP84', 'BSP85',
                      'BSP86', 'BSP87', 'BSP88', 'BSP89', 'BSP90', 'BSP91',
                      'BSP92', 'BSP93', 'BSP94', 'BSP95', 'BSP96', 'BSP97',
                      'BSP98', 'BSP99', 'BSP100', 'BSP101', 'BSP102',
                      'BSP104']
    table.ct_column = 'PlotID'
    engine.auto_create_table(table, filename="Microplot_data.txt")
    engine.insert_data_from_file(engine.format_filename("Microplot_data.txt"))

    # Create table macroplots (cross-tab layout: five girth measurements
    # per tree folded into (PlotID, SpCode, Tree, Girth) rows).
    table = Table('macroplots')
    table.ct_names = ['TreeGirth1', 'TreeGirth2', 'TreeGirth3',
                      'TreeGirth4', 'TreeGirth5']
    table.ct_column = 'Tree'
    table.columns = [('record_id', ('pk-auto',)),
                     ('PlotID', ('char', '20')),
                     ('SpCode', ('char', '30')),
                     ('Girth', ('ct-int',))]
    engine.auto_create_table(table, filename="Macroplot_data_Rev.txt")
    engine.insert_data_from_file(engine.format_filename("Macroplot_data_Rev.txt"))
def __init__(self):
    """Legacy constructor: passes all La Selva tree metadata straight to Script."""
    trees_table = Table('trees',
                        cleanup=Cleanup(correct_invalid_value, nulls=[-999]))
    Script.__init__(
        self,
        name="Tree growth, mortality, physical condition - Clark, 2006",
        shortname="Clark2006",
        tags=['Taxon > Plants'],
        ref="http://esapubs.org/archive/ecol/E087/132/",
        urls={'trees': 'http://esapubs.org/archive/ecol/E087/132/LS_trees_1983_2000.txt'},
        tables={'trees': trees_table},
        description="The data set helps to examine the post-establishment ecology of 10 species of tropical wet forest trees selected to span a range of predicted life history patterns at the La Selva Biological Station in Costa Rica.",
        citation="David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132.")
def __init__(self):
    # Legacy-style constructor for the La Selva trees dataset: all
    # metadata is passed directly to Script as keyword arguments.
    Script.__init__(self,
                    # -999 is the missing-value sentinel in the source file
                    tables={'trees': Table('trees', cleanup=Cleanup(correct_invalid_value, nulls=[-999]))},
                    name="Tree growth, mortality, physical condition - Clark, 2006",
                    tags=['plants', 'time-series'],
                    urls={'trees': 'https://ndownloader.figshare.com/files/5597693'},
                    shortname="la-selva-trees",
                    description="The data set helps to examine the post-establishment ecology of 10 species of tropical wet forest trees selected to span a range of predicted life history patterns at the La Selva Biological Station in Costa Rica.",
                    ref="https://doi.org/10.6084/m9.figshare.c.3299324.v1",
                    retriever_minimum_version="2.0.dev",
                    version='1.3.0',
                    citation="David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132.")
def __init__(self, **kwargs):
    """Set up metadata for the amniote life-history database (Myhrvold et al. 2015)."""
    Script.__init__(self, **kwargs)
    self.name = "amniote-life-hist"
    self.title = "Amniote life History database"
    self.retriever_minimum_version = '2.0.dev'
    self.version = '2.0.2'
    self.ref = ("https://figshare.com/collections/An_amniote_life-history_database_to_perform_comparative_"
                "analyses_with_birds_mammals_and_reptiles/3308127")
    self.urls = {"data": "https://ndownloader.figshare.com/files/8067269"}
    self.citation = ("Myhrvold, N.P., Baldridge, E., Chan, B., Sivam, D., Freeman, D.L. and Ernest, S.M., 2015. "
                     "An amniote life-history database to perform comparative analyses with birds, mammals, "
                     "and reptiles:Ecological Archives E096-269. Ecology, 96(11), pp.3109-000.")
    self.licenses = [{"name": "CC0-1.0"}]
    self.description = "Compilation of life history traits for birds, mammals, and reptiles."
    self.keywords = ["mammals", "literature-compilation"]
    # '-999' is the missing-value sentinel in the source file.
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=['-999'])
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['-999'])
def download(self, engine=None, debug=False):
    """Extract the PanTHERIA trait file from the archive and load it."""
    Script.download(self, engine, debug)
    engine = self.engine
    data_file = "PanTHERIA_1-0_WR05_Aug2008.txt"
    engine.download_files_from_archive(self.urls["data"], [data_file],
                                       filetype="zip")
    # The whole trait file becomes a single "species" table; 'NA' marks
    # missing values.
    species_table = Table('species',
                          cleanup=Cleanup(correct_invalid_value,
                                          nulls=['NA']))
    engine.auto_create_table(species_table, filename=data_file)
    engine.insert_data_from_file(engine.format_filename(data_file))
def __init__(self, **kwargs):
    """Set up metadata for the marine predator-prey body size dataset (Barnes et al. 2008)."""
    Script.__init__(self, **kwargs)
    self.title = "Marine Predator and Prey Body Sizes - Barnes et al. 2008"
    self.name = "predator-prey-size-marine"
    self.retriever_minimum_version = '2.0.dev'
    self.version = '2.0.3'
    # BUG FIX: previously assigned the builtin `zip` function object;
    # the archive type must be the string "zip".
    self.archived = "zip"
    self.ref = "https://figshare.com/collections/PREDATOR_AND_PREY_BODY_SIZES_IN_MARINE_FOOD_WEBS/3300257"
    self.urls = {"data": "https://ndownloader.figshare.com/files/5601029"}
    self.citation = "C. Barnes, D. M. Bethea, R. D. Brodeur, J. Spitz, V. Ridoux, C. Pusineri, B. C. Chase, " \
                    "M. E. Hunsicker, F. Juanes, A. Kellermann, J. Lancaster, F. Menard, F.-X. Bard, P. Munk, " \
                    "J. K. Pinnegar, F. S. Scharf, R. A. Rountree, K. I. Stergiou, C. Sassa, A. Sabates, and S. " \
                    "Jennings. 2008. Predator and prey body sizes in marine food webs. Ecology 89:881."
    self.description = "The data set contains relationships between predator and prey size which are needed to " \
                       "describe interactions of species and size classes in food webs."
    self.keywords = ["fish", "literature-compilation", "size"]
    self.cleanup_func_table = Cleanup(correct_invalid_value,
                                      missing_values=['n/a', '0.0000E+00'])
    self.encoding = "latin-1"
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value,
                                          nulls=['n/a', '0.0000E+00'])
def __init__(self, **kwargs):
    # Dataset script for the La Selva tree growth/mortality data
    # (Clark & Clark 2006).
    Script.__init__(self, **kwargs)
    # -999 is the missing-value sentinel in the source file.
    self.cleanup_func_table = Cleanup(correct_invalid_value, missing_values=[-999])
    self.title = "Tree growth, mortality, physical condition - Clark, 2006"
    self.keywords = ['plants', 'time-series']
    self.urls = {'trees': 'https://ndownloader.figshare.com/files/5597693'}
    self.name = "la-selva-trees"
    self.description = "The data set helps to examine the post-establishment ecology of 10 species of tropical wet forest trees selected to span a range of predicted life history patterns at the La Selva Biological Station in Costa Rica."
    self.ref = "https://doi.org/10.6084/m9.figshare.c.3299324.v1"
    self.retriever_minimum_version = "2.0.dev"
    self.version = '1.4.1'
    self.citation = "David B. Clark and Deborah A. Clark. 2006. Tree growth, mortality, physical condition, and microsite in an old-growth lowland tropical rain forest. Ecology 87:2132."
    # retriever <= 2.0.0 used shortname/tags and the `nulls` keyword.
    if parse_version(VERSION) <= parse_version("2.0.0"):
        self.shortname = self.name
        self.name = self.title
        self.tags = self.keywords
        self.cleanup_func_table = Cleanup(correct_invalid_value, nulls=[-999])
    # Single 'trees' table using whichever cleanup function was selected.
    self.tables = {
        'trees': Table('trees', cleanup=self.cleanup_func_table)
    }
def __init__(self):
    """Legacy constructor: Gulf of Maine intertidal metadata passed straight to Script."""
    main_table = Table("main",
                       cleanup=Cleanup(correct_invalid_value, nulls=[-999.9]))
    Script.__init__(
        self,
        name="Gulf of Maine intertidal density/cover (Ecological Archives 2008)",
        shortname="Petraitis2008",
        ref="http://www.esapubs.org/archive/ecol/E089/032/",
        description="Peter S. Petraitis, Harrison Liu, and Erika C. Rhile. 2008. Densities and cover data for intertidal organisms in the Gulf of Maine, USA, from 2003 to 2007. Ecology 89:588.",
        urls={"main": "http://www.esapubs.org/archive/ecol/E089/032/Succession_sampling_03-07_data.txt"},
        tables={"main": main_table},
    )
def __init__(self):
    # Legacy-style constructor for the Gulf of Maine intertidal dataset:
    # all metadata is passed directly to Script as keyword arguments.
    Script.__init__(self,
                    name="Gulf of Maine intertidal density/cover (Ecological Archives 2008)",
                    citation="Peter S. Petraitis, Harrison Liu, and Erika C. Rhile. 2008. Densities and cover data for intertidal organisms in the Gulf of Maine, USA, from 2003 to 2007. Ecology 89:588.",
                    shortname="Petraitis2008",
                    ref="http://www.esapubs.org/archive/ecol/E089/032/",
                    description="The data set provides access to data on densities and percent cover in the 60 experimental plots from 2003 to 2007 and to update data from 1996 to 2002 that are already published in Ecological Archives.It includes densities of mussels, an herbivorous limpet, herbivorous snails, a predatory snail, a barnacle , and fucoid algae and percent cover by mussels, barnacles, fucoids, and other sessile organisms.",
                    urls={
                        "main": "http://www.esapubs.org/archive/ecol/E089/032/Succession_sampling_03-07_data.txt",
                    },
                    # -999.9 is the missing-value sentinel in the source file
                    tables={
                        "main": Table("main", cleanup=Cleanup(correct_invalid_value, nulls=[-999.9])),
                    }
                    )
def __init__(self, **kwargs):
    """Set up metadata for the FAO global capture production dataset."""
    Script.__init__(self, **kwargs)
    self.title = "Commercial Fisheries Monthly Trade Data by Product, Country/Association"
    self.name = "fao-global-capture-product"
    self.retriever_minimum_version = '2.1.dev'
    self.urls = {
        "capture": "http://www.fao.org/fishery/static/Data/Capture_2018.1.2.zip"}
    self.version = '1.0.0'
    self.ref = "http://www.fao.org/fishery/statistics/global-capture-production/"
    self.citation = "FAO. 2018. FAO yearbook. Fishery and Aquaculture Statistics " \
                    "2016/FAO annuaire. Statistiques des pêches et de l'aquaculture " \
                    "2016/FAO anuario. Estadísticas de pesca y acuicultura 2016. " \
                    "Rome/Roma. 104pp."
    self.description = "Commercial Fisheries statistics provides a summary of " \
                       "commercial fisheries product data by individual country."
    self.keywords = ["Fish", "Fisheries"]
    # BUG FIX: the keyword was misspelled `missingValues`, which Cleanup
    # does not accept (every other script here uses `missing_values`),
    # so the '-' placeholder was never treated as a missing value.
    self.cleanup_func_table = Cleanup(
        correct_invalid_value, missing_values=['-'])
    self.encoding = "utf-8"
def __init__(self):
    # Legacy-style constructor for the Gulf of Maine intertidal dataset
    # (figshare-hosted revision): all metadata passed directly to Script.
    Script.__init__(
        self,
        name="Gulf of Maine intertidal density/cover (Petraitis et al. 2008)",
        citation="Peter S. Petraitis, Harrison Liu, and Erika C. Rhile. 2008. Densities and cover data for intertidal organisms in the Gulf of Maine, USA, from 2003 to 2007. Ecology 89:588.",
        shortname="intertidal-abund-me",
        ref="https://figshare.com/collections/DENSITIES_AND_COVER_DATA_FOR_INTERTIDAL_ORGANISMS_IN_THE_GULF_OF_MAINE_USA_FROM_2003_TO_2007/3300200",
        description="The data set provides access to data on densities and percent cover in the 60 experimental plots from 2003 to 2007 and to update data from 1996 to 2002 that are already published in Ecological Archives.It includes densities of mussels, an herbivorous limpet, herbivorous snails, a predatory snail, a barnacle , and fucoid algae and percent cover by mussels, barnacles, fucoids, and other sessile organisms.",
        retriever_minimum_version='2.0.dev',
        version='1.4.0',
        urls={"main": "https://ndownloader.figshare.com/files/5600831"},
        # -999.9 is the missing-value sentinel in the source file
        tables={
            "main": Table("main",
                          cleanup=Cleanup(correct_invalid_value,
                                          nulls=[-999.9]))
        })
def download(self, engine=None, debug=False):
    """Download and load the Breeding Bird Survey tables.

    Creates and populates the species, routes, weather, region_codes
    and counts tables through the active engine, then returns the
    engine.  `engine` and `debug` are forwarded to Script.download().
    """
    try:
        Script.download(self, engine, debug)
        engine = self.engine

        # Species table: fixed-width file with a 9-row header.
        table = Table("species",
                      cleanup=Cleanup(),
                      contains_pk=True,
                      header_rows=9)
        table.columns = [
            ("species_id", ("pk-int", )),
            ("AOU", ("int", )),
            ("english_common_name", ("char", 50)),
            ("french_common_name", ("char", 50)),
            ("spanish_common_name", ("char", 50)),
            ("sporder", ("char", 30)),
            ("family", ("char", 30)),
            ("genus", ("char", 30)),
            ("species", ("char", 50)),
        ]
        table.fixed_width = [7, 6, 51, 51, 51, 51, 51, 51, 50]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["species"])

        # Routes table: pulled straight from the routes archive.
        engine.download_files_from_archive(self.urls["routes"],
                                           ["routes.csv"])
        engine.auto_create_table(Table("routes", cleanup=Cleanup()),
                                 filename="routes.csv")
        engine.insert_data_from_file(engine.format_filename("routes.csv"))

        # Weather table: pre-clean weather.csv into weather_new.csv
        # (skip if a previous run already produced the cleaned file).
        if not os.path.isfile(engine.format_filename("weather_new.csv")):
            engine.download_files_from_archive(self.urls["weather"],
                                               ["weather.csv"])
            read = open_fr(engine.format_filename("weather.csv"))
            write = open_fw(engine.format_filename("weather_new.csv"))
            print("Cleaning weather data...")
            for line in read:
                values = line.split(',')
                newvalues = []
                for value in values:
                    if ':' in value:
                        # Strip colons from time-like values.
                        newvalues.append(value.replace(':', ''))
                    elif value == "N":
                        # NOTE(review): str(None) below writes the literal
                        # string "None" into the csv — presumably treated
                        # as a null marker downstream; confirm the weather
                        # cleanup function handles it.
                        newvalues.append(None)
                    else:
                        newvalues.append(value)
                write.write(','.join(str(value) for value in newvalues))
            write.close()
            read.close()
        engine.auto_create_table(Table("weather",
                                       pk="RouteDataId",
                                       cleanup=self.cleanup_func_table),
                                 filename="weather_new.csv")
        engine.insert_data_from_file(
            engine.format_filename("weather_new.csv"))

        # Region_codes table: fixed-width file with an 11-row header.
        table = Table("region_codes",
                      pk=False,
                      header_rows=11,
                      fixed_width=[11, 11, 30])

        def regioncodes_cleanup(value, engine):
            # Replace accented latin-1 vowels (á é í ó) with their
            # unaccented ASCII equivalents.
            replace = {
                chr(225): "a",
                chr(233): "e",
                chr(237): "i",
                chr(243): "o"
            }
            newvalue = str(value)
            for key in list(replace.keys()):
                if key in newvalue:
                    newvalue = newvalue.replace(key, replace[key])
            return newvalue

        table.cleanup = Cleanup(regioncodes_cleanup)
        table.columns = [("countrynum", ("int", )),
                         ("regioncode", ("int", )),
                         ("regionname", ("char", 30))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["region_codes"])

        # Counts table: one zip archive per state/province.
        table = Table("counts", delimiter=',')
        table.columns = [("record_id", ("pk-auto", )),
                         ("countrynum", ("int", )),
                         ("statenum", ("int", )),
                         ("Route", ("int", )),
                         ("RPID", ("int", )),
                         ("Year", ("int", )),
                         ("Aou", ("int", )),
                         ("Count10", ("int", )),
                         ("Count20", ("int", )),
                         ("Count30", ("int", )),
                         ("Count40", ("int", )),
                         ("Count50", ("int", )),
                         ("StopTotal", ("int", )),
                         ("SpeciesTotal", ("int", ))]
        # Entries are either a plain name (archive name = first 7 chars)
        # or a [full name, archive short name] pair.
        stateslist = [
            "Alabama", "Alaska", "Arizona", "Arkansas", "California",
            "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
            "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
            "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan",
            "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska",
            "Nevada", ["New Hampshire", "NHampsh"], ["New Jersey", "NJersey"],
            ["New Mexico", "NMexico"], ["New York", "NYork"],
            ["North Carolina", "NCaroli"], ["North Dakota", "NDakota"],
            "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
            ["Rhode Island", "RhodeIs"], ["South Carolina", "SCaroli"],
            ["South Dakota", "SDakota"], "Tennessee", "Texas", "Utah",
            "Vermont", "Virginia", "Washington",
            ["West Virginia", "W_Virgi"], "Wisconsin", "Wyoming", "Alberta",
            ["British Columbia", "BritCol"], "Manitoba",
            ["New Brunswick", "NBrunsw"], ["Northwest Territories", "NWTerri"],
            "Newfoundland", ["Nova Scotia", "NovaSco"], "Nunavut", "Ontario",
            ["Prince Edward Island", "PEI"], "Quebec", "Saskatchewan",
            "Yukon"
        ]
        state = ""
        shortstate = ""
        engine.table = table
        engine.create_table()
        for state in stateslist:
            try:
                # NOTE(review): len(state) > 2 distinguishes plain string
                # names (all longer than 2 chars) from 2-element
                # [name, shortname] lists; fragile but correct for the
                # list above.
                if len(state) > 2:
                    shortstate = state[0:7]
                else:
                    state, shortstate = state[0], state[1]
                print("Inserting data from " + state + "...")
                try:
                    # First attempt: bulk insert with no value cleanup.
                    engine.table.cleanup = Cleanup()
                    engine.insert_data_from_archive(
                        self.urls["counts"] + shortstate + ".zip",
                        [shortstate + ".csv"])
                except:
                    # Fallback: retry row-by-row with the cleanup function.
                    print("Failed bulk insert on " + state +
                          ", inserting manually.")
                    engine.connection.rollback()
                    engine.table.cleanup = self.cleanup_func_clean
                    engine.insert_data_from_archive(
                        self.urls["counts"] + shortstate + ".zip",
                        [shortstate + ".csv"])
            except:
                print("There was an error in " + state + ".")
                raise
    except zipfile.BadZipfile:
        print(
            "There was an unexpected error in the Breeding Bird Survey archives."
        )
        raise
    return engine
def download(self, engine=None, debug=False):
    """Download and load the Breeding Bird Survey tables.

    Populates the species, routes, weather, migrants, migrantsummary,
    vehicledata and counts tables through the active engine and returns
    the engine.  `engine` and `debug` are forwarded to Script.download().
    """
    try:
        Script.download(self, engine, debug)
        engine = self.engine

        # Species table: fixed-width file with an 11-row header.
        table = Table("species",
                      cleanup=Cleanup(),
                      contains_pk=True,
                      header_rows=11)
        table.columns = [
            ("species_id", ("pk-int", )),
            ("AOU", ("int", )),
            ("english_common_name", ("char", 50)),
            ("french_common_name", ("char", 50)),
            ("spanish_common_name", ("char", 50)),
            ("sporder", ("char", 30)),
            ("family", ("char", 30)),
            ("genus", ("char", 30)),
            ("species", ("char", 50)),
        ]
        table.fixed_width = [7, 6, 51, 51, 51, 51, 51, 51, 50]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["species"])

        # Routes table.
        engine.download_files_from_archive(self.urls["routes"],
                                           ["routes.csv"],
                                           archive_name="routes.zip")
        engine.auto_create_table(Table("routes", cleanup=Cleanup()),
                                 filename="routes.csv")
        engine.insert_data_from_file(engine.format_filename("routes.csv"))

        # Weather table.
        engine.download_files_from_archive(self.urls["weather"],
                                           ["weather.csv"],
                                           archive_name="weather.zip")
        engine.auto_create_table(Table("weather",
                                       pk="RouteDataId",
                                       cleanup=self.cleanup_func_table),
                                 filename="weather.csv")
        engine.insert_data_from_file(engine.format_filename("weather.csv"))

        # Migrations data: nested zips inside MigrantNonBreeder.zip.
        engine.download_files_from_archive(
            self.urls["migrants"], archive_name="MigrantNonBreeder.zip")
        engine.extract_zip(
            engine.format_filename("MigrantNonBreeder/Migrants.zip"),
            engine.format_filename("Migrant"),
        )
        engine.extract_zip(
            engine.format_filename("MigrantNonBreeder/MigrantSummary.zip"),
            engine.format_filename("MigrantSummary"),
        )

        # migrants: per-stop counts (stop1..stop50).
        table = Table("migrants", cleanup=Cleanup())
        table.columns = [('routedataid', ('int', )), ('countrynum', ('int', )),
                         ('statenum', ('int', )), ('route', ('int', )),
                         ('rpid', ('int', )), ('year', ('int', )),
                         ('aou', ('int', )), ('stop1', ('int', )),
                         ('stop2', ('int', )), ('stop3', ('int', )),
                         ('stop4', ('int', )), ('stop5', ('int', )),
                         ('stop6', ('int', )), ('stop7', ('int', )),
                         ('stop8', ('int', )), ('stop9', ('int', )),
                         ('stop10', ('int', )), ('stop11', ('int', )),
                         ('stop12', ('int', )), ('stop13', ('int', )),
                         ('stop14', ('int', )), ('stop15', ('int', )),
                         ('stop16', ('int', )), ('stop17', ('int', )),
                         ('stop18', ('int', )), ('stop19', ('int', )),
                         ('stop20', ('int', )), ('stop21', ('int', )),
                         ('stop22', ('int', )), ('stop23', ('int', )),
                         ('stop24', ('int', )), ('stop25', ('int', )),
                         ('stop26', ('int', )), ('stop27', ('int', )),
                         ('stop28', ('int', )), ('stop29', ('int', )),
                         ('stop30', ('int', )), ('stop31', ('int', )),
                         ('stop32', ('int', )), ('stop33', ('int', )),
                         ('stop34', ('int', )), ('stop35', ('int', )),
                         ('stop36', ('int', )), ('stop37', ('int', )),
                         ('stop38', ('int', )), ('stop39', ('int', )),
                         ('stop40', ('int', )), ('stop41', ('int', )),
                         ('stop42', ('int', )), ('stop43', ('int', )),
                         ('stop44', ('int', )), ('stop45', ('int', )),
                         ('stop46', ('int', )), ('stop47', ('int', )),
                         ('stop48', ('int', )), ('stop49', ('int', )),
                         ('stop50', ('int', ))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(
            engine.format_filename("Migrant/Migrants.csv"))

        # migrantsummary: per-route 10-stop aggregates.
        table = Table("migrantsummary", cleanup=Cleanup())
        table.columns = [('routedataid', ('int', )), ('countrynum',
                                                      ('int', )),
                         ('statenum', ('int', )), ('route', ('int', )),
                         ('rpid', ('int', )), ('year', ('int', )),
                         ('aou', ('int', )), ('count10', ('int', )),
                         ('count20', ('int', )), ('count30', ('int', )),
                         ('count40', ('int', )), ('count50', ('int', )),
                         ('stoptotal', ('int', )), ('speciestotal',
                                                    ('int', ))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(
            engine.format_filename("MigrantSummary/MigrantSummary.csv"))

        # vehicledata: per-stop car and noise counts (car1..50, noise1..50).
        table = Table("vehicledata", cleanup=Cleanup())
        table.columns = [('routedataid', ('int', )), ('countrynum',
                                                      ('int', )),
                         ('statenum', ('int', )), ('route', ('int', )),
                         ('rpid', ('int', )), ('year', ('int', )),
                         ('recordedcar', ('char', )), ('car1', ('int', )),
                         ('car2', ('int', )), ('car3', ('int', )),
                         ('car4', ('int', )), ('car5', ('int', )),
                         ('car6', ('int', )), ('car7', ('int', )),
                         ('car8', ('int', )), ('car9', ('int', )),
                         ('car10', ('int', )), ('car11', ('int', )),
                         ('car12', ('int', )), ('car13', ('int', )),
                         ('car14', ('int', )), ('car15', ('int', )),
                         ('car16', ('int', )), ('car17', ('int', )),
                         ('car18', ('int', )), ('car19', ('int', )),
                         ('car20', ('int', )), ('car21', ('int', )),
                         ('car22', ('int', )), ('car23', ('int', )),
                         ('car24', ('int', )), ('car25', ('int', )),
                         ('car26', ('int', )), ('car27', ('int', )),
                         ('car28', ('int', )), ('car29', ('int', )),
                         ('car30', ('int', )), ('car31', ('int', )),
                         ('car32', ('int', )), ('car33', ('int', )),
                         ('car34', ('int', )), ('car35', ('int', )),
                         ('car36', ('int', )), ('car37', ('int', )),
                         ('car38', ('int', )), ('car39', ('int', )),
                         ('car40', ('int', )), ('car41', ('int', )),
                         ('car42', ('int', )), ('car43', ('int', )),
                         ('car44', ('int', )), ('car45', ('int', )),
                         ('car46', ('int', )), ('car47', ('int', )),
                         ('car48', ('int', )), ('car49', ('int', )),
                         ('car50', ('int', )), ('noise1', ('int', )),
                         ('noise2', ('int', )), ('noise3', ('int', )),
                         ('noise4', ('int', )), ('noise5', ('int', )),
                         ('noise6', ('int', )), ('noise7', ('int', )),
                         ('noise8', ('int', )), ('noise9', ('int', )),
                         ('noise10', ('int', )), ('noise11', ('int', )),
                         ('noise12', ('int', )), ('noise13', ('int', )),
                         ('noise14', ('int', )), ('noise15', ('int', )),
                         ('noise16', ('int', )), ('noise17', ('int', )),
                         ('noise18', ('int', )), ('noise19', ('int', )),
                         ('noise20', ('int', )), ('noise21', ('int', )),
                         ('noise22', ('int', )), ('noise23', ('int', )),
                         ('noise24', ('int', )), ('noise25', ('int', )),
                         ('noise26', ('int', )), ('noise27', ('int', )),
                         ('noise28', ('int', )), ('noise29', ('int', )),
                         ('noise30', ('int', )), ('noise31', ('int', )),
                         ('noise32', ('int', )), ('noise33', ('int', )),
                         ('noise34', ('int', )), ('noise35', ('int', )),
                         ('noise36', ('int', )), ('noise37', ('int', )),
                         ('noise38', ('int', )), ('noise39', ('int', )),
                         ('noise40', ('int', )), ('noise41', ('int', )),
                         ('noise42', ('int', )), ('noise43', ('int', )),
                         ('noise44', ('int', )), ('noise45', ('int', )),
                         ('noise46', ('int', )), ('noise47', ('int', )),
                         ('noise48', ('int', )), ('noise49', ('int', )),
                         ('noise50', ('int', ))]
        engine.table = table
        engine.create_table()
        engine.download_files_from_archive(self.urls["Vehicledata"],
                                           archive_name="VehicleData.zip")
        engine.extract_zip(
            engine.format_filename("VehicleData/VehicleData.zip"),
            engine.format_filename("VehicleData"),
        )
        engine.insert_data_from_file(
            engine.format_filename("VehicleData/VehicleData.csv"))

        # Counts table: one nested zip per state/province in States.zip.
        table = Table("counts", delimiter=",")
        engine.download_files_from_archive(self.urls["counts"],
                                           archive_name="States.zip")
        table.columns = [("record_id", ("pk-auto", )),
                         ("RouteDataID", ("int", )),
                         ("countrynum", ("int", )),
                         ("statenum", ("int", )),
                         ("Route", ("int", )),
                         ("RPID", ("int", )),
                         ("Year", ("int", )),
                         ("Aou", ("int", )),
                         ("Count10", ("int", )),
                         ("Count20", ("int", )),
                         ("Count30", ("int", )),
                         ("Count40", ("int", )),
                         ("Count50", ("int", )),
                         ("StopTotal", ("int", )),
                         ("SpeciesTotal", ("int", ))]
        # Entries are either a plain name (archive name = first 7 chars)
        # or a [full name, archive short name] pair.
        stateslist = [
            "Alabama", "Alaska", "Arizona", "Arkansas", "California",
            "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
            "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
            "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan",
            "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska",
            "Nevada", ["New Hampshire", "NHampsh"], ["New Jersey", "NJersey"],
            ["New Mexico", "NMexico"], ["New York", "NYork"],
            ["North Carolina", "NCaroli"], ["North Dakota", "NDakota"],
            "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
            ["Rhode Island", "RhodeIs"], ["South Carolina", "SCaroli"],
            ["South Dakota", "SDakota"], "Tennessee", "Texas", "Utah",
            "Vermont", "Virginia", "Washington",
            ["West Virginia", "W_Virgi"], "Wisconsin", "Wyoming", "Alberta",
            ["British Columbia", "BritCol"], "Manitoba",
            ["New Brunswick", "NBrunsw"], ["Northwest Territories", "NWTerri"],
            "Newfoundland", ["Nova Scotia", "NovaSco"], "Nunavut", "Ontario",
            ["Prince Edward Island", "PEI"], "Quebec", "Saskatchewan",
            "Yukon"
        ]
        state = ""
        shortstate = ""
        engine.table = table
        engine.create_table()
        for state in stateslist:
            try:
                if isinstance(state, (list, )):
                    state, shortstate = state[0], state[1]
                else:
                    shortstate = state[0:7]
                print("Inserting data from " + state + "...")
                try:
                    # First attempt: bulk insert with no value cleanup.
                    engine.table.cleanup = Cleanup()
                    engine.extract_zip(
                        engine.format_filename("States/" + shortstate +
                                               ".zip"),
                        engine.format_filename("States/" + shortstate),
                    )
                    file_path = "{states}/{shortstate}/{shortstate}.csv".format(
                        states="States", shortstate=shortstate)
                    engine.insert_data_from_file(
                        engine.format_filename(file_path))
                except:
                    # Fallback: retry row-by-row with the cleanup function.
                    print(state,
                          ": Failed bulk insert on, inserting manually.")
                    engine.connection.rollback()
                    engine.table.cleanup = self.cleanup_func_clean
                    engine.insert_data_from_file(
                        engine.format_filename(file_path))
            except:
                print("There was an error in " + state + ".")
                raise
    except zipfile.BadZipfile:
        print(
            "There was an unexpected error in the Breeding Bird Survey archives."
        )
        raise
    return engine
def download(self, engine=None, debug=False):
    # NOTE(review): this is Python 2 code (print statements, binary
    # file handles for text) — presumably a legacy version of the
    # 50-stop script kept for old retriever releases; confirm before
    # modernizing.
    """Download and load the 50-stop Breeding Bird Survey tables.

    Populates the species, routes, weather, region_codes and counts
    tables through the active engine and returns the engine.
    """
    try:
        Script.download(self, engine, debug)
        engine = self.engine

        # Species table: fixed-width file with a 6-row header.
        table = Table("species", cleanup=Cleanup(), contains_pk=True,
                      header_rows=6)
        table.columns=[("species_id", ("pk-int",) ),
                       ("AOU", ("int",) ),
                       ("english_common_name", ("char",50) ),
                       ("french_common_name", ("char",50) ),
                       ("spanish_common_name", ("char",50) ),
                       ("sporder", ("char",30) ),
                       ("family", ("char",30) ),
                       ("genus", ("char",30) ),
                       ("species", ("char",50) ),
                       ]
        table.fixed_width = [7,6,51,51,51,51,51,51,50]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["species"])

        # Routes table: pre-clean into routes_new.csv, flipping the
        # sign of column 5 (longitude) to negative where positive.
        # Skipped if a previous run already produced the cleaned file.
        if not os.path.isfile(engine.format_filename("routes_new.csv")):
            engine.download_files_from_archive(self.urls["routes"],
                                               ["routes.csv"])
            read = open(engine.format_filename("routes.csv"), "rb")
            write = open(engine.format_filename("routes_new.csv"), "wb")
            print "Cleaning routes data..."
            write.write(read.readline())
            for line in read:
                values = line.split(',')
                v = Decimal(values[5])
                if v > 0:
                    values[5] = str(v * Decimal("-1"))
                write.write(','.join(str(value) for value in values))
            write.close()
            read.close()

        engine.auto_create_table(Table("routes", cleanup=Cleanup()),
                                 filename="routes_new.csv")
        engine.insert_data_from_file(engine.format_filename("routes_new.csv"))

        # Weather table: pre-clean weather.csv into weather_new.csv.
        if not os.path.isfile(engine.format_filename("weather_new.csv")):
            engine.download_files_from_archive(self.urls["weather"],
                                               ["weather.csv"])
            read = open(engine.format_filename("weather.csv"), "rb")
            write = open(engine.format_filename("weather_new.csv"), "wb")
            print "Cleaning weather data..."
            for line in read:
                values = line.split(',')
                newvalues = []
                for value in values:
                    if ':' in value:
                        # Strip colons from time-like values.
                        newvalues.append(value.replace(':', ''))
                    elif value == "N":
                        # NOTE(review): str(None) below writes the literal
                        # string "None" into the csv — presumably a null
                        # marker; the cleanup only maps 'NULL', so verify.
                        newvalues.append(None)
                    else:
                        newvalues.append(value)
                write.write(','.join(str(value) for value in newvalues))
            write.close()
            read.close()

        engine.auto_create_table(Table("weather", pk="RouteDataId",
                                       cleanup=Cleanup(correct_invalid_value,
                                                       nulls=['NULL'])),
                                 filename="weather_new.csv")
        engine.insert_data_from_file(engine.format_filename("weather_new.csv"))

        # Region_codes table: fixed-width file with an 11-row header.
        table = Table("region_codes", pk=False, header_rows=11,
                      fixed_width=[11, 11, 30])

        def regioncodes_cleanup(value, engine):
            # Replace accented latin-1 vowels (á é í ó) with their
            # unaccented ASCII equivalents.
            replace = {chr(225):"a", chr(233):"e", chr(237):"i", chr(243):"o"}
            newvalue = str(value)
            for key in replace.keys():
                if key in newvalue:
                    newvalue = newvalue.replace(key, replace[key])
            return newvalue
        table.cleanup = Cleanup(regioncodes_cleanup)
        table.columns=[("countrynum"            ,   ("int",)        ),
                       ("regioncode"            ,   ("int",)        ),
                       ("regionname"            ,   ("char",30)     )]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["region_codes"])

        # Counts table: per-stop counts (Stop1..Stop50), one archive
        # per part (Fifty1.zip .. Fifty10.zip).
        table = Table("counts", pk=False, delimiter=',')
        table.columns=[("RouteDataID"           ,   ("int",)        ),
                       ("countrynum"            ,   ("int",)        ),
                       ("statenum"              ,   ("int",)        ),
                       ("Route"                 ,   ("int",)        ),
                       ("RPID"                  ,   ("int",)        ),
                       ("year"                  ,   ("int",)        ),
                       ("AOU"                   ,   ("int",)        ),
                       ("Stop1"                 ,   ("int",)        ),
                       ("Stop2"                 ,   ("int",)        ),
                       ("Stop3"                 ,   ("int",)        ),
                       ("Stop4"                 ,   ("int",)        ),
                       ("Stop5"                 ,   ("int",)        ),
                       ("Stop6"                 ,   ("int",)        ),
                       ("Stop7"                 ,   ("int",)        ),
                       ("Stop8"                 ,   ("int",)        ),
                       ("Stop9"                 ,   ("int",)        ),
                       ("Stop10"                ,   ("int",)        ),
                       ("Stop11"                ,   ("int",)        ),
                       ("Stop12"                ,   ("int",)        ),
                       ("Stop13"                ,   ("int",)        ),
                       ("Stop14"                ,   ("int",)        ),
                       ("Stop15"                ,   ("int",)        ),
                       ("Stop16"                ,   ("int",)        ),
                       ("Stop17"                ,   ("int",)        ),
                       ("Stop18"                ,   ("int",)        ),
                       ("Stop19"                ,   ("int",)        ),
                       ("Stop20"                ,   ("int",)        ),
                       ("Stop21"                ,   ("int",)        ),
                       ("Stop22"                ,   ("int",)        ),
                       ("Stop23"                ,   ("int",)        ),
                       ("Stop24"                ,   ("int",)        ),
                       ("Stop25"                ,   ("int",)        ),
                       ("Stop26"                ,   ("int",)        ),
                       ("Stop27"                ,   ("int",)        ),
                       ("Stop28"                ,   ("int",)        ),
                       ("Stop29"                ,   ("int",)        ),
                       ("Stop30"                ,   ("int",)        ),
                       ("Stop31"                ,   ("int",)        ),
                       ("Stop32"                ,   ("int",)        ),
                       ("Stop33"                ,   ("int",)        ),
                       ("Stop34"                ,   ("int",)        ),
                       ("Stop35"                ,   ("int",)        ),
                       ("Stop36"                ,   ("int",)        ),
                       ("Stop37"                ,   ("int",)        ),
                       ("Stop38"                ,   ("int",)        ),
                       ("Stop39"                ,   ("int",)        ),
                       ("Stop40"                ,   ("int",)        ),
                       ("Stop41"                ,   ("int",)        ),
                       ("Stop42"                ,   ("int",)        ),
                       ("Stop43"                ,   ("int",)        ),
                       ("Stop44"                ,   ("int",)        ),
                       ("Stop45"                ,   ("int",)        ),
                       ("Stop46"                ,   ("int",)        ),
                       ("Stop47"                ,   ("int",)        ),
                       ("Stop48"                ,   ("int",)        ),
                       ("Stop49"                ,   ("int",)        ),
                       ("Stop50"                ,   ("int",)        )]
        part = ""
        engine.table = table
        engine.create_table()
        for part in range(1,11):
            part = str(part)
            try:
                print "Inserting data from part " + part + "..."
                try:
                    # First attempt: bulk insert with no value cleanup.
                    engine.table.cleanup = Cleanup()
                    engine.insert_data_from_archive(self.urls["counts"] + "Fifty" + part + ".zip", ["fifty" + part + ".csv"])
                except:
                    # Fallback: retry row-by-row, treating '*' as null.
                    print "Failed bulk insert on " + part + ", inserting manually."
                    engine.connection.rollback()
                    engine.table.cleanup = Cleanup(correct_invalid_value,
                                                   nulls=['*'])
                    engine.insert_data_from_archive(self.urls["counts"] + "Fifty" + part + ".zip", ["fifty" + part + ".csv"])
            except:
                print "There was an error in part " + part + "."
                raise
    except zipfile.BadZipfile:
        print "There was an unexpected error in the Breeding Bird Survey archives."
        raise
    return engine
def download(self, engine=None, debug=False):
    """Download and load the 50-stop Breeding Bird Survey tables.

    Populates the species, routes, weather, migrants, migrantsummary,
    vehicledata and counts tables through the active engine and returns
    the engine.  `engine` and `debug` are forwarded to Script.download().
    """
    try:
        Script.download(self, engine, debug)
        engine = self.engine

        # Species table: fixed-width file with an 11-row header.
        table = Table("species",
                      cleanup=Cleanup(),
                      contains_pk=True,
                      header_rows=11)
        table.columns = [
            ("species_id", ("pk-int", )),
            ("AOU", ("int", )),
            ("english_common_name", ("char", 50)),
            ("french_common_name", ("char", 50)),
            ("spanish_common_name", ("char", 50)),
            ("sporder", ("char", 30)),
            ("family", ("char", 30)),
            ("genus", ("char", 30)),
            ("species", ("char", 50)),
        ]
        table.fixed_width = [7, 6, 51, 51, 51, 51, 51, 51, 50]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["species"])

        # Routes table.
        engine.download_files_from_archive(self.urls["routes"],
                                           ["routes.csv"],
                                           archive_name="routes.zip")
        engine.auto_create_table(Table("routes", cleanup=Cleanup()),
                                 filename="routes.csv")
        engine.insert_data_from_file(engine.format_filename("routes.csv"))

        # Weather table.
        engine.download_files_from_archive(self.urls["weather"],
                                           ["weather.csv"],
                                           archive_name="weather.zip")
        engine.auto_create_table(Table("weather",
                                       pk="RouteDataId",
                                       cleanup=self.cleanup_func_table),
                                 filename="weather.csv")
        engine.insert_data_from_file(engine.format_filename("weather.csv"))

        # Migrations data: nested zips inside MigrantNonBreeder.zip.
        engine.download_files_from_archive(
            self.urls["migrants"], archive_name="MigrantNonBreeder.zip")
        engine.extract_zip(
            engine.format_filename("MigrantNonBreeder/Migrants.zip"),
            engine.format_filename("Migrant"),
        )
        engine.extract_zip(
            engine.format_filename("MigrantNonBreeder/MigrantSummary.zip"),
            engine.format_filename("MigrantSummary"),
        )

        # migrants: per-stop counts (stop1..stop50).
        table = Table("migrants", cleanup=Cleanup())
        table.columns = ([('routedataid', ('int', )),
                          ('countrynum', ('int', )),
                          ('statenum', ('int', )),
                          ('route', ('int', )),
                          ('rpid', ('int', )),
                          ('year', ('int', )),
                          ('aou', ('int', ))] +
                         [('stop' + str(i), ('int', ))
                          for i in range(1, 51)])
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(
            engine.format_filename("Migrant/Migrants.csv"))

        # migrantsummary: per-route 10-stop aggregates.
        table = Table("migrantsummary", cleanup=Cleanup())
        table.columns = [('routedataid', ('int', )),
                         ('countrynum', ('int', )),
                         ('statenum', ('int', )),
                         ('route', ('int', )),
                         ('rpid', ('int', )),
                         ('year', ('int', )),
                         ('aou', ('int', )),
                         ('count10', ('int', )),
                         ('count20', ('int', )),
                         ('count30', ('int', )),
                         ('count40', ('int', )),
                         ('count50', ('int', )),
                         ('stoptotal', ('int', )),
                         ('speciestotal', ('int', ))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_file(
            engine.format_filename("MigrantSummary/MigrantSummary.csv"))

        # vehicledata: per-stop car and noise counts (car1..50,
        # noise1..50).
        table = Table("vehicledata", cleanup=Cleanup())
        table.columns = ([('routedataid', ('int', )),
                          ('countrynum', ('int', )),
                          ('statenum', ('int', )),
                          ('route', ('int', )),
                          ('rpid', ('int', )),
                          ('year', ('int', )),
                          ('recordedcar', ('char', ))] +
                         [('car' + str(i), ('int', ))
                          for i in range(1, 51)] +
                         [('noise' + str(i), ('int', ))
                          for i in range(1, 51)])
        engine.table = table
        engine.create_table()
        engine.download_files_from_archive(self.urls["Vehicledata"],
                                           archive_name="VehicleData.zip")
        engine.extract_zip(
            engine.format_filename("VehicleData/VehicleData.zip"),
            engine.format_filename("VehicleData"),
        )
        engine.insert_data_from_file(
            engine.format_filename("VehicleData/VehicleData.csv"))

        # Counts table: per-stop counts split into 10 "Fifty" parts
        # inside 50-StopData.zip.
        table = Table("counts", pk=False, delimiter=',')
        engine.download_files_from_archive(self.urls["counts"],
                                           archive_name="50-StopData.zip")
        table.columns = ([("RouteDataID", ("int", )),
                          ("countrynum", ("int", )),
                          ("statenum", ("int", )),
                          ("Route", ("int", )),
                          ("RPID", ("int", )),
                          ("year", ("int", )),
                          ("AOU", ("int", ))] +
                         [("Stop" + str(i), ("int", ))
                          for i in range(1, 51)])
        part = ""
        engine.table = table
        engine.create_table()
        for part in range(1, 11):
            part = str(part)
            try:
                print("Inserting data from part " + part + "...")
                try:
                    # First attempt: bulk insert with no value cleanup.
                    engine.table.cleanup = Cleanup()
                    engine.extract_zip(
                        engine.format_filename(
                            "50-StopData/1997ToPresent_SurveyWide/Fifty" +
                            part + ".zip"),
                        engine.format_filename("fifty" + part),
                    )
                    # Fix: the extracted csv was never inserted, so the
                    # success path loaded no data; insert it, mirroring
                    # the States loop of the sibling BBS script.
                    file_path = "fifty{part}/fifty{part}.csv".format(
                        part=part)
                    engine.insert_data_from_file(
                        engine.format_filename(file_path))
                except:
                    # Fallback: re-download the part and retry row-by-row
                    # with the cleanup function.
                    print(
                        "fifty{}: Failed bulk insert on, inserting manually."
                        .format(part))
                    engine.connection.rollback()
                    engine.table.cleanup = self.cleanup_func_clean
                    engine.insert_data_from_archive(
                        self.urls["counts"] + "Fifty" + part + ".zip",
                        ["fifty" + part + ".csv"])
            except:
                print("There was an error in part " + part + ".")
                raise
    except zipfile.BadZipfile:
        print(
            "There was an unexpected error in the Breeding Bird Survey archives."
        )
        raise
    return engine