Exemple #1
0
    def download(self, engine=None, debug=False):
        """Download the Global Wood Density Database workbook and load it.

        Two tables are created through ``self.engine``: ``data`` from the
        workbook sheet at index 1 and ``reference`` from the sheet at index 2.
        The first row of each sheet is skipped and rows whose cells are all
        empty are ignored. Every value is written as title-cased text with
        backslashes replaced by forward slashes and double quotes removed.

        Args:
            engine: Forwarded unchanged to ``Script.download``.
            debug: Forwarded unchanged to ``Script.download``.

        Returns:
            ``self.engine`` once both tables have been populated.
        """
        Script.download(self, engine, debug)

        filename = "GlobalWoodDensityDatabase.xls"
        self.engine.download_file(self.urls["GWDD"], filename)
        book = xlrd.open_workbook(self.engine.format_filename(filename))

        def format_value(cell):
            # Defined once here instead of once per row: turn a cell into
            # title-cased text without backslashes or double quotes.
            text = str(Excel.cell_value(cell))
            return text.title().replace("\\", "/").replace('"', '')

        def sheet_lines(sheet, column_count):
            # One tab-delimited line per non-empty row, skipping the first
            # row and using only the first `column_count` cells of each row.
            lines = []
            for index in range(1, sheet.nrows):
                row = sheet.row(index)
                if all(Excel.empty_cell(cell) for cell in row):
                    continue
                lines.append('\t'.join(
                    format_value(row[col]) for col in range(column_count)))
            return lines

        # Creating data table
        data = sheet_lines(book.sheet_by_index(1), 6)

        table = Table("data", delimiter="\t")
        table.columns = [("Number", ("pk-int", )), ("Family", ("char", )),
                         ("Binomial", ("char", )),
                         ("Wood_Density", ("double", )),
                         ("Region", ("char", )),
                         ("Reference_Number", ("int", ))]
        table.pk = 'Number'
        table.contains_pk = True

        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(data)

        # Creating reference table
        data = sheet_lines(book.sheet_by_index(2), 2)

        table = Table("reference", delimiter="\t")
        table.columns = [("Reference_Number", ("pk-int", )),
                         ("Reference", ("char", ))]
        table.pk = 'Reference_Number'
        table.contains_pk = True

        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(data)

        return self.engine
Exemple #2
0
    def download(self, engine=None, debug=False):
        """Download the Gentry transect data and load it via ``self.engine``.

        Creates the ``sites`` table from ``self.urls["sites"]``, then reads
        the first sheet of every workbook named in the ``stems`` archive
        (plus ``CURUYUQU.xls`` when it has to be fetched separately) and
        builds the ``species``, ``stems`` and ``counts`` tables from them.

        Args:
            engine: Forwarded unchanged to ``Script.download``.
            debug: Forwarded unchanged to ``Script.download``.

        Returns:
            ``self.engine`` once all tables have been populated.
        """
        Script.download(self, engine, debug)

        self.engine.auto_create_table(Table("sites"),
                                      url=self.urls["sites"],
                                      filename='gentry_sites.csv')
        self.engine.insert_data_from_url(self.urls["sites"])

        self.engine.download_file(self.urls["stems"], "all_Excel.zip")
        # The context manager closes the archive even if listing it fails.
        with zipfile.ZipFile(
                self.engine.format_filename("all_Excel.zip")) as local_zip:
            filelist = local_zip.namelist()
        self.engine.download_files_from_archive(self.urls["stems"], filelist)

        filelist = [os.path.basename(filename) for filename in filelist]

        # Currently all_Excel.zip is missing CURUYUQU.xls
        # Download it separately and add it to the file list
        if not self.engine.find_file('CURUYUQU.xls'):
            self.engine.download_file(
                "http://www.mobot.org/mobot/gentry/123/samerica/CURUYUQU.xls",
                "CURUYUQU.xls")
            filelist.append('CURUYUQU.xls')

        fields = ("line", "family", "genus", "species", "liana", "count")
        lines = []
        tax = []
        for filename in filelist:
            print("Extracting data from " + filename + "...")
            book = xlrd.open_workbook(self.engine.format_filename(filename))
            sh = book.sheet_by_index(0)

            # Map normalised header names to column numbers; all stem
            # columns are collected together under the "stems" key.
            cn = {'stems': []}
            for colnum, c in enumerate(sh.row(0)):
                if Excel.empty_cell(c):
                    continue
                cid = c.value.lower().strip()
                # line number column is sometimes named differently
                if cid in ["sub", "number"]:
                    cid = "line"
                # the "number of individuals" column is named in various
                # different ways; they always at least contain "nd"
                if "nd" in cid:
                    cid = "count"
                # in QUIAPACA.xls the "number of individuals" column is
                # misnamed "STEMDBH" just like the stems columns, so it is
                # fixed manually by position
                if filename == "QUIAPACA.xls" and colnum == 13:
                    cid = "count"

                # if column is a stem, add it to the list of stems;
                # otherwise, make note of the column name/number
                if "stem" in cid or "dbh" in cid:
                    cn["stems"].append(colnum)
                else:
                    cn[cid] = colnum
            # sometimes, a data file does not contain a liana or count column
            cn.setdefault("liana", -1)
            cn.setdefault("count", -1)

            for rownum in range(1, sh.nrows):
                row = sh.row(rownum)
                # make sure the row is real, not just empty cells
                if all(Excel.empty_cell(cell) for cell in row):
                    continue

                this_line = {}
                # get the following information from the appropriate columns
                for field in fields:
                    col = cn[field]
                    if col > -1:
                        cell = row[col]
                        if cell.ctype != xlrd.XL_CELL_NUMBER:
                            # non-numeric cells are normalised as text
                            value = cell.value.lower().strip().replace(
                                "\\", "/").replace('"', '')
                        else:
                            value = cell.value
                        if value == '`':
                            value = 1
                        this_line[field] = value
                this_line["stems"] = [
                    row[c] for c in cn["stems"]
                    if not Excel.empty_cell(row[c])
                ]
                this_line["site"] = filename[0:-4]

                # Manually correct CEDRAL data, which has a single line
                # that is shifted by one to the left starting at Liana
                if this_line["site"] == "CEDRAL" and isinstance(
                        this_line["liana"], float):
                    this_line["liana"] = ""
                    this_line["count"] = 3
                    this_line["stems"] = [2.5, 2.5, 30, 18, 25]

                lines.append(this_line)

                # Check how far the species is identified
                full_id = 0
                if len(this_line["species"]) < 3:
                    if len(this_line["genus"]) < 3:
                        id_level = "family"
                    else:
                        id_level = "genus"
                else:
                    id_level = "species"
                    full_id = 1
                tax.append(
                    (this_line["family"], this_line["genus"],
                     this_line["species"], id_level, str(full_id)))

        tax = sorted(
            tax, key=lambda group: group[0] + " " + group[1] + " " + group[2])
        unique_tax = []
        seen = set()  # constant-time membership instead of scanning a list
        tax_dict = {}
        tax_count = 0

        # Get all unique families/genera/species
        print("\n")
        for group in tax:
            if group in seen:
                continue
            seen.add(group)
            unique_tax.append(group)
            tax_count += 1
            tax_dict[group[0:3]] = tax_count
            if tax_count % 10 == 0:
                msg = "Generating taxonomic groups: " + str(
                    tax_count) + " / " + str(TAX_GROUPS)
                # backspaces move the cursor back so the next update
                # overwrites this one; flush after writing so it shows
                sys.stdout.write(msg + "\b" * len(msg))
                sys.stdout.flush()
        print("\n")

        # Create species table
        table = Table("species", delimiter=",")
        table.columns = [("species_id", ("pk-int", )), ("family", ("char", )),
                         ("genus", ("char", )), ("species", ("char", )),
                         ("id_level", ("char", 10)), ("full_id", ("int", ))]

        data = [[str(tax_dict[group[:3]])] + ['"%s"' % g for g in group]
                for group in unique_tax]
        table.pk = 'species_id'
        table.contains_pk = True

        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(data)

        # Create stems table
        table = Table("stems", delimiter=",")
        table.columns = [("stem_id", ("pk-auto", )), ("line", ("int", )),
                         ("species_id", ("int", )),
                         ("site_code", ("char", 12)), ("liana", ("char", 10)),
                         ("stem", ("double", ))]
        stems = []
        counts = []
        for line in lines:
            species_info = [
                line["line"],
                tax_dict[(line["family"], line["genus"], line["species"])],
                line["site"],
                line.get("liana", ""),
            ]
            # rows from files without a count column have no "count" entry
            if "count" in line:
                counts.append(species_info + [line["count"]])

            # NOTE(review): entries of line["stems"] are the row's cell
            # objects (except for the CEDRAL override, which stores plain
            # numbers), so str() is applied to the cell object itself --
            # confirm this yields the text the engine expects.
            for stem in line["stems"]:
                stems.append(species_info + [str(stem)])

        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(stems)

        # Create counts table
        table = Table("counts", delimiter=",", contains_pk=False)
        table.columns = [("count_id", ("pk-auto", )), ("line", ("int", )),
                         ("species_id", ("int", )),
                         ("site_code", ("char", 12)), ("liana", ("char", 10)),
                         ("count", ("double", ))]
        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(counts)

        return self.engine
Exemple #3
0
    def download(self, engine=None, debug=False):
        """Download the Global Wood Density Database and load it from CSV.

        The workbook sheet at index 1 is written to ``gwdd_data.csv`` and the
        sheet at index 2 to ``gwdd_ref.csv`` (first row of each sheet
        skipped, a fixed header row written instead). Each CSV file is then
        loaded into its table (``data`` / ``reference``) via ``self.engine``.

        Args:
            engine: Forwarded unchanged to ``Script.download``.
            debug: Forwarded unchanged to ``Script.download``.

        Returns:
            ``self.engine`` once both tables have been populated.
        """
        Script.download(self, engine, debug)
        reload(sys)
        if hasattr(sys, 'setdefaultencoding'):
            sys.setdefaultencoding("utf-8")

        filename = "GlobalWoodDensityDatabase.xls"
        self.engine.download_file(self.urls["GWDD"], filename)
        book = xlrd.open_workbook(self.engine.format_filename(filename))
        sh = book.sheet_by_index(1)

        # Creating data files
        file_path = self.engine.format_filename("gwdd_data.csv")
        gwdd_data = open_fw(file_path)
        try:
            csv_writer = open_csvw(gwdd_data)
            csv_writer.writerow(["Number", "Family", "Binomial",
                                 "Wood_Density", "Region",
                                 "Reference_Number"])
            for index in range(1, sh.nrows):
                # get each row and format the cell values
                csv_writer.writerow(
                    [to_str(cell.value) for cell in sh.row(index)])
        finally:
            # close the file even when a row fails to convert or write
            gwdd_data.close()

        table = Table("data", delimiter=",")
        table.columns = [("Number", ("pk-int",)),
                         ("Family", ("char",)),
                         ("Binomial", ("char",)),
                         ("Wood_Density", ("double",)),
                         ("Region", ("char",)),
                         ("Reference_Number", ("int",))]
        table.pk = 'Number'
        table.contains_pk = True

        self.engine.table = table
        self.engine.create_table()
        # Use self.engine (as everywhere else in this method) rather than
        # the `engine` argument, which is None when the default is used.
        self.engine.insert_data_from_file(
            self.engine.format_filename(file_path))

        # Creating reference table file
        file_path = self.engine.format_filename("gwdd_ref.csv")
        ref_file = open_fw(file_path)
        try:
            csv_writerd = open_csvw(ref_file)
            csv_writerd.writerow(["Reference_Number", "Reference"])
            sh = book.sheet_by_index(2)
            for index in range(1, sh.nrows):
                # get each row and format the cell values
                csv_writerd.writerow([
                    to_str(cell.value, object_encoding=sys.stdout)
                    for cell in sh.row(index)
                ])
        finally:
            ref_file.close()

        table = Table("reference", delimiter=",")
        table.columns = [("Reference_Number", ("pk-int",)),
                         ("Reference", ("char",))]
        table.pk = 'Reference_Number'
        table.contains_pk = True
        self.engine.table = table
        self.engine.create_table()
        self.engine.insert_data_from_file(
            self.engine.format_filename(file_path))

        return self.engine
Exemple #4
0
    def download(self, engine=None, debug=False):
        """Download the Gentry transect data and load it via ``self.engine``.

        Creates the ``sites`` table from ``self.urls["sites"]``, then reads
        the first sheet of every workbook named in the ``stems`` archive and
        builds the ``species``, ``stems`` and ``counts`` tables from them.
        Rows are handed to the engine as comma-joined strings.

        Args:
            engine: Forwarded unchanged to ``Script.download``.
            debug: Forwarded unchanged to ``Script.download``.

        Returns:
            ``self.engine`` once all tables have been populated.
        """
        Script.download(self, engine, debug)

        self.engine.auto_create_table(Table("sites"), url=self.urls["sites"])
        self.engine.insert_data_from_url(self.urls["sites"])

        self.engine.download_file(self.urls["stems"], "all_Excel.zip")
        local_zip = zipfile.ZipFile(self.engine.format_filename("all_Excel.zip"))
        try:
            filelist = local_zip.namelist()
        finally:
            # close the archive even if listing its members fails
            local_zip.close()
        self.engine.download_files_from_archive(self.urls["stems"], filelist)

        filelist = [os.path.basename(filename) for filename in filelist]

        def format_value(cell):
            # Defined once here instead of once per row: turn a cell into
            # title-cased text without backslashes or double quotes.
            text = str(Excel.cell_value(cell))
            return text.title().replace("\\", "/").replace('"', '')

        fields = ("line", "family", "genus", "species", "liana", "count")
        lines = []
        tax = []
        for filename in filelist:
            # Single-argument print(...) gives the same output whether
            # print is a statement or a function.
            print("Extracting data from " + filename + "...")
            book = xlrd.open_workbook(self.engine.format_filename(filename))
            sh = book.sheet_by_index(0)

            # Map normalised header names to column numbers; all stem
            # columns are collected together under the "stems" key.
            cn = {'stems': []}
            for colnum, c in enumerate(sh.row(0)):
                if Excel.empty_cell(c):
                    continue
                cid = Excel.cell_value(c).lower()
                # line number column is sometimes named differently
                if cid in ["sub", "number"]:
                    cid = "line"
                # the "number of individuals" column is named in various
                # different ways; they always at least contain "nd"
                if "nd" in cid:
                    cid = "count"
                # if column is a stem, add it to the list of stems;
                # otherwise, make note of the column name/number
                if "stem" in cid:
                    cn["stems"].append(colnum)
                else:
                    cn[cid] = colnum
            # sometimes, a data file does not contain a liana or count column
            cn.setdefault("liana", -1)
            cn.setdefault("count", -1)

            for rownum in range(1, sh.nrows):
                row = sh.row(rownum)
                # make sure the row is real, not just empty cells
                if not (len(row) > 4 and not Excel.empty_cell(row[0])):
                    continue

                this_line = {}
                # get the following information from the appropriate columns
                for field in fields:
                    if cn[field] > -1:
                        value = format_value(row[cn[field]])
                        if value == '`':
                            value = 1
                        this_line[field] = value

                this_line["stems"] = [Excel.cell_value(row[c])
                                      for c in cn["stems"]
                                      if not Excel.empty_cell(row[c])]
                this_line["site"] = filename[0:-4]

                lines.append(this_line)

                # Check how far the species is identified
                full_id = 0
                if len(this_line["species"]) < 3:
                    if len(this_line["genus"]) < 3:
                        id_level = "family"
                    else:
                        id_level = "genus"
                else:
                    id_level = "species"
                    full_id = 1
                tax.append((this_line["family"],
                            this_line["genus"],
                            this_line["species"].lower().replace('\\', '').replace('"', ''),
                            id_level,
                            str(full_id)))

        tax = sorted(tax, key=lambda group: group[0] + " " + group[1] + " " + group[2])
        unique_tax = []
        seen = set()  # constant-time membership instead of scanning a list
        tax_dict = dict()
        tax_count = 0

        # Get all unique families/genera/species
        for group in tax:
            if group in seen:
                continue
            seen.add(group)
            unique_tax.append(group)
            tax_count += 1
            tax_dict[group[0:3]] = tax_count
            if tax_count % 10 == 0:
                msg = "Generating taxonomic groups: " + str(tax_count) + " / " + str(TAX_GROUPS)
                # backspaces move the cursor back so the next update
                # overwrites this one
                sys.stdout.write(msg + "\b" * len(msg))
        print("Generating taxonomic groups: " + str(TAX_GROUPS) + " / " + str(TAX_GROUPS))

        # Create species table
        table = Table("species", delimiter=",")
        table.columns = [("species_id", ("pk-int",)),
                         ("family", ("char",)),
                         ("genus", ("char",)),
                         ("species", ("char",)),
                         ("id_level", ("char", 10)),
                         ("full_id", ("bool",))]

        data = [','.join([str(tax_dict[group[:3]])] + ['"%s"' % g for g in group])
                for group in unique_tax]
        table.pk = 'species_id'
        table.contains_pk = True

        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(data)

        # Create stems table
        table = Table("stems", delimiter=",", contains_pk=False)
        table.columns = [("stem_id", ("pk-auto",)),
                         ("line", ("int",)),
                         ("species_id", ("int",)),
                         ("site_code", ("char", 12)),
                         ("liana", ("char", 10)),
                         ("stem", ("double",))]
        stems = []
        counts = []
        for line in lines:
            species_info = [line["line"],
                            tax_dict[(line["family"],
                                      line["genus"],
                                      line["species"].lower())],
                            line["site"],
                            line.get("liana", "")]
            # rows from files without a count column have no "count" entry
            if "count" in line:
                counts.append([str(value)
                               for value in species_info + [line["count"]]])

            for stem in line["stems"]:
                stems.append([str(value) for value in species_info + [stem]])

        data = [','.join(stem) for stem in stems]
        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(data)

        # Create counts table
        table = Table("counts", delimiter=",", contains_pk=False)
        table.columns = [("count_id", ("pk-auto",)),
                         ("line", ("int",)),
                         ("species_id", ("int",)),
                         ("site_code", ("char", 12)),
                         ("liana", ("char", 10)),
                         ("count", ("double",))]
        data = [','.join(count) for count in counts]
        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(data)

        return self.engine
    def download(self, engine=None, debug=False):
        """Download the Global Wood Density Database and load it from CSV.

        The workbook sheet at index 1 is written to ``gwdd_data.csv`` and the
        sheet at index 2 to ``gwdd_ref.csv`` (first row of each sheet
        skipped, a fixed header row written instead). Each CSV file is then
        loaded into its table (``data`` / ``reference``) via ``self.engine``.

        Args:
            engine: Forwarded unchanged to ``Script.download``.
            debug: Forwarded unchanged to ``Script.download``.

        Returns:
            ``self.engine`` once both tables have been populated.
        """
        Script.download(self, engine, debug)
        reload(sys)
        if hasattr(sys, 'setdefaultencoding'):
            sys.setdefaultencoding("utf-8")

        filename = "GlobalWoodDensityDatabase.xls"
        self.engine.download_file(self.urls["GWDD"], filename)
        book = xlrd.open_workbook(self.engine.format_filename(filename))
        sh = book.sheet_by_index(1)

        # Creating data files
        file_path = self.engine.format_filename("gwdd_data.csv")
        gwdd_data = open_fw(file_path)
        try:
            csv_writer = open_csvw(gwdd_data)
            csv_writer.writerow([
                "Number", "Family", "Binomial", "Wood_Density", "Region",
                "Reference_Number"
            ])
            for index in range(1, sh.nrows):
                # get each row and format the cell values
                csv_writer.writerow(
                    [to_str(cell.value) for cell in sh.row(index)])
        finally:
            # close the file even when a row fails to convert or write
            gwdd_data.close()

        table = Table("data", delimiter=",")
        table.columns = [("Number", ("pk-int", )), ("Family", ("char", )),
                         ("Binomial", ("char", )),
                         ("Wood_Density", ("double", )),
                         ("Region", ("char", )),
                         ("Reference_Number", ("int", ))]
        table.pk = 'Number'
        table.contains_pk = True

        self.engine.table = table
        self.engine.create_table()
        # Use self.engine (as everywhere else in this method) rather than
        # the `engine` argument, which is None when the default is used.
        self.engine.insert_data_from_file(
            self.engine.format_filename(file_path))

        # Creating reference table file
        file_path = self.engine.format_filename("gwdd_ref.csv")
        ref_file = open_fw(file_path)
        try:
            csv_writerd = open_csvw(ref_file)
            csv_writerd.writerow(["Reference_Number", "Reference"])
            sh = book.sheet_by_index(2)
            for index in range(1, sh.nrows):
                # get each row and format the cell values
                csv_writerd.writerow([
                    to_str(cell.value, object_encoding=sys.stdout)
                    for cell in sh.row(index)
                ])
        finally:
            ref_file.close()

        table = Table("reference", delimiter=",")
        table.columns = [("Reference_Number", ("pk-int", )),
                         ("Reference", ("char", ))]
        table.pk = 'Reference_Number'
        table.contains_pk = True
        self.engine.table = table
        self.engine.create_table()
        self.engine.insert_data_from_file(
            self.engine.format_filename(file_path))

        return self.engine
    def download(self, engine=None, debug=False):
        """Download the Gentry transect data and load it via ``self.engine``.

        Creates the ``sites`` table from ``self.urls["sites"]``, then reads
        the first sheet of every workbook named in the ``stems`` archive
        (plus ``CURUYUQU.xls`` when it has to be fetched separately) and
        builds the ``species``, ``stems`` and ``counts`` tables from them.

        Args:
            engine: Forwarded unchanged to ``Script.download``.
            debug: Forwarded unchanged to ``Script.download``.

        Returns:
            ``self.engine`` once all tables have been populated.
        """
        Script.download(self, engine, debug)

        self.engine.auto_create_table(Table("sites"), url=self.urls["sites"], filename='gentry_sites.csv')
        self.engine.insert_data_from_url(self.urls["sites"])

        self.engine.download_file(self.urls["stems"], "all_Excel.zip")
        # The context manager closes the archive even if listing it fails.
        with zipfile.ZipFile(self.engine.format_filename("all_Excel.zip")) as local_zip:
            filelist = local_zip.namelist()
        self.engine.download_files_from_archive(self.urls["stems"], filelist)

        filelist = [os.path.basename(filename) for filename in filelist]

        # Currently all_Excel.zip is missing CURUYUQU.xls
        # Download it separately and add it to the file list
        if not self.engine.find_file('CURUYUQU.xls'):
            self.engine.download_file("http://www.mobot.org/mobot/gentry/123/samerica/CURUYUQU.xls", "CURUYUQU.xls")
            filelist.append('CURUYUQU.xls')

        fields = ("line", "family", "genus", "species", "liana", "count")
        lines = []
        tax = []
        for filename in filelist:
            print("Extracting data from " + filename + "...")
            book = xlrd.open_workbook(self.engine.format_filename(filename))
            sh = book.sheet_by_index(0)

            # Map normalised header names to column numbers; all stem
            # columns are collected together under the "stems" key.
            cn = {'stems': []}
            for colnum, c in enumerate(sh.row(0)):
                if Excel.empty_cell(c):
                    continue
                cid = c.value.lower().strip()
                # line number column is sometimes named differently
                if cid in ["sub", "number"]:
                    cid = "line"
                # the "number of individuals" column is named in various
                # different ways; they always at least contain "nd"
                if "nd" in cid:
                    cid = "count"
                # in QUIAPACA.xls the "number of individuals" column is
                # misnamed "STEMDBH" just like the stems columns, so it is
                # fixed manually by position
                if filename == "QUIAPACA.xls" and colnum == 13:
                    cid = "count"

                # if column is a stem, add it to the list of stems;
                # otherwise, make note of the column name/number
                if "stem" in cid or "dbh" in cid:
                    cn["stems"].append(colnum)
                else:
                    cn[cid] = colnum
            # sometimes, a data file does not contain a liana or count column
            cn.setdefault("liana", -1)
            cn.setdefault("count", -1)

            for rownum in range(1, sh.nrows):
                row = sh.row(rownum)
                # make sure the row is real, not just empty cells
                if all(Excel.empty_cell(cell) for cell in row):
                    continue

                this_line = {}
                # get the following information from the appropriate columns
                for field in fields:
                    col = cn[field]
                    if col > -1:
                        cell = row[col]
                        if cell.ctype != xlrd.XL_CELL_NUMBER:
                            # non-numeric cells are normalised as text
                            value = cell.value.lower().strip().replace("\\", "/").replace('"', '')
                        else:
                            value = cell.value
                        if value == '`':
                            value = 1
                        this_line[field] = value
                this_line["stems"] = [row[c]
                                      for c in cn["stems"]
                                      if not Excel.empty_cell(row[c])]
                this_line["site"] = filename[0:-4]

                # Manually correct CEDRAL data, which has a single line
                # that is shifted by one to the left starting at Liana
                if this_line["site"] == "CEDRAL" and isinstance(this_line["liana"], float):
                    this_line["liana"] = ""
                    this_line["count"] = 3
                    this_line["stems"] = [2.5, 2.5, 30, 18, 25]

                lines.append(this_line)

                # Check how far the species is identified
                full_id = 0
                if len(this_line["species"]) < 3:
                    if len(this_line["genus"]) < 3:
                        id_level = "family"
                    else:
                        id_level = "genus"
                else:
                    id_level = "species"
                    full_id = 1
                tax.append((this_line["family"],
                            this_line["genus"],
                            this_line["species"],
                            id_level,
                            str(full_id)))

        tax = sorted(tax, key=lambda group: group[0] + " " + group[1] + " " + group[2])
        unique_tax = []
        seen = set()  # constant-time membership instead of scanning a list
        tax_dict = {}
        tax_count = 0

        # Get all unique families/genera/species
        print("\n")
        for group in tax:
            if group in seen:
                continue
            seen.add(group)
            unique_tax.append(group)
            tax_count += 1
            tax_dict[group[0:3]] = tax_count
            if tax_count % 10 == 0:
                msg = "Generating taxonomic groups: " + str(tax_count) + " / " + str(TAX_GROUPS)
                # backspaces move the cursor back so the next update
                # overwrites this one; flush after writing so it shows
                sys.stdout.write(msg + "\b" * len(msg))
                sys.stdout.flush()
        print("\n")

        # Create species table
        table = Table("species", delimiter=",")
        table.columns = [("species_id", ("pk-int",)),
                         ("family", ("char",)),
                         ("genus", ("char",)),
                         ("species", ("char",)),
                         ("id_level", ("char", 10)),
                         ("full_id", ("int",))]

        data = [[str(tax_dict[group[:3]])] + ['"%s"' % g for g in group]
                for group in unique_tax]
        table.pk = 'species_id'
        table.contains_pk = True

        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(data)

        # Create stems table
        table = Table("stems", delimiter=",")
        table.columns = [("stem_id", ("pk-auto",)),
                         ("line", ("int",)),
                         ("species_id", ("int",)),
                         ("site_code", ("char", 12)),
                         ("liana", ("char", 10)),
                         ("stem", ("double",))]
        stems = []
        counts = []
        for line in lines:
            species_info = [line["line"],
                            tax_dict[(line["family"],
                                      line["genus"],
                                      line["species"])],
                            line["site"],
                            line.get("liana", "")]
            # rows from files without a count column have no "count" entry
            if "count" in line:
                counts.append(species_info + [line["count"]])

            # NOTE(review): entries of line["stems"] are the row's cell
            # objects (except for the CEDRAL override, which stores plain
            # numbers), so str() is applied to the cell object itself --
            # confirm this yields the text the engine expects.
            for stem in line["stems"]:
                stems.append(species_info + [str(stem)])

        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(stems)

        # Create counts table
        table = Table("counts", delimiter=",", contains_pk=False)
        table.columns = [("count_id", ("pk-auto",)),
                         ("line", ("int",)),
                         ("species_id", ("int",)),
                         ("site_code", ("char", 12)),
                         ("liana", ("char", 10)),
                         ("count", ("double",))]
        self.engine.table = table
        self.engine.create_table()
        self.engine.add_to_table(counts)

        return self.engine
Exemple #7
0
    def download(self, engine=None, debug=False):
        """Download the Global Wood Density Database workbook and load it.

        The workbook is fetched through the engine, then sheet index 1 is
        loaded into a tab-delimited "data" table and sheet index 2 into a
        tab-delimited "reference" table. In both sheets row 0 is skipped
        (presumably a header row -- confirm against the workbook) and rows
        whose cells are all empty are ignored.

        Returns self.engine after both tables have been created and filled.
        """
        Script.download(self, engine, debug)

        workbook_name = "GlobalWoodDensityDatabase.xls"
        self.engine.download_file(self.urls["GWDD"], workbook_name)
        filename = os.path.basename(workbook_name)
        book = xlrd.open_workbook(self.engine.format_filename(filename))

        def clean(cell):
            # Stringify the cell value, title-case it, turn backslashes into
            # forward slashes and drop double quotes.
            text = str(Excel.cell_value(cell))
            return text.title().replace("\\", "/").replace('"', '')

        def tab_rows(sheet, labels):
            # Build one tab-joined string per non-empty row of `sheet`,
            # taking one leading cell per entry in `labels`.
            total = sheet.nrows
            width = len(labels)
            joined = []
            for idx in range(1, total):
                cells = sheet.row(idx)
                if any(not Excel.empty_cell(cell) for cell in cells):
                    values = [clean(cells[col]) for col in range(width)]
                    joined.append('\t'.join(values))
            return joined

        # Creating data table
        data_labels = ("Number", "Family", "Binomial", "Wood_Density",
                       "Region", "Reference_Number")
        data_rows = tab_rows(book.sheet_by_index(1), data_labels)

        data_table = Table("data", delimiter="\t")
        data_table.columns = [
            ("Number", ("pk-int",)),
            ("Family", ("char",)),
            ("Binomial", ("char",)),
            ("Wood_Density", ("double",)),
            ("Region", ("char",)),
            ("Reference_Number", ("int",)),
        ]
        data_table.pk = 'Number'
        data_table.contains_pk = True

        self.engine.table = data_table
        self.engine.create_table()
        self.engine.add_to_table(data_rows)

        # Creating reference table
        reference_labels = ("Reference_Number", "Reference")
        reference_rows = tab_rows(book.sheet_by_index(2), reference_labels)

        reference_table = Table("reference", delimiter="\t")
        reference_table.columns = [
            ("Reference_Number", ("pk-int",)),
            ("Reference", ("char",)),
        ]
        reference_table.pk = 'Reference_Number'
        reference_table.contains_pk = True

        self.engine.table = reference_table
        self.engine.create_table()
        self.engine.add_to_table(reference_rows)

        return self.engine