Example 1
import csv
import os

# These helper functions ship with the retriever library; the import path
# below matches the one used in Example 3 and may differ between versions.
from retriever.lib.tools import open_csvw, open_fr, open_fw, sort_file


def sort_csv(filename):
    """Sort CSV rows minus the header and return the file
    function is used for only testing and can handle the file of the size
    """
    filename = os.path.normpath(filename)
    input_file = open_fr(filename)
    csv_reader_infile = csv.reader(input_file, escapechar="\\")
    # Write the data to a temporary file and sort it.
    temp_path = os.path.normpath("tempfile")
    temp_file = open_fw(temp_path)

    csv_writer = open_csvw(temp_file)
    i = 0
    for row in csv_reader_infile:
        if i == 0:
            # The first entry is the header line
            infields = row
            i += 1
        else:
            csv_writer.writerow(row)
    input_file.close()
    temp_file.close()

    # sort the temp file
    sorted_txt = sort_file(temp_path)
    tmp = open_fr(sorted_txt)
    in_txt = csv.reader(tmp, delimiter=',', escapechar="\\")
    csv_file = open_fw(filename)
    csv_writer = open_csvw(csv_file)
    csv_writer.writerow(infields)
    csv_writer.writerows(in_txt)
    tmp.close()
    csv_file.close()
    os.remove(os.path.normpath(temp_path))
    return filename
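
A minimal usage sketch for sort_csv, assuming the retriever package is installed and that the helper is importable from retriever.lib.tools (the same path Example 3 uses); the file name is hypothetical:

import csv

from retriever.lib.tools import sort_csv

# Build a small unsorted CSV with a header row (hypothetical file name).
with open("species.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["id", "name"])
    writer.writerows([["2", "b"], ["1", "a"], ["3", "c"]])

sort_csv("species.csv")  # data rows are sorted in place; the header stays first

with open("species.csv") as f:
    print(f.read())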
Example 2
    def create_table(self):
        """Create the table by creating an empty CSV file."""
        self.auto_column_number = 1
        self.file = open_fw(self.table_name())
        self.output_file = open_csvw(self.file)
        # Write the header row of column names.
        self.output_file.writerow([u'{}'.format(val) for val in
                                   self.table.get_insert_columns(join=False, create=True)])
        self.table_names.append((self.file, self.table_name()))
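
In a hedged standalone form, the header-row write above boils down to plain csv module usage; the column names below are hypothetical stand-ins for what self.table.get_insert_columns(join=False, create=True) would return:

import csv

# Hypothetical column names standing in for get_insert_columns().
columns = ["id", "genus", "species"]

with open("table.csv", "w", newline="") as f:
    # "Creating" the empty table means writing only the header row.
    csv.writer(f).writerow([u"{}".format(val) for val in columns])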
Example 3
    def to_csv(self):
        # Due to cyclic imports we cannot move this import to the top.
        from retriever.lib.tools import sort_csv
        for item in list(self.script.urls.keys()):
            table_name = self.table_name()
            csv_file_output = os.path.normpath(table_name + '.csv')
            csv_file = open_fw(csv_file_output)
            csv_writer = open_csvw(csv_file)
            self.get_cursor()
            self.set_engine_encoding()
            self.cursor.execute("SELECT * FROM {};".format(table_name))
            row = self.cursor.fetchone()
            colnames = [u'{}'.format(tuple_i[0]) for tuple_i in self.cursor.description]
            csv_writer.writerow(colnames)
            while row is not None:
                csv_writer.writerow(row)
                row = self.cursor.fetchone()
            csv_file.close()
            sort_csv(csv_file_output)
        self.disconnect()
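
A standalone sketch of the same dump loop, with sqlite3 standing in for the engine's cursor plumbing; the table name and data are hypothetical:

import csv
import sqlite3

# Hypothetical in-memory table standing in for the engine's data.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE data (id INTEGER, name TEXT)")
conn.executemany("INSERT INTO data VALUES (?, ?)", [(2, "b"), (1, "a")])

cursor = conn.execute("SELECT * FROM data;")
with open("data.csv", "w", newline="") as f:
    writer = csv.writer(f)
    # Column names come from the DB-API cursor description, as in to_csv.
    writer.writerow([d[0] for d in cursor.description])
    row = cursor.fetchone()
    while row is not None:
        writer.writerow(row)
        row = cursor.fetchone()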
Example 4
    def download(self, engine=None, debug=False):
        Script.download(self, engine, debug)
        # Python 2 only: reset the default encoding to UTF-8.
        if hasattr(sys, 'setdefaultencoding'):
            reload(sys)
            sys.setdefaultencoding("utf-8")

        self.engine.download_file(self.urls["GWDD"], "GlobalWoodDensityDatabase.xls")
        filename = os.path.basename("GlobalWoodDensityDatabase.xls")
        book = xlrd.open_workbook(self.engine.format_filename(filename))
        sh = book.sheet_by_index(1)
        rows = sh.nrows

        # Creating data files
        file_path = self.engine.format_filename("gwdd_data.csv")
        gwdd_data = open_fw(file_path)
        csv_writer = open_csvw(gwdd_data)
        csv_writer.writerow(["Number", "Family", "Binomial", "Wood_Density", "Region", "Reference_Number"])

        for index in range(1, rows):
            row = sh.row(index)
            # Get each row and format the cell values.
            row_as_list = [to_str(column_value.value) for column_value in row]
            csv_writer.writerow(row_as_list)
        gwdd_data.close()

        table = Table("data", delimiter=",")
        table.columns = [("Number", ("pk-int",)),
                         ("Family", ("char",)),
                         ("Binomial", ("char",)),
                         ("Wood_Density", ("double",)),
                         ("Region", ("char",)),
                         ("Reference_Number", ("int",))]
        table.pk = 'Number'
        table.contains_pk = True

        self.engine.table = table
        self.engine.create_table()
        self.engine.insert_data_from_file(self.engine.format_filename(file_path))

        # Creating the reference table file
        file_path = self.engine.format_filename("gwdd_ref.csv")
        ref_file = open_fw(file_path)
        csv_writerd = open_csvw(ref_file)
        csv_writerd.writerow(["Reference_Number", "Reference"])
        sh = book.sheet_by_index(2)
        rows = sh.nrows
        for index in range(1, rows):
            row = sh.row(index)
            # Get each row and format the cell values.
            row_as_list = [to_str(column_value.value, object_encoding=sys.stdout) for column_value in row]
            csv_writerd.writerow(row_as_list)
        ref_file.close()

        table = Table("reference", delimiter=",")
        table.columns = [("Reference_Number", ("pk-int",)), ("Reference", ("char",))]
        table.pk = 'Reference_Number'
        table.contains_pk = True
        self.engine.table = table
        self.engine.create_table()
        self.engine.insert_data_from_file(self.engine.format_filename(file_path))

        return self.engine
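
The xls-to-csv step can be sketched on its own with xlrd; this assumes GlobalWoodDensityDatabase.xls has already been downloaded to the working directory and keeps the example's sheet index and header row:

import csv

import xlrd

book = xlrd.open_workbook("GlobalWoodDensityDatabase.xls")
sh = book.sheet_by_index(1)  # the data sheet, as in the example

with open("gwdd_data.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Number", "Family", "Binomial", "Wood_Density",
                     "Region", "Reference_Number"])
    for index in range(1, sh.nrows):  # skip the sheet's own header row
        writer.writerow([cell.value for cell in sh.row(index)])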
Example 5
    def download(self, engine=None, debug=False):
        Script.download(self, engine, debug)

        engine = self.engine
        csv_files = []
        request_src = "http://www.data-retriever.org/"
        base_url = "http://www.usanpn.org/npn_portal/observations/getObservations.xml?start_date={startYear}&end_date={endYear_date}&request_src={request_src}"
        header_values = [
            "observation_id", "update_datetime", "site_id", "latitude",
            "longitude", "elevation_in_meters", "state", "species_id", "genus",
            "species", "common_name", "kingdom", "individual_id",
            "phenophase_id", "phenophase_description", "observation_date",
            "day_of_year", "phenophase_status", "intensity_category_id",
            "intensity_value", "abundance_value"
        ]

        columns = [
            ("record_id", ("pk-auto", )),
            ("observation_id",
             ("int", )),  # subsequently refered to as "status record"
            ("update_datetime", ("char", )),
            ("site_id", ("int", )),
            ("latitude", ("double", )),
            ("longitude", ("double", )),
            ("elevation_in_meters", ("char", )),
            ("state", ("char", )),
            ("species_id", ("int", )),
            ("genus", ("char", )),
            ("species", ("char", )),
            ("common_name", ("char", )),
            ("kingdom", ("char", )),  # skip kingdom
            ("individual_id", ("char", )),
            ("phenophase_id", ("int", )),
            ("phenophase_description", ("char", )),
            ("observation_date", ("char", )),
            ("day_of_year", ("char", )),
            ("phenophase_status", ("char", )),
            ("intensity_category_id", ("char", )),
            ("intensity_value", ("char", )),
            ("abundance_value", ("char", ))
        ]

        start_date = datetime.date(2009, 1, 1)
        end_date = datetime.date.today()

        while start_date < end_date:
            to_date = start_date + datetime.timedelta(90)
            if to_date >= end_date:
                data_url = base_url.format(startYear=str(start_date),
                                           endYear_date=str(end_date),
                                           request_src=request_src)
            else:
                data_url = base_url.format(startYear=str(start_date),
                                           endYear_date=str(to_date),
                                           request_src=request_src)

            xml_file_name = '{}'.format(start_date) + ".xml"
            engine.download_file(data_url, xml_file_name)

            # Create a csv file for each 3-month window
            csv_observation = '{}'.format(start_date) + ".csv"
            csv_files.append(csv_observation)
            csv_buff = open_fw(engine.format_filename(csv_observation))
            csv_writer = open_csvw(csv_buff)

            csv_writer.writerow(header_values)

            # Parse the xml to read data. replace() removes the '{dataset}'
            # placeholder; str.strip() would strip characters, not a substring.
            fname = DATA_WRITE_PATH.replace('{dataset}', '') + 'NPN/' + xml_file_name
            with open(fname, 'r') as fp1:
                file_read = fp1.read()

            root = ET.fromstring(file_read)

            # Build the header-to-position map once, then write each
            # element's attributes in header order.
            index_map = {val: i for i, val in enumerate(header_values)}
            for elements in root:
                diction = sorted(elements.attrib.items(),
                                 key=lambda pair: index_map[pair[0]])
                csv_writer.writerow([x[1] for x in diction])

            csv_buff.close()
            start_date = to_date + datetime.timedelta(1)

        # Create table
        table = Table('observations',
                      delimiter=',',
                      pk='record_id',
                      contains_pk=True)
        table.columns = columns
        engine.table = table
        engine.create_table()
        for data_file in csv_files:
            engine.insert_data_from_file(engine.find_file(data_file))
        return engine
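
The 90-day request windows are the reusable idea here; this standalone sketch keeps the example's base_url and request_src and just prints each window's URL instead of downloading it:

import datetime

request_src = "http://www.data-retriever.org/"
base_url = ("http://www.usanpn.org/npn_portal/observations/getObservations.xml"
            "?start_date={startYear}&end_date={endYear_date}"
            "&request_src={request_src}")

start_date = datetime.date(2009, 1, 1)
end_date = datetime.date.today()

while start_date < end_date:
    # Clamp each 90-day window to the overall end date.
    to_date = min(start_date + datetime.timedelta(90), end_date)
    print(base_url.format(startYear=start_date, endYear_date=to_date,
                          request_src=request_src))
    start_date = to_date + datetime.timedelta(1)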