Example #1
0
 def to_csv(self):
     """Convert the json engine's data to CSV and sort the result.

     One pass is made per entry of ``self.script.tables``: the entry's
     insert columns become the CSV header handed to ``json2csv`` and the
     file that call returns is passed to ``sort_csv``.
     """
     for table_key in list(self.script.tables):
         # NOTE(review): table_name() is called without the loop key, so
         # every pass converts the same name -- confirm this is intended.
         source_name = self.table_name()
         column_header = self.script.tables[table_key].get_insert_columns(
             join=False, create=True)
         sort_csv(json2csv(source_name, header_values=column_header))
Example #2
0
    def to_csv(self, sort=True, path=None):
        """Dump every registered table of the current script to a CSV file.

        For each entry registered under ``self.script.name`` the table is
        read with ``SELECT *`` and written, header row first, to
        ``<table name>.csv``.

        Args:
            sort: when true (the default) each finished file is passed to
                ``sort_csv``.
            path: directory to write the files into; when empty or None
                the file name is used as-is (relative path).

        The engine is disconnected once all tables have been written.
        """
        # Due to Cyclic imports we can not move this import to the top
        from retriever.lib.engine_tools import sort_csv

        for table_name in self.script_table_registry[self.script.name]:

            csv_file_output = os.path.normpath(
                os.path.join(path if path else '', table_name[0] + '.csv'))
            csv_file = open_fw(csv_file_output)
            # Close the output file even when the query or a write fails,
            # so the handle is never leaked.
            try:
                csv_writer = open_csvw(csv_file)
                self.get_cursor()
                self.set_engine_encoding()
                self.cursor.execute("SELECT * FROM  {};".format(table_name[0]))
                row = self.cursor.fetchone()
                column_names = [
                    u'{}'.format(tuple_i[0]) for tuple_i in self.cursor.description
                ]
                csv_writer.writerow(column_names)
                while row is not None:
                    csv_writer.writerow(row)
                    row = self.cursor.fetchone()
            finally:
                csv_file.close()
            if sort:
                sort_csv(csv_file_output)
        self.disconnect()
Example #3
0
 def to_csv(self):
     """Convert the xml engine's data to CSV and sort the result.

     One pass is made per entry of ``self.script.tables``: the entry's
     insert columns become the CSV header handed to ``xml2csv`` and the
     file that call returns is passed to ``sort_csv``.
     """
     for table_key in list(self.script.tables):
         # NOTE(review): table_name() is called without the loop key, so
         # every pass converts the same name -- confirm this is intended.
         source_name = self.table_name()
         column_header = self.script.tables[table_key].get_insert_columns(
             join=False, create=True)
         sort_csv(xml2csv(source_name, header_values=column_header))
Example #4
0
 def to_csv(self, sort=True, path=None, select_columns=None):
     """Export table from json engine to CSV file.

     Args:
         sort: when true (the default) each CSV file returned by
             ``json2csv`` is passed to ``sort_csv``; when false the
             file is left as written.
         path: directory to write the CSV files into; when empty or
             None the file name is used as-is (relative path).
         select_columns: accepted for signature compatibility; not
             used by this implementation.
     """
     for table_item in self.script_table_registry[self.script.name]:
         header = table_item[1].get_insert_columns(join=False, create=True)
         # Name the output after the source file: strip its directory
         # and extension, then append ".csv".
         base_name = os.path.splitext(os.path.basename(table_item[0]))[0]
         outputfile = os.path.normpath(
             os.path.join(path if path else '', base_name + '.csv'))
         csv_outfile = json2csv(table_item[0], output_file=outputfile, header_values=header)
         # Honour the caller's flag instead of sorting unconditionally.
         if sort:
             sort_csv(csv_outfile)
Example #5
0
 def to_csv(self, sort=True, path=None):
     """Export table from xml engine to CSV file.

     Args:
         sort: when true (the default) each CSV file returned by
             ``xml2csv`` is passed to ``sort_csv``; when false the
             file is left as written.
         path: directory to write the CSV files into; when empty or
             None the file name is used as-is (relative path).
     """
     for table_item in self.script_table_registry[self.script.name]:
         header = table_item[1].get_insert_columns(join=False, create=True)
         # Name the output after the source file: strip its directory
         # and extension, then append ".csv".
         base_name = os.path.splitext(os.path.basename(table_item[0]))[0]
         outputfile = os.path.normpath(
             os.path.join(path if path else '', base_name + '.csv'))
         csv_outfile = xml2csv(table_item[0], outputfile=outputfile, header_values=header)
         # Honour the caller's flag instead of sorting unconditionally.
         if sort:
             sort_csv(csv_outfile)
Example #6
0
 def to_csv(self):
     """Convert each table of the json engine to CSV and sort the result.

     The source name of every table is built from the ``table_name``
     template in ``self.opts`` using the database name and the table
     key; the table's insert columns are used as the CSV header.
     """
     for table_key in list(self.script.tables):
         source_name = self.opts['table_name'].format(db=self.db_name, table=table_key)
         column_header = self.script.tables[table_key].get_insert_columns(
             join=False, create=True)
         converted_file = json2csv(source_name, header_values=column_header)
         sort_csv(converted_file)
Example #7
0
    def to_csv(self,
               sort=True,
               path=None,
               select_columns=None,
               select_table=None):
        """Create CSV files from a data store.

        Every table registered under ``self.script.name`` is queried and
        written, header row first, to ``<table name>.csv``.

        Args:
            sort: when true (the default) each finished file is passed to
                ``sort_csv``.
            path: directory to write the files into; when empty or None
                the file name is used as-is (relative path).
            select_columns: optional list of column names; when given,
                only those columns are selected and the query is
                restricted with ``LIMIT 3``.
            select_table: accepted for signature compatibility; not used
                by this implementation.

        The engine is disconnected once all tables have been written.
        """
        # Due to Cyclic imports we can not move this import to the top
        from retriever.lib.engine_tools import sort_csv

        for table_name in self.script_table_registry[self.script.name]:

            csv_file_output = os.path.normpath(
                os.path.join(path if path else '', table_name[0] + '.csv'))
            self.get_cursor()
            self.set_engine_encoding()
            csv_file = open_fw(csv_file_output, encoding=self.encoding)
            # Close the output file even when the query or a write fails,
            # so the handle is never leaked.
            try:
                csv_writer = open_csvw(csv_file)

                limit = ""
                cols = "*"
                if select_columns:
                    limit = "LIMIT 3"
                    cols = ",".join(select_columns)
                sql_query = "SELECT {cols} FROM  {tab} {limit};"
                self.cursor.execute(
                    sql_query.format(cols=cols, tab=table_name[0], limit=limit))
                row = self.cursor.fetchone()
                column_names = [
                    u'{}'.format(tuple_i[0]) for tuple_i in self.cursor.description
                ]
                csv_writer.writerow(column_names)
                while row is not None:
                    csv_writer.writerow(row)
                    row = self.cursor.fetchone()
            finally:
                csv_file.close()
            if sort:
                sort_csv(csv_file_output)
        self.disconnect()
Example #8
0
def test_sort_csv():
    """Test that sort_csv orders the data rows and keeps the header first."""
    data_file = create_file(
        ['User,Country,Age', 'Ben,US,24', 'Alex,US,25', 'Alex,PT,25'])
    out_file = sort_csv(data_file)
    try:
        obs_out = file_2list(out_file)
    finally:
        # Remove the generated file even if reading it back fails, so a
        # broken run does not leave test artefacts behind.
        os.remove(out_file)
    assert obs_out == [
        'User,Country,Age', 'Alex,PT,25', 'Alex,US,25', 'Ben,US,24'
    ]
Example #9
0
 def to_csv(self):
     """Dump the engine's table to ``<table name>.csv`` and sort the file.

     One pass is made per key of ``self.script.urls``. Each pass runs
     ``SELECT *`` on the name returned by ``self.table_name()``, writes
     the column names followed by every row, and passes the finished
     file to ``sort_csv``. The engine is disconnected afterwards.
     """
     # Due to Cyclic imports we can not move this import to the top
     from retriever.lib.engine_tools import sort_csv
     for _ in list(self.script.urls.keys()):
         # NOTE(review): table_name() does not depend on the loop key, so
         # every pass exports the same name -- confirm this is intended.
         table_name = self.table_name()
         csv_file_output = os.path.normpath(table_name + '.csv')
         csv_file = open_fw(csv_file_output)
         # Close the output file even when the query or a write fails,
         # so the handle is never leaked.
         try:
             csv_writer = open_csvw(csv_file)
             self.get_cursor()
             self.set_engine_encoding()
             self.cursor.execute("SELECT * FROM  {};".format(table_name))
             row = self.cursor.fetchone()
             colnames = [u'{}'.format(tuple_i[0]) for tuple_i in self.cursor.description]
             csv_writer.writerow(colnames)
             while row is not None:
                 csv_writer.writerow(row)
                 row = self.cursor.fetchone()
         finally:
             csv_file.close()
         sort_csv(csv_file_output)
     self.disconnect()
Example #10
0
    def to_csv(self, sort=True, path=None, select_columns=None):
        """Export table from xml engine to CSV file.

        Args:
            sort: when true (the default) each CSV file returned by
                ``xml2csv_test`` is passed to ``sort_csv`` using
                ``self.encoding``; when false the file is left as written.
            path: directory to write the CSV files into; when empty or
                None the file name is used as-is (relative path).
            select_columns: accepted for signature compatibility; not
                used by this implementation.
        """
        for table_item in self.script_table_registry[self.script.name]:
            header = table_item[1].get_insert_columns(join=False, create=True)
            # Name the output after the source file: strip its directory
            # and extension, then append ".csv".
            base_name = os.path.splitext(os.path.basename(table_item[0]))[0]
            outputfile = os.path.normpath(
                os.path.join(path if path else '', base_name + '.csv'))

            csv_outfile = xml2csv_test(table_item[0],
                                       outputfile,
                                       header,
                                       row_tag="row")
            # Honour the caller's flag instead of sorting unconditionally.
            if sort:
                sort_csv(csv_outfile, encoding=self.encoding)
Example #11
0
 def to_csv(self, sort=True):
     """Dump every table of the current script to ``<table name>.csv``.

     For each key of ``self.script.tables`` the name returned by
     ``self.table_name(name=key)`` is queried with ``SELECT *`` and the
     column names followed by every row are written to the CSV file.

     Args:
         sort: when true (the default) each finished file is passed to
             ``sort_csv``.

     The engine is disconnected once all tables have been written.
     """
     # Due to Cyclic imports we can not move this import to the top
     from retriever.lib.engine_tools import sort_csv
     for table_n in list(self.script.tables.keys()):
         table_name = self.table_name(name=table_n)
         csv_file_output = os.path.normpath(table_name + '.csv')
         csv_file = open_fw(csv_file_output)
         # Close the output file even when the query or a write fails,
         # so the handle is never leaked.
         try:
             csv_writer = open_csvw(csv_file)
             self.get_cursor()
             self.set_engine_encoding()
             self.cursor.execute("SELECT * FROM  {};".format(table_name))
             row = self.cursor.fetchone()
             colnames = [u'{}'.format(tuple_i[0]) for tuple_i in self.cursor.description]
             csv_writer.writerow(colnames)
             while row is not None:
                 csv_writer.writerow(row)
                 row = self.cursor.fetchone()
         finally:
             csv_file.close()
         if sort:
             sort_csv(csv_file_output)
     self.disconnect()
Example #12
0
def test_sort_csv():
    """Test that sort_csv orders the data rows and keeps the header first."""
    data_file = create_file(['User,Country,Age',
                             'Ben,US,24',
                             'Alex,US,25',
                             'Alex,PT,25'])
    out_file = sort_csv(data_file)
    try:
        obs_out = file_2list(out_file)
    finally:
        # Remove the generated file even if reading it back fails, so a
        # broken run does not leave test artefacts behind.
        os.remove(out_file)
    assert obs_out == [
        'User,Country,Age',
        'Alex,PT,25',
        'Alex,US,25',
        'Ben,US,24']
Example #13
0
    def to_csv(self, sort=True, path=None, select_columns=None):
        """Create CSV files from a data store.

        Every table registered under ``self.script.name`` is queried and
        written, header row first, to ``<table name>.csv``.

        Args:
            sort: when true (the default) each finished file is passed to
                ``sort_csv``.
            path: directory to write the files into; when empty or None
                the file name is used as-is (relative path).
            select_columns: optional list of column names; when given,
                only those columns are selected and the query is
                restricted with ``LIMIT 3``.

        The engine is disconnected once all tables have been written.
        """
        # Due to Cyclic imports we can not move this import to the top
        from retriever.lib.engine_tools import sort_csv

        for table_name in self.script_table_registry[self.script.name]:

            csv_file_output = os.path.normpath(os.path.join(path if path else '',
                                                            table_name[0] + '.csv'))
            csv_file = open_fw(csv_file_output)
            # Close the output file even when the query or a write fails,
            # so the handle is never leaked.
            try:
                csv_writer = open_csvw(csv_file)
                self.get_cursor()
                self.set_engine_encoding()
                limit = ""
                cols = "*"
                if select_columns:
                    limit = "LIMIT 3"
                    cols = ",".join(select_columns)
                sql_query = "SELECT {cols} FROM  {tab} {limit};"
                self.cursor.execute(sql_query.format(cols=cols, tab=table_name[0], limit=limit))
                row = self.cursor.fetchone()
                column_names = [u'{}'.format(tuple_i[0])
                                for tuple_i in self.cursor.description]
                csv_writer.writerow(column_names)
                while row is not None:
                    csv_writer.writerow(row)
                    row = self.cursor.fetchone()
            finally:
                csv_file.close()
            if sort:
                sort_csv(csv_file_output)
        self.disconnect()
Example #14
0
 def to_csv(self, sort=True):
     """Export table from json engine to CSV file.

     Each entry registered under ``self.script.name`` is converted with
     ``json2csv``, using the entry's insert columns as the CSV header.

     Args:
         sort: when true (the default) each CSV file returned by
             ``json2csv`` is passed to ``sort_csv``; when false the
             file is left as written.
     """
     for table_item in self.script_table_registry[self.script.name]:
         header = table_item[1].get_insert_columns(join=False, create=True)
         csv_outfile = json2csv(table_item[0], header_values=header)
         # Honour the caller's flag instead of sorting unconditionally.
         if sort:
             sort_csv(csv_outfile)
Example #15
0
 def to_csv(self, sort=True):
     """Sort the CSV file of every table registered for the script.

     The first element of each entry registered under
     ``self.script.name`` is passed to ``sort_csv``. The ``sort``
     argument is accepted but not consulted here.
     """
     registered_entries = self.script_table_registry[self.script.name]
     for entry in registered_entries:
         csv_source = entry[0]
         sort_csv(csv_source)
Example #16
0
 def to_csv(self):
     """Sort the CSV file behind every table of the script.

     The file name for each table is built from the ``table_name``
     template in ``self.opts`` using the database name and table key.
     """
     for table_key in self.script.tables:
         sort_csv(
             self.opts['table_name'].format(db=self.db_name, table=table_key))
Example #17
0
 def to_csv(self, sort=True, path=None, select_columns=None):
     """Sort the CSV file of every table registered for the script.

     The first element of each entry registered under
     ``self.script.name`` is passed to ``sort_csv``. The ``sort``,
     ``path`` and ``select_columns`` arguments are accepted but not
     consulted here.
     """
     registered_entries = self.script_table_registry[self.script.name]
     for entry in registered_entries:
         csv_source = entry[0]
         sort_csv(csv_source)
Example #18
0
 def to_csv(self):
     """Sort the engine's CSV file once per table of the script.

     ``sort_csv`` is called with the result of ``self.table_name()`` on
     every pass over ``self.script.tables``.
     """
     for _table_key in self.script.tables:
         # NOTE(review): the loop key is not used, so the same name is
         # sorted on every pass -- confirm this is intended.
         current_name = self.table_name()
         sort_csv(current_name)