Ejemplo n.º 1
0
 def __datasource_to_csv(self, execution_date):
     """Export the extract query's result set to a gzip-compressed CSV file.

     ``$EXECUTION_DATE`` in ``self.extract_query`` is replaced with the
     single-quoted ``execution_date``, the query is run through a
     ``PostgresHook`` built from ``self.connection``, and the rows are
     written pipe-delimited, preceded by a header row of column names, to
     ``<file_path><table_name>_<execution_date>.csv.gz``. The archive's
     mode is then set to ``0o777``.

     Args:
         execution_date: Date string substituted into the query and
             embedded in the output file name.
     """
     # NOTE(review): the date is spliced into the SQL text rather than bound
     # as a query parameter. Binding would change how literal '%' characters
     # in extract_query are interpreted, so the splice is kept; confirm that
     # execution_date always comes from a trusted source.
     final_query = self.extract_query.replace(
         "$EXECUTION_DATE", "'%s'" % execution_date)
     logging.info("QUERY : %s", final_query)

     # Fetch everything first, then release the cursor and the connection
     # even if the query fails, so no DB resources are leaked.
     connection = PostgresHook(self.connection).get_conn()
     try:
         cursor = connection.cursor()
         try:
             cursor.execute(final_query)
             rows = cursor.fetchall()
             header = [column[0] for column in cursor.description]
         finally:
             cursor.close()
     finally:
         connection.close()

     full_path = "{}{}_{}.csv.gz".format(
         self.file_path, self.table_name, execution_date)

     # Write straight into the gzip stream: no intermediate .csv file and no
     # second in-memory copy of the data. newline='' lets the csv module
     # control line endings itself (avoids '\r\r\n' on Windows).
     with gzip.open(full_path, 'wt', newline='') as archive:
         writer = csv.writer(
             archive, quoting=csv.QUOTE_MINIMAL, delimiter='|')
         writer.writerow(header)
         writer.writerows(rows)

     # Change access mode
     os.chmod(full_path, 0o777)
Ejemplo n.º 2
0
 def __csv_to_db(self, execution_date):
     """Presumably meant to load the gzipped CSV export into the database.

     NOTE(review): that purpose is inferred from the method name only; the
     visible body does not load anything. It opens
     ``<file_path><table_name>_<execution_date>.csv.gz`` and builds a
     ``csv.reader`` that is never iterated, assigns a ``CREATE TABLE``
     statement that is never executed, and then runs what looks like a copy
     of the CSV-export logic against ``final_query``, a name this method
     never assigns. Confirm the intended behavior before relying on it.

     Args:
         execution_date: Date string embedded in the CSV file names.
     """
     # Path of the gzip-compressed CSV: file_path + table_name + '_' +
     # execution_date + '.csv.gz'.
     csv_file_path = self.file_path + \
         self.table_name + \
         '_' + execution_date + '.csv.gz'
     # NOTE(review): the reader is created but no rows are read from it, and
     # `f` is closed as soon as this with-block exits, so `csvobj` cannot be
     # iterated afterwards. It is not referenced again below.
     with gzip.open(csv_file_path, 'rt') as f:
         csvobj = csv.reader(f,delimiter = '|',quotechar='"')
     
     # NOTE(review): this DDL string is assigned but never executed or
     # otherwise used in the visible body.
     create_query = """
     CREATE TABLE etl.order (
         id bigint primary key,
         student_id bigint,
         teacher_id bigint,
         stage varchar(10),
         status varchar(512),
         created_at timestamp,
         updated_at timestamp
     );
     """
     
     
     
     cursor = PostgresHook(self.connection).get_conn().cursor()
     # NOTE(review): `final_query` is not assigned anywhere in this method;
     # unless a module-level name exists outside this view, this line raises
     # NameError.
     cursor.execute(final_query)
     result = cursor.fetchall()
     # Write the header row (column names) and the fetched rows to a
     # pipe-delimited CSV file
     temp_path = self.file_path + \
         self.table_name + \
         '_' + execution_date + '.csv'
     with open(temp_path, 'w') as fp:
         a = csv.writer(fp, quoting=csv.QUOTE_MINIMAL, delimiter='|')
         a.writerow([i[0] for i in cursor.description])
         a.writerows(result)
     # Read the whole CSV file back into memory as bytes
     full_path = temp_path + '.gz'
     with open(temp_path, 'rb') as f:
         data = f.read()
     # Compress CSV file; note this writes to the same '.csv.gz' path that
     # was opened for reading at the top of the method
     with gzip.open(full_path, 'wb') as output:
         try:
             output.write(data)
         finally:
             # Redundant: the enclosing with-block already closes `output`
             output.close()
     # Redundant: `f` was already closed by its with-block above
     f.close()
     # Delete the uncompressed csv file
     os.remove(temp_path)
     # Change access mode of the compressed file to 0o777
     os.chmod(full_path, 0o777)