def __datasource_to_csv(self, execution_date):
    """Run the extraction query for *execution_date* and dump the result
    set to a gzip-compressed, pipe-delimited CSV file.

    The output file is ``<file_path><table_name>_<execution_date>.csv.gz``
    and is made world-readable/writable so downstream tasks (possibly
    running as other users) can pick it up.

    :param execution_date: date string substituted for the
        ``$EXECUTION_DATE`` placeholder in ``self.extract_query``.
    """
    final_query = self.extract_query.replace(
        "$EXECUTION_DATE", """'%s'""" % execution_date)
    logging.info("QUERY : %s" % final_query)

    # Fetch the rows, making sure the cursor and connection are released
    # even if the query fails (the original leaked both).
    conn = PostgresHook(self.connection).get_conn()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(final_query)
            result = cursor.fetchall()
            header = [col[0] for col in cursor.description]
        finally:
            cursor.close()
    finally:
        conn.close()

    full_path = (self.file_path + self.table_name +
                 '_' + execution_date + '.csv.gz')

    # Stream straight into the gzip file: avoids writing an intermediate
    # plain-text CSV, re-reading it fully into memory, and deleting it.
    with gzip.open(full_path, 'wt', newline='') as fp:
        writer = csv.writer(fp, quoting=csv.QUOTE_MINIMAL, delimiter='|')
        writer.writerow(header)
        writer.writerows(result)

    # Change access mode so any downstream consumer can read the file.
    os.chmod(full_path, 0o777)
def __csv_to_db(self, execution_date):
    """Load the gzip-compressed CSV produced for *execution_date* into
    the ``etl.order`` table.

    The original body was a copy-paste of the extraction method: it
    referenced an undefined ``final_query`` (guaranteed ``NameError``)
    and never used ``create_query`` or the parsed CSV reader. This
    version performs the load the method name and inputs imply:
    create the target table, then insert every data row from the file.

    :param execution_date: date string identifying the CSV file,
        ``<file_path><table_name>_<execution_date>.csv.gz``.
    """
    csv_file_path = (self.file_path + self.table_name +
                     '_' + execution_date + '.csv.gz')

    create_query = """
        CREATE TABLE etl.order (
            id bigint primary key,
            student_id bigint,
            teacher_id bigint,
            stage varchar(10),
            status varchar(512),
            created_at timestamp,
            updated_at timestamp
        );
    """

    # Parameterized insert matching the table definition above.
    insert_query = (
        "INSERT INTO etl.order "
        "(id, student_id, teacher_id, stage, status, created_at, updated_at) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s)"
    )

    conn = PostgresHook(self.connection).get_conn()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(create_query)
            with gzip.open(csv_file_path, 'rt') as f:
                csvobj = csv.reader(f, delimiter='|', quotechar='"')
                # First row is the header written by the extract step.
                next(csvobj, None)
                for row in csvobj:
                    cursor.execute(insert_query, row)
            # Commit once after the full load so a mid-file failure
            # leaves no partial data behind.
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()