Esempio n. 1
0
    def execute(self, context: 'Context') -> None:
        """Copy a CSV object from GCS into a Presto table.

        Downloads ``self.source_object`` from ``self.source_bucket`` into a
        temporary file, takes the first CSV row as the target column names
        and inserts every remaining row into ``self.presto_table``.

        :param context: execution context; not used by this method.
        :raises ValueError: if the downloaded file contains no rows at all,
            so there is no header row to take the column names from.
        """
        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        presto_hook = PrestoHook(presto_conn_id=self.presto_conn_id)

        # newline="" is what the csv module requires of file objects it reads;
        # without it, newlines embedded in quoted fields can be mangled.
        with NamedTemporaryFile("w+", newline="") as temp_file:
            self.log.info("Downloading data from %s", self.source_object)
            gcs_hook.download(
                bucket_name=self.source_bucket,
                object_name=self.source_object,
                filename=temp_file.name,
            )

            reader = csv.reader(temp_file)

            # The first row is the header; an empty file has none, so fail
            # with an explicit message instead of a bare IndexError.
            header = next(reader, None)
            if header is None:
                raise ValueError(
                    f"CSV object {self.source_object!r} in bucket "
                    f"{self.source_bucket!r} is empty; expected a header row"
                )
            fields = tuple(header)

            # Materialise the remaining rows while the temp file is still open.
            rows = [tuple(row) for row in reader]

            self.log.info("Inserting data into %s", self.presto_table)
            presto_hook.insert_rows(table=self.presto_table, rows=rows, target_fields=fields)
Esempio n. 2
0
    def execute(self, context: 'Context') -> None:
        """Copy a CSV object from GCS into a Presto table.

        Downloads ``self.source_object`` from ``self.source_bucket`` into a
        temporary file and inserts every row of it into
        ``self.presto_table``. The target column names are resolved in this
        order:

        1. ``self.schema_fields`` when it is truthy;
        2. otherwise the JSON document stored at ``self.schema_object`` in
           ``self.source_bucket`` when ``self.schema_object`` is truthy;
        3. otherwise no ``target_fields`` argument is passed to the hook.

        :param context: execution context; not used by this method.
        """
        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        presto_hook = PrestoHook(presto_conn_id=self.presto_conn_id)

        # newline="" is what the csv module requires of file objects it reads;
        # without it, newlines embedded in quoted fields can be mangled.
        with NamedTemporaryFile("w+", newline="") as temp_file:
            self.log.info("Downloading data from %s", self.source_object)
            gcs_hook.download(
                bucket_name=self.source_bucket,
                object_name=self.source_object,
                filename=temp_file.name,
            )

            # Lazy generator: rows are only read from the temp file while
            # insert_rows consumes them, so the insert must stay inside this
            # ``with`` block.
            rows = (tuple(row) for row in csv.reader(temp_file))
            self.log.info("Inserting data into %s", self.presto_table)

            # Only pass target_fields when a schema was actually resolved, so
            # the no-schema call is exactly ``insert_rows(table=..., rows=...)``.
            insert_kwargs = {}
            if self.schema_fields:
                insert_kwargs["target_fields"] = self.schema_fields
            elif self.schema_object:
                # Called without a filename, download is used for its return
                # value, which is decoded as UTF-8 encoded JSON.
                blob = gcs_hook.download(
                    bucket_name=self.source_bucket,
                    object_name=self.schema_object,
                )
                insert_kwargs["target_fields"] = json.loads(blob.decode("utf-8"))

            presto_hook.insert_rows(table=self.presto_table, rows=rows, **insert_kwargs)