コード例 #1
0
    def get_sf_object(self, sf_fields, sf_conn_id, sf_object):
        """
        Fetch records for a Salesforce object.

        :param sf_fields: iterable of field-spec dicts, each carrying an
            'sf_name' key naming the Salesforce field to request.
        :param sf_conn_id: connection id used to build the SalesforceHook.
        :param sf_object: name of the Salesforce object to query.
        :return: query result returned by the hook.
        """
        # Authenticate before issuing any request.
        sf_conn = SalesforceHook(conn_id=sf_conn_id)
        sf_conn.sign_in()

        # Extract the Salesforce-side field names from the specs.
        field_names = [spec['sf_name'] for spec in sf_fields]

        logging.info("Making request for {0} fields from {1}".format(
            len(field_names), sf_object))

        return sf_conn.get_object_from_salesforce(sf_object, field_names)
コード例 #2
0
    def execute(self, context):
        """
        Execute the operator.

        Gets all the data for a particular Salesforce model and writes it
        to a file.

        :param context: Airflow task context (unused here).
        """
        logging.info("Prepping to gather data from Salesforce")

        # Load the SalesforceHook: it holds all the logic for connecting
        # to and getting data from Salesforce.
        hook = SalesforceHook(conn_id=self.conn_id, output=self.output)

        # Attempt to log in to Salesforce.
        # If this fails it raises right here and the task dies — there is
        # no point continuing unauthenticated.
        hook.sign_in()

        # If fields were not defined, assume the user wants all of them.
        if not self.fields:
            self.fields = hook.get_available_fields(self.object)

        # BUG FIX: the two adjacent string literals previously had no
        # separating space, producing "Making request for13 fields ...".
        logging.info("Making request for "
                     "{0} fields from {1}".format(len(self.fields),
                                                  self.object))

        if self.query:
            query = self.special_query(
                self.query, hook, relationship_object=self.relationship_object)
        else:
            query = hook.get_object_from_salesforce(self.object, self.fields)

        # Output the records from the query to a file;
        # the list of records is stored under the "records" key.
        logging.info("Writing query results to: {0}".format(self.output))
        hook.write_object_to_file(query['records'],
                                  filename=self.output,
                                  fmt=self.fmt,
                                  coerce_to_timestamp=self.coerce_to_timestamp,
                                  record_time_added=self.record_time_added)

        logging.info("Query finished!")
コード例 #3
0
    def execute(self, context):
        """
        Execute the operator.

        Gets all the data for a particular Salesforce model, writes it to
        a temporary file, and uploads that file to S3.

        :param context: Airflow task context (unused here).
        """
        logging.info("Prepping to gather data from Salesforce")

        # Named temporary file gives the query output an on-disk path we
        # can hand to the S3 hook; it is cleaned up when the block exits.
        with NamedTemporaryFile("w") as tmp:

            # Load the SalesforceHook
            hook = SalesforceHook(conn_id=self.sf_conn_id, output=tmp.name)

            # Attempt to log in to Salesforce.
            # BUG FIX: the previous bare ``except`` swallowed the login
            # error and let the task continue unauthenticated, failing
            # later with a confusing message. Log with traceback and
            # re-raise so the task dies here, as the original comment
            # intended.
            try:
                hook.sign_in()
            except Exception:
                logging.exception('Unable to login.')
                raise

            # If fields were not defined, all fields are pulled.
            if not self.fields:
                self.fields = hook.get_available_fields(self.object)

            logging.info("Making request for "
                         "{0} fields from {1}".format(len(self.fields),
                                                      self.object))

            if self.query:
                query = self.special_query(
                    self.query,
                    hook,
                    relationship_object=self.relationship_object)
            else:
                query = hook.get_object_from_salesforce(
                    self.object, self.fields)

            # Output the records from the query to a file;
            # the list of records is stored under the "records" key.
            logging.info("Writing query results to: {0}".format(tmp.name))

            hook.write_object_to_file(
                query['records'],
                filename=tmp.name,
                fmt=self.fmt,
                coerce_to_timestamp=self.coerce_to_timestamp,
                record_time_added=self.record_time_added)

            # Make sure everything is on disk before S3 reads the file.
            tmp.flush()

            dest_s3 = S3Hook(s3_conn_id=self.s3_conn_id)

            dest_s3.load_file(filename=tmp.name,
                              key=self.output,
                              bucket_name=self.s3_bucket,
                              replace=True)

            dest_s3.connection.close()

            # NOTE: the redundant ``tmp.close()`` was removed — the
            # ``with`` block closes (and deletes) the temp file itself.

        logging.info("Query finished!")
コード例 #4
0
    def execute(self, context):
        """
        Execute the operator.

        Runs a bulk SOQL query against Salesforce, normalizes each record
        (epoch-millisecond ints to timestamps, floats and numeric strings
        rounded to 2 places, keys lower-cased), writes the records as
        newline-delimited JSON to a temporary file, and uploads the file
        to Google Cloud Storage.

        :param context: Airflow task context (unused here).
        """
        # String values that look like plain decimals (e.g. "12.34");
        # compiled once instead of per field per record.
        decimal_re = re.compile(r"^(\d+\.\d+)$")

        with NamedTemporaryFile("w") as tmp:

            # Load the SalesforceHook
            hook = SalesforceHook(conn_id=self.sf_conn_id, output=tmp.name)

            # Attempt to log in to Salesforce.
            # BUG FIX: the previous bare ``except`` swallowed the login
            # error, so ``sf_conn`` stayed undefined and the query below
            # died with a confusing NameError. Log and re-raise instead.
            try:
                sf_conn = hook.sign_in()
            except Exception:
                logging.exception('Unable to login.')
                raise

            logging.info(self.soql)
            logging.info(self.object)

            logging.debug('Connecting to Salesforce...')
            query_results = sf_conn.bulk.__getattr__(self.object).query(
                self.soql)
            logging.info('Retrieved results...')

            logging.info(type(query_results))
            # BUG FIX: guard the peek — indexing [0] on an empty result
            # set raised IndexError.
            if query_results:
                logging.info('First line is:')
                logging.info(query_results[0])

            gcs = GoogleCloudStorageHook(self.gcs_conn_id)
            service = gcs.get_conn()

            logging.info('Preparing File...')

            # Every record in this run gets the same partition date;
            # computed once outside the loop.
            partition_date = date.today().strftime('%Y-%m-%d')

            buffered_lines = []

            for record in query_results:

                # Drop Salesforce bulk-API metadata; tolerate absence.
                record.pop('attributes', None)
                record["partition_date"] = partition_date

                for k, v in record.items():
                    if isinstance(v, float):
                        record[k] = round(v, 2)
                    # 13-digit ints are epoch milliseconds — format them
                    # as human-readable timestamps.
                    if isinstance(v, int) and len(str(v)) == 13:
                        record[k] = datetime.fromtimestamp(
                            v / 1000).strftime('%Y-%m-%d %H:%M:%S')
                    if isinstance(v, str) and decimal_re.search(v) is not None:
                        record[k] = round(float(v), 2)

                # BUG FIX: lower-case the keys over a snapshot of the key
                # list — popping/inserting while iterating the live dict
                # view mutates the dict mid-iteration (RuntimeError /
                # skipped keys).
                for key in list(record.keys()):
                    record[key.lower()] = record.pop(key)

                buffered_lines.append(
                    json.dumps(record, ensure_ascii=False) + '\n')

                # Flush the buffer every 100 records to bound memory
                # usage on large result sets.
                if len(buffered_lines) >= 100:
                    tmp.file.writelines(buffered_lines)
                    buffered_lines = []

            tmp.file.writelines(buffered_lines)

            # BUG FIX: the flush was commented out, so the tail of the
            # buffered output could be missing from the uploaded object.
            tmp.file.flush()

            logging.info('Loading results to GCS...')

            self.upload(service=service,
                        bucket=self.gcs_bucket,
                        filename=tmp.name,
                        object=self.gcs_object,
                        multipart=True,
                        num_retries=2)

        logging.info("Query finished!")