def get_sf_object(self, sf_fields, sf_conn_id, sf_object):
    # Sign into Salesforce
    sf_conn = SalesforceHook(conn_id=sf_conn_id)
    sf_conn.sign_in()

    fields = [field['sf_name'] for field in sf_fields]
    logging.info("Making request for {0} fields from {1}".format(
        len(fields), sf_object))

    query = sf_conn.get_object_from_salesforce(sf_object, fields)

    return query
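
# --- Illustrative sketch, not part of the operator ---
# The helper above passes a field list and an object name to
# SalesforceHook.get_object_from_salesforce(). That call is assumed to build a
# plain SOQL SELECT under the hood; the standalone snippet below only shows
# what such a query string would look like for a made-up field mapping. Only
# the 'sf_name' key is implied by the code above; everything else is invented.

def build_soql_sketch(sf_fields, sf_object):
    # Mirror the list comprehension used in get_sf_object() above.
    fields = [field['sf_name'] for field in sf_fields]
    return "SELECT {0} FROM {1}".format(", ".join(fields), sf_object)


# Hypothetical mapping and object, for demonstration only.
print(build_soql_sketch(
    [{'sf_name': 'Id'}, {'sf_name': 'Name'}, {'sf_name': 'CreatedDate'}],
    'Contact'))
# -> SELECT Id, Name, CreatedDate FROM Contact
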
def execute(self, context):
    """
    Execute the operator.
    This will get all the data for a particular
    Salesforce model and write it to a file.
    """
    logging.info("Prepping to gather data from Salesforce")

    # Load the SalesforceHook.
    # This is what has all the logic for
    # connecting to and getting data from Salesforce.
    hook = SalesforceHook(conn_id=self.conn_id, output=self.output)

    # Attempt to log in to Salesforce.
    # If this fails, it will raise an error and die right here.
    # We could wrap it in a try/except to handle the failure more gracefully.
    hook.sign_in()

    # Get the object from Salesforce.
    # If fields were not defined,
    # we assume that the user wants all of them.
    if not self.fields:
        self.fields = hook.get_available_fields(self.object)

    logging.info("Making request for "
                 "{0} fields from {1}".format(len(self.fields), self.object))

    if self.query:
        query = self.special_query(
            self.query,
            hook,
            relationship_object=self.relationship_object)
    else:
        query = hook.get_object_from_salesforce(self.object, self.fields)

    # Write the records returned by the query to a file.
    # The list of records is stored under the "records" key.
    logging.info("Writing query results to: {0}".format(self.output))

    hook.write_object_to_file(query['records'],
                              filename=self.output,
                              fmt=self.fmt,
                              coerce_to_timestamp=self.coerce_to_timestamp,
                              record_time_added=self.record_time_added)

    logging.info("Query finished!")
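
# --- Illustrative sketch, not part of the operator ---
# execute() above hands query['records'] to SalesforceHook.write_object_to_file().
# The records are assumed to be a list of plain dicts (one per Salesforce row).
# The snippet below shows one way such records could be dumped as
# newline-delimited JSON, purely to illustrate the shape of the data; it is not
# the hook's actual implementation, and the records/filename are made up.

import json

def write_records_sketch(records, filename):
    # One JSON document per line, matching the list-of-dicts shape
    # returned under the "records" key.
    with open(filename, "w") as out:
        for record in records:
            out.write(json.dumps(record) + "\n")


# Hypothetical records, for demonstration only.
write_records_sketch(
    [{"Id": "0015e00000A1B2C", "Name": "Acme"},
     {"Id": "0015e00000D3E4F", "Name": "Globex"}],
    "contacts.json")
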
def execute(self, context):
    """
    Execute the operator.
    This will get all the data for a particular Salesforce model
    and write it to a temporary file before uploading it to S3.
    """
    logging.info("Prepping to gather data from Salesforce")

    # Open a named temporary file to store the output until the S3 upload
    with NamedTemporaryFile("w") as tmp:
        # Load the SalesforceHook
        hook = SalesforceHook(conn_id=self.sf_conn_id, output=tmp.name)

        # Attempt to sign in to Salesforce.
        # If this fails, log the error and re-raise so the task dies here.
        try:
            hook.sign_in()
        except Exception:
            logging.error('Unable to sign in to Salesforce.')
            raise

        # Get the object from Salesforce.
        # If fields were not defined, all fields are pulled.
        if not self.fields:
            self.fields = hook.get_available_fields(self.object)

        logging.info("Making request for "
                     "{0} fields from {1}".format(len(self.fields),
                                                  self.object))

        if self.query:
            query = self.special_query(
                self.query,
                hook,
                relationship_object=self.relationship_object)
        else:
            query = hook.get_object_from_salesforce(
                self.object, self.fields)

        # Write the records returned by the query to the temp file.
        # The list of records is stored under the "records" key.
        logging.info("Writing query results to: {0}".format(tmp.name))

        hook.write_object_to_file(
            query['records'],
            filename=tmp.name,
            fmt=self.fmt,
            coerce_to_timestamp=self.coerce_to_timestamp,
            record_time_added=self.record_time_added)

        # Flush the temp file and upload it to S3
        tmp.flush()

        dest_s3 = S3Hook(s3_conn_id=self.s3_conn_id)

        dest_s3.load_file(filename=tmp.name,
                          key=self.output,
                          bucket_name=self.s3_bucket,
                          replace=True)

        dest_s3.connection.close()

        logging.info("Query finished!")
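
# --- Illustrative sketch, not part of the operator ---
# The S3 flavour of execute() stages its output in a NamedTemporaryFile and
# calls tmp.flush() before S3Hook.load_file() so the uploader reads a complete
# file rather than whatever happens to be in the write buffer. The snippet
# below demonstrates that stage-then-ship pattern with shutil.copy standing in
# for the S3 upload; the contents and destination filename are made up.

import shutil
from tempfile import NamedTemporaryFile

with NamedTemporaryFile("w") as staging:
    staging.write("Id,Name\n0015e00000A1B2C,Acme\n")
    # Without this flush the copy below could see a truncated (or empty) file,
    # because the line written above may still be sitting in the buffer.
    staging.flush()
    shutil.copy(staging.name, "salesforce_export.csv")  # stand-in for load_file()
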
def execute(self, context):
    """
    Execute the operator.
    This queries Salesforce via the Bulk API and writes the results
    to a temporary file before uploading it to Google Cloud Storage.
    """
    # Open a named temporary file to store the output until the GCS upload
    with NamedTemporaryFile("w") as tmp:
        # Load the SalesforceHook
        hook = SalesforceHook(conn_id=self.sf_conn_id, output=tmp.name)

        # Attempt to sign in to Salesforce.
        # If this fails, log the error and re-raise so the task dies here.
        try:
            sf_conn = hook.sign_in()
        except Exception:
            logging.error('Unable to sign in to Salesforce.')
            raise

        logging.info(self.soql)
        logging.info(self.object)

        logging.debug('Connecting to Salesforce...')

        # Run the SOQL query against the object through the Bulk API
        query_results = getattr(sf_conn.bulk, self.object).query(self.soql)

        logging.info('Retrieved results...')
        logging.info(type(query_results))
        if query_results:
            logging.info('First line is:')
            logging.info(query_results[0])

        gcs = GoogleCloudStorageHook(self.gcs_conn_id)
        service = gcs.get_conn()

        logging.info('Preparing File...')

        intermediate_arr = []
        for i, q in enumerate(query_results):
            # Drop Salesforce metadata and stamp each record with the load date
            del q['attributes']
            q["partition_date"] = date.today().strftime('%Y-%m-%d')

            # Normalize values: round floats, convert 13-digit
            # epoch-millisecond integers to timestamps, and round
            # numeric strings
            for k, v in q.items():
                if isinstance(v, float):
                    q[k] = round(v, 2)
                if isinstance(v, int) and len(str(v)) == 13:
                    q[k] = datetime.fromtimestamp(
                        v / 1000).strftime('%Y-%m-%d %H:%M:%S')
                if isinstance(v, str) and re.search(r"^(\d+\.\d+)$", v) is not None:
                    q[k] = round(float(v), 2)

            # Lowercase all field names
            q = {key.lower(): value for key, value in q.items()}

            # Buffer the records as newline-delimited JSON and
            # write them out in batches of 100
            intermediate_arr.append(json.dumps(q, ensure_ascii=False) + '\n')

            if i % 100 == 0:
                tmp.file.writelines(intermediate_arr)
                intermediate_arr = []

        # Write any remaining records and flush before uploading
        tmp.file.writelines(intermediate_arr)
        tmp.flush()

        logging.info('Loading results to GCS...')

        self.upload(service=service,
                    bucket=self.gcs_bucket,
                    filename=tmp.name,
                    object=self.gcs_object,
                    multipart=True,
                    num_retries=2)

        logging.info("Query finished!")
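
# --- Illustrative sketch, not part of the operator ---
# A standalone version of the per-record clean-up performed in the loop above:
# drop the Salesforce 'attributes' metadata, stamp a partition date, round
# floats, turn 13-digit epoch-millisecond integers into timestamps, round
# numeric strings, and lowercase the keys. The sample record is invented.

import json
import re
from datetime import date, datetime

def normalize_record_sketch(record):
    record = dict(record)
    record.pop('attributes', None)
    record["partition_date"] = date.today().strftime('%Y-%m-%d')
    for k, v in record.items():
        if isinstance(v, float):
            record[k] = round(v, 2)
        if isinstance(v, int) and len(str(v)) == 13:
            record[k] = datetime.fromtimestamp(v / 1000).strftime('%Y-%m-%d %H:%M:%S')
        if isinstance(v, str) and re.search(r"^(\d+\.\d+)$", v) is not None:
            record[k] = round(float(v), 2)
    return {k.lower(): v for k, v in record.items()}


# Hypothetical record, for demonstration only.
sample = {'attributes': {'type': 'Opportunity'},
          'Amount': 1234.5678,
          'CloseDate': 1672531200000,
          'Probability': '0.75'}
print(json.dumps(normalize_record_sketch(sample), ensure_ascii=False))
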