class SalesforceConnector:
    def __init__(self, **kwargs):
        self.sf_version = kwargs.get('version', '29.0')
        self.sandbox = kwargs.get('sandbox', False)
        self.proxies = kwargs.get('proxies')
        self.domain = kwargs.get('domain', None)
        try:
            self.credentials_path = [elem for elem in credential_files if os.path.exists(elem)][0]
        except IndexError:
            raise ValueError('No credentials found')

        # Load credentials from file
        if os.path.exists(self.credentials_path):
            with open(self.credentials_path, 'rb') as f:
                creds = pickle.loads(f.read())
            username = creds['username']
            password = creds['password']
            security_token = creds['security_token']
            self.session_id, self.sf_instance = SalesforceLogin(
                username=username,
                password=password,
                security_token=security_token,
                sandbox=self.sandbox,
                sf_version=self.sf_version,
                proxies=self.proxies,
                domain=self.domain)
            print('read credentials')
        # Read credentials from arguments
        elif 'username' in kwargs and 'password' in kwargs and 'security_token' in kwargs:
            self.auth_type = "password"
            username = kwargs['username']
            password = kwargs['password']
            security_token = kwargs['security_token']
            self.session_id, self.sf_instance = SalesforceLogin(
                username=username,
                password=password,
                security_token=security_token,
                sandbox=self.sandbox,
                sf_version=self.sf_version,
                proxies=self.proxies,
                domain=self.domain)
            self.saveLogin(username, password, security_token)
        else:
            raise TypeError(
                'You must provide login information or an instance and token'
            )
        print(self.sf_instance)
        self.bulk = SalesforceBulk(sessionId=self.session_id, host=self.sf_instance)

    def saveLogin(self, username, password, security_token):
        # Pickle is binary data, so write the credentials file in binary mode
        with open(self.credentials_path, "wb") as f:
            f.write(pickle.dumps(
                dict(password=password, username=username, security_token=security_token)))

    # Returns a list of csv dicts: each row is a dictionary of column_header:row_value
    def query(self, sObject, queryString, contentType):
        job_id = self.bulk.create_query_job(sObject, contentType=contentType)
        batch_id = self.bulk.query(job_id, queryString)
        self.bulk.wait_for_batch(job_id, batch_id, timeout=120)
        self.bulk.close_job(job_id)
        print('job closed')
        result_id = self.bulk.get_batch_result_ids(batch_id, job_id)[0]
        result = [row for row in self.bulk.get_batch_results(
            batch_id=batch_id, result_id=result_id, job_id=job_id, parse_csv=True)]
        csv_dict = [dict(zip(result[0], row)) for row in result[1:]]
        return csv_dict

    def update(self, sObject, data, contentType):
        job_id = self.bulk.create_update_job(sObject, contentType='CSV')
        csv_iter = CsvDictsAdapter(iter(data))
        batch_id = self.bulk.post_bulk_batch(job_id, csv_iter)
        self.bulk.wait_for_batch(job_id, batch_id, timeout=120)
        self.bulk.close_job(job_id)
        print('done')
        return
def get_query_records_dict(self, db_table, soql_query):
    """Execute a bulk Salesforce SOQL query and return the results as a list of dictionaries.

    :param db_table: Salesforce object (table) name
    :param soql_query: SOQL query string
    :return: List of result record dictionaries on success; otherwise an empty list
    """
    self.bulk = SalesforceBulk(sessionId=self.session_id, host=self.instance)
    job = self.bulk.create_query_job(db_table, contentType="JSON")
    batch = self.bulk.query(job, soql_query)
    self.bulk.close_job(job)
    while not self.bulk.is_batch_done(batch):
        print("Waiting for batch query to complete")
        sleep(10)
    dict_records = []
    rec_count = 0
    print("Iterating through batch result set")
    for result in self.bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            rec_count += 1
            dict_records.append(row)
        print("Current fetched record count: ", rec_count)
    return dict_records
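# A minimal standalone sketch of the JSON bulk-query pattern used by
# get_query_records_dict above, built only from documented salesforce_bulk calls;
# the credentials and SOQL below are placeholders, not values from the original source.
import json
from time import sleep

from salesforce_bulk import SalesforceBulk
from salesforce_bulk.util import IteratorBytesIO

bulk = SalesforceBulk(username="user@example.com", password="********",
                      security_token="********")
job = bulk.create_query_job("Contact", contentType="JSON")
batch = bulk.query(job, "SELECT Id, Email FROM Contact LIMIT 100")
bulk.close_job(job)

while not bulk.is_batch_done(batch):
    sleep(10)  # poll until Salesforce reports the batch as done

records = []
for result in bulk.get_all_results_for_query_batch(batch):
    records.extend(json.load(IteratorBytesIO(result)))
print("fetched", len(records), "records")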
def setUp(self):
    request_patcher = mock.patch('simple_salesforce.api.requests')
    self.mockrequest = request_patcher.start()
    self.addCleanup(request_patcher.stop)
    self.sessionId = '12345'
    self.host = 'https://example.com'
    self.bulk = SalesforceBulk(self.sessionId, self.host)
def __init__(self, config_path):
    """
    Bootstrap a fetcher class
    :param config_path: Path to the configuration file to use for this instance
    """
    # Get settings
    with open(config_path, 'r') as f:
        self.settings = yaml.safe_load(f)

    # Configure the logger
    log_level = (logging.WARN, logging.DEBUG)[self.settings['debug']]
    LOG_FORMAT = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger("salesforce-fetcher")
    logger.setLevel(log_level)
    ch = logging.StreamHandler()
    ch.setFormatter(LOG_FORMAT)
    logger.addHandler(ch)
    logger.debug("Logging is set to DEBUG level")
    # let's not output the password
    # logger.debug("Settings: %s" % self.settings)
    self.logger = logger

    self.salesforce = Salesforce(**self.settings['salesforce']['auth'])
    self.salesforce_bulk = SalesforceBulk(**self.settings['salesforce']['auth'],
                                          API_version='46.0')

    # Make sure output dir is created
    output_directory = self.settings['output_dir']
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
def __init__(self, connector_param):
    self.connector_param = connector_param
    self.instance_url = 'https://' + connector_param.url_prefix + 'salesforce.com'
    self.token_url = 'https://' + connector_param.url_prefix + 'salesforce.com/services/oauth2/token'
    self.access_token = None
    self.access_token = self.get_token()
    self.bulk = SalesforceBulk(sessionId=self.access_token,
                               host=urlparse(self.instance_url).hostname)
def __init__(self):
    if BulkHelper.__instance is not None:
        raise Exception("BulkHelper class is a singleton!")
    else:
        BulkHelper.__instance = self
        self.__bulk = SalesforceBulk(username=Config.USERNAME,
                                     password=Config.PASSWORD,
                                     security_token=Config.SECURITY_TOKEN,
                                     sandbox=Config.IS_SANDBOX,
                                     API_version=Config.API_VERSION)
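# Hedged reconstruction of the accessor that the singleton check above implies;
# the class name and get_instance() here are assumed for illustration and are not
# part of the original source.
class BulkHelperSketch:
    __instance = None

    @staticmethod
    def get_instance():
        # Lazily create the single instance on first access
        if BulkHelperSketch.__instance is None:
            BulkHelperSketch()
        return BulkHelperSketch.__instance

    def __init__(self):
        if BulkHelperSketch.__instance is not None:
            raise Exception("BulkHelperSketch class is a singleton!")
        BulkHelperSketch.__instance = self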
def test_upload(self):
    bulk = SalesforceBulk(self.sessionId, self.endpoint)
    self.bulk = bulk

    job_id = bulk.create_insert_job("Contact", contentType=self.contentType)
    self.jobs.append(job_id)
    self.assertIsNotNone(re.match(r"\w+", job_id))

    batch_ids = []
    data = [
        {
            'FirstName': 'BulkTestFirst%s' % i,
            'LastName': 'BulkLastName',
            'Phone': '555-555-5555',
        }
        for i in range(50)
    ]
    for i in range(2):
        content = self.generate_content(data)
        batch_id = bulk.post_batch(job_id, content)
        self.assertIsNotNone(re.match(r"\w+", batch_id))
        batch_ids.append(batch_id)

    bulk.close_job(job_id)
    for batch_id in batch_ids:
        bulk.wait_for_batch(job_id, batch_id, timeout=120)
    for batch_id in batch_ids:
        results = bulk.get_batch_results(batch_id)
        print(results)
        self.assertTrue(len(results) > 0)
        self.assertTrue(isinstance(results, list))
        self.assertTrue(isinstance(results[0], UploadResult))
        self.assertEqual(len(results), 50)
def test_query_pk_chunk(self): bulk = SalesforceBulk(self.sessionId, self.endpoint) self.bulk = bulk job_id = bulk.create_query_job("Contact", contentType=self.contentType, pk_chunking=True) self.jobs.append(job_id) self.assertIsNotNone(re.match("\w+", job_id)) query = "Select Id,Name,Email from Contact" batch_id = bulk.query(job_id, query) self.assertIsNotNone(re.match("\w+", batch_id)) try: i = 0 while not bulk.is_batch_done(batch_id): print("Job not done yet...") print(bulk.batch_status(batch_id)) time.sleep(2) i += 1 if i == 20: raise Exception except BulkBatchFailed as e: if e.state != bulk_states.NOT_PROCESSED: raise batches = bulk.get_batch_list(job_id) print (batches) batch_ids = [x['id'] for x in batches if x['state'] != bulk_states.NOT_PROCESSED] requests = [bulk.get_query_batch_request(x, job_id) for x in batch_ids] print (requests) for request in requests: self.assertTrue(request.startswith(query)) all_results = [] i = 0 while not all(bulk.is_batch_done(j, job_id) for j in batch_ids): print("Job not done yet...") print(bulk.batch_status(batch_id, job_id)) time.sleep(2) i += 1 if i == 20: raise Exception for batch_id in batch_ids: results = bulk.get_all_results_for_query_batch(batch_id, job_id) for result in results: all_results.extend(self.parse_results(result)) self.assertTrue(len(all_results) > 0) self.assertEqual( sorted(all_results[0].keys()), ['Email', 'Id', 'Name'] )
def setUp(self):
    login = salesforce_oauth_request.login(
        username=USERNAME,
        password=PASSWORD,
        token=SECURITY_TOKEN,
        client_id=CONSUMER_KEY,
        client_secret=CONSUMER_SECRET,
        cache_session=False,
        sandbox=True,
    )
    self.bulk = SalesforceBulk(login['access_token'], login['endpoint'])
    self.jobs = []
def test_raw_query(self):
    bulk = SalesforceBulk(self.sessionId, self.endpoint)
    self.bulk = bulk

    job_id = bulk.create_query_job("Contact")
    self.jobs.append(job_id)
    self.assertIsNotNone(re.match(r"\w+", job_id))

    batch_id = bulk.query(job_id, "Select Id,Name,Email from Contact Limit 1000")
    self.assertIsNotNone(re.match(r"\w+", batch_id))
    while not bulk.is_batch_done(job_id, batch_id):
        print("Job not done yet...")
        print(bulk.batch_status(job_id, batch_id))
        time.sleep(2)

    self.results = ""

    def save_results(tfile, **kwargs):
        print("in save results")
        self.results = tfile.read()

    flag = bulk.get_batch_results(job_id, batch_id, callback=save_results)
    self.assertTrue(flag)
    self.assertTrue(len(self.results) > 0)
    self.assertIn('"', self.results)
def bulkUpdate(sObject): sfBulk = SalesforceBulk(username=username, password=password, security_token=security_token) job = sfBulk.create_insert_job(sObject, contentType='CSV', concurrency='Parallel') dir = "c:/kenandy/python/stageCSV/" stageCSV = dir + sObject + '.csv' print(stageCSV) with open(stageCSV) as csvfile: reader = csv.DictReader(csvfile) #print (reader.fieldnames) rows = [] for row in reader: print("row****", dict(row)) #print(row['Id'], row['Name']) # print(row['Id'], row['Name']) rows.append(dict(row)) #print("rows****", rows) csv_iter = CsvDictsAdapter(iter(rows)) #print("csv_iter**** ", csv_iter) print("rows****", rows) batch = sfBulk.post_batch(job, csv_iter) sfBulk.wait_for_batch(job, batch) sfBulk.close_job(job) print("Done. Data Uploaded.")
def sfBulkUpdate(namespace,sObject): myObject =sObject if len(namespace) > 0: myObject = namespace.upper() + '__' + sObject stageCSV = stageCSVDir + myObject + '_stg.csv' print(stageCSV) #print (sObject) sfBulk = SalesforceBulk(username=username_loc, password=password_loc, security_token=security_token_loc) job = sfBulk.create_insert_job(myObject, contentType='CSV', concurrency='Parallel') with open(stageCSV) as csvfile: reader = csv.DictReader(csvfile) #print (reader.fieldnames) rows = [] for row in reader: print("row****", dict(row)) rows.append(dict(row)) csv_iter = CsvDictsAdapter(iter(rows)) print("rows****", rows) batch = sfBulk.post_batch(job, csv_iter) sfBulk.wait_for_batch(job, batch) sfBulk.close_job(job) print("Done. Data Uploaded.")
def test_csv_query(self):
    bulk = SalesforceBulk(self.sessionId, self.endpoint)
    self.bulk = bulk

    job_id = bulk.create_query_job("Account")
    self.jobs.append(job_id)
    self.assertIsNotNone(re.match(r"\w+", job_id))

    batch_id = bulk.query(job_id, "Select Id,Name,Description from Account Limit 10000")
    self.assertIsNotNone(re.match(r"\w+", batch_id))
    bulk.wait_for_batch(job_id, batch_id, timeout=120)

    self.results = None

    def save_results1(rows, **kwargs):
        self.results = rows

    flag = bulk.get_batch_results(job_id, batch_id, callback=save_results1, parse_csv=True)
    self.assertTrue(flag)
    results = self.results
    self.assertTrue(len(results) > 0)
    self.assertTrue(isinstance(results, list))
    self.assertEqual(results[0], ['Id', 'Name', 'Description'])
    self.assertTrue(len(results) > 3)

    self.results = None
    self.callback_count = 0

    def save_results2(rows, **kwargs):
        self.results = rows
        print(rows)
        self.callback_count += 1

    batch = len(results) // 3  # batch_size must be an integer
    self.callback_count = 0
    flag = bulk.get_batch_results(job_id, batch_id, callback=save_results2,
                                  parse_csv=True, batch_size=batch)
    self.assertTrue(self.callback_count >= 3)
def login():
    global bulk
    logging.info('logging in...')
    # domain passed to SalesforceBulk should be 'test' or 'login' or 'something.my'
    bulk = SalesforceBulk(username=os.environ['ORG_USERNAME'],
                          password=os.environ['ORG_PASSWORD'],
                          security_token=os.environ['ORG_SECURITY_TOKEN'],
                          domain=os.environ['ORG_DOMAIN'])
    logging.info('login successful !')
def _init_bulk(sf, org_config):
    from salesforce_bulk import SalesforceBulk

    return SalesforceBulk(
        host=org_config.instance_url.replace("https://", "").rstrip("/"),
        sessionId=org_config.access_token,
        API_version=sf.sf_version,
    )
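# Hedged usage sketch for _init_bulk above; the SimpleNamespace stand-ins only
# mimic the attributes it reads (instance_url, access_token, sf_version) and are
# not objects from the original source.
from types import SimpleNamespace

org_config = SimpleNamespace(
    instance_url="https://example.my.salesforce.com/",
    access_token="placeholder-session-id",
)
sf = SimpleNamespace(sf_version="46.0")

bulk = _init_bulk(sf, org_config)
# The host passed to SalesforceBulk becomes "example.my.salesforce.com":
# the scheme and trailing slash are stripped before the client is built.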
def test_query(self):
    bulk = SalesforceBulk(self.sessionId, self.endpoint)
    self.bulk = bulk

    job_id = bulk.create_query_job("Contact", contentType=self.contentType)
    self.jobs.append(job_id)
    self.assertIsNotNone(re.match(r"\w+", job_id))

    batch_id = bulk.query(job_id, "Select Id,Name,Email from Contact Limit 1000")
    self.assertIsNotNone(re.match(r"\w+", batch_id))
    while not bulk.is_batch_done(batch_id):
        print("Job not done yet...")
        print(bulk.batch_status(batch_id))
        time.sleep(2)

    all_results = []
    results = bulk.get_all_results_for_query_batch(batch_id)
    for result in results:
        all_results.extend(self.parse_results(result))

    self.assertTrue(len(all_results) > 0)
    self.assertEqual(
        sorted(all_results[0].keys()),
        ['Email', 'Id', 'Name']
    )
def _init_bulk(self):
    version = self.api_version or self.project_config.project__package__api_version
    if not version:
        raise ConfigError("Cannot find Salesforce version")
    return SalesforceBulk(
        host=self.org_config.instance_url.replace("https://", "").rstrip("/"),
        sessionId=self.org_config.access_token,
        API_version=version,
    )
def request(self, data=()):
    # use csv iterator
    csv_iter = CsvDictsAdapter(iter(data))
    bulk = SalesforceBulk(username=self.username,
                          password=self.password,
                          organizationId=self.organizationId)
    job = bulk.create_insert_job('SamanageCMDB__AgentPost__c', contentType='CSV')
    batch = bulk.post_batch(job, csv_iter)
    bulk.wait_for_batch(job, batch)
    bulk.close_job(job)
    while not bulk.is_batch_done(batch):
        sleep(10)
def test_csv_upload(self):
    # sessionId and host are the first positional arguments; pass the API version by keyword
    bulk = SalesforceBulk(self.sessionId, self.endpoint,
                          API_version=SALESFORCE_API_VERSION)
    self.bulk = bulk

    job_id = bulk.create_insert_job("Contact")
    self.jobs.append(job_id)
    self.assertIsNotNone(re.match(r"\w+", job_id))

    batch_ids = []
    content = open("example.csv").read()
    for i in range(5):
        # posts the CSV content as a batch of the insert job
        batch_id = bulk.query(job_id, content)
        self.assertIsNotNone(re.match(r"\w+", batch_id))
        batch_ids.append(batch_id)

    for batch_id in batch_ids:
        bulk.wait_for_batch(job_id, batch_id, timeout=120)

    self.results = None

    def save_results1(rows, failed, remaining):
        self.results = rows

    for batch_id in batch_ids:
        flag = bulk.get_upload_results(job_id, batch_id, callback=save_results1)
        self.assertTrue(flag)
        results = self.results
        self.assertTrue(len(results) > 0)
        self.assertTrue(isinstance(results, list))
        self.assertEqual(results[0], UploadResult('Id', 'Success', 'Created', 'Error'))
        self.assertEqual(len(results), 3)

    self.results = None
    self.callback_count = 0

    def save_results2(rows, failed, remaining):
        self.results = rows
        self.callback_count += 1

    batch = len(results) // 3  # batch_size must be an integer
    self.callback_count = 0
    flag = bulk.get_upload_results(job_id, batch_id, callback=save_results2,
                                   batch_size=batch)
    self.assertTrue(self.callback_count >= 3)
def setUpClass(cls):
    username = os.environ.get('SALESFORCE_BULK_TEST_USERNAME')
    password = os.environ.get('SALESFORCE_BULK_TEST_PASSWORD')
    security_token = os.environ.get('SALESFORCE_BULK_TEST_SECURITY_TOKEN')
    sandbox = os.environ.get('SALESFORCE_BULK_TEST_SANDBOX')

    if not all(x for x in [username, password, security_token]):
        raise unittest.SkipTest('Missing Configuration for logged in tests')

    sessionId, endpoint = SalesforceBulk.login_to_salesforce(
        username, password, sandbox, security_token)

    cls.endpoint = endpoint
    cls.sessionId = sessionId
def setUpClass(cls):
    username = os.environ.get('SALESFORCE_BULK_TEST_USERNAME')
    password = os.environ.get('SALESFORCE_BULK_TEST_PASSWORD')
    security_token = os.environ.get('SALESFORCE_BULK_TEST_SECURITY_TOKEN')
    domain = os.environ.get('SALESFORCE_BULK_TEST_DOMAIN')

    if not all(x for x in [username, password, security_token]):
        raise unittest.SkipTest('Missing Configuration for logged in tests')

    sessionId, endpoint = SalesforceBulk.login_to_salesforce(
        username, password, domain, security_token)

    cls.endpoint = endpoint
    cls.sessionId = sessionId
def __init__(self, username, password, security_token, sandbox=True):
    """
    :param username:
    :type username: str
    :param password:
    :type password: str
    :param security_token:
    :type security_token: str
    :param sandbox: Whether the Salesforce instance is Production or Sandbox.
        Default value is True (Sandbox).
    :type sandbox: bool
    """
    # Logging setup
    self.log = logging.getLogger(__name__)

    self.log.info('Signing into Salesforce.')
    try:
        self.bulk = SalesforceBulk(username=username,
                                   password=password,
                                   security_token=security_token,
                                   sandbox=sandbox)
        self.log.info(
            f'Successfully connected to Salesforce as "{username}".')
    except Exception as auth_err:
        self.log.exception(f'Failed to connect to Salesforce: {auth_err}')
        raise
def upload_table(sessionId, hostname, tablename, connection_string):
    schema, table = tablename.split('.')
    log.debug('%s, %s, %s, %s, %s, %s', sessionId, hostname, tablename,
              connection_string, schema, table)
    bulk = SalesforceBulk(sessionId=sessionId, host=hostname)
    engine = create_engine(connection_string)
    result = engine.execute(
        text('select column_name from information_schema.columns '
             'where table_name = :table and table_schema = :schema'),
        {'table': table, 'schema': schema})
    exclude = ['sfid', 'id', 'systemmodstamp', 'isdeleted']
    columns = [x[0] for x in result
               if not x[0].startswith('_') and x[0].lower() not in exclude]
    log.debug('columns: %s', columns)
    column_select = ','.join('"%s"' % x for x in columns)
    result = engine.execute('select %s from %s' % (column_select, tablename))
    dict_iter = (dict(zip(columns, row_modifier(row))) for row in result)
    dict_iter = list(dict_iter)
    log.debug('Sending rows: %s', [x['name'] for x in dict_iter])
    csv_iter = CsvDictsAdapter(iter(dict_iter))

    job = bulk.create_insert_job(table.capitalize(), contentType='CSV')
    batch = bulk.post_bulk_batch(job, csv_iter)
    bulk.wait_for_batch(job, batch)

    bulk_result = []

    def save_results(rows, failed, remaining):
        bulk_result[:] = [rows, failed, remaining]

    flag = bulk.get_upload_results(job, batch, callback=save_results)
    bulk.close_job(job)
    log.debug('results: %s, %s', flag, bulk_result)
    return bulk_result
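# Hedged usage sketch for upload_table above; the session id, host, and database
# connection string are placeholders, not values from the original source.
results = upload_table(
    sessionId="placeholder-session-id",
    hostname="example.my.salesforce.com",
    tablename="public.account",
    connection_string="postgresql://user:password@localhost:5432/salesforce",
)
print(results)  # [rows, failed, remaining] as collected by the callback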
def test_csv_upload(self): bulk = SalesforceBulk(self.sessionId, self.endpoint) self.bulk = bulk job_id = bulk.create_insert_job("Contact") self.jobs.append(job_id) self.assertIsNotNone(re.match("\w+", job_id)) batch_ids = [] content = open("example.csv").read() for i in range(5): batch_id = bulk.query(job_id, content) self.assertIsNotNone(re.match("\w+", batch_id)) batch_ids.append(batch_id) for batch_id in batch_ids: bulk.wait_for_batch(job_id, batch_id, timeout=120) self.results = None def save_results1(rows, failed, remaining): self.results = rows for batch_id in batch_ids: flag = bulk.get_upload_results(job_id, batch_id, callback = save_results1) self.assertTrue(flag) results = self.results self.assertTrue(len(results) > 0) self.assertTrue(isinstance(results,list)) self.assertEqual(results[0], UploadResult('Id','Success','Created','Error')) self.assertEqual(len(results), 3) self.results = None self.callback_count = 0 def save_results2(rows, failed, remaining): self.results = rows self.callback_count += 1 batch = len(results) / 3 self.callback_count = 0 flag = bulk.get_upload_results(job_id, batch_id, callback = save_results2, batch_size=batch) self.assertTrue(self.callback_count >= 3)
def _init_bulk(self):
    return SalesforceBulk(
        host=self.org_config.instance_url.replace("https://", "").rstrip("/"),
        sessionId=self.org_config.access_token,
    )
def sfBulk_Login(username, password, security_token):
    sfBulk = SalesforceBulk(username=username, password=password,
                            security_token=security_token)
    return sfBulk
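# Hedged usage sketch for sfBulk_Login above (placeholder credentials); it only
# shows the returned client driving a small CSV query job end to end, following
# the salesforce_bulk README pattern.
import unicodecsv

sf_bulk = sfBulk_Login("user@example.com", "********", "********")
job = sf_bulk.create_query_job("Account", contentType="CSV")
batch = sf_bulk.query(job, "SELECT Id, Name FROM Account LIMIT 10")
sf_bulk.wait_for_batch(job, batch, timeout=120)
sf_bulk.close_job(job)
for result in sf_bulk.get_all_results_for_query_batch(batch, job):
    reader = unicodecsv.DictReader(result, encoding='utf-8')
    for row in reader:
        print(row)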
import csv

from salesforce_bulk import SalesforceBulk
from salesforce_bulk import CsvDictsAdapter

username = '******'
password = '******'
security_token = 'HxK2ciSHbsjN5PvAE8psL9w9F'

bulk = SalesforceBulk(username=username, password=password,
                      security_token=security_token)
job = bulk.create_insert_job("account", contentType='CSV', concurrency='Parallel')

rootDir = "c:/python/kenandy/stageCSV/"
objectName = "Account"
stageCSV = rootDir + objectName + '.csv'
print(stageCSV)

with open(stageCSV) as csvfile:
    reader = csv.DictReader(csvfile)  # DictReader takes the file object, not the path
    account = [dict(Name="Account%d" % idx) for idx in range(5)]
    # disbursals = []
    # for row in reader:
    #     disbursals.append(row)
    # print(disbursals)
    print(account)
    csv_iter = CsvDictsAdapter(iter(account))
    # csv_iter = CsvDictsAdapter(iter(disbursals))
    batch = bulk.post_batch(job, csv_iter)
import json from salesforce_bulk import SalesforceBulk from salesforce_bulk.util import IteratorBytesIO from time import sleep from salesforce_bulk import CsvDictsAdapter import pandas as pd import config as cfg #Authentication bulk = SalesforceBulk(username=cfg.USERNAME, password=cfg.PASSWORD, security_token=cfg.SECURITY_KEY, sandbox=True) #Source CSV File path for Account input_file = "/home/baadmin/NCT_ETL/input_files/pg_extract_prd/staging2_payment.csv" #Target SFDC Object name target_obj = "cpm__Payment__c" # Mapping of Input csv Fields to SalesForce Fields sf_mapping = { 'paymentkey': 'Payment_Key__c', 'accountkey': 'Account_key__c', 'contactkey': 'Contact_Key__c', 'installmentkey': 'Installment_Key__c', 'mandatekey': 'Mandate_Key__c', 'paymentprofilekey': 'Payment_Profile_Key__c', 'installment': 'cpm__Installment__c', 'paymentprofile': 'cpm__Payment_Profile__c',
"USERNAME": os.getenv("SALESFORCE_USERNAME"), "PASSWORD": os.getenv("SALESFORCE_PASSWORD"), "HOST": os.getenv("SALESFORCE_HOST"), "TOKEN": os.getenv("SALESFORCE_TOKEN"), "CLIENT_ID": os.getenv("SALESFORCE_CLIENT_ID"), "CLIENT_SECRET": os.getenv("SALESFORCE_CLIENT_SECRET"), } USER = SALESFORCE["USERNAME"] PASS = SALESFORCE["PASSWORD"] TOKEN = SALESFORCE["TOKEN"] HOST = SALESFORCE["HOST"] sf = Salesforce(username=USER, password=PASS, security_token=TOKEN) bulk = SalesforceBulk(sessionId=sf.session_id, host=HOST) job = bulk.create_query_job("Contact", contentType="CSV") batch = bulk.query(job, query) while not bulk.is_batch_done(job, batch): sleep(3) bulk.close_job(job) rows = bulk.get_batch_result_iter(job, batch, parse_csv=True) bulk_email = list(rows) email_list = [] emails_sf = [x[COMBINED_EMAIL_FIELD] for x in bulk_email] print ("The following email addresses appear in Stripe but not Salesforce: \n") for field in emails_sf: for email in field.split(","):
"USERNAME": os.getenv('SALESFORCE_USERNAME'), "PASSWORD": os.getenv('SALESFORCE_PASSWORD'), "HOST": os.getenv("SALESFORCE_HOST"), "TOKEN": os.getenv("SALESFORCE_TOKEN"), "CLIENT_ID": os.getenv("SALESFORCE_CLIENT_ID"), "CLIENT_SECRET": os.getenv("SALESFORCE_CLIENT_SECRET"), } USER = SALESFORCE['USERNAME'] PASS = SALESFORCE['PASSWORD'] TOKEN = SALESFORCE['TOKEN'] HOST = SALESFORCE['HOST'] sf = Salesforce(username=USER, password=PASS, security_token=TOKEN) bulk = SalesforceBulk(sessionId=sf.session_id, host=HOST) job = bulk.create_query_job("Contact", contentType='CSV') batch = bulk.query(job, query) while not bulk.is_batch_done(job, batch): sleep(3) bulk.close_job(job) rows = bulk.get_batch_result_iter(job, batch, parse_csv=True) bulk_email = list(rows) email_list = [] emails_sf = [x[COMBINED_EMAIL_FIELD] for x in bulk_email] print ("The following email addresses appear in Stripe but not Salesforce: \n") for field in emails_sf: for email in field.split(','):
def sf_data(query): """ Get opportunity data using supplied query. Get account data. Return both as dataframes. """ USER = SALESFORCE['USERNAME'] PASS = SALESFORCE['PASSWORD'] TOKEN = SALESFORCE['TOKEN'] HOST = SALESFORCE['HOST'] sf = Salesforce(username=USER, password=PASS, security_token=TOKEN) bulk = SalesforceBulk(sessionId=sf.session_id, host=HOST) print "Creating Opportunity job..." job = bulk.create_query_job("Opportunity", contentType='CSV') print "Issuing query..." batch = bulk.query(job, query) while not bulk.is_batch_done(job, batch): print "waiting for query to complete..." sleep(3) bulk.close_job(job) rows = bulk.get_batch_result_iter(job, batch, parse_csv=True) all = list(rows) opps = DataFrame.from_dict(all) job = bulk.create_query_job("Account", contentType='CSV') print "Creating Account job..." batch = bulk.query(job, "SELECT Id, Website, Text_For_Donor_Wall__c FROM Account") print "Issuing query..." while not bulk.is_batch_done(job, batch): print "waiting for query to complete..." sleep(3) bulk.close_job(job) rows = bulk.get_batch_result_iter(job, batch, parse_csv=True) accts = DataFrame.from_dict(list(rows)) accts.rename(columns={'Id': 'AccountId'}, inplace=True) return opps, accts
class SalesforceFetcher(object): """ Class that encapsulates all the fetching logic for SalesForce. """ def __init__(self, config_path): """ Bootstrap a fetcher class :param config_path: Path to the configuration file to use for this instance """ # Get settings with open(config_path, 'r') as f: self.settings = yaml.safe_load(f) # Configure the logger log_level = (logging.WARN, logging.DEBUG)[self.settings['debug']] LOG_FORMAT = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger("salesforce-fetcher") logger.setLevel(log_level) ch = logging.StreamHandler() ch.setFormatter(LOG_FORMAT) logger.addHandler(ch) logger.debug("Logging is set to DEBUG level") # let's not output the password #logger.debug("Settings: %s" % self.settings) self.logger = logger self.salesforce = Salesforce(**self.settings['salesforce']['auth']) self.salesforce_bulk = SalesforceBulk(**self.settings['salesforce'] ['auth'], API_version='46.0') # Make sure output dir is created output_directory = self.settings['output_dir'] if not os.path.exists(output_directory): os.makedirs(output_directory) def fetch_all(self, fetch_only, airflow_date, fetch_method, days_lookback): """ Fetch any reports or queries, writing them out as files in the output_dir """ queries = self.load_queries() for name, query in queries.items(): if fetch_only and name != fetch_only: self.logger.debug( "'--fetch-only %s' specified. Skipping fetch of %s" % (fetch_only, name)) continue #if name == 'contacts' or name == 'opportunity': if fetch_method and fetch_method == 'bulk': self.fetch_soql_query_bulk(name, query, airflow_date) else: self.fetch_soql_query(name, query, airflow_date) reports = self.settings['salesforce']['reports'] for name, report_url in reports.items(): if fetch_only and name != fetch_only: self.logger.debug( "'--fetch-only %s' specified. Skipping fetch of %s" % (fetch_only, name)) continue self.fetch_report(name, report_url, airflow_date) if fetch_only: if fetch_only == 'contact_deletes': self.fetch_contact_deletes(days=days_lookback, airflow_date=airflow_date) else: self.fetch_contact_deletes(days=days_lookback, airflow_date=airflow_date) self.logger.info("Job Completed") def fetch_contact_deletes(self, days=29, airflow_date=None): """ Fetches all deletes from Contact for X days :param days: Fetch deletes from this number of days to present :return: """ path = self.create_output_path('contact_deletes', airflow_date=airflow_date) end = datetime.datetime.now( pytz.UTC) # we need to use UTC as salesforce API requires this! 
records = self.salesforce.Contact.deleted( end - datetime.timedelta(days=days), end) data_list = records['deletedRecords'] if len(data_list) > 0: fieldnames = list(data_list[0].keys()) with open(path, 'w') as f: writer = DictWriter(f, fieldnames=fieldnames, quoting=QUOTE_ALL) writer.writeheader() for delta_record in data_list: writer.writerow(delta_record) def fetch_report(self, name, report_url, airflow_date=None): """ Fetches a single prebuilt Salesforce report via an HTTP request :param name: Name of the report to fetch :param report_url: Base URL for the report :return: """ self.logger.info("Fetching report - %s" % name) sf_host = self.settings['salesforce']['host'] url = "%s%s?view=d&snip&export=1&enc=UTF-8&xf=csv" % (sf_host, report_url) resp = requests.get(url, headers=self.salesforce.headers, cookies={'sid': self.salesforce.session_id}, stream=True) path = self.create_output_path(name, airflow_date=airflow_date) with open(path, 'w+') as f: # Write the full contents f.write(resp.text.replace("\"", "")) # Remove the Salesforce footer (last 7 lines) f.seek(0, os.SEEK_END) pos = f.tell() - 1 count = 0 while pos > 0 and count < 7: pos -= 1 f.seek(pos, os.SEEK_SET) if f.read(1) == "\n": count += 1 # So long as we're not at the start of the file, delete all the characters ahead of this position if pos > 0: # preserve the last newline then truncate the file pos += 1 f.seek(pos, os.SEEK_SET) f.truncate() def fetch_soql_query_bulk(self, name, query, airflow_date=None): self.logger.info("BULK Executing %s" % name) self.logger.info("BULK Query is: %s" % query) if name == 'contacts' or name == 'contact_updates': table_name = 'Contact' elif name == 'opportunity' or name == 'opportunity_updates': table_name = 'Opportunity' job = self.salesforce_bulk.create_query_job(table_name, contentType='CSV', pk_chunking=True, concurrency='Parallel') self.logger.info("job: %s" % job) batch = self.salesforce_bulk.query(job, query) # job = '7504O00000LUxuCQAT' # batch = '7514O00000TvapeQAB' self.logger.info("Bulk batch created: %s" % batch) while True: batch_state = self.salesforce_bulk.batch_state( batch, job_id=job, reload=True).lower() if batch_state == 'notprocessed': self.logger.info("master batch is done") break elif batch_state == 'aborted' or batch_state == 'failed': self.logger.error("master batch failed") self.logger.error( self.salesforce_bulk.batch_status(batch_id=batch, job_id=job, reload=True)) raise Exception("master batch failed") self.logger.info("waiting for batch to be done. 
status=%s" % batch_state) time.sleep(10) count = 0 downloaded = {} pool = mp.Pool(5) while True: stats = {} batch_count = 0 all_batches = self.salesforce_bulk.get_batch_list(job) for batch_info in all_batches: batch_count += 1 batch_state = batch_info['state'].lower() if batch_state in stats: stats[batch_state] += 1 else: stats[batch_state] = 1 if batch_info['id'] == batch: #self.logger.debug("skipping the master batch id") continue elif batch_info['id'] in downloaded: #self.logger.debug("batch %s already downloaded" % batch_info['id']) continue if batch_state == 'completed': self.logger.debug( "batch %s (%s of %s)" % (batch_info['id'], batch_count, len(all_batches))) for result_id in self.salesforce_bulk.get_query_batch_result_ids( batch_info['id'], job_id=job): self.logger.debug("result_id: %s" % result_id) path = self.create_output_path( name, result_id, airflow_date=airflow_date) pool.apply_async( get_and_write_bulk_results, args=(batch_info['id'], result_id, job, self.salesforce_bulk.endpoint, self.salesforce_bulk.headers(), path)) downloaded[batch_info['id']] = 1 elif batch_state == 'failed': downloaded[batch_info['id']] = 1 self.logger.error("batch %s failed!" % batch_info['id']) self.logger.error( self.salesforce_bulk.batch_status( batch_id=batch_info['id'], job_id=job, reload=True)) if 'completed' in stats and stats['completed'] + 1 == batch_count: self.logger.info("all batches retrieved") break elif 'failed' in stats and stats['failed'] + 1 == batch_count: self.logger.error("NO batches retrieved") self.logger.error( self.salesforce_bulk.batch_status(batch_id=batch, job_id=job, reload=True)) raise Exception("NO batches retrieved") elif 'failed' in stats and stats['failed'] + stats[ 'completed'] == batch_count: self.logger.warning("all batches WITH SOME FAILURES") break else: self.logger.info(stats) time.sleep(5) try: self.salesforce_bulk.close_job(job) except: pass pool.close() pool.join() def fetch_soql_query(self, name, query, airflow_date=None): self.logger.info("Executing %s" % name) self.logger.info("Query is: %s" % query) path = self.create_output_path(name, airflow_date=airflow_date) result = self.salesforce.query(query) self.logger.info("First result set received") batch = 0 count = 0 if result['records']: fieldnames = list(result['records'][0].keys()) fieldnames.pop(0) # get rid of attributes with open(path, 'w') as f: writer = DictWriter(f, fieldnames=fieldnames, quoting=QUOTE_ALL) writer.writeheader() while True: batch += 1 for row in result['records']: # each row has a strange attributes key we don't want row.pop('attributes', None) out_dict = {} for key, value in row.items(): if type(value) is collections.OrderedDict: out_dict[key] = json.dumps(value) else: out_dict[key] = value writer.writerow(out_dict) count += 1 if count % 100000 == 0: self.logger.debug("%s rows fetched" % count) # fetch next batch if we're not done else break out of loop if not result['done']: result = self.salesforce.query_more( result['nextRecordsUrl'], True) else: break else: self.logger.warn("No results returned for %s" % name) def create_output_path(self, name, filename='output', airflow_date=None): output_dir = self.settings['output_dir'] if airflow_date: date = airflow_date else: date = time.strftime("%Y-%m-%d") child_dir = os.path.join(output_dir, name, date) if not os.path.exists(child_dir): os.makedirs(child_dir) filename = filename + ".csv" file_path = os.path.join(child_dir, filename) self.logger.info("Writing output to %s" % file_path) return file_path def create_custom_query(self, 
table_name='Contact', dir='/usr/local/salesforce_fetcher/queries', updates_only=False): """ The intention is to have Travis upload the "contact_fields.yaml" file to a bucket where it can be pulled down dynamically by this script and others (instead of having to rebuild the image on each change) """ fields_file_name = table_name.lower() + '_fields.yaml' fields_file = os.path.join(dir, fields_file_name) if not os.path.exists(fields_file): return with open(fields_file, 'r') as stream: columns = yaml.safe_load(stream) query = "SELECT " for field in columns['fields']: query += next(iter(field)) + ', ' query = query[:-2] + " FROM " + table_name if updates_only: query += " WHERE LastModifiedDate >= LAST_N_DAYS:3" return query def load_queries(self): """ load queries from an external directory :return: a dict containing all the SOQL queries to be executed """ queries = {} query_dir = self.settings['salesforce']['query_dir'] for file in os.listdir(query_dir): if file.endswith(".soql"): name, ext = os.path.splitext(file) query_file = os.path.join(query_dir, file) with open(query_file, 'r') as f: queries[name] = f.read().strip().replace('\n', ' ') # explicitly add the non-file queries queries['contacts'] = self.create_custom_query(table_name='Contact', dir=query_dir) queries['contact_updates'] = self.create_custom_query( table_name='Contact', dir=query_dir, updates_only=True) queries['opportunity'] = self.create_custom_query( table_name='Opportunity', dir=query_dir) queries['opportunity_updates'] = self.create_custom_query( table_name='Opportunity', dir=query_dir, updates_only=True) return queries
def load_records(test=False, target="Contact1000__c", count=10, batch_size=100000, username="******", password=None, token=None, sessionId=None, endpoint=None, return_records=False, field_spec = None): if not test: if username and password: sf = SalesforceBatch(username=username, password=password, token=token) else: sf = SalesforceBatch(sessionId=sessionId, endpoint=endpoint) user_ids = [r.Id for r in sf.query_salesforce("User", ["Id"], where="ReceivesAdminInfoEmails=true", limit=20).records] print "User ids: " + str(user_ids) bulk = SalesforceBulk(sessionId=sf.sessionId, host=sf.host) job = bulk.create_insert_job(target, concurrency="Parallel") else: user_ids = [1, 2, 3] record_generator.define_lookup("UserId", random_choices=user_ids) record_generator.define_lookup("Industry", random_choices=["Finance","Agriculture","Technology","Banking","Chemicals"]) record_generator.define_lookup("account_type", random_choices=["Analyst","Competitor","Customer","Integrator","Partner"]) global indexer indexer = 0 def gen_index(): global indexer indexer += 1 return "{0} {1}".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), indexer) record_generator.define_lookup("counter", callable=gen_index) output = open("records_{0}.json".format(target), "a") total = count batches = [] all_records = [] while count > 0: if field_spec: records = record_generator.mock_records(field_spec, count=batch_size) else: if 'Contact' in target: records = gen_Contact(min(count,batch_size)) else: records = gen_Account(min(count,batch_size)) if test: return list(records) print "Made batch of size {}".format(batch_size) if return_records: records = list(records) all_records += records records = iter(records) if total < 1000: # Use SOAP sf.insert_salesforce(target, records) count = 0 else: csv_gen = CsvDictsAdapter(records) print "Posting batch to BULK API" batch = bulk.post_bulk_batch(job, csv_gen) print "Posted: %s" % batch batches.append(batch) count -= batch_size for r in records: output.write(json.dumps(r)) output.write("\n") for b in batches: print "Waiting for %s" % b bulk.wait_for_batch(job, b) bulk.close_job(job) print "DONE!" if return_records: return all_records
def sf_Bulk_Login(username, password, security_token):
    sfBulk = SalesforceBulk(username=username, password=password,
                            security_token=security_token)
    print('logged in successfully')
    return sfBulk
class RESTConnector: def __init__(self, connector_param): self.connector_param = connector_param self.instance_url = 'https://' + connector_param.url_prefix + 'salesforce.com' self.token_url = 'https://' + connector_param.url_prefix + 'salesforce.com/services/oauth2/token' self.access_token = None self.get_token() self.bulk = SalesforceBulk(sessionId=self.access_token, host=urlparse(self.instance_url).hostname) def check_token(self): try: job = self.bulk.create_query_job(object, contentType='CSV') test_query = 'SELECT ID FROM Account LIMIT 1' batch = self.bulk.query(job, test_query) self.connector_wait(job, batch, 'Query done') self.bulk.close_job(job) return True except: return False def get_token(self): if self.access_token == None: cached_token = self.get_cached_token() if cached_token: self.access_token = cached_token if not self.check_token(): self.get_oauth2_token() else: self.get_oauth2_token() else: self.get_oauth2_token() return self.access_token def get_oauth2_token(self): req_param = { 'grant_type': 'password', 'client_id': self.connector_param.consumer_key, 'client_secret': self.connector_param.consumer_secret, 'username': self.connector_param.username, 'password': self.connector_param.password } result = requests.post( self.token_url, headers={"Content-Type": "application/x-www-form-urlencoded"}, data=req_param) result_dict = loads(result.content) if 'access_token' in result_dict.keys(): self.access_token = result_dict['access_token'] self.save_token() return result_dict['access_token'] else: print(result_dict) return None def get_cached_token(self): try: tokens_dict = load(open(session_file, 'r')) except: return None if self.connector_param.username in tokens_dict.keys(): return tokens_dict[self.connector_param.username] else: return None def save_token(self): tokens_dict = {} try: tokens_dict = load(open(session_file, 'r')) except: pass tokens_dict[self.connector_param.username] = self.access_token dump(tokens_dict, open(session_file, 'w')) def remove_token(self): tokens_dict = load(open(session_file, 'r')) tokens_dict.pop(self.connector_param.username, None) dump(tokens_dict, open(session_file, 'w')) def bulk_load(self, object, soql, header_columns=None, csv_file=None): try: job = self.bulk.create_query_job(object, contentType='CSV') except: self.access_token = None self.get_oauth2_token() job = self.bulk.create_query_job(object, contentType='CSV') batch = self.bulk.query(job, soql) self.connector_wait(job, batch, 'Query done') self.bulk.close_job(job) if csv_file: open_mode = 'w' with open(csv_file, open_mode) as f_csv: writer = csv.DictWriter(f_csv, fieldnames=header_columns) writer.writeheader() for row in self.bulk.get_batch_result_iter(job, batch, parse_csv=True): writer.writerow(row) else: data = [] for row in self.bulk.get_batch_result_iter(job, batch, parse_csv=True): data.append(row) return data def bulk_insert(self, object, data): job = self.bulk.create_insert_job(object, contentType='CSV') csv_iter = CsvDictsAdapter(iter(data)) batch = self.bulk.post_bulk_batch(job, csv_iter) self.connector_wait(job, batch, 'bulk insert done') # do not work should return Id`s of created elements # res = self.bulk.get_batch_result_iter(job,batch,parse_csv=False) self.bulk.close_job(job) def bulk_update(self, object, data): job = self.bulk.create_update_job(object, contentType='CSV') csv_iter = CsvDictsAdapter(iter(data)) batch = self.bulk.post_bulk_batch(job, csv_iter) self.connector_wait(job, batch, 'bulk update done') # do not work shuld return Id`s of created elements 
self.bulk.close_job(job) rows = [] for row in self.get_batch_result_iter(job, batch, parse_csv=False): rows.append(row) return rows def bulk_delete(self, object, where): delete_job = self.bulk.create_delete_job(object_name=object) delete_batch = self.bulk.bulk_delete(delete_job, object, where) self.bulk.wait_for_batch(delete_job, delete_batch) print('deletion done') def bulk_upsert(self, object, external_id_name, data): job = self.bulk.create_upsert_job(object_name=object, external_id_name=external_id_name) csv_iter = CsvDictsAdapter(iter(data)) batch = self.bulk.post_bulk_batch(job, csv_iter) self.connector_wait(job, batch, 'upserting done') self.bulk.close_job(job) rows = [] for row in self.get_batch_result_iter(job, batch, parse_csv=False): rows.append(row) return rows def connector_wait(self, job, batch, ending_message=''): wait_message = 'Wait for job done' clock = 0 while True: if clock == 10: clock = 0 if self.bulk.is_batch_done(job, batch): break sleep(0.5) clock = clock + 1 spin(wait_message) print('\r' + ending_message.ljust( len(ending_message) if len(ending_message) > len(wait_message) + 4 else len(wait_message) + 4)) self.bulk.wait_for_batch(job, batch) def get_batch_result_iter(self, job_id, batch_id, parse_csv=False, logger=None): """ **** This code snippet was taken from salesforce bulk library **** Return a line interator over the contents of a batch result document. If csv=True then parses the first line as the csv header and the iterator returns dicts. """ status = self.bulk.batch_status(job_id, batch_id) if status['state'] != 'Completed': return None elif logger: if 'numberRecordsProcessed' in status: logger("Bulk batch %d processed %s records" % (batch_id, status['numberRecordsProcessed'])) if 'numberRecordsFailed' in status: failed = int(status['numberRecordsFailed']) if failed > 0: logger("Bulk batch %d had %d failed records" % (batch_id, failed)) print(self.bulk.headers()) uri = self.bulk.endpoint + \ "/job/%s/batch/%s/result" % (job_id, batch_id) r = requests.get(uri, headers=self.bulk.headers(), stream=True) # print(type(r)) # print(r.text) # print(r.keys()) # result_id = r.text.split("<result>")[1].split("</result>")[0] # uri = self.bulk.endpoint + \ # "/job/%s/batch/%s/result/%s" % (job_id, batch_id, result_id) # r = requests.get(uri, headers=self.bulk.headers(), stream=True) if parse_csv: return csv.DictReader(r.iter_lines(chunk_size=2048), delimiter=",", quotechar='"') else: return r.iter_lines(chunk_size=2048)
from salesforce_bulk import SalesforceBulk, CsvDictsAdapter
import json

with open('data.json', 'r') as myfile:
    data = json.loads(myfile.read())

username = data["user"]
password = data["password"]
instance = data["instance"]
security_token = data["token"]

try:
    bulk = SalesforceBulk(username=username, password=password,
                          security_token=security_token)
    job = bulk.create_insert_job("Account", contentType='CSV')
    accounts = [dict(Name="Account%d" % idx) for idx in range(5, 10)]
    csv_iter = CsvDictsAdapter(iter(accounts))
    batch = bulk.post_batch(job, csv_iter)
    bulk.wait_for_batch(job, batch)
    bulk.close_job(job)
    result = bulk.get_batch_results(batch, job)
    jsonString = json.dumps(result)
    print(jsonString)
except Exception as e:
    raise Exception(e)  # re-raise instead of silently constructing the exception
class SalesforceBulkTestCase(unittest.TestCase): def setUp(self): login = salesforce_oauth_request.login( username=USERNAME, password=PASSWORD, token=SECURITY_TOKEN, client_id=CONSUMER_KEY, client_secret=CONSUMER_SECRET, cache_session=False, sandbox=True, ) self.bulk = SalesforceBulk(login['access_token'], login['endpoint']) self.jobs = [] def tearDown(self): if hasattr(self, 'bulk'): for job_id in self.jobs: print "Closing job: %s" % job_id self.bulk.close_job(job_id) def test_raw_query(self): job_id = self.bulk.create_query_job("Contact") self.jobs.append(job_id) self.assertIsNotNone(re.match("\w+", job_id)) batch_id = self.bulk.query(job_id, "Select Id,Name,Email from Contact Limit 1000") self.assertIsNotNone(re.match("\w+", batch_id)) while not self.bulk.is_batch_done(job_id, batch_id): print "Job not done yet..." print self.bulk.batch_status(job_id, batch_id) time.sleep(2) self.results = "" def save_results(tfile, **kwargs): print "in save results" self.results = tfile.read() flag = self.bulk.get_batch_results(job_id, batch_id, callback = save_results) self.assertTrue(flag) self.assertTrue(len(self.results) > 0) self.assertIn('"', self.results) def test_csv_query(self): job_id = self.bulk.create_query_job("Account") self.jobs.append(job_id) self.assertIsNotNone(re.match("\w+", job_id)) batch_id = self.bulk.query(job_id, "Select Id,Name,Description from Account Limit 10000") self.assertIsNotNone(re.match("\w+", batch_id)) self.bulk.wait_for_batch(job_id, batch_id, timeout=120) self.results = None def save_results1(rows, **kwargs): self.results = rows flag = self.bulk.get_batch_results(job_id, batch_id, callback = save_results1, parse_csv=True) self.assertTrue(flag) results = self.results self.assertTrue(len(results) > 0) self.assertTrue(isinstance(results,list)) self.assertEqual(results[0], ['Id','Name','Description']) self.assertTrue(len(results) > 3) self.results = None self.callback_count = 0 def save_results2(rows, **kwargs): self.results = rows print rows self.callback_count += 1 batch = len(results) / 3 self.callback_count = 0 flag = self.bulk.get_batch_results(job_id, batch_id, callback = save_results2, parse_csv=True, batch_size=batch) self.assertTrue(self.callback_count >= 3) def test_csv_upload(self): job_id = self.bulk.create_insert_job("Contact") self.jobs.append(job_id) self.assertIsNotNone(re.match("\w+", job_id)) batch_ids = [] content = open("example.csv").read() for i in range(5): batch_id = self.bulk.query(job_id, content) self.assertIsNotNone(re.match("\w+", batch_id)) batch_ids.append(batch_id) for batch_id in batch_ids: self.bulk.wait_for_batch(job_id, batch_id, timeout=120) self.results = None def save_results1(rows, failed, remaining): self.results = rows for batch_id in batch_ids: flag = self.bulk.get_upload_results(job_id, batch_id, callback = save_results1) self.assertTrue(flag) results = self.results self.assertTrue(len(results) > 0) self.assertTrue(isinstance(results,list)) self.assertEqual(results[0], UploadResult('Id','Success','Created','Error')) self.assertEqual(len(results), 3) self.results = None self.callback_count = 0 def save_results2(rows, failed, remaining): self.results = rows self.callback_count += 1 batch = len(results) / 3 self.callback_count = 0 flag = self.bulk.get_upload_results(job_id, batch_id, callback = save_results2, batch_size=batch) self.assertTrue(self.callback_count >= 3)
with open(file_name, 'w+', encoding='utf-8') as csv_file:
    write_header = csv.writer(csv_file)
    write_header.writerow(config.csv_header)

# Create the time_log file that will be used for the daily delta date comparison
time_log = open("run_time.txt", "a")
time_log.write(run_time + "\n")

# Create the log file and write the time the program is run
log = open("log.txt", "a")
log.write("\n" + "|---------------------------------------|" + "\n")
log.write("PROGRAM STARTED: ")
log.write(datetime.now().ctime())
log.write("\n" + "|---------------------------------------|" + "\n")

# Set the Salesforce username, password, and token
sf = SalesforceBulk(username=config.salesforce["username"],
                    password=config.salesforce["password"],
                    sandbox=True,
                    security_token=config.salesforce["token"])

try:
    # Set the sftp hostkeys (if any)
    cnopts = pysftp.CnOpts()
    cnopts.hostkeys = None
except Exception as e:
    pass
else:
    pass

# Set the sftp host, username, and password (optional parameter: port="22")
sftp = pysftp.Connection(host=config.sftp["host"],
                         username=config.sftp["username"],
                         password=config.sftp["password"],
                         cnopts=cnopts)

# Build a dynamic User list, format the string, and create a variable that can be used in the SOQL filter
def test_upload_with_mapping_file(self): if self.contentType != 'CSV': print('Mapping file can only be used with CSV content') return bulk = SalesforceBulk(self.sessionId, self.endpoint) self.bulk = bulk job_id = bulk.create_insert_job("Contact", contentType=self.contentType) self.jobs.append(job_id) self.assertIsNotNone(re.match("\w+", job_id)) batch_ids = [] data = [ { 'Not FirstName': 'BulkTestFirst%s' % i, 'Arbitrary Field': 'BulkLastName', 'Phone': '555-555-5555', } for i in range(50) ] mapping_data = [ { "Salesforce Field": "FirstName", "Csv Header": "NotFirstName", "Value": "", "Hint": "" }, { "Salesforce Field": "Phone", "Csv Header": "Phone", "Value": "", "Hint": "" }, { "Salesforce Field": "LastName", "Csv Header": "Arbitrary Field", "Value": "", "Hint": "" } ] mapping_data = self.generate_content(mapping_data) bulk.post_mapping_file(job_id,mapping_data) for i in range(2): content = self.generate_content(data) batch_id = bulk.post_batch(job_id, content) self.assertIsNotNone(re.match("\w+", batch_id)) batch_ids.append(batch_id) bulk.close_job(job_id) for batch_id in batch_ids: bulk.wait_for_batch(job_id, batch_id, timeout=120) for batch_id in batch_ids: results = bulk.get_batch_results(batch_id) print(results) self.assertTrue(len(results) > 0) self.assertTrue(isinstance(results, list)) self.assertTrue(isinstance(results[0], UploadResult)) self.assertEqual(len(results), 50)
# ID Extraction from Salesforce and saving to local
import json
from time import sleep

from salesforce_bulk import SalesforceBulk
from salesforce_bulk.util import IteratorBytesIO
from salesforce_bulk import CsvDictsAdapter
import pandas as pd
import unicodecsv

import config as cfg

# Authentication
bulk = SalesforceBulk(username=cfg.USERNAME, password=cfg.PASSWORD,
                      security_token=cfg.SECURITY_KEY, sandbox=True)

# Source CSV File path for Account
input_file = "/home/baadmin/NCT_ETL/input_files/pg_extract_prd/InstallmentId_sf.csv"

# Target SFDC Object name
target_obj = "cpm__Installment__c"

# Mapping of Input csv Fields to SalesForce Fields
sf_fields = ['Contact_Key__c', 'cpm__Contact__c', 'Installment_Key__c', 'Id']

# Extract the data from salesforce and save it to csv
job = bulk.create_query_job(target_obj, contentType='CSV')
sql = "SELECT " + ",".join(sf_fields) + " FROM " + target_obj
batch = bulk.query(job, sql)
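# Hedged continuation sketch (not in the original source): one way to wait for the
# CSV batch created above and stream its rows into a local file; output_csv is a
# hypothetical path, and sleep/unicodecsv are already imported in the block above.
bulk.close_job(job)
while not bulk.is_batch_done(batch):
    sleep(10)  # poll until the query batch finishes

output_csv = "/tmp/installment_ids.csv"  # hypothetical output path
with open(output_csv, "wb") as out:
    writer = unicodecsv.writer(out, encoding="utf-8")
    writer.writerow(sf_fields)
    for result in bulk.get_all_results_for_query_batch(batch, job):
        reader = unicodecsv.DictReader(result, encoding="utf-8")
        for row in reader:
            writer.writerow([row[field] for field in sf_fields])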
class SalesforceBulkTests(unittest.TestCase): def setUp(self): request_patcher = mock.patch('simple_salesforce.api.requests') self.mockrequest = request_patcher.start() self.addCleanup(request_patcher.stop) self.sessionId = '12345' self.host = 'https://example.com' self.bulk = SalesforceBulk(self.sessionId, self.host) def test_headers_default(self): self.assertEqual( self.bulk.headers(), { 'X-SFDC-Session': self.sessionId, 'Content-Type': 'application/xml; charset=UTF-8', 'Accept-Encoding': 'gzip', } ) def test_headers_json(self): self.assertEqual( self.bulk.headers(content_type='application/json'), { 'X-SFDC-Session': self.sessionId, 'Content-Type': 'application/json; charset=UTF-8', 'Accept-Encoding': 'gzip', } ) def test_create_job_doc(self): doc = self.bulk.create_job_doc( 'Contact', 'insert' ) tree = ET.fromstring(doc) operation = tree.findtext('{%s}operation' % self.bulk.jobNS) self.assertEqual(operation, 'insert') obj = tree.findtext('{%s}object' % self.bulk.jobNS) self.assertEqual(obj, 'Contact') contentType = tree.findtext('{%s}contentType' % self.bulk.jobNS) self.assertEqual(contentType, 'CSV') concurrencyMode = tree.findtext('{%s}concurrencyMode' % self.bulk.jobNS) self.assertIsNone(concurrencyMode) extIdField = tree.findtext('{%s}externalIdFieldName' % self.bulk.jobNS) self.assertIsNone(extIdField) def test_create_job_doc_concurrency(self): doc = self.bulk.create_job_doc( 'Contact', 'insert', concurrency='Serial' ) tree = ET.fromstring(doc) operation = tree.findtext('{%s}operation' % self.bulk.jobNS) self.assertEqual(operation, 'insert') obj = tree.findtext('{%s}object' % self.bulk.jobNS) self.assertEqual(obj, 'Contact') contentType = tree.findtext('{%s}contentType' % self.bulk.jobNS) self.assertEqual(contentType, 'CSV') concurrencyMode = tree.findtext('{%s}concurrencyMode' % self.bulk.jobNS) self.assertEqual(concurrencyMode, 'Serial') extIdField = tree.findtext('{%s}externalIdFieldName' % self.bulk.jobNS) self.assertIsNone(extIdField) def test_create_job_doc_external_id(self): doc = self.bulk.create_job_doc( 'Contact', 'upsert', external_id_name='ext_id__c' ) tree = ET.fromstring(doc) operation = tree.findtext('{%s}operation' % self.bulk.jobNS) self.assertEqual(operation, 'upsert') obj = tree.findtext('{%s}object' % self.bulk.jobNS) self.assertEqual(obj, 'Contact') contentType = tree.findtext('{%s}contentType' % self.bulk.jobNS) self.assertEqual(contentType, 'CSV') concurrencyMode = tree.findtext('{%s}concurrencyMode' % self.bulk.jobNS) self.assertIsNone(concurrencyMode) extIdField = tree.findtext('{%s}externalIdFieldName' % self.bulk.jobNS) self.assertEqual(extIdField, 'ext_id__c') def test_create_job_doc_json(self): doc = self.bulk.create_job_doc( 'Contact', 'insert', contentType='JSON' ) tree = ET.fromstring(doc) operation = tree.findtext('{%s}operation' % self.bulk.jobNS) self.assertEqual(operation, 'insert') obj = tree.findtext('{%s}object' % self.bulk.jobNS) self.assertEqual(obj, 'Contact') contentType = tree.findtext('{%s}contentType' % self.bulk.jobNS) self.assertEqual(contentType, 'JSON') concurrencyMode = tree.findtext('{%s}concurrencyMode' % self.bulk.jobNS) self.assertIsNone(concurrencyMode) extIdField = tree.findtext('{%s}externalIdFieldName' % self.bulk.jobNS) self.assertIsNone(extIdField) def test_create_close_job_doc(self): doc = self.bulk.create_close_job_doc() tree = ET.fromstring(doc) state = tree.findtext('{%s}state' % self.bulk.jobNS) self.assertEqual(state, 'Closed') def test_create_abort_job_doc(self): doc = self.bulk.create_abort_job_doc() tree = 
ET.fromstring(doc) state = tree.findtext('{%s}state' % self.bulk.jobNS) self.assertEqual(state, 'Aborted') def test_pickle_roundtrip_bulk_api_error_no_status(self): s = pickle.dumps(BulkApiError('message')) e = pickle.loads(s) assert e.__class__ is BulkApiError assert e.args[0] == 'message' assert e.status_code is None def test_pickle_roundtrip_bulk_api_error_no_status_code(self): s = pickle.dumps(BulkApiError('message', 400)) e = pickle.loads(s) assert e.__class__ is BulkApiError assert e.args[0] == 'message' assert e.status_code == 400 def test_pickle_roundtrip_bulk_job_aborted(self): orig = BulkJobAborted('sfid1234') s = pickle.dumps(orig) e = pickle.loads(s) assert e.__class__ is BulkJobAborted assert e.job_id == 'sfid1234' assert 'sfid1234' in e.args[0] assert e.args[0] == orig.args[0] def test_pickle_roundtrip_bulk_batch_failed(self): orig = BulkBatchFailed('sfid1234', 'sfid5678', 'some thing happened') s = pickle.dumps(orig) e = pickle.loads(s) assert e.__class__ is BulkBatchFailed assert e.job_id == 'sfid1234' assert e.batch_id == 'sfid5678' assert e.state_message == 'some thing happened' assert 'sfid1234' in e.args[0] assert 'sfid5678' in e.args[0] assert 'some thing happened' in e.args[0] assert orig.args[0] == e.args[0]