Example no. 1
def sf12Query(lastHourDateTime, bulkOld=bulkOld):
    intLead = []
    intOppo = []
    batch = buildQueryBatch(bulkOld, sfConstants.interactionSF12, 'interaction__c', lastHourDateTime)
    data = []
    for result in bulkOld.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            data.append(row)
            print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath+'Qinteraction12.txt', data)

    batch = buildQueryBatch(bulkOld, sfConstants.opportunitySF12, 'opportunity', lastHourDateTime, intOppo)
    data = []
    for result in bulkOld.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            if row['Lead__c'] and row['Lead__c'] not in intLead:
                data.append(row)
            print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath+'Qoppo12.txt', data)

    batch = buildQueryBatch(bulkOld, sfConstants.leadSF12, 'lead', lastHourDateTime, intLead)
    while not bulkOld.is_batch_done(batch):
        time.sleep(10)
    data = []
    for result in bulkOld.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            if (row['MobilePhone'] or row['Phone'] or row['Email']) or row['Id'] in intLead:
                data.append(row)
                print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath+'Qlead12.txt', data)
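All of these examples share one read pattern: the Bulk API hands back each result chunk as an iterator of raw bytes, IteratorBytesIO wraps that iterator in a file-like object, and json.load parses it. A minimal, self-contained sketch of the pattern with made-up byte chunks, assuming the usual salesforce_bulk.util import path (buildQueryBatch, sfConstants, writeTxt, logPath and bulkOld above are module-level helpers not shown in the example):

import json
from salesforce_bulk.util import IteratorBytesIO

# Stand-in for the iterator of byte chunks that get_all_results_for_query_batch yields
chunks = iter([b'[{"Id": "001000000000001"}', b', {"Id": "001000000000002"}]'])
rows = json.load(IteratorBytesIO(chunks))
print(rows)  # [{'Id': '001000000000001'}, {'Id': '001000000000002'}]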
Example no. 2
    def test_perform_bulk_api_pass_converts_datetimes(self, bulk_proxy):
        connection = Mock()

        oc = amaxa.ExtractOperation(connection)
        oc.get_field_map = Mock(return_value={
            'CreatedDate': {
                'name': 'CreatedDate',
                'type': 'datetime'
            }
        })
        retval = [{'Id': '001000000000001', 'CreatedDate': 1546659665000}]
        bulk_proxy.is_batch_done = Mock(side_effect=[False, True])
        bulk_proxy.create_query_job = Mock(return_value='075000000000000AAA')
        bulk_proxy.get_all_results_for_query_batch = Mock(return_value=[
            IteratorBytesIO([json.dumps(retval).encode('utf-8')])
        ])

        step = amaxa.ExtractionStep('Account', amaxa.ExtractionScope.QUERY,
                                    ['CreatedDate'])
        step.store_result = Mock()
        oc.add_step(step)
        step.initialize()

        step.perform_bulk_api_pass('SELECT Id, CreatedDate FROM Account')
        step.store_result.assert_called_once_with({
            'Id': '001000000000001',
            'CreatedDate': '2019-01-05T03:41:05.000+0000'
        })
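This test (and Example no. 6 further down) expects the epoch-millisecond CreatedDate returned by the Bulk API to come back as an ISO-8601 string. A worked sketch of that conversion, illustrative only and not the library's actual implementation:

from datetime import datetime, timezone

millis = 1546659665000  # value used in the test above; an exact second, so '.000' is safe to hard-code
dt = datetime.fromtimestamp(millis / 1000, tz=timezone.utc)
print(dt.strftime('%Y-%m-%dT%H:%M:%S.000+0000'))  # 2019-01-05T03:41:05.000+0000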
Example no. 3
    def test_perform_bulk_api_pass_stores_results(self, bulk_proxy):
        connection = Mock()

        oc = amaxa.ExtractOperation(connection)
        oc.get_field_map = Mock(
            return_value={
                'Lookup__c': {
                    'name': 'Lookup__c',
                    'type': 'reference',
                    'referenceTo': ['Account']
                }
            })
        retval = [{'Id': '001000000000001'}, {'Id': '001000000000002'}]
        bulk_proxy.is_batch_done = Mock(side_effect=[False, True])
        bulk_proxy.create_query_job = Mock(return_value='075000000000000AAA')
        bulk_proxy.get_all_results_for_query_batch = Mock(return_value=[
            IteratorBytesIO([json.dumps(retval).encode('utf-8')])
        ])

        step = amaxa.ExtractionStep('Account',
                                    amaxa.ExtractionScope.ALL_RECORDS,
                                    ['Lookup__c'])
        step.store_result = Mock()
        oc.add_step(step)
        step.initialize()

        step.perform_bulk_api_pass('SELECT Id FROM Account')
        step.store_result.assert_any_call(retval[0])
        step.store_result.assert_any_call(retval[1])
Example no. 4
    def _store_inserted_ids_for_batch(
        self, result_file, local_ids, id_table_name, conn
    ):
        # Set up a function to generate rows based on this result file
        def produce_csv():
            """Iterate over job results and prepare rows for id table"""
            reader = unicodecsv.reader(result_file)
            next(reader)  # skip header
            i = 0
            for row, local_id in zip(reader, local_ids):
                if row[1] == "true":  # Success
                    sf_id = row[0]
                    yield "{},{}\n".format(local_id, sf_id).encode("utf-8")
                else:
                    if self.options["ignore_row_errors"]:
                        self.logger.warning(
                            "      Error on row {}: {}".format(i, row[3])
                        )
                    else:
                        raise BulkDataException("Error on row {}: {}".format(i, row[3]))
                i += 1

        # Bulk insert rows into id table
        columns = ("id", "sf_id")
        data_file = IteratorBytesIO(produce_csv())
        self._sql_bulk_insert_from_csv(conn, id_table_name, columns, data_file)
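Here the same wrapper is used in the other direction: produce_csv is a generator of UTF-8 encoded CSV lines, and IteratorBytesIO turns it into a readable stream for _sql_bulk_insert_from_csv. A small illustrative sketch of the idea with made-up rows, assuming IteratorBytesIO exposes the standard read() interface (the json.load calls elsewhere in these examples rely on the same thing):

from salesforce_bulk.util import IteratorBytesIO

def produce_csv():
    # Hypothetical id/sf_id pairs, encoded the same way as in the example above
    for local_id, sf_id in [("1", "0010000000000AAA"), ("2", "0010000000000AAB")]:
        yield "{},{}\n".format(local_id, sf_id).encode("utf-8")

data_file = IteratorBytesIO(produce_csv())
print(data_file.read().decode("utf-8"))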
Example no. 5
    def get_query_records_dict(self, db_table, soql_query):
        """Execute a bulk Salesforce SOQL query and return the results as a list of dictionaries.

        :param db_table: Salesforce object (table) name
        :param soql_query: SOQL query string
        :return: List of result record dictionaries; an empty list if nothing matched
        """
        self.bulk = SalesforceBulk(sessionId=self.session_id,
                                   host=self.instance)
        job = self.bulk.create_query_job(db_table, contentType="JSON")
        batch = self.bulk.query(job, soql_query)
        self.bulk.close_job(job)
        while not self.bulk.is_batch_done(batch):
            print("Waiting for batch query to complete")
            sleep(10)

        dict_records = []
        rec_count = 0
        print("Iterating through batch result set")
        for result in self.bulk.get_all_results_for_query_batch(batch):
            result = json.load(IteratorBytesIO(result))
            for row in result:
                rec_count += 1
                dict_records.append(row)
            print("Current fetched record count: ", rec_count)

        return dict_records
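A hypothetical call, assuming client is an instance of the class that defines get_query_records_dict and already holds a valid session_id and instance:

records = client.get_query_records_dict('Contact', "SELECT Id, Email FROM Contact")
print('Total records fetched:', len(records))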
Example no. 6
    def test_bulk_query_converts_datetimes(self):
        sf = Mock()
        sf.bulk_url = "https://salesforce.com"

        conn = Connection(sf)
        conn._bulk = Mock()

        retval = [
            {"Id": "001000000000001", "CreatedDate": 1546659665000},
            {"Id": "001000000000002", "CreatedDate": None},
        ]
        conn._bulk.is_batch_done = Mock(side_effect=[False, True])
        conn._bulk.create_query_job = Mock(return_value="075000000000000AAA")
        conn._bulk.get_all_results_for_query_batch = Mock(
            return_value=[IteratorBytesIO([json.dumps(retval).encode("utf-8")])]
        )

        results = list(
            conn.bulk_api_query(
                "Account", "SELECT Id, CreatedDate FROM Account", ["CreatedDate"], 5
            )
        )

        self.assertEqual(
            results[0],
            {"Id": "001000000000001", "CreatedDate": "2019-01-05T03:41:05.000+0000"},
        )
        self.assertEqual(results[1], {"Id": "001000000000002", "CreatedDate": None})
Example no. 7
def query_from_db(*fields, object_name=None, where_clause=None):
    if object_name is None:
        raise ValueError('object_name is not provided.')
    job = bulk.create_query_job(object_name, contentType='JSON')
    query = 'select ' + ', '.join(fields) + ' from ' + object_name
    if where_clause is not None:
        query += ' where ' + where_clause
    logging.info(query)
    batch = bulk.query(job, query)
    bulk.close_job(job)
    try:
        while not bulk.is_batch_done(batch):
            logging.info('batch status: %s', bulk.batch_status(batch_id=batch)['state'])
            time.sleep(10)
    finally:
        if not bulk.is_batch_done(batch):
            bulk.abort_job(job)
            logging.info('aborted job')

    records = []
    for result in bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            records.append(row)
    return records
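query_from_db relies on a module-level bulk client (a SalesforceBulk instance that is not shown here). A hypothetical call might look like this:

accounts = query_from_db('Id', 'Name', object_name='Account',
                         where_clause="CreatedDate = TODAY")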
Example no. 8
def cleanOppoSF2(bulkNew=bulkNew):
    job = bulkNew.create_query_job("Opportunity", contentType='JSON')
    batch = bulkNew.query(job, "select Id, AccountId, LeadId__c, OldSalesforceExtID__c from Opportunity where LeadId__c = null and AccountId = null")
    bulkNew.close_job(job)
    while not bulkNew.is_batch_done(batch):
        time.sleep(10)
    data = []
    datafull = []
    for result in bulkNew.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            # row['OldSalesforceExtID__c'] = row['Id']
            datafull.append(row)
            row.pop('OldSalesforceExtID__c', None)
            row.pop('AccountId', None)
            row.pop('LeadId__c', None)
            row.pop('attributes', None)
            data.append(row)
            print(json.dumps(row, indent=4, sort_keys=True))

    if data:
        job_id = bulkNew.create_delete_job("Opportunity", contentType='CSV')
        content = CsvDictsAdapter(iter(data))
        batch_id = bulkNew.post_batch(job_id, content)
        bulkNew.wait_for_batch(job_id, batch_id, timeout=120)
        results = bulkNew.get_batch_results(batch_id)
        for i in range(len(data)):
            datafull[i]['resultId-success-created-error'] = results[i]
        print(json.dumps(datafull, indent=4, sort_keys=True))
        bulkNew.close_job(job_id)
        print("Clean opportunity done.")
    else:
        print("No opportunity to clean")
    writeTxt(logPath+'CleanOppo12.txt', datafull)
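writeTxt and logPath appear throughout these examples but are never defined. A minimal stand-in, purely for illustration, could dump the collected rows as indented JSON text:

import json

logPath = './logs/'  # hypothetical location used by the examples

def writeTxt(path, rows):
    # Serialize the collected result rows so each run leaves an audit file behind
    with open(path, 'w') as fh:
        fh.write(json.dumps(rows, indent=4, sort_keys=True, default=str))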
Example no. 9
    def _import_results(self, mapping, result_file, conn):
        # Map SF field names to local db column names
        sf_header = result_file.readline().strip().decode("utf-8").split(",")
        columns = []
        for sf in sf_header:
            if sf == "Records not found for this query":
                return
            if sf:
                column = mapping["fields"].get(sf)
                if not column:
                    column = mapping.get("lookups",
                                         {}).get(sf, {}).get("key_field")
                if column:
                    columns.append(column)
        if not columns:
            return
        record_type = mapping.get("record_type")
        if record_type:
            columns.append("record_type")

        processor = log_progress(
            process_incoming_rows(result_file, record_type), self.logger)
        data_file = IteratorBytesIO(processor)
        self._sql_bulk_insert_from_csv(conn, mapping["table"], columns,
                                       data_file)
        self.session.commit()
Example no. 10
    def test_perform_bulk_api_pass_stores_high_volume_results(self, bulk_proxy):
        connection = Mock()

        oc = amaxa.ExtractOperation(connection)
        oc.get_field_map = Mock(return_value={
            'Lookup__c': {
                'name': 'Lookup__c',
                'type': 'reference',
                'referenceTo': ['Account']
            }
        })
        retval = []
        for i in range(5):
            retval.append([
                {'Id': '00100000{:d}{:06d}'.format(i, j), 'Name': 'Account {:d}{:06d}'.format(i, j)}
                for j in range(100000)
            ])

        bulk_proxy.is_batch_done = Mock(side_effect=[False, True])
        bulk_proxy.create_query_job = Mock(return_value='075000000000000AAA')
        bulk_proxy.get_all_results_for_query_batch = Mock(
            return_value=[IteratorBytesIO([json.dumps(chunk).encode('utf-8')]) for chunk in retval]
        )

        step = amaxa.ExtractionStep('Account', amaxa.ExtractionScope.ALL_RECORDS, ['Lookup__c'])
        step.store_result = Mock()
        oc.add_step(step)
        step.initialize()

        step.perform_bulk_api_pass('SELECT Id FROM Account')
        self.assertEqual(500000, step.store_result.call_count)
Example no. 11
    def test_bulk_api_query(self):  # FIXME: test wait
        sf = Mock()
        sf.bulk_url = "https://salesforce.com"

        conn = Connection(sf)
        conn._bulk = Mock()

        retval = [{"Id": "001000000000001"}, {"Id": "001000000000002"}]
        conn._bulk.is_batch_done = Mock(side_effect=[False, True])
        conn._bulk.create_query_job = Mock(return_value="075000000000000AAA")
        conn._bulk.get_all_results_for_query_batch = Mock(
            return_value=[IteratorBytesIO([json.dumps(retval).encode("utf-8")])]
        )

        results = list(conn.bulk_api_query("Account", "SELECT Id FROM Account", [], 5))
        conn._bulk.query.assert_called_once_with(
            "075000000000000AAA", "SELECT Id FROM Account"
        )
        self.assertEqual(
            conn._bulk.is_batch_done.call_args_list,
            [call(conn._bulk.query.return_value), call(conn._bulk.query.return_value)],
        )
        conn._bulk.get_all_results_for_query_batch.assert_called_once_with(
            conn._bulk.query.return_value
        )

        self.assertEqual(retval, results)
Example no. 12
    def _import_results(self, mapping, result_file, conn):
        # Map SF field names to local db column names
        sf_header = [
            name.strip('"')
            for name in result_file.readline().strip().decode("utf-8").split(",")
        ]
        columns = []
        lookup_keys = []
        for sf in sf_header:
            if sf == "Records not found for this query":
                return
            if sf:
                column = mapping.get("fields", {}).get(sf)
                if not column:
                    lookup = mapping.get("lookups", {}).get(sf, {})
                    if lookup:
                        lookup_keys.append(sf)
                        column = get_lookup_key_field(lookup, sf)
                if column:
                    columns.append(column)
        if not columns:
            return
        record_type = mapping.get("record_type")
        if record_type:
            columns.append("record_type")

        processor = log_progress(
            process_incoming_rows(result_file, record_type), self.logger
        )
        data_file = IteratorBytesIO(processor)
        if mapping["oid_as_pk"]:
            self._sql_bulk_insert_from_csv(conn, mapping["table"], columns, data_file)
        else:
            # If using the autogenerated id field, split out the CSV file from the Bulk API
            # into two separate files and load into the main table and the sf_id_table
            with tempfile.TemporaryFile("w+b") as f_values:
                with tempfile.TemporaryFile("w+b") as f_ids:
                    data_file_values, data_file_ids = self._split_batch_csv(
                        data_file, f_values, f_ids
                    )
                    self._sql_bulk_insert_from_csv(
                        conn, mapping["table"], columns, data_file_values
                    )
                    self._sql_bulk_insert_from_csv(
                        conn, mapping["sf_id_table"], ["sf_id"], data_file_ids
                    )

        if "RecordTypeId" in mapping["fields"]:
            self._extract_record_types(
                mapping["sf_object"], mapping["record_type_table"], conn
            )

        self.session.commit()

        if lookup_keys and not mapping["oid_as_pk"]:
            self._convert_lookups_to_id(mapping, lookup_keys)
Example no. 13
def sf21Query(lastHourDateTime, bulkNew=bulkNew):
    intLead = []
    intOppo = []
    batch = buildQueryBatch(bulkNew, sfConstants.eventSF21, 'event', lastHourDateTime, newToOld = True)
    data = []
    for result in bulkNew.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            if not row['WhatId']: #Closer Appt, query lead
                # intLead.append(row['NewSalesforceLeadId__c'])
                data.append(row)
            # if row['WhatId']: #CAD Appt, query opportunity
                # intOppo.append(row['WhatId'])
                # data.append(row)  #not syncing CAD meeting before we map the external id on Opportunity
            print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath+'Qinteraction21.txt', data)

    batch = buildQueryBatch(bulkNew, sfConstants.opportunitySF21, 'opportunity', lastHourDateTime, intOppo, newToOld = True)
    data = []
    for result in bulkNew.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            data.append(row)
            print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath+'Qoppo21.txt', data)

    batch = buildQueryBatch(bulkNew, sfConstants.leadSF21, 'lead', lastHourDateTime, intLead, newToOld = True)
    while not bulkNew.is_batch_done(batch):
        time.sleep(10)
    data = []
    for result in bulkNew.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            if (row['MobilePhone'] or row['Phone'] or row['Email']) or row['Id'] in intLead:
                data.append(row)
                print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath+'Qlead21.txt', data)
Example no. 14
def checkLeads1(bulk = bulkOld):
    job = bulk.create_query_job("Lead", contentType='JSON')
    batch = bulk.query(job, "Select id, Name, CreatedDate From Lead where NewSalesforceExtId__c = null and IsConverted = false")
    bulk.close_job(job)
    while not bulk.is_batch_done(batch):
        time.sleep(10)
    msg = ''
    for result in bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        leadCount = len(result)
        if leadCount > 0:
            msg = '<h1># of Leads in SF1 that do not have a NewSalesforceExtId: '+str(leadCount)+'</h1>'
        for row in result:
            msg += '<p>'+row['Name']+' @ '+secondToTime(int(row['CreatedDate']))+' <a href="'+oldSFURl+str(row['Id'])+'" >SF1</a>'
    if msg:
        print(msg)
        errorEmail('Orphan leads', msg)
Example no. 15
    def _process_job_results(self, mapping, job_id, local_ids_for_batch):
        """Get the job results and process the results. If we're raising for
        row-level errors, do so; if we're inserting, store the new Ids."""
        if mapping["action"] == "insert":
            id_table_name = self._reset_id_table(mapping)
            conn = self.session.connection()

        for batch_id, local_ids in local_ids_for_batch.items():
            try:
                results_url = (
                    f"{self.bulk.endpoint}/job/{job_id}/batch/{batch_id}/result"
                )
                # Download entire result file to a temporary file first
                # to avoid the server dropping connections
                with download_file(results_url, self.bulk) as f:
                    self.logger.info(
                        f"  Downloaded results for batch {batch_id}")
                    results_generator = self._generate_results_id_map(
                        f, local_ids)
                    if mapping["action"] == "insert":
                        self._sql_bulk_insert_from_csv(
                            conn,
                            id_table_name,
                            ("id", "sf_id"),
                            IteratorBytesIO(results_generator),
                        )
                        self.logger.info(
                            f"  Updated {id_table_name} for batch {batch_id}")
                    else:
                        for r in results_generator:
                            pass  # Drain generator to validate results

            except BulkDataException:
                raise
            except Exception as e:
                raise BulkDataException(
                    f"Failed to download results for batch {batch_id} ({str(e)})"
                )

        if mapping["action"] == "insert":
            self.session.commit()
Example no. 16
def check1212(bulk = bulkOld, bulkDev = bulkNew):
    timeStamp = time.strftime("%d/%m/%Y")
    job = bulk.create_query_job("Opportunity", contentType='JSON')
    batch = bulk.query(job, "select Id, Name, CreatedDate, leadId__c from Opportunity where CreatedDate = today")
    bulk.close_job(job)
    while not bulk.is_batch_done(batch):
        time.sleep(10)
    oppoExt = []
    convertedOppo = {}
    for result in bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        oppoCount = len(result)
        msg = '<h1># of Opportunities converted in SF1: '+str(oppoCount)+'</h1>'
        for row in result:
            # print json.dumps(row, indent=4, sort_keys=True)
            convertedOppo[row['LeadId__c']] = {'createdDate':row['CreatedDate'],
                                        'oldSFId':row['Id'],
                                        'oldLeadId':row['LeadId__c'],
                                        'Name':row['Name']}
            oppoExt.append(row['LeadId__c'])
            

    job = bulk.create_query_job("Interaction__c", contentType='JSON')
    batch = bulk.query(job, "select Id, Name, CreatedDate, Subject__c, NewSalesforceExtID__c from Interaction__c where CreatedDate = today and (Subject__c = 'Closer Appointment' or Subject__c = 'CAD Appointment') AND CreatedById != '00539000005GkosAAC' order by CreatedDate")
    bulk.close_job(job)
    while not bulk.is_batch_done(batch):
        time.sleep(10)
    for result in bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        oppoCount = len(result)
        msgInteraction = '<h1># of Interactions created in SF1: '+str(oppoCount)+'</h1>'
        for row in result:
            msgInteraction += '<p>@'+secondToTime(int(row['CreatedDate']))+' <a href="'+oldSFURl+str(row['Id'])+'" >SF1</a>'
            if row['NewSalesforceExtID__c']:
                msgInteraction += ' & <a href="'+newSFURl+str(row['NewSalesforceExtID__c'])+'" >SF2</a></p>'
            else:
                msgInteraction += ' & NOT IN SF2</p>'
    print(json.dumps(convertedOppo, indent=4, sort_keys=True))

    if oppoExt:
        job = bulkDev.create_query_job("Opportunity", contentType='JSON')
        queryStr = "select Id, OldSalesforceExtID__c from opportunity where OldSalesforceExtID__c in " + str(oppoExt).replace("u'","'").replace('[','(').replace(']',')').replace(' ','') + " order by CreatedDate"
        batch = bulkDev.query(job, queryStr)
        bulkDev.close_job(job)
        while not bulkDev.is_batch_done(batch):
            time.sleep(10)
        for result in bulkDev.get_all_results_for_query_batch(batch):
            result = json.load(IteratorBytesIO(result))
            for row in result:
                msg += '<p>'+convertedOppo[row['OldSalesforceExtID__c']]['Name']+' @ '+secondToTime(int(convertedOppo[row['OldSalesforceExtID__c']]['createdDate']))+'</p>'
                msg += '<p><a href="'+oldSFURl+str(convertedOppo[row['OldSalesforceExtID__c']]['oldSFId'])+'" >SF1</a> & <a href="'+newSFURl+str(row['Id'])+'" >SF2</a></p>'
                convertedOppo.pop(row['OldSalesforceExtID__c'], None)
                print(json.dumps(row, indent=4, sort_keys=True))
    for key in convertedOppo:
        print(convertedOppo[key])
        msg += '<p>'+convertedOppo[key]['Name']+' @ '+secondToTime(convertedOppo[key]['createdDate'])+'</p>'
        msg += '<p><a href="'+oldSFURl+str(convertedOppo[key]['oldSFId'])+'" >SF1</a> & NOT IN SF2</p>'

    job = bulkDev.create_query_job("Event", contentType='JSON')
    batch = bulkDev.query(job, "select Id, CreatedDate, Subject, OldSalesforceExtID__c from Event where CreatedDate = today and (Subject = 'Closer Appointment' or Subject = 'CAD Appointment') AND CreatedById != '005f4000000JdJ4AAK' order by CreatedDate")
    bulkDev.close_job(job)
    while not bulkDev.is_batch_done(batch):
        time.sleep(10)
    convertedOppo = {}
    for result in bulkDev.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        oppoCount = len(result)
        msgInteraction += '<h1># of Events created in SF2: '+str(oppoCount)+'</h1>'
        for row in result:
            msgInteraction += '<p>@'+secondToTime(int(row['CreatedDate']))+' <a href="'+newSFURl+str(row['Id'])+'" >SF2</a>'
            if row['Id']:
                msgInteraction += ' & <a href="'+oldSFURl+str(row['OldSalesforceExtID__c'])+'" >SF1</a></p>'
            else:
                msgInteraction += ' & NOT IN SF1</p>'
            # print json.dumps(row, indent=4, sort_keys=True)

    errorEmail(str(timeStamp)+' - Today 1212', msg+msgInteraction)
Example no. 17
    def create_and_run_bulk_job(self, job_type, object_name, primary_key,
                                data):
        """
        Note: If the specified job_type is Update or Upsert, you must provide primary_key so
        Salesforce can identify the records to modify.

        :param job_type: "Update", "Insert", or "Upsert".
        :type job_type: str
        :param object_name: The name of the SF object you are performing a bulk job on.
                            Ex. object_name = "Account"
        :type object_name: str
        :param primary_key: Field used to match records: the field queried to resolve record Ids
                            for Update jobs, or the external ID field name for Upsert jobs.
        :type primary_key: str
        :param data: Needs to be formatted as a list of dictionaries.
                     Ex. data = [{'Id': 1, 'Name': 'Andrew'}, {'Id': 2, 'Name': 'Jerry'}]
        :type data: list
        """
        # Ensure string arguments have their first letter capitalized.
        job_type = str.title(job_type)
        object_name = str.title(object_name)
        # Connect and authenticate to Salesforce.
        # Create Job.
        self.log.info(f'Creating {object_name} {job_type} job.')
        if job_type not in ['Insert', 'Update', 'Upsert']:
            raise ReferenceError(
                'Invalid job_type specified. Please use "Insert", "Update", or "Upsert".'
            )
        try:
            if job_type == 'Insert':
                job = self.bulk.create_insert_job(object_name,
                                                  contentType='CSV')
            elif job_type == 'Update':
                job = self.bulk.create_update_job(object_name,
                                                  contentType='CSV')
                soql_query = f'select Id, {primary_key} from {object_name}'
                query_job = self.bulk.create_query_job(object_name,
                                                       contentType='CSV')
                query_batch = self.bulk.query(query_job, soql_query)
                self.bulk.close_job(query_job)
                self.bulk.wait_for_batch(query_job,
                                         query_batch,
                                         timeout=60 * 10)
                query_results = list(
                    self.bulk.get_all_results_for_query_batch(query_batch))
                if len(query_results) == 1:
                    id_map = json.load(IteratorBytesIO(query_results[0]))
                    for rec in data:
                        for row in id_map:
                            if primary_key not in row:
                                key_split = primary_key.split('.')
                                row[primary_key] = row[key_split[0]][
                                    key_split[1]]
                            if rec[primary_key] == row[primary_key]:
                                rec['Id'] = row['Id']
                                break
                else:
                    raise OverflowError(
                        'Query Results larger than expected. Please review.')
            elif job_type == 'Upsert':
                job = self.bulk.create_upsert_job(object_name,
                                                  external_id_name=primary_key,
                                                  contentType='CSV')
        except Exception as job_creation_error:
            self.log.info(
                f'Unable to create {object_name} {job_type} Job. Please verify the value of the object_name variable.'
            )
            self.log.exception(
                f'Encountered exception when creating job: {job_creation_error}'
            )
            raise

        # Transform data from list of dictionaries into iterable CSV Content Type,
        # since the salesforce_bulk package provides a sweet class for it.
        csv_iter = CsvDictsAdapter(iter(data))
        # Create a batch with the data and add it to the job.
        batch = self.bulk.post_batch(job, csv_iter)
        # Wait for the batch to complete. Default timeout is 10 minutes.
        self.bulk.wait_for_batch(job, batch, timeout=60 * 10)
        # Once the batch has been completed, get the results.
        results = self.bulk.get_batch_results(batch)
        # Close the Job.
        self.bulk.close_job(job)
        self.log.info(
            f'{object_name} {job_type} job has been completed successfully.')
        return results
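A hypothetical call, assuming client is an instance of the class that defines create_and_run_bulk_job and that self.bulk and self.log are already configured; the field and record values are invented for illustration:

results = client.create_and_run_bulk_job(
    job_type='Upsert',
    object_name='Account',
    primary_key='External_Id__c',  # assumed external ID field
    data=[{'External_Id__c': 'A-001', 'Name': 'Acme'},
          {'External_Id__c': 'A-002', 'Name': 'Globex'}],
)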
Example no. 18
    def batch_query_records_dict(self,
                                 db_table,
                                 soql_query,
                                 concurrency='Serial'):
        """Execute a bulk Salesforce SOQL query and yield results as lists of record dictionaries.

        Works only for SF tables with PK chunking enabled.

        Allows reading millions of records.

        :param db_table: Salesforce object (table) name
        :param soql_query: SOQL query string
        :param concurrency: Bulk job concurrency mode, 'Serial' or 'Parallel'
        :return: Generator yielding a list of record dictionaries per result chunk
        """
        self.bulk = SalesforceBulk(sessionId=self.session_id,
                                   host=self.instance)
        job = self.bulk.create_query_job(db_table,
                                         contentType="JSON",
                                         pk_chunking=True,
                                         concurrency=concurrency)
        try:
            batch = self.bulk.query(job, soql_query)
            batch_list = self.bulk.get_batch_list(job)
            print('first batch', batch_list[0])
            batch_id = batch_list[0]['id']
            job_id = batch_list[0]['jobId']
            state = batch_list[0]['state']
            while state == 'Queued' or state == 'InProgress':
                print("Waiting for batch state to change; currently " + state)
                sleep(10)
                state = self.bulk.batch_state(batch_id, job_id)

            batch_list = self.bulk.get_batch_list(job)
            print(f'number of batches: {len(batch_list)}')
            for item in batch_list:
                print('item', item)
                batch_id = item['id']
                job_id = item['jobId']
                state = item['state']

                if state == 'NotProcessed':
                    continue

                while not self.bulk.is_batch_done(batch_id, job_id):
                    print(
                        f"Waiting for batch query to complete batch_id:{batch_id}, job_id: {job_id}, state: {state}"
                    )
                    sleep(10)
                    state = self.bulk.batch_state(batch_id, job_id)

                total_retry_count = len(batch_list)
                retry = len(batch_list)
                lastIndex = 0
                while retry > 0:
                    print(f'retry {retry} times left')
                    try:
                        for result in list(
                                self.bulk.get_all_results_for_query_batch(
                                    batch_id, job_id))[lastIndex:]:
                            result = json.load(IteratorBytesIO(result))
                            lastIndex += 1
                            yield result
                        break
                    except requests.exceptions.ChunkedEncodingError:
                        print('Chunking failed')
                        retry -= 1
                        self.connect()
                        self.bulk = SalesforceBulk(sessionId=self.session_id,
                                                   host=self.instance)
                    except Exception:
                        print('There was an error')
                        traceback.print_exc()
                        retry -= 1
                        self.connect()
                        self.bulk = SalesforceBulk(sessionId=self.session_id,
                                                   host=self.instance)
                if retry <= 0:
                    raise Exception(
                        f'Retried {total_retry_count} times and it still failed'
                    )
        except BulkApiError as e:
            self.bulk.abort_job(job)
            raise e
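Because batch_query_records_dict is a generator, callers can stream results chunk by chunk instead of holding millions of records in memory. A hypothetical usage, with client and handle_record standing in for real objects:

for chunk in client.batch_query_records_dict('Account', 'SELECT Id, Name FROM Account'):
    for record in chunk:
        handle_record(record)  # placeholder for real per-record processing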