def _query_db(self, mapping):
    """Build a SQLAlchemy query over the local db table for this mapping.

    Selects the primary-key column plus every mapped field column, and
    outer-joins each lookup (except deferred "after" lookups) against its
    ``*_sf_ids`` table so real Salesforce ids are available.
    """
    model = self.models[mapping.get("table")]

    # When the SF Id is the primary key, drop the mapped Id field and
    # select the table's own primary key column instead.
    field_map = mapping.get("fields", {}).copy()
    if mapping["oid_as_pk"]:
        del field_map["Id"]

    pk_name = model.__table__.primary_key.columns.keys()[0]
    columns = [getattr(model, pk_name)]
    columns.extend(model.__table__.columns[db_name] for db_name in field_map.values())

    # Lookups flagged "after" are resolved in a later pass; skip them here.
    lookups = {
        sf_field: lookup
        for sf_field, lookup in mapping.get("lookups", {}).items()
        if "after" not in lookup
    }
    for lookup in lookups.values():
        lookup["aliased_table"] = aliased(
            self.metadata.tables[f"{lookup['table']}_sf_ids"]
        )
        columns.append(lookup["aliased_table"].columns.sf_id)

    query = self.session.query(*columns)

    if "record_type" in mapping and hasattr(model, "record_type"):
        query = query.filter(model.record_type == mapping["record_type"])
    if "filters" in mapping:
        query = query.filter(*(text(expr) for expr in mapping["filters"]))

    for sf_field, lookup in lookups.items():
        key_field = get_lookup_key_field(lookup, sf_field)
        fk_column = getattr(model, key_field)
        # Outer join with the lookup ids table so the main object is
        # returned even when the lookup is null.
        query = query.outerjoin(
            lookup["aliased_table"],
            lookup["aliased_table"].columns.id == fk_column,
        )
        # Order by the foreign key to keep lookup targets in the same
        # batch and minimize lock contention.
        query = query.order_by(fk_column)

    self.logger.debug(str(query))
    return query
def _import_results(self, mapping, result_file, conn):
    """Load one Bulk API result CSV from ``result_file`` into the local db."""
    # Map SF field names (CSV header) to local db column names.
    header_line = result_file.readline().strip().decode("utf-8")
    sf_header = [cell.strip('"') for cell in header_line.split(",")]

    field_map = mapping.get("fields", {})
    lookup_map = mapping.get("lookups", {})

    columns = []
    lookup_keys = []
    for sf_name in sf_header:
        # Salesforce signals an empty result set with this sentinel header.
        if sf_name == "Records not found for this query":
            return
        if not sf_name:
            continue
        db_column = field_map.get(sf_name)
        if not db_column:
            lookup = lookup_map.get(sf_name, {})
            if lookup:
                lookup_keys.append(sf_name)
                db_column = get_lookup_key_field(lookup, sf_name)
        if db_column:
            columns.append(db_column)
    if not columns:
        return

    record_type = mapping.get("record_type")
    if record_type:
        columns.append("record_type")

    row_iter = log_progress(
        process_incoming_rows(result_file, record_type), self.logger
    )
    data_file = IteratorBytesIO(row_iter)

    if mapping["oid_as_pk"]:
        self._sql_bulk_insert_from_csv(conn, mapping["table"], columns, data_file)
    else:
        # Using the autogenerated id field: split the Bulk API CSV into a
        # values file (main table) and an ids file (sf_id table) and load both.
        with tempfile.TemporaryFile("w+b") as f_values, tempfile.TemporaryFile(
            "w+b"
        ) as f_ids:
            data_file_values, data_file_ids = self._split_batch_csv(
                data_file, f_values, f_ids
            )
            self._sql_bulk_insert_from_csv(
                conn, mapping["table"], columns, data_file_values
            )
            self._sql_bulk_insert_from_csv(
                conn, mapping["sf_id_table"], ["sf_id"], data_file_ids
            )

    if "RecordTypeId" in mapping["fields"]:
        self._extract_record_types(
            mapping["sf_object"], mapping["record_type_table"], conn
        )

    self.session.commit()

    if lookup_keys and not mapping["oid_as_pk"]:
        self._convert_lookups_to_id(mapping, lookup_keys)
def _fields_for_mapping(self, mapping):
    """Return ``{"sf": ..., "db": ...}`` pairs for all mapped fields and lookups."""
    field_pairs = [
        {"sf": sf_field, "db": db_field}
        for sf_field, db_field in mapping.get("fields", {}).items()
    ]
    field_pairs.extend(
        {"sf": sf_field, "db": get_lookup_key_field(lookup, sf_field)}
        for sf_field, lookup in mapping.get("lookups", {}).items()
    )
    return field_pairs
def _convert_lookups_to_id(self, mapping, lookup_keys):
    """Rewrite lookup columns from Salesforce ids to local database ids."""
    model = self.models[mapping["table"]]
    for lookup_key in lookup_keys:
        lookup = mapping["lookups"][lookup_key]
        target_mapping = self._get_mapping_for_table(lookup["table"])
        id_model = self.models[target_mapping["sf_id_table"]]
        key_field = get_lookup_key_field(lookup, lookup_key)
        key_attr = getattr(model, key_field)
        try:
            # Single multi-table UPDATE where the backend supports it.
            self.session.query(model).filter(
                key_attr.isnot(None), key_attr == id_model.sf_id
            ).update({key_attr: id_model.id}, synchronize_session=False)
        except NotImplementedError:
            # Some databases (e.g. sqlite) don't support multi-table
            # update: fetch the matching pairs and bulk-update instead.
            updates = [
                {"id": row.id, key_field: local_id}
                for row, local_id in self.session.query(model, id_model.id).join(
                    id_model, key_attr == id_model.sf_id
                )
            ]
            self.session.bulk_update_mappings(model, updates)
    self.session.commit()
def _query_db(self, mapping):
    """Build a query to retrieve data from the local db.

    Selects the primary-key column plus every mapped field column,
    outer-joins each (non-"after") lookup against its ``*_sf_ids`` table
    to resolve real SF ids, and — when RecordTypeId is mapped — joins
    through the record-type mapping tables to translate record type ids
    from the source org to the target org.
    """
    model = self.models[mapping.get("table")]

    # Use the local primary key instead of the field mapped to the SF Id.
    field_map = mapping.get("fields", {}).copy()
    if mapping["oid_as_pk"]:
        del field_map["Id"]

    pk_name = model.__table__.primary_key.columns.keys()[0]
    columns = [getattr(model, pk_name)]
    # RecordTypeId is selected via the rt mapping tables below, not the
    # raw local column.
    columns.extend(
        model.__table__.columns[db_name]
        for sf_name, db_name in field_map.items()
        if sf_name != "RecordTypeId"
    )

    # Lookups flagged "after" are resolved in a later pass; skip them here.
    lookups = {
        sf_field: lookup
        for sf_field, lookup in mapping.get("lookups", {}).items()
        if not lookup.get("after")
    }
    for lookup in lookups.values():
        lookup["aliased_table"] = aliased(
            self.metadata.tables[f"{lookup['table']}_sf_ids"]
        )
        columns.append(lookup["aliased_table"].columns.sf_id)

    if mapping["fields"].get("RecordTypeId"):
        rt_dest_table = self.metadata.tables[
            mapping["sf_object"] + "_rt_target_mapping"
        ]
        columns.append(rt_dest_table.columns.record_type_id)

    query = self.session.query(*columns)

    if mapping.get("record_type") and hasattr(model, "record_type"):
        query = query.filter(model.record_type == mapping["record_type"])
    if mapping.get("filters"):
        query = query.filter(*(text(expr) for expr in mapping["filters"]))

    if mapping["fields"].get("RecordTypeId"):
        # Translate source-org record type ids to target-org ids by
        # joining the source and target mapping tables on developer_name.
        rt_source_table = self.metadata.tables[mapping["sf_object"] + "_rt_mapping"]
        rt_dest_table = self.metadata.tables[
            mapping["sf_object"] + "_rt_target_mapping"
        ]
        query = query.outerjoin(
            rt_source_table,
            rt_source_table.columns.record_type_id
            == getattr(model, mapping["fields"]["RecordTypeId"]),
        )
        query = query.outerjoin(
            rt_dest_table,
            rt_dest_table.columns.developer_name
            == rt_source_table.columns.developer_name,
        )

    for sf_field, lookup in lookups.items():
        key_field = get_lookup_key_field(lookup, sf_field)
        fk_column = getattr(model, key_field)
        # Outer join with the lookup ids table so the main object is
        # returned even when the lookup is null.
        query = query.outerjoin(
            lookup["aliased_table"],
            lookup["aliased_table"].columns.id == fk_column,
        )
        # Order by the foreign key to keep lookup targets in the same
        # batch and minimize lock contention.
        query = query.order_by(fk_column)

    return query