Example #1
    def create_object_slug(self, container, data, *args, **kwargs):
        """Create an content object slug for the given data
        """
        id = data.get("id")
        portal_type = data.get("portal_type")
        types_tool = api.get_tool("portal_types")
        fti = types_tool.getTypeInfo(portal_type)

        logger.info("Creating {} with ID {} in parent path {}".format(
            portal_type, id, api.get_path(container)))

        if fti.product:
            obj = _createObjectByType(portal_type, container, id)
        else:
            # new-style factory
            factory = getUtility(IFactory, fti.factory)
            obj = factory(id, *args, **kwargs)
            if hasattr(obj, '_setPortalTypeName'):
                obj._setPortalTypeName(fti.getId())
            # notifies ObjectWillBeAddedEvent, ObjectAddedEvent and ContainerModifiedEvent
            container._setObject(id, obj)
            # get the object by its current ID, as an event handler may
            # already have renamed it
            obj = container._getOb(obj.getId())
        return obj
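
A minimal usage sketch for the method above, assuming a SENAITE/Plone portal
with a "clients" folder; the container, ID and portal_type values are invented:

    # Hypothetical usage: create a placeholder ("slug") for a Client object.
    # create_object_slug() only reads the "id" and "portal_type" keys here.
    container = api.get_portal().clients  # assumed destination folder
    data = {"id": "client-1", "portal_type": "Client"}
    obj = self.create_object_slug(container, data)
    logger.info("Created slug at {}".format(api.get_path(obj)))
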
Example #2
    def store(self, domain, key, value, overwrite=False):
        """Store a dictionary in the domain's storage
        """
        # Get the storage for the current URL
        storage = self.get_storage(domain=domain)
        datastore = storage["data"]
        indexstore = storage["index"]

        # already fetched
        if key in datastore and not overwrite:
            logger.info("Skipping existing key {}".format(key))
            return

        # Create some indexes
        for index in ["portal_type", "parent_id", "parent_path"]:
            index_key = "by_{}".format(index)
            if not indexstore.get(index_key):
                indexstore[index_key] = OOBTree()
            indexvalue = value.get(index)
            # Check if the index value, e.g. the portal_type="Sample", is
            # already known as a key in the index.
            if not indexstore[index_key].get(indexvalue):
                indexstore[index_key][indexvalue] = OOSet()
            indexstore[index_key][indexvalue].add(key)

        # store the data
        datastore[key] = value
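
Because store() maintains an OOBTree/OOSet index per portal_type, parent_id and
parent_path, reverse lookups stay cheap. A hedged sketch of querying the index
afterwards ("Sample" is an invented example value):

    # Sketch: find all stored keys whose value had portal_type == "Sample"
    storage = self.get_storage(domain=domain)
    sample_keys = storage["index"]["by_portal_type"].get("Sample", OOSet())
    for key in sample_keys:
        data = storage["data"][key]  # the dictionary stored for that key
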
Example #3
 def _recover_failed_objects(self):
     """ Checks for non-updated objects (by filtering null Title) and
     re-updates them.
     :return:
     """
     uc = api.get_tool('uid_catalog', self.portal)
     # Reference objects must be skipped
     query = (Eq('Title', '')
              & ~ Eq('portal_type', 'Reference')
              & ~ Eq('portal_type', 'ARReport'))
     brains = uc.evalAdvancedQuery(query)
     total = len(brains)
     logger.info('*** Recovering {} objects ***'.format(total))
     for idx, brain in enumerate(brains):
         # Check if object has been created during migration
         uid = brain.UID
         existing = self.sh.find_unique(LOCAL_UID, uid)
         if existing is None:
             continue
         logger.info('Recovering {0}/{1} : {2} '.format(
             idx + 1, total, existing[REMOTE_PATH]))
         # Mark that update failed previously
         existing['updated'] = '0'
         self._handle_obj(existing, handle_dependencies=False)
         obj = brain.getObject()
         obj.reindexObject()
     return
Example #4
    def _import_registry_records(self):
        """Import the registry records from the storage identified by domain
        """
        if not self.import_registry:
            return

        logger.info("***Importing Registry Records: {}***".format(
            self.domain_name))

        storage = self.get_storage()
        registry_store = storage["registry"]
        current_registry = getUtility(IRegistry)

        # For each of the keywords used to retrieve registry data
        # import the records that were found
        for key in registry_store.keys():
            records = registry_store[key]
            for record in records.keys():
                logger.debug("Updating record {} with value {}".format(
                    record, records.get(record)))
                if record not in current_registry.records:
                    logger.warning(
                        "Current Registry has no record named {}".format(
                            record))
                    continue
                current_registry[record] = records.get(record)

        logger.info("*** Registry Records Imported: {}***".format(
            self.domain_name))
Example #5
    def fetch_users(self, domain):
        """Fetch all users from the source URL
        """
        logger.info("*** FETCH USERS {} ***".format(domain))
        storage = self.get_storage(domain=domain)
        userstore = storage["users"]

        for user in self.yield_items("users"):
            username = user.get("username")
            userstore[username] = user
Example #6
 def run(self):
     """
     :return:
     """
     logger.info("*** FETCH STARTED {} ***".format(self.domain_name))
     if self.import_registry:
         self._fetch_registry_records(keys=["bika", "senaite"])
     if self.import_settings:
         self._fetch_settings()
     self._fetch_data()
     logger.info("*** FETCH FINISHED {} ***".format(self.domain_name))
     return
Example #7
    def _import_settings(self):
        """Import the settings from the storage identified by domain
        """
        if not self.import_settings:
            return

        logger.info("*** Importing Settings: {} ***".format(self.domain_name))

        storage = self.get_storage()
        settings_store = storage["settings"]

        for key in settings_store:
            self._set_settings(key, settings_store[key])
Example #8
    def import_users(self, domain):
        """Import the users from the storage identified by domain
        """
        logger.info("*** IMPORT USERS {} ***".format(domain))

        storage = self.get_storage(domain=domain)
        userstore = storage["users"]

        for username, userdata in userstore.items():

            if ploneapi.user.get(username):
                logger.info("Skipping existing user {}".format(username))
                continue
            email = userdata.get("email", "")
            roles = userdata.get("roles", ())
            # TODO: handle groups
            # groups = userdata.get("groups", ())
            logger.info("Creating user {}".format(username))
            message = _("Created new user {} with password {}".format(
                username, username))
            # create new user with the same password as the username
            ploneapi.user.create(
                email=email,
                username=username,
                password=username,
                roles=roles,
            )
            self.add_status_message(message, "info")
            logger.info(message)
Example #9
    def _import_data(self):
        """
        For each UID from the fetched data, creates and updates objects
        step by step.
        :return:
        """
        logger.info("*** IMPORT DATA STARTED: {} ***".format(self.domain_name))

        self.sh = SoupHandler(self.domain_name)
        self.uids_to_reindex = []
        storage = self.get_storage()
        ordered_uids = storage["ordered_uids"]
        total_object_count = len(ordered_uids)
        start_time = datetime.now()

        for item_index, r_uid in enumerate(ordered_uids):
            row = self.sh.find_unique(REMOTE_UID, r_uid)
            logger.debug("Handling: {} ".format(row[REMOTE_PATH]))
            self._handle_obj(row)

            # Handling the object may have created and updated a chunk of
            # several objects. Reindex them now.
            self.uids_to_reindex = list(set(self.uids_to_reindex))
            for uid in self.uids_to_reindex:
                # It is possible that the object has a method (not a Field
                # in its Schema) which is used as an index and it fails.
                # TODO: Make sure reindexing won't fail!
                try:
                    obj = api.get_object_by_uid(uid)
                    obj.reindexObject()
                except Exception as e:
                    rec = self.sh.find_unique(LOCAL_UID, uid)
                    logger.error("Error while reindexing {} - {}".format(
                        rec, e))
            self._non_commited_objects += len(self.uids_to_reindex)
            self.uids_to_reindex = []

            # Commit the transaction if necessary
            if self._non_commited_objects > COMMIT_INTERVAL:
                transaction.commit()
                logger.info("Committed: {} / {} ".format(
                    self._non_commited_objects, total_object_count))
                self._non_commited_objects = 0

            # Log progress every 50 objects imported
            utils.log_process(task_name="Data Import",
                              started=start_time,
                              processed=item_index + 1,
                              total=total_object_count,
                              frequency=50)
Example #10
    def import_registry_records(self, domain):
        """Import the registry records from the storage identified by domain
        """
        logger.info("*** IMPORT REGISTRY RECORDS {} ***".format(domain))

        storage = self.get_storage(domain=domain)
        registry_store = storage["registry"]
        current_registry = getUtility(IRegistry)
        # For each of the keywords used to retrieve registry data
        # import the records that were found
        for key in registry_store.keys():
            records = registry_store[key]
            for record in records.keys():
                logger.info("Updating record {} with value {}".format(
                    record, records.get(record)))
                current_registry[record] = records.get(record)
Example #11
 def insert(self, data):
     """
     Inserts a row to the soup table.
     :param data: row dictionary
     :return: intid of created record
     """
     if self._already_exists(data):
         logger.debug("Trying to insert existing record... {}".format(data))
         return False
     record = Record()
     record.attrs[REMOTE_UID] = data[REMOTE_UID]
     record.attrs[LOCAL_UID] = data.get(LOCAL_UID, "")
     record.attrs[REMOTE_PATH] = data[REMOTE_PATH]
     record.attrs[LOCAL_PATH] = data.get(LOCAL_PATH, "")
     record.attrs[PORTAL_TYPE] = data[PORTAL_TYPE]
     record.attrs[UPDATED] = data.get(UPDATED, "0")
     r_id = self.soup.add(record)
     logger.info("Record {} inserted: {}".format(r_id, data))
     return r_id
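
A hedged sketch of the row dictionary insert() expects; the constants are the
same ones used above, the values are invented examples, and sh is an assumed
SoupHandler instance:

    # Sketch: insert one catalog item into the import soup.
    # REMOTE_UID, REMOTE_PATH and PORTAL_TYPE are mandatory here; the
    # remaining keys fall back to the defaults shown above.
    data = {
        REMOTE_UID: "a1b2c3d4e5f6",                # invented example UID
        REMOTE_PATH: "/senaite/clients/client-1",  # invented example path
        PORTAL_TYPE: "Client",
    }
    rec_id = sh.insert(data)  # False if the record already exists
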
Example #12
    def _fetch_settings(self, keys=None):
        """Fetch source instance settings by keyword
        """
        logger.info("*** Fetching Settings: {} ***".format(self.domain_name))
        storage = self.get_storage()
        settings_store = storage["settings"]

        if keys is None:
            retrieved_settings = self._get_settings_by_key()
        else:
            retrieved_settings = []
            for key in keys:
                retrieved_settings += self._get_settings_by_key(key)

        for setting_dict in retrieved_settings:
            for key in setting_dict.keys():
                if not setting_dict[key]:
                    continue
                settings_store[key] = setting_dict[key]
Example #13
 def get_json(self, url_or_endpoint, **kw):
     """Fetch the given url or endpoint and return a parsed JSON object
     """
     api_url = self.get_api_url(url_or_endpoint, **kw)
     logger.info("get_json::url={}".format(api_url))
     try:
         response = self.session.get(api_url)
     except Exception as e:
         message = "Could not connect to {} Please check.".format(api_url)
         logger.error(e)
         self.add_status_message(message, "error")
         return {}
     status = response.status_code
     if status != 200:
         message = "GET for {} ({}) returned Status Code {}. Please check.".format(
             url_or_endpoint, api_url, status)
         self.add_status_message(message, "warning")
         return {}
     return response.json()
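
Since get_json() folds connection errors and non-200 responses into an empty
dict, callers can treat {} as the single failure case. A hedged usage sketch
(the "users" endpoint and the "items" key follow senaite.jsonapi conventions,
and it is assumed that get_api_url() turns keyword arguments into query
parameters):

    # Sketch: fetch one page of users and bail out on the empty-dict
    # failure case.
    payload = self.get_json("users", limit=100)
    if not payload:
        return
    for item in payload.get("items", []):
        logger.info("Fetched user {}".format(item.get("username")))
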
Example #14
def log_process(task_name, started, processed, total, frequency=1):
    """Logs the current status of the process
    :param task_name: name of the task
    :param started: datetime when the process started
    :param processed: number of processed items
    :param total: total number of items to be processed
    :param frequency: number of items to process between log entries
    :return:
    """
    if frequency <= 0 or processed % frequency > 0 or total <= 0:
        return

    percentage = "0.0"
    if processed > 0:
        percentage = "{0:.1f}".format(processed * 100.0 / total)

    estimated = get_estimated_end_date(started, processed, total)
    estimated = estimated and estimated.strftime("%Y-%m-%d %H:%M:%S") or "-"
    msg = "{}: {} / {} ({}%) - ETD: {}".format(task_name, processed, total,
                                               percentage, estimated)
    logger.info(msg)
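
A self-contained sketch of the intended calling pattern; process() and the
item list are invented:

    # Sketch: report progress every 50 items inside a processing loop
    from datetime import datetime

    started = datetime.now()
    items = range(200)  # invented workload
    for idx, item in enumerate(items):
        process(item)  # invented worker function
        log_process(task_name="Demo Import", started=started,
                    processed=idx + 1, total=len(items), frequency=50)
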
Example #15
    def fetch_registry_records(self, domain, keys=None):
        """Fetch configuration registry records of interest (those associated
        to the keywords passed) from source instance
        """
        logger.info("*** FETCH REGISTRY RECORDS {} ***".format(domain))
        storage = self.get_storage(domain=domain)
        registry_store = storage["registry"]
        retrieved_records = {}

        if keys is None:
            retrieved_records["all"] = self.get_registry_records_by_key()
        else:
            for key in keys:
                retrieved_records[key] = self.get_registry_records_by_key(key)

        for key in retrieved_records.keys():
            if not retrieved_records[key]:
                continue
            registry_store[key] = OOBTree()
            for record in retrieved_records[key][0].keys():
                registry_store[key][record] = retrieved_records[key][0][record]
Example #16
    def _import_users(self):
        """Import the users from the storage identified by domain
        """
        if not self.import_users:
            return

        logger.info("*** Importing Users: {} ***".format(self.domain_name))

        for user in self.yield_items("users"):
            username = user.get("username")
            if ploneapi.user.get(username):
                logger.debug("Skipping existing user {}".format(username))
                continue
            email = user.get("email", "")
            if not email:
                email = "{}@example.com".format(username)
            roles = user.get("roles", ())
            groups = user.get("groups", ())
            logger.debug("Creating user {}".format(username))
            message = _("Created new user {} with password {}".format(
                username, username))
            # create new user with the same password as the username
            ploneapi.user.create(
                email=email,
                username=username,
                password=username,
                roles=roles,
            )
            for group in groups:
                # Try to add the user to the group if group exists.
                try:
                    ploneapi.group.add_user(groupname=group, username=username)
                except KeyError:
                    continue

            logger.debug(message)

        logger.info("*** Users Were Imported: {} ***".format(self.domain_name))
Example #17
    def _fetch_registry_records(self, keys=None):
        """Fetch configuration registry records of interest (those associated
        to the keywords passed) from source instance
        """
        logger.info("*** Fetching Registry Records: {} ***".format(
            self.domain_name))
        storage = self.get_storage()
        registry_store = storage["registry"]
        retrieved_records = {}

        if keys is None:
            retrieved_records["all"] = self._get_registry_records_by_key()
        else:
            for key in keys:
                retrieved_records[key] = self._get_registry_records_by_key(key)

        for key in retrieved_records.keys():
            if not retrieved_records[key]:
                continue
            registry_store[key] = OOBTree()
            for record in retrieved_records[key][0].keys():
                registry_store[key][record] = retrieved_records[key][0][record]
        logger.info("*** Registry Records Fetched: {} ***".format(
            self.domain_name))
Example #18
    def reindex_updated_objects(self):
        """
        Reindexes updated objects.
        """
        total = len(self.uids_to_reindex)
        logger.info(
            'Reindexing {} objects which were updated...'.format(total))
        indexed = 0
        for uid in self.uids_to_reindex:
            obj = api.get_object_by_uid(uid[0], None)
            if obj is None:
                logger.error("Object not found: {} ".format(uid[1]))
                continue
            obj.reindexObject()
            indexed += 1
            if indexed % 100 == 0:
                logger.info('{} objects reindexed, {} remaining'.format(
                    indexed, total - indexed))

        logger.info('Reindexing finished...')
Example #19
    def __call__(self):
        protect.CheckAuthenticator(self.request.form)

        logger.info("**** AUTO SYNC STARTED ****")

        self.portal = api.get_portal()
        storage = u.get_annotation(self.portal)[SYNC_STORAGE]

        for domain_name, values in storage.iteritems():

            # Check if Auto-Sync is enabled for this Remote
            if not values["configuration"]["auto_sync"]:
                continue

            logger.info("Updating data with: '{}' ".format(domain_name))
            self.request.form["dataform"] = 1
            self.request.form["update"] = 1
            self.request.form["domain_name"] = domain_name
            response = Sync(self.context, self.request)
            response()

        logger.info("**** AUTO SYNC FINISHED ****")
        return "Done..."
Example #20
    def __call__(self):
        protect.CheckAuthenticator(self.request.form)

        self.portal = api.get_portal()
        self.request.set('disable_plone.rightcolumn', 1)
        self.request.set('disable_border', 1)

        # Handle form submit
        form = self.request.form
        fetchform = form.get("fetchform", False)
        dataform = form.get("dataform", False)
        if not any([fetchform, dataform]):
            return self.template()

        # remember the form field values
        url = form.get("url", "")
        if not url.startswith("http"):
            url = "http://{}".format(url)
        self.url = url
        self.username = form.get("ac_name", None)
        self.password = form.get("ac_password", None)

        # Handle "Import" action
        if form.get("import", False):
            domain = form.get("domain", None)
            self.import_registry_records(domain)
            self.import_users(domain)
            self.import_data(domain)
            logger.info("*** END OF DATA IMPORT {} ***".format(domain))
            return self.template()

        # Handle "Clear this Storage" action
        if form.get("clear_storage", False):
            domain = form.get("domain", None)
            del self.storage[domain]
            message = _("Cleared Storage {}".format(domain))
            self.add_status_message(message, "info")
            return self.template()

        # Handle "Clear all Storages" action
        if form.get("clear", False):
            self.flush_storage()
            message = _("Cleared Data Storage")
            self.add_status_message(message, "info")
            return self.template()

        # Handle "Fetch" action
        if form.get("fetch", False):
            # check if all mandatory fields have values
            if not all([self.url, self.username, self.password]):
                message = _("Please fill in all required fields")
                self.add_status_message(message, "error")
                return self.template()

            # initialize the session
            self.session = self.get_session(self.username, self.password)

            # remember the credentials in the storage
            storage = self.get_storage(self.url)
            storage["credentials"]["username"] = self.username
            storage["credentials"]["password"] = self.password

            # try to get the version of the remote JSON API
            version = self.get_version()
            if not version or not version.get('version'):
                message = _(
                    "Please install senaite.jsonapi on the source system")
                self.add_status_message(message, "error")
                return self.template()

            # try to get the current logged in user
            user = self.get_authenticated_user()
            if not user or user.get("authenticated") is False:
                message = _("Wrong username/password")
                self.add_status_message(message, "error")
                return self.template()

            domain = self.url
            # Fetch all users from the source
            self.fetch_users(domain)
            # Start the fetch process beginning from the portal object
            self.fetch_data(domain, uid="0")
            # Fetch registry records that contain the word bika or senaite
            self.fetch_registry_records(domain, keys=["bika", "senaite"])
            logger.info("*** FETCHING DATA FINISHED {} ***".format(domain))

        # always render the template
        return self.template()
Example #21
class ImportStep(SyncStep):
    """Class for the Import step of the Synchronization. It creates and
    updates objects based on previously fetched data.
    """
    fields_to_skip = [
        'id',  # Overriding IDs can remove prefixes
        'excludeFromNav',
        'constrainTypesMode',
        'allowDiscussion'
    ]

    def __init__(self, credentials, config):
        SyncStep.__init__(self, credentials, config)
        # A list to keep UIDs of an object chunk
        self.uids_to_reindex = []
        # A guard list of objects currently being updated, used to prevent
        # infinite recursion
        self._queue = []
        # A counter for the number of non-committed objects
        self._non_commited_objects = 0
        self.skipped = []

    def run(self):
        """

        :return:
        """
        self.session = self.get_session()
        self._import_registry_records()
        self._import_settings()
        self._import_users()
        self._import_data()
        return

    def _import_settings(self):
        """Import the settings from the storage identified by domain
        """
        if not self.import_settings:
            return

        logger.info("*** Importing Settings: {} ***".format(self.domain_name))

        storage = self.get_storage()
        settings_store = storage["settings"]

        for key in settings_store:
            self._set_settings(key, settings_store[key])

    def _set_settings(self, key, data):
        """Set settings by key
        """
        # Get the Schema interface of the settings being imported
        ischemas = CONTROLPANEL_INTERFACE_MAPPING.get(key)
        if not ischemas:
            return
        for ischema_name in data.keys():
            ischema = None
            for candidate_schema in ischemas:
                if candidate_schema.getName() == ischema_name:
                    ischema = candidate_schema
            # skip unknown schema names instead of failing on getAdapter
            if ischema is None:
                continue
            schema = getAdapter(api.get_portal(), ischema)
            # Once we have the schema, set the data
            schema_import_data = data.get(ischema_name)
            for schema_field in schema_import_data:
                if schema_import_data[schema_field]:
                    self._set_attr_from_json(schema, schema_field,
                                             schema_import_data[schema_field])

    def _set_attr_from_json(self, schema, attribute, data):
        """Set schema attribute from JSON data. Since JSON converts tuples to lists
           we have to perform a preventive check before setting the value to see if the
           expected value is a tuple or a list. In the case it is a tuple we cast the list
           to tuple
        """
        if hasattr(schema, attribute) and data:
            current_value = getattr(schema, attribute)
            if isinstance(current_value, tuple):
                setattr(schema, attribute, tuple(data))
            else:
                setattr(schema, attribute, data)

    def _import_registry_records(self):
        """Import the registry records from the storage identified by domain
        """
        if not self.import_registry:
            return

        logger.info("***Importing Registry Records: {}***".format(
            self.domain_name))

        storage = self.get_storage()
        registry_store = storage["registry"]
        current_registry = getUtility(IRegistry)

        # For each of the keywords used to retrieve registry data
        # import the records that were found
        for key in registry_store.keys():
            records = registry_store[key]
            for record in records.keys():
                logger.debug("Updating record {} with value {}".format(
                    record, records.get(record)))
                if record not in current_registry.records:
                    logger.warning(
                        "Current Registry has no record named {}".format(
                            record))
                    continue
                current_registry[record] = records.get(record)

        logger.info("*** Registry Records Imported: {}***".format(
            self.domain_name))

    def _import_users(self):
        """Import the users from the storage identified by domain
        """
        if not self.import_users:
            return

        logger.info("*** Importing Users: {} ***".format(self.domain_name))

        for user in self.yield_items("users"):
            username = user.get("username")
            if ploneapi.user.get(username):
                logger.debug("Skipping existing user {}".format(username))
                continue
            email = user.get("email", "")
            if not email:
                email = "{}@example.com".format(username)
            roles = user.get("roles", ())
            groups = user.get("groups", ())
            logger.debug("Creating user {}".format(username))
            message = _("Created new user {} with password {}".format(
                username, username))
            # create new user with the same password as the username
            ploneapi.user.create(
                email=email,
                username=username,
                password=username,
                roles=roles,
            )
            for group in groups:
                # Try to add the user to the group if group exists.
                try:
                    ploneapi.group.add_user(groupname=group, username=username)
                except KeyError:
                    continue

            logger.debug(message)

        logger.info("*** Users Were Imported: {} ***".format(self.domain_name))

    def _import_data(self):
        """
        For each UID from the fetched data, creates and updates objects
        step by step.
        :return:
        """
        logger.info("*** IMPORT DATA STARTED: {} ***".format(self.domain_name))

        self.sh = SoupHandler(self.domain_name)
        self.uids_to_reindex = []
        storage = self.get_storage()
        ordered_uids = storage["ordered_uids"]
        total_object_count = len(ordered_uids)
        start_time = datetime.now()

        for item_index, r_uid in enumerate(ordered_uids):
            row = self.sh.find_unique(REMOTE_UID, r_uid)
            logger.debug("Handling: {} ".format(row[REMOTE_PATH]))
            self._handle_obj(row)

            # Handling the object may have created and updated a chunk of
            # several objects. Reindex them now.
            self.uids_to_reindex = list(set(self.uids_to_reindex))
            for uid in self.uids_to_reindex:
                # It is possible that the object has a method (not a Field
                # in its Schema) which is used as an index and it fails.
                # TODO: Make sure reindexing won't fail!
                try:
                    obj = api.get_object_by_uid(uid)
                    obj.reindexObject()
                except Exception as e:
                    rec = self.sh.find_unique(LOCAL_UID, uid)
                    logger.error("Error while reindexing {} - {}".format(
                        rec, e))
            self._non_commited_objects += len(self.uids_to_reindex)
            self.uids_to_reindex = []

            # Commit the transaction if necessary
            if self._non_commited_objects > COMMIT_INTERVAL:
                transaction.commit()
                logger.info("Committed: {} / {} ".format(
                    self._non_commited_objects, total_object_count))
                self._non_commited_objects = 0

            # Log progress every 50 objects imported
            utils.log_process(task_name="Data Import",
                              started=start_time,
                              processed=item_index + 1,
                              total=total_object_count,
                              frequency=50)

        # Reset the UID list in the storage.
        storage["ordered_uids"] = []

        self._recover_failed_objects()

        # Mark all objects as non-updated for the next import.
        self.sh.reset_updated_flags()

        logger.info("*** END OF DATA IMPORT: {} ***".format(self.domain_name))
Example #22
    def update_object_with_data(self, obj, data, domain):
        """Update an existing object with data
        """

        # get the storage and UID map
        storage = self.get_storage(domain=domain)
        uidmap = storage["uidmap"]
        # Proxy Fields must be set after their dependency objects are set.
        # Thus, we store all ProxyFields and set them at the end
        proxy_fields = []

        for fieldname, field in api.get_fields(obj).items():

            fm = IFieldManager(field)
            value = data.get(fieldname)

            # handle JSON data reference fields
            if isinstance(value, dict) and value.get("uid"):
                # dereference the referenced object
                value = self.dereference_object(value.get("uid"), uidmap)
            elif isinstance(value, (list, tuple)):
                for item in value:
                    # If the value is a list of JSON data dicts, replace the
                    # remote UIDs in each dict with their local counterparts,
                    # so that Field Managers can use them.
                    if isinstance(item, dict):
                        for k, v in item.iteritems():
                            if 'uid' in k:
                                item[k] = uidmap.get(v)

            # handle file fields
            if field.type in ("file", "image", "blob"):
                if data.get(fieldname) is not None:
                    fileinfo = data.get(fieldname)
                    url = fileinfo.get("download")
                    filename = fileinfo.get("filename")
                    data["filename"] = filename
                    response = requests.get(url)
                    value = response.content

            # Leave the Proxy Fields for later
            if isinstance(fm, ProxyFieldManager):
                proxy_fields.append({
                    'field_name': fieldname,
                    'fm': fm,
                    'value': value
                })
                continue

            logger.info("Setting value={} on field={} of object={}".format(
                repr(value), fieldname, api.get_id(obj)))
            try:
                fm.set(obj, value)
            except Exception:
                logger.error("Could not set field '{}' with value '{}'".format(
                    fieldname, value))

        # All reference fields are set. We can set the proxy fields now.
        for pf in proxy_fields:
            field_name = pf.get("field_name")
            fm = pf.get("fm")
            value = pf.get("value")
            logger.info("Setting value={} on field={} of object={}".format(
                repr(value), field_name, api.get_id(obj)))
            try:
                fm.set(obj, value)
            except Exception:
                logger.error("Could not set field '{}' with value '{}'".format(
                    field_name, value))

        # Set the workflow states
        wf_info = data.get("workflow_info", [])
        for wf_dict in wf_info:
            wf_id = wf_dict.get("workflow")
            review_history = wf_dict.get("review_history")
            self.import_review_history(obj, wf_id, review_history)

        # finally reindex the object
        self.uids_to_reindex.append([api.get_uid(obj), repr(obj)])
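
For reference, a hedged sketch of the "workflow_info" payload shape the final
loop consumes; the workflow ID and review-history entries are invented, though
the keys follow standard Plone review-history conventions:

    # Sketch: the shape of data["workflow_info"] as read above
    data = {
        "workflow_info": [{
            "workflow": "senaite_sample_workflow",  # invented workflow id
            "review_history": [{
                "action": "receive",                # invented entry
                "review_state": "sample_received",
                "actor": "admin",
            }],
        }],
    }
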
Example #23
    def import_data(self, domain):
        """Import the data from the storage identified by domain
        """
        logger.info("*** IMPORT DATA {} ***".format(domain))

        storage = self.get_storage(domain=domain)
        datastore = storage["data"]
        indexstore = storage["index"]
        uidmap = storage["uidmap"]
        credentials = storage["credentials"]

        # At some points the API cannot retrieve objects by UID right after
        # creation, so we keep them in a dictionary for easy access.
        objmap = {}
        # We will create objects from top to bottom, but will update them
        # from the bottom up.
        ordered_uids = []

        # initialize a new session with the stored credentials for later requests
        username = credentials.get("username")
        password = credentials.get("password")
        self.session = self.get_session(username, password)
        logger.info("Initialized a new session for user {}".format(username))

        # Get UIDs grouped by their parent path
        ppaths = indexstore.get("by_parent_path")
        if ppaths is None:
            message = _(
                "No parent path info found in the import data. "
                "Please install senaite.jsonapi>=1.1.1 on the source instance "
                "and clear&refetch this storage")
            self.add_status_message(message, "warning")
            return

        # Import by paths from top to bottom
        for ppath in sorted(ppaths):
            # nothing to do
            if not ppath:
                continue

            logger.info("Importing items for parent path {}".format(ppath))
            uids = ppaths[ppath]

            for uid in uids:
                ordered_uids.append(uid)
                # get the data for this uid
                data = datastore[uid]
                # check if the object exists in this instance
                remote_path = data.get("path")
                local_path = self.translate_path(remote_path)
                existing = self.portal.unrestrictedTraverse(
                    str(local_path), None)

                if existing:
                    # remember the UID -> object UID mapping for the update step
                    uidmap[uid] = api.get_uid(existing)
                    objmap[uid] = existing
                else:
                    # get the container object by path
                    container_path = self.translate_path(ppath)
                    container = self.portal.unrestrictedTraverse(
                        str(container_path), None)
                    # create an object slug in this container
                    obj = self.create_object_slug(container, data)
                    # remember the UID -> object UID mapping for the update step
                    uidmap[uid] = api.get_uid(obj)
                    objmap[uid] = obj

        # When creation process is done, commit the transaction to avoid
        # ReferenceField relation problems.
        transaction.commit()

        # UIDs were added from top to bottom. Reverse the list to update
        # objects from the bottom up.
        ordered_uids.reverse()

        # Update all objects with the given data
        for uid in ordered_uids:
            obj = objmap.get(uid, None)
            if obj is None:
                logger.warn("Object not found: {} ".format(uid))
                continue
            logger.info("Update object {} with import data".format(
                api.get_path(obj)))
            self.update_object_with_data(obj, datastore[uid], domain)

        self.reindex_updated_objects()
Example #24
    def _fetch_data(self, window=1000, overlap=10):
        """Fetch data from the uid catalog in the source URL
        :param window: number of elements to be retrieved with each query to
                       the catalog
        :type window: int
        :param overlap: overlap between windows
        :type overlap: int
        :return:
        """
        logger.info("*** FETCHING DATA: {} ***".format(self.domain_name))
        start_time = datetime.now()
        storage = self.get_storage()
        storage["ordered_uids"] = []
        ordered_uids = storage["ordered_uids"]
        self.sh = SoupHandler(self.domain_name)
        # Dummy query to get overall number of items in the specified catalog
        query = {
            "url_or_endpoint": "search",
            "catalog": 'uid_catalog',
            "limit": 1
        }
        if self.full_sync_types:
            types = list()
            types.extend(self.full_sync_types + self.prefixable_types +
                         self.update_only_types + self.read_only_types)
            query["portal_type"] = types
        cd = self.get_json(**query)
        # Knowing the catalog length compute the number of pages we will need
        # with the desired window size and overlap
        effective_window = window - overlap
        # When the JSON response contains an error message, or there is no
        # response at all, the 'count' key is missing.
        if not cd.get("count", None):
            error_message = "Error message: {}".format(
                cd.get('message', None) or '')
            logger.error(
                "A query to the JSON API returned and error. {}".format(
                    error_message))
            return

        number_of_pages = (cd["count"] / effective_window) + 1
        # Retrieve data from catalog in batches with size equal to window,
        # format it and insert it into the import soup
        for current_page in xrange(number_of_pages):
            start_from = (current_page * window) - overlap
            query["limit"] = window
            query["b_start"] = start_from
            items = self.get_items_with_retry(**query)
            if not items:
                logger.error("CAN NOT GET ITEMS FROM {} TO {}".format(
                    start_from, start_from + window))
            for item in items:
                # skip object or extract the required data for the import
                if not self.is_item_allowed(item):
                    continue
                data_dict = utils.get_soup_format(item)
                rec_id = self.sh.insert(data_dict)
                ordered_uids.insert(0, data_dict[REMOTE_UID])
                if not self._parents_fetched(item):
                    logger.warning(
                        "Some parents are missing: {} ".format(item))

            utils.log_process(task_name="Pages fetched",
                              started=start_time,
                              processed=current_page + 1,
                              total=number_of_pages)

        logger.info("*** FETCHING DATA FINISHED: {} ***".format(
            self.domain_name))

        transaction.commit()
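
The paging arithmetic above is easiest to check with concrete numbers; a
pure-arithmetic sketch assuming a catalog of 2500 items and the default
window/overlap values:

    # Sketch: page boundaries for count=2500, window=1000, overlap=10
    window, overlap, count = 1000, 10, 2500
    effective_window = window - overlap                  # 990
    number_of_pages = (count // effective_window) + 1    # 3
    for current_page in range(number_of_pages):
        b_start = (current_page * window) - overlap
        # (b_start, limit) pairs: (-10, 1000), (990, 1000), (1990, 1000)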