Example #1
    def _parents_fetched(self, item):
        """
        If data was fetched with a portal type filter, this method fills in
        the missing parents of the fetched objects.
        :return: True if ALL parents are fetched
        """
        # Never fetch parents of unnecessary objects
        if not utils.has_valid_portal_type(item):
            return False
        parent_path = item.get("parent_path")
        # Skip if the parent is the portal object
        if self.is_portal_path(parent_path):
            return True
        # Skip if it already exists
        if self.sh.find_unique(REMOTE_PATH, parent_path):
            return True
        logger.debug("Inserting missing parent: {}".format(parent_path))
        parent = self.get_first_item(item.get("parent_url"))
        if not parent:
            logger.error("Cannot fetch parent info: {}".format(parent_path))
            return False
        par_dict = utils.get_soup_format(parent)
        self.sh.insert(par_dict)
        # Recursively import grandparents too
        return self._parents_fetched(parent)
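For context, _parents_fetched only reads a few keys of the fetched item. A minimal sketch of the assumed input shape (all values invented):

    # Hypothetical item as returned by the remote JSON API; only the keys
    # used above are shown, and the values are made up for illustration.
    item = {
        "portal_type": "Client",
        "parent_path": "/senaite/clients",
        "parent_url": "http://remote.example.com/@@API/senaite/v1/clients",
    }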
Example #2
    def _import_registry_records(self):
        """Import the registry records from the storage identified by domain
        """
        if not self.import_registry:
            return

        logger.info("***Importing Registry Records: {}***".format(
            self.domain_name))

        storage = self.get_storage()
        registry_store = storage["registry"]
        current_registry = getUtility(IRegistry)

        # For each keyword used to retrieve registry data, import the
        # records that were found
        for key in registry_store.keys():
            records = registry_store[key]
            for record in records.keys():
                logger.debug("Updating record {} with value {}".format(
                    record, records.get(record)))
                if record not in current_registry.records:
                    logger.warning(
                        "Current Registry has no record named {}".format(
                            record))
                    continue
                current_registry[record] = records.get(record)

        logger.info("*** Registry Records Imported: {}***".format(
            self.domain_name))
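The nested loops imply a two-level mapping from storage keys to record name/value pairs. A hedged sketch of what registry_store might hold (keys and values invented):

    # Assumed structure, inferred from registry_store[key][record] above.
    registry_store = {
        "registry_records": {
            "plone.email_from_name": "Lab Administrator",
            "plone.email_from_address": "lab@example.com",
        },
    }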
Example #3
    def _import_data(self):
        """
        For each UID from the fetched data, creates and updates objects
        step by step.
        :return:
        """
        logger.info("*** IMPORT DATA STARTED: {} ***".format(self.domain_name))

        self.sh = SoupHandler(self.domain_name)
        self.uids_to_reindex = []
        storage = self.get_storage()
        ordered_uids = storage["ordered_uids"]
        total_object_count = len(ordered_uids)
        start_time = datetime.now()

        for item_index, r_uid in enumerate(ordered_uids):
            row = self.sh.find_unique(REMOTE_UID, r_uid)
            logger.debug("Handling: {} ".format(row[REMOTE_PATH]))
            self._handle_obj(row)

            # Handling an object can create and update a chunk of several
            # related objects. Reindex them now.
            self.uids_to_reindex = list(set(self.uids_to_reindex))
            for uid in self.uids_to_reindex:
                # The object may use a method (not a Field in its Schema)
                # as an index, and reindexing can fail on it.
                # TODO: Make sure reindexing won't fail!
                try:
                    obj = api.get_object_by_uid(uid)
                    obj.reindexObject()
                except Exception as e:
                    rec = self.sh.find_unique(LOCAL_UID, uid)
                    logger.error("Error while reindexing {} - {}".format(
                        rec, e))
            self._non_commited_objects += len(self.uids_to_reindex)
            self.uids_to_reindex = []

            # Commit the transaction if necessary
            if self._non_commited_objects > COMMIT_INTERVAL:
                transaction.commit()
                logger.info("Committed: {} / {} ".format(
                    self._non_commited_objects, total_object_count))
                self._non_commited_objects = 0

            # Log progress (info) every 50 objects imported
            utils.log_process(task_name="Data Import",
                              started=start_time,
                              processed=item_index + 1,
                              total=total_object_count,
                              frequency=50)
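The commit block above is a standard bounded-transaction pattern. A standalone sketch under the same assumptions (the COMMIT_INTERVAL value and the handle callable are invented):

    import transaction  # ZODB transaction manager, as used above

    COMMIT_INTERVAL = 1000  # hypothetical threshold

    def import_in_batches(rows, handle):
        # Commit every COMMIT_INTERVAL handled rows, so a long import
        # neither bloats a single transaction nor commits per object.
        pending = 0
        for row in rows:
            handle(row)
            pending += 1
            if pending > COMMIT_INTERVAL:
                transaction.commit()
                pending = 0
        transaction.commit()  # flush the final partial batch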
Example #4
    def insert(self, data):
        """
        Inserts a row into the soup table.
        :param data: row dictionary
        :return: intid of the created record, or False if it already exists
        """
        if self._already_exists(data):
            logger.debug("Trying to insert an existing record... {}".format(data))
            return False
        record = Record()
        record.attrs[REMOTE_UID] = data[REMOTE_UID]
        record.attrs[LOCAL_UID] = data.get(LOCAL_UID, "")
        record.attrs[REMOTE_PATH] = data[REMOTE_PATH]
        record.attrs[LOCAL_PATH] = data.get(LOCAL_PATH, "")
        record.attrs[PORTAL_TYPE] = data[PORTAL_TYPE]
        record.attrs[UPDATED] = data.get(UPDATED, "0")
        r_id = self.soup.add(record)
        logger.info("Record {} inserted: {}".format(r_id, data))
        return r_id
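insert requires REMOTE_UID, REMOTE_PATH and PORTAL_TYPE (indexed directly above), while the LOCAL_* columns and UPDATED have defaults. A hedged sketch of a row (values invented; sh stands for an assumed SoupHandler instance):

    # The constants are the module-level column names used above.
    data = {
        REMOTE_UID: "3f2a9c4d8e7b",
        REMOTE_PATH: "/senaite/clients/client-1",
        PORTAL_TYPE: "Client",
    }
    r_id = sh.insert(data)  # returns the new intid, or False if it exists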
Example #5
    def get_json(self, url_or_endpoint, **kw):
        """Fetch the given url or endpoint and return a parsed JSON object
        """
        api_url = self.get_api_url(url_or_endpoint, **kw)
        logger.debug("get_json::url={}".format(api_url))
        try:
            response = self.session.get(api_url)
        except Exception as e:
            message = "Could not connect to {}. Please check.".format(api_url)
            logger.error(message)
            logger.error(e)
            return {}
        status = response.status_code
        if status != 200:
            message = "GET for {} ({}) returned Status Code {}. Please check.".format(
                url_or_endpoint, api_url, status)
            logger.error(message)
            return {}
        return response.json()
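Since get_json logs failures itself and returns an empty dict, callers only need a truthiness check. A hedged usage sketch (client and the endpoint name are assumptions):

    # client stands for an assumed instance of the class above; "users"
    # is an invented endpoint name.
    data = client.get_json("users")
    if not data:
        print("request failed; details were already logged by get_json")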
Example #6
    def _create_object_slug(self, container, data, *args, **kwargs):
        """Create an content object slug for the given data
        """
        id = data.get("id")
        remote_path = data.get("remote_path")
        portal_type = data.get("portal_type")
        types_tool = api.get_tool("portal_types")
        fti = types_tool.getTypeInfo(portal_type)
        if not fti:
            self.skipped.append(remote_path)
            logger.error("Type Info not found for {}".format(portal_type))
            return None
        logger.debug("Creating {} with ID {} in parent path {}".format(
            portal_type, id, api.get_path(container)))

        if fti.product:
            obj = _createObjectByType(portal_type, container, id)
        else:
            # new style factory
            factory = getUtility(IFactory, fti.factory)
            obj = factory(id, *args, **kwargs)
            if hasattr(obj, '_setPortalTypeName'):
                obj._setPortalTypeName(fti.getId())
            # notifies ObjectWillBeAddedEvent, ObjectAddedEvent and
            # ContainerModifiedEvent
            container._setObject(id, obj)
            # get the object with its current id, as it might already have
            # been renamed by an event handler
            obj = container._getOb(obj.getId())

        # Make sure the creation flag is cleared
        if obj.checkCreationFlag():
            obj.unmarkCreationFlag()

        return obj
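_create_object_slug only reads three keys from data. A hedged sketch of a call (importer and container are assumed objects; values invented):

    data = {
        "id": "client-1",
        "remote_path": "/senaite/clients/client-1",
        "portal_type": "Client",
    }
    # importer is an assumed instance of the class above, container a
    # folderish parent object; returns None if the type info is missing.
    obj = importer._create_object_slug(container, data)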
Example #7
    def _import_users(self):
        """Import the users from the storage identified by domain
        """
        if not self.import_users:
            return

        logger.info("*** Importing Users: {} ***".format(self.domain_name))

        for user in self.yield_items("users"):
            username = user.get("username")
            if ploneapi.user.get(username):
                logger.debug("Skipping existing user {}".format(username))
                continue
            email = user.get("email", "")
            if not email:
                email = "{}@example.com".format(username)
            roles = user.get("roles", ())
            groups = user.get("groups", ())
            logger.debug("Creating user {}".format(username))
            message = _("Created new user {} with password {}".format(
                username, username))
            # create new user with the same password as the username
            ploneapi.user.create(
                email=email,
                username=username,
                password=username,
                roles=roles,
            )
            for group in groups:
                # Try to add the user to the group, if the group exists.
                try:
                    ploneapi.group.add_user(groupname=group, username=username)
                except KeyError:
                    continue

            logger.debug(message)

        logger.info("*** Users Were Imported: {} ***".format(self.domain_name))
Example #8
    def _update_object_with_data(self, obj, data):
        """Update an existing object with data
        """
        # Proxy fields must be set after their dependency objects have been
        # set. Thus, we store all the proxy fields and set them at the end.
        proxy_fields = []

        for fieldname, field in api.get_fields(obj).items():

            if fieldname in self.fields_to_skip:
                continue

            fm = IFieldManager(field)
            value = data.get(fieldname)
            kwargs = {}

            # Computed Fields don't have set methods.
            if isinstance(fm, ComputedFieldManager):
                continue

            # handle JSON data reference fields
            if isinstance(value, dict) and value.get("uid"):
                # dereference the referenced object
                local_uid = self.sh.get_local_uid(value.get("uid"))
                if local_uid:
                    value = api.get_object_by_uid(local_uid)
                else:
                    value = None

            elif isinstance(value, (list, tuple)):
                for item in value:
                    # If it is a list of JSON data dicts of objects, add the
                    # local uid to each dictionary. This local_uid can then
                    # be used by the Field Managers.
                    if isinstance(item, dict):
                        for k, v in item.iteritems():
                            if 'uid' in k:
                                local_uid = self.sh.get_local_uid(v)
                                item[k] = local_uid

            # handle file fields
            if field.type in ("file", "image", "blob"):
                if data.get(fieldname) is not None:
                    fileinfo = data.get(fieldname)
                    url = fileinfo.get("download")
                    filename = fileinfo.get("filename")
                    kwargs["filename"] = filename
                    response = self.session.get(url)
                    value = response.content

            # Leave the Proxy Fields for later
            if isinstance(fm, ProxyFieldManager):
                proxy_fields.append({
                    'field_name': fieldname,
                    'fm': fm,
                    'value': value
                })
                continue
            try:
                fm.set(obj, value, **kwargs)
            except Exception:
                logger.debug("Could not set field '{}' with value '{}'".format(
                    fieldname, value))

        # All reference fields are set. We can set the proxy fields now.
        for pf in proxy_fields:
            field_name = pf.get("field_name")
            fm = pf.get("fm")
            value = pf.get("value")
            try:
                fm.set(obj, value)
            except Exception:
                logger.debug("Could not set field '{}' with value '{}'".format(
                    field_name, value))

        # Set the workflow states
        wf_info = data.get("workflow_info", [])
        for wf_dict in wf_info:
            wf_id = wf_dict.get("workflow")
            review_history = wf_dict.get("review_history")
            self._import_review_history(obj, wf_id, review_history)

        # finally reindex the object
        self.uids_to_reindex.append(api.get_uid(obj))
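The value shapes the updater expects can be inferred from the lookups above. A hedged sketch (all values invented):

    # A reference field arrives as a dict carrying the remote uid ...
    ref_value = {"uid": "3f2a9c4d8e7b"}
    # ... a file/image/blob field as a dict with a download url and
    # filename ...
    file_value = {"download": "http://remote.example.com/report.pdf",
                  "filename": "report.pdf"}
    # ... and workflow_info as a list of per-workflow dicts.
    workflow_info = [{
        "workflow": "bika_ar_workflow",
        "review_history": [],  # state-change entries; shape not shown here
    }]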
Example #9
    def _create_dependencies(self, obj, data):
        """
        Creates and updates objects' dependencies if they are not in the queue.
        Dependencies are found as UIDs in object data.
        :param obj: an object to get dependencies created
        :param data: object data
        """

        dependencies = []

        for fieldname, field in api.get_fields(obj).items():

            if fieldname in self.fields_to_skip:
                continue

            value = data.get(fieldname)

            if isinstance(value, dict) and value.get("uid"):
                dependencies.append(value.get("uid"))
            elif isinstance(value, (list, tuple)):
                for item in value:
                    if isinstance(item, dict):
                        for k, v in item.iteritems():
                            if 'uid' in k:
                                dependencies.append(v)

        logger.debug("Dependencies of {} are : {} ".format(
            repr(obj), dependencies))
        dependencies = list(set(dependencies))
        for r_uid in dependencies:
            dep_row = self.sh.find_unique(REMOTE_UID, r_uid)
            if dep_row is None:
                # If dependency doesn't exist in fetched data table,
                # just try to create its object for the first time
                dep_item = self.get_json(r_uid)
                if not dep_item:
                    logger.error(
                        "Remote UID not found in fetched data: {}".format(
                            r_uid))
                    continue
                if not utils.has_valid_portal_type(dep_item):
                    logger.error(
                        "Skipping dependency with unknown portal type:"
                        " {}".format(dep_item))
                    continue
                data_dict = utils.get_soup_format(dep_item)
                rec_id = self.sh.insert(data_dict)
                dep_row = self.sh.get_record_by_id(rec_id, as_dict=True)
                if self._parents_fetched(dep_item):
                    self._handle_obj(dep_row, handle_dependencies=False)
                continue

            # If the dependency is already being processed, skip it.
            if r_uid in self._queue:
                continue

            # Only handle the object if it has not been updated yet
            if dep_row.get("updated") == "0":
                self._handle_obj(dep_row)
            # Reindex dependency just in case it has a field that uses
            # BackReference of this object.
            else:
                self.uids_to_reindex.append(dep_row.get(LOCAL_UID))

        return True
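The UID-harvesting logic above can be read as a small pure helper. A sketch of that logic in isolation (not part of the module's API):

    def collect_remote_uids(value):
        # Gather remote UIDs from a field value the same way
        # _create_dependencies does: a single reference dict, or a
        # list/tuple of dicts whose keys contain 'uid'.
        uids = []
        if isinstance(value, dict) and value.get("uid"):
            uids.append(value.get("uid"))
        elif isinstance(value, (list, tuple)):
            for item in value:
                if isinstance(item, dict):
                    for k, v in item.items():
                        if 'uid' in k:
                            uids.append(v)
        return uids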