The following examples show how get_content_database_file_path is used across the Kolibri codebase.

Example #1
    def channeldiffstats(self, request):
        job_metadata = {}
        channel_id = request.data.get("channel_id")
        method = request.data.get("method")
        drive_id = request.data.get("drive_id")
        baseurl = request.data.get("baseurl")

        # request validation and job metadata info
        if not channel_id:
            raise serializers.ValidationError("The channel_id field is required.")
        if not method:
            raise serializers.ValidationError("The method field is required.")

        if method == "network":
            baseurl = baseurl or conf.OPTIONS["Urls"]["CENTRAL_CONTENT_BASE_URL"]
            job_metadata["baseurl"] = baseurl
            # get channel version metadata
            url = get_channel_lookup_url(baseurl=baseurl, identifier=channel_id)
            resp = requests.get(url)
            channel_metadata = resp.json()
            job_metadata["new_channel_version"] = channel_metadata[0]["version"]
        elif method == "disk":
            if not drive_id:
                raise serializers.ValidationError(
                    "The drive_id field is required when using 'disk' method."
                )
            job_metadata = _add_drive_info(job_metadata, request.data)
            # get channel version metadata
            drive = get_mounted_drive_by_id(drive_id)
            channel_metadata = read_channel_metadata_from_db_file(
                get_content_database_file_path(channel_id, drive.datafolder)
            )
            job_metadata["new_channel_version"] = channel_metadata.version
        else:
            raise serializers.ValidationError(
                "'method' field should either be 'network' or 'disk'."
            )

        job_metadata.update(
            {
                "type": "CHANNELDIFFSTATS",
                "started_by": request.user.pk,
                "channel_id": channel_id,
            }
        )

        job_id = priority_queue.enqueue(
            diff_stats,
            channel_id,
            method,
            drive_id=drive_id,
            baseurl=baseurl,
            extra_metadata=job_metadata,
            track_progress=False,
            cancellable=True,
        )

        resp = _job_to_response(priority_queue.fetch_job(job_id))

        return Response(resp)
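
For reference, the two request payload shapes this validation accepts look roughly as follows; the channel id is borrowed from Example #5 and the drive id is a made-up placeholder:

network_payload = {
    "channel_id": "6199dde695db4ee4ab392222d5af1e5c",
    "method": "network",
    # "baseurl" is optional; conf.OPTIONS["Urls"]["CENTRAL_CONTENT_BASE_URL"]
    # is used when it is omitted
}

disk_payload = {
    "channel_id": "6199dde695db4ee4ab392222d5af1e5c",
    "method": "disk",
    "drive_id": "f00d",  # required whenever method is "disk"
}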
Example #2
def fix_multiple_trees_with_tree_id1():
    # Do a check for improperly imported ContentNode trees
    # These trees have been naively imported, and so there are multiple trees
    # with tree_ids set to 1. Just check the root nodes to reduce the query size.
    tree_id_one_channel_ids = ContentNode.objects.filter(
        parent=None, tree_id=1
    ).values_list("channel_id", flat=True)
    if len(tree_id_one_channel_ids) > 1:
        logger.warning("Improperly imported channels discovered")
        # There is more than one channel with a tree_id of 1
        # Find which channel has the most content nodes, and then delete and reimport the rest.
        channel_sizes = {}
        for channel_id in tree_id_one_channel_ids:
            channel_sizes[channel_id] = ContentNode.objects.filter(
                channel_id=channel_id
            ).count()
        # Get sorted list of ids by increasing number of nodes
        sorted_channel_ids = sorted(channel_sizes, key=channel_sizes.get)
        # Loop through all but the largest channel, delete and reimport
        count = 0

        for channel_id in sorted_channel_ids[:-1]:
            # Double check that we have a content db to import from before deleting any metadata
            if os.path.exists(get_content_database_file_path(channel_id)):
                logger.warning(
                    "Deleting and reimporting channel metadata for {channel_id}".format(
                        channel_id=channel_id
                    )
                )
                ChannelMetadata.objects.get(
                    id=channel_id
                ).delete_content_tree_and_files()
                import_channel_from_local_db(channel_id)
                logger.info(
                    "Successfully reimported channel metadata for {channel_id}".format(
                        channel_id=channel_id
                    )
                )
                count += 1
            else:
                logger.warning(
                    "Attempted to reimport channel metadata for channel {channel_id} but no content database found".format(
                        channel_id=channel_id
                    )
                )
        if count:
            logger.info(
                "Successfully reimported channel metadata for {count} channels".format(
                    count=count
                )
            )
        failed_count = len(sorted_channel_ids) - 1 - count
        if failed_count:
            logger.warning(
                "Failed to reimport channel metadata for {count} channels".format(
                    count=failed_count
                )
            )
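
A minimal sketch of the invariant this repair is meant to restore, namely that at most one root ContentNode is left holding tree_id=1 (the model import path is an assumption, not shown in the snippet above):

from kolibri.core.content.models import ContentNode

fix_multiple_trees_with_tree_id1()
# Only the largest of the improperly imported channels should keep tree_id=1.
assert ContentNode.objects.filter(parent=None, tree_id=1).count() <= 1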
Example #3
    def handle_async(self, *args, **options):
        channel_id = options["channel_id"]
        node_ids = options["node_ids"]
        exclude_node_ids = options["exclude_node_ids"]
        force_delete = options["force_delete"]

        try:
            channel = ChannelMetadata.objects.get(pk=channel_id)
        except ChannelMetadata.DoesNotExist:
            raise CommandError(
                "Channel matching id {id} does not exist".format(id=channel_id)
            )

        delete_all_metadata = delete_metadata(
            channel, node_ids, exclude_node_ids, force_delete
        )

        unused_files = LocalFile.objects.get_unused_files()

        # Get orphan files that are being deleted
        total_file_deletion_operations = unused_files.count()
        job = get_current_job()
        if job:
            total_file_deletion_size = unused_files.aggregate(Sum("file_size")).get(
                "file_size__sum", 0
            )
            job.extra_metadata["file_size"] = total_file_deletion_size
            job.extra_metadata["total_resources"] = total_file_deletion_operations
            job.save_meta()

        progress_extra_data = {"channel_id": channel_id}

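        # One progress step for the orphan file object cleanup below, plus a
        # second step if the channel database file itself will also be removed.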
        additional_progress = sum((1, bool(delete_all_metadata)))

        with self.start_progress(
            total=total_file_deletion_operations + additional_progress
        ) as progress_update:

            for file in LocalFile.objects.delete_unused_files():
                progress_update(1, progress_extra_data)

            with db_task_write_lock:
                LocalFile.objects.delete_orphan_file_objects()

            progress_update(1, progress_extra_data)

            if delete_all_metadata:
                try:
                    os.remove(get_content_database_file_path(channel_id))
                except OSError:
                    pass

                progress_update(1, progress_extra_data)
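
The handler above is written to run as an asynchronous Django management command. A hedged invocation sketch, driving it through call_command the way Example #6 drives "exportchannel" and "exportcontent"; the command name "deletecontent" is an assumption inferred from the options the handler reads:

from django.core.management import call_command

call_command(
    "deletecontent",  # assumed command name
    "6199dde695db4ee4ab392222d5af1e5c",
    node_ids=None,
    exclude_node_ids=None,
    force_delete=False,
)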
Example #4
    def handle_async(self, *args, **options):
        channel_id = options["channel_id"]

        try:
            channel = ChannelMetadata.objects.get(pk=channel_id)
        except ChannelMetadata.DoesNotExist:
            raise CommandError(
                "Channel matching id {id} does not exist".format(id=channel_id)
            )

        logger.info("Deleting all channel metadata")
        channel.delete_content_tree_and_files()

        # Get orphan files that are being deleted
        total_file_deletion_operations = (
            LocalFile.objects.get_orphan_files().filter(available=True).count()
        )

        total_local_files_to_delete = LocalFile.objects.get_orphan_files().count()

        progress_extra_data = {"channel_id": channel_id}

        with self.start_progress(
            total=total_file_deletion_operations + total_local_files_to_delete + 1
        ) as progress_update:
            logger.info("Deleting all channel metadata")

            for file in LocalFile.objects.delete_orphan_files():
                if file.available:
                    progress_update(1, progress_extra_data)

            LocalFile.objects.delete_orphan_file_objects()

            progress_update(total_local_files_to_delete, progress_extra_data)

            try:
                os.remove(get_content_database_file_path(channel_id))
            except OSError:
                pass

            progress_update(1, progress_extra_data)
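
A note on the progress arithmetic above: the total is the number of available orphan files (each deletion advances progress by one), plus the count of all orphan LocalFile rows (reported as a single bulk update once delete_orphan_file_objects() finishes), plus one final step for removing the channel database file.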
Example #5
    def set_content_fixture(self, db_path_mock):
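        # Build a temporary sqlite content database from a pickled SQLAlchemy
        # schema and a JSON data fixture, pointing the mocked path helper at it.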
        _, self.content_db_path = tempfile.mkstemp(suffix=".sqlite3")
        db_path_mock.return_value = self.content_db_path
        self.content_engine = create_engine("sqlite:///" +
                                            self.content_db_path,
                                            convert_unicode=True)

        with open(SCHEMA_PATH_TEMPLATE.format(name=self.schema_name), "rb") as f:
            metadata = pickle.load(f)

        data_path = DATA_PATH_TEMPLATE.format(name=self.data_name)
        with io.open(data_path, mode="r", encoding="utf-8") as f:
            data = json.load(f)

        metadata.bind = self.content_engine

        metadata.create_all()

        conn = self.content_engine.connect()

        # Write data for each fixture into the table
        for table in metadata.sorted_tables:
            if data[table.name]:
                conn.execute(table.insert(), data[table.name])

        conn.close()

        with patch(
            "kolibri.core.content.utils.sqlalchemybridge.get_engine",
            new=self.get_engine,
        ):

            channel_metadata = read_channel_metadata_from_db_file(
                get_content_database_file_path("6199dde695db4ee4ab392222d5af1e5c")
            )

            # Double check that we have actually created a valid content db that is recognized as having that schema
            assert channel_metadata.inferred_schema_version == self.schema_name

            import_channel_from_local_db("6199dde695db4ee4ab392222d5af1e5c")
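
This fixture depends on SQLAlchemy 1.x behaviour: the convert_unicode flag was removed in SQLAlchemy 1.4, and bound metadata (metadata.bind) in 2.0. A standalone sketch of the same build-and-populate pattern under that assumption, using a throwaway demo table:

from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine

engine = create_engine("sqlite:///:memory:")
metadata = MetaData()
demo = Table(
    "demo",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String),
)
metadata.create_all(engine)  # emits CREATE TABLE for every table in the schema
conn = engine.connect()
# Passing a list of dicts performs an executemany-style bulk insert,
# exactly as the fixture does for each table above.
conn.execute(demo.insert(), [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}])
conn.close()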
Example #6
def _localexport(
    channel_id,
    drive_id,
    update_progress=None,
    check_for_cancel=None,
    node_ids=None,
    exclude_node_ids=None,
    extra_metadata=None,
):
    drive = get_mounted_drive_by_id(drive_id)

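    # Export the channel database to the drive, then the content files; if the
    # content export is cancelled, remove the already exported database so the
    # drive is not left with a channel that has no content, then re-raise.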
    call_command(
        "exportchannel",
        channel_id,
        drive.datafolder,
        update_progress=update_progress,
        check_for_cancel=check_for_cancel,
    )
    try:
        call_command(
            "exportcontent",
            channel_id,
            drive.datafolder,
            node_ids=node_ids,
            exclude_node_ids=exclude_node_ids,
            update_progress=update_progress,
            check_for_cancel=check_for_cancel,
        )
    except UserCancelledError:
        try:
            os.remove(
                get_content_database_file_path(channel_id, datafolder=drive.datafolder)
            )
        except OSError:
            pass
        raise
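
Like diff_stats in Example #1, this function is intended to be enqueued as a background job; a cancellable job is what makes the UserCancelledError cleanup path reachable. A hedged sketch reusing Example #1's queue API, with placeholder arguments:

job_id = priority_queue.enqueue(
    _localexport,
    "6199dde695db4ee4ab392222d5af1e5c",
    drive_id="f00d",  # hypothetical mounted drive id
    node_ids=None,
    exclude_node_ids=None,
    extra_metadata={"started_by": 1},  # placeholder user pk
    cancellable=True,
)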