def channeldiffstats(self, request):
    job_metadata = {}
    channel_id = request.data.get("channel_id")
    method = request.data.get("method")
    drive_id = request.data.get("drive_id")
    baseurl = request.data.get("baseurl")

    # request validation and job metadata info
    if not channel_id:
        raise serializers.ValidationError("The channel_id field is required.")
    if not method:
        raise serializers.ValidationError("The method field is required.")

    if method == "network":
        baseurl = baseurl or conf.OPTIONS["Urls"]["CENTRAL_CONTENT_BASE_URL"]
        job_metadata["baseurl"] = baseurl
        # get channel version metadata
        url = get_channel_lookup_url(baseurl=baseurl, identifier=channel_id)
        resp = requests.get(url)
        channel_metadata = resp.json()
        job_metadata["new_channel_version"] = channel_metadata[0]["version"]
    elif method == "disk":
        if not drive_id:
            raise serializers.ValidationError(
                "The drive_id field is required when using 'disk' method."
            )
        job_metadata = _add_drive_info(job_metadata, request.data)
        # get channel version metadata
        drive = get_mounted_drive_by_id(drive_id)
        channel_metadata = read_channel_metadata_from_db_file(
            get_content_database_file_path(channel_id, drive.datafolder)
        )
        job_metadata["new_channel_version"] = channel_metadata.version
    else:
        raise serializers.ValidationError(
            "'method' field should either be 'network' or 'disk'."
        )

    job_metadata.update(
        {
            "type": "CHANNELDIFFSTATS",
            "started_by": request.user.pk,
            "channel_id": channel_id,
        }
    )

    job_id = priority_queue.enqueue(
        diff_stats,
        channel_id,
        method,
        drive_id=drive_id,
        baseurl=baseurl,
        extra_metadata=job_metadata,
        track_progress=False,
        cancellable=True,
    )

    resp = _job_to_response(priority_queue.fetch_job(job_id))

    return Response(resp)
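# --- Hedged client-side sketch (not part of the original module) ---
# The view above validates a POST body and enqueues a diff_stats job. A client could
# drive it roughly as follows; the endpoint path, host, and authentication are
# assumptions and will differ per deployment.
import requests

payload = {
    "channel_id": "6199dde695db4ee4ab392222d5af1e5c",
    "method": "network",
    # "baseurl" is optional for the "network" method; the server falls back to
    # CENTRAL_CONTENT_BASE_URL when it is omitted.
}
resp = requests.post(
    "http://localhost:8080/api/tasks/tasks/channeldiffstats/",  # assumed route
    json=payload,
)
resp.raise_for_status()
print(resp.json())  # serialized job, including "new_channel_version"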
def fix_multiple_trees_with_tree_id1():
    # Do a check for improperly imported ContentNode trees
    # These trees have been naively imported, and so there are multiple trees
    # with tree_ids set to 1. Just check the root nodes to reduce the query size.
    tree_id_one_channel_ids = ContentNode.objects.filter(
        parent=None, tree_id=1
    ).values_list("channel_id", flat=True)
    if len(tree_id_one_channel_ids) > 1:
        logger.warning("Improperly imported channels discovered")
        # There is more than one channel with a tree_id of 1
        # Find which channel has the most content nodes, and then delete and reimport the rest.
        channel_sizes = {}
        for channel_id in tree_id_one_channel_ids:
            channel_sizes[channel_id] = ContentNode.objects.filter(
                channel_id=channel_id
            ).count()
        # Get sorted list of ids by increasing number of nodes
        sorted_channel_ids = sorted(channel_sizes, key=channel_sizes.get)
        # Loop through all but the largest channel, delete and reimport
        count = 0
        for channel_id in sorted_channel_ids[:-1]:
            # Double check that we have a content db to import from before deleting any metadata
            if os.path.exists(get_content_database_file_path(channel_id)):
                logger.warning(
                    "Deleting and reimporting channel metadata for {channel_id}".format(
                        channel_id=channel_id
                    )
                )
                ChannelMetadata.objects.get(
                    id=channel_id
                ).delete_content_tree_and_files()
                import_channel_from_local_db(channel_id)
                logger.info(
                    "Successfully reimported channel metadata for {channel_id}".format(
                        channel_id=channel_id
                    )
                )
                count += 1
            else:
                logger.warning(
                    "Attempted to reimport channel metadata for channel {channel_id} "
                    "but no content database found".format(channel_id=channel_id)
                )
        if count:
            logger.info(
                "Successfully reimported channel metadata for {count} channels".format(
                    count=count
                )
            )
        failed_count = len(sorted_channel_ids) - 1 - count
        if failed_count:
            logger.warning(
                "Failed to reimport channel metadata for {count} channels".format(
                    count=failed_count
                )
            )
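# --- Illustration only (not part of the original module) ---
# The clean-up above keeps the channel with the most ContentNodes and reimports the
# rest. The selection reduces to sorting channel ids by node count and dropping the
# last (largest) entry; the invented counts below show the behaviour.
channel_sizes = {"channel_a": 120, "channel_b": 4500, "channel_c": 87}
sorted_channel_ids = sorted(channel_sizes, key=channel_sizes.get)
assert sorted_channel_ids == ["channel_c", "channel_a", "channel_b"]
assert sorted_channel_ids[:-1] == ["channel_c", "channel_a"]  # these get reimported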
def handle_async(self, *args, **options):
    channel_id = options["channel_id"]
    node_ids = options["node_ids"]
    exclude_node_ids = options["exclude_node_ids"]
    force_delete = options["force_delete"]

    try:
        channel = ChannelMetadata.objects.get(pk=channel_id)
    except ChannelMetadata.DoesNotExist:
        raise CommandError(
            "Channel matching id {id} does not exist".format(id=channel_id)
        )

    delete_all_metadata = delete_metadata(
        channel, node_ids, exclude_node_ids, force_delete
    )

    unused_files = LocalFile.objects.get_unused_files()
    # Get orphan files that are being deleted
    total_file_deletion_operations = unused_files.count()
    job = get_current_job()
    if job:
        total_file_deletion_size = unused_files.aggregate(Sum("file_size")).get(
            "file_size__sum", 0
        )
        job.extra_metadata["file_size"] = total_file_deletion_size
        job.extra_metadata["total_resources"] = total_file_deletion_operations
        job.save_meta()

    progress_extra_data = {"channel_id": channel_id}

    additional_progress = sum((1, bool(delete_all_metadata)))
    with self.start_progress(
        total=total_file_deletion_operations + additional_progress
    ) as progress_update:
        for file in LocalFile.objects.delete_unused_files():
            progress_update(1, progress_extra_data)

        with db_task_write_lock:
            LocalFile.objects.delete_orphan_file_objects()

        progress_update(1, progress_extra_data)

        if delete_all_metadata:
            try:
                os.remove(get_content_database_file_path(channel_id))
            except OSError:
                pass

            progress_update(1, progress_extra_data)
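# --- Hypothetical invocation sketch (names are assumptions) ---
# If the handler above backs a management command registered as "deletecontent",
# it could be driven programmatically like this; node_ids, exclude_node_ids and
# force_delete mirror the options read in handle_async and would narrow or force
# the deletion when supplied.
from django.core.management import call_command

call_command("deletecontent", "6199dde695db4ee4ab392222d5af1e5c")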
def handle_async(self, *args, **options):
    channel_id = options["channel_id"]
    try:
        channel = ChannelMetadata.objects.get(pk=channel_id)
    except ChannelMetadata.DoesNotExist:
        raise CommandError(
            "Channel matching id {id} does not exist".format(id=channel_id)
        )

    logger.info("Deleting all channel metadata")
    channel.delete_content_tree_and_files()

    # Get orphan files that are being deleted
    total_file_deletion_operations = (
        LocalFile.objects.get_orphan_files().filter(available=True).count()
    )
    total_local_files_to_delete = LocalFile.objects.get_orphan_files().count()

    progress_extra_data = {"channel_id": channel_id}

    with self.start_progress(
        total=total_file_deletion_operations + total_local_files_to_delete + 1
    ) as progress_update:
        logger.info("Deleting all channel files")
        for file in LocalFile.objects.delete_orphan_files():
            if file.available:
                progress_update(1, progress_extra_data)

        LocalFile.objects.delete_orphan_file_objects()
        progress_update(total_local_files_to_delete, progress_extra_data)

        try:
            os.remove(get_content_database_file_path(channel_id))
        except OSError:
            pass

        progress_update(1, progress_extra_data)
def set_content_fixture(self, db_path_mock):
    _, self.content_db_path = tempfile.mkstemp(suffix=".sqlite3")
    db_path_mock.return_value = self.content_db_path
    self.content_engine = create_engine(
        "sqlite:///" + self.content_db_path, convert_unicode=True
    )

    with open(SCHEMA_PATH_TEMPLATE.format(name=self.schema_name), "rb") as f:
        metadata = pickle.load(f)

    data_path = DATA_PATH_TEMPLATE.format(name=self.data_name)
    with io.open(data_path, mode="r", encoding="utf-8") as f:
        data = json.load(f)

    metadata.bind = self.content_engine

    metadata.create_all()

    conn = self.content_engine.connect()

    # Write data for each fixture into the table
    for table in metadata.sorted_tables:
        if data[table.name]:
            conn.execute(table.insert(), data[table.name])

    conn.close()

    with patch(
        "kolibri.core.content.utils.sqlalchemybridge.get_engine",
        new=self.get_engine,
    ):
        channel_metadata = read_channel_metadata_from_db_file(
            get_content_database_file_path("6199dde695db4ee4ab392222d5af1e5c")
        )
        # Double check that we have actually created a valid content db
        # that is recognized as having that schema
        assert channel_metadata.inferred_schema_version == self.schema_name

        import_channel_from_local_db("6199dde695db4ee4ab392222d5af1e5c")
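# --- Standalone sketch of the fixture pattern above (illustrative names) ---
# Bind a SQLAlchemy MetaData to a throwaway SQLite engine, create the tables, and
# bulk-insert rows via table.insert(); this mirrors what set_content_fixture does
# with the pickled schema and JSON data. It assumes the SQLAlchemy 1.x API
# (metadata.bind / implicit autocommit), matching the fixture code.
from sqlalchemy import Column, MetaData, String, Table, create_engine

metadata = MetaData()
content_node = Table(
    "contentnode",
    metadata,
    Column("id", String, primary_key=True),
    Column("title", String),
)

engine = create_engine("sqlite://")  # in-memory database
metadata.bind = engine
metadata.create_all()

rows = [{"id": "abc123", "title": "Example node"}]
conn = engine.connect()
conn.execute(content_node.insert(), rows)
conn.close()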
def _localexport(
    channel_id,
    drive_id,
    update_progress=None,
    check_for_cancel=None,
    node_ids=None,
    exclude_node_ids=None,
    extra_metadata=None,
):
    drive = get_mounted_drive_by_id(drive_id)

    call_command(
        "exportchannel",
        channel_id,
        drive.datafolder,
        update_progress=update_progress,
        check_for_cancel=check_for_cancel,
    )
    try:
        call_command(
            "exportcontent",
            channel_id,
            drive.datafolder,
            node_ids=node_ids,
            exclude_node_ids=exclude_node_ids,
            update_progress=update_progress,
            check_for_cancel=check_for_cancel,
        )
    except UserCancelledError:
        try:
            os.remove(
                get_content_database_file_path(channel_id, datafolder=drive.datafolder)
            )
        except OSError:
            pass
        raise
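# --- Hedged usage sketch (the enqueue call below is an assumption) ---
# _localexport is written to run as a background task. With a job queue like the
# priority_queue used earlier in this section, an export to a mounted drive might be
# enqueued roughly as follows; the positional and keyword arguments follow the
# function signature above, while the drive id and channel id are placeholders.
job_id = priority_queue.enqueue(
    _localexport,
    "6199dde695db4ee4ab392222d5af1e5c",  # channel_id
    "drive_1",  # drive_id, as reported by get_mounted_drive_by_id
    node_ids=None,
    exclude_node_ids=None,
    track_progress=True,
    cancellable=True,
)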