def filter_by_file_availability(nodes_to_include, channel_id, drive_id,
                                peer_id):
    """Restrict ``nodes_to_include`` to nodes whose files are importable.

    When a ``drive_id`` or ``peer_id`` is supplied, node ids are limited
    to those present in the corresponding channel stats; otherwise the
    queryset is returned untouched.
    """
    importable_node_ids = None

    # Drive stats are looked up first; peer stats, when present, replace
    # them (peer takes precedence if both ids are somehow supplied).
    if drive_id:
        importable_node_ids = get_channel_stats_from_disk(
            channel_id, drive_id).keys()
    if peer_id:
        importable_node_ids = get_channel_stats_from_peer(
            channel_id, peer_id).keys()

    if importable_node_ids is None:
        return nodes_to_include
    return nodes_to_include.filter_by_uuids(importable_node_ids)
Example #2
0
def get_nodes_to_transfer(
    channel_id,
    node_ids,
    exclude_node_ids,
    available,
    renderable_only=True,
    drive_id=None,
    peer_id=None,
):
    """Build the queryset of ContentNodes to transfer for a channel.

    Starts from every node in ``channel_id`` and successively narrows it:
    to the requested ``node_ids`` subtrees, to renderable nodes, away from
    the ``exclude_node_ids`` subtrees, and — for drive/peer imports — to
    nodes whose files the source actually has. Finally filters on the
    requested ``available`` state.
    """
    selection = ContentNode.objects.filter(channel_id=channel_id)

    # Narrow to the requested topics/nodes plus all of their descendants.
    if node_ids:
        selection = selection.filter_by_uuids(node_ids).get_descendants(
            include_self=True
        )

    # Drop nodes this installation is unable to render, unless told otherwise.
    if renderable_only:
        selection = selection.filter(renderable_contentnodes_q_filter)

    # Remove explicitly excluded subtrees (descendants included).
    if exclude_node_ids:
        excluded = ContentNode.objects.filter_by_uuids(
            exclude_node_ids
        ).get_descendants(include_self=True)
        selection = selection.order_by().exclude_by_uuids(
            excluded.values("pk")
        )

    # When importing from a drive or a peer, keep only nodes whose files
    # are present at the source; peer stats take precedence over drive stats.
    importable_node_ids = None
    if drive_id:
        importable_node_ids = get_channel_stats_from_disk(
            channel_id, drive_id
        ).keys()
    if peer_id:
        importable_node_ids = get_channel_stats_from_peer(
            channel_id, peer_id
        ).keys()
    if importable_node_ids is not None:
        selection = selection.filter_by_uuids(importable_node_ids)

    return selection.filter(available=available).order_by()
Example #3
0
    def retrieve(self, request, pk):
        """Return a content node with its children and ancestors attached.

        Also primes ``self.channel_stats`` according to which import
        source the request targets: studio (default), an external drive,
        a network peer, or export (no stats).
        """
        queryset = self.get_queryset()
        instance = get_object_or_404(queryset, pk=pk)
        channel_id = instance.channel_id

        params = self.request.query_params
        drive_id = params.get("importing_from_drive_id", None)
        peer_id = params.get("importing_from_peer_id", None)
        for_export = params.get("for_export", None)

        # The three source flags are mutually exclusive.
        set_flags = [flag for flag in (drive_id, peer_id, for_export) if flag]
        if len(set_flags) > 1:
            raise serializers.ValidationError(
                "Must specify at most one of importing_from_drive_id, importing_from_peer_id, and for_export"
            )

        if not set_flags:
            self.channel_stats = get_channel_stats_from_studio(channel_id)
        elif for_export:
            self.channel_stats = None
        elif drive_id:
            try:
                self.channel_stats = get_channel_stats_from_disk(
                    channel_id, drive_id)
            except LocationError:
                raise serializers.ValidationError(
                    "The external drive with given drive id {} does not exist."
                    .format(drive_id))
        else:
            try:
                self.channel_stats = get_channel_stats_from_peer(
                    channel_id, peer_id)
            except LocationError:
                raise serializers.ValidationError(
                    "The network location with the id {} does not exist".
                    format(peer_id))

        children = queryset.filter(parent=instance)
        parent_data = self.get_serializer(instance).data
        parent_data["children"] = self.get_serializer(children, many=True).data
        parent_data["ancestors"] = list(
            instance.get_ancestors().values("id", "title"))

        return Response(parent_data)
Example #4
0
def get_import_data_for_update(channel_id,
                               drive_id=None,
                               peer_id=None,
                               renderable_only=True):
    """Compute what must be imported to apply a precalculated channel update.

    Returns a tuple ``(resource_count, files_to_download, total_bytes)``:
    the number of distinct content ids touched, the list of unavailable
    LocalFiles to fetch, and their combined size in bytes.

    Raises ValueError when no update stats have been precalculated for
    the channel.
    """
    update_stats = process_cache.get(
        CHANNEL_UPDATE_STATS_CACHE_KEY.format(channel_id))
    if not update_stats:
        raise ValueError(
            "Tried to get update content nodes for channel {} that has no precalculated update stats"
            .format(channel_id))

    # Optionally limit node ids to those importable from the drive/peer;
    # peer stats take precedence when both ids are supplied.
    importable_nodes = None
    if drive_id:
        importable_nodes = get_channel_stats_from_disk(channel_id, drive_id)
    if peer_id:
        importable_nodes = get_channel_stats_from_peer(channel_id, peer_id)

    updated_resource_ids = update_stats.get("updated_resource_ids", [])

    base_nodes = ContentNode.objects.filter(channel_id=channel_id)
    # If requested, drop nodes this installation cannot render.
    if renderable_only:
        base_nodes = base_nodes.filter(renderable_contentnodes_q_filter)

    queried_file_objects = []
    content_ids = set()

    # Walk the updated resource ids in fixed-size batches.
    offset = 0
    while True:
        batch = updated_resource_ids[offset:offset + batch_size]
        if not batch:
            break
        offset += batch_size

        # If node id availability is limited, restrict this batch to it.
        if importable_nodes is not None:
            batch = [rid for rid in batch if rid in importable_nodes]

        # The availability filter may have emptied the batch entirely;
        # skip the queries in that case.
        if not batch:
            continue

        batch_nodes = base_nodes.filter_by_uuids(batch)

        content_ids.update(
            batch_nodes.values_list("content_id", flat=True).distinct())

        queried_file_objects.extend(
            LocalFile.objects.filter(
                available=False, files__contentnode__in=batch_nodes))

    # Also pick up missing files on nodes already marked available, so
    # that thumbnails and other supplementary files get refreshed.
    queried_file_objects.extend(
        LocalFile.objects.filter(
            available=False,
            files__contentnode__in=ContentNode.objects.filter(
                available=True, channel_id=channel_id),
        ))

    # De-duplicate by file id while tallying the total transfer size.
    seen_ids = set()
    files_to_download = []
    total_bytes_to_transfer = 0
    for local_file in queried_file_objects:
        if local_file.id in seen_ids:
            continue
        seen_ids.add(local_file.id)
        total_bytes_to_transfer += local_file.file_size
        files_to_download.append(local_file)

    return len(content_ids), files_to_download, total_bytes_to_transfer