Example #1
def get_available_checksums_from_disk(channel_id, drive_id):
    try:
        basepath = get_mounted_drive_by_id(drive_id).datafolder
    except KeyError:
        raise LocationError("Drive with id {} does not exist".format(drive_id))
    PER_DISK_CACHE_KEY = "DISK_AVAILABLE_CHECKSUMS_{basepath}".format(
        basepath=basepath)
    PER_DISK_PER_CHANNEL_CACHE_KEY = (
        "DISK_AVAILABLE_CHECKSUMS_{basepath}_{channel_id}".format(
            basepath=basepath, channel_id=channel_id))
    if PER_DISK_PER_CHANNEL_CACHE_KEY not in process_cache:
        if PER_DISK_CACHE_KEY not in process_cache:
            content_dir = get_content_storage_dir_path(datafolder=basepath)

            disk_checksums = []

            for _, _, files in os.walk(content_dir):
                for name in files:
                    checksum = os.path.splitext(name)[0]
                    # Only add valid checksums, i.e. those matching our standard filename convention
                    if checksum_regex.match(checksum):
                        disk_checksums.append(checksum)
            # Cache is per device, so a relatively long lived one should
            # be fine.
            process_cache.set(PER_DISK_CACHE_KEY, disk_checksums, 3600)
        else:
            disk_checksums = process_cache.get(PER_DISK_CACHE_KEY)
        checksums = set(
            LocalFile.objects.filter(
                files__contentnode__channel_id=channel_id).values_list(
                    "id", flat=True)).intersection(set(disk_checksums))
        process_cache.set(PER_DISK_PER_CHANNEL_CACHE_KEY, checksums, 3600)
    else:
        checksums = process_cache.get(PER_DISK_PER_CHANNEL_CACHE_KEY)
    return checksums
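
The snippet above filters on a module-level checksum_regex that is not shown. A minimal sketch of a plausible definition, assuming content files are named after their 32-character hexadecimal MD5 digests (an assumption, not confirmed by the excerpt):

import re

# Assumed pattern: a full-string match on a 32-character lowercase hex digest,
# so partial or malformed filenames are rejected.
checksum_regex = re.compile("^[a-f0-9]{32}$")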
Example #2
    def to_representation(self, instance):
        value = super(ChannelMetadataSerializer, self).to_representation(instance)

        # if the request includes a GET param 'include_fields', add the requested calculated fields
        if "request" in self.context:

            # Note: "".split(",") yields [""], which is truthy, so filter out
            # empty entries to make the emptiness check below meaningful
            include_fields = [
                field
                for field in self.context["request"].GET.get("include_fields", "").split(",")
                if field
            ]

            if include_fields:

                # build querysets for the full set of channel nodes, as well as those that are unrenderable
                channel_nodes = ContentNode.objects.filter(channel_id=instance.id)
                unrenderable_nodes = channel_nodes.exclude(
                    renderable_contentnodes_without_topics_q_filter
                )

                if "total_resources" in include_fields:
                    # count the total number of renderable non-topic resources in the channel
                    # (note: it's faster to count them all and then subtract the unrenderables, of which there are fewer)
                    value["total_resources"] = (
                        channel_nodes.dedupe_by_content_id().count()
                        - unrenderable_nodes.dedupe_by_content_id().count()
                    )

                if "total_file_size" in include_fields:
                    # count the total file size of files associated with renderable content nodes
                    # (note: it's faster to count them all and then subtract the unrenderables, of which there are fewer)
                    value["total_file_size"] = total_file_size(
                        channel_nodes
                    ) - total_file_size(unrenderable_nodes)

                if "on_device_resources" in include_fields:
                    # read the precalculated total number of resources from the channel already available
                    value["on_device_resources"] = instance.total_resource_count

                if "on_device_file_size" in include_fields:
                    # read the precalculated total size of available files associated with the channel
                    value["on_device_file_size"] = instance.published_size

                new_resource_stats = process_cache.get(
                    CHANNEL_UPDATE_STATS_CACHE_KEY.format(instance.id)
                )

                if "new_resource_count" in include_fields and new_resource_stats:
                    new_resource_ids = new_resource_stats.get("new_resource_ids")
                    value["new_resource_count"] = (
                        len(new_resource_ids) if new_resource_ids is not None else None
                    )

                if "new_resource_total_size" in include_fields and new_resource_stats:
                    new_resource_stats = process_cache.get(
                        CHANNEL_UPDATE_STATS_CACHE_KEY.format(instance.id)
                    )
                    value["new_resource_total_size"] = new_resource_stats.get(
                        "new_resource_total_size", None
                    )

        return value
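
For illustration, the calculated fields above are driven entirely by the include_fields query parameter. A hypothetical request using Django's test client (the endpoint path and channel id are placeholders, not taken from the source):

from django.test import Client

client = Client()
# Both the URL path and the channel id below are assumptions for illustration.
response = client.get(
    "/api/content/channelmetadata/95a52b386f2c485cb2f1da9cf8a48b6c/"
    "?include_fields=total_resources,total_file_size"
)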
Example #3
    def get(self, **kwargs):
        if DEVICE_SETTINGS_CACHE_KEY not in cache:
            model = super(DeviceSettingsManager, self).get(**kwargs)
            cache.set(DEVICE_SETTINGS_CACHE_KEY, model, 600)
        else:
            model = cache.get(DEVICE_SETTINGS_CACHE_KEY)
        return model
def clear_channel_stats(channel_id):
    cached_keys = process_cache.get(
        CHANNEL_STATS_CACHED_KEYS.format(channel_id=channel_id), set())
    for key in cached_keys:
        process_cache.delete(key)
    process_cache.set(CHANNEL_STATS_CACHED_KEYS.format(channel_id=channel_id),
                      set(), None)
Example #5
    @classmethod
    def get_app_key(cls):
        key = cache.get(APP_KEY_CACHE_KEY)
        if key is None:
            try:
                app_key = cls.objects.get()
            except cls.DoesNotExist:
                app_key = cls.update_app_key()
            key = app_key.key
            cache.set(APP_KEY_CACHE_KEY, key, 5000)
        return key
Example #6
    @classmethod
    def get_cache_key(cls):
        key = cache.get(CONTENT_CACHE_KEY_CACHE_KEY)
        if key is None:
            try:
                cache_key = cls.objects.get()
            except cls.DoesNotExist:
                cache_key = cls.update_cache_key()
            key = cache_key.key
            cache.set(CONTENT_CACHE_KEY_CACHE_KEY, key, 5000)
        return key
def get_channel_stats_from_studio(channel_id):
    CACHE_KEY = "STUDIO_CHANNEL_STATS_{channel_id}".format(
        channel_id=channel_id)
    if CACHE_KEY not in process_cache:
        channel_stats = get_channel_annotation_stats(channel_id)
        process_cache.set(CACHE_KEY, channel_stats, 3600)
        register_key_as_cached(CACHE_KEY, channel_id)
    else:
        channel_stats = process_cache.get(CACHE_KEY)
    return channel_stats
def get_channel_stats_from_peer(channel_id, peer_id):
    CACHE_KEY = "PEER_CHANNEL_STATS_{peer_id}_{channel_id}".format(
        peer_id=peer_id, channel_id=channel_id)
    if CACHE_KEY not in process_cache:
        checksums = get_available_checksums_from_remote(channel_id, peer_id)
        channel_stats = get_channel_annotation_stats(channel_id, checksums)
        process_cache.set(CACHE_KEY, channel_stats, 3600)
        register_key_as_cached(CACHE_KEY, channel_id)
    else:
        channel_stats = process_cache.get(CACHE_KEY)
    return channel_stats
def get_channel_stats_from_disk(channel_id, drive_id):
    CACHE_KEY = "DISK_CHANNEL_STATS_{drive_id}_{channel_id}".format(
        drive_id=drive_id, channel_id=channel_id)
    if CACHE_KEY not in process_cache:
        checksums = get_available_checksums_from_disk(channel_id, drive_id)
        channel_stats = get_channel_annotation_stats(channel_id, checksums)
        process_cache.set(CACHE_KEY, channel_stats, 3600)
        register_key_as_cached(CACHE_KEY, channel_id)
    else:
        channel_stats = process_cache.get(CACHE_KEY)
    return channel_stats
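
The three get_channel_stats_from_* functions above share the same cache-aside shape; a minimal sketch of the common pattern, factored into a hypothetical helper that is not part of the source:

def _cached_channel_stats(cache_key, channel_id, compute_stats):
    # Hypothetical helper: on a miss, compute and cache the stats and register
    # the key so clear_channel_stats can invalidate it later; on a hit, read back.
    if cache_key not in process_cache:
        stats = compute_stats()
        process_cache.set(cache_key, stats, 3600)
        register_key_as_cached(cache_key, channel_id)
    else:
        stats = process_cache.get(cache_key)
    return stats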
Example #10
def get_available_checksums_from_remote(channel_id, peer_id):
    """
    The current implementation prioritizes minimizing requests to the remote server.
    In order to achieve this, it caches based on the baseurl and the channel_id.
    Also, it POSTs the complete list of non-supplementary files to the remote endpoint,
    and thus can keep this representation cached regardless of how the availability on
    the local server has changed in the interim.
    """
    try:
        baseurl = NetworkLocation.objects.values_list(
            "base_url", flat=True).get(id=peer_id)
    except NetworkLocation.DoesNotExist:
        raise LocationError("Peer with id {} does not exist".format(peer_id))

    CACHE_KEY = "PEER_AVAILABLE_CHECKSUMS_{baseurl}_{channel_id}".format(
        baseurl=baseurl, channel_id=channel_id)
    if CACHE_KEY not in process_cache:

        channel_checksums = (LocalFile.objects.filter(
            files__contentnode__channel_id=channel_id,
            files__supplementary=False).values_list("id",
                                                    flat=True).distinct())

        response = requests.post(
            get_file_checksums_url(channel_id, baseurl),
            data=compress_string(
                bytes(json.dumps(list(channel_checksums)).encode("utf-8"))),
            headers={"content-type": "application/gzip"},
        )

        checksums = None

        # Do something if we got a successful return
        if response.status_code == 200:
            try:
                integer_mask = int(response.content)

                # Filter to avoid passing in bad checksums
                checksums = set(
                    compress(channel_checksums,
                             _generate_mask_from_integer(integer_mask)))
                process_cache.set(CACHE_KEY, checksums, 3600)
            except (ValueError, TypeError):
                # int() raises ValueError on a malformed response body,
                # and TypeError on an unexpected type.
                # If we end up here, checksums stays None so we can cleanly continue
                pass
    else:
        checksums = process_cache.get(CACHE_KEY)
    return checksums
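
The helper _generate_mask_from_integer is referenced above but not shown. A plausible sketch, assuming the remote endpoint replies with an integer bitmask whose n-th bit flags the availability of the n-th posted checksum (compress here is itertools.compress):

def _generate_mask_from_integer(mask):
    # Assumed implementation: yield one selector per checksum, reading the
    # integer's bits from least significant upwards, for use with compress().
    while mask:
        yield mask & 1
        mask >>= 1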
Example #11
    def device_port_open(self):
        """ check to see if a port is open at a given `base_url` """

        cached = process_cache.get(
            DEVICE_PORT_CACHE_KEY.format(url=self.base_url))

        # A cache miss returns None; compare explicitly so that a cached
        # False (port closed) is honored rather than triggering a re-probe
        if cached is not None:
            return cached

        result = check_if_port_open(self.base_url)
        process_cache.set(DEVICE_PORT_CACHE_KEY.format(url=self.base_url),
                          result, DEVICE_PORT_TIMEOUT)

        return result
Example #12
def get_import_data_for_update(channel_id,
                               drive_id=None,
                               peer_id=None,
                               renderable_only=True):
    update_stats = process_cache.get(
        CHANNEL_UPDATE_STATS_CACHE_KEY.format(channel_id))
    if not update_stats:
        raise ValueError(
            "Tried to get update content nodes for channel {} that has no precalculated update stats"
            .format(channel_id))

    # By default don't filter node ids by their underlying file importability
    file_based_node_id_dict = None
    if drive_id:
        file_based_node_id_dict = get_channel_stats_from_disk(
            channel_id, drive_id)

    if peer_id:
        file_based_node_id_dict = get_channel_stats_from_peer(
            channel_id, peer_id)

    updated_resource_ids = update_stats.get("updated_resource_ids", [])

    # batch_size is assumed to be a module-level constant setting how many
    # resource ids are processed per query
    i = 0

    updated_ids_slice = updated_resource_ids[i:i + batch_size]

    nodes_to_include = ContentNode.objects.filter(channel_id=channel_id)

    # if requested, filter out nodes we're not able to render
    if renderable_only:
        nodes_to_include = nodes_to_include.filter(
            renderable_contentnodes_q_filter)

    queried_file_objects = []

    content_ids = set()

    while updated_ids_slice:
        if file_based_node_id_dict is not None:
            # If node availability is limited by the disk or peer stats,
            # restrict this slice to the node ids that are actually available
            updated_ids_slice = list(
                filter(lambda x: x in file_based_node_id_dict,
                       updated_ids_slice))

        # Possible that the above filtering rendered our list empty, so skip queries
        # in that case

        if updated_ids_slice:

            batch_nodes = nodes_to_include.filter_by_uuids(updated_ids_slice)

            content_ids.update(
                batch_nodes.values_list("content_id", flat=True).distinct())

            files_to_transfer = LocalFile.objects.filter(
                available=False, files__contentnode__in=batch_nodes)

            queried_file_objects.extend(files_to_transfer)

        i += batch_size
        updated_ids_slice = updated_resource_ids[i:i + batch_size]

    # Get all nodes that are marked as available but have missing files.
    # This will ensure that we update thumbnails, and other files.
    queried_file_objects.extend(
        LocalFile.objects.filter(
            available=False,
            files__contentnode__in=ContentNode.objects.filter(
                available=True, channel_id=channel_id),
        ))

    checksums = set()

    total_bytes_to_transfer = 0

    files_to_download = []

    for file in queried_file_objects:
        if file.id not in checksums:
            checksums.add(file.id)
            total_bytes_to_transfer += file.file_size
            files_to_download.append(file)

    return len(content_ids), files_to_download, total_bytes_to_transfer
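
A hypothetical call site, unpacking the three return values (the channel and peer ids are placeholders):

resource_count, files_to_download, transfer_size = get_import_data_for_update(
    "95a52b386f2c485cb2f1da9cf8a48b6c", peer_id="1", renderable_only=True
)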
Example #13
    def device_info(self):
        return process_cache.get(
            DEVICE_INFO_CACHE_KEY.format(url=self.base_url))
def register_key_as_cached(key, channel_id):
    cached_keys = process_cache.get(
        CHANNEL_STATS_CACHED_KEYS.format(channel_id=channel_id), set())
    cached_keys.add(key)
    process_cache.set(CHANNEL_STATS_CACHED_KEYS.format(channel_id=channel_id),
                      cached_keys, None)
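
Note that register_key_as_cached maintains the per-channel registry of cache keys that clear_channel_stats (Example #3) iterates over when invalidating a channel's cached stats.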