def get_available_checksums_from_disk(channel_id, drive_id):
    try:
        basepath = get_mounted_drive_by_id(drive_id).datafolder
    except KeyError:
        raise LocationError("Drive with id {} does not exist".format(drive_id))
    PER_DISK_CACHE_KEY = "DISK_AVAILABLE_CHECKSUMS_{basepath}".format(basepath=basepath)
    PER_DISK_PER_CHANNEL_CACHE_KEY = "DISK_AVAILABLE_CHECKSUMS_{basepath}_{channel_id}".format(
        basepath=basepath, channel_id=channel_id
    )
    if PER_DISK_PER_CHANNEL_CACHE_KEY not in process_cache:
        if PER_DISK_CACHE_KEY not in process_cache:
            content_dir = get_content_storage_dir_path(datafolder=basepath)

            disk_checksums = []

            for _, _, files in os.walk(content_dir):
                for name in files:
                    checksum = os.path.splitext(name)[0]
                    # Only add valid checksums formatted according to our standard filename
                    if checksum_regex.match(checksum):
                        disk_checksums.append(checksum)
            # Cache is per device, so a relatively long lived one should be fine.
            process_cache.set(PER_DISK_CACHE_KEY, disk_checksums, 3600)
        else:
            disk_checksums = process_cache.get(PER_DISK_CACHE_KEY)
        checksums = set(
            LocalFile.objects.filter(
                files__contentnode__channel_id=channel_id
            ).values_list("id", flat=True)
        ).intersection(set(disk_checksums))
        process_cache.set(PER_DISK_PER_CHANNEL_CACHE_KEY, checksums, 3600)
    else:
        checksums = process_cache.get(PER_DISK_PER_CHANNEL_CACHE_KEY)
    return checksums
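# A minimal sketch of the checksum_regex referenced above, which is not shown
# in this excerpt. It assumes content filenames are named after 32-character
# lowercase hex (MD5-style) digests; the actual pattern in the codebase may
# differ.
import re

checksum_regex = re.compile("^([a-f0-9]{32})$")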
def to_representation(self, instance):
    value = super(ChannelMetadataSerializer, self).to_representation(instance)

    # if the request includes a GET param 'include_fields', add the requested calculated fields
    if "request" in self.context:

        include_fields = self.context["request"].GET.get("include_fields", "").split(",")

        if include_fields:

            # build querysets for the full set of channel nodes, as well as those that are unrenderable
            channel_nodes = ContentNode.objects.filter(channel_id=instance.id)
            unrenderable_nodes = channel_nodes.exclude(
                renderable_contentnodes_without_topics_q_filter
            )

            if "total_resources" in include_fields:
                # count the total number of renderable non-topic resources in the channel
                # (note: it's faster to count them all and then subtract the unrenderables,
                # of which there are fewer)
                value["total_resources"] = (
                    channel_nodes.dedupe_by_content_id().count()
                    - unrenderable_nodes.dedupe_by_content_id().count()
                )

            if "total_file_size" in include_fields:
                # count the total file size of files associated with renderable content nodes
                # (note: it's faster to count them all and then subtract the unrenderables,
                # of which there are fewer)
                value["total_file_size"] = total_file_size(
                    channel_nodes
                ) - total_file_size(unrenderable_nodes)

            if "on_device_resources" in include_fields:
                # read the precalculated total number of resources from the channel already available
                value["on_device_resources"] = instance.total_resource_count

            if "on_device_file_size" in include_fields:
                # read the precalculated total size of available files associated with the channel
                value["on_device_file_size"] = instance.published_size

            new_resource_stats = process_cache.get(
                CHANNEL_UPDATE_STATS_CACHE_KEY.format(instance.id)
            )

            if "new_resource_count" in include_fields and new_resource_stats:
                new_resource_ids = new_resource_stats.get("new_resource_ids")
                value["new_resource_count"] = (
                    len(new_resource_ids) if new_resource_ids is not None else None
                )

            if "new_resource_total_size" in include_fields and new_resource_stats:
                value["new_resource_total_size"] = new_resource_stats.get(
                    "new_resource_total_size", None
                )

    return value
def get(self, **kwargs):
    if DEVICE_SETTINGS_CACHE_KEY not in cache:
        model = super(DeviceSettingsManager, self).get(**kwargs)
        cache.set(DEVICE_SETTINGS_CACHE_KEY, model, 600)
    else:
        model = cache.get(DEVICE_SETTINGS_CACHE_KEY)
    return model
def clear_channel_stats(channel_id):
    cached_keys = process_cache.get(
        CHANNEL_STATS_CACHED_KEYS.format(channel_id=channel_id), set()
    )
    for key in cached_keys:
        process_cache.delete(key)
    process_cache.set(
        CHANNEL_STATS_CACHED_KEYS.format(channel_id=channel_id), set(), None
    )
def get_app_key(cls):
    key = cache.get(APP_KEY_CACHE_KEY)
    if key is None:
        try:
            app_key = cls.objects.get()
        except cls.DoesNotExist:
            app_key = cls.update_app_key()
        key = app_key.key
        cache.set(APP_KEY_CACHE_KEY, key, 5000)
    return key
def get_cache_key(cls):
    key = cache.get(CONTENT_CACHE_KEY_CACHE_KEY)
    if key is None:
        try:
            cache_key = cls.objects.get()
        except cls.DoesNotExist:
            cache_key = cls.update_cache_key()
        key = cache_key.key
        cache.set(CONTENT_CACHE_KEY_CACHE_KEY, key, 5000)
    return key
def get_channel_stats_from_studio(channel_id):
    CACHE_KEY = "STUDIO_CHANNEL_STATS_{channel_id}".format(channel_id=channel_id)
    if CACHE_KEY not in process_cache:
        channel_stats = get_channel_annotation_stats(channel_id)
        process_cache.set(CACHE_KEY, channel_stats, 3600)
        register_key_as_cached(CACHE_KEY, channel_id)
    else:
        channel_stats = process_cache.get(CACHE_KEY)
    return channel_stats
def get_channel_stats_from_peer(channel_id, peer_id):
    CACHE_KEY = "PEER_CHANNEL_STATS_{peer_id}_{channel_id}".format(
        peer_id=peer_id, channel_id=channel_id
    )
    if CACHE_KEY not in process_cache:
        checksums = get_available_checksums_from_remote(channel_id, peer_id)
        channel_stats = get_channel_annotation_stats(channel_id, checksums)
        process_cache.set(CACHE_KEY, channel_stats, 3600)
        register_key_as_cached(CACHE_KEY, channel_id)
    else:
        channel_stats = process_cache.get(CACHE_KEY)
    return channel_stats
def get_channel_stats_from_disk(channel_id, drive_id):
    CACHE_KEY = "DISK_CHANNEL_STATS_{drive_id}_{channel_id}".format(
        drive_id=drive_id, channel_id=channel_id
    )
    if CACHE_KEY not in process_cache:
        checksums = get_available_checksums_from_disk(channel_id, drive_id)
        channel_stats = get_channel_annotation_stats(channel_id, checksums)
        process_cache.set(CACHE_KEY, channel_stats, 3600)
        register_key_as_cached(CACHE_KEY, channel_id)
    else:
        channel_stats = process_cache.get(CACHE_KEY)
    return channel_stats
def get_available_checksums_from_remote(channel_id, peer_id):
    """
    The current implementation prioritizes minimising requests to the remote server.
    In order to achieve this, it caches based on the baseurl and the channel_id.
    Also, it POSTs the complete list of non-supplementary files to the remote
    endpoint, and thus can keep this representation cached regardless of how the
    availability on the local server has changed in the interim.
    """
    try:
        baseurl = NetworkLocation.objects.values_list("base_url", flat=True).get(
            id=peer_id
        )
    except NetworkLocation.DoesNotExist:
        raise LocationError("Peer with id {} does not exist".format(peer_id))

    CACHE_KEY = "PEER_AVAILABLE_CHECKSUMS_{baseurl}_{channel_id}".format(
        baseurl=baseurl, channel_id=channel_id
    )
    if CACHE_KEY not in process_cache:

        channel_checksums = (
            LocalFile.objects.filter(
                files__contentnode__channel_id=channel_id, files__supplementary=False
            )
            .values_list("id", flat=True)
            .distinct()
        )

        response = requests.post(
            get_file_checksums_url(channel_id, baseurl),
            data=compress_string(
                bytes(json.dumps(list(channel_checksums)).encode("utf-8"))
            ),
            headers={"content-type": "application/gzip"},
        )

        checksums = None

        # Only process the response body if the request was successful
        if response.status_code == 200:
            try:
                integer_mask = int(response.content)

                # Filter to avoid passing in bad checksums
                checksums = set(
                    compress(
                        channel_checksums,
                        _generate_mask_from_integer(integer_mask),
                    )
                )
                process_cache.set(CACHE_KEY, checksums, 3600)
            except (ValueError, TypeError):
                # Non-numeric response content will throw a ValueError
                # If the mask is not iterable, a TypeError will be thrown
                # If we end up here, just leave checksums as None to allow us
                # to cleanly continue
                pass
    else:
        checksums = process_cache.get(CACHE_KEY)
    return checksums
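# A minimal sketch of the _generate_mask_from_integer helper used above, which
# is not shown in this excerpt. It assumes the peer responds with an integer
# whose binary digits, least significant bit first, mark which of the POSTed
# checksums it has available; itertools.compress then keeps only the checksums
# whose bit is set. The real helper may differ in bit order or padding.
from itertools import compress  # noqa: F811 (assumed to already be imported at module level)


def _generate_mask_from_integer(integer_mask):
    # Yield one boolean per checksum, least significant bit first.
    # compress() stops at the shorter iterable, which is safe here because
    # any checksums beyond the last set bit are unavailable anyway.
    while integer_mask:
        yield bool(integer_mask % 2)
        integer_mask //= 2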
def device_port_open(self):
    """check to see if a port is open at a given `base_url`"""
    cached = process_cache.get(DEVICE_PORT_CACHE_KEY.format(url=self.base_url))
    if cached:
        return cached
    result = check_if_port_open(self.base_url)
    process_cache.set(
        DEVICE_PORT_CACHE_KEY.format(url=self.base_url), result, DEVICE_PORT_TIMEOUT
    )
    return result
def get_import_data_for_update(channel_id, drive_id=None, peer_id=None, renderable_only=True):
    update_stats = process_cache.get(CHANNEL_UPDATE_STATS_CACHE_KEY.format(channel_id))
    if not update_stats:
        raise ValueError(
            "Tried to get update content nodes for channel {} that has no precalculated update stats".format(
                channel_id
            )
        )

    # By default don't filter node ids by their underlying file importability
    file_based_node_id_dict = None
    if drive_id:
        file_based_node_id_dict = get_channel_stats_from_disk(channel_id, drive_id)

    if peer_id:
        file_based_node_id_dict = get_channel_stats_from_peer(channel_id, peer_id)

    updated_resource_ids = update_stats.get("updated_resource_ids", [])

    # batch_size is assumed to be a module-level constant that caps how many
    # ids are passed to a single query
    i = 0

    updated_ids_slice = updated_resource_ids[i : i + batch_size]

    nodes_to_include = ContentNode.objects.filter(channel_id=channel_id)

    # if requested, filter out nodes we're not able to render
    if renderable_only:
        nodes_to_include = nodes_to_include.filter(renderable_contentnodes_q_filter)

    queried_file_objects = []

    content_ids = set()

    while updated_ids_slice:
        if file_based_node_id_dict is not None:
            # If we have a list of limited node id availability limit our slice here
            updated_ids_slice = list(
                filter(lambda x: x in file_based_node_id_dict, updated_ids_slice)
            )

        # Possible that the above filtering rendered our list empty, so skip queries
        # in that case
        if updated_ids_slice:
            batch_nodes = nodes_to_include.filter_by_uuids(updated_ids_slice)

            content_ids.update(
                batch_nodes.values_list("content_id", flat=True).distinct()
            )

            files_to_transfer = LocalFile.objects.filter(
                available=False, files__contentnode__in=batch_nodes
            )

            queried_file_objects.extend(files_to_transfer)

        i += batch_size
        updated_ids_slice = updated_resource_ids[i : i + batch_size]

    # Get all nodes that are marked as available but have missing files.
    # This will ensure that we update thumbnails, and other files.
    queried_file_objects.extend(
        LocalFile.objects.filter(
            available=False,
            files__contentnode__in=ContentNode.objects.filter(
                available=True, channel_id=channel_id
            ),
        )
    )

    checksums = set()

    total_bytes_to_transfer = 0

    files_to_download = []

    # Deduplicate files by checksum (LocalFile ids are checksums) while
    # tallying the total number of bytes to transfer
    for file in queried_file_objects:
        if file.id not in checksums:
            checksums.add(file.id)
            total_bytes_to_transfer += file.file_size
            files_to_download.append(file)

    return len(content_ids), files_to_download, total_bytes_to_transfer
def device_info(self):
    return process_cache.get(DEVICE_INFO_CACHE_KEY.format(url=self.base_url))
def register_key_as_cached(key, channel_id):
    cached_keys = process_cache.get(
        CHANNEL_STATS_CACHED_KEYS.format(channel_id=channel_id), set()
    )
    cached_keys.add(key)
    process_cache.set(
        CHANNEL_STATS_CACHED_KEYS.format(channel_id=channel_id), cached_keys, None
    )
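# Hypothetical usage showing how the two halves of the registry cooperate:
# each get_channel_stats_from_* call registers its stats cache key in a
# per-channel set (stored with no expiry), so clear_channel_stats can later
# invalidate every stats entry for that channel in one pass, e.g. after an
# import changes file availability. The ids below are placeholders.
stats = get_channel_stats_from_disk(channel_id="a1b2c3", drive_id="drive-1")
# ... a content import completes and availability annotations change ...
clear_channel_stats("a1b2c3")  # deletes every registered stats key for the channel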