def do_upgrade_pony_db_6to7(self, mds):
    """
    Upgrade the Pony metadata database from schema version 6 to 7.

    Deletes channel and torrent entries whose title+tags hit the
    offending-words stop-list (is_forbidden), then bumps the stored
    "db_version" MiscData value to 7. A no-op unless the stored
    version is exactly 6, so it is safe to call unconditionally.

    :param mds: the metadata store holding MiscData, ChannelMetadata
        and TorrentMetadata entities.
    """
    with db_session:
        db_version = mds.MiscData.get(name="db_version")
        # Only ever upgrade from exactly version 6.
        if int(db_version.value) != 6:
            return
        # Raw SQL keeps this scan cheap: only the columns needed for the
        # stop-list check are selected.
        for c in mds.ChannelMetadata.select_by_sql(f"""
            select rowid, title, tags, metadata_type from ChannelNode
            where metadata_type = {CHANNEL_TORRENT}
        """):
            if is_forbidden(c.title + c.tags):
                # Remove the channel's contents first, then the channel row.
                c.contents.delete()
                c.delete()
                # The channel torrent will be removed by GigaChannel manager during the cruft cleanup

    # The process is broken down into batches to limit memory usage
    batch_size = 10000
    with db_session:
        total_entries = mds.TorrentMetadata.select().count()

    # Walk pages from the last one down to 0, one db_session per page so the
    # DB lock is never held for the whole scan.
    # NOTE(review): Pony's Query.page() is 1-based; the loop also visits page
    # 0 — presumably harmless (re-scans/empty), but verify against Pony docs.
    page_num = total_entries // batch_size
    while page_num >= 0:
        with db_session:
            for t in mds.TorrentMetadata.select().page(page_num, pagesize=batch_size):
                if is_forbidden(t.title + t.tags):
                    t.delete()
        page_num -= 1

    # Record the completed upgrade.
    with db_session:
        db_version = mds.MiscData.get(name="db_version")
        db_version.value = str(7)
    return
def reject_payload_with_offending_words(self):
    """
    Reject payloads whose textual fields contain strong offending words.

    Concatenates whichever of the payload's title/tags/text attributes
    exist and runs them through the stop-list. Returns an empty list to
    halt processing on a hit, otherwise CONTINUE to hand control to the
    remaining checks.
    """
    combined_text = " ".join(
        getattr(self.payload, field)
        for field in ("title", "tags", "text")
        if hasattr(self.payload, field)
    )
    return [] if is_forbidden(combined_text) else CONTINUE
def convert_discovered_channels(self):
    """
    Convert channels from the legacy (7.2) database into ChannelMetadata rows.

    Tracks progress in MiscData under CONVERSION_FROM_72_CHANNELS so an
    interrupted run can be detected and its partial results dropped. After
    inserting the converted channels, deletes any channel whose title hits
    the offending-words stop-list or that has no contents, and fixes up
    num_entries for the survivors.
    """
    # Reflect conversion state
    with db_session:
        v = self.mds.MiscData.get_for_update(
            name=CONVERSION_FROM_72_CHANNELS)
        if v:
            if v.value == CONVERSION_STARTED:
                # Just drop the entries from the previous try
                orm.delete(g for g in self.mds.ChannelMetadata
                           if g.status == LEGACY_ENTRY)
            else:
                v.set(value=CONVERSION_STARTED)
        else:
            self.mds.MiscData(name=CONVERSION_FROM_72_CHANNELS,
                              value=CONVERSION_STARTED)

    old_channels = self.get_old_channels()
    # We break it up into separate sessions because this is going to be executed
    # on a background thread and we do not want to hold the DB lock for too long
    with db_session:
        for c in old_channels:
            if self.shutting_down:
                break
            try:
                self.mds.ChannelMetadata(**c)
            except Exception:
                # Best-effort conversion: skip a channel that fails to insert
                # (e.g. constraint violation). Catching Exception instead of a
                # bare "except:" keeps KeyboardInterrupt/SystemExit propagating.
                continue

    with db_session:
        for c in self.mds.ChannelMetadata.select().for_update()[:]:
            contents_len = c.contents_len
            title = c.title
            if is_forbidden(title):
                c.delete()
            elif contents_len:
                c.num_entries = contents_len
            else:
                # Empty channels carry no useful data - drop them.
                c.delete()

    # Mark the conversion as finished so future runs skip it.
    with db_session:
        v = self.mds.MiscData.get_for_update(
            name=CONVERSION_FROM_72_CHANNELS)
        v.value = CONVERSION_FINISHED
def process_payload(self, payload, skip_personal_metadata_payload=True, channel_public_key=None):
    """
    This routine decides what to do with a given payload and executes the necessary actions.
    To do so, it looks into the database, compares version numbers, etc.
    It returns a list of tuples each of which contain the corresponding new/old object and the actions
    that were performed on that object.

    :param payload: payload to work on
    :param skip_personal_metadata_payload: if this is set to True, personal torrent metadata payload received
            through gossip will be ignored. The default value is True.
    :param channel_public_key: rejects payloads that do not belong to this key.
           Enabling this allows to skip some costly checks during e.g. channel processing.

    :return: a list of tuples of (<metadata or payload>, <action type>)
    """
    # In case we're processing a channel, we allow only payloads with the channel's public_key
    if channel_public_key is not None and payload.public_key != channel_public_key:
        self._logger.warning(
            "Tried to push metadata entry with foreign public key.\
 Expected public key: %s, entry public key / id: %s / %i",
            hexlify(channel_public_key),
            payload.public_key,
            payload.id_,
        )
        return [(None, NO_ACTION)]

    if payload.metadata_type == DELETED:
        # Never delete our own entries based on gossip when asked to skip them.
        if payload.public_key == self.my_public_key_bin and skip_personal_metadata_payload:
            return [(None, NO_ACTION)]
        # We only allow people to delete their own entries, thus PKs must match
        node = self.ChannelNode.get_for_update(signature=payload.delete_signature,
                                               public_key=payload.public_key)
        if node:
            node.delete()
        return [(None, DELETED_METADATA)]

    # Only these entry types are processed further; everything else is ignored.
    if payload.metadata_type not in [CHANNEL_TORRENT, REGULAR_TORRENT, COLLECTION_NODE]:
        return []

    # Check for offending words stop-list
    if is_forbidden(payload.title + payload.tags):
        return [(None, NO_ACTION)]

    # FFA payloads get special treatment:
    if payload.public_key == NULL_KEY:
        if payload.metadata_type == REGULAR_TORRENT:
            node = self.TorrentMetadata.add_ffa_from_dict(payload.to_dict())
            if node:
                return [(node, UNKNOWN_TORRENT)]
        return [(None, NO_ACTION)]

    if channel_public_key is None and payload.metadata_type in [COLLECTION_NODE, REGULAR_TORRENT]:
        # Check if the received payload is from a channel that we already have and send update if necessary

        # Get the toplevel parent
        parent = self.ChannelNode.get(public_key=payload.public_key, id_=payload.origin_id)
        if parent:
            parent_channel = None
            if parent.origin_id == 0:
                # The direct parent is itself a top-level channel.
                parent_channel = parent
            else:
                # Walk up the hierarchy to find the enclosing channel.
                parents_ids = parent.get_parents_ids()
                if 0 in parents_ids:
                    parent_channel = self.ChannelNode.get(public_key=payload.public_key, id_=parents_ids[1])
            # A locally newer channel version means this payload is stale.
            if parent_channel and parent_channel.local_version > payload.timestamp:
                return [(None, NO_ACTION)]

    # Check for the older version of the added node
    node = self.ChannelNode.get_for_update(public_key=database_blob(payload.public_key), id_=payload.id_)
    if node:
        return self.update_channel_node(node, payload, skip_personal_metadata_payload)

    # Our own gossiped entries are ignored when asked to skip them.
    if payload.public_key == self.my_public_key_bin and skip_personal_metadata_payload:
        return [(None, NO_ACTION)]

    # Dispatch payload creation to the ORM class matching its discriminator.
    for orm_class, response in (
            (self.TorrentMetadata, UNKNOWN_TORRENT),
            (self.ChannelMetadata, UNKNOWN_CHANNEL),
            (self.CollectionNode, UNKNOWN_COLLECTION),
    ):
        if orm_class._discriminator_ == payload.metadata_type:
            return [(orm_class.from_payload(payload), response)]
    return []
def test_l2_filter(self):
    """The level-2 stop-list flags underage terms and passes adult ones."""
    for forbidden_query in ("9yo ponies", "12yo ponies"):
        self.assertTrue(is_forbidden(forbidden_query))
    self.assertFalse(is_forbidden("18yo ponies"))
def get_old_torrents(self, personal_channel_only=False, batch_size=10000, offset=0, sign=False):
    """
    Fetch one batch of torrents from the legacy Tribler SQLite database and
    convert them into dicts suitable for creating new metadata entries.

    :param personal_channel_only: when True, restrict the query to the
        personal channel; when False, exclude it (only if a personal
        channel id is known).
    :param batch_size: maximum number of rows fetched per call.
    :param offset: SQL OFFSET for paging through the old table.
    :param sign: when True, the entries are meant to be (re)signed by us,
        so no origin_id is attached.
    :return: a list of (torrent_dict, health_dict) tuples, or None when
        the query returned zero rows (signals the caller to stop paging).
    """
    with contextlib.closing(sqlite3.connect(self.tribler_db)) as connection, connection:
        cursor = connection.cursor()
        # Keep SQLite temporary tables in memory for speed.
        cursor.execute("PRAGMA temp_store = 2")

        personal_channel_filter = ""
        if self.personal_channel_id:
            # Include or exclude the personal channel depending on the flag.
            equality_sign = " == " if personal_channel_only else " != "
            personal_channel_filter = f"AND ct.channel_id {equality_sign} {self.personal_channel_id}"

        torrents = []
        batch_not_empty = False  # This is a dumb way to indicate that this batch got zero entries from DB
        for tracker_url, channel_id, name, infohash, length, creation_date, torrent_id, category, num_seeders, \
                num_leechers, last_tracker_check in cursor.execute(
                f"{self.select_full} {personal_channel_filter} group by infohash "
                f"LIMIT {batch_size} OFFSET {offset}"):
            batch_not_empty = True
            # check if name is valid unicode data
            try:
                name = str(name)
            except UnicodeDecodeError:
                continue
            try:
                # Sanity checks: base64 infohash must decode to 20 bytes, id and
                # length must be positive (length capped at 2^45), and the name
                # must be non-empty and not on the offending-words stop-list.
                invalid_decoding = len(base64.decodebytes(infohash.encode('utf-8'))) != 20
                invalid_id = not torrent_id or int(torrent_id) == 0
                invalid_length = not length or (int(length) <= 0) or (int(length) > (1 << 45))
                invalid_name = not name or is_forbidden(name)
                if invalid_decoding or invalid_id or invalid_length or invalid_name:
                    continue

                infohash = base64.decodebytes(infohash.encode())

                # Clamp nonsensical creation dates (missing, negative, or in the
                # future relative to the conversion start) to epoch zero.
                torrent_date = datetime.datetime.utcfromtimestamp(creation_date or 0)
                torrent_date = torrent_date if 0 <= time2int(torrent_date) <= self.conversion_start_timestamp_int \
                    else int2time(0)

                torrent_dict = {
                    "status": NEW,
                    "infohash": infohash,
                    "size": int(length),
                    "torrent_date": torrent_date,
                    "title": name or '',
                    "tags": category or '',
                    "tracker_info": tracker_url or '',
                    "xxx": int(category == u'xxx')}
                if not sign:
                    torrent_dict.update({"origin_id": infohash_to_id(channel_id)})

                # Health info only makes sense when all counters are non-negative.
                seeders = int(num_seeders or 0)
                leechers = int(num_leechers or 0)
                last_tracker_check = int(last_tracker_check or 0)
                health_dict = {
                    "seeders": seeders,
                    "leechers": leechers,
                    "last_check": last_tracker_check
                } if (last_tracker_check >= 0 and seeders >= 0 and leechers >= 0) else None
                torrents.append((torrent_dict, health_dict))
            except Exception as e:
                # Best-effort conversion: log and skip rows that fail for any reason.
                self._logger.warning("During retrieval of old torrents an exception was raised: %s", e)
                continue

    return torrents if batch_not_empty else None
def process_payload(self, payload, skip_personal_metadata_payload=True):
    """
    This routine decides what to do with a given payload and executes the necessary actions.
    To do so, it looks into the database, compares version numbers, etc.
    It returns a list of tuples each of which contain the corresponding new/old object and the actions
    that were performed on that object.

    :param payload: payload to work on
    :param skip_personal_metadata_payload: if this is set to True, personal torrent metadata payload received
            through gossip will be ignored. The default value is True.
    :return: a list of tuples of (<metadata or payload>, <action type>)
    """
    if payload.metadata_type == DELETED:
        # We only allow people to delete their own entries, thus PKs must match
        node = self.ChannelNode.get_for_update(
            signature=payload.delete_signature, public_key=payload.public_key)
        if node:
            node.delete()
        return [(None, DELETED_METADATA)]

    # Check if we already got an older version of the same node that we can update, and
    # check the uniqueness constraint on public_key+infohash tuple. If the received entry
    # has the same tuple as the entry we already have, update our entry if necessary.
    # This procedure is necessary to handle the case when the original author of the payload
    # had created another entry with the same infohash earlier, deleted it, and sent
    # the different versions to two different peers.

    # There is a corner case where there already exist 2 entries in our database that match both
    # update conditions:
    # A: (pk, id1, ih1)
    # B: (pk, id2, ih2)
    # When we receive the payload C1:(pk, id1, ih2) or C2:(pk, id2, ih1), we have to
    # replace _both_ entries with a single one, to honor the DB uniqueness constraints.

    # Only torrent/channel/collection entries are processed further.
    if payload.metadata_type not in [
            CHANNEL_TORRENT, REGULAR_TORRENT, COLLECTION_NODE
    ]:
        return []

    # Check for offending words stop-list
    if is_forbidden(payload.title + payload.tags):
        return [(None, NO_ACTION)]

    # FFA payloads get special treatment:
    if payload.public_key == NULL_KEY:
        if payload.metadata_type == REGULAR_TORRENT:
            node = self.TorrentMetadata.add_ffa_from_dict(
                payload.to_dict())
            if node:
                return [(node, UNKNOWN_TORRENT)]
        return [(None, NO_ACTION)]

    # Check if we already have this payload
    node = self.ChannelNode.get(signature=payload.signature,
                                public_key=payload.public_key)
    if node:
        return [(node, NO_ACTION)]

    result = []
    if payload.metadata_type in [CHANNEL_TORRENT, REGULAR_TORRENT]:
        # Signed entry > FFA entry. Old FFA entry > new FFA entry
        ffa_node = self.TorrentMetadata.get(public_key=database_blob(b""),
                                            infohash=database_blob(payload.infohash))
        if ffa_node:
            ffa_node.delete()

        def check_update_opportunity():
            # Check for possible update sending opportunity.
            node = self.TorrentMetadata.get(
                lambda g: g.public_key == database_blob(
                    payload.public_key) and g.id_ == payload.id_ and g.
                timestamp > payload.timestamp)
            return [(node, GOT_NEWER_VERSION)] if node else [(None, NO_ACTION)]

        # Check if the received payload is a deleted entry from a channel that we already have
        parent_channel = self.ChannelMetadata.get(public_key=database_blob(
            payload.public_key), id_=payload.origin_id)
        if parent_channel and parent_channel.local_version > payload.timestamp:
            return check_update_opportunity()

        # If we received a metadata payload signed by ourselves we simply ignore it since we are the only
        # authoritative source of information about our own channel.
        if skip_personal_metadata_payload and payload.public_key == bytes(
                database_blob(self.my_key.pub().key_to_bin()[10:])):
            return check_update_opportunity()

        # Check for a node with the same infohash
        node = self.TorrentMetadata.get_for_update(
            public_key=database_blob(payload.public_key),
            infohash=database_blob(payload.infohash))
        if node:
            if node.timestamp < payload.timestamp:
                # The incoming payload supersedes our entry with the same infohash.
                node.delete()
                result.append((None, DELETED_METADATA))
            elif node.timestamp > payload.timestamp:
                result.append((node, GOT_NEWER_VERSION))
                return result
            else:
                return result
            # Otherwise, we got the same version locally and do nothing.

    # Check for the older version of the same node
    node = self.ChannelNode.get_for_update(public_key=database_blob(
        payload.public_key), id_=payload.id_)
    if node:
        if node.timestamp < payload.timestamp:
            # The payload is newer: overwrite our entry in place.
            node.set(**payload.to_dict())
            result.append((node, UPDATED_OUR_VERSION))
            return result
        elif node.timestamp > payload.timestamp:
            result.append((node, GOT_NEWER_VERSION))
            return result
        # Otherwise, we got the same version locally and do nothing.
        # The situation when something was marked for deletion, and then we got here (i.e. we have the same or
        # newer version) should never happen, because this version should have removed the node we deleted earlier
        if result:
            self._logger.warning("Broken DB state!")
        return result

    # No existing entry matched: create a fresh one of the appropriate type.
    if payload.metadata_type == REGULAR_TORRENT:
        result.append(
            (self.TorrentMetadata.from_payload(payload), UNKNOWN_TORRENT))
    elif payload.metadata_type == CHANNEL_TORRENT:
        result.append(
            (self.ChannelMetadata.from_payload(payload), UNKNOWN_CHANNEL))
    elif payload.metadata_type == COLLECTION_NODE:
        result.append((self.CollectionNode.from_payload(payload),
                       UNKNOWN_COLLECTION))

    return result
def process_payload(self, payload, skip_personal_metadata_payload=True, channel_public_key=None):
    """
    This routine decides what to do with a given payload and executes the necessary actions.
    To do so, it looks into the database, compares version numbers, etc.
    It returns a list of tuples each of which contain the corresponding new/old object and the actions
    that were performed on that object.

    :param payload: payload to work on
    :param skip_personal_metadata_payload: if this is set to True, personal torrent metadata payload received
            through gossip will be ignored. The default value is True.
    :param channel_public_key: rejects payloads that do not belong to this key.
           Enabling this allows to skip some costly checks during e.g. channel processing.

    :return: a list of tuples of (<metadata or payload>, <action type>)
    """
    # In case we're processing a channel, we allow only payloads with the channel's public_key
    if channel_public_key is not None and payload.public_key != channel_public_key:
        self._logger.warning(
            "Tried to push metadata entry with foreign public key.\
 Expected public key: %s, entry public key / id: %s / %i",
            hexlify(channel_public_key),
            payload.public_key,
            payload.id_,
        )
        return []

    if payload.metadata_type == DELETED:
        # Our own gossiped deletions are ignored when asked to skip them.
        if payload.public_key == self.my_public_key_bin and skip_personal_metadata_payload:
            return []
        # We only allow people to delete their own entries, thus PKs must match
        node = self.ChannelNode.get_for_update(
            signature=payload.delete_signature, public_key=payload.public_key)
        if node:
            node.delete()
        return []

    # Only these entry types are processed further; everything else is ignored.
    if payload.metadata_type not in [
            CHANNEL_TORRENT,
            REGULAR_TORRENT,
            COLLECTION_NODE,
            CHANNEL_DESCRIPTION,
            CHANNEL_THUMBNAIL,
    ]:
        return []

    # Check for offending words stop-list
    if is_forbidden(" ".join([
            getattr(payload, attr) for attr in ("title", "tags", "text")
            if hasattr(payload, attr)
    ])):
        return []

    # FFA payloads get special treatment:
    if payload.public_key == NULL_KEY:
        if payload.metadata_type == REGULAR_TORRENT:
            node = self.TorrentMetadata.add_ffa_from_dict(
                payload.to_dict())
            if node:
                return [
                    ProcessingResult(md_obj=node,
                                     obj_state=ObjState.UNKNOWN_OBJECT)
                ]
        return []

    if channel_public_key is None and payload.metadata_type in [
            COLLECTION_NODE,
            REGULAR_TORRENT,
            CHANNEL_DESCRIPTION,
            CHANNEL_THUMBNAIL,
    ]:
        # Check if the received payload is from a channel that we already have and send update if necessary

        # Get the toplevel parent
        parent = self.ChannelNode.get(public_key=payload.public_key,
                                      id_=payload.origin_id)
        if parent:
            parent_channel = None
            if parent.origin_id == 0:
                # The direct parent already is a top-level channel.
                parent_channel = parent
            else:
                # Walk up the hierarchy to find the enclosing channel.
                parents_ids = parent.get_parents_ids()
                if 0 in parents_ids:
                    parent_channel = self.ChannelNode.get(
                        public_key=payload.public_key, id_=parents_ids[1])
            # A locally newer channel version means this payload is stale.
            if parent_channel and parent_channel.local_version > payload.timestamp:
                # Remark: add check_for_missing_dependencies here when collections are allowed descriptions
                return []

    # Check for the older version of the added node
    node = self.ChannelNode.get_for_update(public_key=database_blob(
        payload.public_key), id_=payload.id_)
    if node:
        node.to_simple_dict(
        )  # Force loading of related objects (like TorrentMetadata.health) in db_session
        update_results = self.update_channel_node(
            node, payload, skip_personal_metadata_payload)
        for r in update_results:
            r.missing_deps = self.check_for_missing_dependencies(
                r.md_obj, include_newer=True)
        return update_results

    # Our own gossiped entries are ignored when asked to skip them.
    if payload.public_key == self.my_public_key_bin and skip_personal_metadata_payload:
        return []

    # Dispatch payload creation to the ORM class matching its discriminator.
    for orm_class in (
            self.TorrentMetadata,
            self.ChannelMetadata,
            self.CollectionNode,
            self.ChannelThumbnail,
            self.ChannelDescription,
    ):
        if orm_class._discriminator_ == payload.metadata_type:  # pylint: disable=W0212
            obj = orm_class.from_payload(payload)
            missing_deps = self.check_for_missing_dependencies(obj)
            return [
                ProcessingResult(md_obj=obj,
                                 obj_state=ObjState.UNKNOWN_OBJECT,
                                 missing_deps=missing_deps)
            ]
    return []
def test_l2_filter():
    """The level-2 stop-list flags underage terms and passes adult ones."""
    for forbidden_query in ("9yo ponies", "12yo ponies"):
        assert is_forbidden(forbidden_query)
    assert not is_forbidden("18yo ponies")