Example 1
    def do_upgrade_pony_db_6to7(self, mds):
        with db_session:
            db_version = mds.MiscData.get(name="db_version")
            if int(db_version.value) != 6:
                return
            for c in mds.ChannelMetadata.select_by_sql(f"""
                select rowid, title, tags, metadata_type from ChannelNode
                where metadata_type = {CHANNEL_TORRENT}
            """):
                if is_forbidden(c.title + c.tags):
                    c.contents.delete()
                    c.delete()
                    # The channel torrent will be removed by GigaChannel manager during the cruft cleanup

        # The process is broken down into batches to limit memory usage
        batch_size = 10000
        with db_session:
            total_entries = mds.TorrentMetadata.select().count()
            page_num = total_entries // batch_size
        while page_num >= 0:
            with db_session:
                for t in mds.TorrentMetadata.select().page(
                        page_num, pagesize=batch_size):
                    if is_forbidden(t.title + t.tags):
                        t.delete()
            page_num -= 1
        with db_session:
            db_version = mds.MiscData.get(name="db_version")
            db_version.value = str(7)
        return
Example 2
 def reject_payload_with_offending_words(self):
     """
     Check if the payload contains strong offending words.
     If it does, stop processing and return an empty list.
     Otherwise, return CONTINUE to pass control to the next check.
     """
     if is_forbidden(" ".join([
             getattr(self.payload, attr)
             for attr in ("title", "tags", "text")
             if hasattr(self.payload, attr)
     ])):
         return []
     return CONTINUE
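
Note: CONTINUE in the snippet above is a sentinel value that tells the calling code to move on to the next check, while an empty list ends processing early. A minimal, purely illustrative sketch of a dispatcher that consumes such a sentinel is shown below; run_checks and the locally defined CONTINUE are assumptions for illustration, not the project's actual API.

    # Hypothetical check-chain driver: the first check that does not return the
    # CONTINUE sentinel decides the final result.
    CONTINUE = object()  # stand-in for the project's own sentinel constant

    def run_checks(checks):
        for check in checks:
            result = check()
            if result is not CONTINUE:
                return result  # a check short-circuited, e.g. with an empty list
        return CONTINUE  # no check objected; the caller may keep processing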
Example 3
    def convert_discovered_channels(self):
        # Reflect conversion state
        with db_session:
            v = self.mds.MiscData.get_for_update(
                name=CONVERSION_FROM_72_CHANNELS)
            if v:
                if v.value == CONVERSION_STARTED:
                    # Just drop the entries from the previous try
                    orm.delete(g for g in self.mds.ChannelMetadata
                               if g.status == LEGACY_ENTRY)
                else:
                    v.set(value=CONVERSION_STARTED)
            else:
                self.mds.MiscData(name=CONVERSION_FROM_72_CHANNELS,
                                  value=CONVERSION_STARTED)

        old_channels = self.get_old_channels()
        # We break the work up into separate sessions because this is going to be executed
        # on a background thread and we do not want to hold the DB lock for too long
        with db_session:
            for c in old_channels:
                if self.shutting_down:
                    break
                try:
                    self.mds.ChannelMetadata(**c)
                except Exception:
                    # Skip entries that fail to convert (e.g. duplicates or malformed channel data)
                    continue

        with db_session:
            for c in self.mds.ChannelMetadata.select().for_update()[:]:
                contents_len = c.contents_len
                title = c.title
                if is_forbidden(title):
                    c.delete()
                elif contents_len:
                    c.num_entries = contents_len
                else:
                    c.delete()

        with db_session:
            v = self.mds.MiscData.get_for_update(
                name=CONVERSION_FROM_72_CHANNELS)
            v.value = CONVERSION_FINISHED
Example 4
    def process_payload(self, payload, skip_personal_metadata_payload=True, channel_public_key=None):
        """
        This routine decides what to do with a given payload and executes the necessary actions.
        To do so, it looks into the database, compares version numbers, etc.
        It returns a list of tuples each of which contain the corresponding new/old object and the actions
        that were performed on that object.
        :param payload: payload to work on
        :param skip_personal_metadata_payload: if this is set to True, personal torrent metadata payload received
                through gossip will be ignored. The default value is True.
        :param channel_public_key: rejects payloads that do not belong to this key.
               Enabling this allows to skip some costly checks during e.g. channel processing.

        :return: a list of tuples of (<metadata or payload>, <action type>)
        """

        # In case we're processing a channel, we allow only payloads with the channel's public_key
        if channel_public_key is not None and payload.public_key != channel_public_key:
            self._logger.warning(
                "Tried to push metadata entry with foreign public key.\
             Expected public key: %s, entry public key / id: %s / %i",
                hexlify(channel_public_key),
                payload.public_key,
                payload.id_,
            )
            return [(None, NO_ACTION)]

        if payload.metadata_type == DELETED:
            if payload.public_key == self.my_public_key_bin and skip_personal_metadata_payload:
                return [(None, NO_ACTION)]
            # We only allow people to delete their own entries, thus PKs must match
            node = self.ChannelNode.get_for_update(signature=payload.delete_signature, public_key=payload.public_key)
            if node:
                node.delete()
                return [(None, DELETED_METADATA)]

        if payload.metadata_type not in [CHANNEL_TORRENT, REGULAR_TORRENT, COLLECTION_NODE]:
            return []

        # Check for offending words stop-list
        if is_forbidden(payload.title + payload.tags):
            return [(None, NO_ACTION)]

        # FFA payloads get special treatment:
        if payload.public_key == NULL_KEY:
            if payload.metadata_type == REGULAR_TORRENT:
                node = self.TorrentMetadata.add_ffa_from_dict(payload.to_dict())
                if node:
                    return [(node, UNKNOWN_TORRENT)]
            return [(None, NO_ACTION)]

        if channel_public_key is None and payload.metadata_type in [COLLECTION_NODE, REGULAR_TORRENT]:
            # Check if the received payload is from a channel that we already have and send update if necessary

            # Get the toplevel parent
            parent = self.ChannelNode.get(public_key=payload.public_key, id_=payload.origin_id)
            if parent:
                parent_channel = None
                if parent.origin_id == 0:
                    parent_channel = parent
                else:
                    parents_ids = parent.get_parents_ids()
                    if 0 in parents_ids:
                        parent_channel = self.ChannelNode.get(public_key=payload.public_key, id_=parents_ids[1])
                if parent_channel and parent_channel.local_version > payload.timestamp:
                    return [(None, NO_ACTION)]

        # Check for the older version of the added node
        node = self.ChannelNode.get_for_update(public_key=database_blob(payload.public_key), id_=payload.id_)
        if node:
            return self.update_channel_node(node, payload, skip_personal_metadata_payload)

        if payload.public_key == self.my_public_key_bin and skip_personal_metadata_payload:
            return [(None, NO_ACTION)]
        for orm_class, response in (
            (self.TorrentMetadata, UNKNOWN_TORRENT),
            (self.ChannelMetadata, UNKNOWN_CHANNEL),
            (self.CollectionNode, UNKNOWN_COLLECTION),
        ):
            if orm_class._discriminator_ == payload.metadata_type:
                return [(orm_class.from_payload(payload), response)]
        return []
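
The tuples returned above pair a metadata object (or None) with an action flag such as UNKNOWN_TORRENT or NO_ACTION. A hypothetical caller-side sketch is given below; handle_processing_results and the on_new_entry callback are illustrative names, not part of the project, and the action constants are the module-level flags used in the example above.

    def handle_processing_results(results, on_new_entry):
        # Hypothetical consumer: dispatch on the action flag paired with each object.
        for md_obj, action in results:
            if action in (UNKNOWN_TORRENT, UNKNOWN_CHANNEL, UNKNOWN_COLLECTION):
                on_new_entry(md_obj)   # e.g. surface the newly learned entry to the GUI
            # NO_ACTION and DELETED_METADATA need no further handling here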
Example 5
 def test_l2_filter(self):
     self.assertTrue(is_forbidden("9yo ponies"))
     self.assertTrue(is_forbidden("12yo ponies"))
     self.assertFalse(is_forbidden("18yo ponies"))
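
The test above pins down the expected behaviour of the stop-list: strings naming under-age subjects are rejected, while "18yo" passes. A minimal sketch of how such a filter could be implemented is shown below; the single regex is an assumption for illustration only, and the project maintains its own, much larger list of offending terms.

    import re

    # Minimal illustrative stop-list: only reproduces the behaviour checked by the
    # test above.  The real filter uses a broader, maintained list of terms.
    _FORBIDDEN_PATTERN = re.compile(r"\b(?:[0-9]|1[0-2])yo\b", re.IGNORECASE)

    def is_forbidden_sketch(text):
        """Return True if the text matches a term from the stop-list."""
        return bool(_FORBIDDEN_PATTERN.search(text))

    assert is_forbidden_sketch("9yo ponies")
    assert is_forbidden_sketch("12yo ponies")
    assert not is_forbidden_sketch("18yo ponies")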
Example 6
    def get_old_torrents(self, personal_channel_only=False, batch_size=10000, offset=0,
                         sign=False):
        with contextlib.closing(sqlite3.connect(self.tribler_db)) as connection, connection:
            cursor = connection.cursor()
            cursor.execute("PRAGMA temp_store = 2")

            personal_channel_filter = ""
            if self.personal_channel_id:
                equality_sign = " == " if personal_channel_only else " != "
                personal_channel_filter = f"AND ct.channel_id {equality_sign} {self.personal_channel_id}"

            torrents = []
            batch_not_empty = False  # Tracks whether this batch returned any entries from the DB

            for tracker_url, channel_id, name, infohash, length, creation_date, torrent_id, category, num_seeders, \
                num_leechers, last_tracker_check in cursor.execute(
                        f"{self.select_full} {personal_channel_filter} group by infohash "
                        f"LIMIT {batch_size} OFFSET {offset}"
            ):
                batch_not_empty = True
                # check if name is valid unicode data
                try:
                    name = str(name)
                except UnicodeDecodeError:
                    continue

                try:
                    invalid_decoding = len(base64.decodebytes(infohash.encode('utf-8'))) != 20
                    invalid_id = not torrent_id or int(torrent_id) == 0
                    invalid_length = not length or (int(length) <= 0) or (int(length) > (1 << 45))
                    invalid_name = not name or is_forbidden(name)
                    if invalid_decoding or invalid_id or invalid_length or invalid_name:
                        continue

                    infohash = base64.decodebytes(infohash.encode())

                    torrent_date = datetime.datetime.utcfromtimestamp(creation_date or 0)
                    torrent_date = torrent_date if 0 <= time2int(torrent_date) <= self.conversion_start_timestamp_int \
                        else int2time(0)
                    torrent_dict = {
                        "status": NEW,
                        "infohash": infohash,
                        "size": int(length),
                        "torrent_date": torrent_date,
                        "title": name or '',
                        "tags": category or '',
                        "tracker_info": tracker_url or '',
                        "xxx": int(category == u'xxx')}
                    if not sign:
                        torrent_dict.update({"origin_id": infohash_to_id(channel_id)})
                    seeders = int(num_seeders or 0)
                    leechers = int(num_leechers or 0)
                    last_tracker_check = int(last_tracker_check or 0)
                    health_dict = {
                        "seeders": seeders,
                        "leechers": leechers,
                        "last_check": last_tracker_check
                    } if (last_tracker_check >= 0 and seeders >= 0 and leechers >= 0) else None
                    torrents.append((torrent_dict, health_dict))
                except Exception as e:
                    self._logger.warning("During retrieval of old torrents an exception was raised: %s", e)
                    continue

        return torrents if batch_not_empty else None
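
Since the method returns None once a batch comes back empty (and a list of (torrent_dict, health_dict) pairs otherwise), a caller can walk the legacy database with a simple offset loop. The sketch below is hypothetical; converter and store_batch are illustrative names, not the project's API.

    # Hypothetical driver loop for get_old_torrents(): keep advancing the offset
    # until an empty batch, signalled by a None return value, ends the conversion.
    offset, batch_size = 0, 10000
    while True:
        batch = converter.get_old_torrents(batch_size=batch_size, offset=offset)
        if batch is None:
            break
        store_batch(batch)   # hypothetical persistence step for the converted entries
        offset += batch_size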
Example 7
    def process_payload(self, payload, skip_personal_metadata_payload=True):
        """
        This routine decides what to do with a given payload and executes the necessary actions.
        To do so, it looks into the database, compares version numbers, etc.
        It returns a list of tuples each of which contain the corresponding new/old object and the actions
        that were performed on that object.
        :param payload: payload to work on
        :param skip_personal_metadata_payload: if this is set to True, personal torrent metadata payload received
                through gossip will be ignored. The default value is True.
        :return: a list of tuples of (<metadata or payload>, <action type>)
        """

        if payload.metadata_type == DELETED:
            # We only allow people to delete their own entries, thus PKs must match
            node = self.ChannelNode.get_for_update(
                signature=payload.delete_signature,
                public_key=payload.public_key)
            if node:
                node.delete()
                return [(None, DELETED_METADATA)]

        # Check if we already got an older version of the same node that we can update, and
        # check the uniqueness constraint on public_key+infohash tuple. If the received entry
        # has the same tuple as the entry we already have, update our entry if necessary.
        # This procedure is necessary to handle the case when the original author of the payload
        # had created another entry with the same infohash earlier, deleted it, and sent
        # the different versions to two different peers.
        # There is a corner case where there already exist 2 entries in our database that match both
        # update conditions:
        # A: (pk, id1, ih1)
        # B: (pk, id2, ih2)
        # When we receive the payload C1:(pk, id1, ih2) or C2:(pk, id2, ih1), we have to
        # replace _both_ entries with a single one, to honor the DB uniqueness constraints.

        if payload.metadata_type not in [
                CHANNEL_TORRENT, REGULAR_TORRENT, COLLECTION_NODE
        ]:
            return []

        # Check for offending words stop-list
        if is_forbidden(payload.title + payload.tags):
            return [(None, NO_ACTION)]

        # FFA payloads get special treatment:
        if payload.public_key == NULL_KEY:
            if payload.metadata_type == REGULAR_TORRENT:
                node = self.TorrentMetadata.add_ffa_from_dict(
                    payload.to_dict())
                if node:
                    return [(node, UNKNOWN_TORRENT)]
            return [(None, NO_ACTION)]

        # Check if we already have this payload
        node = self.ChannelNode.get(signature=payload.signature,
                                    public_key=payload.public_key)
        if node:
            return [(node, NO_ACTION)]

        result = []
        if payload.metadata_type in [CHANNEL_TORRENT, REGULAR_TORRENT]:
            # Signed entry > FFA entry. Old FFA entry > new FFA entry
            ffa_node = self.TorrentMetadata.get(
                public_key=database_blob(b""),
                infohash=database_blob(payload.infohash))
            if ffa_node:
                ffa_node.delete()

            def check_update_opportunity():
                # Check for possible update sending opportunity.
                node = self.TorrentMetadata.get(
                    lambda g: g.public_key == database_blob(payload.public_key)
                    and g.id_ == payload.id_
                    and g.timestamp > payload.timestamp)
                return [(node, GOT_NEWER_VERSION)] if node else [(None, NO_ACTION)]

            # Check if the received payload is a deleted entry from a channel that we already have
            parent_channel = self.ChannelMetadata.get(
                public_key=database_blob(payload.public_key), id_=payload.origin_id)
            if parent_channel and parent_channel.local_version > payload.timestamp:
                return check_update_opportunity()

            # If we received a metadata payload signed by ourselves we simply ignore it since we are the only
            # authoritative source of information about our own channel.
            if skip_personal_metadata_payload and payload.public_key == bytes(
                    database_blob(self.my_key.pub().key_to_bin()[10:])):
                return check_update_opportunity()

            # Check for a node with the same infohash
            node = self.TorrentMetadata.get_for_update(
                public_key=database_blob(payload.public_key),
                infohash=database_blob(payload.infohash))
            if node:
                if node.timestamp < payload.timestamp:
                    node.delete()
                    result.append((None, DELETED_METADATA))
                elif node.timestamp > payload.timestamp:
                    result.append((node, GOT_NEWER_VERSION))
                    return result
                else:
                    # Otherwise, we got the same version locally and do nothing.
                    return result

        # Check for the older version of the same node
        node = self.ChannelNode.get_for_update(
            public_key=database_blob(payload.public_key), id_=payload.id_)
        if node:
            if node.timestamp < payload.timestamp:
                node.set(**payload.to_dict())
                result.append((node, UPDATED_OUR_VERSION))
                return result
            elif node.timestamp > payload.timestamp:
                result.append((node, GOT_NEWER_VERSION))
                return result
            # Otherwise, we got the same version locally and do nothing.
            # The situation when something was marked for deletion, and then we got here (i.e. we have the same or
            # newer version) should never happen, because this version should have removed the node we deleted earlier
            if result:
                self._logger.warning("Broken DB state!")
            return result

        if payload.metadata_type == REGULAR_TORRENT:
            result.append(
                (self.TorrentMetadata.from_payload(payload), UNKNOWN_TORRENT))
        elif payload.metadata_type == CHANNEL_TORRENT:
            result.append(
                (self.ChannelMetadata.from_payload(payload), UNKNOWN_CHANNEL))
        elif payload.metadata_type == COLLECTION_NODE:
            result.append(
                (self.CollectionNode.from_payload(payload), UNKNOWN_COLLECTION))
        return result
Example 8
    def process_payload(self,
                        payload,
                        skip_personal_metadata_payload=True,
                        channel_public_key=None):
        """
        This routine decides what to do with a given payload and executes the necessary actions.
        To do so, it looks into the database, compares version numbers, etc.
        It returns a list of tuples each of which contain the corresponding new/old object and the actions
        that were performed on that object.
        :param payload: payload to work on
        :param skip_personal_metadata_payload: if this is set to True, personal torrent metadata payload received
                through gossip will be ignored. The default value is True.
        :param channel_public_key: rejects payloads that do not belong to this key.
               Enabling this allows to skip some costly checks during e.g. channel processing.

        :return: a list of tuples of (<metadata or payload>, <action type>)
        """

        # In case we're processing a channel, we allow only payloads with the channel's public_key
        if channel_public_key is not None and payload.public_key != channel_public_key:
            self._logger.warning(
                "Tried to push metadata entry with foreign public key.\
             Expected public key: %s, entry public key / id: %s / %i",
                hexlify(channel_public_key),
                payload.public_key,
                payload.id_,
            )
            return []

        if payload.metadata_type == DELETED:
            if payload.public_key == self.my_public_key_bin and skip_personal_metadata_payload:
                return []
            # We only allow people to delete their own entries, thus PKs must match
            node = self.ChannelNode.get_for_update(
                signature=payload.delete_signature,
                public_key=payload.public_key)
            if node:
                node.delete()
                return []

        if payload.metadata_type not in [
                CHANNEL_TORRENT,
                REGULAR_TORRENT,
                COLLECTION_NODE,
                CHANNEL_DESCRIPTION,
                CHANNEL_THUMBNAIL,
        ]:
            return []

        # Check for offending words stop-list
        if is_forbidden(" ".join([
                getattr(payload, attr) for attr in ("title", "tags", "text")
                if hasattr(payload, attr)
        ])):
            return []

        # FFA payloads get special treatment:
        if payload.public_key == NULL_KEY:
            if payload.metadata_type == REGULAR_TORRENT:
                node = self.TorrentMetadata.add_ffa_from_dict(
                    payload.to_dict())
                if node:
                    return [
                        ProcessingResult(md_obj=node,
                                         obj_state=ObjState.UNKNOWN_OBJECT)
                    ]
            return []

        if channel_public_key is None and payload.metadata_type in [
                COLLECTION_NODE,
                REGULAR_TORRENT,
                CHANNEL_DESCRIPTION,
                CHANNEL_THUMBNAIL,
        ]:
            # Check if the received payload is from a channel that we already have and send update if necessary

            # Get the toplevel parent
            parent = self.ChannelNode.get(public_key=payload.public_key,
                                          id_=payload.origin_id)
            if parent:
                parent_channel = None
                if parent.origin_id == 0:
                    parent_channel = parent
                else:
                    parents_ids = parent.get_parents_ids()
                    if 0 in parents_ids:
                        parent_channel = self.ChannelNode.get(
                            public_key=payload.public_key, id_=parents_ids[1])
                if parent_channel and parent_channel.local_version > payload.timestamp:
                    # Remark: add check_for_missing_dependencies here when collections are allowed descriptions
                    return []

        # Check for the older version of the added node
        node = self.ChannelNode.get_for_update(
            public_key=database_blob(payload.public_key), id_=payload.id_)
        if node:
            # Force loading of related objects (like TorrentMetadata.health) in db_session
            node.to_simple_dict()
            update_results = self.update_channel_node(
                node, payload, skip_personal_metadata_payload)
            for r in update_results:
                r.missing_deps = self.check_for_missing_dependencies(
                    r.md_obj, include_newer=True)
            return update_results

        if payload.public_key == self.my_public_key_bin and skip_personal_metadata_payload:
            return []
        for orm_class in (
                self.TorrentMetadata,
                self.ChannelMetadata,
                self.CollectionNode,
                self.ChannelThumbnail,
                self.ChannelDescription,
        ):
            if orm_class._discriminator_ == payload.metadata_type:  # pylint: disable=W0212
                obj = orm_class.from_payload(payload)
                missing_deps = self.check_for_missing_dependencies(obj)
                return [
                    ProcessingResult(md_obj=obj,
                                     obj_state=ObjState.UNKNOWN_OBJECT,
                                     missing_deps=missing_deps)
                ]
        return []
Example 9
def test_l2_filter():
    assert is_forbidden("9yo ponies")
    assert is_forbidden("12yo ponies")
    assert not is_forbidden("18yo ponies")