def _upsertable_claims(self, txos: List[Output], header, clear_first=False):
    """Build claim rows for the given claim txos and stage tag/language rows.

    Returns a list of dicts ready for a bulk claim upsert.  Tag and language
    rows are inserted directly (INSERT OR IGNORE); when ``clear_first`` is
    true, existing metadata for the touched claims is cleared beforehand.

    Fix: the bare ``except:`` clauses were narrowed to ``except Exception:``
    so that SystemExit/KeyboardInterrupt are no longer swallowed; the
    deliberate best-effort skip of undecodable claims is preserved.
    """
    claim_hashes, claims, tags, languages = set(), [], {}, {}
    for txo in txos:
        tx = txo.tx_ref.tx

        # Best-effort: skip claims whose name cannot be decoded.
        try:
            assert txo.claim_name
            assert txo.normalized_name
        except Exception:
            #self.logger.exception(f"Could not decode claim name for {tx.id}:{txo.position}.")
            continue

        # Best-effort: pick the first declared stream language, if any.
        language = None
        try:
            if txo.claim.is_stream and txo.claim.stream.languages:
                language = txo.claim.stream.languages[0].language
        except Exception:
            pass

        claim_hash = txo.claim_hash
        claim_hashes.add(claim_hash)
        claim_record = {
            'claim_hash': claim_hash,
            'claim_id': txo.claim_id,
            'claim_name': txo.claim_name,
            'normalized': txo.normalized_name,
            'txo_hash': txo.ref.hash,
            'tx_position': tx.position,
            'amount': txo.amount,
            'timestamp': header['timestamp'],
            'height': tx.height,
            'title': None,
            'description': None,
            'author': None,
            'duration': None,
            'claim_type': None,
            'stream_type': None,
            'media_type': None,
            'release_time': None,
            'fee_currency': None,
            'fee_amount': 0,
            'reposted_claim_hash': None
        }
        claims.append(claim_record)

        # Best-effort: the record is still upserted with NULL metadata when
        # the claim protobuf cannot be parsed.
        try:
            claim = txo.claim
        except Exception:
            #self.logger.exception(f"Could not parse claim protobuf for {tx.id}:{txo.position}.")
            continue

        if claim.is_stream:
            claim_record['claim_type'] = CLAIM_TYPES['stream']
            claim_record['media_type'] = claim.stream.source.media_type
            claim_record['stream_type'] = STREAM_TYPES[guess_stream_type(claim_record['media_type'])]
            claim_record['title'] = claim.stream.title
            claim_record['description'] = claim.stream.description
            claim_record['author'] = claim.stream.author
            # Audio duration wins when both video and audio durations are set.
            if claim.stream.video and claim.stream.video.duration:
                claim_record['duration'] = claim.stream.video.duration
            if claim.stream.audio and claim.stream.audio.duration:
                claim_record['duration'] = claim.stream.audio.duration
            if claim.stream.release_time:
                claim_record['release_time'] = claim.stream.release_time
            if claim.stream.has_fee:
                fee = claim.stream.fee
                if isinstance(fee.currency, str):
                    claim_record['fee_currency'] = fee.currency.lower()
                if isinstance(fee.amount, Decimal):
                    # Stored as thousandths to keep an integer column.
                    claim_record['fee_amount'] = int(fee.amount*1000)
        elif claim.is_repost:
            claim_record['claim_type'] = CLAIM_TYPES['repost']
            claim_record['reposted_claim_hash'] = claim.repost.reference.claim_hash
        elif claim.is_channel:
            claim_record['claim_type'] = CLAIM_TYPES['channel']

        if language:
            languages[(language, claim_hash)] = (language, claim_hash, tx.height)
        for tag in clean_tags(claim.message.tags):
            tags[(tag, claim_hash)] = (tag, claim_hash, tx.height)

    if clear_first:
        self._clear_claim_metadata(claim_hashes)

    if tags:
        self.executemany(
            "INSERT OR IGNORE INTO tag (tag, claim_hash, height) VALUES (?, ?, ?)", tags.values()
        )
    if languages:
        self.executemany(
            "INSERT OR IGNORE INTO language (language, claim_hash, height) VALUES (?, ?, ?)",
            languages.values()
        )
    return claims
def expand_query(**kwargs):
    """Translate claim-search keyword arguments into an Elasticsearch query body.

    Returns a dict with ``_source`` (title/description excluded), ``query``
    (a bool query built from must/must_not and, for signature/source filters,
    should clauses with ``minimum_should_match``), ``sort``, and optionally
    ``size``, ``from`` and ``collapse``.

    Raises:
        TooManyClaimSearchParametersError: when a list-valued filter has more
            than 2048 entries.

    Fix: an exact duplicate ``elif key == 'any_languages':`` branch was
    unreachable dead code and has been removed; the first (reachable)
    branch's behavior is preserved.
    """
    if "amount_order" in kwargs:
        # amount_order N means "the Nth highest effective amount".
        kwargs["limit"] = 1
        kwargs["order_by"] = "effective_amount"
        kwargs["offset"] = int(kwargs["amount_order"]) - 1
    if 'name' in kwargs:
        kwargs['name'] = normalize_name(kwargs.pop('name'))
    if kwargs.get('is_controlling') is False:
        kwargs.pop('is_controlling')
    query = {'must': [], 'must_not': []}
    collapse = None
    if 'fee_currency' in kwargs and kwargs['fee_currency'] is not None:
        kwargs['fee_currency'] = kwargs['fee_currency'].upper()
    for key, value in kwargs.items():
        key = key.replace('claim.', '')
        many = key.endswith('__in') or isinstance(value, list)
        if many and len(value) > 2048:
            raise TooManyClaimSearchParametersError(key, 2048)
        if many:
            key = key.replace('__in', '')
            value = list(filter(None, value))
        if value is None or isinstance(value, list) and len(value) == 0:
            continue
        key = REPLACEMENTS.get(key, key)
        if key in FIELDS:
            partial_id = False
            if key == 'claim_type':
                if isinstance(value, str):
                    value = CLAIM_TYPES[value]
                else:
                    value = [CLAIM_TYPES[claim_type] for claim_type in value]
            elif key == 'stream_type':
                value = [STREAM_TYPES[value]] if isinstance(value, str) else list(map(STREAM_TYPES.get, value))
            if key == '_id':
                # Claim hashes are stored reversed and hex-encoded in the index.
                if isinstance(value, Iterable):
                    value = [item[::-1].hex() for item in value]
                else:
                    value = value[::-1].hex()
            if not many and key in ('_id', 'claim_id', 'sd_hash') and len(value) < 20:
                # Shorter than a full id: treat as a prefix search.
                partial_id = True
            if key in ('signature_valid', 'has_source'):
                continue  # handled later
            if key in TEXT_FIELDS:
                key += '.keyword'
            ops = {'<=': 'lte', '>=': 'gte', '<': 'lt', '>': 'gt'}
            if partial_id:
                query['must'].append({"prefix": {key: value}})
            elif key in RANGE_FIELDS and isinstance(value, str) and value[0] in ops:
                operator_length = 2 if value[:2] in ops else 1
                operator, value = value[:operator_length], value[operator_length:]
                if key == 'fee_amount':
                    value = str(Decimal(value) * 1000)
                query['must'].append({"range": {key: {ops[operator]: value}}})
            elif key in RANGE_FIELDS and isinstance(value, list) and all(v[0] in ops for v in value):
                # Multiple range constraints on one field merge into one range clause.
                range_constraints = []
                for v in value:
                    operator_length = 2 if v[:2] in ops else 1
                    operator, stripped_op_v = v[:operator_length], v[operator_length:]
                    if key == 'fee_amount':
                        stripped_op_v = str(Decimal(stripped_op_v) * 1000)
                    range_constraints.append((operator, stripped_op_v))
                query['must'].append({
                    "range": {key: {ops[operator]: v for operator, v in range_constraints}}
                })
            elif many:
                query['must'].append({"terms": {key: value}})
            else:
                if key == 'fee_amount':
                    value = str(Decimal(value) * 1000)
                query['must'].append({"term": {key: {"value": value}}})
        elif key == 'not_channel_ids':
            for channel_id in value:
                query['must_not'].append({"term": {'channel_id.keyword': channel_id}})
                query['must_not'].append({"term": {'_id': channel_id}})
        elif key == 'channel_ids':
            query['must'].append({"terms": {'channel_id.keyword': value}})
        elif key == 'claim_ids':
            query['must'].append({"terms": {'claim_id.keyword': value}})
        elif key == 'media_types':
            query['must'].append({"terms": {'media_type.keyword': value}})
        elif key == 'any_languages':
            # NOTE(review): languages are run through clean_tags here, which
            # normalizes them like tags — confirm this is intended.
            query['must'].append({"terms": {'languages': clean_tags(value)}})
        elif key == 'all_languages':
            query['must'].extend([{"term": {'languages': tag}} for tag in value])
        elif key == 'any_tags':
            query['must'].append({"terms": {'tags.keyword': clean_tags(value)}})
        elif key == 'all_tags':
            query['must'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
        elif key == 'not_tags':
            query['must_not'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
        elif key == 'not_claim_id':
            query['must_not'].extend([{"term": {'claim_id.keyword': cid}} for cid in value])
        elif key == 'limit_claims_per_channel':
            collapse = ('channel_id.keyword', value)
    if kwargs.get('has_channel_signature'):
        query['must'].append({"exists": {"field": "signature"}})
        if 'signature_valid' in kwargs:
            query['must'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}})
    elif 'signature_valid' in kwargs:
        # Without has_channel_signature, unsigned claims also match.
        query.setdefault('should', [])
        query["minimum_should_match"] = 1
        query['should'].append({"bool": {"must_not": {"exists": {"field": "signature"}}}})
        query['should'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}})
    if 'has_source' in kwargs:
        query.setdefault('should', [])
        query["minimum_should_match"] = 1
        is_stream_or_repost = {"terms": {"claim_type": [CLAIM_TYPES['stream'], CLAIM_TYPES['repost']]}}
        # Streams/reposts must match has_source; other claim types (and
        # reposted channels) have no source concept and always match.
        query['should'].append(
            {"bool": {"must": [{"match": {"has_source": kwargs['has_source']}}, is_stream_or_repost]}})
        query['should'].append({"bool": {"must_not": [is_stream_or_repost]}})
        query['should'].append(
            {"bool": {"must": [{"term": {"reposted_claim_type": CLAIM_TYPES['channel']}}]}})
    if kwargs.get('text'):
        query['must'].append({
            "simple_query_string": {
                "query": kwargs["text"],
                "fields": [
                    "claim_name^4", "channel_name^8", "title^1", "description^.5", "author^1", "tags^.5"
                ]
            }
        })
    query = {
        "_source": {"excludes": ["description", "title"]},
        'query': {'bool': query},
        "sort": [],
    }
    if "limit" in kwargs:
        query["size"] = kwargs["limit"]
    if 'offset' in kwargs:
        query["from"] = kwargs["offset"]
    if 'order_by' in kwargs:
        if isinstance(kwargs["order_by"], str):
            kwargs["order_by"] = [kwargs["order_by"]]
        for value in kwargs['order_by']:
            if 'trending_group' in value:
                # fixme: trending_mixed is 0 for all records on variable decay, making sort slow.
                continue
            is_asc = value.startswith('^')
            value = value[1:] if is_asc else value
            value = REPLACEMENTS.get(value, value)
            if value in TEXT_FIELDS:
                value += '.keyword'
            query['sort'].append({value: "asc" if is_asc else "desc"})
    if collapse:
        query["collapse"] = {
            "field": collapse[0],
            "inner_hits": {"name": collapse[0], "size": collapse[1], "sort": query["sort"]}
        }
    return query
def test_clean_tags(self):
    # Whitespace-only and punctuation-only entries are dropped; '!taG' normalizes to 'tag'.
    self.assertEqual(['tag'], clean_tags([' \t #!~', '!taG', '\t']))
    # Case-folded duplicates collapse into a single entry each.
    deduped = clean_tags(['fOo', '!taG', 'FoO'])
    self.assertEqual(2, len(deduped))
    for expected in ('tag', 'foo'):
        self.assertIn(expected, deduped)
def _upsertable_claims(self, txos: List[Output], header, clear_first=False):
    """Build claim rows for the given claim txos and stage their tag rows.

    Returns a list of dicts ready for a bulk claim upsert; tag rows are
    written directly (INSERT OR IGNORE).  When ``clear_first`` is true,
    existing metadata for the touched claims is cleared beforehand.

    Fix: the bare ``except:`` clauses were narrowed to ``except Exception:``
    so that SystemExit/KeyboardInterrupt are no longer swallowed; the
    deliberate best-effort skip of undecodable claims is preserved.
    """
    claim_hashes, claims, tags = [], [], {}
    for txo in txos:
        tx = txo.tx_ref.tx

        # Best-effort: skip claims whose name cannot be decoded.
        try:
            assert txo.claim_name
            assert txo.normalized_name
        except Exception:
            #self.logger.exception(f"Could not decode claim name for {tx.id}:{txo.position}.")
            continue

        # Wrapped for storage as a SQLite BLOB.
        claim_hash = sqlite3.Binary(txo.claim_hash)
        claim_hashes.append(claim_hash)
        claim_record = {
            'claim_hash': claim_hash,
            'claim_id': txo.claim_id,
            'claim_name': txo.claim_name,
            'normalized': txo.normalized_name,
            'txo_hash': sqlite3.Binary(txo.ref.hash),
            'tx_position': tx.position,
            'amount': txo.amount,
            'timestamp': header['timestamp'],
            'height': tx.height,
            'claim_type': None,
            'stream_type': None,
            'media_type': None,
            'release_time': None,
            'fee_currency': None,
            'fee_amount': 0
        }
        claims.append(claim_record)

        # Best-effort: the record is still upserted with NULL metadata when
        # the claim protobuf cannot be parsed.
        try:
            claim = txo.claim
        except Exception:
            #self.logger.exception(f"Could not parse claim protobuf for {tx.id}:{txo.position}.")
            continue

        if claim.is_stream:
            claim_record['claim_type'] = CLAIM_TYPES['stream']
            claim_record['media_type'] = claim.stream.source.media_type
            claim_record['stream_type'] = STREAM_TYPES[guess_stream_type(
                claim_record['media_type'])]
            if claim.stream.release_time:
                claim_record['release_time'] = claim.stream.release_time
            if claim.stream.has_fee:
                fee = claim.stream.fee
                if isinstance(fee.currency, str):
                    claim_record['fee_currency'] = fee.currency.lower()
                if isinstance(fee.amount, Decimal):
                    # Stored as thousandths to keep an integer column.
                    claim_record['fee_amount'] = int(fee.amount * 1000)
        elif claim.is_channel:
            claim_record['claim_type'] = CLAIM_TYPES['channel']

        for tag in clean_tags(claim.message.tags):
            tags[(tag, claim_hash)] = (tag, claim_hash, tx.height)

    if clear_first:
        self._clear_claim_metadata(claim_hashes)

    if tags:
        self.db.executemany(
            "INSERT OR IGNORE INTO tag (tag, claim_hash, height) VALUES (?, ?, ?)", tags.values())
    return claims
def test_clean_tags(self):
    """Junk entries are dropped and '!taG' is normalized to 'tag'."""
    raw = [' \t #!~', '!taG', '\t']
    self.assertEqual(clean_tags(raw), ['tag'])