def _upsertable_claims(self, txos: List[Output], header, clear_first=False):
    """Build claim rows for the given claim txos and stage tag/language rows.

    Returns a list of dicts ready for a bulk claim upsert.  Tag and language
    rows are inserted directly (INSERT OR IGNORE); when ``clear_first`` is
    true, existing metadata for the touched claims is cleared beforehand.

    Fix: the bare ``except:`` clauses were narrowed to ``except Exception:``
    so that SystemExit/KeyboardInterrupt are no longer swallowed; the
    deliberate best-effort skip of undecodable claims is preserved.
    """
    claim_hashes, claims, tags, languages = set(), [], {}, {}
    for txo in txos:
        tx = txo.tx_ref.tx

        # Best-effort: skip claims whose name cannot be decoded.
        try:
            assert txo.claim_name
            assert txo.normalized_name
        except Exception:
            #self.logger.exception(f"Could not decode claim name for {tx.id}:{txo.position}.")
            continue

        # Best-effort: pick the first declared stream language, if any.
        language = None
        try:
            if txo.claim.is_stream and txo.claim.stream.languages:
                language = txo.claim.stream.languages[0].language
        except Exception:
            pass

        claim_hash = txo.claim_hash
        claim_hashes.add(claim_hash)
        claim_record = {
            'claim_hash': claim_hash,
            'claim_id': txo.claim_id,
            'claim_name': txo.claim_name,
            'normalized': txo.normalized_name,
            'txo_hash': txo.ref.hash,
            'tx_position': tx.position,
            'amount': txo.amount,
            'timestamp': header['timestamp'],
            'height': tx.height,
            'title': None,
            'description': None,
            'author': None,
            'duration': None,
            'claim_type': None,
            'stream_type': None,
            'media_type': None,
            'release_time': None,
            'fee_currency': None,
            'fee_amount': 0,
            'reposted_claim_hash': None
        }
        claims.append(claim_record)

        # Best-effort: the record is still upserted with NULL metadata when
        # the claim protobuf cannot be parsed.
        try:
            claim = txo.claim
        except Exception:
            #self.logger.exception(f"Could not parse claim protobuf for {tx.id}:{txo.position}.")
            continue

        if claim.is_stream:
            claim_record['claim_type'] = CLAIM_TYPES['stream']
            claim_record['media_type'] = claim.stream.source.media_type
            claim_record['stream_type'] = STREAM_TYPES[guess_stream_type(claim_record['media_type'])]
            claim_record['title'] = claim.stream.title
            claim_record['description'] = claim.stream.description
            claim_record['author'] = claim.stream.author
            # Audio duration wins when both video and audio durations are set.
            if claim.stream.video and claim.stream.video.duration:
                claim_record['duration'] = claim.stream.video.duration
            if claim.stream.audio and claim.stream.audio.duration:
                claim_record['duration'] = claim.stream.audio.duration
            if claim.stream.release_time:
                claim_record['release_time'] = claim.stream.release_time
            if claim.stream.has_fee:
                fee = claim.stream.fee
                if isinstance(fee.currency, str):
                    claim_record['fee_currency'] = fee.currency.lower()
                if isinstance(fee.amount, Decimal):
                    # Stored as thousandths to keep an integer column.
                    claim_record['fee_amount'] = int(fee.amount*1000)
        elif claim.is_repost:
            claim_record['claim_type'] = CLAIM_TYPES['repost']
            claim_record['reposted_claim_hash'] = claim.repost.reference.claim_hash
        elif claim.is_channel:
            claim_record['claim_type'] = CLAIM_TYPES['channel']

        if language:
            languages[(language, claim_hash)] = (language, claim_hash, tx.height)
        for tag in clean_tags(claim.message.tags):
            tags[(tag, claim_hash)] = (tag, claim_hash, tx.height)

    if clear_first:
        self._clear_claim_metadata(claim_hashes)

    if tags:
        self.executemany(
            "INSERT OR IGNORE INTO tag (tag, claim_hash, height) VALUES (?, ?, ?)", tags.values()
        )
    if languages:
        self.executemany(
            "INSERT OR IGNORE INTO language (language, claim_hash, height) VALUES (?, ?, ?)",
            languages.values()
        )
    return claims
def expand_query(**kwargs):
    """Translate claim-search keyword arguments into an Elasticsearch query body.

    Returns a dict with ``_source`` (title/description excluded), ``query``
    (a bool query built from must/must_not and, for signature/source filters,
    should clauses with ``minimum_should_match``), ``sort``, and optionally
    ``size``, ``from`` and ``collapse``.

    Raises:
        TooManyClaimSearchParametersError: when a list-valued filter has more
            than 2048 entries.

    Fix: an exact duplicate ``elif key == 'any_languages':`` branch was
    unreachable dead code and has been removed; the first (reachable)
    branch's behavior is preserved.
    """
    if "amount_order" in kwargs:
        # amount_order N means "the Nth highest effective amount".
        kwargs["limit"] = 1
        kwargs["order_by"] = "effective_amount"
        kwargs["offset"] = int(kwargs["amount_order"]) - 1
    if 'name' in kwargs:
        kwargs['name'] = normalize_name(kwargs.pop('name'))
    if kwargs.get('is_controlling') is False:
        kwargs.pop('is_controlling')
    query = {'must': [], 'must_not': []}
    collapse = None
    if 'fee_currency' in kwargs and kwargs['fee_currency'] is not None:
        kwargs['fee_currency'] = kwargs['fee_currency'].upper()
    for key, value in kwargs.items():
        key = key.replace('claim.', '')
        many = key.endswith('__in') or isinstance(value, list)
        if many and len(value) > 2048:
            raise TooManyClaimSearchParametersError(key, 2048)
        if many:
            key = key.replace('__in', '')
            value = list(filter(None, value))
        if value is None or isinstance(value, list) and len(value) == 0:
            continue
        key = REPLACEMENTS.get(key, key)
        if key in FIELDS:
            partial_id = False
            if key == 'claim_type':
                if isinstance(value, str):
                    value = CLAIM_TYPES[value]
                else:
                    value = [CLAIM_TYPES[claim_type] for claim_type in value]
            elif key == 'stream_type':
                value = [STREAM_TYPES[value]] if isinstance(value, str) else list(map(STREAM_TYPES.get, value))
            if key == '_id':
                # Claim hashes are stored reversed and hex-encoded in the index.
                if isinstance(value, Iterable):
                    value = [item[::-1].hex() for item in value]
                else:
                    value = value[::-1].hex()
            if not many and key in ('_id', 'claim_id', 'sd_hash') and len(value) < 20:
                # Shorter than a full id: treat as a prefix search.
                partial_id = True
            if key in ('signature_valid', 'has_source'):
                continue  # handled later
            if key in TEXT_FIELDS:
                key += '.keyword'
            ops = {'<=': 'lte', '>=': 'gte', '<': 'lt', '>': 'gt'}
            if partial_id:
                query['must'].append({"prefix": {key: value}})
            elif key in RANGE_FIELDS and isinstance(value, str) and value[0] in ops:
                operator_length = 2 if value[:2] in ops else 1
                operator, value = value[:operator_length], value[operator_length:]
                if key == 'fee_amount':
                    value = str(Decimal(value) * 1000)
                query['must'].append({"range": {key: {ops[operator]: value}}})
            elif key in RANGE_FIELDS and isinstance(value, list) and all(v[0] in ops for v in value):
                # Multiple range constraints on one field merge into one range clause.
                range_constraints = []
                for v in value:
                    operator_length = 2 if v[:2] in ops else 1
                    operator, stripped_op_v = v[:operator_length], v[operator_length:]
                    if key == 'fee_amount':
                        stripped_op_v = str(Decimal(stripped_op_v) * 1000)
                    range_constraints.append((operator, stripped_op_v))
                query['must'].append({
                    "range": {key: {ops[operator]: v for operator, v in range_constraints}}
                })
            elif many:
                query['must'].append({"terms": {key: value}})
            else:
                if key == 'fee_amount':
                    value = str(Decimal(value) * 1000)
                query['must'].append({"term": {key: {"value": value}}})
        elif key == 'not_channel_ids':
            for channel_id in value:
                query['must_not'].append({"term": {'channel_id.keyword': channel_id}})
                query['must_not'].append({"term": {'_id': channel_id}})
        elif key == 'channel_ids':
            query['must'].append({"terms": {'channel_id.keyword': value}})
        elif key == 'claim_ids':
            query['must'].append({"terms": {'claim_id.keyword': value}})
        elif key == 'media_types':
            query['must'].append({"terms": {'media_type.keyword': value}})
        elif key == 'any_languages':
            # NOTE(review): languages are run through clean_tags here, which
            # normalizes them like tags — confirm this is intended.
            query['must'].append({"terms": {'languages': clean_tags(value)}})
        elif key == 'all_languages':
            query['must'].extend([{"term": {'languages': tag}} for tag in value])
        elif key == 'any_tags':
            query['must'].append({"terms": {'tags.keyword': clean_tags(value)}})
        elif key == 'all_tags':
            query['must'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
        elif key == 'not_tags':
            query['must_not'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
        elif key == 'not_claim_id':
            query['must_not'].extend([{"term": {'claim_id.keyword': cid}} for cid in value])
        elif key == 'limit_claims_per_channel':
            collapse = ('channel_id.keyword', value)
    if kwargs.get('has_channel_signature'):
        query['must'].append({"exists": {"field": "signature"}})
        if 'signature_valid' in kwargs:
            query['must'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}})
    elif 'signature_valid' in kwargs:
        # Without has_channel_signature, unsigned claims also match.
        query.setdefault('should', [])
        query["minimum_should_match"] = 1
        query['should'].append({"bool": {"must_not": {"exists": {"field": "signature"}}}})
        query['should'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}})
    if 'has_source' in kwargs:
        query.setdefault('should', [])
        query["minimum_should_match"] = 1
        is_stream_or_repost = {"terms": {"claim_type": [CLAIM_TYPES['stream'], CLAIM_TYPES['repost']]}}
        # Streams/reposts must match has_source; other claim types (and
        # reposted channels) have no source concept and always match.
        query['should'].append(
            {"bool": {"must": [{"match": {"has_source": kwargs['has_source']}}, is_stream_or_repost]}})
        query['should'].append({"bool": {"must_not": [is_stream_or_repost]}})
        query['should'].append(
            {"bool": {"must": [{"term": {"reposted_claim_type": CLAIM_TYPES['channel']}}]}})
    if kwargs.get('text'):
        query['must'].append({
            "simple_query_string": {
                "query": kwargs["text"],
                "fields": [
                    "claim_name^4", "channel_name^8", "title^1", "description^.5", "author^1", "tags^.5"
                ]
            }
        })
    query = {
        "_source": {"excludes": ["description", "title"]},
        'query': {'bool': query},
        "sort": [],
    }
    if "limit" in kwargs:
        query["size"] = kwargs["limit"]
    if 'offset' in kwargs:
        query["from"] = kwargs["offset"]
    if 'order_by' in kwargs:
        if isinstance(kwargs["order_by"], str):
            kwargs["order_by"] = [kwargs["order_by"]]
        for value in kwargs['order_by']:
            if 'trending_group' in value:
                # fixme: trending_mixed is 0 for all records on variable decay, making sort slow.
                continue
            is_asc = value.startswith('^')
            value = value[1:] if is_asc else value
            value = REPLACEMENTS.get(value, value)
            if value in TEXT_FIELDS:
                value += '.keyword'
            query['sort'].append({value: "asc" if is_asc else "desc"})
    if collapse:
        query["collapse"] = {
            "field": collapse[0],
            "inner_hits": {"name": collapse[0], "size": collapse[1], "sort": query["sort"]}
        }
    return query
def test_clean_tags(self):
    # Whitespace-only and punctuation-only entries are dropped; '!taG' normalizes to 'tag'.
    self.assertEqual(['tag'], clean_tags([' \t #!~', '!taG', '\t']))
    # Case-folded duplicates collapse into a single entry each.
    deduped = clean_tags(['fOo', '!taG', 'FoO'])
    self.assertEqual(2, len(deduped))
    for expected in ('tag', 'foo'):
        self.assertIn(expected, deduped)
def _upsertable_claims(self, txos: List[Output], header, clear_first=False):
    """Build claim rows for the given claim txos and stage their tag rows.

    Returns a list of dicts ready for a bulk claim upsert; tag rows are
    written directly (INSERT OR IGNORE).  When ``clear_first`` is true,
    existing metadata for the touched claims is cleared beforehand.

    Fix: the bare ``except:`` clauses were narrowed to ``except Exception:``
    so that SystemExit/KeyboardInterrupt are no longer swallowed; the
    deliberate best-effort skip of undecodable claims is preserved.
    """
    claim_hashes, claims, tags = [], [], {}
    for txo in txos:
        tx = txo.tx_ref.tx

        # Best-effort: skip claims whose name cannot be decoded.
        try:
            assert txo.claim_name
            assert txo.normalized_name
        except Exception:
            #self.logger.exception(f"Could not decode claim name for {tx.id}:{txo.position}.")
            continue

        # Wrapped for storage as a SQLite BLOB.
        claim_hash = sqlite3.Binary(txo.claim_hash)
        claim_hashes.append(claim_hash)
        claim_record = {
            'claim_hash': claim_hash,
            'claim_id': txo.claim_id,
            'claim_name': txo.claim_name,
            'normalized': txo.normalized_name,
            'txo_hash': sqlite3.Binary(txo.ref.hash),
            'tx_position': tx.position,
            'amount': txo.amount,
            'timestamp': header['timestamp'],
            'height': tx.height,
            'claim_type': None,
            'stream_type': None,
            'media_type': None,
            'release_time': None,
            'fee_currency': None,
            'fee_amount': 0
        }
        claims.append(claim_record)

        # Best-effort: the record is still upserted with NULL metadata when
        # the claim protobuf cannot be parsed.
        try:
            claim = txo.claim
        except Exception:
            #self.logger.exception(f"Could not parse claim protobuf for {tx.id}:{txo.position}.")
            continue

        if claim.is_stream:
            claim_record['claim_type'] = CLAIM_TYPES['stream']
            claim_record['media_type'] = claim.stream.source.media_type
            claim_record['stream_type'] = STREAM_TYPES[guess_stream_type(
                claim_record['media_type'])]
            if claim.stream.release_time:
                claim_record['release_time'] = claim.stream.release_time
            if claim.stream.has_fee:
                fee = claim.stream.fee
                if isinstance(fee.currency, str):
                    claim_record['fee_currency'] = fee.currency.lower()
                if isinstance(fee.amount, Decimal):
                    # Stored as thousandths to keep an integer column.
                    claim_record['fee_amount'] = int(fee.amount * 1000)
        elif claim.is_channel:
            claim_record['claim_type'] = CLAIM_TYPES['channel']

        for tag in clean_tags(claim.message.tags):
            tags[(tag, claim_hash)] = (tag, claim_hash, tx.height)

    if clear_first:
        self._clear_claim_metadata(claim_hashes)

    if tags:
        self.db.executemany(
            "INSERT OR IGNORE INTO tag (tag, claim_hash, height) VALUES (?, ?, ?)", tags.values())
    return claims
def test_clean_tags(self):
    """Junk entries are dropped and '!taG' is normalized to 'tag'."""
    raw = [' \t #!~', '!taG', '\t']
    self.assertEqual(clean_tags(raw), ['tag'])