async def update_activity_for_all_collections(self) -> None:
    # TODO(femi-ogunkola): Account for deleted transactions
    collectionActivities = await self.retriever.list_collections_activity(
        orders=[Order(fieldName=CollectionHourlyActivityTable.c.date.key, direction=Direction.DESCENDING)],
        limit=1,
    )
    if len(collectionActivities) > 0:
        latestProcessedDate = collectionActivities[0].date
    else:
        latestProcessedDate = date_util.start_of_day()
    newTokenTransfers = await self.retriever.list_token_transfers(
        fieldFilters=[DateFieldFilter(fieldName=BlocksTable.c.updatedDate.key, gte=latestProcessedDate)],
    )
    registryDatePairs = {(tokenTransfer.registryAddress, date_hour_from_datetime(tokenTransfer.blockDate)) for tokenTransfer in newTokenTransfers}
    logging.info(f'Scheduling processing for {len(registryDatePairs)} (collection, hour) pairs')
    messages = [UpdateActivityForCollectionMessageContent(address=address, startDate=startDate).to_message() for (address, startDate) in registryDatePairs]
    await self.tokenQueue.send_messages(messages=messages)

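# NOTE: date_hour_from_datetime is defined elsewhere in this codebase. As an illustration
# only, a minimal sketch of the assumed behaviour (truncate a datetime to the start of
# its hour), matching how it is used above and in update_activity_for_collection:
def date_hour_from_datetime(dt: datetime.datetime) -> datetime.datetime:
    return dt.replace(minute=0, second=0, microsecond=0)
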
async def update_token_metadata_deferred(self, registryAddress: str, tokenId: str, shouldForce: bool = False) -> None:
    registryAddress = chain_util.normalize_address(value=registryAddress)
    if not shouldForce:
        recentlyUpdatedTokens = await self.retriever.list_token_metadatas(
            fieldFilters=[
                StringFieldFilter(fieldName=TokenMetadatasTable.c.registryAddress.key, eq=registryAddress),
                StringFieldFilter(fieldName=TokenMetadatasTable.c.tokenId.key, eq=tokenId),
                DateFieldFilter(fieldName=TokenMetadatasTable.c.updatedDate.key, gt=date_util.datetime_from_now(days=-_TOKEN_UPDATE_MIN_DAYS)),
            ],
        )
        if len(recentlyUpdatedTokens) > 0:
            logging.info('Skipping token because it has been updated recently.')
            return
    await self.tokenQueue.send_message(message=UpdateTokenMetadataMessageContent(registryAddress=registryAddress, tokenId=tokenId).to_message())

async def update_token_metadatas_deferred(self, collectionTokenIds: List[Tuple[str, str]], shouldForce: bool = False) -> None:
    if len(collectionTokenIds) == 0:
        return
    if not shouldForce:
        query = (
            TokenMetadatasTable.select()
            .where(TokenMetadatasTable.c.updatedDate > date_util.datetime_from_now(days=-_TOKEN_UPDATE_MIN_DAYS))
            .where(sqlalchemy.tuple_(TokenMetadatasTable.c.registryAddress, TokenMetadatasTable.c.tokenId).in_(collectionTokenIds))
        )
        recentlyUpdatedTokenMetadatas = await self.retriever.query_token_metadatas(query=query)
        recentlyUpdatedTokenIds = set((tokenMetadata.registryAddress, tokenMetadata.tokenId) for tokenMetadata in recentlyUpdatedTokenMetadatas)
        logging.info(f'Skipping {len(recentlyUpdatedTokenIds)} collectionTokenIds because they have been updated recently.')
        collectionTokenIds = set(collectionTokenIds) - recentlyUpdatedTokenIds
    messages = [UpdateTokenMetadataMessageContent(registryAddress=registryAddress, tokenId=tokenId, shouldForce=shouldForce).to_message() for (registryAddress, tokenId) in collectionTokenIds]
    await self.tokenQueue.send_messages(messages=messages)

async def daily_new_registries():
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    await database.connect()
    query = TokenTransfersTable.select()
    query = query.where(TokenTransfersTable.c.registryAddress.in_(
        TokenTransfersTable.select()
        .with_only_columns([TokenTransfersTable.c.registryAddress])
        .group_by(TokenTransfersTable.c.registryAddress)
        .having(sqlalchemyfunc.count(TokenTransfersTable.c.registryAddress) == 1)
    ))
    query = query.where(sqlalchemyfunc.date(TokenTransfersTable.c.blockDate) == sqlalchemyfunc.current_date())
    rows = await database.fetch_all(query)
    for row in rows:
        logging.info(f'New Tokens: registry address {row[2]} and tokenId {row[5]}')
    await database.disconnect()

async def check_all_processed(startBlockNumber: int, endBlockNumber: int, batchSize: int):
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    await database.connect()
    currentBlockNumber = startBlockNumber
    while currentBlockNumber < endBlockNumber:
        start = currentBlockNumber
        end = min(currentBlockNumber + batchSize, endBlockNumber)
        logging.info(f'Working on {start} to {end}...')
        result = await database.execute(f'''
            update tbl_token_transfers
            set amount_2 = amount
            where block_number >= {start}
            and block_number < {end}
            and amount_2 is null;
        ''')
        print('result:', result)
        currentBlockNumber += batchSize
    await database.disconnect()

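# NOTE: an illustrative entrypoint for running a batched backfill like the one above from
# the command line; the click options and the default batch size are assumptions, not part
# of the original module.
import asyncio

import click

@click.command()
@click.option('-s', '--start-block-number', 'startBlockNumber', required=True, type=int)
@click.option('-e', '--end-block-number', 'endBlockNumber', required=True, type=int)
@click.option('-b', '--batch-size', 'batchSize', required=False, type=int, default=1000)
def run(startBlockNumber: int, endBlockNumber: int, batchSize: int):
    asyncio.run(check_all_processed(startBlockNumber=startBlockNumber, endBlockNumber=endBlockNumber, batchSize=batchSize))

if __name__ == '__main__':
    run()
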
async def update_collections_deferred(self, addresses: List[str], shouldForce: bool = False) -> None:
    if len(addresses) == 0:
        return
    if not shouldForce:
        recentlyUpdatedCollections = await self.retriever.list_collections(
            fieldFilters=[
                StringFieldFilter(fieldName=TokenCollectionsTable.c.address.key, containedIn=addresses),
                DateFieldFilter(fieldName=TokenCollectionsTable.c.updatedDate.key, gt=date_util.datetime_from_now(days=-_COLLECTION_UPDATE_MIN_DAYS)),
            ],
        )
        recentlyUpdatedAddresses = set(collection.address for collection in recentlyUpdatedCollections)
        logging.info(f'Skipping {len(recentlyUpdatedAddresses)} collections because they have been updated recently.')
        addresses = set(addresses) - recentlyUpdatedAddresses
    messages = [UpdateCollectionMessageContent(address=address).to_message() for address in addresses]
    await self.tokenQueue.send_messages(messages=messages)

async def process_tokens_from_old_transfers(startBlockNumber: int, endBlockNumber: int, batchSize: int):
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    saver = Saver(database=database)
    retriever = Retriever(database=database)
    s3manager = S3Manager(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'])
    workQueue = SqsMessageQueue(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'], queueUrl='https://sqs.eu-west-1.amazonaws.com/097520841056/notd-work-queue')
    tokenQueue = SqsMessageQueue(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'], queueUrl='https://sqs.eu-west-1.amazonaws.com/097520841056/notd-token-queue')
    awsRequester = AwsRequester(accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'])
    ethClient = RestEthClient(url='https://nd-foldvvlb25awde7kbqfvpgvrrm.ethereum.managedblockchain.eu-west-1.amazonaws.com', requester=awsRequester)
    requester = Requester()
    tokenMetadataProcessor = TokenMetadataProcessor(requester=requester, ethClient=ethClient, s3manager=s3manager, bucketName=os.environ['S3_BUCKET'])
    openseaApiKey = os.environ['OPENSEA_API_KEY']
    tokenOwnershipProcessor = TokenOwnershipProcessor(retriever=retriever)
    collectionProcessor = CollectionProcessor(requester=requester, ethClient=ethClient, openseaApiKey=openseaApiKey, s3manager=s3manager, bucketName=os.environ['S3_BUCKET'])
    tokenManager = TokenManager(saver=saver, retriever=retriever, tokenQueue=tokenQueue, collectionProcessor=collectionProcessor, tokenMetadataProcessor=tokenMetadataProcessor, tokenOwnershipProcessor=tokenOwnershipProcessor)
    revueApiKey = os.environ['REVUE_API_KEY']
    await database.connect()
    await workQueue.connect()
    await s3manager.connect()
    await tokenQueue.connect()
    cache = set()
    registryCache = set()
    currentBlockNumber = startBlockNumber
    while currentBlockNumber < endBlockNumber:
        start = currentBlockNumber
        end = min(currentBlockNumber + batchSize, endBlockNumber)
        currentBlockNumber = end
        logging.info(f'Working on {start}-{end}...')
        query = (
            sqlalchemy.select(TokenTransfersTable.c.registryAddress, TokenTransfersTable.c.tokenId)
            .where(TokenTransfersTable.c.blockNumber >= start)
            .where(TokenTransfersTable.c.blockNumber < end)
        )
        rows = await database.fetch_all(query=query)
        tokensToProcess = set()
        collectionsToProcess = set()
        for (registryAddress, tokenId) in rows:
            if (registryAddress, tokenId) in cache:
                continue
            cache.add((registryAddress, tokenId))
            tokensToProcess.add((registryAddress, tokenId))
            if registryAddress in registryCache:
                continue
            registryCache.add(registryAddress)
            collectionsToProcess.add(registryAddress)
        print('len(tokensToProcess)', len(tokensToProcess))
        print('len(collectionsToProcess)', len(collectionsToProcess))
        try:
            await _update_token_metadatas(tokensToProcess=tokensToProcess, tokenManager=tokenManager, retriever=retriever)
            await _update_collections(collectionsToProcess=collectionsToProcess, tokenManager=tokenManager, retriever=retriever)
        except Exception:
            logging.error(f'Failed during: {start}-{end}')
            raise
    await database.disconnect()
    await workQueue.disconnect()
    await tokenQueue.disconnect()
    await s3manager.disconnect()

async def main():
    requestIdHolder = RequestIdHolder()
    name = os.environ.get('NAME', 'notd-api')
    version = os.environ.get('VERSION', 'local')
    environment = os.environ.get('ENV', 'dev')
    isRunningDebugMode = environment == 'dev'
    if isRunningDebugMode:
        logging.init_basic_logging()
    else:
        logging.init_json_logging(name=name, version=version, environment=environment, requestIdHolder=requestIdHolder)
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    saver = Saver(database=database)
    retriever = Retriever(database=database)
    s3manager = S3Manager(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'])
    workQueue = SqsMessageQueue(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'], queueUrl='https://sqs.eu-west-1.amazonaws.com/097520841056/notd-work-queue')
    tokenQueue = SqsMessageQueue(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'], queueUrl='https://sqs.eu-west-1.amazonaws.com/097520841056/notd-token-queue')
    awsRequester = AwsRequester(accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'])
    ethClient = RestEthClient(url='https://nd-foldvvlb25awde7kbqfvpgvrrm.ethereum.managedblockchain.eu-west-1.amazonaws.com', requester=awsRequester)
    blockProcessor = BlockProcessor(ethClient=ethClient)
    requester = Requester()
    tokenMetadataProcessor = TokenMetadataProcessor(requester=requester, ethClient=ethClient, s3manager=s3manager, bucketName=os.environ['S3_BUCKET'])
    openseaApiKey = os.environ['OPENSEA_API_KEY']
    collectionProcessor = CollectionProcessor(requester=requester, ethClient=ethClient, openseaApiKey=openseaApiKey, s3manager=s3manager, bucketName=os.environ['S3_BUCKET'])
    tokenOwnershipProcessor = TokenOwnershipProcessor(retriever=retriever)
    collectionActivityProcessor = CollectionActivityProcessor(retriever=retriever)
    revueApiKey = os.environ['REVUE_API_KEY']
    tokenManager = TokenManager(saver=saver, retriever=retriever, tokenQueue=tokenQueue, collectionProcessor=collectionProcessor, tokenMetadataProcessor=tokenMetadataProcessor, tokenOwnershipProcessor=tokenOwnershipProcessor, collectionActivityProcessor=collectionActivityProcessor)
    notdManager = NotdManager(blockProcessor=blockProcessor, saver=saver, retriever=retriever, workQueue=workQueue, tokenManager=tokenManager, requester=requester, revueApiKey=revueApiKey)
    processor = NotdMessageProcessor(notdManager=notdManager)
    slackClient = SlackClient(webhookUrl=os.environ['SLACK_WEBHOOK_URL'], requester=requester, defaultSender='worker', defaultChannel='notd-notifications')
    workQueueProcessor = MessageQueueProcessor(queue=workQueue, messageProcessor=processor, slackClient=slackClient, requestIdHolder=requestIdHolder)
    tokenQueueProcessor = MessageQueueProcessor(queue=tokenQueue, messageProcessor=processor, slackClient=slackClient, requestIdHolder=requestIdHolder)
    await database.connect()
    await s3manager.connect()
    await workQueue.connect()
    await tokenQueue.connect()
    try:
        while True:
            hasProcessedWork = await workQueueProcessor.execute_batch(batchSize=3, longPollSeconds=1, shouldProcessInParallel=True)
            if hasProcessedWork:
                continue
            hasProcessedToken = await tokenQueueProcessor.execute_batch(batchSize=10, longPollSeconds=1, shouldProcessInParallel=True)
            if hasProcessedToken:
                continue
            logging.info('No message received... sleeping')
            # Sleep without blocking the event loop (time.sleep would stall all coroutines)
            await asyncio.sleep(60)
    finally:
        await database.disconnect()
        await s3manager.disconnect()
        await workQueue.disconnect()
        await tokenQueue.disconnect()
        await requester.close_connections()

async def _reprocess_metadata(tokenMetadataProcessor: TokenMetadataProcessor, s3manager: S3Manager, tokenManager: TokenManager, tokenMetadata: TokenMetadata):
    logging.info(f'Re-processing tokenMetadata: {tokenMetadata.tokenMetadataId}')
    await tokenManager.update_token_metadata(registryAddress=tokenMetadata.registryAddress, tokenId=tokenMetadata.tokenId, shouldForce=True)
    logging.info(f'Re-processed tokenMetadata: {tokenMetadata.tokenMetadataId}')

async def calculate_token_fields(startCollectionId: Optional[int], endCollectionId: Optional[int]):
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    s3manager = S3Manager(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'])
    bucketName = os.environ['S3_BUCKET']
    await database.connect()
    await s3manager.connect()
    query = TokenCollectionsTable.select()
    if startCollectionId:
        query = query.where(TokenCollectionsTable.c.collectionId >= startCollectionId)
    if endCollectionId:
        query = query.where(TokenCollectionsTable.c.collectionId < endCollectionId)
    collections = [collection_from_row(row) async for row in database.iterate(query=query)]
    rows = []
    fields = set()
    for collection in collections:
        logging.info(f'Working on {collection.address}')
        collectionDirectory = f'{bucketName}/token-metadatas/{collection.address}/'
        index = 0
        async for tokenFile in s3manager.generate_directory_files(s3Directory=collectionDirectory):
            logging.info(f'Working on file {tokenFile.bucket}/{tokenFile.path}')
            if index > 3:
                break
            try:
                tokenDict = json.loads(await s3manager.read_file(sourcePath=f'{tokenFile.bucket}/{tokenFile.path}'))
                tokenDict['tokenId'] = tokenFile.path.split('/')[2]
                attributes = tokenDict.get('attributes')
                if attributes:
                    if isinstance(attributes, list):
                        # Collapse a list of attribute dicts into a comma-separated string of their keys
                        tokenDict['attributes'] = ','.join(set(key for attribute in attributes for key in attribute.keys()))
                    else:
                        tokenDict['attributes'] = list(attributes)
                else:
                    tokenDict['attributes'] = None
                tokenDict['description'] = tokenDict["description"][:10] if tokenDict.get('description') else None
                tokenDict['collection'] = collection.address
                fields.update(tokenDict.keys())
                rows.append(tokenDict)
            except Exception as exception:
                logging.exception(exception)
            index += 1
    with open(f'./output{startCollectionId}-{endCollectionId}.tsv', 'w') as outFile:
        # Sort fields so the column order is deterministic across runs
        dictWriter = csv.DictWriter(outFile, sorted(fields), delimiter='\t')
        dictWriter.writeheader()
        dictWriter.writerows(rows)
    fieldCounts = defaultdict(int)
    for row in rows:
        for key, value in row.items():
            if value:
                fieldCounts[key] += 1
    print(fieldCounts)
    await database.disconnect()
    await s3manager.disconnect()

async def upload_token_image(self, network: str, tokenId: int) -> None:
    logging.info(f'Uploading image for token {tokenId}')
    gridItem = await self.retriever.get_grid_item_by_token_id_network(network=network, tokenId=tokenId)
    imageId = await self.imageManager.upload_image_from_url(url=gridItem.imageUrl)
    resizableImageUrl = f'{_API_URL}/v1/images/{imageId}/go'
    await self.saver.update_grid_item(gridItemId=gridItem.gridItemId, resizableImageUrl=resizableImageUrl)
    if gridItem.groupId:
        await self.update_grid_item_group_image_deferred(network=network, ownerId=gridItem.ownerId, groupId=gridItem.groupId)

async def receive_new_blocks(self) -> None:
    latestBlocks = await self.retriever.list_blocks(
        orders=[Order(fieldName=BlocksTable.c.blockNumber.key, direction=Direction.DESCENDING)],
        limit=1,
    )
    latestProcessedBlockNumber = latestBlocks[0].blockNumber
    latestBlockNumber = await self.blockProcessor.get_latest_block_number()
    logging.info(f'Scheduling messages for processing blocks from {latestProcessedBlockNumber} to {latestBlockNumber}')
    await self.process_blocks_deferred(blockNumbers=list(reversed(range(latestProcessedBlockNumber, latestBlockNumber + 1))))

async def reprocess_old_blocks(self) -> None:
    blocksToReprocessQuery = (
        sqlalchemy.select(BlocksTable.c.blockNumber)
        .where(BlocksTable.c.createdDate < date_util.datetime_from_now(minutes=-10))
        .where(BlocksTable.c.updatedDate - BlocksTable.c.blockDate < datetime.timedelta(minutes=10))
    )
    rows = await self.retriever.database.fetch_all(query=blocksToReprocessQuery)
    blockNumbers = [blockNumber for (blockNumber, ) in rows]
    logging.info(f'Scheduling messages for reprocessing {len(blockNumbers)} blocks')
    await self.process_blocks_deferred(blockNumbers=blockNumbers, shouldSkipProcessingTokens=True)

async def update_activity_for_collection(self, address: str, startDate: datetime.datetime) -> None:
    address = chain_util.normalize_address(address)
    startDate = date_hour_from_datetime(startDate)
    retrievedCollectionActivity = await self.collectionActivityProcessor.calculate_collection_hourly_activity(address=address, startDate=startDate)
    async with self.saver.create_transaction() as connection:
        collectionActivity = await self.retriever.list_collections_activity(
            connection=connection,
            fieldFilters=[
                StringFieldFilter(fieldName=CollectionHourlyActivityTable.c.address.key, eq=address),
                DateFieldFilter(fieldName=CollectionHourlyActivityTable.c.date.key, eq=startDate),
            ],
        )
        if len(collectionActivity) > 0:
            await self.saver.update_collection_hourly_activity(
                connection=connection,
                collectionActivityId=collectionActivity[0].collectionActivityId,
                address=address,
                date=retrievedCollectionActivity.date,
                transferCount=retrievedCollectionActivity.transferCount,
                saleCount=retrievedCollectionActivity.saleCount,
                totalValue=retrievedCollectionActivity.totalValue,
                minimumValue=retrievedCollectionActivity.minimumValue,
                maximumValue=retrievedCollectionActivity.maximumValue,
                averageValue=retrievedCollectionActivity.averageValue,
            )
        elif retrievedCollectionActivity.transferCount == 0:
            logging.info('Not creating activity with transferCount == 0')
        else:
            await self.saver.create_collection_hourly_activity(
                connection=connection,
                address=retrievedCollectionActivity.address,
                date=retrievedCollectionActivity.date,
                transferCount=retrievedCollectionActivity.transferCount,
                saleCount=retrievedCollectionActivity.saleCount,
                totalValue=retrievedCollectionActivity.totalValue,
                minimumValue=retrievedCollectionActivity.minimumValue,
                maximumValue=retrievedCollectionActivity.maximumValue,
                averageValue=retrievedCollectionActivity.averageValue,
            )

def train(name, epochs, verbose):
    batch_size = 128

    # paths
    log_path = "logs/{}.json".format(name)
    out_path = "snapshots/" + name + ".{epoch:06d}.h5"
    echo('log path', log_path)
    echo('out path', out_path)

    # init
    echo('train', locals())
    logging.info(log_path, {'train': locals()})
    session = tf.Session('')
    K.set_session(session)
    K.set_learning_phase(1)

    # dataset
    echo('dataset loading...')
    seq_train, seq_valid = dataset.batch_generator(batch_size)

    # model building
    echo('model building...')
    model = Model.build()
    model.summary()

    # training
    echo('start learning...')
    callbacks = [
        logging.JsonLog(log_path),
        keras.callbacks.ModelCheckpoint(
            out_path,
            monitor='val_loss',
            save_weights_only=True,
            save_best_only=True,
        ),
    ]
    model.fit_generator(
        seq_train,
        validation_data=seq_valid,
        shuffle=True,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        workers=1,
        use_multiprocessing=True,
    )

async def process_token_ownerships(startTokenId: int, endTokenId: int, batchSize: int):
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    saver = Saver(database=database)
    retriever = Retriever(database=database)
    s3manager = S3Manager(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'])
    workQueue = SqsMessageQueue(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'], queueUrl='https://sqs.eu-west-1.amazonaws.com/097520841056/notd-work-queue')
    tokenQueue = SqsMessageQueue(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'], queueUrl='https://sqs.eu-west-1.amazonaws.com/097520841056/notd-token-queue')
    awsRequester = AwsRequester(accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'])
    ethClient = RestEthClient(url='https://nd-foldvvlb25awde7kbqfvpgvrrm.ethereum.managedblockchain.eu-west-1.amazonaws.com', requester=awsRequester)
    requester = Requester()
    tokenMetadataProcessor = TokenMetadataProcessor(requester=requester, ethClient=ethClient, s3manager=s3manager, bucketName=os.environ['S3_BUCKET'])
    openseaApiKey = os.environ['OPENSEA_API_KEY']
    tokenOwnershipProcessor = TokenOwnershipProcessor(retriever=retriever)
    collectionProcessor = CollectionProcessor(requester=requester, ethClient=ethClient, openseaApiKey=openseaApiKey, s3manager=s3manager, bucketName=os.environ['S3_BUCKET'])
    tokenManager = TokenManager(saver=saver, retriever=retriever, tokenQueue=tokenQueue, collectionProcessor=collectionProcessor, tokenMetadataProcessor=tokenMetadataProcessor, tokenOwnershipProcessor=tokenOwnershipProcessor)
    revueApiKey = os.environ['REVUE_API_KEY']
    slackClient = SlackClient(webhookUrl=os.environ['SLACK_WEBHOOK_URL'], requester=requester, defaultSender='worker', defaultChannel='notd-notifications')
    await database.connect()
    await workQueue.connect()
    await s3manager.connect()
    await tokenQueue.connect()
    await slackClient.post(text=f'process_token_ownerships → 🚧 started: {startTokenId}-{endTokenId}')
    try:
        currentTokenId = startTokenId
        while currentTokenId < endTokenId:
            start = currentTokenId
            end = min(currentTokenId + batchSize, endTokenId)
            currentTokenId = end
            logging.info(f'Working on {start}-{end}')
            query = (
                TokenMetadatasTable.select()
                .where(TokenMetadatasTable.c.tokenMetadataId >= start)
                .where(TokenMetadatasTable.c.tokenMetadataId < end)
            )
            tokenMetadatas = await retriever.query_token_metadatas(query=query)
            await asyncio.gather(*[process_token_ownership(tokenManager=tokenManager, registryAddress=tokenMetadata.registryAddress, tokenId=tokenMetadata.tokenId) for tokenMetadata in tokenMetadatas])
        await slackClient.post(text=f'process_token_ownerships → ✅ completed: {startTokenId}-{endTokenId}')
    except Exception as exception:
        await slackClient.post(text=f'process_token_ownerships → ❌ error: {startTokenId}-{endTokenId}\n```{str(exception)}```')
        raise
    finally:
        await database.disconnect()
        await workQueue.disconnect()
        await tokenQueue.disconnect()
        await s3manager.disconnect()

async def fix_address(startBlockNumber: int, endBlockNumber: int, batchSize: int):
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    await database.connect()
    currentBlockNumber = startBlockNumber
    while currentBlockNumber < endBlockNumber:
        start = currentBlockNumber
        end = min(currentBlockNumber + batchSize, endBlockNumber)
        logging.info(f'Working on {start} to {end}...')
        async with database.transaction():
            query = TokenTransfersTable.select()
            query = query.where(TokenTransfersTable.c.blockNumber >= start)
            query = query.where(TokenTransfersTable.c.blockNumber < end)
            query = query.where(or_(
                sqlalchemyfunc.length(TokenTransfersTable.c.toAddress) != 42,
                sqlalchemyfunc.length(TokenTransfersTable.c.fromAddress) != 42,
            ))
            tokenTransfersToChange = [token_transfer_from_row(row) async for row in database.iterate(query=query)]
            logging.info(f'Updating {len(tokenTransfersToChange)} transfers...')
            for tokenTransfer in tokenTransfersToChange:
                query = TokenTransfersTable.update(TokenTransfersTable.c.tokenTransferId == tokenTransfer.tokenTransferId)
                values = {
                    TokenTransfersTable.c.toAddress.key: normalize_address(tokenTransfer.toAddress),
                    TokenTransfersTable.c.fromAddress.key: normalize_address(tokenTransfer.fromAddress),
                }
                await database.execute(query=query, values=values)
        currentBlockNumber = end
    await database.disconnect()

async def reprocess_owner_token_ownerships(self, ownerAddress: str) -> None:
    tokenTransfers = await self.retriever.list_token_transfers(
        fieldFilters=[StringFieldFilter(fieldName=TokenTransfersTable.c.toAddress.key, eq=ownerAddress)],
    )
    collectionTokenIds = list({(transfer.registryAddress, transfer.tokenId) for transfer in tokenTransfers})
    logging.info(f'Refreshing {len(collectionTokenIds)} ownerships')
    for collectionTokenIdChunk in list_util.generate_chunks(lst=collectionTokenIds, chunkSize=10):
        await asyncio.gather(*[self.update_token_ownership(registryAddress=registryAddress, tokenId=tokenId) for (registryAddress, tokenId) in collectionTokenIdChunk])
    await self.update_token_metadatas_deferred(collectionTokenIds=collectionTokenIds)

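# NOTE: list_util.generate_chunks lives elsewhere in this codebase; a minimal sketch of
# the assumed behaviour (split a list into consecutive chunks of at most chunkSize items),
# for illustration only:
def generate_chunks(lst: List, chunkSize: int) -> List[List]:
    return [lst[index:index + chunkSize] for index in range(0, len(lst), chunkSize)]
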
async def update_token_metadata(self, registryAddress: str, tokenId: str, shouldForce: bool = False) -> None:
    registryAddress = chain_util.normalize_address(value=registryAddress)
    if not shouldForce:
        recentlyUpdatedTokens = await self.retriever.list_token_metadatas(
            fieldFilters=[
                StringFieldFilter(fieldName=TokenMetadatasTable.c.registryAddress.key, eq=registryAddress),
                StringFieldFilter(fieldName=TokenMetadatasTable.c.tokenId.key, eq=tokenId),
                DateFieldFilter(fieldName=TokenMetadatasTable.c.updatedDate.key, gt=date_util.datetime_from_now(days=-_TOKEN_UPDATE_MIN_DAYS)),
            ],
        )
        if len(recentlyUpdatedTokens) > 0:
            logging.info('Skipping token because it has been updated recently.')
            return
    collection = await self._get_collection_by_address(address=registryAddress, shouldProcessIfNotFound=True, sleepSecondsBeforeProcess=0.1 * random.randint(1, 10))
    try:
        retrievedTokenMetadata = await self.tokenMetadataProcessor.retrieve_token_metadata(registryAddress=registryAddress, tokenId=tokenId, collection=collection)
    except (TokenDoesNotExistException, TokenHasNoMetadataException):
        logging.info(f'Failed to retrieve metadata for token: {registryAddress}: {tokenId}')
        retrievedTokenMetadata = TokenMetadataProcessor.get_default_token_metadata(registryAddress=registryAddress, tokenId=tokenId)
    await self.save_token_metadata(retrievedTokenMetadata=retrievedTokenMetadata)

async def check_all_processed(startBlockNumber: int, endBlockNumber: int, batchSize: int):
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    workQueue = SqsMessageQueue(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'], queueUrl='https://sqs.eu-west-1.amazonaws.com/097520841056/notd-work-queue')
    await database.connect()
    await workQueue.connect()
    currentBlockNumber = startBlockNumber
    while currentBlockNumber < endBlockNumber:
        start = currentBlockNumber
        end = min(currentBlockNumber + batchSize, endBlockNumber)
        logging.info(f'Working on {start} - {end}...')
        async with database.transaction():
            query = (
                TokenTransfersTable.select()
                .with_only_columns([TokenTransfersTable.c.blockNumber])
                .where(TokenTransfersTable.c.blockNumber >= start)
                .where(TokenTransfersTable.c.blockNumber < end)
                .distinct(TokenTransfersTable.c.blockNumber)
            )
            processedBlocks = [row[0] for row in await database.fetch_all(query)]
        unprocessedBlocks = set(range(start, end)) - set(processedBlocks)
        logging.info(f'Scheduling {len(unprocessedBlocks)} unprocessed blocks in {start} - {end}')
        # Sort the set so the message payload is a deterministic, serializable list
        await workQueue.send_message(message=ProcessBlocksMessageContent(blockNumbers=sorted(unprocessedBlocks)).to_message())
        currentBlockNumber = currentBlockNumber + batchSize
    await database.disconnect()
    await workQueue.disconnect()

async def process_block(self, blockNumber: int, shouldSkipProcessingTokens: Optional[bool] = None) -> None:
    processedBlock = await self.blockProcessor.process_block(blockNumber=blockNumber)
    logging.info(f'Found {len(processedBlock.retrievedTokenTransfers)} token transfers in block #{blockNumber}')
    collectionTokenIds = await self._save_processed_block(processedBlock=processedBlock)
    logging.info(f'Found {len(collectionTokenIds)} changed tokens in block #{blockNumber}')
    collectionAddresses = list(set(registryAddress for registryAddress, _ in collectionTokenIds))
    logging.info(f'Found {len(collectionAddresses)} changed collections in block #{blockNumber}')
    await self.tokenManager.update_token_ownerships_deferred(collectionTokenIds=collectionTokenIds)
    if not shouldSkipProcessingTokens:
        await self.tokenManager.update_collections_deferred(addresses=collectionAddresses)
        await self.tokenManager.update_token_metadatas_deferred(collectionTokenIds=collectionTokenIds)

async def update_collection_deferred(self, address: str, shouldForce: bool = False) -> None:
    address = chain_util.normalize_address(value=address)
    if not shouldForce:
        recentlyUpdatedCollections = await self.retriever.list_collections(
            fieldFilters=[
                StringFieldFilter(fieldName=TokenCollectionsTable.c.address.key, eq=address),
                DateFieldFilter(fieldName=TokenCollectionsTable.c.updatedDate.key, gt=date_util.datetime_from_now(days=-_COLLECTION_UPDATE_MIN_DAYS)),
            ],
        )
        if len(recentlyUpdatedCollections) > 0:
            logging.info('Skipping collection because it has been updated recently.')
            return
    await self.tokenQueue.send_message(message=UpdateCollectionMessageContent(address=address).to_message())

def _resolve_data(dataString: str, registryAddress: str, tokenId: str) -> Dict:
    tokenMetadataJson = None
    if dataString.startswith('data:application/json;base64,'):
        base64String = dataString.replace('data:application/json;base64,', '', 1)
        # Appending '==' guards against missing padding; b64decode tolerates excess padding
        tokenMetadataJson = base64.b64decode(base64String.encode('utf-8') + b'==').decode('utf-8', errors='ignore')
    elif dataString.startswith('data:application/json;utf8,'):
        tokenMetadataJson = dataString.replace('data:application/json;utf8,', '', 1)
    elif dataString.startswith('data:application/json;ascii,'):
        tokenMetadataJson = dataString.replace('data:application/json;ascii,', '', 1)
    elif dataString.startswith('data:application/json;charset=utf-8,'):
        tokenMetadataJson = dataString.replace('data:application/json;charset=utf-8,', '', 1)
    elif dataString.startswith('data:application/json,'):
        tokenMetadataJson = dataString.replace('data:application/json,', '', 1)
    elif dataString.startswith('data:text/plain,'):
        tokenMetadataJson = dataString.replace('data:text/plain,', '', 1)
    else:
        logging.info(f'Failed to process data string: {dataString}')
    tokenMetadataDict = {}
    if tokenMetadataJson:
        # NOTE(krishan711): it's safe to decode something that's either encoded or not encoded
        tokenMetadataJson = urllib.parse.unquote(tokenMetadataJson)
        try:
            tokenMetadataDict = json.loads(tokenMetadataJson)
        except JSONDecodeError as exception:
            logging.info(f'Failed to parse JSON for {registryAddress}/{tokenId}: {exception}')
            tokenMetadataDict = {}
    return tokenMetadataDict

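# NOTE: illustrative usage of _resolve_data with a synthetic base64-encoded data URI; the
# registry address and tokenId below are placeholders, not real values.
def _example_resolve_data() -> None:
    exampleJson = json.dumps({'name': 'Token #1'})
    exampleDataString = 'data:application/json;base64,' + base64.b64encode(exampleJson.encode('utf-8')).decode('utf-8')
    exampleMetadata = _resolve_data(dataString=exampleDataString, registryAddress='0x0000000000000000000000000000000000000000', tokenId='1')
    assert exampleMetadata == {'name': 'Token #1'}
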
async def _reprocess_metadata_from_s3(tokenMetadataProcessor: TokenMetadataProcessor, s3manager: S3Manager, tokenManager: TokenManager, tokenMetadata: TokenMetadata):
    tokenDirectory = f'{os.environ["S3_BUCKET"]}/token-metadatas/{tokenMetadata.registryAddress}/{tokenMetadata.tokenId}/'
    tokenMetadataFiles = [file async for file in s3manager.generate_directory_files(s3Directory=tokenDirectory)]
    if len(tokenMetadataFiles) == 0:
        await _reprocess_metadata(tokenMetadataProcessor=tokenMetadataProcessor, s3manager=s3manager, tokenManager=tokenManager, tokenMetadata=tokenMetadata)
        return
    tokenFile = tokenMetadataFiles[-1]
    tokenMetadataJson = await s3manager.read_file(sourcePath=f'{tokenFile.bucket}/{tokenFile.path}')
    tokenMetadataDict = json.loads(tokenMetadataJson)
    if not tokenMetadataDict:
        await _reprocess_metadata(tokenMetadataProcessor=tokenMetadataProcessor, s3manager=s3manager, tokenManager=tokenManager, tokenMetadata=tokenMetadata)
        return
    logging.info(f'Updating tokenMetadata: {tokenMetadata.tokenMetadataId}')
    s3TokenMetadata = await tokenMetadataProcessor._get_token_metadata_from_data(registryAddress=tokenMetadata.registryAddress, tokenId=tokenMetadata.tokenId, metadataUrl=tokenMetadata.metadataUrl, tokenMetadataDict=tokenMetadataDict)
    await tokenManager.save_token_metadata(retrievedTokenMetadata=s3TokenMetadata)
    logging.info(f'Updated tokenMetadata: {tokenMetadata.tokenMetadataId}')

async def update_tokens(self, network: str) -> None:
    networkUpdate = await self.retriever.get_network_update_by_network(network=network)
    latestProcessedBlockNumber = networkUpdate.latestBlockNumber
    latestBlockNumber = await self.contractStore.get_latest_block_number(network=network)
    batchSize = 2500
    tokenIdsToUpdate = set()
    logging.info(f'Processing blocks from {latestProcessedBlockNumber} to {latestBlockNumber}')
    for startBlockNumber in range(latestProcessedBlockNumber + 1, latestBlockNumber + 1, batchSize):
        endBlockNumber = min(startBlockNumber + batchSize, latestBlockNumber)
        tokenIdsToUpdate.update(await self._get_updated_token_ids(network=network, startBlockNumber=startBlockNumber, endBlockNumber=endBlockNumber))
    for tokenId in list(tokenIdsToUpdate):
        await self.update_token_deferred(network=network, tokenId=tokenId)
    await self.saver.update_network_update(networkUpdateId=networkUpdate.networkUpdateId, latestBlockNumber=latestBlockNumber)

async def owned_tokens(ownerAddress: Optional[str]):
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    retriever = Retriever(database=database)
    await database.connect()
    boughtTokens = []
    soldTokens = []
    async with database.transaction():
        query = TokenTransfersTable.select()
        query = query.where(TokenTransfersTable.c.toAddress == ownerAddress)
        async for row in retriever.database.iterate(query=query):
            tokenTransfer = token_transfer_from_row(row)
            boughtTokens.append(tokenTransfer)
        query = TokenTransfersTable.select()
        query = query.where(TokenTransfersTable.c.fromAddress == ownerAddress)
        async for row in retriever.database.iterate(query=query):
            tokenTransfer = token_transfer_from_row(row)
            soldTokens.append(tokenTransfer)
    uniqueBoughtTokens = set(boughtTokens)
    uniqueSoldTokens = set(soldTokens)
    tokensOwned = uniqueBoughtTokens - uniqueSoldTokens
    for tokenTransfer in tokensOwned:
        logging.info(f'Tokens Owned: registry_address: {tokenTransfer.registryAddress}, token_id: {tokenTransfer.tokenId}')
    await database.disconnect()
    logging.info(f'Got {len(tokensOwned)} total owned')

async def reprocess_metadata(startId: int, endId: int, batchSize: int):
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    saver = Saver(database)
    tokenMetadataProcessor = TokenMetadataProcessor(requester=None, ethClient=None, s3manager=None, bucketName=None)
    await database.connect()
    currentId = startId
    while currentId < endId:
        start = currentId
        end = min(currentId + batchSize, endId)
        logging.info(f'Working on {start} to {end}...')
        async with database.transaction():
            query = TokenMetadatasTable.select()
            query = query.where(TokenMetadatasTable.c.tokenMetadataId >= start)
            query = query.where(TokenMetadatasTable.c.tokenMetadataId < end)
            query = query.where(TokenMetadatasTable.c.metadataUrl.startswith('data:'))
            query = query.where(TokenMetadatasTable.c.name.is_(None))
            tokenMetadatasToChange = [token_metadata_from_row(row) async for row in database.iterate(query=query)]
            logging.info(f'Updating {len(tokenMetadatasToChange)} token metadatas...')
            for tokenMetadata in tokenMetadatasToChange:
                try:
                    tokenMetadataDict = tokenMetadataProcessor._resolve_data(dataString=tokenMetadata.metadataUrl, registryAddress=tokenMetadata.registryAddress, tokenId=tokenMetadata.tokenId)
                    if tokenMetadataDict:
                        logging.info(f'Processed: {tokenMetadata.tokenMetadataId}')
                        await saver.update_token_metadata(
                            tokenMetadataId=tokenMetadata.tokenMetadataId,
                            name=tokenMetadataDict.get('name'),
                            imageUrl=tokenMetadataDict.get('image'),
                            description=tokenMetadataDict.get('description'),
                            attributes=tokenMetadataDict.get('attributes', []),
                        )
                except Exception as exception:
                    logging.exception(f'Error processing {tokenMetadata.tokenMetadataId}: {exception}')
        currentId = currentId + batchSize
    await database.disconnect()

async def _get_updated_token_ids(self, network: str, startBlockNumber: int, endBlockNumber: int) -> Set[int]:
    tokenIdsToUpdate = set()
    transferredTokenIds = await self.contractStore.get_transferred_token_ids_in_blocks(network=network, startBlockNumber=startBlockNumber, endBlockNumber=endBlockNumber)
    logging.info(f'Found {len(transferredTokenIds)} transferred tokens in blocks {startBlockNumber}-{endBlockNumber}')
    tokenIdsToUpdate.update(transferredTokenIds)
    updatedTokenIds = await self.contractStore.get_updated_token_ids_in_blocks(network=network, startBlockNumber=startBlockNumber, endBlockNumber=endBlockNumber)
    logging.info(f'Found {len(updatedTokenIds)} updated tokens in blocks {startBlockNumber}-{endBlockNumber}')
    tokenIdsToUpdate.update(updatedTokenIds)
    offChainUpdatedTokens = await self.retriever.list_offchain_contents(
        fieldFilters=[
            StringFieldFilter(fieldName=OffchainContentsTable.c.network.key, eq=network),
            IntegerFieldFilter(fieldName=OffchainContentsTable.c.blockNumber.key, gte=startBlockNumber),
            IntegerFieldFilter(fieldName=OffchainContentsTable.c.blockNumber.key, lt=endBlockNumber),
        ])
    logging.info(f'Found {len(offChainUpdatedTokens)} off-chain updated tokens in blocks {startBlockNumber}-{endBlockNumber}')
    tokenIdsToUpdate.update([offchainContent.tokenId for offchainContent in offChainUpdatedTokens])
    return tokenIdsToUpdate

async def process_block(self, blockNumber: int) -> ProcessedBlock:
    # NOTE(krishan711): some blocks are too large to be retrieved from the AWS hosted node e.g. #14222802
    # for these, we can use infura specifically but if this problem gets too big find a better solution
    blockData = await self.ethClient.get_block(blockNumber=blockNumber, shouldHydrateTransactions=True)
    retrievedTokenTransfers = []
    erc721events = await self.ethClient.get_log_entries(startBlockNumber=blockNumber, endBlockNumber=blockNumber, topics=[self.erc721TansferEventSignatureHash])
    logging.info(f'Found {len(erc721events)} erc721 events in block #{blockNumber}')
    for event in erc721events:
        retrievedTokenTransfers += await self._process_erc721_single_event(event=dict(event), blockData=blockData)
    erc1155events = await self.ethClient.get_log_entries(startBlockNumber=blockNumber, endBlockNumber=blockNumber, topics=[self.erc1155TansferEventSignatureHash])
    logging.info(f'Found {len(erc1155events)} erc1155Single events in block #{blockNumber}')
    erc1155Transfers = []
    for event in erc1155events:
        erc1155Transfers += await self._process_erc1155_single_event(event=dict(event), blockData=blockData)
    erc1155Batchevents = await self.ethClient.get_log_entries(startBlockNumber=blockNumber, endBlockNumber=blockNumber, topics=[self.erc1155TansferBatchEventSignatureHash])
    logging.info(f'Found {len(erc1155Batchevents)} erc1155Batch events in block #{blockNumber}')
    for event in erc1155Batchevents:
        erc1155Transfers += await self._process_erc1155_batch_event(event=dict(event), blockData=blockData)
    # NOTE(krishan711): these need to be merged because of floor sweeps e.g. https://etherscan.io/tx/0x88affc90581254ca2ceb04cefac281c4e704d457999c6a7135072a92a7befc8b
    retrievedTokenTransfers += await self._merge_erc1155_transfers(erc1155Transfers=erc1155Transfers)
    blockNumber = blockData['number']
    blockHash = blockData['hash'].hex()
    blockDate = datetime.datetime.utcfromtimestamp(blockData['timestamp'])
    return ProcessedBlock(blockNumber=blockNumber, blockHash=blockHash, blockDate=blockDate, retrievedTokenTransfers=retrievedTokenTransfers)

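# NOTE: the topic hashes used above (e.g. self.erc721TansferEventSignatureHash) are
# configured elsewhere. For illustration, such log topics are conventionally derived by
# keccak-hashing the canonical event signature; this sketch assumes web3 is available:
from web3 import Web3

ERC721_TRANSFER_TOPIC = Web3.keccak(text='Transfer(address,address,uint256)').hex()
ERC1155_TRANSFER_SINGLE_TOPIC = Web3.keccak(text='TransferSingle(address,address,address,uint256,uint256)').hex()
ERC1155_TRANSFER_BATCH_TOPIC = Web3.keccak(text='TransferBatch(address,address,address,uint256,uint256[],uint256[])').hex()
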
async def reprocess_metadata(startId: Optional[int], endId: Optional[int], batchSize: Optional[int]):
    databaseConnectionString = Database.create_psql_connection_string(username=os.environ["DB_USERNAME"], password=os.environ["DB_PASSWORD"], host=os.environ["DB_HOST"], port=os.environ["DB_PORT"], name=os.environ["DB_NAME"])
    database = Database(connectionString=databaseConnectionString)
    saver = Saver(database=database)
    retriever = Retriever(database=database)
    s3manager = S3Manager(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'])
    tokenQueue = SqsMessageQueue(region='eu-west-1', accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'], queueUrl='https://sqs.eu-west-1.amazonaws.com/097520841056/notd-token-queue')
    awsRequester = AwsRequester(accessKeyId=os.environ['AWS_KEY'], accessKeySecret=os.environ['AWS_SECRET'])
    requester = Requester()
    ethClient = RestEthClient(url='https://nd-foldvvlb25awde7kbqfvpgvrrm.ethereum.managedblockchain.eu-west-1.amazonaws.com', requester=awsRequester)
    tokenMetadataProcessor = TokenMetadataProcessor(requester=requester, ethClient=ethClient, s3manager=s3manager, bucketName=os.environ['S3_BUCKET'])
    openseaApiKey = os.environ['OPENSEA_API_KEY']
    collectionProcessor = CollectionProcessor(requester=requester, ethClient=ethClient, openseaApiKey=openseaApiKey, s3manager=s3manager, bucketName=os.environ['S3_BUCKET'])
    tokenManager = TokenManager(saver=saver, retriever=retriever, tokenQueue=tokenQueue, collectionProcessor=collectionProcessor, tokenMetadataProcessor=tokenMetadataProcessor)
    await s3manager.connect()
    await tokenQueue.connect()
    await database.connect()
    if not startId:
        startId = 0
    if not endId:
        maxTokenMetadata = await retriever.list_token_metadatas(
            limit=1,
            orders=[Order(fieldName=TokenMetadatasTable.c.tokenMetadataId.key, direction=Direction.DESCENDING)],
        )
        endId = maxTokenMetadata[0].tokenMetadataId + 1
    currentId = startId
    while currentId < endId:
        start = currentId
        end = min(currentId + batchSize, endId)
        query = TokenMetadatasTable.select()
        query = query.where(TokenMetadatasTable.c.tokenMetadataId >= start)
        query = query.where(TokenMetadatasTable.c.tokenMetadataId < end)
        query = query.where(TokenMetadatasTable.c.updatedDate < datetime.datetime(2022, 2, 13))
        query = query.order_by(TokenMetadatasTable.c.tokenMetadataId.asc())
        tokenMetadatasToChange = [token_metadata_from_row(row) for row in await database.fetch_all(query=query)]
        logging.info(f'Working on {start} - {end}')
        logging.info(f'Updating {len(tokenMetadatasToChange)} token metadatas...')
        await asyncio.gather(*[_reprocess_metadata_from_s3(tokenMetadataProcessor=tokenMetadataProcessor, s3manager=s3manager, tokenManager=tokenManager, tokenMetadata=tokenMetadata) for tokenMetadata in tokenMetadatasToChange])
        currentId = currentId + batchSize
    await s3manager.disconnect()
    await tokenQueue.disconnect()
    await awsRequester.close_connections()
    await requester.close_connections()
    await database.disconnect()
