def setUp(self):
    """
    Recreate an empty "test" table and build both clients used by the tests

    The plain driver client is kept in self.client, the wrapper under test
    in self.new_client
    """
    raw_client = Client('localhost')
    self.client = raw_client
    # Drop first so every test starts from an empty table
    for statement in (
        'DROP TABLE IF EXISTS test',
        'CREATE TABLE test (id String, x Int32, dict String) ENGINE = ReplacingMergeTree() ORDER BY id',
    ):
        raw_client.execute(statement)
    self.new_client = CustomClickhouse()
class ClickhouseTokenHolders:
    """
    Builds the materialized view with token transfers decoded from events

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    """

    def __init__(self, indices=INDICES):
        self.indices = indices
        self.client = CustomClickhouse()

    def extract_token_transactions(self):
        """
        Create the token transactions materialized view from Transfer events

        Sender and receiver are cut out of the second and third event topics,
        the SQL fragment returned by utils.generate_sql_for_value("data") is
        inserted into the field list and its data_value column is exposed as value

        This function is an entry point for prepare-erc-transactions-view operation
        """
        value_sql = utils.generate_sql_for_value("data")
        # Adjacent literals are joined by the parser into one statement string
        template = (
            " CREATE MATERIALIZED VIEW IF NOT EXISTS {index} "
            "ENGINE = ReplacingMergeTree() ORDER BY id "
            "POPULATE AS ( "
            "SELECT "
            "concat('0x', substring(topics[2], 27, 40)) AS from, "
            "concat('0x', substring(topics[3], 27, 40)) AS to, "
            "{value_sql}, "
            "data_value AS value, "
            "id, "
            "address AS token, "
            "transactionHash, "
            "blockNumber "
            "FROM {event} "
            "ANY INNER JOIN ( "
            "SELECT id AS address, decimals "
            "FROM {contract} "
            ") USING address "
            "WHERE topics[1] = '{transfer_topic}' "
            ") "
        )
        placeholders = {
            "index": self.indices["token_transaction"],
            "value_sql": value_sql,
            "transfer_topic": TRANSFER_EVENT,
            "event": self.indices["event"],
            "contract": self.indices["contract_description"],
        }
        self.client.send_sql_request(template.format(**placeholders))
class ClickhouseIndices:
    """
    Creates database tables ("indices") described by the schema constants

    Parameters
    ----------
    indices : dict
        Index names keyed by their logical name, defaults to INDICES
    """

    def __init__(self, indices=INDICES):
        self.client = CustomClickhouse()
        self.indices = indices

    def _create_index(self, index, fields=None, primary_key=None):
        """
        Create specified index in database with specified field types and primary key

        An "id String" column is always part of the created table. The passed
        fields mapping is copied, so neither the caller's dictionary nor a
        shared default is modified

        Parameters
        ----------
        index : str
            Name of index
        fields : dict
            Fields and their types. Empty when omitted
        primary_key : list
            All possible primary keys in index. ["id"] when omitted
        """
        # Work on a copy: adding "id" in place used to leak into the schema
        # constant handed in by prepare_indices and into the shared default
        columns = dict(fields) if fields is not None else {}
        columns["id"] = "String"
        order_by = ["id"] if primary_key is None else primary_key

        fields_string = ", ".join(
            "{} {}".format(name, column_type)
            for name, column_type in columns.items()
        )
        primary_key_string = ",".join(order_by)
        create_sql = (
            " CREATE TABLE IF NOT EXISTS {} ({}) "
            "ENGINE = ReplacingMergeTree() "
            "ORDER BY ({}) "
        ).format(index, fields_string, primary_key_string)
        self.client.send_sql_request(create_sql)

    def prepare_indices(self):
        """
        Create all indices specified in schema/schema.py

        Only indices whose key is present in INDEX_FIELDS are created, the
        primary key falls back to ["id"] when PRIMARY_KEYS has no entry

        This function is an entry point for prepare-indices operation
        """
        for key, index in self.indices.items():
            if key in INDEX_FIELDS:
                self._create_index(index, INDEX_FIELDS[key],
                                   PRIMARY_KEYS.get(key, ["id"]))
def __init__(self, indices=INDICES):
    """
    Attach a new CustomClickhouse client and the index names to this object

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    """
    # The client is still constructed before the index names are stored
    self.client, self.indices = CustomClickhouse(), indices
class ClickhouseBancorTrades:
    """
    Builds the view with trades decoded from conversion events

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    """

    def __init__(self, indices=INDICES):
        self.indices = indices
        self.client = CustomClickhouse()

    def extract_trades(self):
        """
        Create the trades view on top of the events index

        Events are filtered by the conversion event topic and by addresses of
        contracts flagged as standard_bancor_converter. Tokens and trader are
        cut out of the event topics, amount and return are cut out of the
        event data and passed through utils.generate_sql_for_value fragments
        """
        return_raw_sql = utils.generate_sql_for_value("return_raw")
        amount_raw_sql = utils.generate_sql_for_value("amount_raw")
        # Adjacent literals are joined by the parser into one statement string
        template = (
            " CREATE VIEW {trades_index} AS ( "
            "SELECT id, from_token, to_token, trader, amount, return, transactionHash "
            "FROM ( "
            "SELECT id, from_token, to_token, trader, amount, "
            "substring(data, 65, 66) AS return_raw, "
            "{return_raw_sql}, "
            "return_raw_value AS return, "
            "transactionHash "
            "FROM ( "
            "SELECT id, "
            "concat('0x', substring(topics[2], 27)) AS from_token, "
            "concat('0x', substring(topics[3], 27)) AS to_token, "
            "concat('0x', substring(topics[4], 27)) AS trader, "
            "data, "
            "substring(data, 3, 64) AS amount_raw, "
            "{amount_raw_sql}, "
            "amount_raw_value AS amount, "
            "transactionHash "
            "FROM ( "
            "SELECT * "
            "FROM {events_index} "
            "WHERE topics[1] = '{conversion_event}' "
            "AND address IN( "
            "SELECT address "
            "FROM {contracts_index} "
            "WHERE standard_bancor_converter = 1 "
            ") "
            ") "
            "ANY LEFT JOIN ( "
            "SELECT id AS from_token, decimals "
            "FROM {tokens_index} "
            ") USING from_token "
            ") "
            "ANY LEFT JOIN ( "
            "SELECT id AS to_token, decimals "
            "FROM {tokens_index} "
            ") USING to_token "
            ") "
            ") "
        )
        placeholders = {
            "trades_index": self.indices["bancor_trade"],
            "events_index": self.indices["event"],
            "tokens_index": self.indices["contract_description"],
            "contracts_index": self.indices["contract"],
            # Not referenced by the template, still looked up as before
            "transactions_index": self.indices["internal_transaction"],
            "conversion_event": CONVERSION_EVENT,
            "amount_raw_sql": amount_raw_sql,
            "return_raw_sql": return_raw_sql,
        }
        self.client.send_sql_request(template.format(**placeholders))
class ClickhouseContracts(utils.ClickhouseContractTransactionsIterator):
    """
    Downloads ABI for contracts and stores it in the contract_abi index

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    parity_hosts : list
        Tuples whose first two elements are used as a block range
    """
    doc_type = "itx"
    index = "internal_transaction"
    block_prefix = "abi_extracted"

    def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
        self.indices = indices
        self.client = CustomClickhouse()
        self.pool = Pool(processes=NUMBER_OF_PROCESSES)
        self.parity_hosts = parity_hosts

    def _split_on_chunks(self, iterable, size):
        """
        Split given iterable onto chunks (delegates to utils.split_on_chunks)
        """
        return utils.split_on_chunks(iterable, size)

    def _get_contracts_abi(self, all_addresses):
        """
        Get ABI for specified contracts in parallel mode

        Addresses are numbered by position, spread over the process pool and
        the answers are put back together in the original order

        Parameters
        ----------
        all_addresses : list
            Contract addresses

        Returns
        -------
        list
            List of ABIs for each contract in list
        """
        numbered_addresses = list(enumerate(all_addresses))
        chunks = self._split_on_chunks(numbered_addresses, NUMBER_OF_PROCESSES)
        dict_chunks = list(map(dict, chunks))
        merged = {}
        for partial in self.pool.map(_get_contracts_abi_sync, dict_chunks):
            merged.update(partial)
        # Keys are the positions assigned above, sorting restores input order
        return [merged[position] for position in sorted(merged)]

    def _get_range_query(self):
        """
        Get range query based on all specified blocks range in config.py

        Returns
        -------
        str
            SQL query for blockNumber located in specified range
        """
        block_ranges = [host[0:2] for host in self.parity_hosts]
        return utils.make_range_query("blockNumber", *block_ranges)

    def _iterate_contracts_without_abi(self):
        """
        Iterate through contracts without previous attempt to extract ABI
        within block range specified in config.py

        Returns
        -------
        generator
            Generator that iterates through contracts by conditions above
        """
        template = 'ANY LEFT JOIN {} USING id WHERE abi_extracted IS NULL AND {}'
        partial_query = template.format(self.indices["contract_abi"],
                                        self._get_range_query())
        return self._iterate_contracts(partial_query=partial_query,
                                       fields=["address"])

    def _convert_abi(self, abi):
        """
        Return JSON string for given ABI if it is not empty.
        Otherwise return None
        """
        return json.dumps(abi) if abi else None

    def save_contracts_abi(self):
        """
        Save contracts ABI to a database

        Every processed contract is stored with abi_extracted set to True,
        an empty ABI is stored as None

        This function is an entry point for download-contracts-abi operation
        """
        target_index = "contract_abi"
        for contracts in self._iterate_contracts_without_abi():
            addresses = [contract["_source"]["address"] for contract in contracts]
            abis = self._get_contracts_abi(addresses)
            documents = []
            for position, contract in enumerate(contracts):
                documents.append({
                    'abi': self._convert_abi(abis[position]),
                    'abi_extracted': True,
                    "id": contract["_id"]
                })
            self.client.bulk_index(index=self.indices[target_index],
                                   docs=documents)
def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
    """
    Store configuration, create a database client and a process pool

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    parity_hosts : list
        Hosts configuration, defaults to PARITY_HOSTS
    """
    self.indices, self.client = indices, CustomClickhouse()
    # Pool size is taken from NUMBER_OF_PROCESSES
    worker_pool = Pool(processes=NUMBER_OF_PROCESSES)
    self.pool = worker_pool
    self.parity_hosts = parity_hosts
def __init__(self, indices=INDICES, parity_host=PARITY_HOSTS[0][-1]):
    """
    Initialize the parent class with a new CustomClickhouse client

    Parameters
    ----------
    indices : dict
        Index names, defaults to INDICES
    parity_host : str
        Defaults to the last element of the first PARITY_HOSTS entry
    """
    clickhouse_client = CustomClickhouse()
    super().__init__(indices, clickhouse_client, parity_host)
class ClickhouseInputs(utils.ClickhouseContractTransactionsIterator):
    """
    Decodes transaction inputs with contract ABIs and stores the result

    Reads block_flag_name, input_index and contract_field from self; they
    are not defined in this class

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    parity_hosts : list
        Tuples whose first two elements are used as a block range
    """
    _contracts_abi = {}
    block_prefix = "inputs_decoded"

    def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
        self.indices = indices
        self.client = CustomClickhouse()
        self.pool = Pool(processes=NUMBER_OF_PROCESSES)
        self.parity_hosts = parity_hosts

    def _set_contracts_abi(self, abis):
        """Sets current contracts ABI (parsed from JSON) for this object"""
        parsed = {}
        for address, abi in abis.items():
            parsed[address] = json.loads(abi)
        self._contracts_abi = parsed

    def _split_on_chunks(self, iterable, size):
        """
        Split given iterable onto chunks (delegates to utils.split_on_chunks)
        """
        return utils.split_on_chunks(iterable, size)

    def _decode_inputs_batch(self, encoded_params):
        """
        Decode inputs in parallel mode

        Parameters
        ----------
        encoded_params : dict
            Transaction hashes and attached tuples with contract ABI and transaction input

        Returns
        -------
        dict
            Transaction hashes and parsed inputs for each transaction
        """
        pairs = list(encoded_params.items())
        chunks = list(self._split_on_chunks(pairs, NUMBER_OF_PROCESSES))
        dict_chunks = list(map(dict, chunks))
        merged = {}
        for partial in self.pool.map(_decode_inputs_batch_sync, dict_chunks):
            merged.update(partial)
        return merged

    def _get_range_query(self):
        """
        Get range query based on all specified blocks range in config.py

        Returns
        -------
        str
            SQL query to find blocks located in range specified in config
        """
        block_ranges = [host[0:2] for host in self.parity_hosts]
        return utils.make_range_query("blockNumber", *block_ranges)

    def _iterate_contracts_with_abi(self, max_block):
        """
        Iterate through contracts with non-empty ABI within block range
        specified in config.py with unprocessed transactions before specified block

        Parameters
        ----------
        max_block : int
            Block number

        Returns
        -------
        generator
            Generator that iterates through contracts by conditions above
        """
        template = "ANY INNER JOIN {} USING id WHERE abi IS NOT NULL AND {}"
        query = template.format(self.indices["contract_abi"],
                                self._get_range_query())
        return self._iterate_contracts(max_block, query,
                                       fields=["abi", "address"])

    def _add_id_to_inputs(self, decoded_inputs):
        """
        Add transaction hash as id to decoded_inputs

        Parameters
        ----------
        decoded_inputs : dict
            Dictionary with transaction hashes and input info dicts
        """
        for tx_hash, decoded in decoded_inputs.items():
            decoded.update({"id": tx_hash})

    def _decode_inputs_for_contracts(self, contracts, max_block):
        """
        Decode inputs for specified contracts before specified block

        Any exception raised while a chunk of transactions is decoded or
        saved is printed and the next chunk is processed

        Parameters
        ----------
        contracts : list
            Contracts info in JSON format, i.e.
            {"_id": TRANSACTION_ID, "_source": {"document": "fields"}}
        max_block : int
            Block number
        """
        chunks = self._iterate_transactions_by_targets(contracts, max_block)
        for transactions in chunks:
            try:
                encoded = {}
                for transaction in transactions:
                    transaction_id = transaction["_id"]
                    contract_address = transaction["_source"][self.contract_field]
                    encoded[transaction_id] = (
                        self._contracts_abi[contract_address],
                        transaction["_source"]["input"]
                    )
                decoded = self._decode_inputs_batch(encoded)
                self._add_id_to_inputs(decoded)
                self.client.bulk_index(index=self.indices[self.input_index],
                                       docs=list(decoded.values()))
            except Exception as exception:
                print(exception)

    def decode_inputs(self):
        """
        Decode inputs for all transactions to contracts with ABI

        For every chunk of contracts the ABIs are loaded, inputs are decoded
        and the processed block number is saved for these contracts

        This function is an entry point for parse-*-inputs operation
        """
        max_block = self._get_max_block({self.block_flag_name: 1})
        for contracts in self._iterate_contracts_with_abi(max_block):
            abis = {}
            for contract in contracts:
                address = contract["_source"]["address"]
                abis[address] = contract["_source"]["abi"]
            self._set_contracts_abi(abis)
            self._decode_inputs_for_contracts(contracts, max_block)
            contract_ids = [contract["_id"] for contract in contracts]
            self._save_max_block(contract_ids, max_block)
class ClickhouseEvents:
    """
    Extracts events from a node in block ranges and stores them in a database

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    parity_hosts : list
        The last element of the first entry is used as the node URL
    """

    def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
        self.client = CustomClickhouse()
        self.indices = indices
        self.web3 = Web3(
            HTTPProvider(parity_hosts[0][-1], request_kwargs={'timeout': 100}))

    def _iterate_block_ranges(self, range_size=EVENTS_RANGE_SIZE):
        """
        Iterate over unprocessed block ranges with given size

        Blocks without an events_extracted flag are grouped by
        floor(number / range_size)

        Parameters
        ----------
        range_size : int
            Size of each block range

        Returns
        -------
        generator
            Generator that yields (start, end) tuples of unprocessed block ranges
        """
        range_field = "distinct(toInt32(floor(number / {}))) AS range".format(
            range_size)
        flags_template = "ANY LEFT JOIN (SELECT id, value FROM {} FINAL WHERE name = 'events_extracted') USING id WHERE value IS NULL"
        flags_query = flags_template.format(self.indices["block_flag"])
        chunks = self.client.iterate(index=self.indices["block"],
                                     fields=[range_field],
                                     query=flags_query,
                                     return_id=False)
        for chunk in chunks:
            for record in chunk:
                range_number = record["_source"]["range"]
                yield (range_number * range_size,
                       (range_number + 1) * range_size)

    def _get_events(self, block_range):
        """
        Get events from parity for given block range

        Parameters
        ----------
        block_range : tuple
            Start and end of block range

        Returns
        -------
        list
            Events inside given block range (not including end block)
        """
        first_block, end_block = block_range[0], block_range[1]
        event_filter = self.web3.eth.filter({
            "fromBlock": first_block,
            # toBlock is inclusive, the end of the range is not
            "toBlock": end_block - 1
        })
        return event_filter.get_all_entries()

    def _save_events(self, events):
        """
        Prepare and save each event to a database

        Nothing is sent to the database when there are no events

        Parameters
        ----------
        events : list
            Events extracted from parity
        """
        prepared = list(map(self._process_event, events))
        if not prepared:
            return
        self.client.bulk_index(index=self.indices["event"], docs=prepared)

    def _process_event(self, event):
        """
        Prepare event - parse hexadecimal numbers, assign id, lowercase address

        The id is built as "<transaction hash>.<transaction log index>"

        Parameters
        ----------
        event : dict
            Event extracted from parity

        Returns
        -------
        dict
            Prepared copy of the event
        """
        prepared = event.copy()
        # Base 0 lets int() pick the base from the literal prefix
        log_index = int(event["transactionLogIndex"], 0)
        prepared.update({
            "transactionLogIndex": log_index,
            "id": "{}.{}".format(event['transactionHash'].hex(), log_index),
            "address": event["address"].lower(),
            "blockHash": event["blockHash"].hex(),
            "transactionHash": event["transactionHash"].hex(),
            "topics": [topic.hex() for topic in event["topics"]],
        })
        return prepared

    def _save_processed_blocks(self, block_range):
        """
        Save events_extracted flag for processed blocks

        Parameters
        ----------
        block_range : tuple
            Start and end of processed block range
        """
        first_block, end_block = block_range
        flags = []
        for block in range(first_block, end_block):
            flags.append({"id": block, "name": "events_extracted", "value": 1})
        self.client.bulk_index(index=self.indices["block_flag"], docs=flags)

    def extract_events(self):
        """
        Extract parity events to a database

        Each unprocessed block range is fetched, saved and then flagged

        This function is an entry point for extract-events operation
        """
        for block_range in self._iterate_block_ranges():
            self._save_events(self._get_events(block_range))
            self._save_processed_blocks(block_range)
def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
    """
    Create a database client and a Web3 connection

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    parity_hosts : list
        The last element of the first entry is used as the node URL
    """
    self.client, self.indices = CustomClickhouse(), indices
    node_url = parity_hosts[0][-1]
    provider = HTTPProvider(node_url, request_kwargs={'timeout': 100})
    self.web3 = Web3(provider)
class ClickhouseTestCase(unittest.TestCase):
    """
    Tests of CustomClickhouse run against the "test" table on localhost
    """

    def setUp(self):
        """Recreate the "test" table and build both clients"""
        raw_client = Client('localhost')
        self.client = raw_client
        raw_client.execute('DROP TABLE IF EXISTS test')
        raw_client.execute(
            'CREATE TABLE test (id String, x Int32, dict String) ENGINE = ReplacingMergeTree() ORDER BY id'
        )
        self.new_client = CustomClickhouse()

    @staticmethod
    def _with_x_below(records, bound):
        """Keep formatted records whose x value is lower than bound"""
        return [record for record in records if record["_source"]['x'] < bound]

    def _add_records(self):
        """
        Insert four records and return them in the {"_id", "_source"} format
        """
        documents = [{'x': value, "id": str(value)} for value in (1, 2, 3, 100)]
        formatted_documents = []
        for document in documents:
            formatted_documents.append({
                "_id": document["id"],
                "_source": {'x': document["x"]}
            })
        self.client.execute('INSERT INTO test (id, x) VALUES', documents)
        return formatted_documents

    def test_search(self):
        expected = self._add_records()
        found = self.new_client.search(index="test", fields=["x"])
        self.assertCountEqual(expected, found)

    def test_search_with_query(self):
        expected = self._with_x_below(self._add_records(), 3)
        found = self.new_client.search(index="test",
                                       query="WHERE x < 3",
                                       fields=["x"])
        self.assertSequenceEqual(expected, found)

    def test_count(self):
        expected = self._with_x_below(self._add_records(), 3)
        counted = self.new_client.count(index="test", query="WHERE x < 3")
        assert counted == len(expected)

    def test_iterate(self):
        page_size = 2
        expected = self._with_x_below(self._add_records(), 4)
        pages = self.new_client.iterate(index="test",
                                        fields=["x"],
                                        query="WHERE x < 4",
                                        per=page_size)
        self.assertSequenceEqual(expected[0:page_size], next(pages))
        self.assertSequenceEqual(expected[page_size:2 * page_size],
                                 next(pages))

    def test_multiple_iterate(self):
        page_size = 2
        self._add_records()
        # Two independent iterations over the same index must both start
        first_pages = self.new_client.iterate(index="test",
                                              fields=["x"],
                                              per=page_size)
        next(first_pages)
        second_pages = self.new_client.iterate(index="test",
                                               fields=["x"],
                                               per=page_size)
        next(second_pages)

    def test_iterate_without_id(self):
        self._add_records()
        pages = self.new_client.iterate(index="test",
                                        fields=["distinct(ceiling(x / 10))"],
                                        return_id=False)
        first_page = next(pages)
        assert len(first_page) == 2

    def test_iterate_with_and_without_final(self):
        # The same records are inserted twice to produce duplicates
        self._add_records()
        self._add_records()
        pages_with_final = self.new_client.iterate(index="test", fields=[])
        pages_without_final = self.new_client.iterate(index="test",
                                                      fields=[],
                                                      final=False)
        assert len(next(pages_without_final)) > len(next(pages_with_final))

    def test_iterate_with_derived_fields(self):
        self._add_records()
        pages = self.new_client.iterate(index="test", fields=["x - 1 AS y"])
        first_record = next(pages)[0]
        assert "y" in first_record["_source"]

    def test_bulk_index(self):
        documents = [{"x": number} for number in range(10)]
        copies = [document.copy() for document in documents]
        self.new_client.bulk_index(index="test", docs=copies, id_field="x")
        stored = self.client.execute('SELECT id FROM test')
        expected = [(str(document["x"]), ) for document in documents]
        self.assertCountEqual(stored, expected)

    def test_bulk_index_check_schema(self):
        # "y" is not a column of the test table
        self.new_client.bulk_index(index="test", docs=[{"y": 1, "id": 1}])
        stored = self.client.execute('SELECT id FROM test')
        self.assertCountEqual(stored, [('1', )])

    def test_bulk_index_empty_fields(self):
        documents = [{"id": 1, "x": 1}]
        self.new_client.bulk_index(index="test", docs=list(documents))

    def test_bulk_index_dict_values(self):
        documents = [{"x": number, "dict": {"test": number}}
                     for number in range(10)]
        copies = [document.copy() for document in documents]
        self.new_client.bulk_index(index="test", docs=copies, id_field="x")
        stored = self.client.execute('SELECT dict FROM test')
        expected = [(json.dumps(document["dict"]), ) for document in documents]
        self.assertCountEqual(stored, expected)

    def test_bulk_index_split_records(self):
        test_docs = [{"docs": True}]
        test_chunks = ["records1", "records2"]
        wrapper = self.new_client
        wrapper._split_records = MagicMock(return_value=test_chunks)
        wrapper.client.execute = MagicMock()
        wrapper._set_id = MagicMock()
        wrapper._filter_schema = MagicMock()
        wrapper.bulk_index(index="test_index", docs=test_docs)
        wrapper._split_records.assert_called_with(test_docs)
        expected_calls = [call(ANY, records) for records in test_chunks]
        wrapper.client.execute.assert_has_calls(expected_calls)

    def test_send_sql_request(self):
        formatted_documents = self._add_records()
        answer = self.new_client.send_sql_request("SELECT max(x) FROM test")
        assert answer == max(
            document["_source"]["x"] for document in formatted_documents)

    def test_split_records(self):
        record = {"test": "123"}
        record_size = sys.getsizeof(record)
        records = [record] * 5
        chunks = list(
            self.new_client._split_records(records,
                                           max_bytes=record_size * 2 + 1))
        self.assertSequenceEqual(
            chunks, [[record] * 2, [record] * 2, [record]])

    def test_split_records_one_record(self):
        record = {"test": "123"}
        record_size = sys.getsizeof(record)
        records = [record]
        chunks = list(
            self.new_client._split_records(records, max_bytes=record_size))
        self.assertSequenceEqual(chunks, [[record]])

    def test_split_records_same_chunk(self):
        record = {"test": "123"}
        record_size = sys.getsizeof(record)
        records = [record] * 6
        chunks = list(
            self.new_client._split_records(records,
                                           max_bytes=record_size * 2))
        self.assertSequenceEqual(
            chunks, [[record] * 2, [record] * 2, [record] * 2])
class ClickhouseContractTransactions:
    """
    Builds the contracts materialized view on top of internal transactions

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    """

    def __init__(self, indices=INDICES):
        self.indices = indices
        self.client = CustomClickhouse()

    def _extract_first_bytes(self, func):
        """
        Create contract method signature and return first 4 bytes of this signature

        Parameters
        ----------
        func: str
            String that contains function name and arguments

        Returns
        -------
        str
            String with first 4 bytes of method signature in hex format
        """
        selector = Web3.sha3(text=func)[0:4]
        hex_selector = str(Web3.toHex(selector))
        # Drop the first two characters of the hex string
        return hex_selector[2:]

    def _extract_methods_signatures(self):
        """
        Return dictionary with first bytes of standard method signatures

        Returns
        -------
        dict
            Dictionary with first 4 bytes of methods signatures in hex format
        """
        prototypes = {
            'erc20': {
                'totalSupply': 'totalSupply()',
                'balanceOf': 'balanceOf(address)',
                'allowance': 'allowance(address,address)',
                'transfer': 'transfer(address,uint256)',
                'transferFrom': 'transferFrom(address,address,uint256)',
                'approve': 'approve(address,uint256)',
            },
            'erc223': {
                'tokenFallback': 'tokenFallback(address,uint256,bytes)'
            },
            'bancor_converter': {
                'convert': 'convert(address,address,uint256,uint256)'
            }
        }
        signatures = {}
        for standard, methods in prototypes.items():
            signatures[standard] = {
                method: self._extract_first_bytes(prototype)
                for method, prototype in methods.items()
            }
        return signatures

    def _get_standards(self):
        """
        Create dict with sql to create "standard_*" flag fields

        A standard matches when the bytecode contains every method
        signature listed for it

        Returns
        -------
        dict
            Dictionary with keys "standard_*", where * is standard name
            and values that are queries for database to define related standard
        """
        flags = {}
        for standard, signatures in self._extract_methods_signatures().items():
            conditions = [
                "(bytecode LIKE '%{}%')".format(signature)
                for signature in signatures.values()
            ]
            flags["standard_" + standard] = " AND ".join(conditions)
        return flags

    def _get_fields(self):
        """
        Get string with materialized view fields names and related queries

        Returns
        -------
        str
            Part of SQL request to create materialized view.
            Contains field names and definitions
        """
        standard_fields = self._get_standards()
        # alias -> expression
        expressions = {
            "id": "coalesce(address, id)",
            "blockNumber": "blockNumber",
            "address": "address",
            "owner": "from",
            "bytecode": "code"
        }
        expressions.update(standard_fields)
        definitions = []
        for alias, expression in expressions.items():
            definitions.append("{} AS {}".format(expression, alias))
        return ", ".join(definitions)

    def extract_contract_addresses(self):
        """
        Create materialized view for contracts extracted from internal transactions table

        Only transactions of type "create" without error and parent_error are used

        This function is an entry point for prepare-erc-transactions-view operation
        """
        fields_string = self._get_fields()
        template = "CREATE MATERIALIZED VIEW IF NOT EXISTS {} {} POPULATE AS (SELECT {} FROM {} WHERE {})"
        sql = template.format(
            self.indices["contract"],
            'ENGINE = ReplacingMergeTree() ORDER BY id',
            fields_string,
            self.indices["internal_transaction"],
            "type = 'create' AND error IS NULL AND parent_error IS NULL"
        )
        self.client.send_sql_request(sql)
def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
    """
    Create database and Web3 clients and load external links

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    parity_hosts : list
        The third element of the first entry is used as the node URL
    """
    self.indices, self.client = indices, CustomClickhouse()
    node_url = parity_hosts[0][2]
    self.w3 = Web3(HTTPProvider(node_url))
    self.standard_token_abi = standard_token_abi
    self._set_external_links()
class ClickhouseContractMethods:
    """
    Check if contract is token, is it compliant with token standards and get variables from it such as name or symbol

    Parameters
    ----------
    indices: dict
        Dictionary containing existing database indices
    parity_hosts: list
        List of tuples that includes 3 elements: start block, end_block and Parity URL
    """
    # Filled by _set_external_links; defined on the class, so it is shared
    _external_links = {}
    # (constant name, {output type: converter or None}, placeholder)
    _constants_types = [
        ('name', {
            "string": lambda x: str(x).replace("\\x00", ""),
            "bytes32": lambda x: str(x).replace("\\x00", "")[2:-1].strip()
        }, ''),
        ('symbol', {
            "string": lambda x: str(x).replace("\\x00", ""),
            "bytes32": lambda x: str(x).replace("\\x00", "")[2:-1].strip()
        }, ''),
        ('decimals', {
            "uint8": None
        }, 18),
        ('totalSupply', {
            "uint256": None
        }, 0),
        ('owner', {
            "address": lambda x: x.lower()
        }, None)
    ]

    def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
        self.indices = indices
        self.client = CustomClickhouse()
        self.w3 = Web3(HTTPProvider(parity_hosts[0][2]))
        self.standard_token_abi = standard_token_abi
        self._set_external_links()

    def _set_external_links(self):
        """
        Sets website slug and cmc_id for this object

        Values are read from tokens.json located in CURRENT_DIR and stored by token address
        """
        tokens_path = '{}/tokens.json'.format(CURRENT_DIR)
        with open(tokens_path) as json_file:
            tokens = json.load(json_file)
        for token in tokens:
            links = {
                "website_slug": token["website_slug"],
                "cmc_id": token["cmc_id"],
            }
            self._external_links[token["address"]] = links

    def _iterate_unprocessed_contracts(self):
        """
        Iterate over ERC20 contracts that were not processed yet

        Returns
        -------
        generator
            Generator that iterates over contracts in database
        """
        contracts_index = self.indices["contract"]
        # Adjacent literals are joined by the parser into one query string
        query = (
            " WHERE standard_erc20 = 1 "
            "AND id not in( "
            "SELECT id "
            "FROM {} "
            ") "
        ).format(self.indices["contract_description"])
        return self.client.iterate(index=contracts_index,
                                   fields=["address"],
                                   query=query)

    def _round_supply(self, supply, decimals):
        """
        Divide supply by 10 ** decimals, and round it

        Parameters
        ----------
        supply: int
            Contract total supply
        decimals: int
            Contract decimals

        Returns
        -------
        Rounded total supply without decimals, not greater than MAX_TOTAL_SUPPLY
        """
        scaled = supply / math.pow(10, decimals) if decimals > 0 else supply
        rounded = round(Decimal(scaled))
        return min(rounded, MAX_TOTAL_SUPPLY)

    def _get_constant(self, address, constant, types, placeholder=None):
        """
        Get value through contract function marked as constant

        Tries every type from types dict and returns first value that is not empty.
        Exceptions raised by a call or a converter are ignored and the next type is tried.
        If nothing non-empty was found, returns the last response when it is an int
        and placeholder otherwise

        Parameters
        ----------
        address: str
            Contract address
        constant: str
            Name of constant
        types: dict
            Dict with all possible types and converter functions for target value
        placeholder
            Default value for target value

        Returns
        -------
        Value returned by a contract and converted with the function
        Placeholder, if there are no non-empty values
        """
        checksum_address = self.w3.toChecksumAddress(address)
        # The output type is filled in for every attempt below
        output_spec = {"name": "", "type": None}
        abi = [{
            "constant": True,
            "inputs": [],
            "name": constant,
            "outputs": [output_spec],
            "payable": False,
            "type": "function"
        }]
        response = None
        for output_type, convert in types.items():
            try:
                output_spec["type"] = output_type
                contract = self.w3.eth.contract(address=checksum_address,
                                                abi=abi)
                response = getattr(contract.functions, constant)().call()
                if convert:
                    response = convert(response)
                if response:
                    return response
            except Exception:
                continue
        # An empty int (zero) is a valid answer, other empty values are not
        return response if type(response) == int else placeholder

    def _get_constants(self, address):
        """
        Return contract ERC20 info

        Parameters
        ----------
        address: str
            Contract address

        Returns
        -------
        list
            Name, symbol, decimals, total supply, owner address
        """
        constants = [
            self._get_constant(address, constant, types, placeholder)
            for constant, types, placeholder in self._constants_types
        ]
        # Index 2 is decimals, index 3 is totalSupply
        constants[3] = self._round_supply(constants[3], constants[2])
        return constants

    def _update_contract_descr(self, doc_id, body):
        """
        Store contract description in database

        Parameters
        ----------
        doc_id: str
            id of contract
        body: dict
            Dictionary with new values, id is added to it in place
        """
        body["id"] = doc_id
        target_index = self.indices['contract_description']
        self.client.bulk_index(target_index, docs=[body])

    def _get_external_links(self, address):
        """
        Return website slug and cmc_id stored for the address

        Returns
        -------
        tuple
            Website slug and cmc_id, both None for an unknown address
        """
        links = self._external_links.get(address, {})
        return links.get("website_slug"), links.get("cmc_id")

    def _classify_contract(self, contract):
        """
        Extract contract ERC20 info and stores it into the database

        Extracts ERC20 token description from parity and from tokens.json file

        Parameters
        ----------
        contract: dict
            Dictionary with contract info
        """
        constants = self._get_constants(contract['_source']['address'])
        name, symbol, decimals, total_supply, owner = constants
        website_slug, cmc_id = self._get_external_links(
            contract["_source"]["address"])
        description = {
            'token_name': name,
            'token_symbol': symbol,
            'decimals': decimals,
            'total_supply': total_supply,
            'token_owner': owner,
            "website_slug": website_slug,
            "cmc_id": cmc_id
        }
        self._update_contract_descr(contract['_id'], description)

    def search_methods(self):
        """
        Extract public values for ERC20 contracts

        This function is an entry point for extract-tokens operation
        """
        for contracts_chunk in self._iterate_unprocessed_contracts():
            for contract in contracts_chunk:
                self._classify_contract(contract)
def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
    """
    Initialize the parent class and alias the miner transactions index

    Parameters
    ----------
    indices : dict
        Index names, defaults to INDICES
    parity_hosts : list
        Hosts configuration, defaults to PARITY_HOSTS
    """
    super().__init__(indices, CustomClickhouse(), parity_hosts)
    # "miner_transaction" points to the internal transactions index;
    # the entry is written into self.indices in place
    index_names = self.indices
    index_names["miner_transaction"] = index_names["internal_transaction"]
def __init__(self, indices=INDICES, parity_host=PARITY_HOSTS[0][-1]):
    """
    Create database and Web3 clients

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    parity_host : str
        Defaults to the last element of the first PARITY_HOSTS entry
    """
    self.indices, self.client = indices, CustomClickhouse()
    provider = HTTPProvider(parity_host)
    self.web3 = Web3(provider)
class ClickhouseTokenPrices(ClickhouseContractTransactionsIterator):
    """
    Downloads historical token prices from CryptoCompare and stores them

    Parameters
    ----------
    indices : dict
        Index names used by this object, defaults to INDICES
    parity_host : str
        Defaults to the last element of the first PARITY_HOSTS entry
    """
    doc_type = 'token'
    block_prefix = 'prices_extracted'

    def __init__(self, indices=INDICES, parity_host=PARITY_HOSTS[0][-1]):
        self.indices = indices
        self.client = CustomClickhouse()
        self.web3 = Web3(HTTPProvider(parity_host))

    def _iterate_cc_tokens(self):
        """
        Iterate over ERC20 tokens

        Returns
        -------
        generator
            Generator that iterates over ERC20 tokens
        """
        return self._iterate_contracts(partial_query='WHERE standard_erc20 = 1',
                                       fields=["address"])

    def _get_cc_tokens(self):
        """
        Extract list of tokens

        Returns
        -------
        list
            List of ERC20 contracts ("_source" part of every record)
        """
        return [
            token['_source']
            for token_chunk in self._iterate_cc_tokens()
            for token in token_chunk
        ]

    def _construct_bulk_insert_ops(self, docs):
        """
        Assign id to each document in place

        The id is "<address>_<YYYY-MM-DD>" built from the document timestamp

        Parameters
        ----------
        docs: list
            List of price records
        """
        for doc in docs:
            doc["id"] = doc['address'] + '_' + doc['timestamp'].strftime("%Y-%m-%d")

    def _insert_multiple_docs(self, docs, index_name):
        """
        Index multiple documents simultaneously

        Documents are saved in chunks of 1000 records

        Parameters
        ----------
        docs: list
            List of dictionaries with new data
        index_name: str
            Name of the index that contains inserted documents
        """
        for chunk in bulk_chunks(docs, docs_per_chunk=1000):
            self._construct_bulk_insert_ops(chunk)
            self.client.bulk_index(index=index_name, docs=chunk)

    def _set_moving_average(self, prices, window_size=MOVING_AVERAGE_WINDOW):
        """
        Perform moving average procedure over a daily close prices

        Sets the "average" field of every price in place. Until window_size
        close prices are collected the average equals the close price

        Parameters
        ----------
        prices: list
            List of prices
        window_size: int
            Size of window
        """
        prices_stack = []
        for price in prices:
            prices_stack.append(price["close"])
            if len(prices_stack) == window_size:
                price["average"] = np.mean(prices_stack)
                # Slide the window: forget the oldest close price
                prices_stack.pop(0)
            else:
                price["average"] = price["close"]

    def _process_hist_prices(self, prices):
        """
        Prepare extracted prices to a database

        Performs moving average procedure over prices, sets address and timestamp fields

        Parameters
        ----------
        prices: list
            List of tokens prices

        Returns
        -------
        list
            List of prepared prices
        """
        self._set_moving_average(prices)
        points = []
        for price in prices:
            points.append({
                # The average is cut to 10 digits after the decimal point
                'BTC': float('{:0.10f}'.format(price["average"])),
                'timestamp': datetime.datetime.fromtimestamp(price['time']),
                'address': price['address'],
            })
        return points

    def _make_historical_prices_req(self, address, days_count):
        """
        Make call to CryptoCompare API to extract token historical data

        Parameters
        ----------
        address: str
            Token address
        days_count: int
            Days limit

        Returns
        -------
        list
            List of prices for specified symbol, every price gets the token address.
            None when the request or the response processing failed
        """
        symbol = self._get_symbol_by_address(address)
        url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym=BTC&limit={}'.format(symbol, days_count)
        try:
            res = requests.get(url).json()
            for point in res['Data']:
                point['address'] = address
            return res['Data']
        except Exception:
            # Best effort: a token without prices must not stop the whole run,
            # but KeyboardInterrupt and SystemExit are no longer swallowed
            print("No exchange rate for {}".format(symbol))
            return None

    def _get_last_avail_price_date(self):
        """
        Get last price available in token_price index

        Returns
        -------
        Result of the MAX(timestamp) request to the price index
        """
        return self.client.send_sql_request(
            'SELECT MAX(timestamp) FROM {}'.format(self.indices['price']))

    def _get_days_count(self, now, last_price_date, limit=DAYS_LIMIT):
        """
        Count number of days for that prices are unavailable

        Parameters
        ----------
        now: date
            Current date
        last_price_date: date
            Timestamp of last available price
        limit: int
            Upper bound for the returned value

        Returns
        -------
        int
            Number of days between current date and last price in database,
            not greater than limit
        """
        days_count = (now - last_price_date).days + 1
        return min(days_count, limit)

    def _get_symbol_abi(self, output_type):
        """Return mock ABI to get token symbol"""
        return [{
            "constant": True,
            "inputs": [],
            "name": "symbol",
            "outputs": [
                {
                    "name": "",
                    "type": output_type
                }
            ],
            "payable": False,
            "stateMutability": "view",
            "type": "function"
        }]

    # TODO replace with contract_methods.py call
    def _get_symbol_by_address(self, address):
        """
        Get symbol of specified token

        The symbol is requested both as string and as bytes32, failed calls
        are printed and skipped. The string answer is preferred

        Parameters
        ----------
        address: str
            Address of token

        Returns
        -------
        str
            Symbol of specified token, empty string when both calls failed
        """
        address = self.web3.toChecksumAddress(address)
        symbols = {}
        for output_type in ['string', 'bytes32']:
            contract = self.web3.eth.contract(
                abi=self._get_symbol_abi(output_type), address=address)
            try:
                symbols[output_type] = contract.functions.symbol().call()
            except Exception as e:
                print(e)
        if 'string' in symbols:
            return symbols['string']
        # Strip the zero padding of the bytes32 answer
        return symbols.get('bytes32', "".encode('utf-8')).decode('utf-8').rstrip('\0')

    def _get_historical_multi_prices(self):
        """
        Extract historical token prices from CryptoCompare

        Tokens without prices are skipped

        Returns
        -------
        list
            List of token historical prices
        """
        token_addresses = [
            token['address']
            for token in self._get_cc_tokens()
        ]
        now = datetime.datetime.now()
        last_price_date = self._get_last_avail_price_date()
        days_count = self._get_days_count(now, last_price_date)
        prices = []
        for token in tqdm(token_addresses):
            token_prices = self._make_historical_prices_req(token, days_count)
            if token_prices is None:
                continue
            prices.extend(self._process_hist_prices(token_prices))
        return prices

    def get_prices_within_interval(self):
        """
        Extract historical token prices and save them to the price index

        This function is an entry point for download-prices operation
        """
        prices = self._get_historical_multi_prices()
        if prices is not None:
            self._insert_multiple_docs(prices, self.indices['price'])