def publish(self, dataset, datadir, watch):
    """Upload every CSV file found in *datadir* to a registered dataset.

    Looks the dataset up in the marketplace contract, verifies the
    publisher address is one of ours, then POSTs all ``*.csv`` files in
    one multipart request to the auth server.

    Parameters
    ----------
    dataset : str
        Dataset name; lower-cased before the contract lookup.
    datadir : str
        Directory scanned (non-recursively) for ``*.csv`` files.
    watch : unused in this method — presumably kept for CLI interface
        compatibility; TODO confirm against the caller.

    Raises
    ------
    MarketplaceDatasetNotFound, MarketplaceNoAddressMatch,
    MarketplaceNoCSVFiles, MarketplaceHTTPRequest
    """
    dataset = dataset.lower()
    provider_info = self.mkt_contract.functions.getDataProviderInfo(
        Web3.toHex(dataset.encode())
    ).call()
    # provider_info[4] flags whether the dataset is registered at all.
    if not provider_info[4]:
        raise MarketplaceDatasetNotFound(dataset=dataset)

    owner = provider_info[0]
    match = next(
        (entry for entry in self.addresses if entry['pubAddr'] == owner),
        None,
    )
    if not match:
        raise MarketplaceNoAddressMatch(dataset=dataset, address=owner)
    print('Using address: {} to publish this dataset.'.format(owner))

    # Prefer credentials stored locally; otherwise derive them from the
    # wallet associated with the publisher address.
    if 'key' in match:
        key, secret = match['key'], match['secret']
    else:
        key, secret = get_key_secret(owner, match['wallet'])

    csv_paths = glob.glob(os.path.join(datadir, '*.csv'))
    if not csv_paths:
        raise MarketplaceNoCSVFiles(datadir=datadir)

    def read_file(pathname):
        # Read eagerly so no file handle stays open across the upload.
        with open(pathname, 'rb') as f:
            return f.read()

    total = len(csv_paths)
    files = []
    for position, path in enumerate(csv_paths, start=1):
        log.info('Uploading file {} of {}: {}'.format(position, total, path))
        files.append(('file', (os.path.basename(path), read_file(path))))

    headers = get_signed_headers(dataset, key, secret)
    r = requests.post('{}/marketplace/publish'.format(AUTH_SERVER),
                      files=files,
                      headers=headers)

    if r.status_code != 200:
        raise MarketplaceHTTPRequest(request='upload file',
                                     error=r.status_code)
    if 'error' in r.json():
        raise MarketplaceHTTPRequest(request='upload file',
                                     error=r.json()['error'])

    log.info('File processed successfully.')
    print('\nDataset {} uploaded and processed successfully.'.format(
        dataset))
def publish(self, dataset, datadir, watch):
    """Upload every CSV file found in *datadir* to a registered dataset.

    Parameters
    ----------
    dataset : str
        Dataset name; lower-cased before the contract lookup. Must be
        registered with the marketplace contract.
    datadir : str
        Directory scanned (non-recursively) for ``*.csv`` files.
    watch : unused in this method — presumably kept for CLI interface
        compatibility; TODO confirm against the caller.

    Raises
    ------
    MarketplaceDatasetNotFound, MarketplaceNoAddressMatch,
    MarketplaceNoCSVFiles, MarketplaceHTTPRequest
    """
    dataset = dataset.lower()
    provider_info = self.mkt_contract.functions.getDataProviderInfo(
        # BUGFIX: encode to bytes before Web3.toHex — the sibling
        # publish() implementation in this file does the same, and
        # passing a plain str is inconsistent with it.
        Web3.toHex(dataset.encode())
    ).call()
    # provider_info[4] flags whether the dataset is registered at all.
    if not provider_info[4]:
        raise MarketplaceDatasetNotFound(dataset=dataset)

    match = next(
        (l for l in self.addresses if l['pubAddr'] == provider_info[0]),
        None,
    )
    if not match:
        raise MarketplaceNoAddressMatch(
            dataset=dataset, address=provider_info[0])
    print('Using address: {} to publish this dataset.'.format(
        provider_info[0]))

    if 'key' in match:
        key = match['key']
        secret = match['secret']
    else:
        key, secret = get_key_secret(provider_info[0])

    headers = get_signed_headers(dataset, key, secret)

    filenames = glob.glob(os.path.join(datadir, '*.csv'))
    if not filenames:
        raise MarketplaceNoCSVFiles(datadir=datadir)

    files = []
    for file in filenames:
        # BUGFIX: read inside a context manager instead of handing an
        # open file object to requests — the previous code never closed
        # the handles it opened.
        with open(file, 'rb') as f:
            files.append(('file', (os.path.basename(file), f.read())))

    r = requests.post('{}/marketplace/publish'.format(AUTH_SERVER),
                      files=files,
                      headers=headers)

    if r.status_code != 200:
        raise MarketplaceHTTPRequest(request='upload file',
                                     error=r.status_code)
    if 'error' in r.json():
        raise MarketplaceHTTPRequest(request='upload file',
                                     error=r.json()['error'])

    print('Dataset {} uploaded successfully.'.format(dataset))
def register(self):
    """Interactively register a new dataset with the marketplace contract.

    Prompts for the dataset name, monthly price (ENG), data frequency and
    history/live flags, signs and submits the ``register`` transaction,
    waits for it to be mined, then creates the dataset metadata.
    """
    # Keep asking until the user picks a name that is not taken yet.
    while True:
        desc = input('Enter the name of the dataset to register: ')
        dataset = desc.lower().strip()
        provider_info = self.mkt_contract.functions.getDataProviderInfo(
            Web3.toHex(dataset)).call()
        # provider_info[4] flags whether the name is already registered.
        if provider_info[4]:
            print('There is already a dataset registered under '
                  'the name "{}". Please choose a different '
                  'name.'.format(dataset))
        else:
            break

    # BUGFIX: re-prompt on a non-numeric price instead of crashing with
    # an uncaught ValueError, matching the retry style of the other
    # prompts in this method.
    while True:
        try:
            price = int(
                input('Enter the price for a monthly subscription to '
                      'this dataset in ENG: '))
            break
        except ValueError:
            print('Not a valid price.')

    while True:
        freq = input('Enter the data frequency [daily, hourly, minute]: ')
        if freq.lower() not in ('daily', 'hourly', 'minute'):
            print('Not a valid frequency.')
        else:
            break

    def _ask_yes_no(prompt):
        # Ask a Y/N question (default Y) until a valid answer is given.
        while True:
            answer = (input(prompt) or 'y').lower()
            if answer in ('y', 'n'):
                return answer == 'y'
            print('Please answer Y or N.')

    has_history = _ask_yes_no(
        'Does it include historical data? [default: Y]: ')
    # BUGFIX: corrected the 'Doest it' typo in the prompt.
    has_live = _ask_yes_no(
        'Does it include live data? [default: Y]: ')

    address, address_i = self.choose_pubaddr()
    if 'key' in self.addresses[address_i]:
        key = self.addresses[address_i]['key']
        secret = self.addresses[address_i]['secret']
    else:
        key, secret = get_key_secret(address,
                                     self.addresses[address_i]['wallet'])

    grains = to_grains(price)
    tx = self.mkt_contract.functions.register(
        Web3.toHex(dataset),
        grains,
        address,
    ).buildTransaction({
        'from': address,
        'nonce': self.web3.eth.getTransactionCount(address)})

    signed_tx = self.sign_transaction(tx)
    try:
        tx_hash = '0x{}'.format(
            bin_hex(self.web3.eth.sendRawTransaction(signed_tx)))
        print('\nThis is the TxHash for this transaction: {}'.format(
            tx_hash))
    except Exception as e:
        print('Unable to register the requested dataset: {}'.format(e))
        return

    self.check_transaction(tx_hash)

    print('Waiting for the transaction to succeed...')
    while True:
        try:
            # getTransactionReceipt returns None until the tx is mined,
            # in which case .status raises AttributeError and we keep
            # polling.
            if self.web3.eth.getTransactionReceipt(tx_hash).status:
                break
            else:
                print('\nTransaction failed. Aborting...')
                return
        except AttributeError:
            pass
        # Progress dots while polling, one per second.
        for i in range(0, 10):
            print('.', end='', flush=True)
            time.sleep(1)

    print('\nWarming up the {} dataset'.format(dataset))
    self.create_metadata(
        key=key,
        secret=secret,
        ds_name=dataset,
        data_frequency=freq,
        desc=desc,
        has_history=has_history,
        has_live=has_live,
    )
    print('\n{} registered successfully'.format(dataset))
def ingest(self, ds_name=None, start=None, end=None, force_download=False):
    """Download a subscribed dataset and ingest it as a local bundle.

    When *ds_name* is None, lists the available datasets and prompts the
    user to pick one. Verifies registration and an active subscription,
    then streams the multipart payload from the auth server, writing each
    part to the temp bundles folder and processing it.

    Parameters
    ----------
    ds_name : str or None
        Dataset to ingest; interactive selection when None.
    start, end, force_download : unused in this method — presumably part
        of the public ingest interface; TODO confirm against callers.

    Raises
    ------
    MarketplaceHTTPRequest
        On a non-200 response or a non-multipart error payload.
    """
    if ds_name is None:
        # Interactive mode: show the catalog and prompt for an index.
        df_sets = self._list()
        if df_sets.empty:
            print('There are no datasets available yet.')
            return

        set_print_settings()
        while True:
            print(df_sets)
            dataset_num = input('Choose the dataset you want to '
                                'ingest [0..{}]: '.format(
                                    df_sets.size - 1))
            try:
                dataset_num = int(dataset_num)
            except ValueError:
                print('Enter a number between 0 and {}'.format(
                    df_sets.size - 1))
            else:
                if dataset_num not in range(0, df_sets.size):
                    print('Enter a number between 0 and {}'.format(
                        df_sets.size - 1))
                else:
                    ds_name = df_sets.iloc[dataset_num]['dataset']
                    break

    # ds_name = ds_name.lower()
    # TODO: catch error conditions
    provider_info = self.mkt_contract.functions.getDataProviderInfo(
        Web3.toHex(ds_name)).call()
    if not provider_info[4]:
        print('The requested "{}" dataset is not registered in '
              'the Data Marketplace.'.format(ds_name))
        return

    address, address_i = self.choose_pubaddr()
    fns = self.mkt_contract.functions
    check_sub = fns.checkAddressSubscription(address,
                                             Web3.toHex(ds_name)).call()

    if check_sub[0] != address or self.to_text(check_sub[1]) != ds_name:
        # BUGFIX: corrected the 'Plese' typo in the message.
        print('You are not subscribed to dataset "{}" with address {}. '
              'Please subscribe first.'.format(ds_name, address))
        return

    # check_sub[5] is the subscription-active flag; check_sub[4] the
    # expiry timestamp in epoch seconds.
    if not check_sub[5]:
        # BUGFIX: added the missing space before 'Please' and abort —
        # the message tells the user to renew first, but the code fell
        # through and attempted the ingestion anyway.
        print('Your subscription to dataset "{}" expired on {} UTC. '
              'Please renew your subscription by running:\n'
              'catalyst marketplace subscribe --dataset={}'.format(
                  ds_name,
                  pd.to_datetime(check_sub[4], unit='s', utc=True),
                  ds_name))
        return

    if 'key' in self.addresses[address_i]:
        key = self.addresses[address_i]['key']
        secret = self.addresses[address_i]['secret']
    else:
        key, secret = get_key_secret(address,
                                     self.addresses[address_i]['wallet'])
    headers = get_signed_headers(ds_name, key, secret)

    log.info('Starting download of dataset for ingestion...')
    r = requests.post(
        '{}/marketplace/ingest'.format(AUTH_SERVER),
        headers=headers,
        stream=True,
    )
    if r.status_code == 200:
        log.info('Dataset downloaded successfully. Processing dataset...')
        # Drop any previously-ingested copy of this bundle before
        # writing the fresh data.
        bundle_folder = get_data_source_folder(ds_name)
        shutil.rmtree(bundle_folder, ignore_errors=True)
        target_path = get_temp_bundles_folder()
        try:
            decoder = MultipartDecoder.from_response(r)
            counter = 1
            for part in decoder.parts:
                log.info("Processing file {} of {}".format(
                    counter, len(decoder.parts)))
                h = part.headers[b'Content-Disposition'].decode('utf-8')
                # Extracting the filename from the header
                name = re.search(r'filename="(.*)"', h).group(1)
                filename = os.path.join(target_path, name)
                with open(filename, 'wb') as f:
                    f.write(part.content)
                self.process_temp_bundle(ds_name, filename)
                counter += 1
        except NonMultipartContentTypeException:
            # The server reported an error as a JSON body instead of a
            # multipart payload.
            response = r.json()
            raise MarketplaceHTTPRequest(
                request='ingest dataset',
                error=response,
            )
    else:
        raise MarketplaceHTTPRequest(
            request='ingest dataset',
            error=r.status_code,
        )

    log.info('{} ingested successfully'.format(ds_name))
def ingest(self, ds_name, start=None, end=None, force_download=False):
    """Download a subscribed dataset and ingest it as a local bundle.

    Verifies that *ds_name* is registered and that the chosen address has
    an active subscription, then streams the multipart payload from the
    auth server, writing each part to the temp bundles folder and
    processing it.

    Parameters
    ----------
    ds_name : str
        Name of the dataset to ingest.
    start, end, force_download : unused in this method — presumably part
        of the public ingest interface; TODO confirm against callers.

    Raises
    ------
    MarketplaceHTTPRequest
        On a non-200 response or a non-multipart error payload.
    """
    # ds_name = ds_name.lower()
    # TODO: catch error conditions
    provider_info = self.mkt_contract.functions.getDataProviderInfo(
        Web3.toHex(ds_name)).call()
    if not provider_info[4]:
        print('The requested "{}" dataset is not registered in '
              'the Data Marketplace.'.format(ds_name))
        return

    address, address_i = self.choose_pubaddr()
    fns = self.mkt_contract.functions
    check_sub = fns.checkAddressSubscription(address,
                                             Web3.toHex(ds_name)).call()

    if check_sub[0] != address or self.to_text(check_sub[1]) != ds_name:
        # BUGFIX: corrected the 'Plese' typo in the message.
        print('You are not subscribed to dataset "{}" with address {}. '
              'Please subscribe first.'.format(ds_name, address))
        return

    # check_sub[5] is the subscription-active flag; check_sub[4] the
    # expiry timestamp in epoch seconds.
    if not check_sub[5]:
        # BUGFIX: added the missing space before 'Please' and abort —
        # the message tells the user to renew first, but the code fell
        # through and attempted the ingestion anyway.
        print('Your subscription to dataset "{}" expired on {} UTC. '
              'Please renew your subscription by running:\n'
              'catalyst marketplace subscribe --dataset={}'.format(
                  ds_name,
                  pd.to_datetime(check_sub[4], unit='s', utc=True),
                  ds_name))
        return

    if 'key' in self.addresses[address_i]:
        key = self.addresses[address_i]['key']
        secret = self.addresses[address_i]['secret']
    else:
        key, secret = get_key_secret(address)
    headers = get_signed_headers(ds_name, key, secret)

    log.debug('Starting download of dataset for ingestion...')
    r = requests.post(
        '{}/marketplace/ingest'.format(AUTH_SERVER),
        headers=headers,
        stream=True,
    )
    if r.status_code == 200:
        target_path = get_temp_bundles_folder()
        try:
            decoder = MultipartDecoder.from_response(r)
            for part in decoder.parts:
                h = part.headers[b'Content-Disposition'].decode('utf-8')
                # Extracting the filename from the header
                name = re.search(r'filename="(.*)"', h).group(1)
                filename = os.path.join(target_path, name)
                with open(filename, 'wb') as f:
                    f.write(part.content)
                self.process_temp_bundle(ds_name, filename)
        except NonMultipartContentTypeException:
            # The server reported an error as a JSON body instead of a
            # multipart payload.
            response = r.json()
            raise MarketplaceHTTPRequest(
                request='ingest dataset',
                error=response,
            )
    else:
        raise MarketplaceHTTPRequest(
            request='ingest dataset',
            error=r.status_code,
        )

    log.info('{} ingested successfully'.format(ds_name))