Example #1
    def create_metadata(self,
                        key,
                        secret,
                        ds_name,
                        data_frequency,
                        desc,
                        has_history=True,
                        has_live=True):
        """

        Returns
        -------

        """
        headers = get_signed_headers(ds_name, key, secret)
        r = requests.post(
            '{}/marketplace/register'.format(AUTH_SERVER),
            json=dict(
                ds_name=ds_name,
                desc=desc,
                data_frequency=data_frequency,
                has_history=has_history,
                has_live=has_live,
            ),
            headers=headers,
        )

        if r.status_code != 200:
            raise MarketplaceHTTPRequest(request='register',
                                         error=r.status_code)

        if 'error' in r.json():
            raise MarketplaceHTTPRequest(request='register',
                                         error=r.json()['error'])
Example #2
    def publish(self, dataset, datadir, watch):
        dataset = dataset.lower()
        provider_info = self.mkt_contract.functions.getDataProviderInfo(
            Web3.toHex(dataset.encode())
        ).call()

        if not provider_info[4]:
            raise MarketplaceDatasetNotFound(dataset=dataset)

        match = next(
            (l for l in self.addresses if l['pubAddr'] == provider_info[0]),
            None
        )
        if not match:
            raise MarketplaceNoAddressMatch(
                dataset=dataset,
                address=provider_info[0])

        print('Using address: {} to publish this dataset.'.format(
            provider_info[0]))

        if 'key' in match:
            key = match['key']
            secret = match['secret']
        else:
            key, secret = get_key_secret(provider_info[0], match['wallet'])

        filenames = glob.glob(os.path.join(datadir, '*.csv'))

        if not filenames:
            raise MarketplaceNoCSVFiles(datadir=datadir)

        def read_file(pathname):
            with open(pathname, 'rb') as f:
                return f.read()

        files = []
        for idx, file in enumerate(filenames):
            log.info('Uploading file {} of {}: {}'.format(
                idx + 1, len(filenames), file))
            files.append(('file', (os.path.basename(file), read_file(file))))

        headers = get_signed_headers(dataset, key, secret)
        r = requests.post('{}/marketplace/publish'.format(AUTH_SERVER),
                          files=files,
                          headers=headers)

        if r.status_code != 200:
            raise MarketplaceHTTPRequest(request='upload file',
                                         error=r.status_code)

        if 'error' in r.json():
            raise MarketplaceHTTPRequest(request='upload file',
                                         error=r.json()['error'])

        log.info('File processed successfully.')

        print('\nDataset {} uploaded and processed successfully.'.format(
            dataset))
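
The upload itself is a plain multipart POST. A minimal sketch of just that step, assuming the URL and signed headers come from the same helpers used in publish() above (the helper name upload_csvs is illustrative):

import glob
import os
import requests

def upload_csvs(datadir, url, headers):
    # Collect every CSV in datadir and send it as multipart form data,
    # mirroring the files list built in publish() above.
    filenames = glob.glob(os.path.join(datadir, '*.csv'))
    if not filenames:
        raise ValueError('no CSV files found in {}'.format(datadir))

    files = []
    for path in filenames:
        with open(path, 'rb') as f:
            # requests accepts (field name, (file name, raw bytes)) tuples.
            files.append(('file', (os.path.basename(path), f.read())))

    r = requests.post(url, files=files, headers=headers)
    if r.status_code != 200:
        raise RuntimeError('upload failed with HTTP {}'.format(r.status_code))
    return r.json()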
Example #3
    def publish(self, dataset, datadir, watch):
        dataset = dataset.lower()
        provider_info = self.mkt_contract.functions.getDataProviderInfo(
            Web3.toHex(dataset)
        ).call()

        if not provider_info[4]:
            raise MarketplaceDatasetNotFound(dataset=dataset)

        match = next(
            (l for l in self.addresses if l['pubAddr'] == provider_info[0]),
            None
        )
        if not match:
            raise MarketplaceNoAddressMatch(
                dataset=dataset,
                address=provider_info[0])

        print('Using address: {} to publish this dataset.'.format(
            provider_info[0]))

        if 'key' in match:
            key = match['key']
            secret = match['secret']
        else:
            key, secret = get_key_secret(provider_info[0])

        headers = get_signed_headers(dataset, key, secret)
        filenames = glob.glob(os.path.join(datadir, '*.csv'))

        if not filenames:
            raise MarketplaceNoCSVFiles(datadir=datadir)

        files = []
        for file in filenames:
            files.append(('file', open(file, 'rb')))

        r = requests.post('{}/marketplace/publish'.format(AUTH_SERVER),
                          files=files,
                          headers=headers)

        if r.status_code != 200:
            raise MarketplaceHTTPRequest(request='upload file',
                                         error=r.status_code)

        if 'error' in r.json():
            raise MarketplaceHTTPRequest(request='upload file',
                                         error=r.json()['error'])

        print('Dataset {} uploaded successfully.'.format(dataset))
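
This older variant hands open file handles straight to requests and never closes them. If the handle-based form is preferred over reading the bytes up front (as Example #2 does), an ExitStack keeps the cleanup deterministic. The URL and headers are again assumed to come from the surrounding helpers, and the function name is illustrative:

import contextlib
import glob
import os
import requests

def upload_csvs_from_handles(datadir, url, headers):
    filenames = glob.glob(os.path.join(datadir, '*.csv'))
    if not filenames:
        raise ValueError('no CSV files found in {}'.format(datadir))

    with contextlib.ExitStack() as stack:
        # Every handle opened here is closed when the block exits.
        files = [('file', stack.enter_context(open(path, 'rb')))
                 for path in filenames]
        r = requests.post(url, files=files, headers=headers)

    if r.status_code != 200:
        raise RuntimeError('upload failed with HTTP {}'.format(r.status_code))
    return r.json()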
Example #4
    def ingest(self, ds_name=None, start=None, end=None, force_download=False):

        if ds_name is None:

            df_sets = self._list()
            if df_sets.empty:
                print('There are no datasets available yet.')
                return

            set_print_settings()
            while True:
                print(df_sets)
                dataset_num = input('Choose the dataset you want to '
                                    'ingest [0..{}]: '.format(df_sets.size -
                                                              1))
                try:
                    dataset_num = int(dataset_num)
                except ValueError:
                    print(
                        'Enter a number between 0 and {}'.format(df_sets.size -
                                                                 1))
                else:
                    if dataset_num not in range(0, df_sets.size):
                        print('Enter a number between 0 and {}'.format(
                            df_sets.size - 1))
                    else:
                        ds_name = df_sets.iloc[dataset_num]['dataset']
                        break

        # ds_name = ds_name.lower()

        # TODO: catch error conditions
        provider_info = self.mkt_contract.functions.getDataProviderInfo(
            Web3.toHex(ds_name)).call()

        if not provider_info[4]:
            print('The requested "{}" dataset is not registered in '
                  'the Data Marketplace.'.format(ds_name))
            return

        address, address_i = self.choose_pubaddr()
        fns = self.mkt_contract.functions
        check_sub = fns.checkAddressSubscription(address,
                                                 Web3.toHex(ds_name)).call()

        if check_sub[0] != address or self.to_text(check_sub[1]) != ds_name:
            print('You are not subscribed to dataset "{}" with address {}. '
                  'Please subscribe first.'.format(ds_name, address))
            return

        if not check_sub[5]:
            print('Your subscription to dataset "{}" expired on {} UTC. '
                  'Please renew your subscription by running:\n'
                  'catalyst marketplace subscribe --dataset={}'.format(
                      ds_name, pd.to_datetime(check_sub[4], unit='s',
                                              utc=True), ds_name))

        if 'key' in self.addresses[address_i]:
            key = self.addresses[address_i]['key']
            secret = self.addresses[address_i]['secret']
        else:
            key, secret = get_key_secret(address,
                                         self.addresses[address_i]['wallet'])

        headers = get_signed_headers(ds_name, key, secret)
        log.info('Starting download of dataset for ingestion...')
        r = requests.post(
            '{}/marketplace/ingest'.format(AUTH_SERVER),
            headers=headers,
            stream=True,
        )
        if r.status_code == 200:
            log.info('Dataset downloaded successfully. Processing dataset...')
            bundle_folder = get_data_source_folder(ds_name)
            shutil.rmtree(bundle_folder, ignore_errors=True)
            target_path = get_temp_bundles_folder()
            try:
                decoder = MultipartDecoder.from_response(r)
                # with maybe_show_progress(
                #     iter(decoder.parts),
                #     True,
                #     label='Processing files') as part:
                counter = 1
                for part in decoder.parts:
                    log.info("Processing file {} of {}".format(
                        counter, len(decoder.parts)))
                    h = part.headers[b'Content-Disposition'].decode('utf-8')
                    # Extracting the filename from the header
                    name = re.search(r'filename="(.*)"', h).group(1)

                    filename = os.path.join(target_path, name)
                    with open(filename, 'wb') as f:
                        # for chunk in part.content.iter_content(
                        #         chunk_size=1024):
                        #     if chunk: # filter out keep-alive new chunks
                        #         f.write(chunk)
                        f.write(part.content)

                    self.process_temp_bundle(ds_name, filename)
                    counter += 1

            except NonMultipartContentTypeException:
                response = r.json()
                raise MarketplaceHTTPRequest(
                    request='ingest dataset',
                    error=response,
                )
        else:
            raise MarketplaceHTTPRequest(
                request='ingest dataset',
                error=r.status_code,
            )

        log.info('{} ingested successfully'.format(ds_name))
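
The server streams the dataset back as a multipart response, which requests_toolbelt decodes. A trimmed sketch of just that download-and-unpack step, assuming the URL, signed headers, and target directory are prepared as in ingest() above (the function name is illustrative):

import os
import re
import requests
from requests_toolbelt.multipart.decoder import (
    MultipartDecoder, NonMultipartContentTypeException)

def download_bundle_parts(url, headers, target_path):
    r = requests.post(url, headers=headers, stream=True)
    if r.status_code != 200:
        raise RuntimeError('ingest failed with HTTP {}'.format(r.status_code))

    try:
        decoder = MultipartDecoder.from_response(r)
    except NonMultipartContentTypeException:
        # The server answered with a plain JSON error instead of file parts.
        raise RuntimeError('ingest failed: {}'.format(r.json()))

    written = []
    for part in decoder.parts:
        disposition = part.headers[b'Content-Disposition'].decode('utf-8')
        # Pull the filename out of the Content-Disposition header.
        name = re.search(r'filename="(.*)"', disposition).group(1)
        filename = os.path.join(target_path, name)
        with open(filename, 'wb') as f:
            f.write(part.content)
        written.append(filename)
    return written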
Example #5
    def ingest(self, ds_name, start=None, end=None, force_download=False):

        # ds_name = ds_name.lower()

        # TODO: catch error conditions
        provider_info = self.mkt_contract.functions.getDataProviderInfo(
            Web3.toHex(ds_name)).call()

        if not provider_info[4]:
            print('The requested "{}" dataset is not registered in '
                  'the Data Marketplace.'.format(ds_name))
            return

        address, address_i = self.choose_pubaddr()
        fns = self.mkt_contract.functions
        check_sub = fns.checkAddressSubscription(address,
                                                 Web3.toHex(ds_name)).call()

        if check_sub[0] != address or self.to_text(check_sub[1]) != ds_name:
            print('You are not subscribed to dataset "{}" with address {}. '
                  'Please subscribe first.'.format(ds_name, address))
            return

        if not check_sub[5]:
            print('Your subscription to dataset "{}" expired on {} UTC. '
                  'Please renew your subscription by running:\n'
                  'catalyst marketplace subscribe --dataset={}'.format(
                      ds_name, pd.to_datetime(check_sub[4], unit='s',
                                              utc=True), ds_name))

        if 'key' in self.addresses[address_i]:
            key = self.addresses[address_i]['key']
            secret = self.addresses[address_i]['secret']
        else:
            key, secret = get_key_secret(address)

        headers = get_signed_headers(ds_name, key, secret)
        log.debug('Starting download of dataset for ingestion...')
        r = requests.post(
            '{}/marketplace/ingest'.format(AUTH_SERVER),
            headers=headers,
            stream=True,
        )
        if r.status_code == 200:
            target_path = get_temp_bundles_folder()
            try:
                decoder = MultipartDecoder.from_response(r)
                for part in decoder.parts:
                    h = part.headers[b'Content-Disposition'].decode('utf-8')
                    # Extracting the filename from the header
                    name = re.search(r'filename="(.*)"', h).group(1)

                    filename = os.path.join(target_path, name)
                    with open(filename, 'wb') as f:
                        # for chunk in part.content.iter_content(
                        #         chunk_size=1024):
                        #     if chunk: # filter out keep-alive new chunks
                        #         f.write(chunk)
                        f.write(part.content)

                    self.process_temp_bundle(ds_name, filename)

            except NonMultipartContentTypeException:
                response = r.json()
                raise MarketplaceHTTPRequest(
                    request='ingest dataset',
                    error=response,
                )
        else:
            raise MarketplaceHTTPRequest(
                request='ingest dataset',
                error=r.status_code,
            )

        log.info('{} ingested successfully'.format(ds_name))
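
Both ingest() variants gate the download on the same on-chain subscription check. A minimal sketch of that check factored into a helper: the tuple layout (subscriber at index 0, dataset name at 1, expiry timestamp at 4, active flag at 5) follows the indices used above, while the helper name and the to_text decoder argument are illustrative assumptions.

import pandas as pd
from web3 import Web3

def check_subscription(mkt_contract, address, ds_name, to_text):
    # Query the marketplace contract, as checkAddressSubscription is used above;
    # bytes are passed explicitly, matching Example #2's Web3.toHex(dataset.encode()).
    check_sub = mkt_contract.functions.checkAddressSubscription(
        address, Web3.toHex(ds_name.encode())).call()

    if check_sub[0] != address or to_text(check_sub[1]) != ds_name:
        return False, 'not subscribed'

    if not check_sub[5]:
        expired_on = pd.to_datetime(check_sub[4], unit='s', utc=True)
        return False, 'subscription expired on {} UTC'.format(expired_on)

    return True, 'active'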