def _load(self):
    """Load every run from the database into the catalog.

    Builds ``self._entries`` (guid -> LocalCatalogEntry), the
    run_id <-> guid lookup tables, and the cached lists of experiment
    and sample names found across all runs.
    """
    self._entries = {}
    # Reset the lookup together with _entries so a reload does not keep
    # stale run_id -> guid pairs from a previous call to _load().
    self._guid_lookup = {}
    exps = set()
    samples = set()
    for row in get_runs(self.conn):
        run_description = json.loads(row['run_description'])
        # move these functions so they can be loaded elsewhere
        exp_name, sample_name = get_names_from_experiment_id(
            self.conn, row['exp_id'])
        dependent_parameters, independent_parameters = \
            parameters_from_description(run_description)
        self._entries[row['guid']] = LocalCatalogEntry(
            name='run {}'.format(row['run_id']),
            description='run {} at {} with guid {}'.format(
                row['run_id'], str(self._db_path), row['guid']),
            driver=self._source_driver,
            direct_access='forbid',
            args={
                'db_path': str(self._db_path),
                'guid': row['guid'],
                'run_id': row['run_id']
            },
            cache=None,
            parameters=[],
            metadata={
                "start_time": row['run_timestamp'],
                "stop_time": row['completed_timestamp'],
                "dependent_parameters": dependent_parameters,
                "independent_parameters": independent_parameters,
                "experiment_name": exp_name,
                "sample_name": sample_name,
                "table_name": row['result_table_name'],
                'plots': make_default_plots(run_description),
            },
            catalog_dir=str(self._db_path),
            getenv=False,
            getshell=False,
            catalog=self,
        )
        self._guid_lookup[row['run_id']] = row['guid']
        exps.add(exp_name)
        samples.add(sample_name)
    self._experiments = list(exps)
    self._samples = list(samples)
    # Inverse mapping of _guid_lookup: guid -> run_id.
    self._run_id_lookup = {
        val: key for key, val in self._guid_lookup.items()
    }
def read_partition(self, idx):
    """Return the part of the data corresponding to one partition.

    ``idx`` is either an integer position into the dependent
    parameters of this run, or a parameter name directly.
    """
    dependent, _ = parameters_from_description(self.run_description)
    if isinstance(idx, int):
        name = dependent[idx]
    elif isinstance(idx, str):
        name = idx
    else:
        raise ValueError('Partition index should be an integer or parameter name')
    return self._get_partition(name)
def _get_schema(self):
    """Return an intake ``Schema`` describing this run.

    Should take a roughly constant amount of time regardless of the
    contents of the dataset: only row counts and metadata are read.
    One partition is exposed per dependent parameter.
    """
    dataset = DataSet(run_id=self.run_id, conn=self._conn)
    self._qcodes_dataset = dataset
    # NOTE(review): the original assigned the new DataSet to
    # self._qcodes_dataset but then read self._dataset — use the
    # freshly created dataset consistently; confirm that no separate
    # `_dataset` property was intended here.
    dep_params, indep_params = parameters_from_description(self.run_description)
    return Schema(
        datashape=None,
        dtype=None,
        shape=(dataset.number_of_results,),
        # not sure what else to do here
        npartitions=len(dep_params),
        extra_metadata={
            'dataset_metadata': dataset.metadata,
        }
    )
def _read_data(self, columns=()):
    """Return a mapping of parameter name -> data for ``columns``.

    Results are cached in ``self._datadict``; only parameters not yet
    cached are fetched from the database. An empty ``columns`` means
    "all dependent parameters of this run".
    """
    if not columns:
        columns, _ = parameters_from_description(self.run_description)
    in_memory = tuple(self._datadict.keys())
    to_read = list(set(columns).difference(in_memory))
    # Skip the database round-trip entirely when everything requested
    # is already cached (the original always called the reader, even
    # with an empty column list).
    if to_read:
        data = get_parameter_data(
            self._conn,
            self._run_table_name,
            self.run_description,
            columns=to_read,
        )
        for key, val in data.items():
            self._datadict[key] = val
    return {col: self._datadict[col] for col in columns}
def read_chunked(self):
    """Return iterator over container fragments of data source.

    Yields one partition per dependent parameter, in order.
    """
    dependent, _ = parameters_from_description(self.run_description)
    for partition_index, _param in enumerate(dependent):
        yield self._get_partition(partition_index)