def test_update_runs_description(dataset):
    """Updating a run description must reject JSON that is not a serialized
    RunDescriber and accept a properly serialized one."""
    # Neither an empty JSON object nor a bare string is a valid description.
    for bad_desc in ('{}', 'description'):
        with pytest.raises(ValueError):
            mut_queries.update_run_description(
                dataset.conn, dataset.run_id, bad_desc)

    # A genuine RunDescriber serialization must be accepted without error.
    good_desc = serial.to_json_for_storage(RunDescriber(InterDependencies_()))
    mut_queries.update_run_description(
        dataset.conn, dataset.run_id, good_desc)
def test_default_jsonization_for_storage(some_interdeps):
    """
    Test that a RunDescriber is json-dumped as version 2
    """
    idps = some_interdeps[0]
    descriptor = RunDescriber(idps)

    # Version-2 storage carries both the old- and new-style interdependencies.
    expected_json = json.dumps(
        {
            'version': 2,
            'interdependencies': new_to_old(idps)._to_dict(),
            'interdependencies_': idps._to_dict(),
        }
    )

    assert serial.to_json_for_storage(descriptor) == expected_json
def _perform_start_actions(self) -> None:
    """
    Perform the actions that must take place once the run has been
    started
    """
    # Register every parameter of this run with the results table
    # (the storage layer works with old-style ParamSpecs).
    for paramspec in new_to_old(self._interdeps).paramspecs:
        add_parameter(self.conn, self.table_name, paramspec)

    # Persist the serialized run description, then stamp the start time.
    serialized_desc = serial.to_json_for_storage(self.description)
    update_run_description(self.conn, self.run_id, serialized_desc)
    set_run_timestamp(self.conn, self.run_id)
def _assert_xarray_metadata_is_as_expected(xarray_ds, qc_dataset):
    """Check that the xarray dataset attributes mirror the metadata of the
    QCoDeS dataset it was exported from."""
    snapshot_raw = qc_dataset.snapshot_raw
    # A missing snapshot is exported as the literal string "null".
    expected_snapshot = snapshot_raw if snapshot_raw is not None else "null"

    pairs = [
        (xarray_ds.ds_name, qc_dataset.name),
        (xarray_ds.sample_name, qc_dataset.sample_name),
        (xarray_ds.exp_name, qc_dataset.exp_name),
        (xarray_ds.snapshot, expected_snapshot),
        (xarray_ds.guid, qc_dataset.guid),
        (xarray_ds.run_timestamp, qc_dataset.run_timestamp()),
        (xarray_ds.completed_timestamp, qc_dataset.completed_timestamp()),
        (xarray_ds.captured_run_id, qc_dataset.captured_run_id),
        (xarray_ds.captured_counter, qc_dataset.captured_counter),
        (xarray_ds.run_id, qc_dataset.run_id),
        (xarray_ds.run_description,
         serial.to_json_for_storage(qc_dataset.description)),
    ]
    for actual, wanted in pairs:
        assert actual == wanted
def db_extractor(dbloc=None, extractpath=None, ids=None, overwrite=False,
                 timestamp=True, paramtofilename=False,
                 newline_slowaxes=True, no_folders=False,
                 suppress_output=False, useopendbconnection=False,
                 checktimes=False):
    """
    Extract runs from a QCoDeS ``*.db`` file into tab-separated ``.dat``
    files plus a JSON file holding the run snapshot and run description.

    Args:
        dbloc: path to the qcodes .db file to unpack
        extractpath: output root; defaults to the folder of the .db file
        ids: iterable of run ids to extract; falsy (None/empty) means all
        overwrite: re-extract runs whose .dat file already exists
        timestamp: include the run timestamp in the per-run folder name
        paramtofilename: append the run's parameter string to file names
        newline_slowaxes: insert a blank line whenever a slow (outer) set
            axis changes value
        no_folders: write all files flat into a single folder instead of
            one folder per run
        suppress_output: silence progress printing (NOTE(review): this
            also skips the .db-file existence check, as in the original)
        useopendbconnection: reuse the currently configured qcodes database
            instead of temporarily pointing qcodes at ``dbloc``
        checktimes: print timing diagnostics for the individual stages
    """
    # Bug fix: mutable default argument (ids=[]) replaced by None sentinel.
    if ids is None:
        ids = []
    # Only for debugging purposes
    if not suppress_output:
        if os.path.isfile(dbloc) and dbloc.endswith('.db'):
            print('*.db file found, continue to unpack...')
        else:
            print('*.db file location cannot be found..')
            return
    if not useopendbconnection:
        # Temporarily point qcodes at the requested database; remember the
        # previous location so it can be restored at the end.
        configuration = qc.config
        previously_opened_db = configuration['core']['db_location']
        configuration['core']['db_location'] = dbloc
        configuration.save_to_home()
        initialise_database()

    times = [datetime.datetime.now()]

    # Looping through all exps inside database (experiment ids are 1-based)
    for expid in range(1, len(qc.dataset.experiment_container.experiments()) + 1):
        exp = qc.load_experiment(expid)
        expname = exp.name
        samplename = exp.sample_name
        folderstring = ('Exp' + '{:02d}'.format(expid) + f'({expname})'
                        + '-Sample' + f'({samplename})')
        nmeas = exp.last_counter
        if extractpath is not None:
            dbpath = os.path.abspath(extractpath)
        else:
            dbpath = os.path.abspath(dbloc)
        if checktimes:
            times.append(datetime.datetime.now())
            print('Loaded db and exp ', times[-1] - times[-2])

        # Looping through all runs inside experiment (counters are 1-based)
        for runcounter in range(1, nmeas + 1):
            run = exp.data_set(runcounter)
            runid = run.run_id
            runname = run.name
            # Extract only requested (or all) runs that actually hold data.
            if (not ids or runid in ids) and (run.number_of_results > 0):
                # Adding optional file/folder settings
                if timestamp:
                    timestampcut = (str(run.run_timestamp())
                                    .replace(":", "").replace("-", "")
                                    .replace(" ", "-"))
                else:
                    timestampcut = ''
                if paramtofilename:
                    runparams = '_' + run.parameters
                else:
                    runparams = ''
                parameters = run.get_parameters()
                num_of_parameters = len(parameters)

                # Getting info on parameters used in the run
                meas_params = []
                param_names = [[]] * num_of_parameters
                depends = [[]] * num_of_parameters
                for k in range(num_of_parameters):
                    param_names[k] = parameters[k].name
                    if parameters[k].depends_on:
                        # A parameter with depends is a measured parameter.
                        depends[k] = parameters[k].depends_on
                        meas_params.append(k)

                # result_dict: one entry per independent measurement group;
                # values are the indices of the measured params in the group.
                result_dict = {}
                # depend_dict: set-axis indices belonging to the measurements
                # in the corresponding entry of result_dict.
                depend_dict = {}
                n = 0
                # Filling the dicts: group measured params by equal depends.
                for l in meas_params:
                    params_with_equal_depends = [
                        idx for idx, e in enumerate(depends) if e == depends[l]
                    ]
                    if params_with_equal_depends not in result_dict.values():
                        result_dict.update([(n, params_with_equal_depends)])
                        # Split dependency string into individual names.
                        deps = parameters[l].depends_on.split(', ')
                        depsind = [param_names.index(d) for d in deps]
                        depend_dict.update([(n, depsind)])
                        n = n + 1
                if checktimes:
                    times.append(datetime.datetime.now())
                    print('Determined meas and set params ', times[-1] - times[-2])

                # Length of result_dict determines number of .dat files.
                n = 0
                for fileidx in range(len(result_dict)):
                    # Constructing final filepath. (Dead code removed: the
                    # original pre-assigned filenamep2/filenamejson here, but
                    # both branches below always overwrite them.)
                    filenamep1 = ("{:03d}".format(runid) + '_'
                                  + timestampcut + '_' + run.name)
                    if no_folders:
                        # If number of files > 1, add a number in front
                        if len(result_dict) > 1:
                            filenamep2 = ('{:03d}'.format(runid) + '-' + str(n)
                                          + "_" + run.name + runparams + ".dat")
                            filenamejson = ('{:03d}'.format(runid) + '-'
                                            + "run_snapshot.json")
                        else:
                            filenamep2 = ('{:03d}'.format(runid) + '-'
                                          + run.name + runparams + ".dat")
                            filenamejson = ('{:03d}'.format(runid) + '-'
                                            + "run_snapshot.json")
                        folder = (dbpath.split('.')[0])
                    else:
                        # If number of files > 1, add a number in front
                        if len(result_dict) > 1:
                            filenamep2 = str(n) + "_" + run.name + runparams + ".dat"
                            filenamejson = "run_snapshot.json"
                        else:
                            filenamep2 = run.name + runparams + ".dat"
                            filenamejson = "run_snapshot.json"
                        folder = os.path.join((dbpath.split('.')[0]),
                                              folderstring, filenamep1)
                    # Sanitize characters that are illegal/awkward in paths.
                    folder = folder.replace('?', '_')
                    filenamep2 = filenamep2.replace(" ", "_").replace('?', '_')
                    filenamejson = filenamejson.replace(" ", "_").replace('?', '_')
                    fullpath = os.path.join(folder, filenamep2)
                    fullpathjson = os.path.join(folder, filenamejson)
                    if not os.path.exists(folder):
                        os.makedirs(folder)
                    if checktimes:
                        times.append(datetime.datetime.now())
                        print('Constructing file and folder names ', times[-1] - times[-2])

                    # Check if file exists already
                    if os.path.isfile(fullpath) and not overwrite:
                        # File found, skipping extraction
                        pass
                    else:
                        # Construct dat file header
                        header = ''
                        header += (f"Run #{runid}: {runname}, Experiment: {expname}, Sample name: {samplename}, Number of values: "
                                   + str(run.number_of_results) + "\n")
                        try:
                            comment = run.get_metadata('Comment')
                            header += f"Comment: {comment} \n"
                        except Exception:
                            # Best effort: runs without 'Comment' metadata
                            # simply get an empty comment line. (Bug fix:
                            # was a bare except.)
                            header += "\n"
                        if checktimes:
                            times.append(datetime.datetime.now())
                            print('Before reading from db ', times[-1] - times[-2])
                        all_param_data = run.get_parameter_data()
                        if checktimes:
                            times.append(datetime.datetime.now())
                            print('run.get_parameter_data() ', times[-1] - times[-2])

                        meas_params = result_dict[fileidx]  # measurement params
                        set_params = depend_dict[fileidx]   # depend (set) params
                        setdata = run.get_parameter_data(param_names[meas_params[0]])
                        if checktimes:
                            times.append(datetime.datetime.now())
                            print('run.get_parameter_data(), only setdata ', times[-1] - times[-2])
                        headernames = ''
                        headerlabelsandunits = ''
                        if checktimes:
                            times.append(datetime.datetime.now())
                            print('Db read out ', times[-1] - times[-2])

                        # Pre-allocate data array
                        lset = len(set_params)
                        lmeas = len(meas_params)
                        # NOTE(review): the inner lookup uses param_names[0];
                        # presumably the first set axis of this group — verify.
                        lval = len((setdata[param_names[meas_params[0]]][param_names[0]]).flatten())
                        run_matrix = np.empty([lval, lset + lmeas])
                        run_matrix.fill(np.nan)

                        # Collect depends (set axes) columns
                        colcounter = 0
                        for sp in set_params:
                            setcol = (all_param_data[param_names[meas_params[0]]][param_names[sp]]).flatten()
                            run_matrix[0:len(setcol), colcounter] = setcol
                            headernames += parameters[sp].name + "\t"
                            headerlabelsandunits += (parameters[sp].label + " ("
                                                     + parameters[sp].unit + ")" + "\t")
                            colcounter = colcounter + 1
                        if checktimes:
                            times.append(datetime.datetime.now())
                            print('Set_params in runmatrix ', times[-1] - times[-2])

                        # Collect measurement (meas axes) columns
                        for mp in meas_params:
                            measdata = (all_param_data[param_names[mp]][param_names[mp]]).flatten()
                            run_matrix[0:len(measdata), colcounter] = measdata
                            headernames += parameters[mp].name + "\t"
                            headerlabelsandunits += (parameters[mp].label + " ("
                                                     + parameters[mp].unit + ")" + "\t")
                            colcounter = colcounter + 1
                        if checktimes:
                            times.append(datetime.datetime.now())
                            print('Meas_params in runmatrix ', times[-1] - times[-2])
                        header += headernames + '\n'
                        header += headerlabelsandunits

                        # Confirming function is a good boy
                        if not suppress_output:
                            print("Saving measurement with id " + str(runid)
                                  + " to " + fullpath)

                        # Actual saving of file. Bug fix: use a context
                        # manager so the handle is closed even on error.
                        with open(fullpath, "wb") as f:
                            np.savetxt(f, np.array([]), header=header)
                            if checktimes:
                                times.append(datetime.datetime.now())
                                print('Opening txt file and saving header ', times[-1] - times[-2])

                            # Routine for properly slicing the slow axes
                            # (works for any number of dimensions)
                            slicearray = np.array([]).astype(int)
                            if newline_slowaxes:
                                for ax in range(len(set_params) - 1):
                                    slicearray = np.concatenate(
                                        (slicearray,
                                         np.where(run_matrix[:-1, ax] != run_matrix[1:, ax])[0] + 1))
                                slicearray = np.unique(slicearray)
                            if checktimes:
                                times.append(datetime.datetime.now())
                                print('newline_slowaxes time consumption ', times[-1] - times[-2])
                            vsliced = np.split(run_matrix, slicearray, axis=0)
                            # This is just one write action if newline_slowaxes
                            # is turned off (and a bit faster then)
                            for ci in range(len(vsliced)):
                                np.savetxt(f, vsliced[ci], delimiter='\t')
                                if ci != len(vsliced) - 1:
                                    f.write("\n".encode())
                        if checktimes:
                            times.append(datetime.datetime.now())
                            print('Writing of the textfile ', times[-1] - times[-2])

                        # Saving of snapshot + run description to JSON file
                        with open(fullpathjson, 'w') as jsonfile:
                            if run.description:
                                total_json = json.loads(
                                    sz.to_json_for_storage(run.description))
                                if run.snapshot:
                                    total_json = {**total_json, **run.snapshot}
                                else:
                                    # Bug fix: warning was a plain string with
                                    # a typo ('{ruinid}'); now a real f-string.
                                    print(f'Warning: Measurement {runid} has no snapshot.')
                            else:
                                # Bug fix: total_json used to be unbound here,
                                # so json.dump raised NameError. Write what we
                                # have (possibly empty) instead.
                                total_json = dict(run.snapshot) if run.snapshot else {}
                                print(f'Warning: Measurement {runid} has no snapshot or run description. Axes for plotting cannot be extracted.')
                            json.dump(total_json, jsonfile, indent=4)
                    n = n + 1

    if checktimes:
        times.append(datetime.datetime.now())
        print('Total time ', times[-1] - times[0])
    if not useopendbconnection:
        # Restore the database location configured before this call.
        configuration['core']['db_location'] = previously_opened_db
        configuration.save_to_home()
        initialise_database()
def _insert_run(
    conn: ConnectionPlus,
    exp_id: int,
    name: str,
    guid: str,
    parameters: Optional[List[ParamSpec]] = None,
):
    """
    Insert a new run row into the 'runs' table.

    Returns:
        A tuple ``(run_counter, formatted_name, run_id)`` describing the
        freshly inserted run.
    """
    # get run counter and formatter from experiments
    run_counter, format_string = select_many_where(
        conn,
        "experiments",
        "run_counter",
        "format_string",
        where_column="exp_id",
        where_value=exp_id,
    )
    run_counter += 1
    formatted_name = format_table_name(format_string, name, exp_id, run_counter)
    table = "runs"

    parameters = parameters or []
    # Serialize the initial run description (parameters only) for storage.
    desc_str = serial.to_json_for_storage(
        RunDescriber(old_to_new(InterDependencies(*parameters)))
    )

    with atomic(conn) as conn:
        if parameters:
            query = f"""
            INSERT INTO {table}
                (name,
                 exp_id,
                 guid,
                 result_table_name,
                 result_counter,
                 run_timestamp,
                 parameters,
                 is_completed,
                 run_description)
            VALUES
                (?,?,?,?,?,?,?,?,?)
            """
            curr = transaction(
                conn,
                query,
                name,
                exp_id,
                guid,
                formatted_name,
                run_counter,
                None,
                ",".join(p.name for p in parameters),
                False,
                desc_str,
            )
            _add_parameters_to_layout_and_deps(conn, formatted_name, *parameters)
        else:
            query = f"""
            INSERT INTO {table}
                (name,
                 exp_id,
                 guid,
                 result_table_name,
                 result_counter,
                 run_timestamp,
                 is_completed,
                 run_description)
            VALUES
                (?,?,?,?,?,?,?,?)
            """
            curr = transaction(
                conn,
                query,
                name,
                exp_id,
                guid,
                formatted_name,
                run_counter,
                None,
                False,
                desc_str,
            )
    run_id = curr.lastrowid
    return run_counter, formatted_name, run_id