def flatten_remote_catalogs(self, catalog):
    """Collapse one level of remote sub-catalogs into a single flat Catalog.

    Children of any ``RemoteCatalog`` are promoted to the top level; every
    other sub-catalog is kept under its own name. Failures while querying a
    sub-catalog are logged and shown, then skipped.
    """
    from intake.catalog.base import Catalog

    cat_dict = {}
    for name in catalog:
        try:
            from intake.catalog.base import RemoteCatalog
            sub_cat = catalog[name]
            # @TODO remote catalogs are one level too high. This check is
            # pretty rough. Would rather check that a catalog's children
            # should be top-level.
            # This is making the terrible assumption that children
            # of a RemoteCatalog be treated as top-level. But until
            # we figure out how to tell that a catalog is a real catalog
            # with data, it's as good as we can get
            if isinstance(sub_cat(), RemoteCatalog):
                # BUGFIX: the original reused `name` as the inner loop
                # variable, clobbering the outer iteration variable.
                for child_name in sub_cat:
                    cat_dict[child_name] = sub_cat[child_name]
            else:
                cat_dict[name] = sub_cat
        except Exception as e:
            log.error(e)
            msg.showMessage("Unable to query top level catalogs: ", str(e))
    return Catalog.from_dict(cat_dict)
def fullReconstruction(self):
    """Run a full reconstruction over all sinogram chunks of the current header.

    Opens a new VolumeViewer tab, then executes the workflow once per
    sinogram chunk. Errors are logged; the busy indicator is always cleared.
    """
    from .widgets.volumeviewer import VolumeViewer

    volumeviewer = VolumeViewer()
    self.recontabs.addTab(volumeviewer, '????')

    currentitem = self.headermodel.item(self.rawtabview.currentIndex())
    if not currentitem:
        msg.showMessage('Error: You must open files before reconstructing.')
        return  # BUGFIX: original fell through and crashed on a missing header
    try:
        msg.showBusy()
        msg.showMessage('Running slice reconstruction...', level=msg.INFO)
        currentheader = self.headermodel.item(self.rawtabview.currentIndex()).header
        readprocess = self.workflow.processes[0]  # hopefully! TODO: require a readprocess first
        readprocess.path.value = currentheader.startdoc['path']
        numofsinograms = currentheader.meta_array('primary').shape[1]
        self.workflow.execute_all(
            None,
            readprocess=range(0, int(numofsinograms), int(readprocess.chunksize.value)))
    except Exception as ex:
        msg.logError(ex)
        msg.showReady()
        msg.clearMessage()
def run(self):
    """Thread body: pump the search-state query queue forever.

    Any exception from a single query is reported and the loop continues,
    so one bad query cannot kill the worker thread.
    """
    while True:
        try:
            search_state.process_queries()
        except Exception as e:
            msg.logError(e)
            msg.showMessage("Unable to query: ", str(e))
def appendCatalog(self, run_catalog: BlueskyRun, **kwargs):
    """Register a run catalog in the ensemble model and display its projection."""
    # catalog.metadata.update(self.schema())
    ensemble = Ensemble()
    ensemble.append_catalog(run_catalog)
    self.ensemble_model.add_ensemble(ensemble, projection_mapping.values())
    try:
        # Apply nxSTXM projection
        # add conditional to split xarray from intent
        projected = project_all(run_catalog)
        if isinstance(projected, list):
            # temporary logic to allow for intents to be returned rather than xarray
            for intent in projected:
                if isinstance(intent, ImageIntent):
                    projected = intent.image
                    break
            else:
                raise ValueError("No data returned from ingestion.")
        self.catalog_viewer.setData(projected)  # , view_dims=('y (μm)', 'x (μm)')
    except Exception as e:
        msg.logError(e)
        msg.showMessage("Unable to display: ", str(e))
    self.current_catalog = run_catalog
def fullReconstruction(self):
    """Run a chunked full reconstruction on a Dask executor in a Qt thread.

    Iterates sinogram chunks, yielding one executed workflow per chunk to
    the reconstruction-display callback.
    """
    from xicam.Tomography.widgets.volumeviewer import VolumeViewer

    volumeviewer = VolumeViewer()
    self.recontabs.addTab(volumeviewer, '????')
    currentitem = self.headermodel.item(self.rawtabview.currentIndex())
    if not currentitem:
        msg.showMessage('Error: You must open files before reconstructing.')
        return  # BUGFIX: original fell through and crashed on a missing header
    try:
        msg.showBusy()
        msg.showMessage('Running slice reconstruction...', level=msg.INFO)
        currentheader = self.headermodel.item(self.rawtabview.currentIndex()).header
        readprocess = self.workflow.processes[0]  # hopefully! TODO: require a readprocess first
        readprocess.path.value = currentheader.startdoc['path']
        numofsinograms = currentheader.meta_array('primary').shape[1]

        executor = DaskExecutor()
        # NOTE(review): client is never used directly; presumably creating it
        # registers a default distributed scheduler — confirm before removing.
        client = distributed.Client()

        def chunkiterator(workflow):
            # One workflow execution per sinogram chunk.
            for i in range(0, int(numofsinograms), int(readprocess.chunksize.value)):
                readprocess.sinoindex.value = i
                yield executor.execute(workflow)

        _reconthread = QThreadFutureIterator(
            chunkiterator, self.workflow,
            callback_slot=partial(self.showReconstruction, mode=self.fullrecon),
            except_slot=self.exceptionCallback)
        _reconthread.start()
    except Exception as ex:
        msg.logError(ex)
        msg.showReady()
        msg.clearMessage()
def process_queries(self):
    """Take the newest pending query (blocking if none) and execute it."""
    # If there is a backlog, process only the newer query.
    must_block = True
    while True:
        try:
            query = self.query_queue.get_nowait()
            must_block = False
        except queue.Empty:
            if must_block:
                # Nothing was queued at all: wait for the next query.
                query = self.query_queue.get()
            break
    log.debug('Submitting query %r', query)
    try:
        t0 = time.monotonic()
        msg.showMessage("Running Query")
        msg.showBusy()
        self._results_catalog = self.selected_catalog.search(query)
        found_new = self.check_for_new_entries()
        duration = time.monotonic() - t0
        log.debug('Query yielded %r results (%.3f s).',
                  len(self._results_catalog), duration)
        if found_new and self.show_results_event.is_set():
            self.new_results_catalog.emit()
    except Exception as e:
        msg.logError(e)
        msg.showMessage("Problem running query")
    finally:
        msg.hideBusy()
def appendCatalog(self, run_catalog, **kwargs):
    """Clear the viewer and load the given run catalog into it."""
    self.catalog_viewer.clear()
    try:
        msg.showMessage(f"Loading image for {run_catalog.name}")
        self.catalog_viewer.setCatalog(run_catalog)
    except Exception as e:
        msg.logError(e)
        msg.showMessage("Unable to display: ", str(e))
def set_selected_catalog(self, item):
    """Make the item-th discovered sub-catalog current and search it."""
    if not self._subcatalogs:  # nothing discovered yet
        return
    name = self._subcatalogs[item]
    try:
        self.selected_catalog = self.root_catalog[name]()
        self.search()
    except Exception as e:
        log.error(e)
        msg.showMessage("Unable to contact catalog: ", str(e))
def collect_user_plugins(self):
    """Import every ``*.py`` file in the user plugin directory.

    Each file is executed as module ``xicam.user``; a failing plugin is
    reported and skipped so one bad file cannot block the rest.
    """
    for path in glob.glob(os.path.join(user_plugin_dir, '*.py')):
        try:
            spec = importlib.util.spec_from_file_location("xicam.user", path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
        except Exception as ex:
            msg.showMessage("An error occured in a user-defined plugin. See log for details.")
            msg.logError(ex)
def _update_thread(self, update_action: Callable):
    """Poll the device, run `update_action`, and report acquisition progress.

    Loops until passive mode is unchecked. Skips work while the widget is
    not visible, reconnects the device as needed, and throttles iterations
    to ``self.maxfps``.
    """
    while True:
        if not self.passive.isChecked():
            break
        if self.visibleRegion().isEmpty():
            # Widget hidden: idle at the frame-rate period.
            time.sleep(1 / self.maxfps)
            continue
        try:
            if not self.device.connected:
                with msg.busyContext():
                    msg.showMessage('Connecting to device...')
                    self.device.wait_for_connection()
            update_action()
        except (RuntimeError, CaprotoTimeoutError, ConnectionTimeoutError, TimeoutError) as ex:
            threads.invoke_in_main_thread(
                self.error_text.setText,
                'An error occurred communicating with this device.')
            msg.logError(ex)
        except Exception as e:
            threads.invoke_in_main_thread(
                self.error_text.setText,
                'Unknown error occurred when attempting to communicate with device.')
            msg.logError(e)

        # Progress readout from the camera / HDF5 plugin PVs.
        num_exposures_counter = self.device.cam.num_exposures_counter.get()
        num_exposures = self.device.cam.num_exposures.get()
        num_captured = self.device.hdf5.num_captured.get()
        num_capture = self.device.hdf5.num_capture.get()
        capturing = self.device.hdf5.capture.get()
        if capturing:
            current = num_exposures_counter + num_captured * num_exposures
            total = num_exposures * num_capture
        elif num_exposures == 1:
            # Show 'busy' for just one exposure
            current = 0
            total = 0
        else:
            current = num_exposures_counter
            total = num_exposures
        threads.invoke_in_main_thread(self._update_progress, current, total)

        # Wait for any in-flight frame fetch to finish.
        while self.getting_frame:
            time.sleep(.01)

        # Throttle to maxfps.
        max_period = 1 / self.maxfps
        elapsed = time.time() - self._last_timestamp
        if elapsed < max_period:
            time.sleep(max_period - elapsed)
        self._last_timestamp = time.time()
def reload(self):
    """Refresh the current results catalog; emit if new entries appeared."""
    t0 = time.monotonic()
    if self._results_catalog is None:
        return  # no search has been run yet
    try:
        self._results_catalog.reload()
        new_results = self.check_for_new_entries()
        duration = time.monotonic() - t0
        msg.logMessage("Reloaded search results {}.".format(duration))
        if new_results and self.show_results_event.is_set():
            self.new_results_catalog.emit()
    except Exception as e:
        log.error(e)
        msg.showMessage("Unable to query top level catalogs: ", str(e))
def sliceReconstruct(self):
    """Run a single-slice reconstruction for the currently selected header.

    Dispatches the workflow on the 'slicereconstruct' thread key; results go
    to the reconstruction-display callback and errors to the exception slot.
    """
    currentitem = self.headermodel.item(self.rawtabview.currentIndex())
    if not currentitem:
        msg.showMessage('Error: You must open files before reconstructing.')
        return  # BUGFIX: original fell through and crashed on a missing header
    try:
        msg.showBusy()
        msg.showMessage('Running slice reconstruction...', level=msg.INFO)
        paths = self.headermodel.item(self.rawtabview.currentIndex()).header.startdoc['paths']
        self.workflow.execute(None, paths=paths, threadkey='slicereconstruct',
                              callback_slot=partial(self.showReconstruction, mode=self.slice),
                              except_slot=self.exceptionCallback)
    except Exception as ex:
        msg.logError(ex)
        msg.showReady()
        msg.clearMessage()
def emit_selected_result(self, selected, deselected):
    """Update the tracked row selection and emit the corresponding entries."""
    try:
        self.selected_rows |= {index.row() for index in selected.indexes()}
        self.selected_rows -= {index.row() for index in deselected.indexes()}
        entries = [
            self.search_state._results_catalog[self.data(self.index(row, 0), Qt.UserRole)]
            for row in sorted(self.selected_rows)
        ]
        self.selected_result.emit(entries)
    except Exception as e:
        msg.logError(e)
        msg.showMessage("Problem getting info about for selected row")
def appendCatalog(self, run_catalog, **kwargs):
    """Load a run catalog into the viewer, preferring the 'primary' stream."""
    try:
        self.stream_fields = get_all_image_fields(run_catalog)
        stream_names = get_all_streams(run_catalog)
        msg.showMessage(f"Loading primary image for {run_catalog.name}")
        # try and startup with primary catalog and whatever fields it has
        if "primary" in self.stream_fields:
            if "primary" in stream_names:
                default_stream_name = "primary"
            else:
                default_stream_name = stream_names[0]
        else:
            default_stream_name = next(iter(self.stream_fields))
        self.catalog_viewer.setCatalog(run_catalog, default_stream_name, None)
    except Exception as e:
        msg.logError(e)
        msg.showMessage("Unable to display: ", str(e))
def set_entries(self, entries):
    """Replace the tracked entries and update the detail pane accordingly.

    Zero entries clears the pane; exactly one triggers an async load of its
    details; several switch the pane to 'open individually' mode.
    """
    try:
        self.entries.clear()
        self.entries.extend(entries)
        count = len(entries)
        if count == 0:
            self.uid_label.setText('')
            self.streams.setText('')
            self.copy_uid_button.setEnabled(False)
            self.open_individually_button.setEnabled(False)
        elif count == 1:
            future = QThreadFuture(self.call_entry, entries,
                                   callback_slot=self.call_entries_finished,
                                   showBusy=True)
            future.start()
        else:
            self.uid_label.setText('(Multiple Selected)')
            self.streams.setText('')
            self.copy_uid_button.setEnabled(False)
            self.open_individually_button.setText('Open individually')
            self.open_individually_button.setEnabled(True)
    except Exception as e:
        msg.logError(e)
        msg.showMessage("Unable to get data about selected run")
def appendHeader(self, header: NonDBHeader, **kwargs):
    """Register a new header: add a model item, wire up the current map view,
    and point the analysis widgets at the 'spectra' field."""
    # get fileName and update status bar
    fileName = header.startdoc.get('sample_name', '????')
    msg.showMessage(f'Opening {fileName}.h5')

    # init item
    item = QStandardItem(fileName + '_' + str(self.headermodel.rowCount()))
    item.header = header
    item.selectedPixels = None
    self.headermodel.appendRow(item)
    self.headermodel.dataChanged.emit(QModelIndex(), QModelIndex())

    # read out image shape
    imageEvent = next(header.events(fields=['image']))
    imgShape = imageEvent['imgShape']
    rc2ind = imageEvent['rc_index']

    # get current MapView widget and transmit imgshape to it
    currentMapView = self.imageview.currentWidget()
    currentMapView.getImgShape(imgShape, rc2ind)

    # get xy coordinates of ROI selected pixels
    currentMapView.sigRoiPixels.connect(partial(self.appendSelection, 'pixel'))
    currentMapView.sigRoiState.connect(partial(self.appendSelection, 'ROI'))
    currentMapView.sigAutoMaskState.connect(partial(self.appendSelection, 'autoMask'))
    currentMapView.sigSelectMaskState.connect(partial(self.appendSelection, 'select'))

    self.preprocess.setHeader(field='spectra')
    self.FA_widget.setHeader(field='spectra')
    self.clusterwidget.setHeader(field='spectra')

    # ROI-change notifications from the factor-analysis and cluster widgets
    for roi in self.FA_widget.roiList[:4]:
        roi.sigRegionChangeFinished.connect(self.updateROI)
    self.clusterwidget.roi.sigRegionChangeFinished.connect(self.updateROI)
def _update_thread(self, update_action: Callable):
    """Poll the device and run `update_action` at up to ``self.maxfps``.

    Waits while a cached frame is pending, throttles to the frame rate,
    exits when passive mode is unchecked, and skips work while hidden.
    NOTE(review): this variant never writes ``self._last_timestamp``;
    presumably it is updated elsewhere (e.g. when a frame arrives) — confirm.
    """
    while True:
        if self.cached_frame is not None:
            # A frame is still waiting to be consumed.
            time.sleep(.01)
            continue
        max_period = 1 / self.maxfps
        elapsed = time.time() - self._last_timestamp
        if elapsed < max_period:
            time.sleep(max_period - elapsed)
        if not self.passive.isChecked():
            break
        if self.visibleRegion().isEmpty():
            continue
        try:
            if not self.device.connected:
                with msg.busyContext():
                    msg.showMessage('Connecting to device...')
                    self.device.wait_for_connection()
            update_action()
        except (RuntimeError, CaprotoTimeoutError, ConnectionTimeoutError, TimeoutError) as ex:
            threads.invoke_in_main_thread(
                self.error_text.setText,
                'An error occurred communicating with this device.')
            msg.logError(ex)
        except Exception as e:
            threads.invoke_in_main_thread(
                self.error_text.setText,
                'Unknown error occurred when attempting to communicate with device.')
            msg.logError(e)
def runDask(self):
    """Submit a HipGISAXS job to the active remote Dask executor and plot the result.

    Writes the current YAML config, ships it to the executor, polls until the
    job leaves 'pending', then parses the text output into an array and opens
    it in the Viewer plugin.
    """
    import client.dask_active_executor
    import time

    if client.dask_active_executor.active_executor is None:
        # warning message
        return
    ae = client.dask_active_executor.active_executor.executor

    def hipgisaxs_func(yaml_str):
        # Runs on the remote worker: write the YAML and launch via srun.
        import subprocess
        import os
        # BUGFIX (minor): removed unused `timestamp` local from the original.
        filename = os.path.join(os.path.expanduser('~'), "test_remote.yml")
        with open(filename, 'w') as outfile:
            outfile.write(yaml_str)
        return subprocess.check_output(
            ["srun", "--job-name=hipgisaxs", "--nodes=1", "--ntasks=1",
             "--ntasks-per-node=1", "--time=00:30:00", "/bin/bash",
             "/users/course79/rungisaxs.sh", filename])

    self.writeyaml()
    with open(os.path.join(os.path.expanduser('~'), 'test.yml'), 'r') as outfile:
        fx_str = outfile.read()

    msg.showMessage('Submitting job to remote executor...')
    future_tag = ae.submit(hipgisaxs_func, fx_str, pure=False)
    msg.showMessage('Job received. Submitting to queue...')
    while future_tag.status == "pending":
        # msg.showMessage("Waiting for response from server...",timeout=5)
        time.sleep(1)
    if future_tag.status == "failure":
        msg.showMessage("Execution failed.", timeout=5)
        return
    msg.showMessage("Execution complete. Fetching result...", timeout=5)
    result = future_tag.result()
    # NOTE(review): np.fromstring (text mode) is deprecated; kept for behavior
    # parity — consider np.array(line.split(), dtype=float) per line.
    out = np.array([np.fromstring(line, sep=' ') for line in result.splitlines()])
    msg.logMessage(("result = ", out), msg.DEBUG)
    plugins.plugins['Viewer'].instance.opendata(out)
def process_queue(self):
    """Drain the plan queue forever, running each plan on the RunEngine.

    Emits start/finish signals around each plan; exceptions are reported,
    logged, and re-emitted via ``sigException`` without stopping the loop.
    """
    while True:
        try:
            priority_plan = self.queue.get(block=True, timeout=.1)  # timeout is arbitrary, we'll come right back
        except Empty:
            continue
        priority, (args, kwargs) = priority_plan.priority, priority_plan.args
        self.sigStart.emit()
        msg.showBusy()
        try:
            self.RE(*args, **kwargs)
        except Exception as ex:
            msg.showMessage("An error occured during a Bluesky plan. See the Xi-CAM log for details.")
            msg.logError(ex)
            self.sigException.emit(ex)
        finally:
            msg.showReady()
            self.sigFinish.emit()
def show_results(self):
    """Drain the new-UID queue and append one model row per run.

    Sets header labels on the first row, honors thread interruption, and
    always signals ``show_results_event`` so callers are not left waiting.
    """
    header_labels_set = False
    self.show_results_event.clear()
    t0 = time.monotonic()
    counter = 0
    try:
        msg.showBusy()
        while not self._new_uids_queue.empty():
            counter += 1
            row = []
            new_uid = self._new_uids_queue.get()
            try:
                entry = self.get_run_by_uid(new_uid)
                row_data = self.apply_search_result_row(entry)
            except SkipRow as e:
                # BUGFIX: the original showed str(msg) — the messaging module
                # itself — instead of the exception text.
                msg.showMessage(str(e))
                msg.logError(e)
                continue
            if not header_labels_set:
                # Set header labels just once.
                threads.invoke_in_main_thread(
                    self.search_results_model.setHorizontalHeaderLabels,
                    list(row_data))
                header_labels_set = True
            for value in row_data.values():
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setData(new_uid, Qt.UserRole)
                row.append(item)
            if QThread.currentThread().isInterruptionRequested():
                self.show_results_event.set()
                msg.logMessage("Interrupt requested")
                return
            threads.invoke_in_main_thread(self.search_results_model.appendRow, row)
        if counter:
            self.sig_update_header.emit()
        duration = time.monotonic() - t0
        msg.showMessage("Displayed {} new results {}.".format(counter, duration))
        self.show_results_event.set()
    except Exception as e:
        msg.showMessage("Error displaying runs")
        msg.logError(e)
    finally:
        msg.hideBusy()
def calculate(self):
    """Run the selected factorization (PCA / NMF / MCR) over the loaded maps.

    Builds the data matrix from the chosen wavenumber range and any per-map
    ROI pixel selections, fits the requested decomposition, pops up result
    plots, and emits ``sigPCA`` with (wavenumbers, model, scores, row splits).
    """
    N = self.parameter['Components']
    # set decompose method
    if self.parameter['Method'] == 'PCA':
        self.method = 'PCA'
        self.field = 'spectra'
    elif self.parameter['Method'] == 'NMF':
        self.method = 'NMF'
        self.field = 'volume'
    elif self.parameter['Method'] == 'MCR':
        self.method = 'MCR'
        self.field = 'spectra'

    if not hasattr(self, '_dataSets'):
        return

    # Parse the comma-separated wavenumber bounds into indices.
    wavROIList = []
    for entry in self.parameter['Wavenumber Range'].split(','):
        try:
            wavROIList.append(val2ind(int(entry), self.wavenumbers))
        except:
            continue
    # Select wavenumber region: bounds must come in pairs.
    if len(wavROIList) % 2 == 0:
        wavROIList = sorted(wavROIList)
        wavROIidx = []
        for i in range(len(wavROIList) // 2):
            wavROIidx += list(range(wavROIList[2 * i], wavROIList[2 * i + 1] + 1))
    else:
        msg.logMessage('"Wavenumber Range" values must be in pairs', msg.ERROR)
        MsgBox('Factorization computation aborted.', 'error')
        return
    self.wavenumbers_select = self.wavenumbers[wavROIidx]

    # get map ROI selected region
    self.selectedPixelsList = [self.headermodel.item(i).selectedPixels
                               for i in range(self.headermodel.rowCount())]
    self.df_row_idx = []  # row index for dataframe data_fac
    msg.showMessage('Start computing', self.method + '. Image shape:', str(self.imgShapes))
    self.dataRowSplit = [0]  # remember the starting/end row positions of each dataset

    if self.field == 'spectra':  # PCA / MCR workflow
        self.N_w = len(self.wavenumbers_select)
        self._allData = np.empty((0, self.N_w))
        for i, data in enumerate(self._dataSets['spectra']):  # i: map idx
            if self.selectedPixelsList[i] is None:
                n_spectra = len(data)
                tmp = np.zeros((n_spectra, self.N_w))
                for j in range(n_spectra):
                    tmp[j, :] = data[j][wavROIidx]
                    self.df_row_idx.append((self.ind2rcList[i][j], j))
            else:
                n_spectra = len(self.selectedPixelsList[i])
                tmp = np.zeros((n_spectra, self.N_w))
                for j in range(n_spectra):  # j: jth selected pixel
                    row_col = tuple(self.selectedPixelsList[i][j])
                    tmp[j, :] = data[self.rc2indList[i][row_col]][wavROIidx]
                    self.df_row_idx.append((row_col, self.rc2indList[i][row_col]))
            self.dataRowSplit.append(self.dataRowSplit[-1] + n_spectra)
            self._allData = np.append(self._allData, tmp, axis=0)

        if len(self._allData) > 0:
            if self.method == 'PCA':
                self.data_fac_name = 'data_PCA'  # define pop up plots labels
                # normalize and mean center
                if self.parameter['Normalization'] == 'L1':  # normalize
                    data_norm = Normalizer(norm='l1').fit_transform(self._allData)
                elif self.parameter['Normalization'] == 'L2':
                    data_norm = Normalizer(norm='l2').fit_transform(self._allData)
                else:
                    data_norm = self._allData
                # subtract mean
                data_centered = StandardScaler(with_std=False).fit_transform(data_norm)
                # Do PCA
                self.PCA = PCA(n_components=N)
                self.PCA.fit(data_centered)
                self.data_PCA = self.PCA.transform(data_centered)
                # pop up plots
                self.popup_plots()
            elif self.method == 'MCR':
                self.data_fac_name = 'data_MCR'  # define pop up plots labels
                # Do ICA to find initial estimate of ST matrix
                self.ICA = FastICA(n_components=N)
                self.ICA.fit(self._allData)
                # Do MCR
                self.MCR = McrAR(max_iter=100, c_regr=self.parameter['C regressor'],
                                 st_regr='NNLS', tol_err_change=1e-6, tol_increase=0.5)
                self.MCR.fit(self._allData, ST=self.ICA.components_)
                self.MCR.components_ = self.MCR.ST_opt_
                self.data_MCR = self.MCR.C_opt_
                # test ICA
                # self.MCR = self.ICA
                # self.data_MCR = self.ICA.transform(self._allData)
                # pop up plots
                self.popup_plots()
        else:
            msg.logMessage('The data matrix is empty. No PCA is performed.', msg.ERROR)
            MsgBox('The data matrix is empty. No PCA is performed.', 'error')
            self.PCA, self.data_PCA = None, None
            self.MCR, self.data_MCR = None, None
        # emit PCA and transformed data
        if self.method == 'PCA':
            self.sigPCA.emit((self.wavenumbers_select, self.PCA, self.data_PCA, self.dataRowSplit))
        elif self.method == 'MCR':
            self.sigPCA.emit((self.wavenumbers_select, self.MCR, self.data_MCR, self.dataRowSplit))

    elif self.field == 'volume':  # NMF workflow
        data_files = []
        wav_masks = []
        row_idx = np.array([], dtype='int')
        self.allDataRowSplit = [0]  # row split for complete datasets
        for i, file in enumerate(self._dataSets['volume']):
            ir_data, fmt = read_map.read_all_formats(file)
            n_spectra = ir_data.data.shape[0]
            self.allDataRowSplit.append(self.allDataRowSplit[-1] + n_spectra)
            data_files.append(ir_data)
            ds = data_prep.data_prepper(ir_data)
            wav_masks.append(ds.decent_bands)
            # row selection
            if self.selectedPixelsList[i] is None:
                row_idx = np.append(row_idx,
                                    np.arange(self.allDataRowSplit[-2], self.allDataRowSplit[-1]))
                for k, v in self.rc2indList[i].items():
                    self.df_row_idx.append((k, v))
            else:
                n_spectra = len(self.selectedPixelsList[i])
                for j in range(n_spectra):
                    row_col = tuple(self.selectedPixelsList[i][j])
                    row_idx = np.append(row_idx,
                                        self.allDataRowSplit[-2] + self.rc2indList[i][row_col])
                    self.df_row_idx.append((row_col, self.rc2indList[i][row_col]))
            self.dataRowSplit.append(self.dataRowSplit[-1] + n_spectra)  # row split for ROI selected rows

        # define pop up plots labels
        self.data_fac_name = 'data_NMF'
        if len(self.df_row_idx) > 0:
            # aggregate datasets
            ir_data_agg = aggregate_data(self._dataSets['volume'], data_files, wav_masks)
            col_idx = list(set(wavROIidx) & set(ir_data_agg.master_wmask))
            self.wavenumbers_select = self.wavenumbers[col_idx]
            ir_data_agg.data = ir_data_agg.data[:, col_idx]
            ir_data_agg.data = ir_data_agg.data[row_idx, :]
            # perform NMF
            self.NMF = NMF(n_components=N)
            self.data_NMF = self.NMF.fit_transform(ir_data_agg.data)
            # pop up plots
            self.popup_plots()
        else:
            msg.logMessage('The data matrix is empty. No NMF is performed.', msg.ERROR)
            MsgBox('The data matrix is empty. No NMF is performed.', 'error')
            self.NMF, self.data_NMF = None, None
        # emit NMF and transformed data : data_NMF
        self.sigPCA.emit((self.wavenumbers_select, self.NMF, self.data_NMF, self.dataRowSplit))
def exceptionCallback(self, ex):
    """Report a failed reconstruction and restore the ready state."""
    msg.notifyMessage("Reconstruction failed;\n see log for error")
    msg.showMessage("Reconstruction failed; see log for error")
    msg.logError(ex)
    msg.showReady()
def computeCluster(self):
    """KMeans-cluster the current embedding and refresh map, means, and scatter.

    Labels are shifted to start at 1 so that 0 marks unselected pixels in the
    cluster map.
    NOTE(review): group means are taken from ``self.dataset`` (the wavenumber-
    selected matrix) while the per-group DataFrames use the full-range
    ``self.dataList`` — presumably intentional; confirm.
    """
    # check if embeddings exist
    if self.embedding is None:
        return
    msg.showMessage('Compute clusters.')
    # get num of clusters
    n_clusters = self.parameter['Clusters']
    # set colorLUT
    self.colorLUT = cm.get_cmap('viridis', n_clusters + 1).colors[:, :3] * 255
    # compute cluster
    cluster_object = KMeans(n_clusters=n_clusters, random_state=0).fit(self.embedding)
    self.labels = cluster_object.labels_ + 1

    # update cluster image
    if self.selectedPixels is None:  # full map
        self.cluster_map = self.labels.reshape(self.imgShape[0], self.imgShape[1])
    elif self.selectedPixels.size == 0:
        self.cluster_map = np.zeros((self.imgShape[0], self.imgShape[1]), dtype=int)
    else:
        self.cluster_map = np.zeros((self.imgShape[0], self.imgShape[1]), dtype=int)
        self.cluster_map[self.selectedPixels[:, 0], self.selectedPixels[:, 1]] = self.labels
    self.cluster_map = np.flipud(self.cluster_map)
    self.clusterImage.setImage(self.cluster_map, levels=[0, n_clusters])
    # self.clusterImage.setImage(self.cluster_map)
    self.clusterImage._image = self.cluster_map
    self.clusterImage.rc2ind = self.rc2ind
    self.clusterImage.row, self.clusterImage.col = self.imgShape[0], self.imgShape[1]
    self.clusterImage.txt.setPos(self.clusterImage.col, 0)
    self.clusterImage.cross.show()

    # update cluster mean
    mean_spectra = []
    self.dfGroups = []
    if self.selectedPixels is None:
        n_spectra = len(self.data)
        self.dataList = np.zeros((n_spectra, len(self.wavenumbers)))
        dataIdx = np.arange(n_spectra)
        for i in range(n_spectra):
            self.dataList[i] = self.data[i]
    else:
        n_spectra = len(self.selectedPixels)
        self.dataList = np.zeros((n_spectra, len(self.wavenumbers)))
        dataIdx = np.zeros(n_spectra, dtype=int)
        for i in range(n_spectra):  # i: ith selected pixel
            row_col = tuple(self.selectedPixels[i])
            dataIdx[i] = self.rc2ind[row_col]
            self.dataList[i] = self.data[dataIdx[i]]
    for ii in range(1, n_clusters + 1):
        sel = (self.labels == ii)
        # save each group spectra to a dataFrame
        self.dfGroups.append(pd.DataFrame(self.dataList[sel],
                                          columns=self.wavenumbers.tolist(),
                                          index=dataIdx[sel]))
        this_mean = np.mean(self.dataset[sel, :], axis=0)
        mean_spectra.append(this_mean)
    self.mean_spectra = np.vstack(mean_spectra)
    self.clusterMeanPlot.setColors(self.colorLUT)
    self.clusterMeanPlot._data = self.mean_spectra
    self.clusterMeanPlot.wavenumbers = self.wavenumbers_select
    self.clusterMeanPlot.plotClusterSpectra()
    # update scatter plot
    self.updateScatterPlot()
def computeEmbedding(self):
    """Compute a UMAP or PCA embedding of the selected-wavenumber dataset,
    store it on the current model item, and refresh the cluster map."""
    # get current map idx
    if not self.isMapOpen():
        return
    msg.showMessage('Compute embedding.')

    # Select wavenumber region (bounds must come in pairs).
    wavROIList = []
    for entry in self.parameter['Wavenumber Range'].split(','):
        try:
            wavROIList.append(val2ind(int(entry), self.wavenumbers))
        except:
            continue
    if len(wavROIList) % 2 == 0:
        wavROIList = sorted(wavROIList)
        wavROIidx = []
        for i in range(len(wavROIList) // 2):
            wavROIidx += list(range(wavROIList[2 * i], wavROIList[2 * i + 1] + 1))
    else:
        msg.logMessage('"Wavenumber Range" values must be in pairs', msg.ERROR)
        MsgBox('Clustering computation aborted.', 'error')
        return
    self.wavenumbers_select = self.wavenumbers[wavROIidx]
    self.N_w = len(self.wavenumbers_select)

    # get current dataset (full map or ROI-selected pixels only)
    if self.selectedPixels is None:
        n_spectra = len(self.data)
        self.dataset = np.zeros((n_spectra, self.N_w))
        for i in range(n_spectra):
            self.dataset[i, :] = self.data[i][wavROIidx]
    else:
        n_spectra = len(self.selectedPixels)
        self.dataset = np.zeros((n_spectra, self.N_w))
        for i in range(n_spectra):  # i: ith selected pixel
            row_col = tuple(self.selectedPixels[i])
            self.dataset[i, :] = self.data[self.rc2ind[row_col]][wavROIidx]

    # get parameters and compute embedding
    n_components = self.parameter['Components']
    if self.parameter['Embedding'] == 'UMAP':
        n_neighbors = self.parameter['Neighbors']
        metric = self.parameter['Metric']
        min_dist = np.clip(self.parameter['Min Dist'], 0, 1)
        self.umap = UMAP(n_neighbors=n_neighbors, min_dist=min_dist,
                         n_components=n_components, metric=metric, random_state=0)
        self.embedding = self.umap.fit_transform(self.dataset)
    elif self.parameter['Embedding'] == 'PCA':
        # normalize and mean center
        if self.parameter['Normalization'] == 'L1':  # normalize
            data_norm = Normalizer(norm='l1').fit_transform(self.dataset)
        elif self.parameter['Normalization'] == 'L2':
            data_norm = Normalizer(norm='l2').fit_transform(self.dataset)
        else:
            data_norm = self.dataset
        # subtract mean
        data_centered = StandardScaler(with_std=False).fit_transform(data_norm)
        # Do PCA
        self.PCA = PCA(n_components=n_components)
        self.PCA.fit(data_centered)
        self.embedding = self.PCA.transform(data_centered)

    # save embedding to standardModelItem
    self.item.embedding = self.embedding
    # update cluster map
    self.computeCluster()
def getNN(self):
    """Fit a 1-nearest-neighbor model on the scatter-plot data for picking."""
    msg.showMessage('Training NearestNeighbors model in scatter plot.')
    self.nbr = NearestNeighbors(n_neighbors=1, algorithm='auto').fit(self.scatterData)
    msg.showMessage('NearestNeighbors model training is finished.')
def batchProcess(self):
    """Batch-process every loaded spectrum with the current baseline settings.

    Validates that a map is open, spectra are loaded, and a non-raw plot type
    is selected; asks the user to confirm; then selects each spectrum in turn,
    collects per-spectrum arrays and report parameters into DataFrames, and
    saves the results to CSV.
    """
    # get current map idx
    if not self.isMapOpen():
        return
    elif self.specItemModel.rowCount() == 0:
        MsgBox('No spectrum is loaded.\nPlease click "Load spectra" to import data.')
        return
    # check if baseline fit OK
    if self.out is None:
        self.out = Preprocessor(self.wavenumberList[self.selectMapidx],
                                self.dataSets[self.selectMapidx][0])
    # get plotchoice
    plotChoice = self.normBox.currentIndex()
    if plotChoice != 0:
        # calculate rubberband and kohler baseline
        baselineOK = self.out.rubber_band(**self.processArgs) and self.out.kohler(**self.processArgs)
    else:
        MsgBox('Plot type is "Raw spectrum".\nPlease change plot type to "Kohler" or "Rubberband".')
        return
    if not baselineOK:
        return
    # notice to user
    userMsg = YesNoDialog(f'Ready to batch process selected spectra.\nDo you want to continue?')
    userChoice = userMsg.choice()
    if userChoice == QMessageBox.No:  # user choose to stop
        return
    self.isBatchProcessOn = True

    # init resultSetsDict, paramsDict
    self.resultSetsDict = {}
    self.paramsDict = {}
    self.paramsDict['specID'] = []
    self.paramsDict['row_column'] = []
    ind2rc = self.ind2rcList[self.selectMapidx]
    energy = self.out.energy
    n_energy = len(energy)
    for item in self.arrayList:
        self.resultSetsDict[item] = np.empty((0, n_energy))
    for item in self.reportList:
        self.paramsDict[item] = []

    # batch process begins
    n_spectra = self.specItemModel.rowCount()
    for i in range(n_spectra):
        msg.showMessage(f'Processing {i + 1}/{n_spectra} spectra')
        # select each spec and collect results
        self.specSelectModel.select(self.specItemModel.index(i, 0),
                                    QItemSelectionModel.ClearAndSelect)
        # get spec idx
        currentSpecItem = self.specItemModel.item(i)
        self.paramsDict['specID'].append(currentSpecItem.idx)
        self.paramsDict['row_column'].append(ind2rc[currentSpecItem.idx])
        # append all results into a single array/list
        for item in self.arrayList:
            self.resultSetsDict[item] = np.append(self.resultSetsDict[item],
                                                  self.resultDict[item].reshape(1, -1),
                                                  axis=0)
        for item in self.reportList:
            self.paramsDict[item].append(self.resultDict[item])

    # result collection completed. convert paramsDict to df
    self.dfDict = {}
    self.dfDict['param'] = pd.DataFrame(self.paramsDict).set_index('specID')
    for item in self.arrayList:  # convert resultSetsDict to df
        self.dfDict[item] = pd.DataFrame(self.resultSetsDict[item],
                                         columns=energy.tolist(),
                                         index=self.paramsDict['specID'])
    # batch process completed
    self.isBatchProcessOn = False
    msg.showMessage(f'Batch processing is completed! Saving results to csv files.')
    # save df to files
    self.saveResults()