def __read_datalist(self):
	""" Read json filelist """

	# - Check datalist files
	if self.datalistfile == "" or self.datalistfile_mask == "":
		logger.error("Data list files are empty!")
		return -1

	# - Read data list for images and store number of instances per class, etc
	ret = self.__read_filelist(self.datalistfile)
	if ret is None:
		logger.error("Failed to read filelist for imgs!")
		return -1
	datadict = ret[0]
	nchannels_set = ret[1]

	self.datalist = datadict["data"]
	self.nchannels = list(nchannels_set)[0]
	self.datasize = len(self.datalist)
	self.labels = [item["label"] for item in self.datalist]
	self.snames = [item["sname"] for item in self.datalist]
	self.classids = [item["id"] for item in self.datalist]
	self.classfract_map = dict(Counter(self.classids).items())

	logger.info("#%d objects in dataset" % self.datasize)

	return 0

def __run_aereco(self):
	""" Run AE reconstruction """

	# - Set FeatExtractorAE class
	ae = FeatExtractorAE(self.dl)
	ae.set_image_size(self.nx, self.ny)
	ae.normalize = self.normalize
	ae.scale_to_abs_max = self.scale_to_abs_max
	ae.scale_to_max = self.scale_to_max
	ae.log_transform_img = self.log_transform
	ae.scale_img = self.scale
	ae.scale_img_factors = self.scale_factors
	ae.standardize_img = self.standardize
	ae.img_means = self.img_means
	ae.img_sigmas = self.img_sigmas
	ae.chan_divide = self.chan_divide
	ae.chan_mins = self.chan_mins
	ae.erode = self.erode
	ae.erode_kernel = self.erode_kernel
	ae.add_channorm_layer = self.add_channorm_layer

	# - Run AE reco
	status = ae.reconstruct_data(
		self.modelfile_encoder, self.weightfile_encoder,
		self.modelfile_decoder, self.weightfile_decoder,
		winsize=self.winsize,
		outfile_metrics=self.outfile,
		save_imgs=self.save_imgs
	)
	if status < 0:
		logger.error("AE reconstruction failed (see logs)!")
		return -1

	return 0

def __load_model(self, modelfile, weightfile):
	""" Load model and weights from input h5 file """

	#==============================
	#==   LOAD MODEL ARCHITECTURE
	#==============================
	# - Load model
	try:
		#self.model = model_from_json(open(modelfile_json).read())
		self.model = load_model(
			modelfile,
			custom_objects={
				'recall_metric': recall_metric,
				'precision_metric': precision_metric,
				'f1score_metric': f1score_metric
			}
		)
		self.model.load_weights(weightfile)
	except Exception as e:
		logger.warn("Failed to load model from file %s (err=%s)!" % (modelfile, str(e)))
		return -1

	if not self.model or self.model is None:
		logger.error("Model object is None, loading failed!")
		return -1

	#===========================
	#==   SET LOSS & METRICS
	#===========================
	self.model.compile(
		optimizer=self.optimizer,
		loss=self.loss_type,
		metrics=['accuracy', f1score_metric, precision_metric, recall_metric],
		run_eagerly=True
	)

	return 0

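# Illustrative sketch (not part of the original class): minimal pattern for
# re-loading a Keras model that was saved with custom metric functions. The metric
# callables (recall_metric, precision_metric, f1score_metric) are assumed to be the
# same module-level functions referenced in __load_model above; file names and the
# optimizer/loss choices below are placeholders, not the project's actual settings.
def _example_load_keras_model(modelfile="model.h5", weightfile="weights.h5"):
	""" Load a Keras model with custom metrics and recompile it (sketch) """
	from tensorflow.keras.models import load_model

	# - Custom metrics must be passed via custom_objects, otherwise deserialization fails
	model = load_model(
		modelfile,
		custom_objects={
			'recall_metric': recall_metric,
			'precision_metric': precision_metric,
			'f1score_metric': f1score_metric
		}
	)
	model.load_weights(weightfile)

	# - Recompile so the metrics are evaluated on new data
	model.compile(
		optimizer='adam',
		loss='categorical_crossentropy',
		metrics=['accuracy', f1score_metric, precision_metric, recall_metric]
	)
	return model
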
def compute_bkg(self, masks, sigma_clip=3):
	""" Compute image background """

	# - Init bkg
	self.bkg_levels= [0]*self.nchannels
	self.bkg_rms= [0]*self.nchannels

	if len(masks)!=self.nchannels:
		logger.error("Number of input masks != nchannels, cannot compute bkg!")
		return -1

	# - Compute bkg levels & rms
	logger.info("Computing image clipped stats of non-masked pixels ...")
	for i in range(self.nchannels):
		data= self.img_data[i]
		mask= masks[i]
		cond= np.logical_and(np.logical_and(data!=0, np.isfinite(data)), mask==0)
		data_1d= data[cond]
		print("--> data_1d.shape")
		print(data_1d.shape)

		mean, median, stddev= sigma_clipped_stats(data_1d, sigma=sigma_clip)
		self.bkg_levels[i]= median
		self.bkg_rms[i]= stddev

	return 0

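# Illustrative sketch (not part of the original class): the per-channel background
# level/rms above is estimated with astropy's sigma_clipped_stats over finite,
# non-zero, unmasked pixels. `img` and `mask` are assumed here to be 2D numpy
# arrays, with mask==0 outside the source region.
def _example_clipped_bkg(img, mask, sigma_clip=3):
	""" Return (bkg_level, bkg_rms) from sigma-clipped stats of unmasked pixels (sketch) """
	import numpy as np
	from astropy.stats import sigma_clipped_stats

	# - Keep only finite, non-zero pixels outside the mask
	cond = np.logical_and(np.logical_and(img != 0, np.isfinite(img)), mask == 0)

	# - Clipped median ~ background level, clipped stddev ~ background rms
	mean, median, stddev = sigma_clipped_stats(img[cond], sigma=sigma_clip)
	return median, stddev
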
def compute_img_moments(self):
	""" Compute image moments """

	# - Init pars
	self.moments_c= []
	self.moments_hu= []
	self.moments_zern= []

	# - Compute raw moments and centroids
	if self.__compute_contour_pars()<0:
		logger.error("Failed to compute contour pars!")
		return -1

	# - Compute moments (central, Hu, Zernike) of intensity images
	#   NB: use centroid from refch for all channels
	centroid= self.centroids[self.refch]
	#centroid= self.center_of_masses[self.refch]

	for i in range(self.nchannels):
		data= self.img_data[i]
		radius= self.radii[i]
		ret= self.__compute_moments(data, centroid, radius)
		if ret is None:
			logger.error("Failed to compute moments for image %s (id=%s, ch=%d)!" % (self.sname, self.label, i+1))
			return -1
		self.moments_c.append(ret[0])
		self.moments_hu.append(ret[1])
		self.moments_zern.append(ret[2])

	return 0

def run(self, datalist):
	""" Run data checker """

	# - Init
	self.param_dict_list = []

	# - Read data
	logger.info("Read data list %s ..." % (datalist))
	self.__read_data(datalist)

	# - Run AE reco
	logger.info("Running autoencoder reconstruction ...")
	if self.__run_aereco() < 0:
		logger.error("AE reconstruction failed!")
		return -1

	# - Select AE reco data
	logger.info("Reading and thresholding AE reco metrics ...")
	if self.__fill_metric_data() < 0:
		logger.error("Failed to read and threshold AE reco metrics!")
		return -1

	# - Save output data
	logger.info("Saving output to file ...")
	if self.__save() < 0:
		logger.warn("Failed to save output data to file %s!" % (self.outfile))
		return -1

	return 0

def __select_cols(self, selcols):
	""" Select data columns provided in selcols list """

	# - Check sel cols
	if not selcols:
		logger.error("Empty sel col list!")
		return -1

	# - Remove any duplicated col ids, sort and set colsel flags
	selcols = list(set(selcols))
	selcols.sort()
	selcolflags = [False] * self.nfeatures

	for col in selcols:
		if col < 0 or col >= self.nfeatures:
			logger.error("Given sel col id %d is not in nfeature col range [0,%d]!" % (col, self.nfeatures - 1))
			return -1
		selcolflags[col] = True

	print("--> Selected columns")
	print(selcols)
	print("--> Selected column flags")
	print(selcolflags)

	# - Extract selected data columns
	logger.info("Extracting selected data columns (N=%d) from original data ..." % (len(selcols)))
	self.data_sel = self.data[:, selcolflags]
	self.data_preclassified_sel = self.data_preclassified[:, selcolflags]
	self.selfeatids = selcols

	return 0

def select_features(self, selcolids):
	""" Select feature cols (0 index is the first feature, not sname) """

	# - Check if param dict is filled
	if not self.param_dict or self.param_dict is None:
		logger.error("Parameter dict is empty!")
		return -1

	# - Get list of sel keys given col indices
	keys= list(self.param_dict.keys())
	keys_sel= [keys[selcol+1] for selcol in selcolids] # +1 because 0 index is the first feature, not sname

	# - Create new dict with selected pars
	param_dict_sel= collections.OrderedDict()
	param_dict_sel["sname"]= self.param_dict["sname"]
	for key in keys_sel:
		param_dict_sel[key]= self.param_dict[key]
	param_dict_sel["id"]= self.param_dict["id"]

	# - Override old dict
	self.param_dict= param_dict_sel

	return 0

def __read_fits(self, filename):
	""" Read FITS image and return data """

	# - Open file
	try:
		hdu= fits.open(filename, memmap=False)
	except Exception as ex:
		errmsg= 'Cannot read image file: ' + filename
		#cls._logger.error(errmsg)
		logger.error(errmsg)
		raise IOError(errmsg)

	# - Read data
	data= hdu[0].data
	data_size= np.shape(data)
	nchan= len(data.shape)
	if nchan==4:
		output_data= data[0, 0, :, :]
	elif nchan==2:
		output_data= data
	else:
		errmsg= 'Invalid/unsupported number of channels found in file ' + filename + ' (nchan=' + str(nchan) + ')!'
		#cls._logger.error(errmsg)
		logger.error(errmsg)
		hdu.close()
		raise IOError(errmsg)

	# - Read metadata
	header= hdu[0].header

	# - Close file
	hdu.close()

	return output_data, header

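# Illustrative sketch (not part of the original class): a standalone version of the
# FITS-reading logic above, squeezing 4D radio cubes (degenerate stokes/frequency
# axes) down to a 2D image with astropy.io.fits. The function name is hypothetical.
def _example_read_fits_2d(filename):
	""" Read a FITS file and return (2D image data, header) (sketch) """
	from astropy.io import fits

	# - memmap=False loads the pixel data into memory, so it stays valid after close
	with fits.open(filename, memmap=False) as hdu:
		data = hdu[0].data
		header = hdu[0].header

	ndim = data.ndim
	if ndim == 4:      # e.g. [STOKES, FREQ, Y, X] with degenerate leading axes
		return data[0, 0, :, :], header
	elif ndim == 2:    # plain 2D image
		return data, header

	raise IOError("Unsupported number of axes (%d) in file %s" % (ndim, filename))
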
def read_img(self):
	""" Read input image and generate Montage metadata """

	# - Read FITS (ALL PROC)
	logger.info("[PROC %d] Reading input image %s ..." % (procId, self.imgfile_fullpath))
	try:
		data, header, wcs= Utils.read_fits(self.imgfile_fullpath)
	except Exception as e:
		logger.error("[PROC %d] Failed to read input image %s (err=%s)!" % (procId, self.imgfile_fullpath, str(e)))
		return -1

	# - Write input image Montage metadata (PROC 0)
	status= 0
	if procId==MASTER:
		status= Utils.write_montage_fits_metadata(
			inputfile=self.imgfile_fullpath,
			metadata_file=self.img_metadata,
			jobdir=self.jobdir_scutout
		)
	else: # OTHER PROCS
		status= -1

	if comm is not None:
		status= comm.bcast(status, root=MASTER)

	if status<0:
		logger.error("[PROC %d] Failed to generate Montage metadata for input image %s, exit!" % (procId, self.imgfile_fullpath))
		return -1

	return 0

def __load_model(self, modelfile):
	""" Load model and weights from input h5 file """

	#==============================
	#==   LOAD MODEL ARCHITECTURE
	#==============================
	try:
		self.model= load_model(modelfile)
	except Exception as e:
		logger.warn("Failed to load model from file %s (err=%s)!" % (modelfile, str(e)))
		return -1

	if not self.model or self.model is None:
		logger.error("Model object is None, loading failed!")
		return -1

	#===========================
	#==   SET LOSS & METRICS
	#===========================
	self.model.compile(
		optimizer=self.optimizer,
		loss=self.loss_type,
		metrics=['accuracy', f1score_metric, precision_metric, recall_metric],
		run_eagerly=True
	)

	# - Print and draw model
	self.model.summary()
	plot_model(self.model, to_file='model.png', show_shapes=True)

	return 0

def run(self, img_group_1, img_group_2):
	""" Compute spectral index """

	# - Read image data
	if self.__read_imgs() < 0:
		logger.error("Failed to read input imgs!")
		return -1

	# - Check data integrity
	good_data = self.__has_good_data(check_mask=True, check_bad=True, check_neg=True, check_same=True)
	if not good_data:
		logger.warn("Source data selected as bad, skip this source ...")
		return -1

	# - Compute spectral index
	if self.__compute_spectral_index(img_group_1, img_group_2) < 0:
		logger.error("Failed to compute spectral index (see logs)!")
		return -1

	# - Fill dict data
	self.__fill_data()

	return 0

def subtract_bkg(self, bkgs, subtract_only_refch=False):
	""" Subtract image background """

	if len(bkgs)!=self.nchannels:
		logger.error("Number of input bkgs != nchannels, cannot subtract bkg!")
		return -1

	# - Subtract bkg
	if subtract_only_refch:
		data= self.img_data[self.refch]
		mask= self.img_data_mask[self.refch]
		bkg= bkgs[self.refch]
		self.img_data[self.refch]-= bkg
		self.img_data[self.refch][mask==0]= 0
	else:
		for i in range(self.nchannels):
			data= self.img_data[i]
			mask= self.img_data_mask[i]
			bkg= bkgs[i]
			self.img_data[i]-= bkg
			self.img_data[i][mask==0]= 0

	# - Draw data & masks?
	if self.draw:
		fig, axs = plt.subplots(2, self.nchannels)
		for i in range(self.nchannels):
			axs[0, i].imshow(self.img_data[i])
			axs[1, i].imshow(self.img_data_mask[i])
		plt.show()

	return 0

def read_regions(self):
	""" Read regions """

	# - Read regions
	logger.info("[PROC %d] Reading DS9 region file %s ..." % (procId, self.regionfile))
	ret= Utils.read_regions([self.regionfile])
	if ret is None:
		logger.error("[PROC %d] Failed to read regions (check format)!" % (procId))
		return -1
	regs= ret[0]
	snames= ret[1]
	slabels= ret[2]

	# - Select regions by tag
	regs_sel= regs
	snames_sel= snames
	slabels_sel= slabels
	if self.filter_regions_by_tags and self.tags:
		logger.info("[PROC %d] Selecting DS9 regions with desired tags ..." % (procId))
		regs_sel, snames_sel, slabels_sel= Utils.select_regions(regs, self.tags)

	if not regs_sel:
		logger.warn("[PROC %d] No region left for processing (check input region file)!" % (procId))
		return -1

	self.sname_label_map= {}
	for i in range(len(snames_sel)):
		sname= snames_sel[i]
		slabel= slabels_sel[i]
		self.sname_label_map[sname]= slabel

	print("sname_label_map")
	print(self.sname_label_map)

	# - Compute centroids & radii
	centroids, radii= Utils.compute_region_info(regs_sel)

	# - Assign sources to each processor
	self.nsources= len(regs_sel)
	source_indices= list(range(0, self.nsources))
	source_indices_split= np.array_split(source_indices, nproc)
	source_indices_proc= list(source_indices_split[procId])
	self.nsources_proc= len(source_indices_proc)
	imin= source_indices_proc[0]
	imax= source_indices_proc[self.nsources_proc-1]

	self.snames_proc= snames_sel[imin:imax+1]
	self.slabels_proc= slabels_sel[imin:imax+1]
	self.regions_proc= regs_sel[imin:imax+1]
	self.centroids_proc= centroids[imin:imax+1]
	self.radii_proc= radii[imin:imax+1]
	logger.info("[PROC %d] #%d sources assigned to this processor ..." % (procId, self.nsources_proc))

	print("snames_proc %d" % (procId))
	print(self.snames_proc)

	return 0

def run_ae_reconstruction(self, datalist):
	""" Run AE reconstruction """

	if procId==MASTER:
		aereco_status= 0

		# - Create data loader
		dl= DataLoader(filename=datalist)

		# - Read datalist
		logger.info("[PROC %d] Reading datalist %s ..." % (procId, datalist))
		dataread_status= dl.read_datalist()
		if dataread_status<0:
			logger.error("[PROC %d] Failed to read input datalist %s" % (procId, datalist))
			aereco_status= -1
		else:
			# - Run AE reco
			logger.info("[PROC %d] Running autoencoder classifier reconstruction ..." % (procId))
			ae= FeatExtractorAE(dl)
			ae.resize= self.resize_img
			ae.set_image_size(self.nx, self.ny)
			ae.normalize= self.normalize_img
			ae.scale_to_abs_max= self.scale_img_to_abs_max
			ae.scale_to_max= self.scale_img_to_max
			ae.log_transform_img= self.log_transform_img
			ae.scale_img= self.scale_img
			ae.scale_img_factors= self.scale_img_factors
			ae.standardize_img= self.standardize_img
			ae.img_means= self.img_means
			ae.img_sigmas= self.img_sigmas
			ae.chan_divide= self.img_chan_divide
			ae.chan_mins= self.img_chan_mins
			ae.erode= self.img_erode
			ae.erode_kernel= self.img_erode_kernel
			ae.add_channorm_layer= self.add_channorm_layer

			aereco_status= ae.reconstruct_data(
				self.modelfile_encoder, self.weightfile_encoder,
				self.modelfile_decoder, self.weightfile_decoder,
				winsize= self.winsize,
				outfile_metrics=self.outfile_aerecometrics,
				save_imgs= False
			)
	else:
		aereco_status= 0

	if comm is not None:
		aereco_status= comm.bcast(aereco_status, root=MASTER)

	if aereco_status<0:
		logger.error("[PROC %d] Failed to run autoencoder reconstruction on data %s, exit!" % (procId, datalist))
		return -1

	return 0

def __merge_data(self, dlist):
	""" Merge feature data """

	# - Check input list
	if not dlist:
		logger.error("Empty data dict list given!")
		return -1

	for i in range(len(dlist)):
		d = dlist[i]
		if not d:
			logger.error("Data dict %d is empty!" % (i + 1))
			return -1

	# - Compute number of vars
	nvars_tot = 0
	for d in dlist:
		print("d")
		print(d)
		nentries = len(d.keys())
		firstitem = next(iter(d.items()))
		nvars = len(firstitem[1].keys()) - 2
		nvars_tot += nvars
		logger.info("Data dict has #%d entries (#%d vars) ..." % (nentries, nvars))

	logger.info("Merged set is expected to have %d vars ..." % (nvars_tot))

	# - Merge features
	logger.info("Merging feature data for input data dict ...")
	dmerged = collections.OrderedDict()
	for d in dlist:
		for key, value in d.items():
			if key not in dmerged:
				dmerged[key] = collections.OrderedDict({})
			dmerged[key].update(value)
			dmerged[key].move_to_end("id")

	# - Remove rows with fewer vars than expected
	logger.info("Removing rows with number of vars !=%d ..." % (nvars_tot))
	self.par_dict_list = []
	for key, value in dmerged.items():
		nvars = len(value.keys()) - 2
		if nvars != nvars_tot:
			logger.info("Removing entry (%s) as number of vars (%d) is !=%d ..." % (key, nvars, nvars_tot))
			#del dmerged[key]
			continue
		self.par_dict_list.append(value)

	return 0

def __predict(self):
	""" Predict cluster membership of input data using the loaded clusterer """

	#====================================================
	#==   CHECK DATA & MODEL
	#====================================================
	# - Check if data are set
	if self.data is None:
		logger.error("Input data array is None!")
		return -1

	# - Check if clustering model is set
	if self.clusterer is None:
		logger.error("Clusterer is not set!")
		return -1

	# - Retrieve prediction data from current model
	logger.info("Retrieving prediction data from current model (if any) ...")
	self.prediction_data = self.clusterer.prediction_data_

	#====================================================
	#==   CLUSTER DATA USING SAVED MODEL
	#====================================================
	logger.info("Encode input data using loaded model ...")
	self.labels, self.probs = hdbscan.approximate_predict(self.clusterer, self.data)

	#================================
	#==   SAVE CLUSTERED DATA
	#================================
	logger.info("Saving clustered data to file ...")
	N = self.data.shape[0]
	print("Cluster data N=", N)

	snames = np.array(self.source_names).reshape(N, 1)
	objids = np.array(self.data_classids).reshape(N, 1)
	labels = np.array(self.labels).reshape(N, 1)
	probs = np.array(self.probs).reshape(N, 1)

	clustered_data = np.concatenate((snames, objids, labels, probs), axis=1)
	head = "# sname id clustid clustprob"
	Utils.write_ascii(clustered_data, self.outfile, head)

	#================================
	#==   PLOT
	#================================
	logger.info("Plotting results ...")
	self.__plot_predict(
		self.clusterer, self.data, self.labels,
		self.source_names, self.data_labels,
		self.prediction_data, self.prediction_extra_data,
		self.outfile_plot
	)

	return 0

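# Illustrative sketch (not part of the original class): the hdbscan workflow assumed
# by __predict above. A clusterer fitted with prediction_data=True can later assign
# new points via approximate_predict, which returns cluster labels (-1 = noise) and
# membership strengths. Parameter values below are placeholders.
def _example_hdbscan_predict(train_data, new_data):
	""" Fit HDBSCAN on train_data and predict cluster labels for new_data (sketch) """
	import hdbscan

	# - prediction_data=True stores the extra info needed for approximate_predict
	clusterer = hdbscan.HDBSCAN(min_cluster_size=5, prediction_data=True)
	clusterer.fit(train_data)

	# - Assign previously unseen points to the learned clusters
	labels, probs = hdbscan.approximate_predict(clusterer, new_data)
	return labels, probs
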
def set_data_from_file(self, filename):
	""" Set data from input file. Expected format: sname, N features, classid """

	# - Read table
	row_start = 0
	try:
		table = ascii.read(filename, data_start=row_start)
	except Exception:
		logger.error("Failed to read feature file %s!" % filename)
		return -1

	print(table.colnames)
	print(table)

	ncols = len(table.colnames)
	nfeat = ncols - 2

	# - Set data vectors
	self.data_labels = []
	self.data_classids = []
	self.source_names = []
	featdata = []

	for data in table:
		sname = data[0]
		classid = data[ncols - 1]
		label = self.classid_label_map[classid]

		self.source_names.append(sname)
		self.data_labels.append(label)
		self.data_classids.append(classid)

		featdata_curr = []
		for k in range(nfeat):
			featdata_curr.append(data[k + 1])
		featdata.append(featdata_curr)

	self.data = np.array(featdata)
	if self.data.size == 0:
		logger.error("Empty feature data vector read!")
		return -1

	data_shape = self.data.shape
	self.nsamples = data_shape[0]
	self.nfeatures = data_shape[1]
	logger.info("#nsamples=%d" % (self.nsamples))

	# - Set pre-classified data
	logger.info("Setting pre-classified data (if any) ...")
	self.__set_preclass_data()

	return 0

def __read_sdata(self, index):
	""" Read source data """

	# - Check index
	if index<0 or index>=self.datasize:
		logger.error("Invalid index %d given!" % index)
		return None

	# - Init sdata
	sdata= SData()
	sdata.refch= self.refch
	sdata.kernsize= self.kernsize
	sdata.draw= self.draw
	sdata.save_ssim_pars= self.save_ssim_pars
	sdata.negative_pix_fract_thr= self.negative_pix_fract_thr
	sdata.bad_pix_fract_thr= self.bad_pix_fract_thr

	sdata_mask= SData()
	sdata_mask.refch= self.refch
	sdata_mask.kernsize= self.kernsize
	sdata_mask.draw= self.draw
	sdata_mask.save_ssim_pars= self.save_ssim_pars
	sdata_mask.negative_pix_fract_thr= self.negative_pix_fract_thr
	sdata_mask.bad_pix_fract_thr= self.bad_pix_fract_thr

	# - Read source image data
	logger.debug("Reading source image data %d ..." % index)
	#d= self.datalist["data"][index]
	d= self.datalist[index]
	if sdata.set_from_dict(d)<0:
		logger.error("Failed to set source image data %d!" % index)
		return None
	if sdata.read_imgs()<0:
		logger.error("Failed to read source images %d!" % index)
		return None

	# - Read source masked image data
	logger.debug("Reading source masked image data %d ..." % index)
	#d= self.datalist_mask["data"][index]
	d= self.datalist_mask[index]
	if sdata_mask.set_from_dict(d)<0:
		logger.error("Failed to set source masked image data %d!" % index)
		return None
	if sdata_mask.read_imgs()<0:
		logger.error("Failed to read source masked images %d!" % index)
		return None

	return sdata, sdata_mask

def __create_pipeline(self):
	""" Build the feature selector pipeline """

	# - Create classifier inventory
	logger.info("Creating classifier inventory ...")
	self.__create_classifier_inventory()

	# - Set min/max nfeat range
	nf_min = self.nfeat_min
	nf_max = self.nfeat_max
	if nf_max == -1:
		nf_max = self.nfeatures

	self.nfeats = []
	for i in range(nf_min, nf_max + 1):
		self.nfeats.append(i)

	# - Create models
	self.model = self.__create_model()
	if self.model is None:
		logger.error("Created model is None!")
		return -1

	for i in range(len(self.nfeats)):
		m = self.__create_model()
		self.models.append(m)

	# - Define dataset split (unique for all models)
	self.cv = StratifiedKFold(n_splits=self.cv_nsplits, shuffle=True, random_state=self.cv_seed)

	# - Create RFE & pipeline
	self.rfe = RFECV(
		estimator=self.model,
		step=1,
		#cv=self.cv,
		min_features_to_select=self.nfeat_min
	)
	self.pipeline = Pipeline(steps=[('featsel', self.rfe), ('model', self.model)])

	for i in range(len(self.nfeats)):
		n = self.nfeats[i]
		r = RFE(
			estimator=self.models[i],
			#cv=self.cv,
			n_features_to_select=n
		)
		p = Pipeline(steps=[('featsel', r), ('model', self.models[i])])
		self.pipelines.append(p)

	return 0

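# Illustrative sketch (not part of the original class): the scikit-learn recursive
# feature elimination pattern built in __create_pipeline above, shown standalone.
# The base estimator here (a random forest) is an assumption; the original code
# obtains its estimator from __create_model().
def _example_rfe_pipeline(X, y, nfeat_min=2, cv_nsplits=5, seed=42):
	""" Build and fit an RFECV + classifier pipeline (sketch) """
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.feature_selection import RFECV
	from sklearn.model_selection import StratifiedKFold
	from sklearn.pipeline import Pipeline

	# - Base estimator and stratified CV split shared by the feature selector
	model = RandomForestClassifier(random_state=seed)
	cv = StratifiedKFold(n_splits=cv_nsplits, shuffle=True, random_state=seed)

	# - RFECV drops one feature per step and keeps the CV-optimal subset
	rfe = RFECV(estimator=model, step=1, cv=cv, min_features_to_select=nfeat_min)
	pipeline = Pipeline(steps=[('featsel', rfe), ('model', model)])

	pipeline.fit(X, y)
	return pipeline
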
def run_from_dictlist(self, dlist, outfile='featdata_merged.dat'):
	""" Run feature merger """

	# - Read feature data and merge
	logger.info("Merging input feature data dicts ...")
	if self.__merge_data(dlist) < 0:
		logger.error("Failed to merge data!")
		return -1

	# - Save data
	logger.info("Saving merged data to file %s ..." % (outfile))
	self.__save(outfile)

	return 0

def run_predict(self, datafile, modelfile):
	""" Run predict using input dataset """

	#================================
	#==   LOAD DATA
	#================================
	# - Check inputs
	if datafile == "":
		logger.error("Empty data file specified!")
		return -1

	if self.set_data_from_file(datafile) < 0:
		logger.error("Failed to read datafile %s!" % datafile)
		return -1

	#================================
	#==   LOAD MODEL
	#================================
	logger.info("Loading the UMAP reducer from file %s ..." % modelfile)
	try:
		self.reducer = pickle.load(open(modelfile, 'rb'))
	except Exception as e:
		logger.error("Failed to load model from file %s (err=%s)!" % (modelfile, str(e)))
		return -1

	#================================
	#==   PREDICT
	#================================
	if self.__predict() < 0:
		logger.error("Predict failed!")
		return -1

	return 0

def classify_sources(self):
	""" Run source classification """

	# - Run source classification
	if procId==MASTER:
		sclass_status= 0

		# - Define sclassifier class
		multiclass= True
		if self.binary_class:
			multiclass= False

		sclass= SClassifier(multiclass=multiclass)
		sclass.normalize= self.normalize_feat
		sclass.outfile= self.outfile_sclass
		sclass.outfile_metrics= self.outfile_sclass_metrics
		sclass.outfile_cm= self.outfile_sclass_cm
		sclass.outfile_cm_norm= self.outfile_sclass_cm_norm
		sclass.save_labels= self.save_class_labels
		sclass.find_outliers= self.find_outliers
		sclass.outlier_modelfile= self.modelfile_outlier
		sclass.outlier_thr= self.anomaly_thr
		sclass.outlier_max_samples= self.max_samples
		sclass.outlier_max_features= self.max_features
		sclass.save_outlier= self.save_outlier
		sclass.outlier_outfile= self.outfile_outlier

		# - Run classification
		sclass_status= sclass.run_predict(
			#data=self.feat_colors, class_ids=self.feat_colors_classids, snames=self.feat_colors_snames,
			data=self.feat_all,
			class_ids=self.feat_all_classids,
			snames=self.feat_all_snames,
			modelfile=self.modelfile,
			scalerfile=self.scalerfile
		)
		if sclass_status<0:
			logger.error("[PROC %d] Failed to run classifier on input feature data!" % (procId))
	else:
		sclass_status= 0

	if comm is not None:
		sclass_status= comm.bcast(sclass_status, root=MASTER)

	if sclass_status<0:
		logger.error("[PROC %d] Failed to run classifier on input feature data, exit!" % (procId))
		return -1

	return 0

def run(self, datafile):
	""" Run feature selection """

	#================================
	#==   LOAD DATA
	#================================
	# - Check inputs
	if datafile == "":
		logger.error("Empty data file specified!")
		return -1

	if self.set_data_from_file(datafile) < 0:
		logger.error("Failed to read datafile %s!" % datafile)
		return -1

	#================================
	#==   EVALUATE MODELS
	#================================
	logger.info("Evaluating models ...")
	if self.__evaluate_model() < 0:
		logger.error("Failed to evaluate models!")
		return -1

	#================================
	#==   SAVE
	#================================
	logger.info("Saving results ...")
	if self.__save() < 0:
		logger.error("Failed to save results!")
		return -1

	return 0

def run(self, data, class_ids=[], snames=[]):
	""" Run feature selection using input dataset """

	#================================
	#==   LOAD DATA
	#================================
	# - Check inputs
	if data is None:
		logger.error("None input data specified!")
		return -1

	if self.set_data(data, class_ids, snames) < 0:
		logger.error("Failed to set input data!")
		return -1

	#================================
	#==   EVALUATE MODELS
	#================================
	logger.info("Evaluating models ...")
	if self.__evaluate_model() < 0:
		logger.error("Failed to evaluate models!")
		return -1

	#================================
	#==   SAVE
	#================================
	logger.info("Saving results ...")
	if self.__save() < 0:
		logger.error("Failed to save results!")
		return -1

	return 0

def run_predict(self, data, class_ids=[], snames=[], modelfile=''):
	""" Run predict using input dataset """

	#================================
	#==   LOAD DATA
	#================================
	# - Check inputs
	if data is None:
		logger.error("None input data specified!")
		return -1

	if self.set_data(data, class_ids, snames) < 0:
		logger.error("Failed to set input data!")
		return -1

	#================================
	#==   LOAD MODEL
	#================================
	logger.info("Loading the clustering model from file %s ..." % modelfile)
	try:
		self.clusterer, self.prediction_extra_data = pickle.load(open(modelfile, 'rb'))
	except Exception as e:
		logger.error("Failed to load model from file %s (err=%s)!" % (modelfile, str(e)))
		return -1

	#================================
	#==   PREDICT
	#================================
	if self.__predict() < 0:
		logger.error("Predict failed!")
		return -1

	return 0

def select(self, data, selcols, class_ids=[], snames=[]):
	""" Select data columns provided in selcols list """

	#================================
	#==   LOAD DATA
	#================================
	# - Check inputs
	if data is None:
		logger.error("None input data specified!")
		return -1

	if self.set_data(data, class_ids, snames) < 0:
		logger.error("Failed to set input data!")
		return -1

	#================================
	#==   SELECT COLUMNS
	#================================
	logger.info("Extracting columns ...")
	if self.__select_cols(selcols) < 0:
		logger.error("Failed to select data columns!")
		return -1

	#================================
	#==   SAVE
	#================================
	logger.info("Saving results ...")
	if self.__save() < 0:
		logger.error("Failed to save results!")
		return -1

	return 0

def select_from_file(self, datafile, selcols):
	""" Select data columns provided in selcols list """

	#================================
	#==   LOAD DATA
	#================================
	# - Check inputs
	if datafile == "":
		logger.error("Empty data file specified!")
		return -1

	if self.set_data_from_file(datafile) < 0:
		logger.error("Failed to read datafile %s!" % datafile)
		return -1

	#================================
	#==   SELECT COLUMNS
	#================================
	logger.info("Extracting columns ...")
	if self.__select_cols(selcols) < 0:
		logger.error("Failed to select data columns!")
		return -1

	#================================
	#==   SAVE
	#================================
	logger.info("Saving results ...")
	if self.__save() < 0:
		logger.error("Failed to save results!")
		return -1

	return 0

def shrink_masks(self, kernsizes=[]):
	""" Shrink masks """

	# - Set erosion kernel sizes
	if not kernsizes or len(kernsizes)!=self.nchannels:
		logger.info("kernsizes not specified, setting kernsize=%d for all channels ..." % (self.kernsize))
		kernsizes= [self.kernsize]*self.nchannels

	print("--> kernsizes")
	print(kernsizes)

	# - Erode masks
	if self.draw:
		fig, axs = plt.subplots(4, self.nchannels)

	try:
		counter= 0
		for i in range(self.nchannels):
			data= np.copy(self.img_data[i])
			mask= np.copy(self.img_data_mask[i])

			# - Do erosion if kernsize is >0
			if kernsizes[i]>0:
				structel= cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernsizes[i], kernsizes[i]))
				mask_eroded = cv2.erode(mask, structel, iterations=1)
				self.img_data_mask[i]= mask_eroded

				data_eroded= self.img_data[i]
				data_eroded[mask_eroded==0]= 0
				self.img_data[i]= data_eroded

			if self.draw:
				axs[0, i].imshow(data)
				counter+= 1
				axs[1, i].imshow(mask)
				counter+= 1
				axs[2, i].imshow(self.img_data[i])
				counter+= 1
				axs[3, i].imshow(self.img_data_mask[i])
				counter+= 1

	except Exception as e:
		logger.error("Failed to shrink masks (err=%s)!" % (str(e)))
		return -1

	if self.draw:
		plt.show()

	return 0

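# Illustrative sketch (not part of the original class): the OpenCV mask-erosion step
# used by shrink_masks above, applied to a single channel. `data` and `mask` are
# assumed to be 2D numpy arrays, with mask==0 outside the source.
def _example_erode_mask(data, mask, kernsize=5):
	""" Erode a mask with an elliptical kernel and zero the image outside it (sketch) """
	import numpy as np
	import cv2

	# - Elliptical structuring element of size kernsize x kernsize
	structel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernsize, kernsize))
	mask_eroded = cv2.erode(mask, structel, iterations=1)

	# - Zero image pixels falling outside the eroded mask
	data_eroded = np.copy(data)
	data_eroded[mask_eroded == 0] = 0
	return data_eroded, mask_eroded
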
def __predict(self):
	""" Encode input data using the loaded UMAP reducer """

	#====================================================
	#==   CHECK DATA & MODEL
	#====================================================
	# - Check if data are set
	if self.data is None:
		logger.error("Input data array is None!")
		return -1

	# - Check if reducer is set
	if self.reducer is None:
		logger.error("UMAP reducer is not set!")
		return -1

	#====================================================
	#==   ENCODE DATA
	#====================================================
	logger.info("Encode input data using loaded model ...")
	self.encoded_data_unsupervised = self.reducer.transform(self.data)

	#================================
	#==   SAVE ENCODED DATA
	#================================
	# - Unsupervised encoded data
	logger.info("Saving unsupervised encoded data to file ...")
	N = self.encoded_data_unsupervised.shape[0]
	print("Unsupervised encoded data shape=", self.encoded_data_unsupervised.shape)
	print("Unsupervised encoded data N=", N)

	snames = np.array(self.source_names).reshape(N, 1)
	objids = np.array(self.data_ids).reshape(N, 1)

	# - Save unsupervised encoded data
	enc_data = np.concatenate((snames, self.encoded_data_unsupervised, objids), axis=1)
	znames_counter = list(range(1, self.encoded_data_dim + 1))
	znames = '{}{}'.format('z', ' z'.join(str(item) for item in znames_counter))
	head = '{} {} {}'.format("# sname", znames, "id")
	Utils.write_ascii(enc_data, self.outfile_encoded_data_unsupervised, head)

	return 0

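# Illustrative sketch (not part of the original class): the umap-learn usage assumed
# by __predict above, i.e. fitting a reducer once, persisting it with pickle, and
# later projecting new feature vectors with transform(). The file name and the
# n_components value are placeholders.
def _example_umap_encode(train_data, new_data, modelfile="umap_reducer.pkl"):
	""" Fit a 2D UMAP reducer, save it, and encode new data with it (sketch) """
	import pickle
	import umap

	# - Fit the reducer on the training feature vectors
	reducer = umap.UMAP(n_components=2)
	reducer.fit(train_data)

	# - Persist the fitted reducer for later prediction runs
	with open(modelfile, 'wb') as f:
		pickle.dump(reducer, f)

	# - Reload and project previously unseen data into the learned embedding
	with open(modelfile, 'rb') as f:
		loaded = pickle.load(f)

	return loaded.transform(new_data)
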