def read_map(filename):
    # CCP4 map file format
    # http://www.ccp4.ac.uk/html/maplib.html
    with open(filename, "rb") as f:
        # 1024 bytes header
        header_buf = f.read(1024)
        header_int32 = numpy.frombuffer(header_buf, dtype="int32")
        header_float32 = numpy.frombuffer(header_buf, dtype="float32")
        #1      NC              # of Columns    (fastest changing in map)
        #2      NR              # of Rows
        #3      NS              # of Sections   (slowest changing in map)
        NCNRNS = tuple(header_int32[:3])
        #4      MODE            Data type
        #                       0 = envelope stored as signed bytes (from
        #                           -128 lowest to 127 highest)
        #                       1 = Image stored as Integer*2
        #                       2 = Image stored as Reals
        #                       3 = Transform stored as Complex Integer*2
        #                       4 = Transform stored as Complex Reals
        #                       5 == 0
        #
        #                       Note: Mode 2 is the normal mode used in
        #                             the CCP4 programs. Other modes than 2 and 0
        #                             may NOT WORK
        MODE = header_int32[3]
        dtype = ["int8", "int16", "float32", None, "complex64", "int8"][MODE]
        if MODE not in [0, 2]:
            log.log_warning(logger, "WARNING: Map file data type \"MODE=%i\" may not work." % MODE)
        #11-13  X/Y/Z length    Cell dimensions (Angstroms)
        # Voxel spacing in meters (cell length per voxel, Angstrom -> m); assumes a cubic map
        dx = header_float32[10] / float(NCNRNS[0]) * 1E-10
        #24     NSYMBT          Number of bytes used for storing symmetry operators
        NSYMBT = header_int32[23]
        if NSYMBT > 0:
            log.log_warning(logger, "WARNING: Omitting symmetry operations in map file.")
        f.read(NSYMBT)
        # The remaining bytes are data; sections are the slowest changing index
        data = f.read()
        data = numpy.frombuffer(data, dtype=dtype).reshape((NCNRNS[2], NCNRNS[1], NCNRNS[0]))
    return data, dx
def is_model_support(model_name, model_list):
    """
    :param model_name: model name
    :param model_list: implemented model list
    :return: None (raises NotImplementedError if the model is not implemented)
    """
    if model_name not in model_list:
        log_warning("use implemented model")
        raise NotImplementedError("implement a custom model(%s) in /nets/models/" % model_name)
def loss_fn(loss_fn: str = "mse"):
    """
    :param loss_fn: name of the loss function used for training
    :return: loss function module (instance)
    """
    if loss_fn == "mse":
        return loss.MSELoss()
    elif loss_fn == "L1":
        return loss.L1Loss()
    elif loss_fn == "neg_pearson":
        return NegPearsonLoss()
    elif loss_fn == "multi_margin":
        return loss.MultiMarginLoss()
    elif loss_fn == "bce":
        return loss.BCELoss()
    elif loss_fn == "huber":
        return loss.HuberLoss()
    elif loss_fn == "cosine_embedding":
        return loss.CosineEmbeddingLoss()
    elif loss_fn == "cross_entropy":
        return loss.CrossEntropyLoss()
    elif loss_fn == "ctc":
        return loss.CTCLoss()
    elif loss_fn == "bce_with_logits":
        return loss.BCEWithLogitsLoss()
    elif loss_fn == "gaussian_nll":
        return loss.GaussianNLLLoss()
    elif loss_fn == "hinge_embedding":
        return loss.HingeEmbeddingLoss()
    elif loss_fn == "KLDiv":
        return loss.KLDivLoss()
    elif loss_fn == "margin_ranking":
        return loss.MarginRankingLoss()
    elif loss_fn == "multi_label_margin":
        return loss.MultiLabelMarginLoss()
    elif loss_fn == "multi_label_soft_margin":
        return loss.MultiLabelSoftMarginLoss()
    elif loss_fn == "nll":
        return loss.NLLLoss()
    elif loss_fn == "nll2d":
        return loss.NLLLoss2d()
    elif loss_fn == "pairwise":
        return loss.PairwiseDistance()
    elif loss_fn == "poisson_nll":
        return loss.PoissonNLLLoss()
    elif loss_fn == "smooth_l1":
        return loss.SmoothL1Loss()
    elif loss_fn == "soft_margin":
        return loss.SoftMarginLoss()
    elif loss_fn == "triplet_margin":
        return loss.TripletMarginLoss()
    elif loss_fn == "triplet_margin_distance":
        return loss.TripletMarginWithDistanceLoss()
    else:
        log_warning("use implemented loss functions")
        raise NotImplementedError("implement a custom function(%s) in loss.py" % loss_fn)
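
# The NotImplementedError branch above asks callers to add custom losses to loss.py.
# The NegPearsonLoss name is already referenced above; the body below is only an
# illustrative sketch of such a custom loss (negative Pearson correlation, common in
# rPPG work), not necessarily the project's actual implementation.
import torch
import torch.nn as nn


class NegPearsonLossSketch(nn.Module):
    """Illustrative negative Pearson correlation loss (assumed interface)."""

    def forward(self, predictions, targets):
        # flatten to (batch, time)
        predictions = predictions.reshape(predictions.shape[0], -1)
        targets = targets.reshape(targets.shape[0], -1)
        # center both signals per sample
        pred_centered = predictions - predictions.mean(dim=1, keepdim=True)
        targ_centered = targets - targets.mean(dim=1, keepdim=True)
        # Pearson correlation per sample, averaged over the batch
        numerator = (pred_centered * targ_centered).sum(dim=1)
        denominator = torch.sqrt((pred_centered ** 2).sum(dim=1) * (targ_centered ** 2).sum(dim=1) + 1e-8)
        pearson = numerator / denominator
        return torch.mean(1.0 - pearson)
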
def __init__(self, filename, chunksize=2, gzip_compression=False):
    self._filename = os.path.expandvars(filename)
    if os.path.exists(self._filename):
        log.log_warning(logger, "File %s exists and is being overwritten" % self._filename)
    self._f = h5py.File(self._filename, "w")
    self._i = 0
    self._chunksize = chunksize
    self._create_dataset_kwargs = {}
    if gzip_compression:
        self._create_dataset_kwargs["compression"] = "gzip"
def _write_without_iterate(self, D, group_prefix="/"):
    for k in D.keys():
        if isinstance(D[k], dict):
            group_prefix_new = group_prefix + k + "/"
            log.log_debug(logger, "Writing group %s" % group_prefix_new)
            if k not in self._f[group_prefix]:
                self._f.create_group(group_prefix_new)
            self._write_without_iterate(D[k], group_prefix_new)
        else:
            name = group_prefix + k
            log.log_debug(logger, "Writing dataset %s" % name)
            data = D[k]
            if k not in self._f[group_prefix]:
                if numpy.isscalar(data):
                    maxshape = (None,)
                    shape = (self._chunksize,)
                    dtype = numpy.dtype(type(data))
                    if dtype == "S":
                        dtype = h5py.new_vlen(str)
                    axes = "experiment_identifier:value"
                else:
                    data = numpy.asarray(data)
                    try:
                        h5py.h5t.py_create(data.dtype, logical=1)
                    except TypeError:
                        log.log_warning(logger, "Could not save dataset %s. Conversion to numpy array failed" % name)
                        continue
                    maxshape = tuple([None] + list(data.shape))
                    shape = tuple([self._chunksize] + list(data.shape))
                    dtype = data.dtype
                    ndim = data.ndim
                    axes = "experiment_identifier"
                    if ndim == 1:
                        axes = axes + ":x"
                    elif ndim == 2:
                        axes = axes + ":y:x"
                    elif ndim == 3:
                        axes = axes + ":z:y:x"
                log.log_debug(logger, "Create dataset %s [shape=%s, dtype=%s]" % (name, str(shape), str(dtype)))
                self._f.create_dataset(name, shape, maxshape=maxshape, dtype=dtype, **self._create_dataset_kwargs)
                self._f[name].attrs.modify("axes", [axes])
            if self._f[name].shape[0] <= self._i:
                if numpy.isscalar(data):
                    data_shape = []
                else:
                    data_shape = data.shape
                new_shape = tuple([self._chunksize * (self._i / self._chunksize + 1)] + list(data_shape))
                log.log_debug(logger, "Resize dataset %s [old shape: %s, new shape: %s]" % (name, str(self._f[name].shape), str(new_shape)))
                self._f[name].resize(new_shape)
            log.log_debug(logger, "Write to dataset %s at stack position %i" % (name, self._i))
            if numpy.isscalar(data):
                self._f[name][self._i] = data
            else:
                self._f[name][self._i, :] = data[:]
def run_migration():
    if not os.path.exists("./deals.db"):
        log.log_normal("running database initialization")
        initialize_db()
    log.log_normal("running migration scripts")
    mypath = "./db/migration_scripts/*.sql"
    files = glob.glob(mypath)
    ordered_files = sorted(files)
    for item in ordered_files:
        if not script_loaded(item):
            log.log_normal("Running Script [%s]" % (item))
            run_script(item)
        else:
            log.log_warning("Script already loaded [%s]" % (item))
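
# The migration runner above relies on script_loaded()/run_script() helpers defined
# elsewhere in this module. Purely as an illustration of the bookkeeping they imply
# (a table recording which scripts have already been applied), here is a hedged
# sketch; the real helpers and the actual schema of deals.db may differ.
import sqlite3


def script_loaded_sketch(script_path, db_path="./deals.db"):
    """Illustrative only: check a hypothetical 'migrations' table for script_path."""
    with sqlite3.connect(db_path) as conn:
        conn.execute("CREATE TABLE IF NOT EXISTS migrations (name TEXT PRIMARY KEY)")
        row = conn.execute("SELECT 1 FROM migrations WHERE name = ?", (script_path,)).fetchone()
        return row is not None
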
def __init__(self, values=None, formalism=None):
    self.rotation_matrix = None
    if values is None and formalism is None:
        # No rotation (rotation matrix = identity matrix)
        self.rotation_matrix = numpy.identity(3)
    elif formalism.startswith("euler_angles_") and len(formalism) == len("euler_angles_xyz"):
        self.set_with_euler_angles(values, rotation_axes=formalism[-3:])
    elif formalism == "rotation_matrix":
        self.set_with_rotation_matrix(values)
    elif formalism == "quaternion":
        self.set_with_quaternion(values)
    elif formalism in ["random", "random_x", "random_y", "random_z"]:
        if values is not None:
            log_warning(logger, "Specified formalism=%s but values is not None." % formalism)
        self._set_as_random_formalism(formalism)
    else:
        log_and_raise_error(logger, "formalism=%s is not implemented" % formalism)
        return
def summary(model, model_name):
    """
    :param model: torch.nn.Module instance
    :param model_name: implemented model name
    :return: None (prints the model summary)
    """
    log_info("=========================================")
    log_info(model_name)
    log_info("=========================================")
    if model_name == "DeepPhys" or model_name == "DeepPhys_DA":
        torchsummary.summary(model, (2, 3, 36, 36))
    elif model_name == "PhysNet" or model_name == "PhysNet_LSTM":
        torchinfo.summary(model, (1, 3, 32, 128, 128))
    elif model_name == "PPNet":
        torchinfo.summary(model, (1, 1, 250))
    elif model_name == "MetaPhys" or model_name == "MetaPhys_task":
        pass  # no summary implemented for the MetaPhys models yet
    else:
        log_warning("use implemented model")
        raise NotImplementedError("implement a custom model(%s) in /nets/models/" % model_name)
def get_model(model_name: str = "DeepPhys"):
    """
    :param model_name: model name
    :return: model instance
    """
    if model_name == "DeepPhys":
        return DeepPhys()
    elif model_name == "DeepPhys_DA":
        return DeepPhys_DA()
    elif model_name in ("PhysNet", "MetaPhysNet"):
        return PhysNet()
    elif model_name in ("MetaPhys", "MetaPhys_task"):
        return TSCAN()
    elif model_name == "PhysNet_LSTM":
        return PhysNet_2DCNN_LSTM()
    elif model_name == "PPNet":
        return PPNet()
    elif model_name == "MMAML_Phys":
        return FiLM()
    else:
        log_warning("use implemented model")
        raise NotImplementedError("implement a custom model(%s) in /nets/models/" % model_name)
def optimizers(model_params, learning_rate: float = 1, optim: str = "adam"):
    '''
    call optimizer
    :param model_params: learning target's parameters
    :param learning_rate: learning rate
    :param optim: optimizer name
    :return: selected optimizer object
    '''
    if optim == "adam":
        return opt.Adam(model_params, learning_rate)
    elif optim == "sgd":
        return opt.SGD(model_params, learning_rate)
    elif optim == "rms_prop":
        return opt.RMSprop(model_params, learning_rate)
    elif optim == "ada_delta":
        return opt.Adadelta(model_params, learning_rate)
    elif optim == "ada_grad":
        return opt.Adagrad(model_params, learning_rate)
    elif optim == "ada_max":
        return opt.Adamax(model_params, learning_rate)
    elif optim == "ada_mw":
        return opt.AdamW(model_params, learning_rate)
    elif optim == "a_sgd":
        return opt.ASGD(model_params, learning_rate)
    elif optim == "lbfgs":
        return opt.LBFGS(model_params, learning_rate)
    elif optim == "n_adam":
        return opt.NAdam(model_params, learning_rate)
    elif optim == "r_adam":
        return opt.RAdam(model_params, learning_rate)
    elif optim == "rprop":
        return opt.Rprop(model_params, learning_rate)
    elif optim == "sparse_adam":
        return opt.SparseAdam(model_params, learning_rate)
    else:
        log_warning("use implemented optimizer")
        raise NotImplementedError("implement a custom optimizer(%s) in optimizer.py" % optim)
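
# Taken together, get_model(), loss_fn() and optimizers() form the model/loss/optimizer
# selection used at training setup. A minimal usage sketch; num_epochs, train_loader and
# the hyperparameter values are placeholders, not the project's defaults, and the input
# shapes expected by model() depend on the chosen architecture.
def training_setup_sketch(train_loader, num_epochs=10):
    model = get_model("DeepPhys")
    criterion = loss_fn("mse")
    optimizer = optimizers(model.parameters(), learning_rate=1e-3, optim="adam")

    for epoch in range(num_epochs):
        for inputs, targets in train_loader:   # assumed to be a torch DataLoader
            optimizer.zero_grad()
            outputs = model(inputs)
            train_loss = criterion(outputs, targets)
            train_loss.backward()
            optimizer.step()
    return model
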
def __init__(self, values=None, formalism=None):
    self.rotation_matrix = None
    if values is None and formalism is None:
        # No rotation (rotation matrix = identity matrix)
        self.rotation_matrix = numpy.identity(3)
    elif formalism.startswith("euler_angles_") and len(formalism) == len("euler_angles_xyz"):
        self.set_with_euler_angles(values, rotation_axes=formalism[-3:])
    elif formalism == "rotation_matrix":
        self.set_with_rotation_matrix(values)
    elif formalism == "quaternion":
        self.set_with_quaternion(values)
    elif formalism in ["random", "random_x", "random_y", "random_z"]:
        if values is not None:
            log_warning(logger, "Specified formalism=%s but values is not None." % formalism)
        self._set_as_random_formalism(formalism)
    else:
        log_and_raise_error(logger, "formalism=%s is not implemented" % formalism)
        return
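
# A short usage sketch of the Rotation constructor above. The import path is taken from
# the call in _conf_to_spsim_opts below (condor.utils.rotation.Rotation); the quaternion
# values and their (w, x, y, z) ordering are illustrative assumptions.
import numpy
from condor.utils.rotation import Rotation

# Identity rotation (no values, no formalism)
R0 = Rotation()

# Rotation specified as a unit quaternion (values are illustrative)
q = numpy.array([1., 0., 0., 0.])
Rq = Rotation(values=q, formalism="quaternion")

# Random orientation; values must stay None for the "random" formalisms
Rr = Rotation(formalism="random")
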
def _conf_to_spsim_opts(D_source, D_particle, D_detector, ndim=2, qn=None, qmax=None):
    if ndim == 2:
        if qn is not None or qmax is not None:
            log_warning(logger, "As ndim=2 the passed values for qn and qmax take no effect.")
    if ndim == 3:
        if qn is None and qmax is None:
            log_and_raise_error(logger, "As ndim=3 both qn and qmax must be not None.")
            return
    import spsim
    # Create temporary file for pdb file
    tmpf_pdb = tempfile.NamedTemporaryFile(mode='w+b', bufsize=-1, suffix='.conf', prefix='tmp_spsim', dir=None, delete=False)
    tmpf_pdb_name = tmpf_pdb.name
    tmpf_pdb.close()
    # Write pdb file
    mol = spsim.get_molecule_from_atoms(D_particle["atomic_numbers"], D_particle["atomic_positions"])
    spsim.write_pdb_from_mol(tmpf_pdb_name, mol)
    spsim.free_mol(mol)
    # Start with default spsim configuration
    opts = spsim.set_defaults()
    # Create temporary file for spsim configuration
    tmpf_conf = tempfile.NamedTemporaryFile(mode='w+b', bufsize=-1, suffix='.conf', prefix='tmp_spsim', dir=None, delete=False)
    # Write string sequence from configuration dicts
    s = []
    s += "# THIS FILE WAS CREATED AUTOMATICALLY BY CONDOR\n"
    s += "# Temporary configuration file for spsim\n"
    s += "verbosity_level = 0;\n"
    s += "number_of_dimensions = %i;\n" % ndim
    s += "number_of_patterns = 1;\n"
    s += "origin_to_com = 1;\n"
    s += "input_type = \"pdb\";\n"
    #s += "pdb_filename = \"%s\";\n" % D_particle["pdb_filename"]
    s += "pdb_filename = \"%s\";\n" % tmpf_pdb_name
    if ndim == 2:
        D = D_detector["distance"]
        Lx = D_detector["pixel_size"] * D_detector["nx"]
        Ly = D_detector["pixel_size"] * D_detector["ny"]
    else:
        k0 = 2. * numpy.pi / D_source["wavelength"]
        D = qn / 2. * D_detector["pixel_size"] * k0 / qmax
        Lx = Ly = Lz = D_detector["pixel_size"] * qn
    s += "detector_distance = %.12e;\n" % D
    s += "detector_width = %.12e;\n" % Lx
    s += "detector_height = %.12e;\n" % Ly
    if ndim == 3:
        s += "detector_depth = %.12e;\n" % Lz
    s += "detector_pixel_width = %.12e;\n" % D_detector["pixel_size"]
    s += "detector_pixel_height = %.12e;\n" % D_detector["pixel_size"]
    if ndim == 3:
        s += "detector_pixel_depth = %.12e;\n" % D_detector["pixel_size"]
    if ndim == 2:
        s += "detector_center_x = %.12e;\n" % (D_detector["pixel_size"] * (D_detector["cx"] - (D_detector["nx"] - 1) / 2.))
        s += "detector_center_y = %.12e;\n" % (D_detector["pixel_size"] * (D_detector["cy"] - (D_detector["ny"] - 1) / 2.))
    else:
        s += "detector_center_x = 0;\n"
        s += "detector_center_y = 0;\n"
        s += "detector_center_z = 0;\n"
    s += "detector_binning = 1;\n"
    s += "experiment_wavelength = %.12e;\n" % D_source["wavelength"]
    s += "experiment_beam_intensity = %.12e;\n" % D_particle["intensity"]
    s += "experiment_polarization = \"ignore\";\n"  # polarization correction will be done in Condor if needed (see experiment.py)
    #s += "use_cuda = 0;\n"
    intrinsic_rotation = condor.utils.rotation.Rotation(values=D_particle["extrinsic_quaternion"], formalism="quaternion")
    intrinsic_rotation.invert()
    e0, e1, e2 = intrinsic_rotation.get_as_euler_angles("zxz")
    if not numpy.isfinite(e0):
        print "ERROR: phi is not finite"
    if not numpy.isfinite(e1):
        print "ERROR: theta is not finite"
    if not numpy.isfinite(e2):
        print "ERROR: psi is not finite"
    s += "phi = %.12e;\n" % e0
    s += "theta = %.12e;\n" % e1
    s += "psi = %.12e;\n" % e2
    s += "random_orientation = 0;\n"
    # Write string sequence to file
    tmpf_conf.writelines(s)
    # Close temporary file
    tmpf_conf_name = tmpf_conf.name
    tmpf_conf.close()
    # Read configuration into options struct
    spsim.read_options_file(tmpf_conf_name, opts)
    # This deletes the temporary files
    os.unlink(tmpf_pdb_name)
    os.unlink(tmpf_conf_name)
    return opts
def extract_wave_new(IndList, FilteredArr, s_before, s_after, n_ch, s_start, Threshold):
    IndArr = np.array(IndList, dtype=np.int32)
    SampArr = IndArr[:, 0]
    ChArr = IndArr[:, 1]
    n_ch = FilteredArr.shape[1]
    log_fd = GlobalVariables['log_fd']
    if np.amax(SampArr) - np.amin(SampArr) > Parameters['CHUNK_OVERLAP'] / 2:
        s = '''
        ************ ERROR **********************************************
        Connected component found with width larger than CHUNK_OVERLAP/2.
        Spikes could be repeatedly detected, increase the size of
        CHUNK_OVERLAP and re-run.
        Component sample range: {sample_range}
        *****************************************************************
        '''.format(sample_range=(s_start + np.amin(SampArr), s_start + np.amax(SampArr)))
        log_warning(s, multiline=True)
        #exit()
    bc = np.bincount(ChArr)
    # convert to bool and force it to have the right type
    ChMask = np.zeros(n_ch, dtype=np.bool8)
    ChMask[:len(bc)] = bc.astype(np.bool8)
    # Find peak sample:
    # 1. upsample channels we're using on thresholded range
    # 2. find weighted mean peak sample
    SampArrMin, SampArrMax = np.amin(SampArr) - 3, np.amax(SampArr) + 4
    WavePlus = get_padded(FilteredArr, SampArrMin, SampArrMax)
    WavePlus = WavePlus[:, ChMask]
    # upsample WavePlus
    upsampling_factor = Parameters['UPSAMPLING_FACTOR']
    if upsampling_factor > 1:
        old_s = np.arange(WavePlus.shape[0])
        new_s_i = np.arange((WavePlus.shape[0] - 1) * upsampling_factor + 1)
        new_s = np.array(new_s_i * (1.0 / upsampling_factor), dtype=np.float32)
        f = interp1d(old_s, WavePlus, bounds_error=True, kind='cubic', axis=0)
        UpsampledWavePlus = f(new_s)
    else:
        UpsampledWavePlus = WavePlus
    # find weighted mean peak for each channel above threshold
    if Parameters['USE_WEIGHTED_MEAN_PEAK_SAMPLE']:
        peak_sum = 0.0
        total_weight = 0.0
        for ch in xrange(WavePlus.shape[1]):
            X = UpsampledWavePlus[:, ch]
            if Parameters['DETECT_POSITIVE']:
                X = -np.abs(X)
            i_intpeak = np.argmin(X)
            left, right = i_intpeak - 1, i_intpeak + 2
            if right > len(X):
                left, right = left + len(X) - right, len(X)
            elif left < 0:
                left, right = 0, right - left
            a_b_c = abc(np.arange(left, right, dtype=np.float32), X[left:right])
            s_fracpeak = max_t(a_b_c)
            if Parameters['USE_SINGLE_THRESHOLD']:
                weight = -(X[i_intpeak] + Threshold)
            else:
                weight = -(X[i_intpeak] + Threshold[ch])
            if weight < 0:
                weight = 0
            peak_sum += s_fracpeak * weight
            total_weight += weight
        s_fracpeak = (peak_sum / total_weight)
    else:
        if Parameters['DETECT_POSITIVE']:
            X = -np.abs(UpsampledWavePlus)
        else:
            X = UpsampledWavePlus
        s_fracpeak = 1.0 * np.argmin(np.amin(X, axis=1))
    # s_fracpeak currently in coords of UpsampledWavePlus
    s_fracpeak = s_fracpeak / upsampling_factor
    # s_fracpeak now in coordinates of WavePlus
    s_fracpeak += SampArrMin
    # s_fracpeak now in coordinates of FilteredArr
    # get block of given size around peaksample
    try:
        s_peak = int(s_fracpeak)
    except ValueError:
        # This is a bit of a hack. Essentially, the problem here is that
        # s_fracpeak is a nan because the interpolation didn't work, and
        # therefore we want to skip the spike. There's already code in
        # core.extract_spikes that does this if a LinAlgError is raised,
        # so we just use that to skip this spike (and write a message to the
        # log).
        raise np.linalg.LinAlgError
    WaveBlock = get_padded(FilteredArr, s_peak - s_before - 1, s_peak + s_after + 2)
    # Perform interpolation around the fractional peak
    old_s = np.arange(s_peak - s_before - 1, s_peak + s_after + 2)
    new_s = np.arange(s_peak - s_before, s_peak + s_after) + (s_fracpeak - s_peak)
    f = interp1d(old_s, WaveBlock, bounds_error=True, kind='cubic', axis=0)
    Wave = f(new_s)
    return Wave, s_peak, ChMask
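
# The abc()/max_t() pair used above fits a parabola through the samples around the
# integer peak and returns the abscissa of the parabola's vertex, which gives
# sub-sample peak localization. The real helpers live elsewhere in this code base;
# the following is only a self-contained sketch of the same idea using numpy.
import numpy as np


def parabola_coeffs(x, y):
    """Fit y = a*x**2 + b*x + c through the given points (least squares)."""
    return np.polyfit(x, y, 2)  # returns (a, b, c)


def parabola_vertex(coeffs):
    """Abscissa of the extremum of a*x**2 + b*x + c."""
    a, b, _ = coeffs
    return -b / (2.0 * a)


# three samples around an (inverted) peak at x = 10; values are illustrative
x = np.array([9.0, 10.0, 11.0])
y = np.array([-3.0, -5.0, -4.0])
print(parabola_vertex(parabola_coeffs(x, y)))  # fractional peak position, ~10.17
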
def extract_wave(IndList, FilteredArr, s_before, s_after, n_ch, s_start, Threshold):
    '''
    Extract an aligned wave corresponding to a spike.

    Arguments:

    IndList
        A list of pairs (sample_number, channel_number) returned from the
        thresholding and flood filling algorithm
    FilteredArr
        An array of shape (numsamples, numchannels) containing the filtered
        wave data
    s_before, s_after
        The number of samples to return before and after the peak

    Returns a tuple (Wave, PeakSample, ChMask):

    Wave
        The wave aligned around the peak (with interpolation to give subsample
        alignment), consisting of s_before+s_after+1 samples.
    PeakSample
        The index of the peak sample in FilteredArr (the peak sample in Wave
        will always be s_before).
    ChMask
        The mask for this spike, a boolean array of length the number of
        channels, with value 1 if the channel is used and 0 otherwise.
    '''
    if Parameters['USE_WEIGHTED_MEAN_PEAK_SAMPLE'] or Parameters['UPSAMPLING_FACTOR'] > 1:
        return extract_wave_new(IndList, FilteredArr, s_before, s_after, n_ch, s_start, Threshold)
    IndArr = np.array(IndList, dtype=np.int32)
    SampArr = IndArr[:, 0]
    log_fd = GlobalVariables['log_fd']
    if np.amax(SampArr) - np.amin(SampArr) > Parameters['CHUNK_OVERLAP'] / 2:
        s = '''
        ************ ERROR **********************************************
        Connected component found with width larger than CHUNK_OVERLAP/2.
        Spikes could be repeatedly detected, increase the size of
        CHUNK_OVERLAP and re-run.
        Component sample range: {sample_range}
        *****************************************************************
        '''.format(sample_range=(s_start + np.amin(SampArr), s_start + np.amax(SampArr)))
        log_warning(s, multiline=True)
        #exit()
    ChArr = IndArr[:, 1]
    n_ch = FilteredArr.shape[1]
    # Find peak sample and channel
    # TODO: argmin only works for negative threshold crossings
    PeakInd = FilteredArr[SampArr, ChArr].argmin()
    PeakSample, PeakChannel = SampArr[PeakInd], ChArr[PeakInd]
    # Ensure that we get a fixed size chunk of the wave, padded with zeroes if
    # the segment from PeakSample-s_before-1 to PeakSample+s_after+1 goes
    # outside the bounds of FilteredArr.
    WavePlus = get_padded(FilteredArr, PeakSample - s_before - 1, PeakSample + s_after + 1)
    # Perform interpolation around the fractional peak
    Wave = interp_around_peak(WavePlus, s_before + 1, PeakChannel, s_before, s_after)
    # Return the aligned wave, the peak sample index and the associated mask
    # which is computed by counting the number of times each channel index
    # appears in IndList and then converting to a bool (so that channel i is
    # True if channel i features at least once).
    bc = np.bincount(ChArr)
    # convert to bool and force it to have the right type
    ChMask = np.zeros(n_ch, dtype=np.bool8)
    ChMask[:len(bc)] = bc.astype(np.bool8)
    return Wave, PeakSample, ChMask
def read_map(filename):
    log.log_info(logger, "Automatic scaling of EM maps may not be reliable. Please make sure to check your map after using this functionality.")
    # CCP4 map file format
    # http://www.ccp4.ac.uk/html/maplib.html
    with open(filename, "rb") as f:
        # 1024 bytes header
        header_buf = f.read(1024)
        temp_int32 = numpy.frombuffer(header_buf, dtype="int32")
        temp_float32 = numpy.frombuffer(header_buf, dtype="float32")
        #1      NC              # of Columns    (fastest changing in map)
        #2      NR              # of Rows
        #3      NS              # of Sections   (slowest changing in map)
        NC = temp_int32[0]
        NR = temp_int32[1]
        NS = temp_int32[2]
        if NC != NR or NR != NS:
            log.log_and_raise_error(logger, "Cannot read a map with unequal dimensions")
        N = NC
        #4      MODE            Data type
        #                       0 = envelope stored as signed bytes (from
        #                           -128 lowest to 127 highest)
        #                       1 = Image stored as Integer*2
        #                       2 = Image stored as Reals
        #                       3 = Transform stored as Complex Integer*2
        #                       4 = Transform stored as Complex Reals
        #                       5 == 0
        #
        #                       Note: Mode 2 is the normal mode used in
        #                             the CCP4 programs. Other modes than 2 and 0
        #                             may NOT WORK
        MODE = temp_int32[3]
        dtype = ["int8", "int16", "float32", None, "complex64", "int8"][MODE]
        if MODE == 3:
            log.log_and_raise_error(logger, "Map file data type \"MODE=%i\" is not implemented yet." % MODE)
        if MODE not in [0, 1, 2, 5]:
            log.log_warning(logger, "Map file data type \"MODE=%i\" not supported yet and may not work reliably." % MODE)
        #11     X length        Cell dimensions (Angstroms)
        #12     Y length                "
        #13     Z length                "
        dX = temp_float32[10] / float(N) * 1E-10
        dY = temp_float32[11] / float(N) * 1E-10
        dZ = temp_float32[12] / float(N) * 1E-10
        if dX != dY or dY != dZ:
            log.log_and_raise_error(logger, "Cannot read a map with unequal voxel dimensions")
        #17     MAPC            Which axis corresponds to Cols.  (1,2,3 for X,Y,Z)
        #18     MAPR            Which axis corresponds to Rows   (1,2,3 for X,Y,Z)
        #19     MAPS            Which axis corresponds to Sects. (1,2,3 for X,Y,Z)
        MAPC = temp_int32[16]
        MAPR = temp_int32[17]
        MAPS = temp_int32[18]
        #24     NSYMBT          Number of bytes used for storing symmetry operators
        NSYMBT = temp_int32[23]
        if NSYMBT > 0:
            log.log_warning(logger, "Omitting symmetry operations in map file.")
        f.read(NSYMBT)
        # The remaining bytes are data
        raw_data = f.read()
        raw_data = numpy.frombuffer(raw_data, dtype=dtype)
    # Now we need to project onto the right Z-Y-X array grid
    S, R, C = numpy.meshgrid(numpy.arange(NS), numpy.arange(NR), numpy.arange(NC), indexing='ij')
    S = S.flatten()
    R = R.flatten()
    C = C.flatten()
    if MAPC == 1:
        X = C
        Xlen = NC
    elif MAPC == 2:
        Y = C
        Ylen = NC
    elif MAPC == 3:
        Z = C
        Zlen = NC
    if MAPR == 1:
        X = R
        Xlen = NR
    elif MAPR == 2:
        Y = R
        Ylen = NR
    elif MAPR == 3:
        Z = R
        Zlen = NR
    if MAPS == 1:
        X = S
        Xlen = NS
    elif MAPS == 2:
        Y = S
        Ylen = NS
    elif MAPS == 3:
        Z = S
        Zlen = NS
    # Destination index (in Z-Y-X linear order) for every element of the raw
    # column-fastest data, then scatter the raw data into place
    i = Z * (Ylen * Xlen) + Y * (Xlen) + X
    data = numpy.zeros(Zlen * Ylen * Xlen, dtype=dtype)
    data[i] = raw_data
    data = data.reshape((Zlen, Ylen, Xlen))
    return data, dX
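
# A short usage sketch of the reader above. The file name is a placeholder; dX is the
# voxel spacing in meters (the header stores cell dimensions in Angstrom), and the
# returned array is ordered (z, y, x).
density, dx = read_map("emd_1234.map")   # hypothetical CCP4/MRC-style map with cubic voxels
print(density.shape)                     # (N, N, N), ordered as (z, y, x)
print(dx)                                # voxel side length in meters
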
# -----------------------------------------------------------------------------------------------------
# General note:
# All variables are in SI units by default. Exceptions explicit by variable name.
# -----------------------------------------------------------------------------------------------------

import numpy, os
import logging
logger = logging.getLogger(__name__)

import log

try:
    import h5py
except ImportError:
    log.log_warning(logger, "Could not import h5py.")


class CXIWriter:
    def __init__(self, filename, chunksize=2, gzip_compression=False):
        self._filename = os.path.expandvars(filename)
        if os.path.exists(self._filename):
            log.log_warning(logger, "File %s exists and is being overwritten" % self._filename)
        self._f = h5py.File(self._filename, "w")
        self._i = 0
        self._chunksize = chunksize
        self._create_dataset_kwargs = {}
        if gzip_compression:
            self._create_dataset_kwargs["compression"] = "gzip"

    def write(self, D):
        self._write_without_iterate(D)
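
# A minimal usage sketch for the writer above. Nested dicts become HDF5 groups and leaf
# values become stacked datasets. The payload layout is an illustrative assumption, and
# the full class presumably exposes further methods (e.g. for advancing the stack index
# and closing the file) that are not part of this excerpt.
import numpy

frame = {
    "entry_1": {
        "data_1": {"data": numpy.zeros((16, 16))},
    },
    "experiment_identifier": 0,
}

w = CXIWriter("output.cxi", chunksize=2, gzip_compression=True)  # file name is a placeholder
w.write(frame)
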
def extract_wave_hilbert_old(IndList, FilteredArr, FilteredHilbertArr, s_before, s_after,
                             n_ch, s_start, ThresholdStrong, ThresholdWeak):
    IndArr = np.array(IndList, dtype=np.int32)
    SampArr = IndArr[:, 0]
    ChArr = IndArr[:, 1]
    n_ch = FilteredArr.shape[1]
    log_fd = GlobalVariables['log_fd']
    if np.amax(SampArr) - np.amin(SampArr) > Parameters['CHUNK_OVERLAP'] / 2:
        s = '''
        ************ ERROR **********************************************
        Connected component found with width larger than CHUNK_OVERLAP/2.
        Spikes could be repeatedly detected, increase the size of
        CHUNK_OVERLAP and re-run.
        Component sample range: {sample_range}
        *****************************************************************
        '''.format(sample_range=(s_start + np.amin(SampArr), s_start + np.amax(SampArr)))
        log_warning(s, multiline=True)
        #exit()
    bc = np.bincount(ChArr)
    # convert to bool and force it to have the right type
    ChMask = np.zeros(n_ch, dtype=np.bool8)
    ChMask[:len(bc)] = bc.astype(np.bool8)
    n_unmasked_ch = np.sum(ChMask)

    # Find peak sample:
    # 1. upsample channels we're using on thresholded range
    # 2. find weighted mean peak sample
    SampArrMin, SampArrMax = np.amin(SampArr) - 3, np.amax(SampArr) + 4
    # ChArrMin, ChArrMax = np.amin(ChArr), np.amax(ChArr)
    WavePlus = get_padded(FilteredArr, SampArrMin, SampArrMax)
    WavePlus = WavePlus[:, ChMask]
    # upsample WavePlus
    upsampling_factor = Parameters['UPSAMPLING_FACTOR']
    if upsampling_factor > 1:
        old_s = np.arange(WavePlus.shape[0])
        new_s_i = np.arange((WavePlus.shape[0] - 1) * upsampling_factor + 1)
        new_s = np.array(new_s_i * (1.0 / upsampling_factor), dtype=np.float32)
        f = interp1d(old_s, WavePlus, bounds_error=True, kind='cubic', axis=0)
        UpsampledWavePlus = f(new_s)
    else:
        UpsampledWavePlus = WavePlus
    # find weighted mean peak for each channel above threshold
    if Parameters['USE_WEIGHTED_MEAN_PEAK_SAMPLE']:
        peak_sum = 0.0
        total_weight = 0.0
        for ch in xrange(WavePlus.shape[1]):
            X = UpsampledWavePlus[:, ch]
            if Parameters['DETECT_POSITIVE']:
                X = -np.abs(X)
            i_intpeak = np.argmin(X)
            left, right = i_intpeak - 1, i_intpeak + 2
            if right > len(X):
                left, right = left + len(X) - right, len(X)
            elif left < 0:
                left, right = 0, right - left
            a_b_c = abc(np.arange(left, right, dtype=np.float32), X[left:right])
            s_fracpeak = max_t(a_b_c)
            weight = -X[i_intpeak]
            if weight < 0:
                weight = 0
            peak_sum += s_fracpeak * weight
            total_weight += weight
        s_fracpeak = (peak_sum / total_weight)
    else:
        if Parameters['DETECT_POSITIVE']:
            X = -np.abs(UpsampledWavePlus)
        else:
            X = UpsampledWavePlus
        s_fracpeak = 1.0 * np.argmin(np.amin(X, axis=1))
    # s_fracpeak currently in coords of UpsampledWavePlus
    s_fracpeak = s_fracpeak / upsampling_factor
    # s_fracpeak now in coordinates of WavePlus
    s_fracpeak += SampArrMin
    # s_fracpeak now in coordinates of FilteredArr

    #################################
    # NEW: FLOAT MASK
    #################################
    # connected component as window in chunk with Hilbert
    # contains values only on weak threshold-exceeding points,
    # zeros everywhere else
    comp = np.zeros((SampArrMax - SampArrMin, n_ch), dtype=FilteredHilbertArr.dtype)
    comp[SampArr - SampArrMin, ChArr] = FilteredHilbertArr[SampArr, ChArr]
    # 1D array: for each channel, the peak of the Hilbert, relative to the
    # start of the chunk
    peaks = np.argmax(comp, axis=0) + SampArrMin
    # 1D array: values of the peaks, on each channel
    peaks_values = FilteredHilbertArr[peaks, np.arange(0, n_ch)] * ChMask
    FloatChMask = np.clip((peaks_values - ThresholdWeak) / (ThresholdStrong - ThresholdWeak), 0, 1)

    # #################################
    # # New alignment
    # #################################
    # # In the window of the chunk (connected component), we take the clipped Hilbert
    # # (masks between 0 and 1).
    # comp_clipped = np.clip((comp - ThresholdWeak) / (ThresholdStrong - ThresholdWeak), 0, 1)
    # # now we take the weighted average of the sample times in the component
    # s_fracpeak = np.sum(comp_clipped * np.arange(SampArrMax - SampArrMin).reshape((-1, 1))) / np.sum(comp_clipped)
    # s_fracpeak += SampArrMin

    #################################
    # Realign spike with respect to s_fracpeak
    #################################
    # get block of given size around peaksample
    try:
        s_peak = int(s_fracpeak)
    except ValueError:
        # This is a bit of a hack. Essentially, the problem here is that
        # s_fracpeak is a nan because the interpolation didn't work, and
        # therefore we want to skip the spike. There's already code in
        # core.extract_spikes that does this if a LinAlgError is raised,
        # so we just use that to skip this spike (and write a message to the
        # log).
        raise np.linalg.LinAlgError
    WaveBlock = get_padded(FilteredArr, s_peak - s_before - 1, s_peak + s_after + 2)
    # Perform interpolation around the fractional peak
    old_s = np.arange(s_peak - s_before - 1, s_peak + s_after + 2)
    new_s = np.arange(s_peak - s_before, s_peak + s_after) + (s_fracpeak - s_peak)
    try:
        f = interp1d(old_s, WaveBlock, bounds_error=True, kind='cubic', axis=0)
    except ValueError:
        # File "/usr/lib/python2.7/dist-packages/scipy/interpolate/interpolate.py", line 509, in _dot0
        #     return dot(a, b)
        # ValueError: matrices are not aligned
        raise InterpolationError
    Wave = f(new_s)

    return Wave, s_peak, s_fracpeak, ChMask, FloatChMask
def extract_wave_hilbert_new(IndList, FilteredArr, FilteredHilbertArr, s_before, s_after,
                             n_ch, s_start, ThresholdStrong, ThresholdWeak):
    IndArr = np.array(IndList, dtype=np.int32)
    SampArr = IndArr[:, 0]
    ChArr = IndArr[:, 1]
    n_ch = FilteredArr.shape[1]
    log_fd = GlobalVariables['log_fd']
    if np.amax(SampArr) - np.amin(SampArr) > Parameters['CHUNK_OVERLAP'] / 2:
        s = '''
        ************ ERROR **********************************************
        Connected component found with width larger than CHUNK_OVERLAP/2.
        Spikes could be repeatedly detected, increase the size of
        CHUNK_OVERLAP and re-run.
        Component sample range: {sample_range}
        *****************************************************************
        '''.format(sample_range=(s_start + np.amin(SampArr), s_start + np.amax(SampArr)))
        log_warning(s, multiline=True)
        #exit()
    bc = np.bincount(ChArr)
    # convert to bool and force it to have the right type
    ChMask = np.zeros(n_ch, dtype=np.bool8)
    ChMask[:len(bc)] = bc.astype(np.bool8)
    n_unmasked_ch = np.sum(ChMask)

    # Find peak sample:
    # 1. upsample channels we're using on thresholded range
    # 2. find weighted mean peak sample
    SampArrMin, SampArrMax = np.amin(SampArr) - 3, np.amax(SampArr) + 4
    # ChArrMin, ChArrMax = np.amin(ChArr), np.amax(ChArr)

    #################################
    # NEW: FLOAT MASK
    #################################
    # connected component as window in chunk with Hilbert
    # contains values only on weak threshold-exceeding points,
    # zeros everywhere else
    comp = np.zeros((SampArrMax - SampArrMin, n_ch), dtype=FilteredHilbertArr.dtype)
    comp[SampArr - SampArrMin, ChArr] = FilteredHilbertArr[SampArr, ChArr]
    # 1D array: for each channel, the peak of the Hilbert, relative to the
    # start of the chunk
    peaks = np.argmax(comp, axis=0) + SampArrMin
    # 1D array: values of the peaks, on each channel
    peaks_values = FilteredHilbertArr[peaks, np.arange(0, n_ch)] * ChMask
    FloatChMask = np.clip((peaks_values - ThresholdWeak) / (ThresholdStrong - ThresholdWeak), 0, 1)
    #embed()

    #################################
    # New alignment
    #################################
    # In the window of the chunk (connected component), we take the clipped Hilbert
    # (masks between 0 and 1).
    comp_clipped = np.clip((comp - ThresholdWeak) / (ThresholdStrong - ThresholdWeak), 0, 1)
    # No need to clip - it might make things worse - you lose the peaks!
    comp_normalised = (comp - ThresholdWeak) / (ThresholdStrong - ThresholdWeak)
    # now we take the weighted average of the sample times in the component
    s_fracpeak = np.sum(comp_normalised * np.arange(SampArrMax - SampArrMin).reshape((-1, 1))) / np.sum(comp_normalised)
    s_fracpeak += SampArrMin

    #################################
    # Realign spike with respect to s_fracpeak
    #################################
    # get block of given size around peaksample
    try:
        s_peak = int(s_fracpeak)
    except ValueError:
        # This is a bit of a hack. Essentially, the problem here is that
        # s_fracpeak is a nan because the interpolation didn't work, and
        # therefore we want to skip the spike. There's already code in
        # core.extract_spikes that does this if a LinAlgError is raised,
        # so we just use that to skip this spike (and write a message to the
        # log).
        raise np.linalg.LinAlgError
    WaveBlock = get_padded(FilteredArr, s_peak - s_before - 1, s_peak + s_after + 2)
    # Perform interpolation around the fractional peak
    old_s = np.arange(s_peak - s_before - 1, s_peak + s_after + 2)
    new_s = np.arange(s_peak - s_before, s_peak + s_after) + (s_fracpeak - s_peak)
    try:
        f = interp1d(old_s, WaveBlock, bounds_error=True, kind='cubic', axis=0)
    except ValueError:
        # File "/usr/lib/python2.7/dist-packages/scipy/interpolate/interpolate.py", line 509, in _dot0
        #     return dot(a, b)
        # ValueError: matrices are not aligned
        raise InterpolationError
    Wave = f(new_s)

    return Wave, s_peak, s_fracpeak, ChMask, FloatChMask
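
# The "New alignment" block above computes the fractional peak time as an
# intensity-weighted average of the sample indices inside the connected component.
# A small self-contained illustration of that computation with toy numbers
# (two channels, four samples; SampArrMin is an illustrative offset):
import numpy as np

comp_normalised_demo = np.array([[0.0, 0.2],
                                 [0.0, 1.0],
                                 [0.5, 0.6],
                                 [0.1, 0.0]])
SampArrMin_demo = 100  # offset of this window within the full recording

sample_indices = np.arange(comp_normalised_demo.shape[0]).reshape((-1, 1))
s_fracpeak_demo = np.sum(comp_normalised_demo * sample_indices) / np.sum(comp_normalised_demo)
s_fracpeak_demo += SampArrMin_demo
print(s_fracpeak_demo)  # weighted mean sample index, ~101.46 for these numbers
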
def extract_spikes(h5s, basename, DatFileNames, n_ch_dat,
                   ChannelsToUse, ChannelGraph,
                   max_spikes=None):
    # some global variables we use
    CHUNK_SIZE = Parameters['CHUNK_SIZE']
    CHUNKS_FOR_THRESH = Parameters['CHUNKS_FOR_THRESH']
    DTYPE = Parameters['DTYPE']
    CHUNK_OVERLAP = Parameters['CHUNK_OVERLAP']
    N_CH = Parameters['N_CH']
    S_JOIN_CC = Parameters['S_JOIN_CC']
    S_BEFORE = Parameters['S_BEFORE']
    S_AFTER = Parameters['S_AFTER']
    THRESH_SD = Parameters['THRESH_SD']
    THRESH_SD_LOWER = Parameters['THRESH_SD_LOWER']

    # filter coefficents for the high pass filtering
    filter_params = get_filter_params()
    print filter_params

    progress_bar = ProgressReporter()

    # A code that writes out a high-pass filtered version of the raw data (.fil file)
    fil_writer = FilWriter(DatFileNames, n_ch_dat)

    # Just use first dat file for getting the thresholding data
    with open(DatFileNames[0], 'rb') as fd:
        # Use 5 chunks to figure out threshold
        DatChunk = get_chunk_for_thresholding(fd, n_ch_dat, ChannelsToUse,
                                              num_samples(DatFileNames[0], n_ch_dat))
        FilteredChunk = apply_filtering(filter_params, DatChunk)
        # get the STD of the beginning of the filtered data
        if Parameters['USE_HILBERT']:
            first_chunks_std = np.std(FilteredChunk)
            print 'first_chunks_std', first_chunks_std, '\n'
        else:
            if Parameters['USE_SINGLE_THRESHOLD']:
                ThresholdSDFactor = np.median(np.abs(FilteredChunk)) / .6745
            else:
                ThresholdSDFactor = np.median(np.abs(FilteredChunk), axis=0) / .6745
            Threshold = ThresholdSDFactor * THRESH_SD
            print 'Threshold = ', Threshold, '\n'
            Parameters['THRESHOLD'] = Threshold  # Record the absolute Threshold used

    # set the high and low thresholds
    do_pickle = False
    if Parameters['USE_HILBERT']:
        ThresholdStrong = Parameters['THRESH_STRONG']
        ThresholdWeak = Parameters['THRESH_WEAK']
        do_pickle = True
    elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']:  # to be used with a single threshold only
        ThresholdStrong = Threshold
        ThresholdWeak = ThresholdSDFactor * THRESH_SD_LOWER
        do_pickle = True
    if do_pickle:
        picklefile = open("threshold.p", "wb")
        pickle.dump([ThresholdStrong, ThresholdWeak], picklefile)
        threshold_outputstring = 'Threshold strong = ' + repr(ThresholdStrong) + '\n' + 'Threshold weak = ' + repr(ThresholdWeak)
        log_message(threshold_outputstring)

    n_samples = num_samples(DatFileNames, n_ch_dat)
    spike_count = 0
    for (DatChunk, s_start, s_end,
         keep_start, keep_end) in chunks(DatFileNames, n_ch_dat, ChannelsToUse):
        ############## FILTERING ########################################
        FilteredChunk = apply_filtering(filter_params, DatChunk)

        # write filtered output to file
        if Parameters['WRITE_FIL_FILE']:
            fil_writer.write(FilteredChunk, s_start, s_end, keep_start, keep_end)

        ############## THRESHOLDING #####################################
        # NEW: HILBERT TRANSFORM
        if Parameters['USE_HILBERT']:
            FilteredChunkHilbert = np.abs(signal.hilbert(FilteredChunk, axis=0) / first_chunks_std) ** 2
            BinaryChunkWeak = FilteredChunkHilbert > ThresholdWeak
            BinaryChunkStrong = FilteredChunkHilbert > ThresholdStrong
            BinaryChunkWeak = BinaryChunkWeak.astype(np.int8)
            BinaryChunkStrong = BinaryChunkStrong.astype(np.int8)
        #elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']:
        else:
            # Usual method
            if Parameters['USE_COMPONENT_ALIGNFLOATMASK']:
                if Parameters['DETECT_POSITIVE']:
                    BinaryChunkWeak = FilteredChunk > ThresholdWeak
                    BinaryChunkStrong = FilteredChunk > ThresholdStrong
                else:
                    BinaryChunkWeak = FilteredChunk < -ThresholdWeak
                    BinaryChunkStrong = FilteredChunk < -ThresholdStrong
                BinaryChunkWeak = BinaryChunkWeak.astype(np.int8)
                BinaryChunkStrong = BinaryChunkStrong.astype(np.int8)
            else:
                if Parameters['DETECT_POSITIVE']:
                    BinaryChunk = np.abs(FilteredChunk) > Threshold
                else:
                    BinaryChunk = (FilteredChunk < -Threshold)
                BinaryChunk = BinaryChunk.astype(np.int8)
        # write filtered output to file
        #if Parameters['WRITE_FIL_FILE']:
        #    fil_writer.write(FilteredChunk, s_start, s_end, keep_start, keep_end)

        ############### FLOOD FILL ######################################
        ChannelGraphToUse = complete_if_none(ChannelGraph, N_CH)
        if (Parameters['USE_HILBERT'] or Parameters['USE_COMPONENT_ALIGNFLOATMASK']):
            if Parameters['USE_OLD_CC_CODE']:
                IndListsChunkOld = connected_components(BinaryChunkWeak,
                                                        ChannelGraphToUse, S_JOIN_CC)
                IndListsChunk = []
                # Final list of connected components. Go through all 'weak' connected components
                # and only include in final list if there are some samples that also exceed the
                # strong threshold. This method works better than connected_components_twothresholds.
                for IndListWeak in IndListsChunkOld:
                    #if sum(BinaryChunkStrong[zip(*IndListWeak)]) != 0:
                    i, j = np.array(IndListWeak).transpose()
                    if sum(BinaryChunkStrong[i, j]) != 0:
                        IndListsChunk.append(IndListWeak)
            else:
                IndListsChunk = connected_components_twothresholds(BinaryChunkWeak, BinaryChunkStrong,
                                                                   ChannelGraphToUse, S_JOIN_CC)
            BinaryChunk = 1 * BinaryChunkWeak + 1 * BinaryChunkStrong
        else:
            IndListsChunk = connected_components(BinaryChunk,
                                                 ChannelGraphToUse, S_JOIN_CC)
        if Parameters['DEBUG']:
            # TODO: Change plot_diagnostics for the HILBERT case
            if Parameters['USE_HILBERT']:
                plot_diagnostics_twothresholds(s_start, IndListsChunk, BinaryChunkWeak, BinaryChunkStrong,
                                               BinaryChunk, DatChunk, FilteredChunk, FilteredChunkHilbert,
                                               ThresholdStrong, ThresholdWeak)
            elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']:
                plot_diagnostics_twothresholds(s_start, IndListsChunk, BinaryChunkWeak, BinaryChunkStrong,
                                               BinaryChunk, DatChunk, FilteredChunk, -FilteredChunk,
                                               ThresholdStrong, ThresholdWeak)  # TODO: change Hilbert argument in plot_diagnostics_twothresholds
            else:
                plot_diagnostics(s_start, IndListsChunk, BinaryChunk, DatChunk, FilteredChunk, Threshold)
        if Parameters['WRITE_BINFIL_FILE']:
            fil_writer.write_bin(BinaryChunk, s_start, s_end, keep_start, keep_end)
        #print len(IndListsChunk), 'len(IndListsChunk)'

        ############## ALIGN AND INTERPOLATE WAVES #######################
        nextbits = []
        if Parameters['USE_HILBERT']:
            for IndList in IndListsChunk:
                try:
                    wave, s_peak, sf_peak, cm, fcm = extract_wave_hilbert_new(IndList, FilteredChunk,
                                                                              FilteredChunkHilbert,
                                                                              S_BEFORE, S_AFTER, N_CH, s_start,
                                                                              ThresholdStrong, ThresholdWeak)
                    s_offset = s_start + s_peak
                    sf_offset = s_start + sf_peak
                    if keep_start <= s_offset < keep_end:
                        spike_count += 1
                        nextbits.append((wave, s_offset, sf_offset, cm, fcm))
                except np.linalg.LinAlgError:
                    s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format(
                        chunk=(s_start, s_end))
                    log_warning(s)
                except InterpolationError:
                    s = '*** WARNING *** Interpolation error in chunk {chunk}.'.format(
                        chunk=(s_start, s_end))
                    log_warning(s)
            # and return them in time sorted order
            nextbits.sort(key=lambda (wave, s, s_frac, cm, fcm): s_frac)
            for wave, s, s_frac, cm, fcm in nextbits:
                uwave = get_padded(DatChunk, int(s) - S_BEFORE - s_start,
                                   int(s) + S_AFTER - s_start).astype(np.int32)
                #cm = add_penumbra(cm, ChannelGraphToUse,
                #                  Parameters['PENUMBRA_SIZE'])
                #fcm = get_float_mask(wave, cm, ChannelGraphToUse, 1.)
                yield uwave, wave, s, s_frac, cm, fcm  # unfiltered wave, wave, s_peak, ChMask, FloatChMask
        elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']:
            for IndList in IndListsChunk:
                try:
                    if Parameters['DETECT_POSITIVE']:
                        wave, s_peak, sf_peak, cm, fcm, comp_normalised, comp_normalised_power = \
                            extract_wave_twothresholds(IndList, FilteredChunk, FilteredChunk,
                                                       S_BEFORE, S_AFTER, N_CH, s_start,
                                                       ThresholdStrong, ThresholdWeak)
                    else:
                        wave, s_peak, sf_peak, cm, fcm, comp_normalised, comp_normalised_power = \
                            extract_wave_twothresholds(IndList, FilteredChunk, -FilteredChunk,
                                                       S_BEFORE, S_AFTER, N_CH, s_start,
                                                       ThresholdStrong, ThresholdWeak)
                    s_offset = s_start + s_peak
                    sf_offset = s_start + sf_peak
                    if keep_start <= s_offset < keep_end:
                        spike_count += 1
                        nextbits.append((wave, s_offset, sf_offset, cm, fcm))
                except np.linalg.LinAlgError:
                    s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format(
                        chunk=(s_start, s_end))
                    log_warning(s)
                except InterpolationError:
                    s = '*** WARNING *** Interpolation error in chunk {chunk}.'.format(
                        chunk=(s_start, s_end))
                    log_warning(s)
            # and return them in time sorted order
            nextbits.sort(key=lambda (wave, s, s_frac, cm, fcm): s_frac)
            for wave, s, s_frac, cm, fcm in nextbits:
                uwave = get_padded(DatChunk, int(s) - S_BEFORE - s_start,
                                   int(s) + S_AFTER - s_start).astype(np.int32)
                #cm = add_penumbra(cm, ChannelGraphToUse,
                #                  Parameters['PENUMBRA_SIZE'])
                #fcm = get_float_mask(wave, cm, ChannelGraphToUse, 1.)
                yield uwave, wave, s, s_frac, cm, fcm  # unfiltered wave, wave, s_peak, ChMask, FloatChMask
        else:
            # Original SpikeDetekt. This code duplication is regrettable but probably easier to deal with
            for IndList in IndListsChunk:
                try:
                    wave, s_peak, sf_peak, cm = extract_wave(IndList, FilteredChunk,
                                                             S_BEFORE, S_AFTER, N_CH, s_start, Threshold)
                    s_offset = s_start + s_peak
                    sf_offset = s_start + sf_peak
                    if keep_start <= s_offset < keep_end:
                        spike_count += 1
                        nextbits.append((wave, s_offset, sf_offset, cm))
                except np.linalg.LinAlgError:
                    s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format(
                        chunk=(s_start, s_end))
                    log_warning(s)
            # and return them in time sorted order
            nextbits.sort(key=lambda (wave, s, s_frac, cm): s_frac)
            for wave, s, s_frac, cm in nextbits:
                uwave = get_padded(DatChunk, int(s) - S_BEFORE - s_start,
                                   int(s) + S_AFTER - s_start).astype(np.int32)
                cm = add_penumbra(cm, ChannelGraphToUse, Parameters['PENUMBRA_SIZE'])
                fcm = get_float_mask(wave, cm, ChannelGraphToUse, ThresholdSDFactor)
                yield uwave, wave, s, s_frac, cm, fcm  # unfiltered wave, wave, s_peak, ChMask, FloatChMask

        progress_bar.update(float(s_end) / n_samples,
                            '%d/%d samples, %d spikes found' % (s_end, n_samples, spike_count))
        if max_spikes is not None and spike_count >= max_spikes:
            break
    progress_bar.finish()
def extract_spikes(h5s, basename, DatFileNames, n_ch_dat,
                   ChannelsToUse, ChannelGraph,
                   max_spikes=None):
    # some global variables we use
    CHUNK_SIZE = Parameters['CHUNK_SIZE']
    CHUNKS_FOR_THRESH = Parameters['CHUNKS_FOR_THRESH']
    DTYPE = Parameters['DTYPE']
    CHUNK_OVERLAP = Parameters['CHUNK_OVERLAP']
    N_CH = Parameters['N_CH']
    S_JOIN_CC = Parameters['S_JOIN_CC']
    S_BEFORE = Parameters['S_BEFORE']
    S_AFTER = Parameters['S_AFTER']
    THRESH_SD = Parameters['THRESH_SD']

    # filter coefficents for the high pass filtering
    filter_params = get_filter_params()

    progress_bar = ProgressReporter()

    # A code that writes out a high-pass filtered version of the raw data
    # (.fil file)
    fil_writer = FilWriter(DatFileNames, n_ch_dat)

    # Just use first dat file for getting the thresholding data
    with open(DatFileNames[0], 'rb') as fd:
        # Use 5 chunks to figure out threshold
        DatChunk = get_chunk_for_thresholding(fd, n_ch_dat, ChannelsToUse,
                                              num_samples(DatFileNames[0], n_ch_dat))
        FilteredChunk = apply_filtering(filter_params, DatChunk)
        # .6745 converts median to standard deviation
        if Parameters['USE_SINGLE_THRESHOLD']:
            ThresholdSDFactor = np.median(np.abs(FilteredChunk)) / .6745
        else:
            ThresholdSDFactor = np.median(np.abs(FilteredChunk), axis=0) / .6745
        Threshold = ThresholdSDFactor * THRESH_SD
        print 'Threshold = ', Threshold, '\n'
        # Record the absolute Threshold used
        Parameters['THRESHOLD'] = Threshold

    n_samples = num_samples(DatFileNames, n_ch_dat)
    spike_count = 0
    for (DatChunk, s_start, s_end,
         keep_start, keep_end) in chunks(DatFileNames, n_ch_dat, ChannelsToUse):
        ############## FILTERING ########################################
        FilteredChunk = apply_filtering(filter_params, DatChunk)

        # write filtered output to file
        # if Parameters['WRITE_FIL_FILE']:
        fil_writer.write(FilteredChunk, s_start, s_end, keep_start, keep_end)

        ############## THRESHOLDING #####################################
        if Parameters['DETECT_POSITIVE']:
            BinaryChunk = np.abs(FilteredChunk) > Threshold
        else:
            BinaryChunk = (FilteredChunk < -Threshold)
        BinaryChunk = BinaryChunk.astype(np.int8)
        # write binary chunk filtered output to file
        if Parameters['WRITE_BINFIL_FILE']:
            fil_writer.write_bin(BinaryChunk, s_start, s_end, keep_start, keep_end)

        ############### FLOOD FILL ######################################
        ChannelGraphToUse = complete_if_none(ChannelGraph, N_CH)
        IndListsChunk = connected_components(BinaryChunk,
                                             ChannelGraphToUse, S_JOIN_CC)
        if Parameters['DEBUG']:
            plot_diagnostics(s_start, IndListsChunk, BinaryChunk, DatChunk,
                             FilteredChunk, Threshold)
            fil_writer.write_bin(BinaryChunk, s_start, s_end, keep_start, keep_end)

        ############## ALIGN AND INTERPOLATE WAVES #######################
        nextbits = []
        for IndList in IndListsChunk:
            try:
                wave, s_peak, cm = extract_wave(IndList, FilteredChunk,
                                                S_BEFORE, S_AFTER, N_CH,
                                                s_start, Threshold)
                s_offset = s_start + s_peak
                if keep_start <= s_offset < keep_end:
                    spike_count += 1
                    nextbits.append((wave, s_offset, cm))
            except np.linalg.LinAlgError:
                s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format(
                    chunk=(s_start, s_end))
                log_warning(s)
        # and return them in time sorted order
        nextbits.sort(key=lambda wave_s_cm: wave_s_cm[1])
        for wave, s, cm in nextbits:
            uwave = get_padded(DatChunk, int(s) - S_BEFORE - s_start,
                               int(s) + S_AFTER - s_start).astype(np.int32)
            cm = add_penumbra(cm, ChannelGraphToUse, Parameters['PENUMBRA_SIZE'])
            fcm = get_float_mask(wave, cm, ChannelGraphToUse, ThresholdSDFactor)
            yield uwave, wave, s, cm, fcm

        progress_bar.update(float(s_end) / n_samples,
                            '%d/%d samples, %d spikes found' % (s_end, n_samples, spike_count))
        if max_spikes is not None and spike_count >= max_spikes:
            break
    progress_bar.finish()