Example #1
def read_map(filename):
    # CCP4 map file format
    # http://www.ccp4.ac.uk/html/maplib.html
    with open(filename, "rb") as f:
        # 1024 bytes header
        header_buf = f.read(1024)
        #1      NC              # of Columns    (fastest changing in map)
        #2      NR              # of Rows
        #3      NS              # of Sections   (slowest changing in map)
        NCNRNS = tuple(numpy.frombuffer(header_buf, dtype="int32")[:3])
        #4      MODE            Data type
        #                  0 = envelope stored as signed bytes (from
        #                      -128 lowest to 127 highest)
        #                  1 = Image     stored as Integer*2
        #                  2 = Image     stored as Reals
        #                  3 = Transform stored as Complex Integer*2
        #                  4 = Transform stored as Complex Reals
        #                  5 == 0	
        #
        #                  Note: Mode 2 is the normal mode used in
        #                        the CCP4 programs. Other modes than 2 and 0
        #                        may NOT WORK
        MODE = numpy.frombuffer(header_buf, dtype="int32")[3]
        dtype = ["int8", "int16", "float32", None, "complex64", "int8"][MODE]
        if MODE not in [0, 2]:
            log.log_warning(logger, "WARNING: Map file data type \"MODE=%i\" may not work." % MODE)
        #24      NSYMBT          Number of bytes used for storing symmetry operators
        NSYMBT = numpy.frombuffer(header_buf, dtype="int32")[23]
        if NSYMBT > 0:
            log.log_warning(logger, "WARNING: Omitting symmetry operations in map file.")
            f.read(NSYMBT)
        # The remaining bytes are data
        data = f.read()
        data = numpy.frombuffer(data, dtype=dtype).reshape(NCNRNS)
        #11      X length        Cell Dimensions (Angstroms)
        # Voxel size in meters (X cell length divided by the number of columns)
        dx = numpy.frombuffer(header_buf, dtype="float32")[10] / float(NCNRNS[0]) * 1E-10
    return data, dx
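A brief usage sketch for read_map above; the file path is a placeholder and the function is assumed to be importable from the module it lives in:

# Illustrative call; "map_file.ccp4" is a placeholder path to a CCP4/MRC map
density, voxel_size = read_map("map_file.ccp4")
# density is a 3D numpy array, voxel_size is the voxel edge length in meters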
Example #2
def is_model_support(model_name, model_list):
    """
    :param model_name: model name
    :param model_list: implemented model list
    :return: None; raises NotImplementedError if the model is not implemented
    """
    if model_name not in model_list:
        log_warning("use implemented model")
        raise NotImplementedError("implement a custom model(%s) in /nets/models/" % model_name)
Example #3
def loss_fn(loss_fn: str = "mse"):
    """
    :param loss_fn: name of the loss function to use for training
    :return: loss function module (class instance)
    """
    if loss_fn == "mse":
        return loss.MSELoss()
    elif loss_fn == "L1":
        return loss.L1Loss()
    elif loss_fn == "neg_pearson":
        return NegPearsonLoss()
    elif loss_fn == "multi_margin":
        return loss.MultiMarginLoss()
    elif loss_fn == "bce":
        return loss.BCELoss()
    elif loss_fn == "huber":
        return loss.HuberLoss()
    elif loss_fn == "cosine_embedding":
        return loss.CosineEmbeddingLoss()
    elif loss_fn == "cross_entropy":
        return loss.CrossEntropyLoss()
    elif loss_fn == "ctc":
        return loss.CTCLoss()
    elif loss_fn == "bce_with_logits":
        return loss.BCEWithLogitsLoss()
    elif loss_fn == "gaussian_nll":
        return loss.GaussianNLLLoss()
    elif loss_fn == "hinge_embedding":
        return loss.HingeEmbeddingLoss()
    elif loss_fn == "KLDiv":
        return loss.KLDivLoss()
    elif loss_fn == "margin_ranking":
        return loss.MarginRankingLoss()
    elif loss_fn == "multi_label_margin":
        return loss.MultiLabelMarginLoss()
    elif loss_fn == "multi_label_soft_margin":
        return loss.MultiLabelSoftMarginLoss()
    elif loss_fn == "nll":
        return loss.NLLLoss()
    elif loss_fn == "nll2d":
        return loss.NLLLoss2d()
    elif loss_fn == "pairwise":
        return loss.PairwiseDistance()
    elif loss_fn == "poisson_nll":
        return loss.PoissonNLLLoss()
    elif loss_fn == "smooth_l1":
        return loss.SmoothL1Loss()
    elif loss_fn == "soft_margin":
        return loss.SoftMarginLoss()
    elif loss_fn == "triplet_margin":
        return loss.TripletMarginLoss()
    elif loss_fn == "triplet_margin_distance":
        return loss.TripletMarginWithDistanceLoss()
    else:
        log_warning("use implemented loss functions")
        raise NotImplementedError(
            "implement a custom function(%s) in loss.py" % loss_fn)
Example #4
 def __init__(self, filename, chunksize=2, gzip_compression=False):
     self._filename = os.path.expandvars(filename)
     if os.path.exists(filename):
         log.log_warning(logger, "File %s exists and is being overwritten" % filename)
     self._f = h5py.File(filename, "w")
     self._i = 0
     self._chunksize = chunksize
     self._create_dataset_kwargs = {}
     if gzip_compression:
         self._create_dataset_kwargs["compression"] = "gzip"
Example #5
 def _write_without_iterate(self, D, group_prefix="/"):
     for k in D.keys():
         if isinstance(D[k],dict):
             group_prefix_new = group_prefix + k + "/"
             log.log_debug(logger, "Writing group %s" % group_prefix_new)
             if k not in self._f[group_prefix]:
                 self._f.create_group(group_prefix_new)
             self._write_without_iterate(D[k], group_prefix_new)
         else:
             name = group_prefix + k
             log.log_debug(logger, "Writing dataset %s" % name)
             data = D[k]
             if k not in self._f[group_prefix]:
                 if numpy.isscalar(data):
                     maxshape = (None,)
                     shape = (self._chunksize,)
                     dtype = numpy.dtype(type(data))
                     if dtype == "S":
                         dtype = h5py.new_vlen(str)
                     axes = "experiment_identifier:value"
                 else:
                     data = numpy.asarray(data)
                     try:
                         h5py.h5t.py_create(data.dtype, logical=1)
                     except TypeError:
                         log.log_warning(logger, "Could not save dataset %s. Conversion to numpy array failed" % name)
                         continue
                     maxshape = tuple([None]+list(data.shape))
                     shape = tuple([self._chunksize]+list(data.shape))
                     dtype = data.dtype
                     ndim = data.ndim
                     axes = "experiment_identifier"
                     if ndim == 1: axes = axes + ":x"
                     elif ndim == 2: axes = axes + ":y:x"
                     elif ndim == 3: axes = axes + ":z:y:x"
                 log.log_debug(logger, "Create dataset %s [shape=%s, dtype=%s]" % (name,str(shape),str(dtype)))
                 self._f.create_dataset(name, shape, maxshape=maxshape, dtype=dtype, **self._create_dataset_kwargs)
                 self._f[name].attrs.modify("axes",[axes])
             if self._f[name].shape[0] <= self._i:
                 if numpy.isscalar(data):
                     data_shape = []
                 else:
                     data_shape = data.shape
                 new_shape = tuple([self._chunksize*(self._i//self._chunksize+1)]+list(data_shape))
                 log.log_debug(logger, "Resize dataset %s [old shape: %s, new shape: %s]" % (name,str(self._f[name].shape),str(new_shape)))
                 self._f[name].resize(new_shape)
             log.log_debug(logger, "Write to dataset %s at stack position %i" % (name, self._i))
             if numpy.isscalar(data):
                 self._f[name][self._i] = data
             else:
                 self._f[name][self._i,:] = data[:]
Example #6
def run_migration():
    if not os.path.exists("./deals.db"):
        log.log_normal("running database initialization")
        initialize_db()

    log.log_normal("running migration scripts")
    mypath = "./db/migration_scripts/*.sql"
    files = glob.glob(mypath)
    ordered_files = sorted(files)
    for item in ordered_files:
        if not script_loaded(item):
            log.log_normal("Running Script [%s]" % (item))
            run_script(item)
        else:
            log.log_warning("Script already loaded [%s]" % (item))
Example #7
    def __init__(self, values=None, formalism=None):
        self.rotation_matrix = None
        if values is None and formalism is None:
            # No rotation (rotation matrix = identity matrix)
            self.rotation_matrix = numpy.identity(3)
        elif formalism.startswith("euler_angles_") and len(formalism) == len("euler_angles_xyz"):
            self.set_with_euler_angles(values, rotation_axes=formalism[-3:])
        elif formalism == "rotation_matrix":
            self.set_with_rotation_matrix(values)
        elif formalism == "quaternion":
            self.set_with_quaternion(values)
        elif formalism in ["random","random_x","random_y","random_z"]:
            if values is not None:
                log_warning(logger, "Specified formalism=%s but values is not None." % formalism)

            self._set_as_random_formalism(formalism)
        else:
            log_and_raise_error(logger, "formalism=%s is not implemented" % formalism)
            return
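A brief usage sketch for this constructor, assuming the class is condor.utils.rotation.Rotation as referenced elsewhere in these examples, and that the quaternion convention is (w, x, y, z); the values are illustrative:

import numpy
import condor.utils.rotation

# Identity rotation (no values, no formalism)
rot_identity = condor.utils.rotation.Rotation()

# Rotation from a quaternion; here a 90-degree rotation about the z axis
q = numpy.array([numpy.cos(numpy.pi/4), 0., 0., numpy.sin(numpy.pi/4)])
rot_q = condor.utils.rotation.Rotation(values=q, formalism="quaternion")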
Example #8
def summary(model, model_name):
    """
    :param model: torch.nn.Module instance
    :param model_name: implemented model name
    :return: None; prints a summary of the model
    """
    log_info("=========================================")
    log_info(model_name)
    log_info("=========================================")
    if model_name == "DeepPhys" or model_name == DeepPhys_DA:
        torchsummary.summary(model, (2, 3, 36, 36))
    elif model_name == "PhysNet" or model_name == "PhysNet_LSTM":
        torchinfo.summary(model, (1, 3, 32, 128, 128))
    elif model_name in "PPNet":
        torchinfo.summary(model, (1, 1, 250))
    elif model_name == "MetaPhys" or "MetaPhys_task":
        print('rrrr')
    else:
        log_warning("use implemented model")
        raise NotImplementedError("implement a custom model(%s) in /nets/models/" % model_name)
Example #9
def get_model(model_name: str = "DeepPhys"):
    """
    :param model_name: model name
    :return: model
    """
    if model_name == "DeepPhys":
        return DeepPhys()
    elif model_name == "DeepPhys_DA":
        return DeepPhys_DA()
    elif model_name == "PhysNet" or 'MetaPhysNet':
        return PhysNet()
    elif model_name == "MetaPhys" or "MetaPhys_task":
        return TSCAN()
    elif model_name == "PhysNet_LSTM":
        return PhysNet_2DCNN_LSTM()
    elif model_name == "PPNet":
        return PPNet()
    elif model_name == "MMAML_Phys":
        return FiLM()
    else:
        log_warning("use implemented model")
        raise NotImplementedError("implement a custom model(%s) in /nets/models/" % model_name)
Example #10
def optimizers(model_params, learning_rate: float = 1, optim: str = "adam"):
    '''
    call optimizer
    :param model_params: parameters of the model to optimize
    :param learning_rate: learning rate
    :param optim: optimizer name
    :return: selected optimizer object
    '''
    if optim == "adam":
        return opt.Adam(model_params, learning_rate)
    elif optim == "sgd":
        return opt.SGD(model_params, learning_rate)
    elif optim == "rms_prop":
        return opt.RMSprop(model_params, learning_rate)
    elif optim == "ada_delta":
        return opt.Adadelta(model_params, learning_rate)
    elif optim == "ada_grad":
        return opt.Adagrad(model_params, learning_rate)
    elif optim == "ada_max":
        return opt.Adamax(model_params, learning_rate)
    elif optim == "ada_mw":
        return opt.AdamW(model_params, learning_rate)
    elif optim == "a_sgd":
        return opt.ASGD(model_params, learning_rate)
    elif optim == "lbfgs":
        return opt.LBFGS(model_params, learning_rate)
    elif optim == "n_adam":
        return opt.NAdam(model_params, learning_rate)
    elif optim == "r_adam":
        return opt.RAdam(model_params, learning_rate)
    elif optim == "rprop":
        return opt.Rprop(model_params, learning_rate)
    elif optim == "sparse_adam":
        return opt.SparseAdam(model_params, learning_rate)
    else:
        log_warning("use implemented optimizer")
        raise NotImplementedError(
            "implement a custom optimizer(%s) in optimizer.py" % optim)
Example #11
    def __init__(self, values=None, formalism=None):
        self.rotation_matrix = None
        if values is None and formalism is None:
            # No rotation (rotation matrix = identity matrix)
            self.rotation_matrix = numpy.identity(3)
        elif formalism.startswith("euler_angles_") and len(formalism) == len(
                "euler_angles_xyz"):
            self.set_with_euler_angles(values, rotation_axes=formalism[-3:])
        elif formalism == "rotation_matrix":
            self.set_with_rotation_matrix(values)
        elif formalism == "quaternion":
            self.set_with_quaternion(values)
        elif formalism in ["random", "random_x", "random_y", "random_z"]:
            if values is not None:
                log_warning(
                    logger, "Specified formalism=%s but values is not None." %
                    formalism)

            self._set_as_random_formalism(formalism)
        else:
            log_and_raise_error(logger,
                                "formalism=%s is not implemented" % formalism)
            return
Example #12
def _conf_to_spsim_opts(D_source,
                        D_particle,
                        D_detector,
                        ndim=2,
                        qn=None,
                        qmax=None):
    if ndim == 2:
        if qn is not None or qmax is not None:
            log_warning(
                logger,
                "As ndim=2 the passed values for qn and qmax take no effect.")
    if ndim == 3:
        if qn is None and qmax is None:
            log_and_raise_error(
                logger, "As ndim=3 both qn and qmax must be not None.")
            return
    import spsim
    # Create temporary file for pdb file
    tmpf_pdb = tempfile.NamedTemporaryFile(mode='w+b',
                                           bufsize=-1,
                                           suffix='.conf',
                                           prefix='tmp_spsim',
                                           dir=None,
                                           delete=False)
    tmpf_pdb_name = tmpf_pdb.name
    tmpf_pdb.close()
    # Write pdb file
    mol = spsim.get_molecule_from_atoms(D_particle["atomic_numbers"],
                                        D_particle["atomic_positions"])
    spsim.write_pdb_from_mol(tmpf_pdb_name, mol)
    spsim.free_mol(mol)
    # Start with default spsim configuration
    opts = spsim.set_defaults()
    # Create temporary file for spsim configuration
    tmpf_conf = tempfile.NamedTemporaryFile(mode='w+b',
                                            bufsize=-1,
                                            suffix='.conf',
                                            prefix='tmp_spsim',
                                            dir=None,
                                            delete=False)
    # Write string sequence from configuration dicts
    s = []
    s += "# THIS FILE WAS CREATED AUTOMATICALLY BY CONDOR\n"
    s += "# Temporary configuration file for spsim\n"
    s += "verbosity_level = 0;\n"
    s += "number_of_dimensions = %i;\n" % ndim
    s += "number_of_patterns = 1;\n"
    s += "origin_to_com = 1;\n"
    s += "input_type = \"pdb\";\n"
    #s += "pdb_filename = \"%s\";\n" % D_particle["pdb_filename"]
    s += "pdb_filename = \"%s\";\n" % tmpf_pdb_name
    if ndim == 2:
        D = D_detector["distance"]
        Lx = D_detector["pixel_size"] * D_detector["nx"]
        Ly = D_detector["pixel_size"] * D_detector["ny"]
    else:
        k0 = 2. * numpy.pi / D_source["wavelength"]
        D = qn / 2. * D_detector["pixel_size"] * k0 / qmax
        Lx = Ly = Lz = D_detector["pixel_size"] * qn
    s += "detector_distance = %.12e;\n" % D
    s += "detector_width = %.12e;\n" % Lx
    s += "detector_height = %.12e;\n" % Ly
    if ndim == 3:
        s += "detector_depth = %.12e;\n" % Lz
    s += "detector_pixel_width = %.12e;\n" % D_detector["pixel_size"]
    s += "detector_pixel_height = %.12e;\n" % D_detector["pixel_size"]
    if ndim == 3:
        s += "detector_pixel_depth = %.12e;\n" % D_detector["pixel_size"]
    if ndim == 2:
        s += "detector_center_x = %.12e;\n" % (D_detector["pixel_size"] *
                                               (D_detector["cx"] -
                                                (D_detector["nx"] - 1) / 2.))
        s += "detector_center_y = %.12e;\n" % (D_detector["pixel_size"] *
                                               (D_detector["cy"] -
                                                (D_detector["ny"] - 1) / 2.))
    else:
        s += "detector_center_x = 0;\n"
        s += "detector_center_y = 0;\n"
        s += "detector_center_z = 0;\n"
    s += "detector_binning = 1;\n"
    s += "experiment_wavelength = %.12e;\n" % D_source["wavelength"]
    s += "experiment_beam_intensity = %.12e;\n" % D_particle["intensity"]
    s += "experiment_polarization = \"ignore\";\n"  # polarization correction will be done in Condor if needed (see experiment.py)
    #s += "use_cuda = 0;\n"
    intrinsic_rotation = condor.utils.rotation.Rotation(
        values=D_particle["extrinsic_quaternion"], formalism="quaternion")
    intrinsic_rotation.invert()
    e0, e1, e2 = intrinsic_rotation.get_as_euler_angles("zxz")
    if not numpy.isfinite(e0):
        print "ERROR: phi is not finite"
    if not numpy.isfinite(e1):
        print "ERROR: theta is not finite"
    if not numpy.isfinite(e2):
        print "ERROR: psi is not finite"
    s += "phi = %.12e;\n" % e0
    s += "theta = %.12e;\n" % e1
    s += "psi = %.12e;\n" % e2
    s += "random_orientation = 0;\n"
    # Write string sequence to file
    tmpf_conf.writelines(s)
    # Close temporary file
    tmpf_conf_name = tmpf_conf.name
    tmpf_conf.close()
    # Read configuration into options struct
    spsim.read_options_file(tmpf_conf_name, opts)
    # This deletes the temporary files
    os.unlink(tmpf_pdb_name)
    os.unlink(tmpf_conf_name)
    return opts
Example #13
def extract_wave_new(IndList, FilteredArr, s_before, s_after, n_ch, s_start,Threshold):
    IndArr = np.array(IndList, dtype=np.int32)
    SampArr = IndArr[:, 0]
    ChArr = IndArr[:, 1]
    n_ch = FilteredArr.shape[1]
    log_fd = GlobalVariables['log_fd']
    if np.amax(SampArr)-np.amin(SampArr)>Parameters['CHUNK_OVERLAP']/2:
        s = '''
        ************ ERROR **********************************************
        Connected component found with width larger than CHUNK_OVERLAP/2.
        Spikes could be repeatedly detected, increase the size of
        CHUNK_OVERLAP and re-run.
        Component sample range: {sample_range}
        *****************************************************************
        '''.format(sample_range=(s_start+np.amin(SampArr),
                                 s_start+np.amax(SampArr)))
        log_warning(s, multiline=True)
        #exit()

    bc = np.bincount(ChArr)
    # convert to bool and force it to have the right type
    ChMask = np.zeros(n_ch, dtype=np.bool8)
    ChMask[:len(bc)] = bc.astype(np.bool8)
    
    # Find peak sample:
    # 1. upsample channels we're using on thresholded range
    # 2. find weighted mean peak sample
    SampArrMin, SampArrMax = np.amin(SampArr)-3, np.amax(SampArr)+4
    WavePlus = get_padded(FilteredArr, SampArrMin, SampArrMax)
    WavePlus = WavePlus[:, ChMask]
    # upsample WavePlus
    upsampling_factor = Parameters['UPSAMPLING_FACTOR']
    if upsampling_factor>1:
        old_s = np.arange(WavePlus.shape[0])
        new_s_i = np.arange((WavePlus.shape[0]-1)*upsampling_factor+1)
        new_s = np.array(new_s_i*(1.0/upsampling_factor), dtype=np.float32)
        f = interp1d(old_s, WavePlus, bounds_error=True, kind='cubic', axis=0)
        UpsampledWavePlus = f(new_s)
    else:
        UpsampledWavePlus = WavePlus
    # find weighted mean peak for each channel above threshold
    if Parameters['USE_WEIGHTED_MEAN_PEAK_SAMPLE']:
        peak_sum = 0.0
        total_weight = 0.0
        for ch in xrange(WavePlus.shape[1]):
            X = UpsampledWavePlus[:, ch]
            if Parameters['DETECT_POSITIVE']:
                X = -np.abs(X)
            i_intpeak = np.argmin(X)
            left, right = i_intpeak-1, i_intpeak+2
            if right>len(X):
                left, right = left+len(X)-right, len(X)
            elif left<0:
                left, right = 0, right-left
            a_b_c = abc(np.arange(left, right, dtype=np.float32),
                        X[left:right])
            s_fracpeak = max_t(a_b_c)
            if Parameters['USE_SINGLE_THRESHOLD']:
                weight = -(X[i_intpeak]+Threshold)
            else:
                weight = -(X[i_intpeak]+Threshold[ch])
            if weight<0:
                weight = 0
            peak_sum += s_fracpeak*weight
            total_weight += weight
        s_fracpeak = (peak_sum/total_weight)
    else:
        if Parameters['DETECT_POSITIVE']:
            X = -np.abs(UpsampledWavePlus)
        else:
            X = UpsampledWavePlus
        s_fracpeak = 1.0*np.argmin(np.amin(X, axis=1))
    # s_fracpeak currently in coords of UpsampledWavePlus
    s_fracpeak = s_fracpeak/upsampling_factor
    # s_fracpeak now in coordinates of WavePlus
    s_fracpeak += SampArrMin
    # s_fracpeak now in coordinates of FilteredArr
    
    # get block of given size around peaksample
    try:
        s_peak = int(s_fracpeak)
    except ValueError:
        # This is a bit of a hack. Essentially, the problem here is that
        # s_fracpeak is a nan because the interpolation didn't work, and
        # therefore we want to skip the spike. There's already code in
        # core.extract_spikes that does this if a LinAlgError is raised,
        # so we just use that to skip this spike (and write a message to the
        # log).
        raise np.linalg.LinAlgError 
    WaveBlock = get_padded(FilteredArr,
                           s_peak-s_before-1, s_peak+s_after+2)
    
    # Perform interpolation around the fractional peak
    old_s = np.arange(s_peak-s_before-1, s_peak+s_after+2)
    new_s = np.arange(s_peak-s_before, s_peak+s_after)+(s_fracpeak-s_peak)
    f = interp1d(old_s, WaveBlock, bounds_error=True, kind='cubic', axis=0)
    Wave = f(new_s)
    
    return Wave, s_peak, ChMask
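The fractional peak above relies on the helpers abc (fit a parabola through three points) and max_t (its vertex), which are not shown; a self-contained numpy sketch of that sub-sample peak estimate, under the assumption that those helpers do a simple quadratic fit, is:

import numpy as np

def parabolic_subsample_peak(x):
    # x: 1D array of (negative-going) filtered samples; returns fractional index of the minimum
    i = int(np.argmin(x))
    left, right = max(i - 1, 0), min(i + 2, len(x))
    t = np.arange(left, right, dtype=np.float32)
    a, b, c = np.polyfit(t, x[left:right], 2)   # quadratic fit through (up to) three points
    if a == 0:
        return float(i)                          # degenerate fit: fall back to the integer peak
    return float(-b / (2.0 * a))                 # vertex of the parabola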
Example #14
def extract_wave(IndList, FilteredArr, s_before, s_after, n_ch, s_start,Threshold):
    '''
    Extract an aligned wave corresponding to a spike.
    
    Arguments:
    
    IndList
        A list of pairs (sample_number, channel_number) returned from the
        thresholding and flood filling algorithm
    FilteredArr
        An array of shape (numsamples, numchannels) containing the filtered
        wave data
    s_before, s_after
        The number of samples to return before and after the peak
        
    Returns a tuple (Wave, PeakSample, ChMask):
    
    Wave
        The wave aligned around the peak (with interpolation to give subsample
        alignment), consisting of s_before+s_after+1 samples.
    PeakSample
        The index of the peak sample in FilteredArr (the peak sample in Wave
        will always be s_before).
    ChMask
        The mask for this spike, a boolean array of length the number of
        channels, with value 1 if the channel is used and 0 otherwise.
    '''
    if Parameters['USE_WEIGHTED_MEAN_PEAK_SAMPLE'] or Parameters['UPSAMPLING_FACTOR']>1:
        return extract_wave_new(IndList, FilteredArr,
                                s_before, s_after, n_ch, s_start,Threshold)
    IndArr = np.array(IndList, dtype=np.int32)
    SampArr = IndArr[:, 0]
    log_fd = GlobalVariables['log_fd']
    if np.amax(SampArr)-np.amin(SampArr)>Parameters['CHUNK_OVERLAP']/2:
        s = '''
        ************ ERROR **********************************************
        Connected component found with width larger than CHUNK_OVERLAP/2.
        Spikes could be repeatedly detected, increase the size of
        CHUNK_OVERLAP and re-run.
        Component sample range: {sample_range}
        *****************************************************************
        '''.format(sample_range=(s_start+np.amin(SampArr),
                                 s_start+np.amax(SampArr)))
        log_warning(s, multiline=True)
        #exit()
    ChArr = IndArr[:, 1]
    n_ch = FilteredArr.shape[1]
    
    # Find peak sample and channel
    # TODO: argmin only works for negative threshold crossings
    PeakInd = FilteredArr[SampArr, ChArr].argmin()
    PeakSample, PeakChannel = SampArr[PeakInd], ChArr[PeakInd]
    
    # Ensure that we get a fixed size chunk of the wave, padded with zeroes if
    # the segment from PeakSample-s_before-1 to PeakSample+s_after+1 goes
    # outside the bounds of FilteredArr.
    WavePlus = get_padded(FilteredArr,
                          PeakSample-s_before-1, PeakSample+s_after+1)
    # Perform interpolation around the fractional peak
    Wave = interp_around_peak(WavePlus, s_before+1,
                              PeakChannel, s_before, s_after)
    # Return the aligned wave, the peak sample index and the associated mask
    # which is computed by counting the number of times each channel index
    # appears in IndList and then converting to a bool (so that channel i is
    # True if channel i features at least once).
    bc = np.bincount(ChArr)
    # convert to bool and force it to have the right type
    ChMask = np.zeros(n_ch, dtype=np.bool8)
    ChMask[:len(bc)] = bc.astype(np.bool8)
    
    return Wave, PeakSample, ChMask
Example #15
def read_map(filename):
    log.log_info(logger, "Automatic scaling of EM maps may not be reliable. Please make sure to check your map after using this functionality.")
    # CCP4 map file format
    # http://www.ccp4.ac.uk/html/maplib.html
    with open(filename, "rb") as f:
        # 1024 bytes header
        header_buf = f.read(1024)
        temp_int32 = numpy.frombuffer(header_buf, dtype="int32")
        temp_float32 = numpy.frombuffer(header_buf, dtype="float32")
        #1      NC              # of Columns    (fastest changing in map)
        #2      NR              # of Rows
        #3      NS              # of Sections   (slowest changing in map)
        NC = temp_int32[0]
        NR = temp_int32[1]
        NS = temp_int32[2]
        if NC != NR or NR != NS:
            log.log_and_raise_error(logger, "Cannot read a map with unequal dimensions")
        N = NC
        #4      MODE            Data type
        #                  0 = envelope stored as signed bytes (from
        #                      -128 lowest to 127 highest)
        #                  1 = Image     stored as Integer*2
        #                  2 = Image     stored as Reals
        #                  3 = Transform stored as Complex Integer*2
        #                  4 = Transform stored as Complex Reals
        #                  5 == 0	
        #
        #                  Note: Mode 2 is the normal mode used in
        #                        the CCP4 programs. Other modes than 2 and 0
        #                        may NOT WORK        
        MODE = temp_int32[3]
        dtype = ["int8", "int16", "float32", None, "complex64", "int8"][MODE]
        if MODE == 3:
            log.log_and_raise_error(logger, "Map file data type \"MODE=%i\" is not implemented yet." % MODE)
        if MODE not in [0,1,2,5]:
            log.log_warning(logger, "Map file data type \"MODE=%i\" not supported yet and may not work reliably." % MODE)
        #11      X length        Cell Dimensions (Angstroms)
        #12      Y length                     "
        #13      Z length                     "
        dX = temp_float32[10]/float(N)*1E-10
        dY = temp_float32[11]/float(N)*1E-10
        dZ = temp_float32[12]/float(N)*1E-10
        if dX != dY or dY != dZ:
            log.log_and_raise_error(logger, "Cannot read a map with unequal voxel dimensions")
        #17      MAPC            Which axis corresponds to Cols.  (1,2,3 for X,Y,Z)
        #18      MAPR            Which axis corresponds to Rows   (1,2,3 for X,Y,Z)
        #19      MAPS            Which axis corresponds to Sects. (1,2,3 for X,Y,Z)
        MAPC = temp_int32[16]
        MAPR = temp_int32[17]
        MAPS = temp_int32[18]
        #24      NSYMBT          Number of bytes used for storing symmetry operators
        NSYMBT = temp_int32[23]
        if NSYMBT > 0:
            log.log_and_raise_error(logger, "Omitting symmetry operations in map file.")
            f.read(NSYMBT)
        # The remaining bytes are data
        raw_data = f.read()
        raw_data = numpy.frombuffer(raw_data, dtype=dtype)
        # Now we need to project onto the right Z-Y-X array grid
        S,R,C = numpy.meshgrid(numpy.arange(NS), numpy.arange(NR), numpy.arange(NC), indexing='ij')
        S = S.flatten()
        R = R.flatten()
        C = C.flatten()
        if MAPC == 1:
            X = C
            Xlen = NC
        elif MAPC == 2:
            Y = C
            Ylen = NC
        elif MAPC == 3:
            Z = C
            Zlen = NC
        if MAPR == 1:
            X = R
            Xlen = NR
        elif MAPR == 2:
            Y = R
            Ylen = NR
        elif MAPR == 3:
            Z = R
            Zlen = NR
        if MAPS == 1:
            X = S
            Xlen = NS
        elif MAPS == 2:
            Y = S
            Ylen = NS
        elif MAPS == 3:
            Z = S
            Zlen = NS
        i = Z*(Ylen*Xlen) + Y*(Xlen) + X
        i.sort()
        data = numpy.zeros(Zlen*Ylen*Xlen, dtype=dtype)
        data[:] = raw_data[i]
        data = data.reshape((Zlen,Ylen,Xlen))
    return data, dX
Example #16
# -----------------------------------------------------------------------------------------------------
# General note:
# All variables are in SI units by default. Exceptions explicit by variable name.
# -----------------------------------------------------------------------------------------------------

import numpy, os
  
import logging
logger = logging.getLogger(__name__)

import log

try:
    import h5py
except ImportError:
    log.log_warning(logger, "Could not import h5py.")

class CXIWriter:
    def __init__(self, filename, chunksize=2, gzip_compression=False):
        self._filename = os.path.expandvars(filename)
        if os.path.exists(filename):
            log.log_warning(logger, "File %s exists and is being overwritten" % filename)
        self._f = h5py.File(filename, "w")
        self._i = 0
        self._chunksize = chunksize
        self._create_dataset_kwargs = {}
        if gzip_compression:
            self._create_dataset_kwargs["compression"] = "gzip"

    def write(self, D):
        self._write_without_iterate(D)
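A minimal usage sketch for CXIWriter, assuming h5py is available and that the full class (including _write_without_iterate from the earlier example) is present; the file name and the dictionary contents are illustrative:

import numpy

W = CXIWriter("output.cxi", chunksize=2, gzip_compression=True)
# Nested dicts become HDF5 groups; scalars and arrays become stacked, resizable datasets
W.write({"entry_1": {"data_1": {"data": numpy.random.random((16, 16)), "frame_id": 0}}})
W._f.close()  # the excerpt does not show a close() method, so the HDF5 handle is closed directly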
Example #18
def extract_wave_hilbert_old(IndList, FilteredArr, FilteredHilbertArr, s_before, 
                     s_after, n_ch, s_start, ThresholdStrong, ThresholdWeak):
    IndArr = np.array(IndList, dtype=np.int32)
    SampArr = IndArr[:, 0]
    ChArr = IndArr[:, 1]
    n_ch = FilteredArr.shape[1]
    log_fd = GlobalVariables['log_fd']
    if np.amax(SampArr)-np.amin(SampArr)>Parameters['CHUNK_OVERLAP']/2:
        s = '''
        ************ ERROR **********************************************
        Connected component found with width larger than CHUNK_OVERLAP/2.
        Spikes could be repeatedly detected, increase the size of
        CHUNK_OVERLAP and re-run.
        Component sample range: {sample_range}
        *****************************************************************
        '''.format(sample_range=(s_start+np.amin(SampArr),
                                 s_start+np.amax(SampArr)))
        log_warning(s, multiline=True)
        #exit()

    bc = np.bincount(ChArr)
    # convert to bool and force it to have the right type
    ChMask = np.zeros(n_ch, dtype=np.bool8)
    ChMask[:len(bc)] = bc.astype(np.bool8)
    n_unmasked_ch = np.sum(ChMask)
    
    # Find peak sample:
    # 1. upsample channels we're using on thresholded range
    # 2. find weighted mean peak sample
    SampArrMin, SampArrMax = np.amin(SampArr)-3, np.amax(SampArr)+4
    # ChArrMin, ChArrMax = np.amin(ChArr), np.amax(ChArr)
    
    
    WavePlus = get_padded(FilteredArr, SampArrMin, SampArrMax)
    WavePlus = WavePlus[:, ChMask]
    
    # upsample WavePlus
    upsampling_factor = Parameters['UPSAMPLING_FACTOR']
    if upsampling_factor>1:
        old_s = np.arange(WavePlus.shape[0])
        new_s_i = np.arange((WavePlus.shape[0]-1)*upsampling_factor+1)
        new_s = np.array(new_s_i*(1.0/upsampling_factor), dtype=np.float32)
        f = interp1d(old_s, WavePlus, bounds_error=True, kind='cubic', axis=0)
        UpsampledWavePlus = f(new_s)
    else:
        UpsampledWavePlus = WavePlus
        
    # find weighted mean peak for each channel above threshold
    if Parameters['USE_WEIGHTED_MEAN_PEAK_SAMPLE']:
        peak_sum = 0.0
        total_weight = 0.0
        for ch in xrange(WavePlus.shape[1]):
            X = UpsampledWavePlus[:, ch]
            if Parameters['DETECT_POSITIVE']:
                X = -np.abs(X)
            i_intpeak = np.argmin(X)
            left, right = i_intpeak-1, i_intpeak+2
            if right>len(X):
                left, right = left+len(X)-right, len(X)
            elif left<0:
                left, right = 0, right-left
            a_b_c = abc(np.arange(left, right, dtype=np.float32),
                        X[left:right])
            s_fracpeak = max_t(a_b_c)
            weight = -X[i_intpeak]
            if weight<0:
                weight = 0
            peak_sum += s_fracpeak*weight
            total_weight += weight
        s_fracpeak = (peak_sum/total_weight)
    else:
        if Parameters['DETECT_POSITIVE']:
            X = -np.abs(UpsampledWavePlus)
        else:
            X = UpsampledWavePlus
        s_fracpeak = 1.0*np.argmin(np.amin(X, axis=1))
        
    # s_fracpeak currently in coords of UpsampledWavePlus
    s_fracpeak = s_fracpeak/upsampling_factor
    # s_fracpeak now in coordinates of WavePlus
    s_fracpeak += SampArrMin
    # s_fracpeak now in coordinates of FilteredArr
    
    
    
    #################################
    # NEW: FLOAT MASK
    #################################
    # connected component as window in chunk with Hilbert
    # contains values only on weak threshold-exceeding points, 
    # zeros everywhere else
    comp = np.zeros((SampArrMax - SampArrMin, n_ch), dtype=FilteredHilbertArr.dtype)
    comp[SampArr - SampArrMin, ChArr] = FilteredHilbertArr[SampArr, ChArr]
    # 1D array: for each channel, the peak of the Hilbert, relative to the
    # start of the chunk
    peaks = np.argmax(comp, axis=0) + SampArrMin
    # 1D array: values of the peaks, on each channel
    peaks_values = FilteredHilbertArr[peaks, np.arange(0, n_ch)] * ChMask
    FloatChMask = np.clip((peaks_values - ThresholdWeak) / (ThresholdStrong - ThresholdWeak), 0, 1)
    
    
    
    # #################################
    # # New alignment
    # #################################
    # # In the window of the chunk (connected component), we take the clipped Hilbert 
    # # (masks between 0 and 1).
    # comp_clipped = np.clip((comp - ThresholdWeak) / (ThresholdStrong - ThresholdWeak), 0, 1)
    # # now we take the weighted average of the sample times in the component
    # s_fracpeak = np.sum(comp_clipped * np.arange(SampArrMax - SampArrMin).reshape((-1, 1))) / np.sum(comp_clipped)
    # s_fracpeak += SampArrMin
    
    
    #################################
    # Realign spike with respect to s_fracpeak
    #################################
    # get block of given size around peaksample
    try:
        s_peak = int(s_fracpeak)
    except ValueError:
        # This is a bit of a hack. Essentially, the problem here is that
        # s_fracpeak is a nan because the interpolation didn't work, and
        # therefore we want to skip the spike. There's already code in
        # core.extract_spikes that does this if a LinAlgError is raised,
        # so we just use that to skip this spike (and write a message to the
        # log).
        raise np.linalg.LinAlgError 
    WaveBlock = get_padded(FilteredArr,
                           s_peak-s_before-1, s_peak+s_after+2)
    # Perform interpolation around the fractional peak
    old_s = np.arange(s_peak-s_before-1, s_peak+s_after+2)
    new_s = np.arange(s_peak-s_before, s_peak+s_after)+(s_fracpeak-s_peak)
    try:
        f = interp1d(old_s, WaveBlock, bounds_error=True, kind='cubic', axis=0)
    except ValueError: 
        #  File "/usr/lib/python2.7/dist-packages/scipy/interpolate/interpolate.py", line 509, in _dot0
        #  return dot(a, b)
        #ValueError: matrices are not aligned
        raise InterpolationError
    Wave = f(new_s)
    
    
    
    return Wave, s_peak, s_fracpeak, ChMask, FloatChMask
Example #19
def extract_wave_hilbert_new(IndList, FilteredArr, FilteredHilbertArr, s_before, 
                     s_after, n_ch, s_start, ThresholdStrong, ThresholdWeak):
    IndArr = np.array(IndList, dtype=np.int32)
    SampArr = IndArr[:, 0]
    ChArr = IndArr[:, 1]
    n_ch = FilteredArr.shape[1]
    log_fd = GlobalVariables['log_fd']
    if np.amax(SampArr)-np.amin(SampArr)>Parameters['CHUNK_OVERLAP']/2:
        s = '''
        ************ ERROR **********************************************
        Connected component found with width larger than CHUNK_OVERLAP/2.
        Spikes could be repeatedly detected, increase the size of
        CHUNK_OVERLAP and re-run.
        Component sample range: {sample_range}
        *****************************************************************
        '''.format(sample_range=(s_start+np.amin(SampArr),
                                 s_start+np.amax(SampArr)))
        log_warning(s, multiline=True)
        #exit()

    bc = np.bincount(ChArr)
    # convert to bool and force it to have the right type
    ChMask = np.zeros(n_ch, dtype=np.bool8)
    ChMask[:len(bc)] = bc.astype(np.bool8)
    n_unmasked_ch = np.sum(ChMask)
    
    # Find peak sample:
    # 1. upsample channels we're using on thresholded range
    # 2. find weighted mean peak sample
    SampArrMin, SampArrMax = np.amin(SampArr)-3, np.amax(SampArr)+4
    # ChArrMin, ChArrMax = np.amin(ChArr), np.amax(ChArr)
    
    
    #################################
    # NEW: FLOAT MASK
    #################################
    # connected component as window in chunk with Hilbert
    # contains values only on weak threshold-exceeding points, 
    # zeros everywhere else
    comp = np.zeros((SampArrMax - SampArrMin, n_ch), dtype=FilteredHilbertArr.dtype)
    comp[SampArr - SampArrMin, ChArr] = FilteredHilbertArr[SampArr, ChArr]
    # 1D array: for each channel, the peak of the Hilbert, relative to the
    # start of the chunk
    peaks = np.argmax(comp, axis=0) + SampArrMin
    # 1D array: values of the peaks, on each channel
    peaks_values = FilteredHilbertArr[peaks, np.arange(0, n_ch)] * ChMask
    FloatChMask = np.clip((peaks_values - ThresholdWeak) / (ThresholdStrong - ThresholdWeak), 0, 1)
    #embed()
    
    
    #################################
    # New alignment
    #################################
    # In the window of the chunk (connected component), we take the clipped Hilbert 
    # (masks between 0 and 1).
    
    comp_clipped = np.clip((comp - ThresholdWeak) / (ThresholdStrong - ThresholdWeak), 0, 1)
    # No need to clip - it might make things worse - you lose the peaks!
    comp_normalised = (comp - ThresholdWeak) / (ThresholdStrong - ThresholdWeak)
    
    # now we take the weighted average of the sample times in the component
    s_fracpeak = np.sum(comp_normalised * np.arange(SampArrMax - SampArrMin).reshape((-1, 1))) / np.sum(comp_normalised)
    s_fracpeak += SampArrMin
    
    
    #################################
    # Realign spike with respect to s_fracpeak
    #################################
    # get block of given size around peaksample
    try:
        s_peak = int(s_fracpeak)
    except ValueError:
        # This is a bit of a hack. Essentially, the problem here is that
        # s_fracpeak is a nan because the interpolation didn't work, and
        # therefore we want to skip the spike. There's already code in
        # core.extract_spikes that does this if a LinAlgError is raised,
        # so we just use that to skip this spike (and write a message to the
        # log).
        raise np.linalg.LinAlgError 
    WaveBlock = get_padded(FilteredArr,
                           s_peak-s_before-1, s_peak+s_after+2)
    # Perform interpolation around the fractional peak
    old_s = np.arange(s_peak-s_before-1, s_peak+s_after+2)
    new_s = np.arange(s_peak-s_before, s_peak+s_after)+(s_fracpeak-s_peak)
    try:
        f = interp1d(old_s, WaveBlock, bounds_error=True, kind='cubic', axis=0)
    except ValueError: 
        #  File "/usr/lib/python2.7/dist-packages/scipy/interpolate/interpolate.py", line 509, in _dot0
        #  return dot(a, b)
        #ValueError: matrices are not aligned
        raise InterpolationError
    Wave = f(new_s)
    
    return Wave, s_peak, s_fracpeak, ChMask, FloatChMask
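As a quick illustration of the two-threshold float mask used above (the numbers are made up): a channel whose Hilbert peak sits below the weak threshold gets mask 0, one above the strong threshold gets mask 1, and values in between are scaled linearly.

import numpy as np

ThresholdWeak, ThresholdStrong = 2.0, 5.0
peaks_values = np.array([1.0, 3.5, 7.0])   # per-channel Hilbert peaks (toy values)
FloatChMask = np.clip((peaks_values - ThresholdWeak) /
                      (ThresholdStrong - ThresholdWeak), 0, 1)
# -> [0.0, 0.5, 1.0]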
Example #20
def extract_spikes(h5s, basename, DatFileNames, n_ch_dat,
                   ChannelsToUse, ChannelGraph,
                   max_spikes=None):
    # some global variables we use
    CHUNK_SIZE = Parameters['CHUNK_SIZE']
    CHUNKS_FOR_THRESH = Parameters['CHUNKS_FOR_THRESH']
    DTYPE = Parameters['DTYPE']
    CHUNK_OVERLAP = Parameters['CHUNK_OVERLAP']
    N_CH = Parameters['N_CH']
    S_JOIN_CC = Parameters['S_JOIN_CC']
    S_BEFORE = Parameters['S_BEFORE']
    S_AFTER = Parameters['S_AFTER']
    THRESH_SD = Parameters['THRESH_SD']
    THRESH_SD_LOWER = Parameters['THRESH_SD_LOWER']

    # filter coefficients for the high pass filtering
    filter_params = get_filter_params()
    print filter_params

    progress_bar = ProgressReporter()
    
    # Code that writes out a high-pass filtered version of the raw data (.fil file)
    fil_writer = FilWriter(DatFileNames, n_ch_dat)

    # Just use first dat file for getting the thresholding data
    with open(DatFileNames[0], 'rb') as fd:
        # Use 5 chunks to figure out threshold
        DatChunk = get_chunk_for_thresholding(fd, n_ch_dat, ChannelsToUse,
                                              num_samples(DatFileNames[0],
                                                          n_ch_dat))
        FilteredChunk = apply_filtering(filter_params, DatChunk)
        # get the STD of the beginning of the filtered data
        if Parameters['USE_HILBERT']:
            first_chunks_std = np.std(FilteredChunk)
            print 'first_chunks_std',  first_chunks_std, '\n'
        else:
            if Parameters['USE_SINGLE_THRESHOLD']:
                ThresholdSDFactor = np.median(np.abs(FilteredChunk))/.6745
            else:
                ThresholdSDFactor = np.median(np.abs(FilteredChunk), axis=0)/.6745
            Threshold = ThresholdSDFactor*THRESH_SD
            print 'Threshold = ', Threshold, '\n' 
            Parameters['THRESHOLD'] = Threshold #Record the absolute Threshold used
            
        
    # set the high and low thresholds
    do_pickle = False
    if Parameters['USE_HILBERT']:
        ThresholdStrong = Parameters['THRESH_STRONG']
        ThresholdWeak = Parameters['THRESH_WEAK']
        do_pickle = True
    elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']:#to be used with a single threshold only
        ThresholdStrong = Threshold
        ThresholdWeak = ThresholdSDFactor*THRESH_SD_LOWER
        do_pickle = True

    if do_pickle:
        picklefile = open("threshold.p", "wb")
        pickle.dump([ThresholdStrong,ThresholdWeak], picklefile)
        threshold_outputstring = 'Threshold strong = ' + repr(ThresholdStrong) + '\n' + 'Threshold weak = ' + repr(ThresholdWeak)
        log_message(threshold_outputstring)
        
    n_samples = num_samples(DatFileNames, n_ch_dat)
    spike_count = 0
    for (DatChunk, s_start, s_end,
         keep_start, keep_end) in chunks(DatFileNames, n_ch_dat, ChannelsToUse):
        ############## FILTERING ########################################
        FilteredChunk = apply_filtering(filter_params, DatChunk)
        
        # write filtered output to file
        if Parameters['WRITE_FIL_FILE']:
            fil_writer.write(FilteredChunk, s_start, s_end, keep_start, keep_end)

        ############## THRESHOLDING #####################################
        
        
        # NEW: HILBERT TRANSFORM
        if Parameters['USE_HILBERT']:
            FilteredChunkHilbert = np.abs(signal.hilbert(FilteredChunk, axis=0) / first_chunks_std) ** 2
            BinaryChunkWeak = FilteredChunkHilbert > ThresholdWeak
            BinaryChunkStrong = FilteredChunkHilbert > ThresholdStrong
            BinaryChunkWeak = BinaryChunkWeak.astype(np.int8)
            BinaryChunkStrong = BinaryChunkStrong.astype(np.int8)
        #elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']:
        else: # Usual method
            #FilteredChunk = apply_filtering(filter_params, DatChunk) Why did you filter twice!!!???
            if Parameters['USE_COMPONENT_ALIGNFLOATMASK']:
                if Parameters['DETECT_POSITIVE']:
                    BinaryChunkWeak = FilteredChunk > ThresholdWeak
                    BinaryChunkStrong = FilteredChunk > ThresholdStrong
                else:
                    BinaryChunkWeak = FilteredChunk < -ThresholdWeak
                    BinaryChunkStrong = FilteredChunk < -ThresholdStrong
                BinaryChunkWeak = BinaryChunkWeak.astype(np.int8)
                BinaryChunkStrong = BinaryChunkStrong.astype(np.int8)
            else:
                if Parameters['DETECT_POSITIVE']:
                    BinaryChunk = np.abs(FilteredChunk)>Threshold
                else:
                    BinaryChunk = (FilteredChunk<-Threshold)
                BinaryChunk = BinaryChunk.astype(np.int8)
        # write filtered output to file
        #if Parameters['WRITE_FIL_FILE']:
        #    fil_writer.write(FilteredChunk, s_start, s_end, keep_start, keep_end)
        #    print 'I am here at line 313'

        ############### FLOOD FILL  ######################################
        ChannelGraphToUse = complete_if_none(ChannelGraph, N_CH)
        if (Parameters['USE_HILBERT'] or Parameters['USE_COMPONENT_ALIGNFLOATMASK']):
            if Parameters['USE_OLD_CC_CODE']:
                IndListsChunkOld = connected_components(BinaryChunkWeak,
                            ChannelGraphToUse, S_JOIN_CC)
                # Final list of connected components: go through all 'weak' connected components
                # and only include in the final list those that also contain samples exceeding
                # the strong threshold. This method works better than connected_components_twothresholds.
                IndListsChunk = []
                for IndListWeak in IndListsChunkOld:
                    # if sum(BinaryChunkStrong[zip(*IndListWeak)]) != 0:
                    i, j = np.array(IndListWeak).transpose()
                    if sum(BinaryChunkStrong[i, j]) != 0:
                        IndListsChunk.append(IndListWeak)
            else:
                IndListsChunk = connected_components_twothresholds(BinaryChunkWeak, BinaryChunkStrong,
                            ChannelGraphToUse, S_JOIN_CC)
            BinaryChunk = 1 * BinaryChunkWeak + 1 * BinaryChunkStrong
        else:
            IndListsChunk = connected_components(BinaryChunk,
                            ChannelGraphToUse, S_JOIN_CC)
            
        
        if Parameters['DEBUG']:  #TO DO: Change plot_diagnostics for the HILBERT case
            if Parameters['USE_HILBERT']:
                plot_diagnostics_twothresholds(s_start,IndListsChunk,BinaryChunkWeak, BinaryChunkStrong,BinaryChunk,DatChunk,FilteredChunk,FilteredChunkHilbert,ThresholdStrong,ThresholdWeak)
            elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']:
                plot_diagnostics_twothresholds(s_start,IndListsChunk,BinaryChunkWeak,BinaryChunkStrong,BinaryChunk,DatChunk,FilteredChunk,-FilteredChunk,ThresholdStrong,ThresholdWeak)#TODO: change HIlbert in plot_diagnostics_twothresholds
            else:
                plot_diagnostics(s_start,IndListsChunk,BinaryChunk,DatChunk,FilteredChunk,Threshold)
        if Parameters['WRITE_BINFIL_FILE']:
            fil_writer.write_bin(BinaryChunk, s_start, s_end, keep_start, keep_end)
        
        #print len(IndListsChunk), 'len(IndListsChunk)'
        ############## ALIGN AND INTERPOLATE WAVES #######################
        nextbits = []
        if Parameters['USE_HILBERT']:
            
            for IndList in IndListsChunk:
                try:
                    wave, s_peak, sf_peak, cm, fcm = extract_wave_hilbert_new(IndList, FilteredChunk,
                                                    FilteredChunkHilbert,
                                                    S_BEFORE, S_AFTER, N_CH,
                                                    s_start, ThresholdStrong, ThresholdWeak)
                    s_offset = s_start + s_peak
                    sf_offset = s_start + sf_peak
                    if keep_start<=s_offset<keep_end:
                        spike_count += 1
                        nextbits.append((wave, s_offset, sf_offset, cm, fcm))
                except np.linalg.LinAlgError:
                    s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format(
                            chunk=(s_start, s_end))
                    log_warning(s)
                except InterpolationError:
                    s = '*** WARNING *** Interpolation error in chunk {chunk}.'.format(
                            chunk=(s_start, s_end))
                    log_warning(s)
            # and return them in time sorted order
            nextbits.sort(key=lambda (wave, s, s_frac, cm, fcm): s_frac)
            for wave, s, s_frac, cm, fcm in nextbits:
                uwave = get_padded(DatChunk, int(s)-S_BEFORE-s_start,
                                   int(s)+S_AFTER-s_start).astype(np.int32)
                # cm = add_penumbra(cm, ChannelGraphToUse,
                                  # Parameters['PENUMBRA_SIZE'])
                # fcm = get_float_mask(wave, cm, ChannelGraphToUse,
                                     # 1.)
                yield uwave, wave, s, s_frac, cm, fcm
                # unfiltered wave,wave, s_peak, ChMask, FloatChMask
        elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']:
            for IndList in IndListsChunk:
                try:
                    if Parameters['DETECT_POSITIVE']:
                        wave, s_peak, sf_peak, cm, fcm, comp_normalised, comp_normalised_power = extract_wave_twothresholds(IndList, FilteredChunk,
                                                    FilteredChunk,
                                                    S_BEFORE, S_AFTER, N_CH,
                                                    s_start, ThresholdStrong, ThresholdWeak) 
                    else:
                        wave, s_peak, sf_peak, cm, fcm,comp_normalised, comp_normalised_power = extract_wave_twothresholds(IndList, FilteredChunk,
                                                    -FilteredChunk,
                                                    S_BEFORE, S_AFTER, N_CH,
                                                    s_start, ThresholdStrong, ThresholdWeak)
                    s_offset = s_start+s_peak
                    sf_offset = s_start + sf_peak
                    if keep_start<=s_offset<keep_end:
                        spike_count += 1
                        nextbits.append((wave, s_offset, sf_offset, cm, fcm))
                except np.linalg.LinAlgError:
                    s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format(
                            chunk=(s_start, s_end))
                    log_warning(s)
                except InterpolationError:
                    s = '*** WARNING *** Interpolation error in chunk {chunk}.'.format(
                            chunk=(s_start, s_end))
                    log_warning(s)
            # and return them in time sorted order
            nextbits.sort(key=lambda (wave, s, s_frac, cm, fcm): s_frac)
            for wave, s, s_frac, cm, fcm in nextbits:
                uwave = get_padded(DatChunk, int(s)-S_BEFORE-s_start,
                                   int(s)+S_AFTER-s_start).astype(np.int32)
                # cm = add_penumbra(cm, ChannelGraphToUse,
                                  # Parameters['PENUMBRA_SIZE'])
                # fcm = get_float_mask(wave, cm, ChannelGraphToUse,
                                     # 1.)
                yield uwave, wave, s, s_frac, cm, fcm   
                # unfiltered wave,wave, s_peak, ChMask, FloatChMask
        else:    # Original SpikeDetekt. This code duplication is regrettable but probably easier to deal with
            
            for IndList in IndListsChunk:
                try:
                    wave, s_peak, sf_peak, cm = extract_wave(IndList, FilteredChunk,
                                                    S_BEFORE, S_AFTER, N_CH,
                                                    s_start,Threshold)
                    s_offset = s_start+s_peak
                    sf_offset = s_start + sf_peak
                    if keep_start<=s_offset<keep_end:
                        spike_count += 1
                        nextbits.append((wave, s_offset, sf_offset, cm))
                except np.linalg.LinAlgError:
                    s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format(
                            chunk=(s_start, s_end))
                    log_warning(s)
            # and return them in time sorted order
            nextbits.sort(key=lambda (wave, s, s_frac, cm): s_frac)
            for wave, s, s_frac, cm in nextbits:
                uwave = get_padded(DatChunk, int(s)-S_BEFORE-s_start,
                                   int(s)+S_AFTER-s_start).astype(np.int32)
                cm = add_penumbra(cm, ChannelGraphToUse,
                                  Parameters['PENUMBRA_SIZE'])
                fcm = get_float_mask(wave, cm, ChannelGraphToUse,
                                     ThresholdSDFactor)
                yield uwave, wave, s, s_frac, cm, fcm    
                # unfiltered wave,wave, s_peak, ChMask, FloatChMask

        progress_bar.update(float(s_end)/n_samples,
            '%d/%d samples, %d spikes found'%(s_end, n_samples, spike_count))
        if max_spikes is not None and spike_count>=max_spikes:
            break
    
    progress_bar.finish()
Example #21
def extract_spikes(h5s, basename, DatFileNames, n_ch_dat,
                   ChannelsToUse, ChannelGraph,
                   max_spikes=None):
    # some global variables we use
    CHUNK_SIZE = Parameters['CHUNK_SIZE']
    CHUNKS_FOR_THRESH = Parameters['CHUNKS_FOR_THRESH']
    DTYPE = Parameters['DTYPE']
    CHUNK_OVERLAP = Parameters['CHUNK_OVERLAP']
    N_CH = Parameters['N_CH']
    S_JOIN_CC = Parameters['S_JOIN_CC']
    S_BEFORE = Parameters['S_BEFORE']
    S_AFTER = Parameters['S_AFTER']
    THRESH_SD = Parameters['THRESH_SD']

    # filter coefficients for the high pass filtering
    filter_params = get_filter_params()

    progress_bar = ProgressReporter()

    # Code that writes out a high-pass filtered version of the raw data
    # (.fil file)
    fil_writer = FilWriter(DatFileNames, n_ch_dat)

    # Just use first dat file for getting the thresholding data
    with open(DatFileNames[0], 'rb') as fd:
        # Use 5 chunks to figure out threshold
        DatChunk = get_chunk_for_thresholding(fd, n_ch_dat, ChannelsToUse,
                                              num_samples(DatFileNames[0],
                                                          n_ch_dat))
        FilteredChunk = apply_filtering(filter_params, DatChunk)
        # .6745 converts median to standard deviation
        if Parameters['USE_SINGLE_THRESHOLD']:
            ThresholdSDFactor = np.median(np.abs(FilteredChunk)) / .6745
        else:
            ThresholdSDFactor = np.median(
                np.abs(FilteredChunk),
                axis=0) / .6745
        Threshold = ThresholdSDFactor * THRESH_SD

        print 'Threshold = ', Threshold, '\n'
        # Record the absolute Threshold used
        Parameters['THRESHOLD'] = Threshold

    n_samples = num_samples(DatFileNames, n_ch_dat)

    spike_count = 0
    for (DatChunk, s_start, s_end,
         keep_start, keep_end) in chunks(DatFileNames, n_ch_dat, ChannelsToUse):
        ############## FILTERING ########################################
        FilteredChunk = apply_filtering(filter_params, DatChunk)

        # write filtered output to file
        # if Parameters['WRITE_FIL_FILE']:
        fil_writer.write(FilteredChunk, s_start, s_end, keep_start, keep_end)

        ############## THRESHOLDING #####################################
        if Parameters['DETECT_POSITIVE']:
            BinaryChunk = np.abs(FilteredChunk) > Threshold
        else:
            BinaryChunk = (FilteredChunk < -Threshold)
        BinaryChunk = BinaryChunk.astype(np.int8)
        # write binary chunk filtered output to file
        if Parameters['WRITE_BINFIL_FILE']:
            fil_writer.write_bin(
                BinaryChunk,
                s_start,
                s_end,
                keep_start,
                keep_end)
        ############### FLOOD FILL  ######################################
        ChannelGraphToUse = complete_if_none(ChannelGraph, N_CH)
        IndListsChunk = connected_components(BinaryChunk,
                                             ChannelGraphToUse, S_JOIN_CC)
        if Parameters['DEBUG']:
            plot_diagnostics(
                s_start,
                IndListsChunk,
                BinaryChunk,
                DatChunk,
                FilteredChunk,
                Threshold)
            fil_writer.write_bin(
                BinaryChunk,
                s_start,
                s_end,
                keep_start,
                keep_end)

        ############## ALIGN AND INTERPOLATE WAVES #######################
        nextbits = []
        for IndList in IndListsChunk:
            try:
                wave, s_peak, cm = extract_wave(IndList, FilteredChunk,
                                                S_BEFORE, S_AFTER, N_CH,
                                                s_start, Threshold)
                s_offset = s_start + s_peak
                if keep_start <= s_offset < keep_end:
                    spike_count += 1
                    nextbits.append((wave, s_offset, cm))
            except np.linalg.LinAlgError:
                s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format(
                    chunk=(s_start, s_end))
                log_warning(s)
        # and return them in time sorted order
        nextbits.sort(key=lambda wave_s_cm: wave_s_cm[1])
        for wave, s, cm in nextbits:
            uwave = get_padded(DatChunk, int(s) - S_BEFORE - s_start,
                               int(s) + S_AFTER - s_start).astype(np.int32)
            cm = add_penumbra(cm, ChannelGraphToUse,
                              Parameters['PENUMBRA_SIZE'])
            fcm = get_float_mask(wave, cm, ChannelGraphToUse,
                                 ThresholdSDFactor)
            yield uwave, wave, s, cm, fcm
        progress_bar.update(float(s_end) / n_samples,
                            '%d/%d samples, %d spikes found' % (s_end, n_samples, spike_count))
        if max_spikes is not None and spike_count >= max_spikes:
            break

    progress_bar.finish()