Example #1
def run(*args, **kwargs):
    import argparse
    parser = argparse.ArgumentParser(
        description='Data Visualization',
        epilog="""Use: Mousewheel/arrow keys to scroll,
                  <Shift>/<Ctrl>+<left>/<right> for larger jumps.
                  <Shift>/<Ctrl>+Mousewheel to scale.
                  Use <q> or <Esc> to exit.""")
    parser.add_argument('path',
                        help='Relative or absolute path to directory',
                        default='.',
                        nargs='?')
    parser.add_argument('-d',
                        '--debug',
                        action='store_true',
                        help='Debug mode -- verbose output, no confirmations.')
    parser.add_argument('-c',
                        '--cols',
                        help='Number of columns',
                        default=1,
                        type=int)
    parser.add_argument('-C',
                        '--channels',
                        help='Number of channels',
                        type=int)
    parser.add_argument('-l',
                        '--layout',
                        help='Path to probe file defining channel order')
    parser.add_argument('-D',
                        '--dtype',
                        help='Data type if needed (e.g. float32 dat files)')
    parser.add_argument('-J',
                        '--jump',
                        help='Jump to timepoint (in seconds)',
                        type=float,
                        default=0)

    cli_args = parser.parse_args(*args)
    if cli_args.layout is not None:
        layout = util.run_prb(cli_args.layout)
        # flat_channel_list returns (channels, bad_channels); truncate the
        # good-channel list to the requested channel count after unpacking
        channels, bad_channels = util.flat_channel_list(layout)
        channels = channels[:cli_args.channels]
    else:
        channels = None
        bad_channels = None

    Vis(op.abspath(op.expanduser(cli_args.path)),
        n_cols=cli_args.cols,
        n_channels=cli_args.channels,
        channels=channels,
        bad_channels=bad_channels,
        dtype=cli_args.dtype,
        start=cli_args.jump)
    app.run()
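For reference, a hypothetical way this entry point could be called; the module context (util, Vis, app) is assumed to be in scope, and the data path and option values are illustrative placeholders, not taken from the example.

# Hypothetical invocation; the data path and option values are placeholders.
run(['data/session01', '--cols', '2', '--channels', '64', '--jump', '30.5'])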
Example #2
def main(args):
    parser = argparse.ArgumentParser(
        description='Generate .fet and .fd files for features from spike waveforms')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help="Verbose (debug) output")

    parser.add_argument('target',
                        default='.',
                        help="""Directory with waveform .mat files.""")
    parser.add_argument(
        '-o',
        '--out_path',
        help='Output file path. Defaults to current working directory.')
    parser.add_argument('--sampling-rate',
                        type=float,
                        help='Sampling rate. Default 30000 Hz',
                        default=3e4)
    parser.add_argument('-f',
                        '--force',
                        action='store_true',
                        help='Force overwrite of existing files.')
    parser.add_argument('-a',
                        '--align',
                        help='Alignment method, default: min',
                        default='min')
    parser.add_argument('-F',
                        '--features',
                        nargs='*',
                        help='Features to use. Default: energy',
                        default=['energy'])
    parser.add_argument('--to_fet',
                        nargs='*',
                        help='Features to include in fet file, default: all',
                        default=['all'])
    parser.add_argument(
        '--ignore-prb',
        action='store_true',
        help='Do not load channel validity from dead channels in .prb files')
    parser.add_argument('--no-report',
                        action='store_true',
                        help='Do not generate report file (saves time)')
    cli_args = parser.parse_args(args)

    matpath = Path(cli_args.target).resolve()
    if matpath.is_file():
        matfiles = [matpath]
    else:
        matfiles = sorted(
            list(map(Path.resolve, matpath.glob('tetrode??.mat'))))

    logger.debug(f'Target files: {[mf.name for mf in matfiles]}')
    logger.info('Found {} waveform files'.format(len(matfiles)))
    logger.debug(f'Requested to fet: {cli_args.to_fet}')

    # TODO:
    # per feature arguments
    sigma = 0.8

    for nt, matfile in tqdm(enumerate(matfiles), total=len(matfiles)):
        outpath = matfile.parent / 'FD'
        if not outpath.exists():
            outpath.mkdir()

        # Load prb file if it exists and set channel validity based on dead channels
        prb_path = matfile.with_suffix('.prb')
        if prb_path.exists():
            prb = run_prb(prb_path)
        else:
            logger.warning(
                f'No probe file found for {matfile.name} and no channel validity given.'
            )
            prb = None
        if prb is None or 'dead_channels' not in prb:
            channel_validity = [1, 1, 1, 1]
        else:
            channel_validity = [
                int(ch not in prb['dead_channels'])
                for ch in prb['channel_groups'][0]['channels']
            ]
        logger.debug('Channel validity: {}'.format(channel_validity) +
                     ('' if all(channel_validity) else
                      f', {4 - sum(channel_validity)} dead channel(s)'))

        # read waveforms and spike timestamps, closing the file when done
        with h5py.File(matfile, 'r') as hf:
            waveforms = np.array(hf['spikes'], dtype=PRECISION).reshape(
                [N_SAMPLES, N_CHANNELS, -1])
            timestamps = np.array(hf['index'], dtype='double')
        gauss = gaussian_filter(waveforms, sigma)
        # indices = timestamps * sampling_rate / 1e4

        features = {}
        # Allow to calculate all available features
        if len(cli_args.features) == 1 and cli_args.features[0].lower() == 'all':
            cli_args.features = AVAILABLE_FEATURES

        for fet_name in map(str.lower, cli_args.features):
            if fet_name == 'energy':
                logger.debug(f'Calculating {fet_name} feature')
                features['energy'] = scale_feature(feature_energy(waveforms))

            elif fet_name == 'energy24':
                logger.debug(f'Calculating {fet_name} feature')
                features['energy24'] = scale_feature(
                    feature_energy24(waveforms))

            elif fet_name == 'peak':
                logger.debug(f'Calculating {fet_name} feature')
                features['peak'] = feature_peak(waveforms)

            elif fet_name == 'cpca':
                logger.debug(f'Calculating {fet_name} feature')
                cpca = scale_feature(feature_cPCA(waveforms))
                logger.debug('cPCA shape {}'.format(cpca.shape))
                features['cPCA'] = cpca

            elif fet_name == 'cpca24':
                logger.debug(f'Calculating {fet_name} feature')
                cpca24 = scale_feature(feature_cPCA24(waveforms))
                logger.debug('cPCA24 shape {}'.format(cpca24.shape))
                features['cPCA24'] = cpca24

            elif fet_name == 'chwpca':
                logger.debug(f'Calculating {fet_name} feature')
                chwpca = scale_feature(feature_chwPCA(waveforms))
                logger.debug('chwPCA shape {}'.format(chwpca.shape))
                features['chwPCA'] = chwpca

            else:
                raise NotImplementedError(
                    "Unknown feature: {}".format(fet_name))

        # TODO:
        # fet_cpca_4 = fet_cpca[:, :4]

        # # Position feature
        # n_bytes = [250154314, 101099824, 237970294]
        # fet_pos = feature_position(matpath / 'XY_data.mat', dat_offsets=n_bytes, timestamps=timestamps,
        #                            indices=indices)

        # Generate .fet file used for clustering
        # TODO: Best move this out into the cluster module?
        if 'none' in map(str.lower, cli_args.to_fet):
            logger.warning('Skipping fet file generation')
        else:
            fet_file_path = outpath / matfile.with_suffix('.fet.0').name

            if len(cli_args.to_fet) == 1 and cli_args.to_fet[0].lower() == 'all':
                logger.debug('Writing all features to fet file.')
                included_features = list(map(str.lower, features.keys()))
            else:
                included_features = [
                    fn for fn in map(str.lower, features.keys())
                    if fn in list(map(str.lower, cli_args.to_fet))
                ]

            logger.info(f'Writing features {list(included_features)} to .fet')
            fet_data = [
                fd for fn, fd in features.items()
                if fn.lower() in included_features
            ]

            logger.debug(f'Writing .fet file {fet_file_path}')
            write_features_fet(feature_data=fet_data, outpath=fet_file_path)

        # Write .fd file for each feature
        for fet_name, fet_data in features.items():
            logger.debug(f'Writing feature {fet_name}.fd file')
            write_feature_fd(feature_names=fet_name,
                             feature_data=fet_data,
                             timestamps=timestamps,
                             outpath=outpath,
                             tetrode_path=matfile,
                             channel_validity=channel_validity)

        logger.debug('Generating waveform graphic')
        with open(matfile.with_suffix('.html'), 'w') as frf:
            frf.write('<head></head><body><h1>{}</h1>'.format(matfile.name))

            frf.write('<h2>Waveforms (n={})</h2>'.format(waveforms.shape[2]))
            density_agg = 'log'

            # ignore some matplotlib colormap usage errors
            with np.errstate(invalid='ignore'):
                images = ds_shade_waveforms(waveforms, how=density_agg)
            fig = ds_plot_waveforms(images, density_agg)
            frf.write(fig2html(fig) + '<br>')
            del fig

            for fet_name, fet_data in features.items():
                frf.write('<h3>Feature: {}</h3>\n'.format(fet_name))

                df_fet = pd.DataFrame(fet_data)

                # numerical column names are an issue with datashader, stringify 'em
                df_fet.rename(columns={k: str(k) for k in df_fet.columns},
                              inplace=True)
                df_fet['time'] = timestamps

                fet_columns = df_fet.columns[:-1]

                # Features vs. features
                images = []
                titles = []
                for cc in list(combinations(fet_columns, 2)):
                    fet_title = f'{fet_name}:{cc[1]} vs {fet_name}:{cc[0]}'
                    logger.debug(f'plotting feature {fet_title}')

                    # Calculate display limits, try to exclude outliers
                    # TODO: correct axis labeling
                    perc_lower = 0.05
                    perc_upper = 99.9
                    x_range = (np.percentile(df_fet[cc[0]], perc_lower),
                               np.percentile(df_fet[cc[0]], perc_upper))
                    y_range = (np.percentile(df_fet[cc[1]], perc_lower),
                               np.percentile(df_fet[cc[1]], perc_upper))
                    with np.errstate(invalid='ignore'):
                        shade = ds_shade_feature(df_fet[[cc[0], cc[1]]],
                                                 x_range=x_range,
                                                 y_range=y_range,
                                                 color_map='inferno')
                    images.append(shade)
                    titles.append(fet_title)

                fet_fig = ds_plot_features(images,
                                           how='log',
                                           fet_titles=titles)
                frf.write(fig2html(fet_fig) + '<br>\n')
                del fet_fig

                # Features over time
                t_images = []
                t_titles = []
                x_range = (0, df_fet['time'].max())

                # Calculate display limits, try to exclude outliers
                # TODO: correct axis labeling
                perc_lower = 0.1
                perc_upper = 99.9
                for cc in fet_columns:
                    t_title = f'{fet_name}:{cc} vs. time'
                    logger.debug(f'plotting {t_title}')
                    # per-column display limits, excluding outliers
                    y_range = (np.percentile(df_fet[cc], perc_lower),
                               np.percentile(df_fet[cc], perc_upper))
                    with np.errstate(invalid='ignore'):
                        shade = ds_shade_feature(df_fet[['time', cc]],
                                                 x_range=x_range,
                                                 y_range=y_range,
                                                 color_map='viridis')
                    t_images.append(shade)
                    t_titles.append(t_title)

                t_fig = ds_plot_features(t_images,
                                         how='log',
                                         fet_titles=t_titles)
                frf.write(fig2html(t_fig) + '<br>\n')
                del t_fig

                frf.write('<hr>\n')
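A hypothetical call matching the parser above; the target directory and the feature names are illustrative assumptions (AVAILABLE_FEATURES defines the real set).

# Hypothetical invocation; the target directory is a placeholder.
main(['recordings/tetrodes', '--features', 'energy', 'cpca', '--to_fet', 'all'])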
Example #3
def main(args):
    # TODO: Mutually exclusive bad/good channels
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='Dat file')
    parser.add_argument('-o', '--out', help='Directory to store reference file in', default='.')
    parser.add_argument('-r', '--reference', help='Path to reference file, if already at hand.')
    parser.add_argument('-b', '--bad_channels', type=int, nargs='+', help='Dead channel indices')
    parser.add_argument('-g', '--good_channels', type=int, nargs='+', help='Indices of channels to include')
    parser.add_argument('-C', '--channels', type=int, help='Number of channels in input file.')
    parser.add_argument('-Z', '--zero_bad_channels', action='store_true', help='Set bad channels to zero.')
    parser.add_argument('-i', '--inplace', action='store_true', help='Subtract reference in place.')
    parser.add_argument('-m', '--make-only', action='store_true', help='Only create the reference file.')
    parser.add_argument('-l', '--layout', help='Path to probe file defining channel order')
    parser.add_argument('-k', '--keep', action='store_true', help='Keep intermediate reference file')
    cli_args = parser.parse_args(args)

    # get number of channels in data, either from the cli args or data set config
    n_channels = cli_args.channels
    cfg = dat.metadata_from_target(cli_args.input, n_channels=n_channels)
    if n_channels is None:
        n_channels = cfg['CHANNELS']['n_channels']
    logger.debug(cfg)

    # FIXME: Assumes "pre-ordered" channels, i.e. 0:n_channels
    probe_file = cli_args.layout

    if probe_file is None and has_prb(cli_args.input):
        probe_file = has_prb(cli_args.input)
        logger.warning('No probe file given, but .prb file found. Using {}'.format(probe_file))

    if probe_file is not None:
        layout = run_prb(probe_file)
        # flat_channel_list returns (channels, bad_channels); truncate the
        # good-channel list to the expected channel count after unpacking
        channels, bad_channels = flat_channel_list(layout)
        channels = channels[:n_channels]
    else:
        channels = None
        bad_channels = None

    logger.debug('Good: {}, bad: {}'.format(channels, bad_channels))

    if cli_args.make_only:
        raise NotImplementedError('--make-only is not implemented yet')

    rv = ref(cli_args.input,
             ref_path=cli_args.reference,
             out_dir=cli_args.out,
             n_channels=n_channels,
             ch_idx_good=cli_args.good_channels,
             ch_idx_bad=bad_channels,
             zero_bad_channels=cli_args.zero_bad_channels,
             make_only=cli_args.make_only,
             inplace=cli_args.inplace,
             keep=cli_args.keep)
    if not rv:
        raise RuntimeError('Failed to create reference! Rv: {}'.format(rv))
    else:
        reffed_path = Path(rv)

    # Copy the prb file
    if not cli_args.inplace:
        logger.warning('Copying probe file to follow referenced data.')
        copy_as(probe_file, reffed_path.with_suffix('.prb'))
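A hypothetical call for this referencing entry point; the .dat and .prb file names are placeholders.

# Hypothetical invocation; input and probe file paths are placeholders.
main(['recording.dat', '--layout', 'probe.prb', '--zero_bad_channels'])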
Example #4
def main(args):
    parser = argparse.ArgumentParser(
        description='Convert file formats/layouts. Default result is int16 .dat file.')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help="Verbose (debug) output")

    # Input/output
    parser.add_argument(
        'target',
        nargs='*',
        default='.',
        help="""Path/list of paths to directories containing raw .continuous
                data OR path to .session definition file. Listing multiple
                files will result in data sets being concatenated in listed
                order.""")
    parser.add_argument(
        '-o',
        '--out_path',
        help='Output file path. Defaults to current working directory.')
    parser.add_argument('-P',
                        '--out_prefix',
                        help='Output file prefix. Default is name of target.')
    parser.add_argument('-T',
                        '--template_fname',
                        help='Output file template. Default: {}'.format(
                            DEFAULT_SHORT_TEMPLATE))

    parser.add_argument('-f',
                        '--format',
                        help='Output format. Default is: {}'.format(
                            list(FORMATS.keys())[2]),
                        choices=FORMATS.keys(),
                        default=list(FORMATS.keys())[2])
    parser.add_argument('--fname_channels',
                        action='store_true',
                        help='Include original channel numbers in file names.')

    # Channel arrangement
    channel_group = parser.add_mutually_exclusive_group()
    channel_group.add_argument('-c',
                               "--channel-count",
                               type=int,
                               help='Number of consecutive channels.')
    channel_group.add_argument(
        '-C',
        "--channel-list",
        nargs='*',
        type=int,
        help='List of channels in order they are to be merged.')
    channel_group.add_argument('-l',
                               '--layout',
                               help="Path to klusta .probe file.")
    parser.add_argument('-g',
                        '--channel-groups',
                        type=int,
                        nargs="+",
                        help="limit to only a subset of the channel groups")
    parser.add_argument('-S',
                        '--split-groups',
                        action='store_true',
                        help='Split channel groups into separate files.')
    parser.add_argument(
        '-d',
        '--dead-channels',
        nargs='*',
        type=int,
        help='List of dead channels. If flag set, these will be set to zero.')
    parser.add_argument('-z', '--zero-dead-channels', action='store_true')
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Do not write data files (but still create prb/prm)')
    parser.add_argument('-p', "--params", help='Path to .params file.')
    parser.add_argument('-D',
                        "--duration",
                        type=int,
                        help='Limit duration of recording (s)')
    parser.add_argument('--remove-trailing-zeros', action='store_true')
    parser.add_argument('--out_fname_template',
                        action='store_true',
                        help='Template for file naming.')

    cli_args = parser.parse_args(args)
    logger.debug('Arguments: {}'.format(cli_args))

    if cli_args.remove_trailing_zeros:
        raise NotImplementedError("Can't remove trailing zeros just yet.")

    targets = [op.abspath(op.expanduser(t)) for t in cli_args.target]
    formats = list(set([util.detect_format(target) for target in targets]))

    # Input file format
    logger.debug('Inputs found: {}'.format(formats))
    assert len(formats) == 1, 'Mixed or missing input formats: {}'.format(formats)
    format_input = formats[0]
    logger.debug('Using module: {}'.format(format_input.__name__))

    # Output file format
    format_output = FORMATS[cli_args.format.lower()]
    logger.debug('Output module: {}'.format(format_output.__name__))

    # Set up channel layout (channels, references, dead channels) from command line inputs or layout file
    # List of bad channels, will be added to channel group dict
    dead_channels = cli_args.dead_channels if cli_args.dead_channels is not None else []

    # One of channel_count, channel_list, layout_file path from mutex parser group channel_group
    layout = None
    if cli_args.channel_count is not None:
        channel_groups = {
            0: {
                'channels': list(range(cli_args.channel_count)),
                'dead_channels': dead_channels
            }
        }

    elif cli_args.channel_list is not None:
        channel_groups = {
            0: {
                'channels': cli_args.channel_list,
                'dead_channels': dead_channels
            }
        }

    elif cli_args.layout is not None:
        layout = util.run_prb(op.abspath(op.expanduser(cli_args.layout)))
        logger.debug('Opened layout file {}'.format(layout))
        if cli_args.split_groups:
            channel_groups = layout['channel_groups']
            if 'dead_channels' in layout:
            if len(dead_channels) and (layout['dead_channels'] != dead_channels):
                raise ValueError(
                    'Conflicting bad channel lists: args: {}, layout: {}'.format(
                        dead_channels, layout['dead_channels']))
                dead_channels = layout['dead_channels']
            if cli_args.channel_groups:
                channel_groups = {
                    i: channel_groups[i]
                    for i in cli_args.channel_groups if i in channel_groups
                }
        else:
            channels, dead_channels = util.flat_channel_list(layout)
            logger.warning(
                'Not splitting groups! Creating new monotonically increasing channel map.'
            )

            # make a new channel group by merging in the existing ones
            channel_groups = {
                0: {
                    'channels': channels,
                    'dead_channels': dead_channels
                }
            }
    else:
        logger.debug(
            'No channels given on CLI, will try to get channel number from target later.'
        )
        channel_groups = None

    # Generate configuration from input files found by the format
    # This step already checks for the existence of the data files
    # and can fail prematurely if the wrong naming template is being used
    # This needs more work.
    logger.debug('Getting metadata for all targets')
    targets_metadata_list = [
        format_input.metadata_from_target(t) for t in targets
    ]

    if channel_groups is None:
        target_channels = list(
            set([ch for t in targets_metadata_list for ch in t['CHANNELS']]))

        channel_groups = {
            0: {
                'channels': target_channels,
                'dead_channels': dead_channels
            }
        }

    # Output file path
    if cli_args.out_path is None:
        out_path = os.getcwd()
        logger.info(
            'Using current working directory "{}" as output path.'.format(
                out_path))
    else:
        out_path = op.abspath(op.expanduser(cli_args.out_path))

    # Create the output path if necessary
    if len(out_path) and not op.exists(out_path):
        os.mkdir(out_path)
        logger.debug('Creating output path {}'.format(out_path))

    out_fext = format_output.FMT_FEXT
    out_prefix = cli_args.out_prefix if cli_args.out_prefix is not None else op.basename(
        cli_args.target[0])
    logger.debug('Prefix: {}'.format(out_prefix))
    default_template = DEFAULT_FULL_TEMPLATE if cli_args.fname_channels else DEFAULT_SHORT_TEMPLATE
    fname_template = default_template if cli_args.template_fname is None else cli_args.template_fname
    logger.debug('Filename template: {}'.format(fname_template))

    # +++++++++++++++++++++++++++++++++++++++++ MAIN LOOP ++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # Iterates over all channel groups, calling continuous_to_dat for each to be bundled together
    # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    total_duration_written = 0
    for cg_id, channel_group in channel_groups.items():
        logger.debug('channel group: {}'.format(channel_group))

        # TODO: Check file name length, shorten if > 256 characters
        # Possible parameters: outfile prefix [outfile], channel group id [cg_id]
        # channel ranges from consecutive channels, for output file naming
        crs = util.fmt_channel_ranges(channel_group['channels'])
        output_basename = fname_template.format(prefix=out_prefix,
                                                cg_id=cg_id,
                                                crs=crs)
        output_fname = ''.join([output_basename, out_fext])
        output_file_path = op.join(out_path, output_fname)

        duration_written = 0
        # First target, file mode is write, after that, append to output file
        for file_mode, target_metadata in enumerate(targets_metadata_list):
            duration = None if cli_args.duration is None else cli_args.duration - duration_written
            target_path = target_metadata['TARGET']

            logger.debug(
                'Starting conversion for target {}'.format(target_path))

            if not cli_args.dry_run and WRITE_DATA:
                duration_written += continuous_to_dat(
                    target_metadata=target_metadata,
                    output_path=output_file_path,
                    channel_group=channel_group,
                    dead_channel_ids=dead_channels,
                    zero_dead_channels=cli_args.zero_dead_channels,
                    file_mode='a' if file_mode else 'w',
                    duration=duration)

        # accumulate the group total once, after all targets were written
        total_duration_written += duration_written

        # create the per-group .prb files
        # FIXME: Dead channels are a big mess
        with open(op.join(out_path, output_basename + '.prb'), 'w') as prb_out:
            if cli_args.split_groups or (layout is None):
                # One prb file per channel group; remap dead channel indices
                # into group-local numbering, leaving the global list intact
                ch_out = channel_group['channels']
                cg_out = {0: {'channels': list(range(len(ch_out)))}}
                dead_out = sorted(
                    [ch_out.index(dc) for dc in dead_channels if dc in ch_out])
            else:
                # Same channel groups, but with flat numbering
                cg_out, dead_out = util.monotonic_prb(layout)
            prb_out.write('dead_channels = {}\n'.format(
                pprint.pformat(dead_out)))
            prb_out.write('channel_groups = {}'.format(pprint.pformat(cg_out)))

        # FIXME: Generation of .prm
        # For now in separate script. Should take a .prm template that will be adjusted
        # # Template parameter file
        # prm_file_input = cli_args.params
        # with open(op.join(out_path, output_basename + '.prm'), 'w') as prm_out:
        #     if prm_file_input:
        #         f = open(prm_file_input, 'r')
        #         prm_in = f.read()
        #         f.close()
        #     else:
        #         prm_in = pkgr.resource_string('config', 'default.prm').decode()
        #     prm_out.write(prm_in.format(experiment_name=output_basename,
        #                                 probe_file=output_basename + '.prb',
        #                                 raw_file=output_file_path,
        #                                 n_channels=len(channel_group['channels'])))

        logger.debug('Done! Total data length written: {}'.format(
            util.fmt_time(total_duration_written)))
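A hypothetical call for the converter; that 'dat' is a valid FORMATS key is an assumption, and all paths and channel numbers are placeholders.

# Hypothetical invocation; 'dat' as format key and all values are assumptions.
main(['session_dir', '--format', 'dat', '--channel-count', '32',
      '--dead-channels', '3', '7', '--zero-dead-channels'])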
Example #5
def main(args):
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='Dat file')
    parser.add_argument('-o',
                        '--out',
                        help='Directory to store segments in',
                        default='.')
    parser.add_argument('-c',
                        '--clean',
                        action='store_true',
                        help='Remove the original dat file when successful')
    parser.add_argument('-C',
                        '--channels',
                        type=int,
                        help='Number of channels in input file.')
    parser.add_argument('-d', '--dtype', default='int16')
    parser.add_argument(
        '-p',
        '--prefix',
        default='tetrode',
        help='Prefix to output file name. Default: "tetrode"')  # '{infile}_'
    parser.add_argument('--keep_dead',
                        help='Do not skip tetrodes with all-dead channels',
                        action='store_true')

    grouping = parser.add_mutually_exclusive_group()
    grouping.add_argument('-l',
                          '--layout',
                          help='Path to probe file defining channel order')
    grouping.add_argument('-g',
                          '--groups_of',
                          type=int,
                          help='Split into regular groups of n channels')

    cli_args = parser.parse_args(args)
    logger.debug('cli_args: {}'.format(cli_args))

    in_path = os.path.abspath(os.path.expanduser(cli_args.input))
    bp, ext = os.path.splitext(in_path)

    probe_file = cli_args.layout
    if not any([cli_args.layout, cli_args.groups_of]):
        if os.path.exists(bp + '.prb'):
            probe_file = bp + '.prb'
        else:
            logger.error(
                'No information on how to split the channels. Either by groups_of, or with a prb file'
            )
            sys.exit(1)

    if probe_file is not None:
        layout = run_prb(probe_file)
        channel_groups = layout['channel_groups']
        dead_channels = layout.get('dead_channels', [])
        n_channels = sum(
            [len(cg['channels']) for idx, cg in channel_groups.items()])
        logger.debug('{} channels from prb file'.format(n_channels))
    else:
        if cli_args.channels is None:
            logger.warning('No channel count given. Guessing...')
            n_channels = dat.guess_n_channels(in_path)
            logger.warning(
                'Guessed there to be {} channels'.format(n_channels))
        else:
            n_channels = cli_args.channels
        assert not n_channels % cli_args.groups_of
        channel_groups = {
            cg: {
                'channels':
                list(
                    range(cg * cli_args.groups_of,
                          (cg + 1) * cli_args.groups_of))
            }
            for cg in range(n_channels // cli_args.groups_of)
        }
        dead_channels = []

    logger.debug('channel_groups: {}'.format(channel_groups))

    mm = np.memmap(in_path, dtype=cli_args.dtype,
                   mode='r').reshape(-1, n_channels)

    # Select valid channel groups, skip group with all-dead channels
    indices = []
    for cg in channel_groups.keys():
        channels = channel_groups[cg]['channels']
        dead = [ch in dead_channels for ch in channels]
        if all(dead) and not cli_args.keep_dead:
            logger.warning(
                f'Skipping tetrode {cg} because all channels are dead. Use --keep_dead to not skip.'
            )
            continue
        indices.append(cg)

    # # Create per-tetrode probe file
    # # FIXME: Dead channels are big mess
    # with open(op.join(out_path, output_basename + '.prb'), 'w') as prb_out:
    #     if cli_args.split_groups or (layout is None):
    #         # One prb file per channel group
    #         ch_out = channel_group['channels']
    #         cg_out = {0: {'channels': list(range(len(ch_out)))}}
    #         dead_channels = sorted([ch_out.index(dc) for dc in dead_channels if dc in ch_out])
    #
    #     else:
    #         # Same channel groups, but with flat numbering
    #         cg_out, dead_channels = util.monotonic_prb(layout)
    #     prb_out.write('dead_channels = {}\n'.format(pprint.pformat(dead_channels)))
    #     prb_out.write('channel_groups = {}'.format(pprint.pformat(cg_out)))

    batch_size = 1_000_000
    n_samples = mm.shape[0]
    pbar = tqdm(total=n_samples, unit_scale=True, unit='Samples')
    # zero-pad the group id just wide enough for the largest group index
    postfix = '{{cg_id:0{}d}}.dat'.format(len(str(max(indices))))

    with ExitStack() as stack:
        out_files = {}
        for cg_id in indices:
            dat_path = Path((cli_args.prefix + postfix).format(cg_id=cg_id,
                                                               infile=bp))
            prb_path = dat_path.with_suffix('.prb')

            # Create per-tetrode probe file
            ch_out = channel_groups[cg_id]['channels']
            cg_out = {0: {'channels': list(range(len(ch_out)))}}
            dead_ch = sorted(
                [ch_out.index(dc) for dc in dead_channels if dc in ch_out])
            write_prb(prb_path, cg_out, dead_ch)

            # Create file object for .dat file and append to exit stack for clean shutdown
            of = open(dat_path, 'wb')
            out_files[cg_id] = stack.enter_context(of)

        samples_remaining = n_samples
        while samples_remaining > 0:
            pbar.update(min(batch_size, samples_remaining))
            offset = n_samples - samples_remaining
            arr = mm[offset:offset + batch_size, :]
            for cg_id in out_files.keys():
                arr.take(channel_groups[cg_id]['channels'],
                         axis=1).tofile(out_files[cg_id])
            samples_remaining -= batch_size

    del mm

    try:
        if cli_args.clean:
            logger.warning('Deleting file {}'.format(in_path))
            os.remove(in_path)
    except PermissionError:
        logger.error("Couldn't clean up files. Sadface.")
Example #6
def main(args):
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='Dat file')
    parser.add_argument('-o',
                        '--out',
                        help='Directory to store segments in',
                        default='.')
    parser.add_argument('-c',
                        '--clean',
                        action='store_true',
                        help='Remove the original dat file when successful')
    parser.add_argument('-C',
                        '--channels',
                        type=int,
                        help='Number of channels in input file.')
    parser.add_argument('-d', '--dtype', default='int16')
    parser.add_argument('-p', '--prefix', default='{infile}_')

    grouping = parser.add_mutually_exclusive_group()
    grouping.add_argument('-l',
                          '--layout',
                          help='Path to probe file defining channel order')
    grouping.add_argument('-g',
                          '--groups_of',
                          type=int,
                          help='Split into regular groups of n channels')

    cli_args = parser.parse_args(args)
    logger.debug('cli_args: {}'.format(cli_args))

    in_path = os.path.abspath(os.path.expanduser(cli_args.input))
    bp, ext = os.path.splitext(in_path)

    probe_file = cli_args.layout
    if not any([cli_args.layout, cli_args.groups_of]):
        if os.path.exists(bp + '.prb'):
            probe_file = bp + '.prb'
        else:
            logger.error(
                'No information on how to split the channels. Either by groups_of, or with a prb file'
            )
            sys.exit(1)

    if probe_file is not None:
        layout = run_prb(probe_file)
        channel_groups = layout['channel_groups']
        n_channels = sum(
            [len(cg['channels']) for idx, cg in channel_groups.items()])
        logger.debug('{} channels from prb file'.format(n_channels))
    else:
        if cli_args.channels is None:
            logger.warning('No channel count given. Guessing...')
            n_channels = dat.guess_n_channels(in_path)
            logger.warning(
                'Guessed there to be {} channels'.format(n_channels))
        else:
            n_channels = cli_args.channels
        assert not n_channels % cli_args.groups_of
        channel_groups = {
            cg: {
                'channels':
                list(
                    range(cg * cli_args.groups_of,
                          (cg + 1) * cli_args.groups_of))
            }
            for cg in range(n_channels // cli_args.groups_of)
        }

    logger.debug('channel_groups: {}'.format(channel_groups))

    mm = np.memmap(in_path, dtype=cli_args.dtype,
                   mode='r').reshape(-1, n_channels)

    indices = list(channel_groups.keys())
    batch_size = 1000000
    n_samples = mm.shape[0]
    pbar = tqdm(total=n_samples, unit_scale=True, unit='Samples')
    # zero-pad the output index just wide enough for the largest index
    postfix = '{{cg_id:0{}d}}.dat'.format(len(str(len(indices) - 1)))

    with ExitStack() as stack:
        out_files = [
            stack.enter_context(
                open((cli_args.prefix + postfix).format(cg_id=cg_id,
                                                        infile=bp), 'wb'))
            for cg_id in range(len(indices))
        ]
        samples_remaining = n_samples
        while samples_remaining > 0:
            pbar.update(min(batch_size, samples_remaining))
            offset = n_samples - samples_remaining
            arr = mm[offset:offset + batch_size, :]
            for cg_id in range(len(indices)):
                arr.take(channel_groups[indices[cg_id]]['channels'],
                         axis=1).tofile(out_files[cg_id])
            samples_remaining -= batch_size

#        logger.debug('Writing .prm files')
#        for outfile in out_files:
#            print(outfile.name)
#            make_prm(outfile.name, 'tetrode.prb')

    del mm

    try:
        if cli_args.clean:
            logger.warning('Deleting file {}'.format(in_path))
            os.remove(in_path)
    except PermissionError:
        logger.error("Couldn't clean up files. Sadface.")