def run(*args, **kwargs):
    import argparse
    parser = argparse.ArgumentParser(
        'Data Visualization',
        epilog="""Use: Mousewheel/arrow keys to scroll,
<Shift>/<Ctrl>+<left>/<right> for larger jumps.
<Shift>/<Ctrl>+Mousewheel to scale. Use <q> or <Esc> to exit.
""")
    parser.add_argument('path', help='Relative or absolute path to directory',
                        default='.', nargs='?')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='Debug mode -- verbose output, no confirmations.')
    parser.add_argument('-c', '--cols', help='Number of columns', default=1, type=int)
    parser.add_argument('-C', '--channels', help='Number of channels', type=int)
    parser.add_argument('-l', '--layout', help='Path to probe file defining channel order')
    parser.add_argument('-D', '--dtype', help='Data type if needed (e.g. float32 dat files)')
    parser.add_argument('-J', '--jump', help='Jump to timepoint (in seconds)',
                        type=float, default=0)
    cli_args = parser.parse_args(*args)

    if cli_args.layout is not None:
        layout = util.run_prb(cli_args.layout)
        channels, bad_channels = util.flat_channel_list(layout)
        # Truncate the flat channel list to the requested channel count
        # (slicing by None keeps all channels).
        channels = channels[:cli_args.channels]
    else:
        channels = None
        bad_channels = None

    Vis(op.abspath(op.expanduser(cli_args.path)),
        n_cols=cli_args.cols,
        n_channels=cli_args.channels,
        channels=channels,
        bad_channels=bad_channels,
        dtype=cli_args.dtype,
        start=cli_args.jump)
    app.run()
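
# A minimal sketch of running this entry point directly. The actual package
# may dispatch to run() differently; the example flags below are hypothetical
# values for the arguments defined in the parser above, e.g.:
#
#   run(['~/data/session01', '--cols', '2', '--channels', '64',
#        '--layout', 'probes/tetrodes.prb', '--jump', '120'])
#
if __name__ == '__main__':
    import sys
    run(sys.argv[1:])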
def main(args):
    parser = argparse.ArgumentParser(
        'Generate .fet and .fd files for features from spike waveforms')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Verbose (debug) output')
    parser.add_argument('target', default='.',
                        help='Directory with waveform .mat files.')
    parser.add_argument('-o', '--out_path',
                        help='Output file path. Defaults to current working directory.')
    parser.add_argument('--sampling-rate', type=float, default=3e4,
                        help='Sampling rate. Default 30000 Hz')
    parser.add_argument('-f', '--force', action='store_true',
                        help='Force overwrite of existing files.')
    parser.add_argument('-a', '--align', default='min',
                        help='Alignment method, default: min')
    parser.add_argument('-F', '--features', nargs='*', default=['energy'],
                        help='Features to use. Default: energy')
    parser.add_argument('--to_fet', nargs='*', default='all',
                        help='Features to include in fet file, default: all')
    parser.add_argument('--ignore-prb', action='store_true',
                        help='Do not load channel validity from dead channels in .prb files')
    parser.add_argument('--no-report', action='store_true',
                        help='Do not generate report file (saves time)')
    cli_args = parser.parse_args(args)

    matpath = Path(cli_args.target).resolve()
    if matpath.is_file():
        matfiles = [matpath]
    else:
        matfiles = sorted(map(Path.resolve, matpath.glob('tetrode??.mat')))
    logger.debug(f'Target files: {[mf.name for mf in matfiles]}')
    logger.info('Found {} waveform files'.format(len(matfiles)))
    logger.debug(f'Requested to fet: {cli_args.to_fet}')

    # TODO: per-feature arguments
    sigma = 0.8

    for nt, matfile in tqdm(enumerate(matfiles), total=len(matfiles)):
        outpath = matfile.parent / 'FD'
        if not outpath.exists():
            outpath.mkdir()

        # Load the .prb file if it exists and derive channel validity from its dead channels
        prb_path = matfile.with_suffix('.prb')
        if prb_path.exists():
            prb = run_prb(prb_path)
        else:
            logger.warning(f'No probe file found for {matfile.name} '
                           f'and no channel validity given.')
            prb = None

        if prb is None or 'dead_channels' not in prb:
            channel_validity = [1, 1, 1, 1]
        else:
            channel_validity = [int(ch not in prb['dead_channels'])
                                for ch in prb['channel_groups'][0]['channels']]
        logger.debug('Channel validity: {}'.format(channel_validity) +
                     ('' if all(channel_validity)
                      else f', {4 - sum(channel_validity)} dead channel(s)'))

        hf = h5py.File(matfile, 'r')
        waveforms = np.array(hf['spikes'], dtype=PRECISION).reshape(
            [N_SAMPLES, N_CHANNELS, -1])
        timestamps = np.array(hf['index'], dtype='double')
        gauss = gaussian_filter(waveforms, sigma)
        # indices = timestamps * sampling_rate / 1e4

        features = {}

        # 'all' expands to every available feature
        if len(cli_args.features) == 1 and cli_args.features[0].lower() == 'all':
            cli_args.features = AVAILABLE_FEATURES

        for fet_name in map(str.lower, cli_args.features):
            if fet_name == 'energy':
                logger.debug(f'Calculating {fet_name} feature')
                features['energy'] = scale_feature(feature_energy(waveforms))
            elif fet_name == 'energy24':
                logger.debug(f'Calculating {fet_name} feature')
                features['energy24'] = scale_feature(feature_energy24(waveforms))
            elif fet_name == 'peak':
                logger.debug(f'Calculating {fet_name} feature')
                features['peak'] = feature_peak(waveforms)
            elif fet_name == 'cpca':
                logger.debug(f'Calculating {fet_name} feature')
                cpca = scale_feature(feature_cPCA(waveforms))
                logger.debug('cPCA shape {}'.format(cpca.shape))
                features['cPCA'] = cpca
            elif fet_name == 'cpca24':
                logger.debug(f'Calculating {fet_name} feature')
                cpca24 = scale_feature(feature_cPCA24(waveforms))
                logger.debug('cPCA24 shape {}'.format(cpca24.shape))
                features['cPCA24'] = cpca24
            elif fet_name == 'chwpca':
                logger.debug(f'Calculating {fet_name} feature')
                chwpca = scale_feature(feature_chwPCA(waveforms))
                logger.debug('chwPCA shape {}'.format(chwpca.shape))
                features['chwPCA'] = chwpca
            else:
                raise NotImplementedError('Unknown feature: {}'.format(fet_name))

        # TODO:
        # fet_cpca_4 = fet_cpca[:, :4]
        # # Position feature
        # n_bytes = [250154314, 101099824, 237970294]
        # fet_pos = feature_position(matpath / 'XY_data.mat', dat_offsets=n_bytes,
        #                            timestamps=timestamps, indices=indices)

        # Generate the .fet file used for clustering
        # TODO: Best move this out into the cluster module?
        if 'none' in map(str.lower, cli_args.to_fet):
            logger.warning('Skipping fet file generation')
        else:
            fet_file_path = outpath / matfile.with_suffix('.fet.0').name
            if len(cli_args.to_fet) == 1 and cli_args.to_fet[0].lower() == 'all':
                logger.debug('Writing all features to fet file.')
                included_features = list(map(str.lower, features.keys()))
            else:
                included_features = [fn for fn in map(str.lower, features.keys())
                                     if fn in list(map(str.lower, cli_args.to_fet))]
            logger.info(f'Writing features {list(included_features)} to .fet')
            fet_data = [fd for fn, fd in features.items()
                        if fn.lower() in included_features]
            logger.debug(f'Writing .fet file {fet_file_path}')
            write_features_fet(feature_data=fet_data, outpath=fet_file_path)

        # Write a .fd file for each feature
        for fet_name, fet_data in features.items():
            logger.debug(f'Writing feature {fet_name}.fd file')
            write_feature_fd(feature_names=fet_name, feature_data=fet_data,
                             timestamps=timestamps, outpath=outpath,
                             tetrode_path=matfile, channel_validity=channel_validity)

        logger.debug('Generating waveform graphic')
        with open(matfile.with_suffix('.html'), 'w') as frf:
            frf.write('<head></head><body><h1>{}</h1>'.format(matfile.name))
            frf.write('<h2>Waveforms (n={})</h2>'.format(waveforms.shape[2]))
            density_agg = 'log'
            with np.errstate(invalid='ignore'):  # ignore some matplotlib colormap usage errors
                images = ds_shade_waveforms(waveforms, how=density_agg)
            fig = ds_plot_waveforms(images, density_agg)
            frf.write(fig2html(fig) + '<br/>')
            del fig

            for fet_name, fet_data in features.items():
                frf.write('<h3>Feature: {}</h3>\n'.format(fet_name))
                df_fet = pd.DataFrame(fet_data)
                # Numerical column names are an issue with datashader, stringify 'em
                df_fet.rename(columns={k: str(k) for k in df_fet.columns}, inplace=True)
                df_fet['time'] = timestamps
                fet_columns = df_fet.columns[:-1]

                # Features vs. features
                images = []
                titles = []
                for cc in list(combinations(fet_columns, 2)):
                    fet_title = f'{fet_name}:{cc[1]} vs {fet_name}:{cc[0]}'
                    logger.debug(f'plotting feature {fet_title}')
                    # Calculate display limits, try to exclude outliers
                    # TODO: correct axis labeling
                    perc_lower = 0.05
                    perc_upper = 99.9
                    x_range = (np.percentile(df_fet[cc[0]], perc_lower),
                               np.percentile(df_fet[cc[0]], perc_upper))
                    y_range = (np.percentile(df_fet[cc[1]], perc_lower),
                               np.percentile(df_fet[cc[1]], perc_upper))
                    with np.errstate(invalid='ignore'):
                        shade = ds_shade_feature(df_fet[[cc[0], cc[1]]],
                                                 x_range=x_range, y_range=y_range,
                                                 color_map='inferno')
                    images.append(shade)
                    titles.append(fet_title)

                fet_fig = ds_plot_features(images, how='log', fet_titles=titles)
                frf.write(fig2html(fet_fig) + '<br/>\n')
                del fet_fig

                # Features over time
                t_images = []
                t_titles = []
                x_range = (0, df_fet['time'].max())
                perc_lower = 0.1
                perc_upper = 99.9
                for cc in fet_columns:
                    # Calculate per-column display limits, try to exclude outliers
                    # TODO: correct axis labeling
                    y_range = (np.percentile(df_fet[cc], perc_lower),
                               np.percentile(df_fet[cc], perc_upper))
                    t_title = f'{fet_name}:{cc} vs. time'
                    logger.debug(f'plotting {t_title}')
                    with np.errstate(invalid='ignore'):
                        shade = ds_shade_feature(df_fet[['time', cc]],
                                                 x_range=x_range, y_range=y_range,
                                                 color_map='viridis')
                    t_images.append(shade)
                    t_titles.append(t_title)

                t_fig = ds_plot_features(t_images, how='log', fet_titles=t_titles)
                frf.write(fig2html(t_fig) + '<br/>\n')
                del t_fig
                frf.write('<hr/>\n')
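
# For orientation, a hedged sketch of the (samples x channels x spikes) ->
# (spikes x channels) reduction the feature loop above relies on. This is NOT
# the packaged feature_energy implementation, only an illustration of the
# expected shapes; the real feature may normalize or window differently.
import numpy as np


def example_energy(waveforms: np.ndarray) -> np.ndarray:
    """Per-channel L2 norm of each spike waveform.

    waveforms: float array shaped (n_samples, n_channels, n_spikes).
    Returns an (n_spikes, n_channels) feature matrix.
    """
    return np.sqrt(np.sum(waveforms ** 2, axis=0)).T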
def main(args):
    # TODO: Mutually exclusive bad/good channels
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='Dat file')
    parser.add_argument('-o', '--out', default='.',
                        help='Directory to store reference file in')
    parser.add_argument('-r', '--reference',
                        help='Path to reference file, if already at hand.')
    parser.add_argument('-b', '--bad_channels', type=int, nargs='+',
                        help='Dead channel indices')
    parser.add_argument('-g', '--good_channels', type=int, nargs='+',
                        help='Indices of channels to include')
    parser.add_argument('-C', '--channels', type=int,
                        help='Number of channels in input file.')
    parser.add_argument('-Z', '--zero_bad_channels', action='store_true',
                        help='Set bad channels to zero.')
    parser.add_argument('-i', '--inplace', action='store_true',
                        help='Subtract reference in place.')
    parser.add_argument('-m', '--make-only', action='store_true',
                        help='Only create the reference file.')
    parser.add_argument('-l', '--layout', help='Path to probe file defining channel order')
    parser.add_argument('-k', '--keep', action='store_true',
                        help='Keep intermediate reference file')
    cli_args = parser.parse_args(args)

    # Number of channels in the data, either from the CLI args or the data set
    # config (the attribute is always present on the Namespace; it is None
    # unless given on the command line).
    n_channels = cli_args.channels
    cfg = dat.metadata_from_target(cli_args.input, n_channels=n_channels)
    if n_channels is None:
        n_channels = cfg['CHANNELS']['n_channels']
    logger.debug(cfg)

    # FIXME: Assumes "pre-ordered" channels, i.e. 0:n_channels
    probe_file = cli_args.layout
    if probe_file is None and has_prb(cli_args.input):
        probe_file = has_prb(cli_args.input)
        logger.warning('No probe file given, but .prb file found. Using {}'.format(probe_file))

    if probe_file is not None:
        layout = run_prb(probe_file)
        channels, bad_channels = flat_channel_list(layout)
        channels = channels[:n_channels]
    else:
        channels = None
        bad_channels = None
    logger.debug('Good: {}, bad: {}'.format(channels, bad_channels))

    if cli_args.make_only:
        raise NotImplementedError

    rv = ref(cli_args.input,
             ref_path=cli_args.reference,
             out_dir=cli_args.out,
             n_channels=n_channels,
             ch_idx_good=cli_args.good_channels,
             ch_idx_bad=bad_channels,
             zero_bad_channels=cli_args.zero_bad_channels,
             make_only=cli_args.make_only,
             inplace=cli_args.inplace,
             keep=cli_args.keep)
    if not rv:
        raise RuntimeError('Failed to create reference! Rv: {}'.format(rv))
    reffed_path = Path(rv)

    # Copy the prb file so it follows the referenced data
    if not cli_args.inplace:
        logger.warning('Copying probe file to follow referenced data.')
        copy_as(probe_file, reffed_path.with_suffix('.prb'))
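
# The referencing itself lives in ref(), imported elsewhere. For intuition, a
# hedged sketch of one common choice, a common-average reference over the good
# channels; this is illustrative and not necessarily what ref() computes.
import numpy as np


def example_car(data: np.ndarray, ch_idx_good=None) -> np.ndarray:
    """Subtract the mean over good channels from every channel.

    data: (n_samples, n_channels) array; ch_idx_good: optional channel indices.
    """
    good = data if ch_idx_good is None else data[:, ch_idx_good]
    # Cast the reference back to the input dtype (e.g. int16) before subtracting
    reference = good.mean(axis=1, keepdims=True).astype(data.dtype)
    return data - reference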
def main(args):
    parser = argparse.ArgumentParser(
        'Convert file formats/layouts. Default result is int16 .dat file.')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Verbose (debug) output')

    # Input/output
    parser.add_argument('target', nargs='*', default='.',
                        help="""Path/list of paths to directories containing raw .continuous
                        data OR path to .session definition file. Listing multiple files
                        will result in data sets being concatenated in listed order.""")
    parser.add_argument('-o', '--out_path',
                        help='Output file path. Defaults to current working directory.')
    parser.add_argument('-P', '--out_prefix',
                        help='Output file prefix. Default is name of target.')
    parser.add_argument('-T', '--template_fname',
                        help='Output file template. Default: {}'.format(DEFAULT_SHORT_TEMPLATE))
    parser.add_argument('-f', '--format', choices=FORMATS.keys(),
                        default=list(FORMATS.keys())[2],
                        help='Output format. Default is: {}'.format(list(FORMATS.keys())[2]))
    parser.add_argument('--fname_channels', action='store_true',
                        help='Include original channel numbers in file names.')

    # Channel arrangement
    channel_group = parser.add_mutually_exclusive_group()
    channel_group.add_argument('-c', '--channel-count', type=int,
                               help='Number of consecutive channels.')
    channel_group.add_argument('-C', '--channel-list', nargs='*', type=int,
                               help='List of channels in order they are to be merged.')
    channel_group.add_argument('-l', '--layout', help='Path to klusta .probe file.')
    parser.add_argument('-g', '--channel-groups', type=int, nargs='+',
                        help='Limit to only a subset of the channel groups')
    parser.add_argument('-S', '--split-groups', action='store_true',
                        help='Split channel groups into separate files.')
    parser.add_argument('-d', '--dead-channels', nargs='*', type=int,
                        help='List of dead channels. If flag set, these will be set to zero.')
    parser.add_argument('-z', '--zero-dead-channels', action='store_true',
                        help='Set dead channels to zero.')
    parser.add_argument('--dry-run', action='store_true',
                        help='Do not write data files (but still create prb/prm)')
    parser.add_argument('-p', '--params', help='Path to .params file.')
    parser.add_argument('-D', '--duration', type=int,
                        help='Limit duration of recording (s)')
    parser.add_argument('--remove-trailing-zeros', action='store_true')
    # NOTE: flag only; file naming templates are set via -T/--template_fname
    parser.add_argument('--out_fname_template', action='store_true',
                        help='Template for file naming.')

    cli_args = parser.parse_args(args)
    logger.debug('Arguments: {}'.format(cli_args))

    if cli_args.remove_trailing_zeros:
        raise NotImplementedError("Can't remove trailing zeros just yet.")

    targets = [op.abspath(op.expanduser(t)) for t in cli_args.target]

    # Input file format: all targets must share a single format
    formats = list(set([util.detect_format(target) for target in targets]))
    logger.debug('Inputs found: {}'.format(formats))
    assert len(formats) == 1
    format_input = formats[0]
    logger.debug('Using module: {}'.format(format_input.__name__))

    # Output file format
    format_output = FORMATS[cli_args.format.lower()]
    logger.debug('Output module: {}'.format(format_output.__name__))

    # Set up channel layout (channels, references, dead channels) from
    # command line inputs or a layout file.

    # List of bad channels, will be added to the channel group dict
    dead_channels = cli_args.dead_channels if cli_args.dead_channels is not None else []

    # One of channel_count, channel_list, layout file path from the mutually
    # exclusive parser group channel_group
    layout = None
    if cli_args.channel_count is not None:
        channel_groups = {0: {'channels': list(range(cli_args.channel_count)),
                              'dead_channels': dead_channels}}
    elif cli_args.channel_list is not None:
        channel_groups = {0: {'channels': cli_args.channel_list,
                              'dead_channels': dead_channels}}
    elif cli_args.layout is not None:
        layout = util.run_prb(op.abspath(op.expanduser(cli_args.layout)))
        logger.debug('Opened layout file {}'.format(layout))

        if cli_args.split_groups:
            channel_groups = layout['channel_groups']
            if 'dead_channels' in layout:
                if len(dead_channels) and (layout['dead_channels'] != dead_channels):
                    raise ValueError('Conflicting bad channel lists: args: {}, layout: {}'
                                     .format(layout['dead_channels'], dead_channels))
                dead_channels = layout['dead_channels']
            if cli_args.channel_groups:
                channel_groups = {i: channel_groups[i]
                                  for i in cli_args.channel_groups if i in channel_groups}
        else:
            channels, dead_channels = util.flat_channel_list(layout)
            logger.warning('Not splitting groups! Creating new monotonically '
                           'increasing channel map.')
            # Make a new channel group by merging the existing ones
            channel_groups = {0: {'channels': channels,
                                  'dead_channels': dead_channels}}
    else:
        logger.debug('No channels given on CLI, will try to get channel number '
                     'from target later.')
        channel_groups = None

    # Generate configuration from input files found by the format module.
    # This step already checks for the existence of the data files and can
    # fail prematurely if the wrong naming template is being used.
    # This needs more work.
    logger.debug('Getting metadata for all targets')
    targets_metadata_list = [format_input.metadata_from_target(t) for t in targets]

    if channel_groups is None:
        target_channels = list(set([ch for t in targets_metadata_list
                                    for ch in t['CHANNELS']]))
        channel_groups = {0: {'channels': target_channels,
                              'dead_channels': dead_channels}}

    # Output file path
    if cli_args.out_path is None:
        out_path = os.getcwd()
        logger.info('Using current working directory "{}" as output path.'.format(out_path))
    else:
        out_path = op.abspath(op.expanduser(cli_args.out_path))

    # Create the output path if necessary
    if len(out_path) and not op.exists(out_path):
        os.mkdir(out_path)
        logger.debug('Creating output path {}'.format(out_path))

    out_fext = format_output.FMT_FEXT
    out_prefix = cli_args.out_prefix if cli_args.out_prefix is not None \
        else op.basename(cli_args.target[0])
    logger.debug('Prefix: {}'.format(out_prefix))

    default_template = DEFAULT_FULL_TEMPLATE if cli_args.fname_channels \
        else DEFAULT_SHORT_TEMPLATE
    fname_template = default_template if cli_args.template_fname is None \
        else cli_args.template_fname
    logger.debug('Filename template: {}'.format(fname_template))

    # +++++++++++++++++++++++++++++++++++++++++ MAIN LOOP +++++++++++++++++++++++++++++++++++++
    # Iterates over all channel groups, calling continuous_to_dat for each to be bundled together
    # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    total_duration_written = 0
    for cg_id, channel_group in channel_groups.items():
        logger.debug('channel group: {}'.format(channel_group))

        # TODO: Check file name length, shorten if > 256 characters
        # Possible parameters: outfile prefix [outfile], channel group id [cg_id]
        # Channel ranges from consecutive channels, for output file naming
        crs = util.fmt_channel_ranges(channel_group['channels'])
        output_basename = fname_template.format(prefix=out_prefix, cg_id=cg_id, crs=crs)
        output_fname = ''.join([output_basename, out_fext])
        output_file_path = op.join(out_path, output_fname)

        duration_written = 0
        # First target: file mode is write; after that, append to the output file
        for file_mode, target_metadata in enumerate(targets_metadata_list):
            duration = None if cli_args.duration is None \
                else cli_args.duration - duration_written
            target_path = target_metadata['TARGET']
            logger.debug('Starting conversion for target {}'.format(target_path))
            if not cli_args.dry_run and WRITE_DATA:
                duration_written += continuous_to_dat(
                    target_metadata=target_metadata,
                    output_path=output_file_path,
                    channel_group=channel_group,
                    dead_channel_ids=dead_channels,
                    zero_dead_channels=cli_args.zero_dead_channels,
                    file_mode='a' if file_mode else 'w',
                    duration=duration)
        total_duration_written += duration_written

        # Create the per-group .prb files
        # FIXME: Dead channels are a big mess
        with open(op.join(out_path, output_basename + '.prb'), 'w') as prb_out:
            if cli_args.split_groups or (layout is None):
                # One prb file per channel group
                ch_out = channel_group['channels']
                cg_out = {0: {'channels': list(range(len(ch_out)))}}
                dead_channels = sorted([ch_out.index(dc) for dc in dead_channels
                                        if dc in ch_out])
            else:
                # Same channel groups, but with flat numbering
                cg_out, dead_channels = util.monotonic_prb(layout)
            prb_out.write('dead_channels = {}\n'.format(pprint.pformat(dead_channels)))
            prb_out.write('channel_groups = {}'.format(pprint.pformat(cg_out)))

    # FIXME: Generation of .prm files. For now in a separate script.
    # Should take a .prm template that will be adjusted.
    # # Template parameter file
    # prm_file_input = cli_args.params
    # with open(op.join(out_path, output_basename + '.prm'), 'w') as prm_out:
    #     if prm_file_input:
    #         f = open(prm_file_input, 'r')
    #         prm_in = f.read()
    #         f.close()
    #     else:
    #         prm_in = pkgr.resource_string('config', 'default.prm').decode()
    #     prm_out.write(prm_in.format(experiment_name=output_basename,
    #                                 probe_file=output_basename + '.prb',
    #                                 raw_file=output_file_path,
    #                                 n_channels=len(channel_group['channels'])))

    logger.debug('Done! Total data length written: {}'.format(
        util.fmt_time(total_duration_written)))
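
# The .prb files written above are plain Python assignments, following the
# klusta convention. A hedged sketch of how such a file can be loaded; the
# packaged util.run_prb may differ, this is illustrative only.
#
# Example .prb content (values are illustrative):
#   dead_channels = [3, 7]
#   channel_groups = {0: {'channels': [0, 1, 2, 3]},
#                     1: {'channels': [4, 5, 6, 7]}}
def example_load_prb(path):
    layout = {}
    with open(path) as f:
        # .prb files are Python; executing them recovers the assignments
        exec(f.read(), {}, layout)
    return layout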
def main(args):
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='Dat file')
    parser.add_argument('-o', '--out', default='.', help='Directory to store segments in')
    parser.add_argument('-c', '--clean', action='store_true',
                        help='Remove the original dat file when successful')
    parser.add_argument('-C', '--channels', type=int,
                        help='Number of channels in input file.')
    parser.add_argument('-d', '--dtype', default='int16')
    parser.add_argument('-p', '--prefix', default='tetrode',
                        help='Prefix to output file name. Default: "tetrode"')  # '{infile}_'
    parser.add_argument('--keep_dead', action='store_true',
                        help='Do not skip tetrodes with all-dead channels')

    grouping = parser.add_mutually_exclusive_group()
    grouping.add_argument('-l', '--layout', help='Path to probe file defining channel order')
    grouping.add_argument('-g', '--groups_of', type=int,
                          help='Split into regular groups of n channels')

    cli_args = parser.parse_args(args)
    logger.debug('cli_args: {}'.format(cli_args))

    in_path = os.path.abspath(os.path.expanduser(cli_args.input))
    bp, ext = os.path.splitext(in_path)

    probe_file = cli_args.layout
    if not any([cli_args.layout, cli_args.groups_of]):
        if os.path.exists(bp + '.prb'):
            probe_file = bp + '.prb'
        else:
            logger.error('No information on how to split the channels. '
                         'Either by groups_of, or with a prb file')
            sys.exit(1)

    if probe_file is not None:
        layout = run_prb(probe_file)
        channel_groups = layout['channel_groups']
        dead_channels = layout['dead_channels'] if 'dead_channels' in layout else []
        n_channels = sum([len(cg['channels']) for idx, cg in channel_groups.items()])
        logger.debug('{} channels from prb file'.format(n_channels))
    else:
        if cli_args.channels is None:
            logger.warning('No channel count given. Guessing...')
            n_channels = dat.guess_n_channels(in_path)
            logger.warning('Guessed there to be {} channels'.format(n_channels))
        else:
            n_channels = cli_args.channels
        assert not n_channels % cli_args.groups_of
        channel_groups = {cg: {'channels': list(range(cg * cli_args.groups_of,
                                                      (cg + 1) * cli_args.groups_of))}
                          for cg in range(n_channels // cli_args.groups_of)}
        dead_channels = []
    logger.debug('channel_groups: {}'.format(channel_groups))

    mm = np.memmap(in_path, dtype=cli_args.dtype, mode='r').reshape(-1, n_channels)

    # Select valid channel groups; skip groups with all-dead channels unless --keep_dead
    indices = []
    for cg in channel_groups.keys():
        channels = channel_groups[cg]['channels']
        dead = [ch in dead_channels for ch in channels]
        if all(dead) and not cli_args.keep_dead:
            logger.warning(f'Skipping tetrode {cg} because all channels are dead. '
                           f'Use --keep_dead to not skip.')
            continue
        indices.append(cg)

    batch_size = 1_000_000
    n_samples = mm.shape[0]
    pbar = tqdm(total=n_samples, unit_scale=True, unit='Samples')
    # Zero-pad the group id in the file name to the width of the largest index
    postfix = '{cg_id:0' + str(math.floor(math.log10(max(max(indices), 1))) + 1) + 'd}.dat'

    with ExitStack() as stack:
        out_files = {}
        for cg_id in indices:
            dat_path = Path((cli_args.prefix + postfix).format(cg_id=cg_id, infile=bp))
            prb_path = dat_path.with_suffix('.prb')

            # Create the per-tetrode probe file with flat channel numbering
            # FIXME: Dead channels are a big mess
            ch_out = channel_groups[cg_id]['channels']
            cg_out = {0: {'channels': list(range(len(ch_out)))}}
            dead_ch = sorted([ch_out.index(dc) for dc in dead_channels if dc in ch_out])
            write_prb(prb_path, cg_out, dead_ch)

            # Open the .dat file and register it with the exit stack for clean shutdown
            out_files[cg_id] = stack.enter_context(open(dat_path, 'wb'))

        samples_remaining = n_samples
        while samples_remaining > 0:
            pbar.update(min(batch_size, samples_remaining))
            offset = n_samples - samples_remaining
            arr = mm[offset:offset + batch_size, :]
            for cg_id in out_files.keys():
                arr.take(channel_groups[cg_id]['channels'], axis=1).tofile(out_files[cg_id])
            samples_remaining -= batch_size
    del mm

    try:
        if cli_args.clean:
            logger.warning('Deleting file {}'.format(in_path))
            os.remove(in_path)
    except PermissionError:
        logger.error("Couldn't clean up files. Sadface.")
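
# Self-contained sketch of the batched de-interleaving used above: rows are
# samples, columns are channels, and each group's columns are taken out batch
# by batch. Toy sizes and synthetic data, for illustration only.
import numpy as np


def example_split(arr: np.ndarray, groups: dict, batch_size: int = 4) -> dict:
    """Split an (n_samples, n_channels) array into per-group arrays."""
    out = {cg: [] for cg in groups}
    for offset in range(0, arr.shape[0], batch_size):
        batch = arr[offset:offset + batch_size, :]
        for cg, chans in groups.items():
            out[cg].append(batch.take(chans, axis=1))
    return {cg: np.concatenate(parts) for cg, parts in out.items()}


# e.g. example_split(np.arange(32).reshape(8, 4), {0: [0, 1], 1: [2, 3]})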
def main(args):
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='Dat file')
    parser.add_argument('-o', '--out', default='.', help='Directory to store segments in')
    parser.add_argument('-c', '--clean', action='store_true',
                        help='Remove the original dat file when successful')
    parser.add_argument('-C', '--channels', type=int,
                        help='Number of channels in input file.')
    parser.add_argument('-d', '--dtype', default='int16')
    parser.add_argument('-p', '--prefix', default='{infile}_')

    grouping = parser.add_mutually_exclusive_group()
    grouping.add_argument('-l', '--layout', help='Path to probe file defining channel order')
    grouping.add_argument('-g', '--groups_of', type=int,
                          help='Split into regular groups of n channels')

    cli_args = parser.parse_args(args)
    logger.debug('cli_args: {}'.format(cli_args))

    in_path = os.path.abspath(os.path.expanduser(cli_args.input))
    bp, ext = os.path.splitext(in_path)

    probe_file = cli_args.layout
    if not any([cli_args.layout, cli_args.groups_of]):
        if os.path.exists(bp + '.prb'):
            probe_file = bp + '.prb'
        else:
            logger.error('No information on how to split the channels. '
                         'Either by groups_of, or with a prb file')
            sys.exit(1)

    if probe_file is not None:
        layout = run_prb(probe_file)
        channel_groups = layout['channel_groups']
        n_channels = sum([len(cg['channels']) for idx, cg in channel_groups.items()])
        logger.debug('{} channels from prb file'.format(n_channels))
    else:
        if cli_args.channels is None:
            logger.warning('No channel count given. Guessing...')
            n_channels = dat.guess_n_channels(in_path)
            logger.warning('Guessed there to be {} channels'.format(n_channels))
        else:
            n_channels = cli_args.channels
        assert not n_channels % cli_args.groups_of
        channel_groups = {cg: {'channels': list(range(cg * cli_args.groups_of,
                                                      (cg + 1) * cli_args.groups_of))}
                          for cg in range(n_channels // cli_args.groups_of)}
    logger.debug('channel_groups: {}'.format(channel_groups))

    mm = np.memmap(in_path, dtype=cli_args.dtype, mode='r').reshape(-1, n_channels)
    indices = list(channel_groups.keys())

    batch_size = 1_000_000
    n_samples = mm.shape[0]
    pbar = tqdm(total=n_samples, unit_scale=True, unit='Samples')
    postfix = '{cg_id:0' + str(math.floor(math.log10(len(indices))) + 1) + 'd}.dat'

    with ExitStack() as stack:
        # Open one .dat file per group and register it for clean shutdown
        out_files = [stack.enter_context(
            open((cli_args.prefix + postfix).format(cg_id=cg_id, infile=bp), 'wb'))
            for cg_id in range(len(indices))]

        samples_remaining = n_samples
        while samples_remaining > 0:
            pbar.update(min(batch_size, samples_remaining))
            offset = n_samples - samples_remaining
            arr = mm[offset:offset + batch_size, :]
            for cg_id in range(len(indices)):
                arr.take(channel_groups[indices[cg_id]]['channels'],
                         axis=1).tofile(out_files[cg_id])
            samples_remaining -= batch_size

    # logger.debug('Writing .prm files')
    # for outfile in out_files:
    #     print(outfile.name)
    #     make_prm(outfile.name, 'tetrode.prb')
    del mm

    try:
        if cli_args.clean:
            logger.warning('Deleting file {}'.format(in_path))
            os.remove(in_path)
    except PermissionError:
        logger.error("Couldn't clean up files. Sadface.")
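
# A minimal usage sketch for running this splitter directly. The file name is
# hypothetical; the flags match the parser defined above, e.g. split a 32
# channel recording into groups of 4:
#
#   main(['continuous.dat', '-C', '32', '-g', '4'])
#
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])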