def get_set_orientation(set_path):
    """Determine the orientation of a set.

    The metadata of the first two points of the set is read and the order
    of their point indices is compared with the order of their start times.

    Parameters
    ----------
    set_path : relative path to set (joined with the ``LAN10_DATA_PATH``
        environment variable).

    Returns
    -------
    bool
        True (direct orientation) when "first index > second index" and
        "first start time > second start time" are either both true or
        both false; False otherwise.
    """
    set_abs_path = join(environ['LAN10_DATA_PATH'], set_path)
    points = _get_points_in_set(set_abs_path)

    # Only metadata is needed, so the binary payload is not loaded.
    _, first_meta, _ = dfparser.parse_from_file(points[0], nodata=True)
    _, second_meta, _ = dfparser.parse_from_file(points[1], nodata=True)

    first_index = int(first_meta['external_meta']['point_index'])
    first_start = tparse(first_meta['params']['start_time'])
    second_index = int(second_meta['external_meta']['point_index'])
    second_start = tparse(second_meta['params']['start_time'])

    index_decreases = first_index > second_index
    time_decreases = bool(first_start > second_start)
    return index_decreases == time_decreases
def crs_compare_different_timesteps():
    """Compare count-rate plots obtained with different time steps.

    The test illustrates where the "saw" pattern on the plots comes from.

    The point at ``DATA_ROOT/POINT_PATH`` is loaded, a histogram of the
    differences between consecutive event times is drawn on one figure and
    the effective count rate versus time threshold (as returned by
    ``get_crs``) on another.
    """
    _, meta, data = dfparser.parse_from_file(path.join(DATA_ROOT, POINT_PATH))
    _, times = df_events_to_np(meta, data)

    # Histogram of time differences between consecutive events.
    deltas = times[1:] - times[:-1]
    hist, bins = np.histogram(deltas, bins=60, range=(-320 * 2, 320 * 63))

    _, axes2 = plt.subplots()
    axes2.step((bins[1:] + bins[:-1]) / 2, hist, where="mid")
    axes2.grid()

    time_thrs_640, crs_gen_640 = get_crs(meta, times)

    fig, axes = plt.subplots()
    # FigureCanvasBase.set_window_title was removed from Matplotlib; the
    # figure manager provides the same call in old and new releases.
    fig.canvas.manager.set_window_title('cr_steps_compare')
    axes.set_title("Effective Count Rate / Time threshold")
    axes.set_xlabel("Time Threshold, ns")
    axes.set_ylabel("Effective Count Rate, Hz")
    axes.plot(time_thrs_640, crs_gen_640, label="Step = 640 ns")
    axes.legend(loc=4)
def get_set_spectrum(set_abs_path, borders=None, bins=30):
    """Calculate the amplitude (energy) spectrum for every point of a set.

    Parameters
    ----------
    set_abs_path : str
        Absolute path to the set directory; files matching ``p*.df`` inside
        it are processed in natural sort order.
    borders : tuple, optional
        ``range`` argument forwarded to ``np.histogram``.
    bins : int or sequence, optional
        ``bins`` argument forwarded to ``np.histogram`` for the first point.

    Returns
    -------
    out : dict
        Keyed by point path relative to ``set_abs_path``. Each value holds
        ``"meta"`` (point metadata), ``"hist"`` (density-normalised
        histogram), ``"hist_unnorm"`` (raw counts) and ``"bins"`` (edges).

    Notes
    -----
    The bin edges computed for the first point are reused for every
    following point, so all spectra of a set share the same edges.
    """
    points = natsorted(glob.glob(path.join(set_abs_path, "p*.df")))

    out = {}
    for point in points:
        _, meta, data = dfparser.parse_from_file(point)
        parsed_data = dfparser.Point()
        parsed_data.ParseFromString(data)
        del data  # the raw buffer is no longer needed once parsed

        # Event times are not used by the spectrum, so only the amplitudes
        # are collected.
        amps = []
        for channel in parsed_data.channels:
            for block in channel.blocks:
                amps.append(np.array(block.events.amplitudes, np.int16))
        amps = np.hstack(amps)

        # ``bins`` is deliberately rebound to the returned edges (see Notes).
        hist, bins = np.histogram(amps, bins, range=borders, density=True)
        hist_unnorm, _ = np.histogram(amps, bins, range=borders)

        out[path.relpath(point, set_abs_path)] = {
            "meta": meta,
            "hist": hist,
            "hist_unnorm": hist_unnorm,
            "bins": bins,
        }
    return out
def get_amps(filepath: str) -> np.ndarray:
    """Return the amplitudes stored in a processed point file.

    Only the first channel of the point is read; the amplitudes of all its
    blocks are concatenated into a single array.
    """
    _, _, raw = dfparser.parse_from_file(filepath)
    point = dfparser.Point()
    point.ParseFromString(raw)

    per_block = []
    for block in point.channels[0].blocks:
        per_block.append(list(block.events.amplitudes))
    return np.hstack(per_block)
def _main():
    """Plot the count rate of a set as a function of high voltage.

    All ``p*.df`` points of the selected set are grouped by their
    ``HV1_value``. For each voltage the number of events with an amplitude
    not below ``args.ampl_threshold`` is divided by the accumulated
    acquisition time.
    """
    # Parse arguments from command line
    args = __parse_args()

    point_files = natsorted(
        glob.glob(path.join(args.data_root, args.set, "p*.df")))

    amps = {}
    for point_file in point_files:
        # Read dataforge point
        _, meta, data = dfparser.parse_from_file(point_file)

        # Parse binary data
        point = dfparser.Point()
        point.ParseFromString(data)

        hv = int(meta['external_meta']['HV1_value'])
        params = meta['params']
        time = params['events_num'] * params['b_size'] / \
            params['sample_freq']

        if hv not in amps:
            amps[hv] = {'time': 0, 'amps': []}
        group = amps[hv]
        group['time'] += time

        for channel in point.channels:
            for block in channel.blocks:
                group['amps'].append(
                    np.array(block.events.amplitudes, np.int16))

    for group in amps.values():
        group['amps'] = np.hstack(group['amps'])
        above_threshold = group['amps'] >= args.ampl_threshold
        group['count_rate'] = \
            len(group['amps'][above_threshold]) / group['time']

    hvs = sorted(amps)
    crs = [amps[hv]['count_rate'] for hv in hvs]

    _, axes = plt.subplots()
    if args.x_scale:
        axes.set_xscale(args.x_scale)
    if args.y_scale:
        axes.set_yscale(args.y_scale)
    axes.set_title(args.set)
    axes.set_xlabel("High voltage, V")
    axes.set_ylabel("Count rate, Ev/s")
    axes.plot(hvs, crs, label='Set points count rate')
    axes.legend()
    plt.show()
def get_point_meta(filename):
    """Read the metadata of a point without loading its binary data.

    Parameters
    ----------
    filename : relative path to point (joined with the ``LAN10_DATA_PATH``
        environment variable).

    Returns
    -------
    meta
        Metadata element returned by ``dfparser.parse_from_file``.
    """
    point_abs_path = join(environ['LAN10_DATA_PATH'], filename)
    _, meta, _ = dfparser.parse_from_file(point_abs_path, nodata=True)
    return meta
def split_by_groups(points):
    """Group point files by their high-voltage value.

    Parameters
    ----------
    points : iterable
        Point file paths.

    Returns
    -------
    v_groups : dict
        Maps the integer ``HV1_value`` found in each point's external
        metadata to the list of points having that value, in input order.
    """
    v_groups = {}
    for point in tqdm(points, desc='grouping by voltage'):
        # Metadata is enough here; the payload is not loaded.
        _, meta, _ = dfparser.parse_from_file(point, nodata=True)
        voltage = int(meta['external_meta']['HV1_value'])
        v_groups.setdefault(voltage, []).append(point)
    return v_groups
def _lan_amps_f(fp):
    """Return all amplitudes of the point file `fp` as one int16 array.

    The amplitudes of every block of every channel are concatenated in
    channel/block order.
    """
    _, _, raw = dfparser.parse_from_file(fp)
    point = dfparser.Point()
    point.ParseFromString(raw)

    block_amps = [
        np.array(block.events.amplitudes, np.int16)
        for channel in point.channels
        for block in channel.blocks
    ]
    return np.hstack(block_amps)
def _main():
    """Plot the amplitude histogram(s) of a single dataforge point.

    With ``args.split_channels`` unset, the amplitudes of all channels are
    merged into one histogram labelled ``all-channels``; otherwise one
    histogram per channel is drawn. A channel is labelled with its name
    from ``meta["channels"]`` when present, else with its index.
    """
    # Parse arguments from command line
    args = __parse_args()

    # Read dataforge point
    _, meta, data = dfparser.parse_from_file(args.input)

    # Parse binary data
    point = dfparser.Point()
    point.ParseFromString(data)

    # Extract amplitudes from each block, grouped by channel index
    blocks_by_channel = {}
    for idx, channel in enumerate(point.channels):
        for block in channel.blocks:
            blocks_by_channel.setdefault(idx, []).append(
                np.array(block.events.amplitudes, np.int16))
    amps = {idx: np.hstack(blocks)
            for idx, blocks in blocks_by_channel.items()}

    if not args.split_channels:
        # np.hstack needs a real sequence; a dict view is rejected by
        # current NumPy releases.
        plots = {"all-channels": np.hstack(list(amps.values()))}
    else:
        plots = amps

    _, axes = plt.subplots()
    axes.set_title(args.input)
    axes.set_xlabel("Channels, ch")
    axes.set_ylabel("Counts")

    for key, plot_amps in plots.items():
        # Calculate histogram
        hist, bins = np.histogram(
            plot_amps,
            bins=args.bins,
            range=(args.ampl_threshold, args.ampl_max))

        # Calculate bins centers
        bins_centers = (bins[:-1] + bins[1:]) / 2

        # Label by the plot key (channel index or "all-channels"), not by
        # the loop position, so the merged plot is not given channel 0's name.
        label = key
        if "channels" in meta and str(key) in meta["channels"]:
            label = meta["channels"][str(key)]

        axes.step(bins_centers, hist, where='mid', label=label)

    axes.legend()
    plt.show()
def get_point_amps(filename):
    """Load the amplitudes array of a point.

    Parameters
    ----------
    filename : relative path to point (joined with the ``LAN10_DATA_PATH``
        environment variable).

    Returns
    -------
    numpy.ndarray
        Result of ``_lan_amps`` for the point data, or an empty ``int16``
        array when ``_lan_amps`` raises ``ValueError``.
    """
    point_abs_path = join(environ['LAN10_DATA_PATH'], filename)
    _, _, data = dfparser.parse_from_file(point_abs_path)
    try:
        amps = _lan_amps(data)
    except ValueError:
        amps = np.array([], dtype=np.int16)
    return amps
def read_madc_sets(madc_sets_raw, group_abs_path):
    """Collect the begin and end time of every non-empty set in a group.

    Parameters
    ----------
    madc_sets_raw : list
        Set names of the current group.
    group_abs_path : str
        Absolute path to the group.

    Returns
    -------
    madc_sets : dict
        Maps a set name to ``{"begin": ..., "end": ...}``: the first
        ``start_time`` entry of the set's first point and the last
        ``end_time`` entry of its last point, both parsed with
        ``timeparser.parse``. Sets without files starting with ``p`` are
        left out.
    """
    madc_sets = {}
    for madc_set in madc_sets_raw:
        set_abs_path = path.join(group_abs_path, madc_set)
        points = natsorted(
            [name for name in listdir(set_abs_path) if name.startswith('p')])
        if not points:
            continue

        # Only metadata of the boundary points is needed.
        _, first_meta, _ = dfparser.parse_from_file(
            path.join(set_abs_path, points[0]), nodata=True)
        _, last_meta, _ = dfparser.parse_from_file(
            path.join(set_abs_path, points[-1]), nodata=True)

        madc_sets[madc_set] = {
            "begin": timeparser.parse(first_meta["start_time"][0]),
            "end": timeparser.parse(last_meta["end_time"][-1]),
        }
    return madc_sets
def _main():
    """Convert a frame-based point file to an event-based one.

    The input file is read, its frames are converted to events with
    ``df_frames_to_events`` (using ``extract_amps_approx2``), and the
    resulting message is written to the output file. When ``args.output``
    is not given, the output is placed next to the input as
    ``<input without extension>_extr.df``.
    """
    args = _parse_args()

    if args.output is None:
        # splitext() keeps the directory part of the input path, so it must
        # not be joined with dirname() again: for a relative input such as
        # "data/p1.df" that produced "data/data/p1_extr.df".
        output = "%s_extr.df" % path.splitext(args.input)[0]
    else:
        output = args.output

    _, meta, data = dfparser.parse_from_file(args.input)
    meta_out, data_out = df_frames_to_events(
        meta, data, extract_amps_approx2,
        frame_l=args.frame_l, frame_r=args.frame_r)

    with open(output, "wb") as out_file:
        out_file.write(dfparser.create_message(meta_out, data_out))
def __extract_amps(filename):
    """Return all amplitudes of a point file as a single int16 array.

    Amplitudes are collected per channel (blocks concatenated in order)
    and the channels are then concatenated in channel-index order.
    """
    _, _, data = dfparser.parse_from_file(filename)

    # Parse binary data
    point = dfparser.Point()
    point.ParseFromString(data)

    # Extract amplitudes from each block, grouped by channel index
    amps = {}
    for idx, channel in enumerate(point.channels):
        for block in channel.blocks:
            amps.setdefault(idx, []).append(
                np.array(block.events.amplitudes, np.int16))

    # np.hstack needs a real sequence; passing the dict view directly is
    # rejected by current NumPy releases.
    return np.hstack([np.hstack(blocks) for blocks in amps.values()])
def __main():
    """Draw histograms of rejected events and of all events.

    The point at ``DATA_ROOT/POINT_PATH`` is loaded, events are passed
    through ``filter_bad_events`` with ``TIME_FILTER_THRESH`` and the
    amplitude histograms of the filter result ("Rejected") and of the full
    data ("All") are drawn on the same axes.
    """
    _, meta, data = dfparser.parse_from_file(path.join(DATA_ROOT, POINT_PATH))

    # The metadata, not the envelope header, is what df_events_to_np is
    # given elsewhere in this code base.
    amps, times = df_events_to_np(meta, data)
    amps_filtered = filter_bad_events(meta, times, amps, TIME_FILTER_THRESH)

    _, hist_ax = plt.subplots()
    hist_ax.set_title("Rejected events histogramm")
    hist_ax.set_xlabel("Amplitude, ch")
    hist_ax.set_ylabel("Events, num")

    hist, bins = np.histogram(
        amps_filtered, range=(AMPL_THRESH, AMPL_MAX), bins=BINS)
    hist_ax.step((bins[1:] + bins[:-1]) / 2, hist, where="mid",
                 label="Rejected")

    hist, bins = np.histogram(amps, range=(AMPL_THRESH, AMPL_MAX), bins=BINS)
    hist_ax.step((bins[1:] + bins[:-1]) / 2, hist, where="mid", label="All")
def _main():
    """Draw a joint plot of amplitude versus time delta for close events.

    Events of all channels are merged and sorted by time, and the delta to
    the previous event is computed. Events whose delta is below
    ``args.threshold`` are kept, then filtered by the amplitude window
    (``args.amp_min``..``args.amp_max``) and the delta window
    (``args.delta_min``..``delta_max``). ``delta_max`` falls back to
    ``args.threshold`` when ``args.delta_max`` is not set.

    NOTE(review): only the first block of every channel is read - confirm
    that points processed by this script always hold a single block.
    """
    args = _parse_args()

    _, _, data = dfparser.parse_from_file(args.input)
    point = dfparser.Point()
    point.ParseFromString(data)
    del data

    # One (n_events, 3) table per channel: time, amplitude, time delta.
    datas = []
    for channel in point.channels:
        events = channel.blocks[0].events
        amps = events.amplitudes
        times = events.times
        channel_data = np.zeros((len(amps), 3))
        channel_data[:, 0] = list(times)
        channel_data[:, 1] = list(amps)
        datas.append(channel_data)

    datas = np.vstack(datas)
    datas = datas[datas[:, 0].argsort()]
    # Delta to the previous event; the very first event keeps delta 0.
    datas[:, 2][1:] = datas[:, 0][1:] - datas[:, 0][:-1]

    returned = datas[datas[:, 2] < args.threshold, :]

    delta_max = args.delta_max
    if not delta_max:
        delta_max = args.threshold

    amp_in_window = np.logical_and(returned[:, 1] >= args.amp_min,
                                   returned[:, 1] <= args.amp_max)
    returned = returned[amp_in_window]

    # Use the resolved delta_max: comparing against an unset
    # args.delta_max (None) fails.
    delta_in_window = np.logical_and(returned[:, 2] >= args.delta_min,
                                     returned[:, 2] <= delta_max)
    returned = returned[delta_in_window]

    plot = sns.jointplot(
        returned[:, 2], returned[:, 1], kind="hex", stat_func=None,
        xlim=(args.delta_min, delta_max),
        ylim=(args.amp_min, args.amp_max),
        joint_kws={'gridsize': (args.bins_delta, args.bins_amp)})
    plot.set_axis_labels("Time delta, ns", "Amplitude, ch")
    plt.show()
def process_file(data_file):
    """Process a single file.

    The file is converted from frames to events with
    ``df_frames_to_events`` (``extract_amps_approx``, time correction on)
    and written under ``ARGS.out_path`` at the same path it has relative to
    ``ARGS.data_path``. Missing output directories are created.

    Files that raise ``struct.error``, ``DecodeError`` or
    ``NotImplementedError`` during processing are skipped silently.
    """
    try:
        rel_path = path.relpath(data_file, ARGS.data_path)
        filepath_out = path.join(ARGS.out_path, rel_path)

        _, meta_real, data_real = dfparser.parse_from_file(data_file)
        meta_out, data_out = df_frames_to_events(
            meta_real, data_real, extract_amps_approx, correct_time=True)

        out_dir = path.dirname(filepath_out)
        if not path.exists(out_dir):
            makedirs(out_dir)

        with open(filepath_out, "wb") as out_file:
            out_file.write(dfparser.create_message(meta_out, data_out))
    except (struct.error, DecodeError, NotImplementedError):
        # Best effort: unreadable or unsupported files are left out.
        pass
def _main():
    """Plot a histogram of time differences between consecutive events.

    Event times of every block of every channel are concatenated in
    channel/block order and the difference between neighbouring entries is
    histogrammed. The histogram range is taken from ``args.ampl_threshold``
    and ``args.ampl_max``.
    """
    # Parse arguments from command line
    args = __parse_args()

    # Read dataforge point
    _, _, data = dfparser.parse_from_file(args.input)

    # Parse binary data
    point = dfparser.Point()
    point.ParseFromString(data)

    # Extract event times from each block
    times = []
    for channel in point.channels:
        for block in channel.blocks:
            times.append(np.array(block.events.times, np.uint64))

    # Combine times into one array
    times = np.hstack(times)

    # Calculate time differences on signed integers: subtracting uint64
    # values wraps around to huge positive numbers whenever a timestamp is
    # smaller than its predecessor (e.g. at a channel boundary).
    signed_times = times.astype(np.int64)
    diffs = signed_times[1:] - signed_times[:-1]

    # Calculate histogram
    hist, bins = np.histogram(diffs, bins=args.bins,
                              range=(args.ampl_threshold, args.ampl_max))

    # Calculate bins centers
    bins_centers = (bins[:-1] + bins[1:]) / 2

    # Drawing graph
    _, axes = plt.subplots()
    axes.set_title(args.input)
    axes.set_xlabel("Time, ns")
    axes.set_ylabel("Counts")
    axes.step(bins_centers, hist, where='mid')
    plt.show()
def _madc_amps_f(fp):
    """Return the amplitudes stored in the MADC point file `fp`.

    The binary payload is walked with a stride of 7 bytes; bytes 0 and 1 of
    every stride are decoded together as one native-order unsigned short.
    """
    _, _, data = dfparser.parse_from_file(fp)

    first_bytes = data[0::7]
    second_bytes = data[1::7]

    values = []
    for pair in zip(first_bytes, second_bytes):
        (value,) = unpack('H', bytes(pair))
        values.append(value)
    return np.array(values)
meta, data, _ = generate_df( area_l=AREA_L, area_r=AREA_R, time=float(args.wildcard), dist_file=abspath( join(dirname(__file__), '../../signal_utils/data/dist.dat'))) point = dfparser.Point() point.ParseFromString(data) chi2s = _extr_chi2(point) else: files = glob(join(args.root, args.wildcard)) chi2s = np.array([]) for p in tqdm(files): point = dfparser.Point() _, meta, data = dfparser.parse_from_file(p) point.ParseFromString(data) chi2s = np.append(chi2s, _extr_chi2(point)) np.save(args.output, chi2s) else: chi2s = np.load(args.input) hist, bins = np.histogram(chi2s, bins=BINS, range=RANGE, density=True) x_r = (bins[:-1] + bins[1:]) / 2 fig, ax = plt.subplots() if args.root == 'gen': title = r"$\chi ^ 2$ generated data" else:
def _main():
    """Plot time series stored as TSV payloads of dataforge envelopes.

    Every file matching the ``args.input`` glob is read; its payload is
    parsed as tab-separated values with a ``timestamp`` column. Each other
    column becomes one graph (``@null`` cells are skipped). Graphs with the
    same column name share a colour and one legend entry. Columns listed in
    ``args.exclude`` are not drawn; ``args.mark`` adds annotation arrows.
    """
    args = parse_args()

    # Creating graph
    _, axes = plt.subplots()

    graphs_all = {}
    for input_file in glob(args.input):
        header, _, _ = dfparser.parse_from_file(input_file)

        # Read binary data manually due to machine header error.
        # Not needed for good data.
        header_len = dfparser.type_codes.ENVELOPE_HEADER_CODES[
            header["type"]]["header_len"]
        with open(input_file, 'rb') as raw_file:
            raw_file.seek(header_len + header['meta_len'])
            data = raw_file.read()

        # Prettify data: collapse runs of spaces/tabs into a single tab
        data_tabed = re.sub(b'[ \t]+', b'\t', data)

        # Remove incorrect column name '#f\t' from data
        data_tabed = data_tabed[3:]

        # Parse data as TSV format
        tsv = csv.DictReader(io.StringIO(data_tabed.decode()),
                             delimiter='\t')

        # Extracting values from parsed TSV
        graphs = {}
        for row in tsv:
            timestamp = None  # parsed lazily, once per row
            for key in row.keys():
                if key == 'timestamp' or row[key] == '@null':
                    continue
                if timestamp is None:
                    timestamp = dateutil.parser.parse(row['timestamp'])
                graph = graphs.setdefault(key, {'x': [], 'y': []})
                graph['x'].append(timestamp)
                graph['y'].append(float(row[key]))

        for key, graph in graphs.items():
            graphs_all.setdefault(key, []).append(graph)

    palette = sns.color_palette()
    for idx_key, key in enumerate(graphs_all):
        if key in args.exclude:  # Filter excluded data
            continue
        # Wrap around so that more columns than palette colours do not
        # raise an IndexError.
        color = palette[idx_key % len(palette)]
        for idx, graph in enumerate(graphs_all[key]):
            # Label the first graph of a column only: this gives exactly
            # one legend entry per column, also for a single input file.
            label = key if idx == 0 else None
            axes.plot(graph['x'], graph['y'], color=color, label=label)

    axes.legend()

    for text, x_coord in args.mark:
        axes.annotate(text, xy=(x_coord, 0), xytext=(x_coord, 1e2),
                      arrowprops=dict(facecolor='black', shrink=0.05))

    # Applying parameters
    if args.title:
        axes.set_title(args.title)
    else:
        axes.set_title(path.basename(args.input))
    if args.x_scale:
        axes.set_xscale(args.x_scale)
    if args.y_scale:
        axes.set_yscale(args.y_scale)

    plt.show()
def main():
    """Compare Lan10-12PCI points energy spectrum for several points.

    A table of reference MADC spectra is loaded (first column - x values,
    remaining columns - one spectrum per point, each normalised to its
    maximum). For every listed Lan10-12PCI point the first-channel
    amplitude histogram is built, normalised, interpolated and fitted to
    the matching MADC spectrum as ``c * func(a * x + b)`` inside the
    1000 < x < 1600 window. One figure per point shows both curves.
    """
    seaborn.set_context("poster")

    madc_table = np.genfromtxt("/home/chernov/Downloads/set_43_detector.out")
    x_data = madc_table[:, 0]
    y_points = madc_table[:, 1:].transpose()
    y_points = (y_points.T / np.max(y_points, axis=1)).T

    df_data_root = "/home/chernov/data_processed"
    points_path = [
        "2017_05/Fill_3/set_43/p0(30s)(HV1=16000).df",
        "2017_05/Fill_3/set_43/p36(30s)(HV1=17000).df",
        "2017_05/Fill_3/set_43/p80(30s)(HV1=15000).df",
        "2017_05/Fill_3/set_43/p102(30s)(HV1=14000).df"
    ]

    bins = 500
    range_ = (0, 8000)

    for idx, point_rel in enumerate(points_path):
        _, _, raw = dfparser.parse_from_file(
            path.join(df_data_root, point_rel))
        point = dfparser.Point()
        point.ParseFromString(raw)

        amps = np.hstack([
            list(block.events.amplitudes)
            for block in point.channels[0].blocks
        ])

        hist, x_point = np.histogram(amps, bins=bins, range=range_)
        # Normalise to the histogram maximum above x = 3000.
        hist = hist / np.max(hist[np.where(x_point > 3000)[0][0]:])

        # NOTE(review): the grid is the SUM of neighbouring bin edges
        # (twice the bin centres, no "/ 2"). The fitted `a` and the initial
        # guess below depend on it - confirm before changing.
        func = interp1d(x_point[1:] + x_point[:-1], hist,
                        bounds_error=False, fill_value=0)

        def func_mod(x, a, b, c):
            """Scaled and shifted Lan10-12PCI spectrum used as fit model."""
            return c * func(a * x + b)

        x_peak = np.where(np.logical_and(x_data > 1000, x_data < 1600))
        popt, _ = curve_fit(func_mod, x_data[x_peak], y_points[idx][x_peak],
                            p0=[3.68, 700, 1])

        fig, axes = plt.subplots()
        # FigureCanvasBase.set_window_title was removed from Matplotlib;
        # the figure manager provides the same call in old and new releases.
        fig.canvas.manager.set_window_title(point_rel)
        fig.suptitle("CAMAC MADC vs. Lan10-12PCI spectrums")
        axes.set_title("File - %s. \nOptimized parameters: a=%s, b=%s, c=%s" %
                       (point_rel, *np.round(popt, 2)))
        axes.set_xlabel("Bins, ch")
        axes.set_xlim(0, 2000)

        x_interp = np.linspace(0, 2000, 500)
        axes.plot(x_interp, func_mod(x_interp, *popt), label="Lan10-12PCI")
        axes.plot(x_data, y_points[idx], label="CAMAC MADC")
        axes.legend()
def match_lan10_sets(lan10_sets_raw, lan10_root, lan10_group_rel_path,
                     madc_sets, err_sec):
    """Find the correspondence between lan10 and madc sets.

    A lan10 set is matched to the first madc set whose begin and end times
    both differ from the lan10 ones by less than `err_sec`. A border that
    could not be read from the lan10 set (``struct.error`` while parsing
    the boundary point) is not checked.

    Parameters
    ----------
    lan10_sets_raw : list
        Lan10 sets list for current group.
    lan10_root : str
        Lan10 data root path.
    lan10_group_rel_path : str
        Lan10 group path relative to `lan10_root`.
    madc_sets : dict
        MADC sets time borders dictionary
        (return value from read_madc_sets).
    err_sec : float
        Maximal error between borders in seconds.

    Returns
    -------
    corrs : list
        One ``{"from": ..., "to": ...}`` dictionary per lan10 set. "from"
        is the set path relative to `lan10_root`; "to" is the matched madc
        set path, or ``SetState.EMPTY`` (no point files),
        ``SetState.CORRUPTED`` (neither border readable) or
        ``SetState.DETACHED`` (no madc set matched).
    """
    corrs = []
    for lan10_set in lan10_sets_raw:
        set_abs_path = path.join(lan10_root, lan10_group_rel_path, lan10_set)
        set_rel_path = path.relpath(set_abs_path, lan10_root)

        points = natsorted(
            [name for name in listdir(set_abs_path) if name.startswith('p')])
        if not points:
            corrs.append({"from": set_rel_path, "to": SetState.EMPTY})
            continue

        # Begin border: start time of the first point.
        try:
            _, meta, _ = dfparser.parse_from_file(
                path.join(set_abs_path, points[0]), nodata=True)
            begin = timeparser.parse(meta["params"]["start_time"])
        except struct.error:
            check_beginning = False
        else:
            check_beginning = True

        # End border: end time of the last point.
        try:
            _, meta, _ = dfparser.parse_from_file(
                path.join(set_abs_path, points[-1]), nodata=True)
            end = timeparser.parse(meta["params"]["end_time"])
        except struct.error:
            check_ending = False
        else:
            check_ending = True

        if not check_beginning and not check_ending:
            corrs.append({"from": set_rel_path, "to": SetState.CORRUPTED})
            continue

        target = SetState.DETACHED
        for key in madc_sets:
            borders = madc_sets[key]
            if check_beginning:
                begin_err = abs((borders["begin"] - begin).total_seconds())
            if check_ending:
                end_err = abs((borders["end"] - end).total_seconds())

            if (not check_beginning or begin_err < err_sec) and \
               (not check_ending or end_err < err_sec):
                target = path.join(path.dirname(set_rel_path), key)
                break

        corrs.append({"from": set_rel_path, "to": target})
    return corrs
def _amp_cr(amps): filtered = amps[np.logical_and(amps >= args.left_border, amps <= args.right_border)] return len(filtered) if __name__ == "__main__": args = __parse_args() sns.set_context("poster") points = glob(path.join(args.root, args.fill, '*/p*)'), recursive=True) filtered = [] for point in points: _, meta, _ = dfparser.parse_from_file(point, nodata=True) if int(meta['external_meta']['HV1_value']) == args.voltage: filtered.append(point) peaks = [] for point in tqdm(filtered): try: _, meta, data = dfparser.parse_from_file(point, nodata=False) amps = _madc_amps(data) num = path.basename(path.dirname(point))[4:] if num.endswith('_bad'): num = num[:-4] peak = {
def process_set(lan10_root, madc_root, out_root, overwrite, corrs):
    """Process group dataset.

    Sets marked corrupted, detached or empty are copied unchanged to
    ``<out_root>/<from>-corrupted|-detached|-empty``. Matched sets are
    written to ``<out_root>/<to>``: the ``meta``, ``voltage`` and
    ``scenario`` files are copied from the madc set when present, and every
    lan10 point is re-written with zlib-compressed data. Points that raise
    ``struct.error`` while being parsed are copied as
    ``<point>-corrupted``.

    Parameters
    ----------
    lan10_root : str
        Lan10 data root path.
    madc_root : str
        MADC data root path.
    out_root : str
        Output data root path.
    overwrite : bool
        When an output set directory already exists: remove and re-create
        it if true, skip the set otherwise.
    corrs : list
        Correspondency list between sets
        (return value from match_lan10_sets).
    """
    # Output name pattern for sets that have no madc counterpart.
    unmatched_patterns = (
        (SetState.CORRUPTED, "%s-corrupted"),
        (SetState.DETACHED, "%s-detached"),
        (SetState.EMPTY, "%s-empty"),
    )

    for corr in corrs:
        pattern = None
        for state, state_pattern in unmatched_patterns:
            if corr["to"] == state:
                pattern = state_pattern
                break

        if pattern is not None:
            out_path = path.join(out_root, pattern % corr["from"])
            if path.exists(out_path):
                if not overwrite:
                    continue
                shutil.rmtree(out_path)
            shutil.copytree(path.join(lan10_root, corr["from"]), out_path)
            continue

        lan10_set_path = path.join(lan10_root, corr["from"])
        madc_set_path = path.join(madc_root, corr["to"])
        out_set_path = path.join(out_root, corr["to"])

        if path.exists(out_set_path):
            if not overwrite:
                continue
            shutil.rmtree(out_set_path)
        makedirs(out_set_path)

        for metafile in ("meta", "voltage", "scenario"):
            metafile_src = path.join(madc_set_path, metafile)
            if path.exists(metafile_src):
                shutil.copy(metafile_src, path.join(out_set_path, metafile))

        for point in listdir(lan10_set_path):
            point_src = path.join(lan10_set_path, point)
            try:
                _, meta, data = dfparser.parse_from_file(point_src)
                meta["compression"] = "zlib"
                with open(path.join(out_set_path, point), "wb") as out_file:
                    out_file.write(
                        dfparser.create_message(meta, zlib.compress(data)))
            except struct.error:
                # Unparseable point: keep a raw copy, marked as corrupted.
                shutil.copy(
                    point_src,
                    path.join(out_set_path, "%s-corrupted" % point))
def main():
    """Compare event counts of MADC and Lan10-12PCI for one set.

    For both acquisition systems the points of the set are grouped by
    ``HV1_value`` and the number of events inside the amplitude window is
    averaged per voltage. Lan10-12PCI counts are scaled by the acquisition
    time and multiplied by 30, and its amplitude window is the MADC window
    multiplied by 2.03. Two figures are drawn: the counts of both systems
    and ``1 - lan10 / madc`` versus voltage.
    """
    df_madc_data_root = "/home/chernov/data_on_server_madc"
    df_data_root = "/home/chernov/data_processed"
    set_path = "2017_05/Fill_2/set_8"

    threshold_madc = 450
    threshold_madc_h = 3100
    threshold = threshold_madc * 2.03
    threshold_h = threshold_madc_h * 2.03

    points = sorted(glob(path.join(df_data_root, set_path, "p**")))
    points_madc = sorted(glob(path.join(df_madc_data_root, set_path, "p**")))

    counts_madc = {}
    for point in points_madc:
        _, meta, data = dfparser.parse_from_file(point)
        hv_val = int(meta['external_meta']['HV1_value'])
        amps = parse_madc_binary(data)
        filt = np.logical_and(amps > threshold_madc, amps < threshold_madc_h)
        counts_madc.setdefault(hv_val, []).append(amps[filt].size)

    counts = {}
    for point in points:
        _, meta, data = dfparser.parse_from_file(point)
        hv_val = int(meta['external_meta']['HV1_value'])
        time = meta['params']['b_size'] * meta['params']['events_num'] / \
            meta['params']['sample_freq']

        point_ds = dfparser.Point()
        point_ds.ParseFromString(data)
        amps = np.hstack([
            list(block.events.amplitudes)
            for block in point_ds.channels[0].blocks
        ])

        filt = np.logical_and(amps > threshold, amps < threshold_h)
        counts.setdefault(hv_val, []).append(
            float(amps[filt].size) / time * 30.0)

    counts = {key: np.mean(counts[key]) for key in counts}
    counts_madc = {key: np.mean(counts_madc[key]) for key in counts_madc}

    counts_x = list(counts.keys())
    counts_y = [counts[key] for key in counts_x]
    counts_madc_x = list(counts_madc.keys())
    counts_madc_y = [counts_madc[key] for key in counts_madc_x]

    fig, axes = plt.subplots()
    # FigureCanvasBase.set_window_title was removed from Matplotlib; the
    # figure manager provides the same call in old and new releases.
    fig.canvas.manager.set_window_title("energy_spectrum_compare")
    fig.suptitle("CAMAC MADC vs. Lan10-12PCI energy spectrums \n"
                 "Set - %s" % (set_path))
    axes.plot(counts_x, counts_y, 'ro', label='Lan10-12PCI')
    axes.plot(counts_madc_x, counts_madc_y, 'bo', label='MADC')
    axes.set_xlabel("Voltage, V")
    axes.set_ylabel("Efficient counts")
    axes.legend()

    print(counts_x)
    print(counts_madc_x)

    # The two dictionaries are filled independently, so their key order
    # (and even key sets) may differ. Pair the values by voltage instead of
    # by position before taking the ratio.
    common_hv = sorted(set(counts) & set(counts_madc))
    lan10_common = np.array([counts[hv] for hv in common_hv])
    madc_common = np.array([counts_madc[hv] for hv in common_hv])

    fig, axes = plt.subplots()
    fig.canvas.manager.set_window_title("energy_spectrum_compare_ratio")
    fig.suptitle("CAMAC MADC vs. Lan10-12PCI energy spectrums ratio \n"
                 "Set - %s" % (set_path))
    axes.plot(common_hv, 1 - lan10_common / madc_common, 'ro', label='')
    axes.set_xlabel("Voltage, V")
    axes.set_ylabel("MADS vs Lan10_12PCI ratio")