def _untar(self, tar):
    """Validate and extract an already opened tar archive.

    Every member must carry one of the extensions .drr, .his, .list or
    .log, must not already exist on disk, and must not point outside of
    the extraction directory (no absolute paths, no '..' components).
    The names of the members are appended to self._tmp_files before the
    extraction starts; the archive is then extracted with
    tar.extractall() (no explicit destination is given).

    The archive is closed when this method returns, also when a check
    or the extraction itself fails.

    Raises GeneralError when a member fails one of the checks.
    """
    allowed = (".drr", ".his", ".list", ".log")
    try:
        sys.stderr.write('{:.<30}'.format('Checking tar archive'))
        sys.stderr.flush()
        archive_files = []
        for tarinfo in tar:
            name = tarinfo.name
            if os.path.splitext(name)[1] not in allowed:
                raise GeneralError(
                    'File {} does not seem to belong to gzipped '
                    'HIS/DRR histograms'.format(name))
            # extractall() trusts member names, so refuse anything that
            # would be written outside of the extraction directory
            parts = name.replace('\\', '/').split('/')
            if os.path.isabs(name) or '..' in parts:
                raise GeneralError(
                    'File {} would be extracted outside of the '
                    'current directory'.format(name))
            if os.path.exists(name):
                raise GeneralError('File {} already exists'.format(name))
            archive_files.append(name)
        sys.stderr.write('[Done]\n')

        self._tmp_files += archive_files

        sys.stderr.write('{:.<30}'.format('Extracting tar archive'))
        sys.stderr.flush()
        tar.extractall()
        sys.stderr.write('[Done]\n')
    finally:
        # This method owns the archive from here on: release it even if
        # one of the checks above raised
        tar.close()
def load_histogram(self, his_id):
    """Load the histogram with the given id from the .his file.

    The file '<self.base_name>.his' is opened in binary mode and the
    channel data are read starting at byte position 2 * offset, where
    offset comes from the histogram's entry in self.histograms.

    Returns a list [dim, data_x, data_y, weights] where
        dim is 1 or 2 (dimension of the histogram)
        data_x is the X axis data
        data_y is the Y axis data (for 2D histograms) or None (for 1D)
        weights is the histogram data, a 1D array (for 1D histograms)
                or a 2D array (for 2D histograms) indexed as [x, y]

    Axis values run from minc to maxc (inclusive) in steps of 1, shifted
    by self._dx.

    Raises GeneralError when the id is unknown, when the histogram has
    more than two dimensions, or when the number of half-words per
    channel is neither 1 nor 2.
    """
    info = self.histograms.get(his_id)
    if info is None:
        raise GeneralError("Histogram {} not found".format(his_id))

    dim = info['dimension']
    if dim > 2:
        raise GeneralError('Histograms with dimensions >2 not supported')

    # One half-word per channel is read as array type 'h', two as 'i'
    half_words = info['half_words_per_ch']
    if half_words == 1:
        data = array('h')
    elif half_words == 2:
        data = array('i')
    else:
        msg = 'half-words per channel histograms are not supported'
        raise GeneralError('{} {}'.format(half_words, msg))

    # Total number of channels is the product of the sizes along each axis
    length = 1
    for d in range(dim):
        length *= info['scaled'][d]

    his_name = '{}{}'.format(self.base_name, '.his')
    with open(his_name, 'rb') as his_file:
        his_file.seek(info['offset'] * 2)
        data.fromfile(his_file, length)

    x_axis = numpy.arange(info['minc'][0] + self._dx,
                          info['maxc'][0] + self._dx + 1.0)
    if dim == 1:
        return [1, x_axis, None, numpy.array(data)]

    y_axis = numpy.arange(info['minc'][1] + self._dx,
                          info['maxc'][1] + self._dx + 1.0)
    # Data are read as (y, x) rows; transpose so the result is [x, y]
    data = numpy.reshape(data, (info['scaled'][1], info['scaled'][0]))
    data = numpy.transpose(data)
    return [2, x_axis, y_axis, data]
def rebin1d(self, bin_size):
    """Return a new 1D histogram with bin_size neighbouring bins merged.

    Weights of the merged bins are summed and the x axis values are
    averaged. Trailing bins that do not fill a complete group are
    dropped. The original histogram is left untouched.

    Errors of the new histogram are calculated as sqrt(|N|), where N is
    the number of counts after rebinning. If the errors before rebinning
    were not sqrt(N), the value obtained here is not correct.

    Raises GeneralError when the histogram is not one-dimensional.
    """
    if self.dim != 1:
        raise GeneralError('This function rebins 1D histograms only')

    # Number of trailing bins that do not fit into a complete group
    leftover = len(self.weights) % bin_size
    stop = -leftover if leftover != 0 else None

    summed = self.weights[:stop].reshape((-1, bin_size)).sum(axis=1)
    centers = self.x_axis[:stop].reshape((-1, bin_size)).mean(axis=1)

    rebinned = Histogram(dim=self.dim)
    rebinned.x_axis = centers
    rebinned.weights = summed
    rebinned.title = '{}, bin {}'.format(self.title, bin_size)
    rebinned.errors = np.sqrt(abs(summed))
    return rebinned
def plot1d(self, plot, xlim=None, ylim=None):
    """Plot the 1D histogram held by plot using pyplot.

    plot.mode selects the way the data are presented:
        'histogram' is displayed with steps (centered on the x values)
        'function' with a continuous line
        'errorbar' as points with y error bars (plot.histogram.errors)

    xlim and ylim, when not None, are passed to plt.xlim and plt.ylim.
    A legend is added when self.legend is true.

    Raises GeneralError when plot.mode is none of the above.
    """
    histo = plot.histogram

    if plot.mode == 'histogram':
        # The step style is a draw style, not a line style: current
        # matplotlib rejects 'steps-mid' passed as a linestyle
        plt.plot(histo.x_axis, histo.weights,
                 drawstyle='steps-mid', label=histo.title)
    elif plot.mode == 'function':
        plt.plot(histo.x_axis, histo.weights,
                 ls='-', label=histo.title)
    elif plot.mode == 'errorbar':
        plt.errorbar(histo.x_axis, histo.weights,
                     yerr=histo.errors,
                     marker='o', ls='None', label=histo.title)
    else:
        raise GeneralError('Unknown plot mode {}'.format(plot.mode))

    if xlim is not None:
        plt.xlim(xlim)
    if ylim is not None:
        plt.ylim(ylim)

    if self.legend:
        plt.legend(loc=0, numpoints=1, fontsize='small')
def normalize1d(self, norm, bin_size=1, xmin=None, xmax=None):
    """Return a normalized copy of a 1D histogram.

    norm must be an int, a float or the string 'area' (any letter
    case). For 'area' the normalization factor is the sum of
    weights[xmin:xmax] (the whole range by default). Each bin and its
    error are divided by the normalization factor and by bin_size:

        n_i = n_i / norm / bin_size

    A bin_size or a normalization factor equal to 0 is replaced by 1
    and a warning is printed.

    Raises GeneralError when the histogram is not one-dimensional or
    when norm has an unsupported type or value.
    """
    if self.dim != 1:
        raise GeneralError('This function normalizes 1D histograms only')

    result = Histogram(dim=self.dim)
    result.x_axis = self.x_axis
    result.weights = self.weights
    result.errors = self.errors

    if bin_size == 0:
        print('Warning: bin_size 0 overridden, using 1 instead')
        bin_size = 1

    bad_norm = "Normalization must be int, float or 'area' string"
    if isinstance(norm, str):
        if norm.lower() != 'area':
            raise GeneralError(bad_norm)
        factor = result.weights[xmin:xmax].sum()
    elif isinstance(norm, (float, int)):
        factor = norm
    else:
        raise GeneralError(bad_norm)

    # Dividing by zero makes no sense, fall back to no normalization
    if factor == 0:
        print('Warning: normalization 0 overridden, using 1 instead')
        factor = 1

    result.title = '{}, / {:.2e}'.format(self.title, factor * bin_size)
    result.weights = result.weights / factor / bin_size
    result.errors = result.errors / factor / bin_size
    return result
def bin_size(self, bs):
    """Set the bin size of the (1D) histogram.

    The histogram is replaced by its rebinned version only when bs is
    an integer larger than the current bin size. Smaller or equal
    values are silently ignored: binning can only go further, to go
    back the original data must be loaded again.

    Raises GeneralError for histograms that are not 1D and for bs that
    is not an integer.
    """
    if self.histogram.dim != 1:
        raise GeneralError('Currently only 1D histograms can be binned')
    if not isinstance(bs, int):
        raise GeneralError('Attempt to set bin size to {}'.format(bs))

    if bs > self._bin_size:
        # NOTE(review): rebin1d is applied to the histogram currently
        # held, which may already have been rebinned - confirm that the
        # resulting effective bin size is the intended one
        self._bin_size = bs
        self.histogram = self.histogram.rebin1d(bs)
def __init__(self, file_name):
    """Open the data file, choosing the loader from the file extension.

    Files ending with .gz, .his or .tgz are opened with HisFile and
    marked as file_type 'his'; files ending with .txt are read with
    numpy.loadtxt and marked as file_type 'txt'. The file name without
    its last extension is kept in self.base_name.

    Raises GeneralError for any other extension (also for a missing
    one).
    """
    self.base_name, ext = os.path.splitext(file_name)
    if ext in (".gz", ".his", ".tgz"):
        self.file_type = 'his'
        self.data_file = HisFile(file_name)
    elif ext == ".txt":
        # Exact comparison: a substring test would also accept
        # extensions such as '.t' or '.tx'
        self.file_type = 'txt'
        self.data_file = numpy.loadtxt(file_name)
    else:
        raise GeneralError(
            'Files other than txt, his, tgz and gz are not supported')
def _add_errors(self, error1, error2):
    r"""Add two error arrays in quadrature

    \sigma = \sqrt{\sigma_1^2 + \sigma_2^2}

    Both arguments must be arrays of the same shape. Returns a new
    array of floats with that shape.

    Raises GeneralError when the shapes differ.
    """
    if error1.shape != error2.shape:
        raise GeneralError('Shape of array mismatches')
    first = numpy.asarray(error1, dtype=float)
    second = numpy.asarray(error2, dtype=float)
    # Writing into a preallocated array keeps the result an array of
    # the input shape also for zero-dimensional input
    errors = numpy.zeros(error1.shape)
    numpy.sqrt(first**2 + second**2, out=errors)
    return errors
def mode(self, mode):
    """Set the plotting mode (1 for 1D, 2 for 2D).

    The mode is stored in Experiment._mode and every plot whose
    histogram has a different dimension is deactivated. When switching
    to mode 2, self.plotter.ylin() is called first.

    Raises GeneralError for any mode other than 1 or 2.
    """
    if mode not in (1, 2):
        raise GeneralError('Only 1D and 2D plotting modes are possible')

    if mode == 2:
        self.plotter.ylin()

    Experiment._mode = mode

    for entry in self.plots:
        if entry.histogram.dim == mode:
            continue
        entry.active = False
def rounding(self, method):
    """Set the method of rounding of axis values.

    method is one of 'low', 'mid' or 'high'; it selects the offset
    stored in self._dx (0, 0.5 or 1.0 respectively) and is itself
    remembered in self._rounding.

    Raises GeneralError for any other value; nothing is changed then.
    """
    for label, offset in (('low', 0), ('mid', 0.5), ('high', 1.0)):
        if method == label:
            self._dx = offset
            break
    else:
        raise GeneralError('Unknown round method {}'.format(method))
    self._rounding = method
def __init__(self, peaks, baseline, plot_name):
    """Create the set of fit parameters for the given peaks.

    baseline is 'linear' (parameters a0, a1) or 'quadratic'
    (parameters a0, a1 and a2, the last one starting at 0.0).
    For the peak number i the parameters xi, si and Ai are added, and
    additionally sLi when the peak's 'model' is 'gauss_l'.

    Raises GeneralError for an unknown baseline type.
    """
    self.plot_name = plot_name
    self.params = Parameters()
    self.peaks = peaks
    self.baseline = baseline

    if baseline not in ('linear', 'quadratic'):
        raise GeneralError("Unknown background type {}".format(baseline))

    # Baseline polynomial coefficients
    self.params.add('a0')
    self.params.add('a1')
    if baseline == 'quadratic':
        self.params.add('a2', value=0.0)

    # Per-peak parameters, numbered by the position of the peak
    for number, peak in enumerate(self.peaks):
        for prefix in ('x', 's', 'A'):
            self.params.add('{}{}'.format(prefix, number))
        if peak.get('model') == 'gauss_l':
            self.params.add('sL{}'.format(number))
def plot2d(self, plot, xc=None, yc=None, logz=False):
    """Plot the 2D histogram held by plot with pcolormesh.

    xc and yc are the x and y ranges given as (first, last) pairs;
    they are used both to slice the axes and weights and, unchanged,
    as arguments of plt.xlim and plt.ylim.

    When an axis has more than self.max_2d_bin bins, the data are
    padded with zeros and averaged in groups, so that the plotted array
    has self.max_2d_bin bins along that axis.

    With logz the decimal logarithm of the weights is plotted and
    values <= 0 are masked.

    Raises GeneralError when the histogram is not two-dimensional.
    """
    if plot.histogram.dim != 2:
        raise GeneralError('plot2d function needs a 2D histogram!')
    x = plot.histogram.x_axis
    y = plot.histogram.y_axis
    w = plot.histogram.weights

    if xc is not None:
        x = x[xc[0]:xc[1]]
        w = w[xc[0]:xc[1], :]

    if yc is not None:
        y = y[yc[0]:yc[1]]
        w = w[:, yc[0]:yc[1]]

    initial_nx = len(x)
    initial_ny = len(y)
    nx = len(x)
    ny = len(y)

    binx = 1
    biny = 1
    # Rebin data if larger than defined number of bins (max_2d_bin)
    # This is needed due to the performance of matplotlib with large arrays
    if nx > self.max_2d_bin:
        binx = math.ceil(nx / self.max_2d_bin)
        missing = binx * self.max_2d_bin - nx
        if missing > 0:
            # Continue the axis from the end of the selected range (not
            # from the end of the full histogram axis), so that the
            # padded axis has no jump when xc is given
            addx = numpy.arange(x[-1] + 1, x[-1] + missing + 1)
            x = numpy.concatenate((x, addx))
            nx = len(x)
            z = numpy.zeros((missing, ny))
            w = numpy.concatenate((w, z), axis=0)
        x = numpy.reshape(x, (-1, binx))
        x = x.mean(axis=1)
    if ny > self.max_2d_bin:
        biny = math.ceil(ny / self.max_2d_bin)
        missing = biny * self.max_2d_bin - ny
        if missing > 0:
            addy = numpy.arange(y[-1] + 1, y[-1] + missing + 1)
            y = numpy.concatenate((y, addy))
            # nx is here the (possibly padded) length of the first
            # dimension of w
            z = numpy.zeros((nx, missing))
            w = numpy.concatenate((w, z), axis=1)
        y = numpy.reshape(y, (-1, biny))
        y = y.mean(axis=1)

    nx = len(x)
    ny = len(y)

    if nx != initial_nx or ny != initial_ny:
        # Average groups of binx * biny original bins
        w = numpy.reshape(w, (nx, binx, ny, biny)).mean(3).mean(1)
    # Weights are indexed [x, y], pcolormesh is given them as [y, x]
    w = numpy.transpose(w)

    title = plot.histogram.title
    # If logarithmic scale is used, mask values <= 0
    if logz:
        w = numpy.ma.masked_where(w <= 0, numpy.log10(w))
        title += ' (log10)'
    plt.title(title)
    plt.pcolormesh(x, y, w, cmap=self.cmap)
    plt.xlim(xc)
    plt.ylim(yc)
    plt.colorbar()
def parse(self, spectrum, show, pause):
    """Fit the peaks described by spectrum and print the results.

    spectrum is read through get() and findall(): attributes 'id',
    'min', 'max', 'smin', 'smax' and 'baseline', and child elements
    'peak' (presumably an XML element - confirm against the caller).

    For 'his' files the 'id' attribute is a comma separated list of
    histogram ids or inclusive ranges 'first-last'; each of them is
    fitted in turn. For 'txt' files the 'id' must be an empty string.

    The data are cut to [min:max]. Errors are taken from the third
    column for 'txt' files and calculated as sqrt(counts) for 'his'
    files; non-positive errors (counts) get an error of 1.0.

    show selects the presentation: 'plot' draws the fit and waits
    pause seconds, 'svg' saves the figure to a file, 'quiet' shows
    nothing. One line per peak not marked with ignore='True' is
    printed.

    Raises GeneralError when an id is given for a 'txt' file or when a
    loaded histogram is not 1D.
    """
    spectra_ids = spectrum.get('id')
    id_list = []
    if self.file_type == 'his':
        for element in spectra_ids.split(','):
            element = element.split('-')
            if len(element) > 1:
                # Range 'first-last', both ends included
                id_list.extend(range(int(element[0]),
                                     int(element[1]) + 1))
            else:
                id_list.append(int(element[0]))
    elif self.file_type == 'txt':
        if spectra_ids != '':
            raise GeneralError('Spectrum id not supported for txt files')
        id_list.append('')

    peaks = spectrum.findall('peak')
    x_min = int(spectrum.get('min'))
    x_max = int(spectrum.get('max'))
    smin = spectrum.get('smin')
    smax = spectrum.get('smax')

    for spectrum_id in id_list:
        plot_name = '{}_{}'.format(self.base_name, spectrum_id)
        PF = PeakFitter(peaks, spectrum.get('baseline'), plot_name)

        if self.file_type == 'txt':
            data_x = self.data_file[x_min:x_max, 0]
            data_y = self.data_file[x_min:x_max, 1]
            data_dy = self.data_file[x_min:x_max, 2]
            # As before, the replacement is done in place on the slice
            # of the loaded data
            data_dy[data_dy <= 0] = 1.0
        elif self.file_type == 'his':
            data = self.data_file.load_histogram(spectrum_id)
            if data[0] != 1:
                raise GeneralError('Only 1D histograms are supported')
            data_x = data[1][x_min:x_max]
            data_y = data[3][x_min:x_max]
            data_dy = numpy.array(
                [numpy.sqrt(y) if y > 0 else 1.0 for y in data_y])

        if smin is not None and smax is not None:
            PF.restrict_width(float(smin), float(smax))

        fit_result = PF.fit(data_x, data_y, data_dy, show, pause)

        if show == 'plot' or show == 'svg':
            plt.clf()
            plt.xlabel('Channel')
            plt.ylabel('Counts')
            # The step style is a draw style: current matplotlib
            # rejects 'steps-mid' passed as a linestyle
            plt.plot(data_x, data_y, drawstyle='steps-mid')
            plt.plot(data_x, fit_result['baseline'], linestyle='--')
            plt.plot(fit_result['x_axis'], fit_result['fit'],
                     linewidth=1.0)
            if show == 'svg':
                # Use the name built above for this spectrum
                svg_name = 'fit_{0}_{1}-{2}'.format(
                    plot_name, int(data_x[0]), int(data_x[-1]))
                svg_name = svg_name.replace('.', '').\
                    replace('/', '') + '.svg'
                plt.savefig(svg_name)
            else:
                plt.draw()
                time.sleep(pause)
        elif show == 'quiet':
            pass

        for i, peak in enumerate(peaks):
            if peak.get('ignore') == 'True':
                continue
            x0 = PF.params['x{}'.format(i)].value
            dx = PF.params['x{}'.format(i)].stderr
            A = PF.params['A{}'.format(i)].value
            dA = PF.params['A{}'.format(i)].stderr
            s = PF.params['s{}'.format(i)].value
            E = peak.get('E')
            name = peak.get('name')
            Area = PF.find_area(data_x, i)
            print('{:>8} {:>8} {:>8.2f} {:>8.2f}'.
                  format(name, E, x0, dx),
                  '{:>8.1f} {:>8.1f} {:>8.3f} {:>8.1f}'.
                  format(A, dA, s, Area))
def process(self, pause, show, verbose, rounding):
    """Fit every line of every spectrum of every file in self.file_list.

    self.file_list maps a file name to a dict, which maps a histogram
    id to a list of line entries (dicts). For each line the data from
    line['min'] to line['max'] are fitted with the model line['model']
    and the results are stored back in the line entry: 'x0', 'dx',
    'Area', 'dA' and 'redchi' (and 'x1', 'Area1', 'dA1' for models
    starting with 'gauss_doublet').

    Errors of the data are sqrt(|counts|); channels with 0 counts get
    an error of 1.

    show selects the presentation of each fit:
        'plot' draws the fit and waits pause seconds
        'png' saves the figure to a file
        'text' writes a one line summary to stderr
        'quiet' shows nothing
    With verbose set, all fitted parameters are written to stderr.
    rounding is passed to hisfile.HisFile.

    Raises GeneralError when a histogram is not 1D, when the fit fails
    or when the show method is unknown.
    """
    for file_name, spectra in self.file_list.items():
        his = hisfile.HisFile(file_name, rounding)
        for hisId, lines in spectra.items():
            data = his.load_histogram(hisId)
            for line in lines:
                xmin = line['min']
                xmax = line['max']
                if data[0] != 1:
                    raise GeneralError('Only 1D histograms are suitable '
                                       'for this calibration')
                data_x = data[1][xmin:xmax]
                data_y = data[3][xmin:xmax]
                data_dy = numpy.sqrt(numpy.abs(data_y))
                # 0 counts have error 1 (poisson!)
                data_dy[data_dy == 0] = 1

                msg = 'Fit problems with spectrum {} line {}: {}'
                try:
                    fitter = peak_fitter.PeakFitter(line['model'])
                    result = fitter.fit(data_x, data_y, data_dy)
                except ValueError as err:
                    raise GeneralError(
                        msg.format(hisId, line['line'],
                                   'numerical issue encountered')
                    ) from err
                except peak_fitter.GeneralError as err:
                    raise GeneralError(
                        msg.format(hisId, line['line'], err.msg)
                    ) from err

                if line['model'].startswith("gauss_doublet"):
                    line['dx'] = result.params['mu'].stderr
                    line['x0'] = result.params['mu'].value
                    line['x1'] = result.params['mu1'].value
                    line['Area'] = result.params['A'].value
                    line['Area1'] = result.params['A1'].value
                    line['dA'] = result.params['A'].stderr
                    line['dA1'] = result.params['A1'].stderr
                else:
                    x0 = result.params['mu'].value
                    dx = result.params['mu'].stderr
                    line['x0'] = x0
                    line['dx'] = dx
                    line['Area'], line['dA'] = self._find_area(
                        fitter, x0, dx, xmin, data_x, data_y)
                line['redchi'] = result.redchi

                if result.params['mu'].stderr == 0:
                    warning = ('Warning, line {} in spectrum {}:' +
                               ' could not determine uncertainity\n')
                    sys.stderr.write(warning.format(line['line'], hisId))

                if show == 'plot' or show == 'png':
                    x = numpy.linspace(data_x[0], data_x[-1], 1000)
                    y0 = fitter.fit_func(result.params, x)

                    plt.clf()
                    plt.xlabel('Channel')
                    plt.ylabel('Counts')
                    plt.title('Spectrum {} line {}'.format(
                        hisId, line['line']))
                    plt.plot(x, y0)
                    plt.errorbar(data_x, data_y, data_dy, fmt='o')
                    # Place the label near the upper left corner
                    xpos = (plt.xlim()[0] +
                            (plt.xlim()[1] - plt.xlim()[0]) * 0.1)
                    ypos = (plt.ylim()[1] -
                            (plt.ylim()[1] - plt.ylim()[0]) * 0.1)
                    # TeX markup needs raw strings, the line break
                    # must stay a regular one
                    text = (r'$\mu$ = {0:.2f}' + '\n' +
                            r'$\chi^2/\mathcal{{N}}$' +
                            ' = {1:.2f}').format(
                                result.params['mu'].value,
                                result.redchi)
                    plt.text(xpos, ypos, text)
                    if show == 'png':
                        png_name = '{}_{}_{:.0f}.png'.format(
                            file_name, hisId, float(line['line']))
                        plt.savefig(png_name)
                        print('File', png_name, 'saved')
                    elif show == 'plot':
                        plt.draw()
                        time.sleep(pause)
                elif show == 'text':
                    summary = ('Line {} in spectrum {},' +
                               ' x0 = {:.2f},' +
                               ' redchi = {:.2f}\n').format(
                                   line['line'], hisId,
                                   result.params['mu'].value,
                                   result.redchi)
                    sys.stderr.write(summary)
                elif show == 'quiet':
                    pass
                else:
                    raise GeneralError(
                        'Unknown show method {}'.format(show))

                if verbose:
                    sys.stderr.write('{}\n'.format(20 * '-'))
                    sys.stderr.write('Line {} spectrum {}\n'.format(
                        line['line'], hisId))
                    sys.stderr.write('Reduced Chi2: {:.3f}\n'.format(
                        result.redchi))
                    for key, par in result.params.items():
                        sys.stderr.write('{} {:.3f} +/- {:.3f}\n'.format(
                            key, par.value, par.stderr))
                    # FWHM = 2 * sqrt(2 * ln(2)) * sigma
                    fwhm_factor = 2 * math.sqrt(math.log(2) * 2)
                    sys.stderr.write('{} {:.3f} +/- {:.3f}\n'.format(
                        'FWHM',
                        fwhm_factor * result.params['s'].value,
                        fwhm_factor * result.params['s'].stderr))
def norm(self, n):
    """Normalize the (1D) histogram.

    The histogram is replaced by the result of its normalize1d method,
    called with n and the current bin size.

    Raises GeneralError for histograms that are not 1D.
    """
    current = self.histogram
    if current.dim == 1:
        self.histogram = current.normalize1d(n, self.bin_size)
        return
    raise GeneralError('Currently only 1D histograms can be normalized')