def stack_cols(self, other):
    """Stack two :class:`Dataset` instances side by side (column-wise) and
    return the combined result as a new ``Dataset``.

    Raises ``HeadersNeeded`` when only one of the two datasets has headers
    set, and ``InvalidDimensions`` when their row counts differ.  Silently
    returns ``None`` when *other* is not a ``Dataset`` at all.
    """
    if not isinstance(other, Dataset):
        return
    # Headers are all-or-nothing: either both datasets have them or neither.
    if (self.headers or other.headers) and not (self.headers and other.headers):
        raise HeadersNeeded
    if self.height != other.height:
        raise InvalidDimensions
    try:
        combined_headers = self.headers + other.headers
    except TypeError:
        # At least one side is header-less; the result carries no headers.
        combined_headers = None
    stacked = Dataset()
    for source in (self, other):
        for name in source.headers:
            stacked.append_col(col=source[name])
    stacked.headers = combined_headers
    return stacked
def parse(self, taggers=None):
    """Open the csv file at ``self.csv_path`` and load it into a
    :class:`tablib.Dataset`, stored on ``self.data``.

    The first csv row becomes the dataset headers.  Every subsequent row is
    wrapped in a ``GoogleContactRow``, cleaned up, and tagged by calling each
    method named in *taggers* on it.  Rows without a usable index (derived
    from the "Name" column) are ignored; duplicate rows are dropped or merged
    depending on ``self.drop`` / ``self.merge``.

    :param taggers: iterable of ``GoogleContactRow`` method names used to
        compute tags for each row (defaults to no taggers).  Previously this
        had a mutable default argument (``list()``); a ``None`` sentinel is
        used instead, which is backward compatible.
    """
    # Avoid the mutable-default-argument pitfall: normalize None to [].
    taggers = [] if taggers is None else taggers
    self.logger.info("Will parse input %(csv_path)s csv file" % {"csv_path": self.csv_path})
    data = Dataset()
    with open(self.csv_path, "rb") as csv_file:
        google_contact = UnicodeReader(csv_file)
        for row_num, row in enumerate(google_contact):
            # First row of the file is the header row.
            if row_num == 0:
                data.headers = row
                continue
            gRow = GoogleContactRow(headers=data.headers, row=row)
            gRow.standard_cleanup()
            gRow.format_names()
            # Collect (deduplicated) tags from every configured tagger.
            tags = []
            for tagger in taggers:
                tags += getattr(gRow, tagger)()
            tags = list(set(tags))
            # Get the row index
            index = format_index(gRow[data.headers.index("Name")])
            # Empty index: drop this row
            if not index:
                self.logger.info("Ignored row without index (%(row_num)d)" % {"row_num": row_num})
                continue
            # Duplicate?
            if self.is_duplicate(index):
                self.logger.info(
                    "Found duplicate row for %(name)s (num: %(row_num)d)"
                    % {"name": index, "row_num": row_num}
                )
                # Drop this row
                if self.drop:
                    self.logger.debug("Dropped duplicate row %(row_num)d" % {"row_num": row_num})
                    continue
                # Merge this row into the previously seen one
                if self.merge:
                    row_dst = self.hash.index(index)
                    data[row_dst] = merge_lists(gRow, data[row_dst])
                    self.logger.debug(
                        "Merged duplicate row %(row_src)d with %(row_dst)d"
                        % {"row_src": row_num, "row_dst": row_dst}
                    )
                    continue
            # Remember the index so later rows can detect duplication.
            self.hash += (index,)
            data.append(gRow, tags=tags)
            self.logger.debug("row %d tags %s", row_num, tags)
    self.data = data
    self.logger.debug("File columns are:\n%s", "\n".join(self.data.headers))
def transpose(self):
    """Return a new ``Dataset`` with rows and columns swapped.

    The first header (the "hinge") stays first in the new header row, and
    the values of its column supply the remaining headers.  Every other
    original column becomes a row whose first cell is that column's name.
    Returns ``None`` when the dataset is empty.
    """
    # Don't transpose if there is no data
    if not self:
        return
    hinge = self.headers[0]
    transposed = Dataset()
    # Hinge label plus the hinge column's values form the new header row.
    transposed.headers = [hinge] + self[hinge]
    for name in self.headers:
        if name == hinge:
            # Already represented in the header row; skip it.
            continue
        # Each remaining column becomes a row, prefixed by its own name.
        transposed.append(row=Row([name] + self[name]))
    return transposed
def _prepare_table(string):
    """Parse a '====' delimited text table into a ``Dataset``.

    The first physical line is dropped; '====' separator lines and blank
    lines are skipped.  The line at enumeration index 1 supplies the
    headers, and every other surviving line is appended as a row of
    ``numberfy``-ed cells.
    """
    dataset = Dataset()
    for idx, raw in enumerate(string.split('\n')[1:]):
        if not raw or '====' in raw:
            continue
        cells = raw.split()
        if idx == 1:
            dataset.headers = cells
        else:
            dataset.append(list(map(numberfy, cells)))
    return dataset
def _prepare_cluters(string):
    """Parse the clusters section of a report into a ``Dataset``.

    Multi-word tokens are first glued with placeholders ('PULSE HEIGHT' ->
    'PULSE-HEIGHT', ' +/-' -> '_+/-') so that a plain whitespace split keeps
    them intact; the '_' placeholder is undone when cells are stored.  The
    first three lines, '*****' lines, and blank lines are skipped; the
    surviving line at enumeration index 0 becomes the headers.
    """
    # Protect multi-word column names from the whitespace split below.
    normalized = string.replace('PULSE HEIGHT', 'PULSE-HEIGHT')
    normalized = normalized.replace(' +/-', '_+/-')
    dataset = Dataset()
    for idx, raw in enumerate(normalized.split('\n')[3:]):
        if not raw or '*****' in raw:
            continue
        tokens = raw.split()
        if idx == 0:
            dataset.headers = tokens
        else:
            # Undo the placeholder before converting each cell.
            dataset.append([numberfy(tok.replace('_', ' ')) for tok in tokens])
    return dataset
def _prepare_table(string):
    """Parse a '*****' delimited text table into a five-column ``Dataset``.

    The first physical line is dropped and separator/blank lines skipped.
    On the header line (enumeration index 1) the fifth and sixth tokens are
    rejoined into a single column name, so exactly five headers remain; all
    other surviving lines are appended as rows of ``numberfy``-ed cells.
    """
    dataset = Dataset()
    for idx, raw in enumerate(string.split('\n')[1:]):
        if not raw or '*****' in raw:
            continue
        tokens = raw.split()
        if idx == 1:
            # The last column name is two words; glue them back together.
            tokens[4] = tokens[4] + ' ' + tokens[5]
            dataset.headers = tokens[:5]
        else:
            dataset.append([numberfy(cell) for cell in tokens])
    return dataset
def _prepare_table(string):
    """Parse an asterisk-framed text table into a ``Dataset``.

    '*****' separator lines and blank lines are dropped; surviving lines are
    stripped of their framing '*' characters, split on the inner '*'
    separators, and whitespace-trimmed cell by cell.  The first surviving
    line provides the headers; the rest become ``numberfy``-ed data rows.
    """
    rows = []
    for raw in string.split('\n'):
        if not raw or '*****' in raw:
            continue
        cells = raw.lstrip().strip('*').split('*')
        rows.append([cell.strip() for cell in cells])
    dataset = Dataset()
    dataset.headers = rows[0]
    for cells in rows[1:]:
        dataset.append([numberfy(value) for value in cells])
    return dataset
def _prepare_clusters_str(self, data):
    """Extract the clusters table from *data* into a ``Dataset``.

    Multi-word tokens are glued with placeholders ('PULSE HEIGHT' ->
    'PULSE-HEIGHT', ' +/-' -> '_+/-') so a whitespace split keeps them
    whole; the '_' placeholder is reversed when cells are stored.  Only
    the header line (containing 'NO. PULSE-HEIGHT') and value lines
    (containing '+/-') are kept; '*****', blank, and 'GEANT' lines are
    discarded.
    """
    # Keep multi-word column names together across the split below.
    text = data.replace('PULSE HEIGHT', 'PULSE-HEIGHT').replace(' +/-', '_+/-')
    dataset = Dataset()
    for idx, raw in enumerate(text.split('\n')[3:]):
        if not raw or '*****' in raw or 'GEANT' in raw:
            continue
        is_header = 'NO. PULSE-HEIGHT' in raw
        is_values = '+/-' in raw
        if not is_header and not is_values:
            continue
        fields = raw.split()
        if idx == 0:
            dataset.headers = fields
        elif is_values:
            dataset.append([numberfy(f.replace('_', ' ')) for f in fields])
    return dataset
def _prepare_table(string):
    """Parse a '====' delimited text table into a ``Dataset``.

    The first physical line is dropped; '====' and blank lines are skipped.
    The line at enumeration index 1 supplies the headers.  Rows whose cell
    count does not match the headers are not appended; instead, the last
    well-formed line and the offending row are printed for diagnosis.
    """
    dataset = Dataset()
    last_good_line = None
    for idx, raw in enumerate(string.split('\n')[1:]):
        if not raw or '====' in raw:
            continue
        cells = raw.split()
        if idx == 1:
            dataset.headers = cells
        elif len(cells) == len(dataset.headers):
            last_good_line = raw
            dataset.append([numberfy(c) for c in cells])
        else:
            # Malformed row: dump context for debugging, then drop it.
            print(last_good_line)
            print(cells)
    return dataset
if __name__ == '__main__': with GimelSession(user=USER_NAME, password=PASSWORD, output_file=SESSION_FILE) as g: g.start_gimmel() # Insert the name of the particle here: g.send_particle_in_bulk('pi-0', momentum, times) with open(SESSION_FILE) as f: text = f.read() events = parse(text) raw = Dataset() raw.headers = ('Event ID', 'P Parent Particle', 'Cluster1 y', 'Cluster1 z', 'Pulse Height 1', 'Cluster2 y', 'Cluster2 z', 'Pulse Height 2') for event in events: sp.call('cls', shell=True) event_id += 1 progress = int(100 * event_id / times) print('Processing..', str(progress) + '% completed.') if len(event.raw.strip()) == 0: event_id -= 1 # elif len(event.calorimeter.clusters.clusters) == 1 and len(event.tracks.tracks) == 0: # number_of_decays += 1 elif len(event.calorimeter.clusters.clusters) == 2 and len(
IDNum = data["id"] with GimelSession(user=USER_NAME, password=PASSWORD, output_file=SESSION_FILE) as g: g.start_gimmel() g.send_command(IDNum) g.send_particles_ascending_energies(particle, minimum_energy, step_size, number_of_injections, per_energy) with open(SESSION_FILE) as f: text = f.read() if particle is 'photon': events = parse(text) dataset = Dataset() dataset.headers = ('P', 'pulseheight', 'x', 'dx', 'y', 'dy', 'z', 'dz', 'ywidth', 'zwidth') for event in events: row = [] if len(event.clusters.clusters.clusters) is 1: row.append(event.energy) row.append(event.clusters.clusters.clusters[0].pulse_height) row.append(event.clusters.clusters.clusters[0].x.value) row.append(event.clusters.clusters.clusters[0].x.error) row.append(event.clusters.clusters.clusters[0].y.value) row.append(event.clusters.clusters.clusters[0].y.error) row.append(event.clusters.clusters.clusters[0].z.value) row.append(event.clusters.clusters.clusters[0].z.error) row.append(event.clusters.clusters.clusters[0].ywidth) row.append(event.clusters.clusters.clusters[0].zwidth) dataset.append(row) with open(EXEL_OUTPUT, 'wb') as f:
progress = 0 if __name__ == '__main__': with GimelSession(user=USER_NAME, password=PASSWORD, output_file=SESSION_FILE) as g: g.start_gimmel() # Insert the name of the particle here: g.send_particle_in_bulk('k-short', momentum, times) with open(SESSION_FILE) as f: text = f.read() events = parse(text) raw = Dataset() raw.headers = ('Event ID','P Parent Particle', 'Kappa1', 'd Kappa1', 'tandip1', 'd tandip1', 'Kappa2', 'd Kappa2', 'tandip2', 'd tandip2', 'Vertex x', 'd Vertex x', 'Vertex y', 'd Vertex y', 'Vertex z', 'd Vertex z', 'Phi', 'd Phi') for event in events: sp.call('cls',shell=True) event_id += 1 progress = int(100*event_id/times) print('Processing..', str(progress)+'% completed.') if len(event.raw.strip()) == 0: event_id -= 1 elif len(event.tracks.tracks) >= 2 and len(event.verteces.verteces) > 0: row_raw = []
from gimel_parser import parse

# Gimel session credentials and I/O configuration.
USER_NAME = ''
PASSWORD = ''
SESSION_FILE = 'calibration.txt'
EXEL_OUTPUT = 'calibration_stats.xlsx'

# Energy scan: inject electrons starting at `minimum_energy`, stepping by
# `step_size`, for `number_of_injections` injections.
minimum_energy = 1
step_size = 0.2
number_of_injections = 250

if __name__ == '__main__':
    # Run the Gimel session; its transcript lands in SESSION_FILE.
    with GimelSession(user=USER_NAME, password=PASSWORD,
                      output_file=SESSION_FILE) as g:
        g.start_gimmel()
        g.send_particles_ascending_energies('electron', minimum_energy,
                                            step_size, number_of_injections)
    # NOTE(review): reading the transcript after the session context exits —
    # assumes the session flushes SESSION_FILE on close; confirm nesting.
    with open(SESSION_FILE) as f:
        text = f.read()
    events = parse(text)
    # One row per event: injected energy, track curvature and its variance,
    # and the first calorimeter cluster pulse height.
    dataset = Dataset()
    dataset.headers = ('P', 'Kappa', 'd Kappa', 'Calorimeter Pulse Hight')
    for event in events:
        dataset.append([
            event.energy,
            event.tracks.tracks[0].parameters.akappa,
            event.tracks.tracks[0].error_matrix['akappa']['akappa'],
            event.calorimeter.clusters['PULSE-HEIGHT'][0],
        ])
    # xlsx export is binary, hence 'wb'.
    with open(EXEL_OUTPUT, 'wb') as f:
        f.write(dataset.export('xlsx'))