def parse(self, taggers=None):
    """Open the csv file and dump it in a tablib.Dataset object.

    Reads ``self.csv_path`` row by row, cleans each row, tags it, and
    appends it to ``self.data`` while detecting duplicates by index.

    :param taggers: optional list of ``GoogleContactRow`` method names;
        each is called on every row and must return a list of tags.
        Defaults to no taggers.

    Side effects: sets ``self.data`` and extends ``self.hash`` with one
    index entry per kept row.
    """
    # BUGFIX: the default was ``taggers=list()`` — a mutable default
    # evaluated once at definition time and shared across calls. Use a
    # None sentinel instead (backward compatible: absent/empty behaves
    # the same).
    if taggers is None:
        taggers = []
    self.logger.info("Will parse input %(csv_path)s csv file" % {"csv_path": self.csv_path})
    data = Dataset()
    # "rb" + UnicodeReader: Python 2 style CSV decoding — presumably
    # UnicodeReader handles the byte->unicode conversion; confirm.
    with open(self.csv_path, "rb") as csv_file:
        google_contact = UnicodeReader(csv_file)
        for row_num, row in enumerate(google_contact):
            # First row of the file is the header row.
            if row_num == 0:
                data.headers = row
                continue
            gRow = GoogleContactRow(headers=data.headers, row=row)
            gRow.standard_cleanup()
            gRow.format_names()
            # Collect tags from every configured tagger method, then
            # de-duplicate (set order is arbitrary, as in the original).
            tags = []
            for tagger in taggers:
                tags += getattr(gRow, tagger)()
            tags = list(set(tags))
            # Get the row index from the "Name" column.
            index = format_index(gRow[data.headers.index("Name")])
            # Empty index: drop this row.
            if not index:
                self.logger.info("Ignored row without index (%(row_num)d)" % {"row_num": row_num})
                continue
            # Duplicate?
            if self.is_duplicate(index):
                self.logger.info(
                    "Found duplicate row for %(name)s (num: %(row_num)d)"
                    % {"name": index, "row_num": row_num}
                )
                # Drop this row
                if self.drop:
                    self.logger.debug("Dropped duplicate row %(row_num)d" % {"row_num": row_num})
                    continue
                # Merge this row into the previously-seen row.
                if self.merge:
                    row_dst = self.hash.index(index)
                    data[row_dst] = merge_lists(gRow, data[row_dst])
                    self.logger.debug(
                        "Merged duplicate row %(row_src)d with %(row_dst)d"
                        % {"row_src": row_num, "row_dst": row_dst}
                    )
                    continue
            # New (or kept-duplicate) row: remember its index and store it.
            self.hash += (index,)
            data.append(gRow, tags=tags)
            self.logger.debug("row %d tags %s", row_num, tags)
    self.data = data
    self.logger.debug("File columns are:\n%s", "\n".join(self.data.headers))
def transpose(self):
    """Transpose a :class:`Dataset`, turning rows into columns and vice
    versa, returning a new ``Dataset`` instance. The first row of the
    original instance becomes the new header row."""
    # Nothing to transpose on an empty dataset.
    if not self:
        return
    # The first header is the "hinge": it stays in the headers while the
    # rest of the data rotates around it.
    hinge = self.headers[0]
    transposed = Dataset()
    transposed.headers = [hinge] + self[hinge]
    for name in self.headers:
        # The hinge column is already in the headers — skip it.
        if name == hinge:
            continue
        # Each remaining column becomes a row, led by its column name.
        transposed.append(row=Row([name] + self[name]))
    return transposed
def as_tablib_dataset(report, parent_context):
    """Render *report* and flatten its element tables into one
    :class:`Dataset`, with each element's title as a separator.

    :param report: the report object passed to ``_report``.
    :param parent_context: rendering context forwarded to ``_report``.
    :returns: a ``Dataset`` containing every table's rows in order.
    """
    render_context = _report(report, parent_context)
    dataset = Dataset()
    for element in render_context["elements"].values():
        table = element["table"]
        dataset.append_separator(element["title"])
        # IDIOM FIX: the original used ``enumerate`` but never used the
        # index — iterate the values directly.
        for row in table.as_values():
            dataset.append(row)
    return dataset
def _prepare_table(string):
    """Parse an '='-ruled plain-text table into a tablib ``Dataset``.

    Skips the first line, drops ruler ('====') and blank lines, takes
    the line at index 1 (of the remainder) as headers, and converts
    every other line's whitespace-split cells through ``numberfy``.
    """
    table = Dataset()
    body = string.split('\n')[1:]
    for idx, text in enumerate(body):
        # Ignore ruler lines and empty lines.
        if not text or '====' in text:
            continue
        cells = text.split()
        if idx == 1:
            table.headers = cells
        else:
            table.append([numberfy(cell) for cell in cells])
    return table
def _prepare_cluters(string):
    """Parse a clusters report into a tablib ``Dataset``.

    Multi-word tokens are first glued with placeholder characters
    ('PULSE HEIGHT' -> 'PULSE-HEIGHT', ' +/-' -> '_+/-') so that a
    plain ``split()`` keeps them as single cells; the '_' placeholder
    is undone per cell before ``numberfy``.
    """
    result = Dataset()
    glued = string.replace('PULSE HEIGHT', 'PULSE-HEIGHT').replace(' +/-', '_+/-')
    # The first three lines are preamble — skip them.
    for idx, raw in enumerate(glued.split('\n')[3:]):
        # Drop separator ('*****') and blank lines.
        if '*****' in raw or not raw:
            continue
        cells = raw.split()
        if idx == 0:
            result.headers = cells
        else:
            result.append([numberfy(cell.replace('_', ' ')) for cell in cells])
    return result
def _prepare_table(string):
    """Parse a '*'-separated plain-text table into a tablib ``Dataset``.

    Skips the first line, drops '*****' rulers and blank lines. The
    line at index 1 is the header row; its 5th column name spans two
    whitespace-split tokens, so tokens 4 and 5 are merged and only the
    first five tokens are kept as headers. All other lines are data
    rows passed through ``numberfy``.
    """
    dataset = Dataset()
    for i, line in enumerate(string.split('\n')[1:]):
        if '*****' in line or not line:
            continue
        row = line.split()
        if i == 1:
            # The 5th header name contains a space — re-join its halves.
            row[4] += ' ' + row[5]
            dataset.headers = row[:5]
        else:
            # FIX: the comprehension previously reused the outer loop
            # index name ``i`` — under Python 2 comprehension scoping
            # that rebinds the loop variable (a latent hazard); use a
            # distinct name.
            dataset.append([numberfy(cell) for cell in row])
    return dataset
def sort(self, col, reverse=False, include_tags=True):
    """Sort a :class:`Dataset` by a specific column, given string (for
    header) or integer (for column index). The order can be reversed by
    setting ``reverse`` to ``True``. Returns a new :class:`Dataset`
    instance where columns have been sorted.

    When ``include_tags`` is true, each row's tags are carried over to
    the sorted dataset; this requires ``self._tags_list`` to have one
    entry per row, otherwise ``InvalidDimensions`` is raised.
    """
    # A string column name requires headers to resolve it; an integer
    # index is translated to its header name when headers exist.
    if isinstance(col, str) or isinstance(col, unicode):
        if not self.headers:
            raise HeadersNeeded
    else:
        if self.headers:
            col = self.headers[col]
    # Snapshot the headers before the temporary 'tags' column is added.
    old_headers = self.headers[:]
    if include_tags:
        if len(self._tags_list) != self.height:
            raise InvalidDimensions
        else:
            # Work on a copy with tags pushed in as an extra column so
            # sorting keeps each row paired with its tags.
            _dset = copy(self)
            _dset.rpush_col(self._tags_list, header='tags')
            # NOTE(review): assumes rows from ``_dset.dict`` support
            # positional access at index width+1 for the appended tags
            # column — confirm against the Dataset.dict row type.
            tag_getter = itemgetter(self.width +1)
            _dset._tags_list = []
            _sorted = sorted(_dset.dict, key=itemgetter(col), reverse=reverse)
            _dataset = Dataset(headers=self.headers)
            for item in _sorted:
                if self.headers:
                    # Tags were serialized space-separated in the extra
                    # column; split them back into a list.
                    tags = tag_getter(item).split(' ')
                    changed_row = Row([item[key] for key in old_headers], tags=tags)
                else:
                    # Headerless rows are used as-is; presumably they
                    # already behave like Row objects — verify.
                    changed_row = item
                _dataset.append(row=changed_row._row, tags=changed_row.tags, include_tags=True)
    else:
        # Tag-free path: sort the row dicts and rebuild the dataset.
        _dset = copy(self)
        _sorted = sorted(_dset.dict, key=itemgetter(col), reverse=reverse)
        _dataset = Dataset(headers=self.headers)
        for item in _sorted:
            if self.headers:
                # Re-order each row's values to match the header order.
                row = [item[key] for key in self.headers]
            else:
                row = item
            _dataset.append(row=row)
    return _dataset
def _prepare_table(string):
    """Parse a '*'-delimited plain-text table into a tablib ``Dataset``.

    Lines containing '*****' rulers and blank lines are dropped; each
    remaining line is stripped of leading/trailing '*' and split on '*'
    into cells. The first kept line becomes the headers; the rest are
    data rows converted through ``numberfy``.
    """
    result = []
    for line in string.split('\n'):
        if '*****' in line or not line:
            continue
        cells = line.lstrip().strip('*').split('*')
        # IDIOM FIX: build the cleaned row with a comprehension instead
        # of a manual append loop.
        result.append([cell.strip() for cell in cells])
    dataset = Dataset()
    # ROBUSTNESS: an input with no table lines previously crashed with
    # IndexError on result[0]; return an empty dataset instead.
    if not result:
        return dataset
    dataset.headers = result[0]
    for row in result[1:]:
        dataset.append([numberfy(cell) for cell in row])
    return dataset
def _prepare_clusters_str(self, data):
    """Parse a clusters report string into a tablib ``Dataset``.

    Multi-word tokens are glued before splitting ('PULSE HEIGHT' ->
    'PULSE-HEIGHT', ' +/-' -> '_+/-'); the '_' placeholder is undone per
    cell before ``numberfy``. Only the header line and '+/-' data lines
    are kept; separator, blank, and 'GEANT' lines are dropped.
    """
    clusters = Dataset()
    glued = data.replace('PULSE HEIGHT', 'PULSE-HEIGHT').replace(' +/-', '_+/-')
    # Skip the three preamble lines before the table body.
    for idx, raw in enumerate(glued.split('\n')[3:]):
        if '*****' in raw or not raw or 'GEANT' in raw:
            continue
        # Keep only the header line and measurement lines.
        if 'NO. PULSE-HEIGHT' not in raw and '+/-' not in raw:
            continue
        cells = raw.split()
        if idx == 0:
            clusters.headers = cells
        elif '+/-' in raw:
            clusters.append([numberfy(cell.replace('_', ' ')) for cell in cells])
    return clusters
def _prepare_table(string):
    """Parse an '='-ruled plain-text table into a tablib ``Dataset``.

    Skips the first line, drops '====' rulers and blank lines; the line
    at index 1 becomes the headers. Data rows whose cell count matches
    the header count are appended (via ``numberfy``); mismatched rows
    are printed for debugging together with the last good line.
    """
    table = Dataset()
    last_good = None
    for idx, raw in enumerate(string.split('\n')[1:]):
        if '====' in raw or not raw:
            continue
        cells = raw.split()
        if idx == 1:
            table.headers = cells
        elif len(cells) == len(table.headers):
            last_good = raw
            table.append([numberfy(cell) for cell in cells])
        else:
            # Debug output for rows that do not match the header width.
            print(last_good)
            print(cells)
    return table
# NOTE(review): this fragment's enclosing loop header is outside the
# visible chunk — the append block presumably runs once per interesting
# event; confirm against the full file.
# Row layout: decay counter, momentum, then (y, z, pulse_height) for the
# first two calorimeter clusters.
row_raw.append(number_of_decays + 1)
row_raw.append(momentum)
y1 = event.calorimeter.clusters.clusters[0].y.value
z1 = event.calorimeter.clusters.clusters[0].z.value
y2 = event.calorimeter.clusters.clusters[1].y.value
z2 = event.calorimeter.clusters.clusters[1].z.value
ph1 = event.calorimeter.clusters.clusters[0].pulse_height
ph2 = event.calorimeter.clusters.clusters[1].pulse_height
row_raw.append(y1)
row_raw.append(z1)
row_raw.append(ph1)
row_raw.append(y2)
row_raw.append(z2)
row_raw.append(ph2)
raw.append(row_raw)
number_of_decays += 1
# Dump the accumulated dataset as an Excel workbook (binary write).
with open(RAW_OUTPUT, 'wb') as f:
    f.write(raw.export('xlsx'))
# Final status report; `progress` presumably tracks simulation
# completion percentage — confirm where it is updated.
if (progress < 98):
    print('Something went wrong. Please run the program again.')
else:
    print('Execution completed.')
    print(event_id, 'event(s) were simulated.')
    print('There were', number_of_decays, 'interesting events out of', event_id, 'events.')
# NOTE(review): fragment is truncated at both ends — the `if` matching
# the `elif` below, and the body of the trailing `else:`, are outside
# the visible chunk.
# Single-cluster events: record energy plus the cluster's position,
# errors, widths and pulse height.
dataset.headers = ('P', 'pulseheight', 'x', 'dx', 'y', 'dy', 'z', 'dz', 'ywidth', 'zwidth')
for event in events:
    row = []
    # NOTE(review): `is 1` compares identity, not equality — works only
    # by CPython small-int caching; should be `== 1`. Flagged, not
    # changed (doc-only pass).
    if len(event.clusters.clusters.clusters) is 1:
        row.append(event.energy)
        row.append(event.clusters.clusters.clusters[0].pulse_height)
        row.append(event.clusters.clusters.clusters[0].x.value)
        row.append(event.clusters.clusters.clusters[0].x.error)
        row.append(event.clusters.clusters.clusters[0].y.value)
        row.append(event.clusters.clusters.clusters[0].y.error)
        row.append(event.clusters.clusters.clusters[0].z.value)
        row.append(event.clusters.clusters.clusters[0].z.error)
        row.append(event.clusters.clusters.clusters[0].ywidth)
        row.append(event.clusters.clusters.clusters[0].zwidth)
        dataset.append(row)
# Export collected rows as a legacy .xls workbook.
with open(EXEL_OUTPUT, 'wb') as f:
    f.write(dataset.export('xls'))
# NOTE(review): `is 'electron'` / `is 'muon'` compare string identity —
# relies on interning and should be `==`. Flagged, not changed.
elif particle is 'electron' or particle is 'muon':
    events = parse(text)
    dataset = Dataset()
    dataset.headers = ('P', 'tandip', 'Kappa', 'd Kappa', 'Calorimeter Pulse Heights')
    for event in events:
        row = []
        row.append(event.energy)
        # NOTE(review): `is not 0` — identity check on an int literal;
        # should be `!= 0`. Flagged, not changed.
        if len(event.tracks.tracks) is not 0:
            if event.tracks.tracks[0].parameters.tandip is not None:
                row.append(event.tracks.tracks[0].parameters.tandip)
            else:
                # (truncated: else-branch body lies outside this chunk)