def test_read_hdf(tmpdir, test_df):
    """Check that a dataset written to HDF5 can be read back.

    The file is read twice: once by passing its path and once by passing
    an ``HDFStore`` that is already open on that file.
    """
    # Write it
    out_path = os.path.join(tmpdir, 'test.h5')
    test_df.to_batdata_hdf(out_path)

    # Read it
    data = BatteryDataFrame.from_batdata_hdf(out_path)
    assert data.metadata.name == 'Test data'

    # Test reading from an already-open file. The context manager closes the
    # store even when an assertion fails, so the file handle is not leaked.
    with HDFStore(out_path, 'r') as store:
        data = BatteryDataFrame.from_batdata_hdf(store)
        assert data.metadata.name == 'Test data'
def test_dict(test_df):
    """Round-trip the dataset through its dictionary representation."""
    # Export: the dictionary carries the metadata and a 'data' entry
    exported = test_df.to_batdata_dict()
    assert exported['metadata']['name'] == 'Test data'
    assert 'data' in exported

    # Import: the rebuilt dataset has the same length and metadata
    restored = BatteryDataFrame.from_batdata_dict(exported)
    assert len(restored) == 3
    assert restored.metadata.name == 'Test data'
def compute_energy_per_cycle(df: BatteryDataFrame):
    """
    Calculate the energy and capacity discharged during each cycle

    Only rows whose ``state`` is ``'discharging'`` are used. Within a cycle,
    each ``substep_index`` segment is integrated separately over ``test_time``
    (trapezoidal rule) and the segment totals are summed.

    Parameters
    ----------
    df : BatteryDataFrame
        Input dataframe. Must have ``state``, ``cycle_number``,
        ``substep_index``, ``test_time``, ``current`` and ``voltage`` columns.

    Returns
    -------
    cycle_ind : array
        array of cycle numbers (as floats)
    energies : array
        array of the discharged energy for each cycle. Units: W-hr
    capacities : array
        array of the discharged capacity for each cycle. Units: A-hr
    """
    # ``np.trapz`` was renamed ``np.trapezoid`` in NumPy 2.0 and the old name
    # deprecated; use whichever one this NumPy provides.
    integrate = getattr(np, 'trapezoid', None) or np.trapz

    # Accumulate in plain lists (amortized O(1) append) and convert once at
    # the end, instead of re-allocating an array with ``np.append`` per cycle.
    cycle_ind = []
    energies = []
    capacities = []

    # Loop over each cycle
    for cyc, cycle_data in df.query("state=='discharging'").groupby('cycle_number'):

        # Calculate accumulated energy/capacity for each sub-segment
        ene = 0.0
        cap = 0.0
        for _, subseg in cycle_data.groupby('substep_index'):
            # Sort by test time, just in case
            subseg_sorted = subseg.sort_values('test_time')

            # Use current as always positive convention, opposite of what our standard uses
            t = subseg_sorted['test_time'].values
            i = -1 * subseg_sorted['current'].values
            v = subseg_sorted['voltage'].values

            # Integrate for energy and capacity; dividing by 3600 converts
            # the time base from seconds to hours (W-hr and A-hr)
            ene += integrate(i * v, t) / 3600
            cap += integrate(i, t) / 3600

        cycle_ind.append(cyc)
        energies.append(ene)
        capacities.append(cap)

    # Explicit float dtype keeps the output types stable, including when no
    # discharging data is present and the arrays are empty.
    return (
        np.array(cycle_ind, dtype=float),
        np.array(energies, dtype=float),
        np.array(capacities, dtype=float),
    )
def compute_charging_curve(df: BatteryDataFrame, discharge: bool = True) -> pd.DataFrame:
    """Compute estimates for the battery capacity for each measurement
    of the charging or discharging sections of each cycle.

    The capacity for each cycle are determined independently,
    and is assumed to start at zero at the beginning of the cycle.
    Each ``substep_index`` segment of a cycle is integrated on its own,
    so the running totals restart at zero at the start of every segment.

    Parameters
    ----------
    df: BatteryDataFrame
        Battery dataset. Must have state, cycle_number, substep_index,
        test_time, voltage and current columns. The input is not modified;
        the "capacity" and "energy" columns are added to the returned copy.
    discharge: bool
        Whether to compute the discharge or charge curve

    Returns
    -------
    curves: pd.DataFrame
        Rows of the selected state for every cycle in a single dataframe, with
        added "capacity" and "energy" columns in units of A-hr and W-hr, respectively
    """
    # SciPy's ``cumtrapz`` is the legacy name of ``cumulative_trapezoid``;
    # prefer the current name and fall back for SciPy versions that lack it.
    try:
        from scipy.integrate import cumulative_trapezoid
    except ImportError:
        from scipy.integrate import cumtrapz as cumulative_trapezoid

    # Get only the [dis]charging data
    target_state = ChargingState.discharging if discharge else ChargingState.charging
    df = pd.DataFrame(df[df['state'] == target_state])

    # Add columns for the capacity and energy. They are created as floats so
    # that writing the integrated values below does not force pandas to
    # upcast an integer column.
    df['capacity'] = 0.0
    df['energy'] = 0.0

    # Discharging current is negative in this dataset's convention, so the
    # integrals are flipped to report positive values for discharge curves.
    sign = -1 if discharge else 1

    # Compute the capacity and energy for each cycle
    for _, cycle in df.groupby('cycle_number'):

        # Compute in segments over each subset (avoid issues with rests)
        for _, subcycle in cycle.groupby('substep_index'):
            # Integrate over it; dividing by 3600 converts seconds to hours
            cap = cumulative_trapezoid(
                subcycle['current'], subcycle['test_time'], initial=0
            ) / 3600  # Capacity in A-hr
            eng = cumulative_trapezoid(
                subcycle['current'] * subcycle['voltage'], subcycle['test_time'], initial=0
            ) / 3600  # Energy in W-hr

            df.loc[subcycle.index, 'capacity'] = sign * cap
            df.loc[subcycle.index, 'energy'] = sign * eng

    return df
def parse_to_dataframe(
    self,
    group: List[str],
    metadata: Optional[Union[BatteryMetadata, dict]] = None,
) -> BatteryDataFrame:
    """Parse a set of files into a single battery dataset

    Files are read in order with ``generate_dataframe``. Each call receives
    the file, its position in ``group``, and the running cycle and time
    offsets computed from the files read before it.

    Parameters
    ----------
    group: list of str
        List of files to parse as part of the same test. Ordered sequentially
    metadata: dict, optional
        Metadata for the battery, should adhere to the BatteryMetadata schema

    Returns
    -------
    BatteryDataFrame
        Concatenated data from all files (with a fresh index), with
        ``metadata`` attached
    """
    # Running offsets handed to the reader for the next file
    cycle_offset = 0
    time_offset = 0

    # Read every file, remembering where the next one should start
    frames = []
    for position, path in enumerate(group):
        frame = self.generate_dataframe(path, position, cycle_offset, time_offset)
        frames.append(frame)

        # Advance by the number of cycles spanned by this file, and carry the
        # latest test time forward
        cycles = frame['cycle_number']
        cycle_offset += cycles.max() - cycles.min() + 1
        time_offset = frame['test_time'].max()

    # Stitch the per-file frames together and attach the metadata
    combined = pd.concat(frames, ignore_index=True)
    return BatteryDataFrame(data=combined, metadata=metadata)
def test_df():
    """Build a three-row BatteryDataFrame named 'Test data'."""
    measurements = {
        'current': [1, 0, -1],
        'voltage': [2, 2, 2],
    }
    info = {'name': 'Test data'}
    return BatteryDataFrame(data=measurements, metadata=info)