def peak_finder(df_run, cd, windowlength, polyorder, datatype, lenmax,
                peak_thresh):
    """Locate the peaks of one half cycle's smoothed dQ/dV curve.

    Args:
        df_run: DataFrame for a single charge or discharge segment. Its
            voltage column (name resolved through ``ccf.col_variables``)
            and its 'Smoothed_dQ/dV' column are read.
        cd: 'c' for charge or 'd' for discharge; forwarded to
            ``fitters.cd_dataframe``.
        windowlength: Kept in the signature for callers, but not used
            here -- the filter window is derived from ``lenmax`` instead.
        polyorder: Polynomial order handed to the Savitzky-Golay filter.
        datatype: Forwarded to ``ccf.col_variables`` to look up column
            names.
        lenmax: Reference length. The filter window is ``lenmax / 20``
            (rounded, bumped to the next odd number) and the minimum peak
            spacing is ``lenmax / 50``.
        peak_thresh: Threshold for ``peakutils.indexes``; converted with
            ``float()`` before use.

    Returns:
        A tuple ``(i, sigx_volts, peak_heights)`` where ``i`` is whatever
        ``peakutils.indexes`` returned, ``sigx_volts`` is ``sigx`` at those
        indices and ``peak_heights`` is a list of the *unfiltered* ``sigy``
        values at those indices. When no peak is found the last two are
        empty lists.
    """
    (_, _, volt_col, _, _, _, _) = ccf.col_variables(datatype)

    voltage = df_run[volt_col]
    # The pre-smoothed column is read on purpose; the filtering below is an
    # additional pass used only for locating peaks.
    dqdv = df_run['Smoothed_dQ/dV']
    sigx, sigy = fitters.cd_dataframe(voltage, dqdv, cd)

    # Savitzky-Golay needs an odd window, so even values are bumped by one.
    window = int(round(lenmax / 20))
    if window % 2 == 0:
        window += 1

    # Filter only when the signal is longer than the window and the window
    # exceeds the polynomial order; otherwise search the signal as-is.
    if len(sigy) > window > polyorder:
        search_signal = scipy.signal.savgol_filter(sigy, window, polyorder)
    else:
        search_signal = sigy

    i = peakutils.indexes(search_signal,
                          thres=float(peak_thresh),
                          min_dist=lenmax / 50)

    if i is None or len(i) == 0:
        return i, [], []

    # Heights are taken from the unfiltered signal, not the filtered one.
    return i, sigx[i], sigy[i].tolist()
def generate_model_for_jupyter(df_clean, filename, peak_thresh, database):
    """Fit a peak model to every cycle and write the results to the database.

    Analogous to the ``generate_model`` function in app.py. For each cycle
    in ``df_clean`` the model data points and fitted parameters returned by
    ``get_model_dfs_for_jupyter`` are collected, then two tables are written
    through ``dbfs.update_database_newtable`` and ``param_dicts_to_df`` is
    called on the parameter table.

    Args:
        df_clean: DataFrame of cleaned cycling data. The row labelled ``0``
            must exist; its 'datatype' value selects the column names via
            ``ccf.col_variables``.
        filename: Name of the source file. The text before the first '.'
            is used as the prefix of both table names.
        peak_thresh: Forwarded unchanged to ``get_model_dfs_for_jupyter``.
        database: Forwarded unchanged to the ``dbfs`` helpers and to
            ``param_dicts_to_df``.

    Returns:
        None.
    """
    datatype = df_clean.loc[0, 'datatype']
    cycle_ind_col, *_ = ccf.col_variables(datatype)

    cycles = df_clean[cycle_ind_col].unique()
    if len(cycles) > 1:
        # With more than one cycle, the cycle numbered 1 is left out of the
        # reference-length calculation.
        lenmax = max(
            len(df_clean[df_clean[cycle_ind_col] == cyc])
            for cyc in cycles if cyc != 1)
    else:
        lenmax = len(df_clean)

    # Collect per-cycle results in plain lists and build the DataFrames once
    # at the end: DataFrame.append no longer exists in pandas >= 2.0 and was
    # quadratic when called in a loop.
    model_frames = []
    param_rows = []
    for cyc in cycles:
        (new_df_mody, model_c_vals, model_d_vals, peak_heights_c,
         peak_heights_d) = get_model_dfs_for_jupyter(df_clean, datatype, cyc,
                                                     lenmax, peak_thresh)
        # A cycle with no usable model yields None; skip it, as the old
        # append-based code effectively did.
        if new_df_mody is not None:
            model_frames.append(new_df_mody)
        param_rows.append({
            'Cycle': cyc,
            'Model_Parameters_charge': str(model_c_vals),
            'Model_Parameters_discharge': str(model_d_vals),
            'charge_peak_heights': str(peak_heights_c),
            'discharge_peak_heights': str(peak_heights_d),
        })

    # pd.concat raises on an empty list, so fall back to an empty frame.
    mod_pointsdf = pd.concat(model_frames) if model_frames else pd.DataFrame()
    param_df = pd.DataFrame(param_rows,
                            columns=[
                                'Cycle', 'Model_Parameters_charge',
                                'Model_Parameters_discharge',
                                'charge_peak_heights',
                                'discharge_peak_heights'
                            ])

    table_prefix = filename.split('.')[0]
    # Written once, after the loop, so each table holds the complete result.
    # Table names are kept exactly as before (only the first has a hyphen).
    dbfs.update_database_newtable(mod_pointsdf, table_prefix + '-ModPoints',
                                  database)
    # This replaces the parameter table if it already exists.
    dbfs.update_database_newtable(param_df, table_prefix + 'ModParams',
                                  database)

    param_dicts_to_df(table_prefix + 'ModParams', database)
    def imp_one_cycle(testdf, cd, cyc_loop, battery, datatype, windowlength,
                      polyorder, lenmax):
        """Generate the descriptor dictionary for one half of one cycle.

        Args:
            testdf: DataFrame holding the cycle; split into charge and
                discharge parts by ``ccf.sep_char_dis``.
            cd: 'c' to fit the charge part, 'd' to fit the discharge part.
            cyc_loop: Cycle number; used in the printed messages and
                forwarded to ``fitters.descriptor_func``.
            battery: Battery name (string); used in the removal notice and
                forwarded to ``fitters.descriptor_func``.
            datatype: Forwarded to ``ccf.col_variables`` and
                ``ccf.sep_char_dis``.
            windowlength, polyorder, lenmax: Forwarded unchanged to
                ``fitters.descriptor_func``.

        Returns:
            The dictionary produced by ``fitters.descriptor_func``, or the
            string ``'throw'`` when either the charge or the discharge part
            has fewer than 10 rows.

        Raises:
            TypeError: If ``cd`` is neither 'c' nor 'd'.
        """
        (_, _, volt_col, _, _, _, _) = ccf.col_variables(datatype)
        charge, discharge = ccf.sep_char_dis(testdf, datatype)

        # Reject an invalid half-cycle indicator before doing any fitting.
        if cd not in ('c', 'd'):
            raise TypeError(
                "Cycle type must be either 'c' for charge or 'd' for discharge."
            )
        df_run = charge if cd == 'c' else discharge

        print('Generating descriptors for cycle number: ' + str(cyc_loop) + cd)

        # Both halves must have at least 10 points, whichever one is being
        # fitted; otherwise the cycle is dropped and the caller is told so
        # through the 'throw' sentinel.
        min_points = 10
        if (len(charge[volt_col].index) < min_points
                or len(discharge[volt_col].index) < min_points):
            print('Cycle ' + str(cyc_loop) + ' in battery ' + battery +
                  ' had fewer than 10 datapoints and was removed from the dataset.')
            return 'throw'

        return fitters.descriptor_func(df_run, cd, cyc_loop, battery,
                                       windowlength, polyorder, datatype,
                                       lenmax)
    def descriptor_func(df_run, cd, cyc, battery, windowlength, polyorder,
                        datatype, lenmax):
        """Build the dictionary of descriptors and fit-error values.

        Peaks are located with ``fitters.peak_finder`` (fixed threshold 0.3),
        a model is generated and fitted with ``fitters.model_gen`` /
        ``fitters.model_eval``, and the fitted values are collected.

        Args:
            df_run: DataFrame for one charge or discharge segment; its
                voltage column and 'Smoothed_dQ/dV' column are read.
            cd: 'c' for charge or 'd' for discharge. Also used as the
                suffix of every key in the returned dictionary.
            cyc: Cycle number, forwarded to ``fitters.model_gen``.
            battery: Not used inside this function.
            windowlength, polyorder, lenmax: Forwarded to
                ``fitters.peak_finder``.
            datatype: Forwarded to ``ccf.col_variables`` and
                ``fitters.peak_finder``.

        Returns:
            dict with the keys 'coefficients-<cd>', 'peakLocation(V)-<cd>',
            'peakHeight(dQdV)-<cd>', 'peakSIGMA-<cd>' and
            'errorParams-<cd>'. When no peak is found, the location and
            height entries are ``[nan]`` and the sigma entry is an empty
            list.
        """
        _, _, volt_col, *_ = ccf.col_variables(datatype)

        V_series = df_run[volt_col]
        dQdV_series = df_run['Smoothed_dQ/dV']
        # make sure a single column of the data frame is passed on
        assert isinstance(V_series, pd.core.series.Series)
        assert isinstance(dQdV_series, pd.core.series.Series)

        # Converted once and reused below for the peak locations/heights.
        sigx, sigy = fitters.cd_dataframe(V_series, dQdV_series, cd)

        peak_thresh = 0.3
        # indices of the peaks in this half cycle
        i, _, _ = fitters.peak_finder(df_run, cd, windowlength, polyorder,
                                      datatype, lenmax, peak_thresh)

        # Generate the model parameters for the fit. The threshold passed
        # here is the same one used for peak finding (the previous code
        # referenced an undefined name ``thresh``).
        par, mod, _ = fitters.model_gen(V_series, dQdV_series, cd, i, cyc,
                                        peak_thresh)

        # fitted model object built from the parameters and data
        model = fitters.model_eval(V_series, dQdV_series, cd, par, mod)
        # NOTE(review): other callers check model_eval's result for None;
        # here a None result would raise AttributeError -- confirm intent.

        best_values = model.best_values
        coefficients = [
            best_values[key]
            for key in ('base_sigma', 'base_center', 'base_amplitude',
                        'base_fwhm', 'base_height')
        ]

        suffix = '-' + str(cd)
        desc = {'coefficients' + suffix: coefficients}

        sig = []
        if len(i) > 0:
            # peak locations and heights come from the converted input data
            desc.update({
                'peakLocation(V)' + suffix: sigx[i].tolist(),
                'peakHeight(dQdV)' + suffix: sigy[i].tolist()
            })
            for index in i:
                # label_gen yields the parameter names for this peak; only
                # the sigma name (second item) is needed.
                sigma_name = fitters.label_gen(index)[1]
                sig.append(best_values[sigma_name])
        else:
            # np.nan rather than np.NaN: the alias was removed in NumPy 2.0.
            desc.update({
                'peakLocation(V)' + suffix: [np.nan],
                'peakHeight(dQdV)' + suffix: [np.nan]
            })

        desc.update({'peakSIGMA' + suffix: sig})
        # error values of the fit
        desc.update({
            'errorParams' + suffix: [model.aic, model.bic, model.redchi]
        })

        return desc
def get_model_dfs_for_jupyter(df_clean, datatype, cyc, lenmax, peak_thresh):
    """Fit the charge and discharge halves of one cycle.

    Analogous to the ``get_model_dfs`` function in app.py.

    Args:
        df_clean: DataFrame of cleaned data for all cycles.
        datatype: Forwarded to ``ccf.col_variables``, ``ccf.sep_char_dis``
            and the peak finder.
        cyc: Cycle number to select from ``df_clean``.
        lenmax: Forwarded to the peak finder.
        peak_thresh: Forwarded to the peak finder and to ``model_gen``.

    Returns:
        A tuple ``(new_df_mody, model_c_vals, model_d_vals, peak_heights_c,
        peak_heights_d)``. ``new_df_mody`` stacks the charge and discharge
        model frames (columns: 'Model', voltage, 'Smoothed_dQ/dV', cycle
        index) and is None when neither half produced a model. The
        ``*_vals`` entries are the fitted model's ``values`` or None. The
        discharge 'Model' column is negated.
    """
    cycle_ind_col, _, volt_col, *_ = ccf.col_variables(datatype)
    clean_charge, clean_discharge = ccf.sep_char_dis(
        df_clean[df_clean[cycle_ind_col] == cyc], datatype)

    # Charge first, then discharge -- same order as before.
    new_df_mody_c, model_c_vals, peak_heights_c = _fit_half_cycle_for_jupyter(
        clean_charge, 'c', datatype, cyc, lenmax, peak_thresh, volt_col,
        cycle_ind_col)
    new_df_mody_d, model_d_vals, peak_heights_d = _fit_half_cycle_for_jupyter(
        clean_discharge, 'd', datatype, cyc, lenmax, peak_thresh, volt_col,
        cycle_ind_col)

    if new_df_mody_c is None and new_df_mody_d is None:
        new_df_mody = None
    else:
        # pd.concat silently drops a single None entry.
        new_df_mody = pd.concat([new_df_mody_c, new_df_mody_d], axis=0)

    return (new_df_mody, model_c_vals, model_d_vals, peak_heights_c,
            peak_heights_d)


def _fit_half_cycle_for_jupyter(half_df, cd, datatype, cyc, lenmax,
                                peak_thresh, volt_col, cycle_ind_col):
    """Find peaks and fit the model for one charge ('c') or discharge ('d') half.

    Returns ``(model_frame, model_values, peak_heights)``; the first two are
    None when ``model_eval`` returns None.
    """
    windowlength = 75
    polyorder = 3
    peak_idx, _, peak_heights = descriptors.fitters.peak_finder(
        half_df, cd, windowlength, polyorder, datatype, lenmax, peak_thresh)

    voltage = half_df[volt_col]
    dqdv = half_df['Smoothed_dQ/dV']
    par, mod, _ = descriptors.fitters.model_gen(voltage, dqdv, cd, peak_idx,
                                                cyc, peak_thresh)
    model = descriptors.fitters.model_eval(voltage, dqdv, cd, par, mod)
    if model is None:
        return None, None, peak_heights

    fitted = mod.eval(params=model.params, x=voltage)
    # Attach the model values positionally to the voltage index so the
    # column-wise concat below lines up row for row even if the evaluation
    # returned a plain array (which would otherwise get a fresh 0..n-1 index).
    model_series = pd.Series(pd.Series(fitted).values,
                             index=voltage.index,
                             name='Model')
    if cd == 'd':
        # The discharge model is stored with its sign flipped.
        model_series = -model_series

    model_frame = pd.concat(
        [model_series, voltage, dqdv, half_df[cycle_ind_col]], axis=1)
    return model_frame, model.values, peak_heights