Code Example #1
def makeCSVlistFromFolderName(folderName, basePath=sf.folderPath):
    '''Make a list of the full paths of all the files in a folder'''
    # basePath is currently unused; sf.addFolderPath supplies the base path
    listofcsvs = listdir(sf.addFolderPath(folderName))
    listofcsvs = [
        sf.addFolderPath(x, folderName=folderName) for x in listofcsvs
    ]
    return listofcsvs
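
The setFolder.addFolderPath helper itself never appears on this page; a minimal sketch consistent with how these examples call it (an assumption, not the project's actual code) might be:

# Hypothetical sketch of setFolder.addFolderPath, inferred from the call
# sites on this page; the real project code may differ.
import os

folderPath = 'SchoolsData'  # assumed data root (sf.folderPath in the project)


def addFolderPath(name, folderName=''):
    '''Prefix a bare filename with the data folder (and optional subfolder).'''
    return os.path.join(folderPath, folderName, name)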
Code Example #2
File: SFS.py Project: crees00/SchoolsData
def processListOfPickles(listOfPickles, folderName='', printOut=True):
    ''' Feeds to showBestFeaturesOfLoadedDict '''
    folderName = sf.addFolderPath(folderName)
    dictOfPickleNamesAndOutLists = {}
    for pickle in listOfPickles:
        print(os.path.join(folderName, pickle))
        if pickle[-4:] != '.pik':
            print(pickle, 'not opened')
            continue
        newDict = pickling.load_dill(os.path.join(folderName, pickle))
        print('\nunpacking', pickle)
        dictOfPickleNamesAndOutLists[pickle] = showBestFeaturesOfLoadedDict(
            newDict, printOut)
    if printOut:
        bestAcc, numFeatures, bestRun = 0, 0, ''
        for runName, outList in dictOfPickleNamesAndOutLists.items():
            for group in outList:
                if group[1] > bestAcc:
                    bestAcc = group[1]
                    numFeatures = group[0]
                    bestRun = runName
        print('\nBest Results:')
        print('best accuracy, num features, best run')
        print(bestAcc, numFeatures, bestRun)
        print()
    return dictOfPickleNamesAndOutLists
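
A hedged usage sketch for the function above ('pickles' is an illustrative subfolder name, not one from the project):

# Hypothetical usage: list the .pik files in a subfolder and process them.
from os import listdir

listOfPickles = listdir(sf.addFolderPath('pickles'))
results = processListOfPickles(listOfPickles, folderName='pickles')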
Code Example #3
def makeNewDoneRunListFromOutFile(outFile):
    ''' Input: a .csv file that combines all the individual csvs,
    so each column is a new run'''
    import re
    df = pd.read_csv(sf.addFolderPath(outFile))
    cols = [x for x in (set(df.columns) - {'Unnamed: 0'})]
    cols = [re.sub('_[0-9]of5_', '_', x) for x in cols]
    print('made newRunDoneList with', len(cols), 'items')
    assert (len(cols) >
            100), 'df needs to be transposed - need col names to be run names'
    return cols
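
The re.sub call strips the cross-validation fold marker from each run name; for example (the run name is illustrative, following the '_kof5_' pattern used elsewhere on this page):

import re

re.sub('_[0-9]of5_', '_', 'RF_3of5_200_11_gini_True')
# -> 'RF_200_11_gini_True'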
Code Example #4
def load_dill(name):
    '''Load a dill pickle. Does not prepend the folder path itself;
    falls back to sf.addFolderPath if the bare name is not found.'''
    try:
        with open(name, 'rb') as f:
            return dill.load(f)
    except FileNotFoundError:
        with open(sf.addFolderPath(name), 'rb') as f:
            return dill.load(f)


#if __name__ == "__main__":
#    save_dill(modelDict, 'modelDictAllFeatures0908')
#    aReloaded = load_dill('modelDictWithDill')
#    pass
Code Example #5
File: schoolClass.py Project: crees00/SchoolsData
def runAll():
    '''Run the full pipeline: load inspections, build School objects,
    set statuses and write the clustering/inspection dataframes.'''
    global predsThatAreNotInDF
    predsThatAreNotInDF = []
    global inspList
    global SchoolDict
    inspList = []
    df = pd.read_csv(sf.addFolderPath("bigDFnoDups1.csv"))
    df = df.apply(loadInspections, axis=1)
    SchoolDict, allInspNos, dupInsps = assignInspectionsToSchools(inspList)
    df = df.apply(addPredecessorURNsFromDF, axis=1)
    SchoolDict = addAllPredecessors(SchoolDict)
    stuck = calcStuck(SchoolDict)
    SchoolDict, openAndUninspected = setAllStatuses(SchoolDict)
    openStuck = whichStuckAreOpen(stuck)
    SchoolDict = feedToSort(SchoolDict)
    dfForClustering = clusterDF(SchoolDict, sf.addFolderPath("clusterDF.csv"))
    SchoolDict = makeGoodsAndBadsLists(SchoolDict)
    dfOut = makeURNvsYearInspCats(SchoolDict, sf.addFolderPath("dfOut.csv"))
    #    finalPt, steps = makeMatrices(SchoolDict)
    openSchoolDict = findOpenSchools(SchoolDict)
    threeGenerations = findGrandParents(SchoolDict)
    #    groupDict = grouping(openSchoolDict)
    groupDict = {}
    return SchoolDict, openSchoolDict, groupDict
Code Example #6
def makeDFofDiffsWithTrueValues(bigDF, write=''):
    '''Subtract the 'TheTruth' column from each run's predictions;
    a difference of 0 means a correct prediction.'''
    accdf = pd.DataFrame()
    for col in bigDF.columns:
        accdf[col] = bigDF[col] - bigDF['TheTruth']
    if write != '':
        accdf.to_csv(sf.addFolderPath(write))

    bestScore, bestRun = 0, None
    for col in accdf:
        if col == 'TheTruth':
            continue
        # count of exact matches (difference == 0) for this run
        runScore = accdf[col].value_counts()[0]
        if runScore > bestScore:
            bestScore = runScore
            bestRun = col
    return bestRun, bestScore
Code Example #7
File: schoolClass.py Project: crees00/SchoolsData
def setAllStatuses(SchoolDict):
    openAndUninspected = []
    # `where` is a module-level flag for which machine/data layout is in use
    if where in ["ONS", "Cdrive"]:
        file = "Data\\edubaseallstatefunded20190704.csv"
    else:
        file = "edubaseallstatefunded20190627.csv"
    openSchools = pd.read_csv(sf.addFolderPath(file), encoding="latin-1")
    openSchoolsSet = set(openSchools["URN"])
    for URN in openSchoolsSet:
        try:
            SchoolDict[URN].setStatus("open")
        except KeyError:
            SchoolDict[URN] = School(URN)
            SchoolDict[URN].setStatus("open")
            openAndUninspected.append(SchoolDict[URN])
    print("school statuses set")
    return SchoolDict, openAndUninspected
Code Example #8
def exportResultsDF(dic, write=''):
    print('saving..')
    dictToDF = {}
    for name in dic.keys():
        mod = dic[name]
        # entryInput maps column name -> function extracting it from the model
        entry = {}
        for key, func in entryInput.items():
            entry[key] = func(mod)

        dictToDF[name] = entry
    outDF = pd.DataFrame(dictToDF)
    if len(write) > 0:
        outDF.to_csv(sf.addFolderPath(write))
        print(write, 'file written')
    return outDF
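
entryInput is a module-level dict shared by these export helpers, mapping a column name to a function that extracts that value from a model object. Its definition is not shown on this page; a hypothetical sketch of its shape (the accessor names are assumptions):

# Hypothetical shape of entryInput; the real accessors are not shown here.
entryInput = {
    'runName': lambda mod: mod.getRunName(),  # assumed accessor
    'acc': lambda mod: mod.getAcc(),          # assumed accessor
    'cm': lambda mod: mod.getCM(),            # assumed accessor
}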
Code Example #9
def makeAvgResults(modelDict, write=''):
    ''' For k-fold cross validation.
    Makes an average of the k folds for each run setup'''
    modelScoresDict, modelAvgDict = {}, {}
    # Fill up a dict of dicts with a list of scores for each run
    for run in modelDict.keys():
        loc = run.find('of')
        if loc > 0:
            # strip the '_kofN' fold marker to get the run's base name
            avgName = run[:loc - 2] + run[loc + 3:]
            if avgName not in modelScoresDict.keys():
                entry = {}
                for key, func in entryInput.items():
                    entry[key] = [func(modelDict[run])]
                modelScoresDict[avgName] = entry
            else:
                for score in modelScoresDict[avgName].keys():
                    scoreToAppend = entryInput[score](modelDict[run])
                    modelScoresDict[avgName][score].append(scoreToAppend)

    # Fill up a dict of dicts with just an average score for each set of runs
    for runName, runResultsDict in modelScoresDict.items():
        modelAvgDict[runName] = {}
        for score in (set(runResultsDict.keys()) - {
                'cr', 'longName', 'params', 'runCode', 'runName', 'tpr', 'fpr',
                'cm'
        }):
            modelAvgDict[runName][score] = np.mean(runResultsDict[score])
        modelAvgDict[runName]['acc variance'] = np.var(runResultsDict['acc'])

    if len(write) > 0:
        outDF = pd.DataFrame(modelAvgDict)
        outDF.to_csv(sf.addFolderPath(write))
        print(write, 'file written')

    return modelAvgDict, modelScoresDict
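
The slicing around run.find('of') removes the '_kofN' fold marker; a worked example (run name illustrative):

run = 'RF_3of5_200_11_gini_True'
loc = run.find('of')                     # 4
avgName = run[:loc - 2] + run[loc + 3:]  # 'RF' + '_200_11_gini_True'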
Code Example #10
def makeDFofResults(write=''):
    numSchools = len(df)
    bigDF = pd.DataFrame({'tester': np.zeros(len(df))},
                         index=list(range(len(df))))
    oldName = ''
    for modelName, modelInstance in modelDict.items():
        # strip the '_kofN' fold marker so the 5 folds share one column name
        newName = ''.join(re.split('_[0-9]of[0-9]', modelName))

        modData = modelInstance.getData()
        xTest = modData.getxTest()
        yTest = modData.getyTest()
        xTestIndices = list(xTest.index)
        clf = modelInstance.getCLF()
        y_pred = clf.predict(xTest)  # numpy ndarray of 0s and 1s
        # turn array of predictions for this test set into a dataframe
        y_predDFOneRun = pd.DataFrame({newName: y_pred}, index=xTestIndices)

        # combine the 5 fold dataframes into a single one
        if oldName != newName:  # First fold of this set of 5 runs
            DF5runs = y_predDFOneRun
            numOfDFsAdded = 1
        else:
            # DataFrame.append is deprecated in newer pandas; pd.concat is
            # the modern equivalent
            DF5runs = DF5runs.append(y_predDFOneRun)
            numOfDFsAdded += 1
        if DF5runs.shape[1] > 1:
            raise RuntimeError(
                'modelDict not in order so change the way bigDF is made')
        if numOfDFsAdded == 5:
            bigDF = bigDF.join(DF5runs)
        oldName = newName

    bigDF.drop('tester', axis=1, inplace=True)

    bigDF['TheTruth'] = df['Class'][:]
    if write != '':
        bigDF.to_csv(sf.addFolderPath(write))

    return bigDF
Code Example #11
def makeLabelledSubsets(dictOfURNGroups, cat1, cat2, df, write=''):
    """Add a 'Class' column to the df:
    1 if the URN is in the cat1 ('stuck') list,
    2 if the URN is in neither list (these rows are candidates to drop),
    0 otherwise.
    If write is non-empty, write the result to .csv
    """
    import creatingAMonster as cam
    print("Adding/updating stuck column in df...")
    posURNs, negURNs = dictOfURNGroups[cat1], dictOfURNGroups[cat2]
    allURNs = posURNs + negURNs
    URNsToDrop = set(df['URN']) - set(allURNs)
    #    df = df[~df['URN'].isin(URNsToDrop)]
    df["Class"] = df.apply(lambda row: np.where(
        (int(row["URN"]) in posURNs), 1,
        np.where(int(row["URN"]) in URNsToDrop, 2, 0)),
                           axis=1)
    print(df['Class'].value_counts())
    df = cam.dropColsFromList(df, ['Stuck'])
    if len(write) > 0:
        df.to_csv(sf.addFolderPath(write))
    return df
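
Since each row's URN is a scalar, the nested np.where calls act as plain conditionals; inside the function body an equivalent and arguably clearer version would be (a sketch with the same semantics):

# Sketch: scalar labelling without np.where
def labelURN(urn):
    if int(urn) in posURNs:
        return 1
    return 2 if int(urn) in URNsToDrop else 0

df['Class'] = df['URN'].map(labelURN)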
Code Example #12
def combineIntermediateResultsCSVs(listOfCSVFilenames, outFile=''):
    ''' Runs take too much memory and crash computer so at intervals it dumps
    the results to csv and wipes the memory clean. This function is just to 
    stitch the csv files together so the results are all in one place
    '''
    global dupLists
    #    global droppedCols
    dupLists = {}
    allDups = []
    bigDF = pd.read_csv(listOfCSVFilenames[0])
    # NB: without assignment, set_index returns a new frame and this line is
    # a no-op; the joins below therefore align on the default RangeIndex
    bigDF.set_index(keys='Unnamed: 0')
    droppedCols = {'test'}
    droppedTingsTest = []
    for fileName in listOfCSVFilenames[1:]:
        nextDFtoJoin = pd.read_csv(fileName)
        colsToDrop = set(bigDF.columns) & set(nextDFtoJoin.columns)

        colsToDrop = colsToDrop | {'Unnamed: 0'}
        droppedTingsTest.append(colsToDrop)
        droppedCols = droppedCols | colsToDrop
        for col in colsToDrop:
            if col in nextDFtoJoin.columns:
                nextDFtoJoin.drop(col, axis=1, inplace=True)
        bigDF = bigDF.join(nextDFtoJoin)  #, rsuffix='_'+fileName[:-4]+'_DUP')

        for col in bigDF.columns:
            if col[-4:] == '_DUP':
                if col not in allDups:
                    allDups.append(col)
                    try:
                        dupLists[fileName].append(col)
                    except KeyError:
                        dupLists[fileName] = [col]
    if outFile != '':
        bigDF.to_csv(sf.addFolderPath(outFile), index=False)
    return bigDF
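
A hedged usage sketch, chaining this with makeCSVlistFromFolderName from Code Example #1 (the folder and output names are made up):

# Hypothetical usage; 'intermediateResults' and 'combined.csv' are illustrative.
csvList = makeCSVlistFromFolderName('intermediateResults')
bigDF = combineIntermediateResultsCSVs(csvList, outFile='combined.csv')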
Code Example #13
File: SFS.py Project: crees00/SchoolsData
import setFolder as sf
import genericModelClass as gmc
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
import pickling
import os
import matplotlib.pyplot as plt
from math import ceil
csv = sf.addFolderPath('bbbbVgsbbbsdf7.csv')
df = pd.read_csv(csv)
xCols = [
    x for x in (set(df.columns) - {
        "URN", "Stuck", "Class", "Unnamed: 0", 'Unnamed: 0.1',
        'GOR_Not Applicable'
    })
]
x = df[xCols]
y = df["Class"]

runDict = {
    #         'NN_original_2_5_adam_0.0001':{'clf':MLPClassifier(hidden_layer_sizes=(5,5), solver='adam', max_iter=1000)},
    #     'RF_original_200_11_gini_True':{'clf':RandomForestClassifier(n_estimators=200, max_depth=11, criterion='gini', bootstrap=True)},
    #              'SVM_original_4_rbf_2_0.016':{'clf':SVC(C=4, gamma=0.016)},
Code Example #14
    },
    'RFE': {
        30: 'No\nRFE',
        5: '5',
        10: '10',
        15: '15',
        20: '20',
        25: '25'
    }
}
# Rename each entry's keys to p1..p4 (assumes the entries are list-like,
# indexable by 0..3)
for item in paramDict.keys():
    paramDict[item] = {('p' + str(i)): paramDict[item][i - 1]
                       for i in range(1, 5)}

#df = pd.read_csv(sf.addFolderPath('paramsearch3forDF7Added.csv'))
df = pd.read_csv(sf.addFolderPath('paramSearch2forOldStuckAdded.csv'))

measureList = ['auc', 'acc', 'recall1', 'recall0', 'precision1', 'precision0']
#paramScatterPlots(df, 'auc', subplots=True)
#scores= bestModelsBarPlot(df, mins=mins)
#makeSubplots(df, measureList, mins=mins, figsize=(15,7), ymax=1, chosenMeasure='precision1')
#
for score in measureList:
    RFEBarPlot(df,
               score=score,
               OS=True,
               subPlots=True,
               mins=mins,
               barwidth=0.3)

#scores = findParamsOfBestRuns(df, mins=mins)
Code Example #15
           'GNB':{'cols':CS.GNBcols, 'model':GaussianBayes},
#           'AB':{'cols':CS.RFcols2, 'model':AdaBoost}
           }

if __name__ == "__main__":
    import emailing
    import pickling
#    doneRuns=[]
    files = ['bbbbVgsbbbsdf7.csv']
    for fileName in files:
        for modelType in colDict.keys():
            pickling.save_dill(doneRuns, f"doneRuns_{len(doneRuns)}")
            modelDataDict = {}
            df = pd.read_csv(sf.addFolderPath(fileName))
            cols = list(df.columns)
            xCols = [x for x in (set(cols) -
                                 {"URN", "Stuck", "Class", "Unnamed: 0",
                                  'Unnamed: 0.1', 'PTRWM_EXP__18',
                                  'GOR_Not Applicable'})]
            x = df[xCols]
            print(x.columns)
            y = df["Class"]
            try:
                modelsAtStart = len(modelDict)
            except NameError:
                modelsAtStart = 0
            # Generate data and model instances, run the models
            modelDataDict, modelDict = runAGroup(
                [
Code Example #16
def save_dill(obj, name):
    '''Pickle obj with dill; prepends the folder path and '.pik' for you'''
    with open(sf.addFolderPath(name + '.pik'), 'wb') as f:
        dill.dump(obj, f)
    print('pickled')
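
A round trip with load_dill from Code Example #4 (the object and name are illustrative):

# Hypothetical round trip; save_dill appends '.pik', load_dill does not.
save_dill({'a': 1}, 'demoDict')
reloaded = load_dill('demoDict.pik')  # falls back to sf.addFolderPath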
Code Example #17
def makeLabelledSubsets(dictOfURNGroups, cat1, cat2, df, write=''):
    """Add a 'Class' column to the df:
    1 if the URN is in the cat1 ('stuck') list,
    2 if the URN is in neither list (these rows are candidates to drop),
    0 otherwise.
    If write is non-empty, write the result to .csv
    """
    import creatingAMonster as cam
    print("Adding/updating stuck column in df...")
    posURNs, negURNs = dictOfURNGroups[cat1], dictOfURNGroups[cat2]
    allURNs = posURNs + negURNs
    URNsToDrop = set(df['URN']) - set(allURNs)
    #    df = df[~df['URN'].isin(URNsToDrop)]
    df["Class"] = df.apply(lambda row: np.where(
        (int(row["URN"]) in posURNs), 1,
        np.where(int(row["URN"]) in URNsToDrop, 2, 0)),
                           axis=1)
    print(df['Class'].value_counts())
    df = cam.dropColsFromList(df, ['Stuck'])
    if len(write) > 0:
        df.to_csv(sf.addFolderPath(write))
    return df


dictOfURNs = makeURNListFromGroupDict(newGrouping(openSchoolDict, True))
inputDF = pd.read_csv(
    sf.addFolderPath('AllDatanotNormedForFeaturePlots_bbbbVgsbbbsImputed.csv'))
#dfWithCats = makeLabelledSubsets(dictOfURNs, 'bbb','gbb',inputDF, 'bbbVgbbLessCols.csv')
#dfWithCats = makeLabelledSubsets(dictOfURNs, 'bbbb','gbbb',inputDF, 'bbbbVgbbbLessCols.csv')
#dfWithCats = makeLabelledSubsets(dictOfURNs, 'bbb','gsbbs', inputDF, 'bbbVgsbbsLessCols.csv')
dfWithCats = makeLabelledSubsets(
    dictOfURNs, 'bbbb', 'gsbbbs', inputDF,
    sf.addFolderPath(
        'AllDatanotNormedForFeaturePlots_bbbbVgsbbbsImputedWithClass.csv'))
Code Example #18
import numpy as np
import pandas as pd
from sklearn.metrics import (
    roc_curve,
    roc_auc_score,
    auc,
    classification_report,
)
import re
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import RFE
import itertools
from random import sample
import datetime
import colSubsets as CS
import setFolder as sf

df = pd.read_csv(sf.addFolderPath('bbbbVgsbbbsdf7.csv'))


def makeDFofResults(write=''):
    numSchools = len(df)
    bigDF = pd.DataFrame({'tester': np.zeros(len(df))},
                         index=list(range(len(df))))
    oldName = ''
    for modelName, modelInstance in modelDict.items():
        # strip the '_kofN' fold marker so the 5 folds share one column name
        newName = ''.join(re.split('_[0-9]of[0-9]', modelName))

        modData = modelInstance.getData()
        xTest = modData.getxTest()
        yTest = modData.getyTest()
Code Example #19
"""
Created on Sat Aug 31 12:10:27 2019

Plot histograms of each variable with class1 vs class0
Class1 (bad next) is blue
Class0 (good next) is orange

@author: Chris
"""
from os import listdir
import matplotlib.pyplot as plt
import pandas as pd
import setFolder as sf
#df = pd.read_csv(sf.addFolderPath( 'notNormedForFeaturePlots_bbbbVgsbbbs.csv'))
df = pd.read_csv(
    sf.addFolderPath('AllDatanotNormedForFeaturePlots_bbbbVgsbbbs.csv'))
# computed full column list (overridden just below by a hand-picked subset)
plotCols = [
    x for x in (set(df.columns) -
                {"URN", "Stuck", "Class", "Unnamed: 0", 'Unnamed: 0.1'})
]

plotCols = [
    'PNUMFSM',
    'Total revenue balance (1) as a % of total revenue income (6) 2017-18',
    'ISPRIMARY', 'Premises.2018', 'PTKS1GROUP_L__18', 'PNUMEAL', 'ISSECONDARY',
    'PERCTOT', 'AcademyNew', 'Pupil:     Teacher Ratio',
    'Supply.Staff_4yrDiff', 'Mean Gross FTE Salary of All Teachers (£s)',
    'TOTPUPS__18', 'PerformancePctRank', 'AGEL'
]
plt.figure(figsize=(16, 20))
i = 0
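
The plotting loop itself is cut off in this snippet; a plausible continuation, consistent with the docstring (class 1 plotted first so it takes the default blue, class 0 second so it takes orange), might be:

# Sketch of the truncated loop body (an assumption, not the project's code)
for col in plotCols:
    i += 1
    plt.subplot(5, 3, i)  # 15 columns -> 5 rows x 3 cols
    plt.hist(df[df['Class'] == 1][col].dropna(), alpha=0.5)  # Class1: blue
    plt.hist(df[df['Class'] == 0][col].dropna(), alpha=0.5)  # Class0: orange
    plt.title(col, fontsize=8)
plt.tight_layout()
plt.show()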
Code Example #20
def processCSV(csv, write=False, addCols=True):
    ''' Takes in an avg-results csv.
    Reads each run name and extracts the run info into columns for
    later plotting etc.
    Adds one-hot cols for RFE & OS.
    Adds p1/2/3/4 cols with the param values for that run.
    '''
    if type(csv) == str:
        df = pd.read_csv(sf.addFolderPath(csv))
    else:
        df = csv
    df.set_index('Unnamed: 0', inplace=True)
    df = df.sort_values(by='acc', axis=1, ascending=False)
    df = df.transpose()
    print(f'Analysing {csv}:')
    # Make subset of df that has the minimum scores in the dict
    dfWithMins = df.copy()
    minScores = {
        'acc': 0.5,
        'recall1': 0.3,
        'F0': 0.1,
        'F1': 0.1,
        'precision0': 0.1
    }
    for measure, score in minScores.items():
        # filter cumulatively so every minimum must hold at once
        dfWithMins = dfWithMins[dfWithMins[measure] > score]

    best = {}
    for col in [
            'F0', 'F1', 'acc', 'auc', 'precision0', 'precision1', 'recall0',
            'recall1'
    ]:
        print(f"Best {col}:")
        runName = dfWithMins.loc[:, col].idxmax()
        try:
            best[col] = (dfWithMins.loc[:, col].max(), runName,
                         dfWithMins.loc[runName, :])
            print(best[col][0], 'for', best[col][1])
        except KeyError:
            pass

    df.columns.rename('Run', inplace=True)

    #    df=df.head(100)
    if addCols:
        #    df['RunName'] = df.index
        df['Model'] = None
        df['RFE'] = None
        df['OS'] = None
        df['p1'], df['p2'], df['p3'], df['p4'] = None, None, None, None
        for runName in df.index:
            # Identify model type in 'Model' col
            nameDict = {
                'SV': 'SVM',
                'NN': 'NN',
                'RF': 'RF',
                'GN': 'GNB',
                'LR': 'LR',
                'KN': 'KNN'
            }
            df.loc[runName, 'Model'] = nameDict[runName[:2]]
            end = 3
            if runName[:2] in ['GN', 'SV', 'KN']:
                end = 4

            # Put no. of RFE vals in RFE col - 0 if RFE not used
            if len(re.findall('RFE', runName)) > 0:
                RFE = runName[runName.find('RFE') + 3:runName.find('RFE') + 5]
                try:
                    RFE = int(RFE)
                    end += 6
                except ValueError:
                    RFE = int(RFE[0])
                    end += 5
            else:
                RFE = 0
            df.loc[runName, 'RFE'] = RFE

            # Put 1 in 'OS' col if oversampled
            if len(re.findall('OS', runName)) > 0:
                df.loc[runName, 'OS'] = 1
                end += 3
            else:
                df.loc[runName, 'OS'] = 0
            # Fix for 'original'
            if end < 5:
                end += 9

            # Sort out params
            if runName[:2] in ['SV', 'NN', 'RF', 'KN']:
                bits = []
                string = ''
                for char in runName[end:len(runName)]:
                    if char == '_':
                        if string == 'None':
                            string = 20
                        try:
                            string = float(string)
                            bits.append(string)
                        except ValueError:
                            bits.insert(0, string)
                        string = ''
                    else:
                        string += char
                if string == 'False':
                    string = 0
                elif string == 'True':
                    string = 1
                bits.append(float(string))
                for i, param in enumerate(['p1', 'p2', 'p3', 'p4']):
                    if (runName[:2] == 'KN') and i == 3:
                        df.loc[runName, param] = 0
                    else:
                        df.loc[runName, param] = bits[i]
        # Make model type one hot
        df = pd.get_dummies(df, columns=['Model'], prefix='', prefix_sep='')

        if write and isinstance(csv, str):
            # csv[:-4] strips '.csv'; only possible when a filename was passed
            df.to_csv(sf.addFolderPath(csv[:-4] + 'Added.csv'))

    return df
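
A hedged usage sketch (the filename is illustrative; any averaged-results csv such as the output of makeAvgResults should fit):

# Hypothetical usage; 'avgResults.csv' is a made-up filename.
dfAdded = processCSV('avgResults.csv', write=True, addCols=True)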