#Standard-library and third-party imports used below. Project-local helpers referenced in these
#functions (e.g. cleanUpFlowjoCSV, returnMultiIndex, performCommaCheck, decodeBarcodedPlates,
#unpackMultiplexedPlates, reorderDfByInputOrder, createAndCombineBaseDataFrames, returnModifiedDf,
#calibrateDataFrame, Hill, InverseHill, r_squared, parseCytokineCSVHeaders, cellDataProcessing,
#plateRowLetters, plateColumnNumbers, dataTypeLevelNames, listOfCytokines, MWofCytokines,
#completeCytokineMWDict, cytokineHeaderNames, cellHeaderNames, singleCellHeaderNames,
#secondPathBool, upperCase) are assumed to be imported or defined elsewhere in this module.
import glob
import json
import math
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.optimize import curve_fit

def createBaseDataFrame(experimentParameters,folderName,experimentNumber,dataType,layoutDict):
    if experimentParameters['format'] == 'tube':
        fullFormatDf = pickle.load(open('misc/tubeLayout-'+folderName+'-cell.pkl','rb'))
        dfList = []
        levelLabelDict = experimentParameters['levelLabelDict']
        for fileName in os.listdir('inputData/bulkCSVFiles/'):
            if '.csv' in fileName:
                performCommaCheck(fileName)
                bulkTubeCSVFileName = fileName
                columnMultiIndexTuples = cellDataProcessing.parseCellCSVHeaders(pd.read_csv('inputData/bulkCSVFiles/'+bulkTubeCSVFileName).columns)
                columnMultiIndex = pd.MultiIndex.from_tuples(columnMultiIndexTuples,names=['CellType','Marker','Statistic'])
                fullData = pd.read_csv('inputData/bulkCSVFiles/'+bulkTubeCSVFileName,header=0)
                #Drop the trailing unnamed column FlowJo sometimes adds, and the last two summary rows
                if 'Unnamed' in fullData.columns[-1]:
                    data = fullData.iloc[:-2,1:-1].values
                else:
                    data = fullData.iloc[:-2,1:].values
                sampleNames = fullData.iloc[:-2,0].values.ravel()
                sampleIndexStart = fullFormatDf.values.ravel().tolist().index(sampleNames[0])
                sampleIndexEnd = fullFormatDf.values.ravel().tolist().index(sampleNames[-1])
                rowMultiIndex = fullFormatDf.iloc[sampleIndexStart:sampleIndexEnd+1,:].index
                timeDataList = []
                timeSubsets = []
                times = levelLabelDict[list(levelLabelDict.keys())[-1]]
                #Can use sample name file to assign time values
                if 'sampleNameFile.xlsx' in os.listdir('misc') or 'sampleNameFile.csv' in os.listdir('misc'):
                    if 'sampleNameFile.xlsx' in os.listdir('misc'):
                        sampleNameDf = pd.read_excel('misc/sampleNameFile.xlsx')
                    else:
                        sampleNameDf = pd.read_csv('misc/sampleNameFile.csv')
                    if 'Time' in sampleNameDf.columns:
                        for time in times:
                            timeIndices = []
                            for row in range(sampleNameDf.shape[0]):
                                if sampleNameDf[list(levelLabelDict.keys())[-1]].values[row] == time:
                                    timeIndices.append(row)
                            timeSubsets.append(timeIndices)
                    #Otherwise just assume 1 timepoint (HACK NEED TO FIX EVENTUALLY)
                    else:
                        timeSubsets.append(list(range(data.shape[0])))
                #Otherwise just assume 1 timepoint (HACK NEED TO FIX EVENTUALLY)
                else:
                    timeSubsets.append(list(range(data.shape[0])))
                for timeSubset in timeSubsets:
                    dataList = []
                    columnTupleList = []
                    for i,columnTuple in enumerate(columnMultiIndexTuples):
                        ser = pd.Series(data[timeSubset,i],index=rowMultiIndex)
                        dataList.append(ser)
                        columnTupleList.append(tuple(columnTuple))
                    fullExperimentDf = pd.concat(dataList,keys=columnTupleList,names=['CellType','Marker','Statistic'])
                    timeDataList.append(fullExperimentDf)
                #Debug check for repeated index entries
                k = pd.concat(timeDataList,keys=times,names=[list(levelLabelDict.keys())[-1]])
                repeatList = []
                for name in k.index:
                    if name not in repeatList:
                        repeatList.append(name)
                    else:
                        print('Repeated:')
                        print(name)
                partialExperimentDf = pd.concat(timeDataList,keys=times,names=[list(levelLabelDict.keys())[-1]]).unstack(list(levelLabelDict.keys())[-1])
                dfList.append(partialExperimentDf)
        fullExperimentDf = pd.concat(dfList)
    else:
        realDataType = dataType
        if 'barcodingDict' in list(experimentParameters.keys()):
            decodeBarcodedPlates(experimentParameters,folderName,dataType)
        if 'unpackingDict' in list(experimentParameters.keys()):
            unpackMultiplexedPlates(experimentParameters,folderName,dataType)
        #Legacy experiment parameter files compatibility
        if 'paired' in experimentParameters.keys():
            if experimentParameters['paired']:
                numRowPlates = 2
            else:
                numRowPlates = 1
            numColumnPlates = int(experimentParameters['numPlates']/numRowPlates)
        else:
            numRowPlates = 1
            numColumnPlates = experimentParameters['numPlates']
        #Combine plate and well IDs into a single ID val for every single sample in the experiment
        identificationMatrix = np.empty(layoutDict['plateID'].shape,dtype=object)
        for row in range(identificationMatrix.shape[0]):
            for col in range(identificationMatrix.shape[1]):
                wellID = layoutDict['wellID'][row,col]
                plateID = layoutDict['plateID'][row,col]
                fullID = plateID+'-'+wellID
                identificationMatrix[row,col] = fullID
        plateNames = np.unique(layoutDict['plateID'])
        levelLabelDict = experimentParameters['levelLabelDict']
        plateDimensions = experimentParameters['overallPlateDimensions']
        levels = list(levelLabelDict.keys())
        conditionLevels = list(levelLabelDict.keys())[:-1]
        conditionLevelValues = levelLabelDict.copy()
        del conditionLevelValues[list(levelLabelDict.keys())[-1]]
        allLevelValues = experimentParameters['levelLabelDict']
        sortedData,sortedFiles = cleanUpFlowjoCSV(plateNames,folderName,dataType,experimentParameters)
        allRawData,newLevelList = returnMultiIndex(sortedData,sortedFiles,realDataType,folderName)
        dfList = []
        for rawData,plateID in zip(allRawData,plateNames):
            fullTupleList = []
            index = 0
            for row in range(rawData.shape[0]):
                #FlowJo sample numbers are 1-indexed; convert to a 0-indexed well position
                sampleID = int(rawData.iloc[row,0])-1
                wellID = plateRowLetters[int(sampleID/plateDimensions[1])]+str(plateColumnNumbers[sampleID % plateDimensions[1]])
                fullID = plateID+'-'+wellID
                sampleLocation = np.argwhere(identificationMatrix == fullID)[0]
                column = []
                tupleList = []
                for levelID in layoutDict['keys']:
                    levelValueID = layoutDict['keys'][levelID][sampleLocation[0],sampleLocation[1]]
                    if levelValueID != 'blank':
                        level = levels[levelID]
                        levelValue = allLevelValues[level][levelValueID]
                        tupleList.append(levelValue)
                    else:
                        print('wat2')
                if len(tupleList) != 0:
                    fullTupleList.append(tupleList)
                index+=1
            mi = pd.MultiIndex.from_tuples(fullTupleList,names=levels)
            columnSeriesList = []
            columnTupleList = []
            for column,columnTuple in enumerate(newLevelList):
                columnSeries = pd.Series(rawData.values[:,column+1],index=mi)
                columnSeriesList.append(columnSeries)
                columnTupleList.append(tuple(columnTuple))
            platedf = pd.concat(columnSeriesList,axis=0,keys=columnTupleList,names=dataTypeLevelNames[realDataType])
            dfList.append(platedf)
        idx=pd.IndexSlice
        fullExperimentDf = pd.concat(dfList)
        #dfl = [fullExperimentDf.xs([12.0],level=['Time']),fullExperimentDf.xs([60.0],level=['Time']),fullExperimentDf.xs([96.0],level=['Time']),fullExperimentDf.xs([156.0],level=['Time'])]
        #Debug check for repeated index entries
        tempdf = fullExperimentDf.to_frame('temp')
        temp = []
        for row in range(fullExperimentDf.shape[0]):
            name = list(tempdf.iloc[row,:].name)
            if name in temp:
                print(name)
                print(row)
            else:
                temp.append(name)
        #Remove blanks
        for i,level in enumerate(fullExperimentDf.index.names):
            tempLevelValues = pd.unique(fullExperimentDf.index.get_level_values(level))
            if 'Blank' in tempLevelValues:
                fullExperimentDf = fullExperimentDf.drop('Blank',level=i)
        #Second debug check for repeated index entries after blank removal
        temp = []
        temp2 = []
        tempdf = fullExperimentDf.to_frame('wat')
        for row in range(fullExperimentDf.shape[0]):
            name = list(tempdf.iloc[row,:].name)
            if name not in temp:
                temp.append(name)
            else:
                temp2.append(name)
        fullExperimentDf = fullExperimentDf.unstack(list(levelLabelDict.keys())[-1])
    fullExperimentDf = reorderDfByInputOrder(experimentParameters,fullExperimentDf)
    return fullExperimentDf
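#Worked example of the well-ID arithmetic used in createBaseDataFrame above. The sketch below is
#illustrative only and not used by the pipeline; it assumes (as the variable names suggest) that
#plateRowLetters runs 'A'-'H' and plateColumnNumbers runs 1-12 for an 8x12 plate. For example, a
#FlowJo sample number of 14 gives sampleID = 13, row plateRowLetters[13 // 12] = 'B', column
#plateColumnNumbers[13 % 12] = 2, i.e. well 'B2'.
def _wellIDFromSampleNumberSketch(sampleNumber, plateDimensions=(8, 12)):
    rowLetters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
    columnNumbers = list(range(1, 13))
    sampleID = int(sampleNumber) - 1  #FlowJo sample numbers are 1-indexed
    return rowLetters[sampleID // plateDimensions[1]] + str(columnNumbers[sampleID % plateDimensions[1]])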
def calibrateExperiment(folderName,secondPath,concUnit,concUnitPrefix,numberOfCalibrationSamples,initialStandardVolume):
    #Get cytokine calibration curve data
    sortedData,newMultiIndexedObject = cleanUpFlowjoCSV(['Calibration'],folderName,'cyt')
    calibration = sortedData[0]
    data = np.array(calibration.values[:,1:8],dtype=float)
    fittingParameters = np.zeros((np.shape(listOfCytokines)[0],4))
    concLOD = np.zeros((np.shape(listOfCytokines)[0],2))
    #Initial concentration of all cytokine standards is given by the CBA kit manual as 5000 pg/mL when standards are diluted in 2 mL
    conc = 5000 #pg/mL
    serialDilutionFactor = 2 #1:serialDilutionFactor dilution between each standard well
    #A smaller initial dilution volume (e.g. 0.5 mL instead of 2 mL) increases the initial concentration of the first calibration sample
    initialConc = (conc*1e-12)/((initialStandardVolume*1e-3)/2) #g/L; (pg/mL * 1e-12 g/pg)/(1e-3 L/mL)
    #Calibration samples are always diluted by a factor of serialDilutionFactor (so with 12 calibration samples, the last sample is (serialDilutionFactor^-11) times the concentration of the first, which is pure standard (2^0))
    cbaStandardsConcentrations = np.flipud(initialConc*np.power(serialDilutionFactor,np.linspace(-numberOfCalibrationSamples+1,0,numberOfCalibrationSamples)))
    #More x values along the above concentration bounds are sampled to construct the calibration curve. Plot points are extended slightly at the high range to allow visualization of the upper LOD (not accessible with the experimental dilution)
    cbaStandardsConcentrationsPlotPoints = np.flipud(initialConc*np.power(2,np.linspace(-numberOfCalibrationSamples+1,4,101)))

    fig1=plt.figure(num=1,figsize=(10,10))
    plt.gcf().set_facecolor('white')
    color_list = plt.cm.jet(np.linspace(0,1,7))
    ax=fig1.add_subplot(1,1,1)

    concLOD = {}
    for i in range(len(listOfCytokines)):
        #Amplitude bounded from range/2 to range*2, EC50 bounded from minimum to maximum standard concentration tested, Hill coefficient bounded from 0 to 2, background bounded from 0 to minimum GFI*2
        lowerCurveFitBounds = [(np.max(data[:,i])-np.min(data[:,i]))/2,np.min(cbaStandardsConcentrations),0,0]
        upperCurveFitBounds = [(np.max(data[:,i])-np.min(data[:,i]))*2,np.max(cbaStandardsConcentrations),2,np.min(data[:,i])*2]
        #Use scipy's curve_fit to determine the best Hill equation fit for the data, searching within the bounds given above
        popt,pcov = curve_fit(Hill,cbaStandardsConcentrations,np.log10(data[:,i]),sigma=np.log10(data[:,i]),bounds=(lowerCurveFitBounds,upperCurveFitBounds))
        rsquared = round(r_squared(cbaStandardsConcentrations,np.log10(data[:,i]),Hill,popt),3)
        for j in range(len(popt)):
            #Convert only the EC50 value to the desired units (nM, uM, etc.)
            if j == 1:
                fittingParameters[i,j] = np.multiply(popt[j],(concUnit/MWofCytokines[i]))
            #The other values in the 4-parameter logistic equation are tied to the intensity y-value, which doesn't change, or are the Hill coefficient, which is completely separate, so those parameters are kept the same
            else:
                fittingParameters[i,j] = popt[j]

        #Convert x values of the experimental data points and curve fit points to the desired units (nM, uM, etc.)
        convertedCBAStandards = np.multiply(cbaStandardsConcentrations,(concUnit/MWofCytokines[i]))
        convertedCBAStandardsPlotPoints = np.multiply(cbaStandardsConcentrationsPlotPoints,(concUnit/MWofCytokines[i]))
        #Plot the experimental points and the curve fit line (with the previously determined fitting parameters) on a log-log scale
        plt.loglog(convertedCBAStandards,data[:,i],'o',color=color_list[i,:],label=listOfCytokines[i])
        plt.loglog(convertedCBAStandardsPlotPoints,np.power(10,Hill(convertedCBAStandardsPlotPoints,*fittingParameters[i,:])),color=color_list[i,:],label=listOfCytokines[i]+'_fit; R2 = '+str(rsquared))

        #Get the LOD for each cytokine calibration curve (i.e. the linear range of the calibration curve)
        backgroundGFI = fittingParameters[i,3]
        amplitudeGFI = fittingParameters[i,0]
        #Approximate the LOD by determining concentration values at LOD% and 1-LOD% (3% and 97%) of the curve. Must be applied to log10(curve), as the calibration curve is plotted on a log scale
        LODpercent = 0.03
        #LOD% more than the background GFI is used for the lower LOD GFI
        lowerGFILOD = math.log10(10**((1+LODpercent)*math.log10(backgroundGFI)))
        #LOD% less than the maximum GFI (background + amplitude) is used for the upper LOD GFI
        upperGFILOD = math.log10(10**((1-LODpercent)*math.log10(amplitudeGFI+backgroundGFI)))
        #log10(upper/lower GFI LOD) is converted back into normal GFI by raising 10 to its power, then fed into the inverse Hill equation with the current cytokine's fitting parameters to obtain the corresponding concentration values
        lowerConcLOD = InverseHill(lowerGFILOD,fittingParameters[i,:])
        upperConcLOD = InverseHill(upperGFILOD,fittingParameters[i,:])
        listLOD = [10**lowerGFILOD,10**upperGFILOD,lowerConcLOD,upperConcLOD]
        #Create dict with keys as cytokines, values as GFI/concentration LODs
        concLOD[listOfCytokines[i]] = listLOD
        #Plot vertical lines at the lower and upper concentration limits of detection
        plt.axvline(x=lowerConcLOD,color=color_list[i,:],linestyle=':')
        plt.axvline(x=upperConcLOD,color=color_list[i,:],linestyle=':')

    plt.legend(loc=0,numpoints=1)
    plt.ylabel('GeoMFI') #Geometric mean fluorescence intensity of each standard
    plt.xlabel('Concentration of Cytokine Standards ('+concUnitPrefix+')')
    plt.title('Calibration of CBA assay \n'+folderName,fontsize=14)
    plt.close()
    fig1.savefig('fullyProcessedFigures/calibrationCurves-'+folderName+'-'+concUnitPrefix+'.png')
    #Save fitting parameters and LODs of the curve fit for each cytokine
    with open('semiProcessedData/fittingParameters-'+folderName+'-'+concUnitPrefix+'.pkl', "wb") as f:
        pickle.dump(fittingParameters, f)
    with open('semiProcessedData/LODParameters-'+folderName+'-'+concUnitPrefix+'.pkl', "wb") as f:
        pickle.dump(concLOD, f)
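#The calibration routines above and below rely on the project-local helpers Hill, InverseHill,
#and r_squared, which are defined elsewhere in this repository. As a hedged reference only, the
#sketches below (given deliberately different, hypothetical names so they shadow nothing) show a
#4-parameter logistic form that is consistent with how those helpers are called here: Hill returns
#log10(GFI) so it can be fit against np.log10(data), and InverseHill maps a log10(GFI) value back
#to a concentration using the [amplitude, EC50, hillCoeff, background] parameter order implied by
#the curve-fit bounds.
def _hillSketch(x, amplitude, ec50, hillCoeff, background):
    #log10 of a 4-parameter logistic: background + amplitude * x^n / (EC50^n + x^n)
    return np.log10(background + amplitude * np.power(x, hillCoeff) / (np.power(ec50, hillCoeff) + np.power(x, hillCoeff)))

def _inverseHillSketch(logGFI, parameters):
    #Invert the logistic above for a single log10(GFI) value
    amplitude, ec50, hillCoeff, background = parameters
    gfi = 10**logGFI
    return ec50 * np.power((gfi - background) / (amplitude - (gfi - background)), 1.0 / hillCoeff)

def _rSquaredSketch(x, y, func, popt):
    #Coefficient of determination of func(x, *popt) against y
    residuals = y - func(x, *popt)
    return 1 - np.sum(residuals**2) / np.sum((y - np.mean(y))**2)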
def calibrateExperiment(folderName, secondPath, concUnit, concUnitPrefix,
                        numberOfCalibrationSamples, initialStandardVolume):
    #Get cytokine calibration curve data
    tempExperimentParameters = {'overallPlateDimensions': [8, 12]}
    calibrationFileNames = glob.glob('inputData/bulkCSVFiles/Calibration*')
    print(calibrationFileNames)
    calibrationNames = []
    kitNames = []
    for calibrationFileName in calibrationFileNames:
        newName = calibrationFileName.split('.')[0].split('_')[0].split('/')[-1]
        kitNames.append(newName)
    print(kitNames)
    sortedData, sortedFiles = cleanUpFlowjoCSV(kitNames, folderName, 'cyt', tempExperimentParameters)
    #Keep only the portion of each kit name after the hyphen, if present
    for i, newName in enumerate(kitNames):
        if '-' in newName:
            newName2 = newName.split('-')[1]
        else:
            newName2 = newName
        kitNames[i] = newName2
    rsquaredList = []
    concLODList = []
    fittingParametersList = []
    cbaStandardsMFIList = []
    cbaPlotPointsMFIList = []
    cbaStandardsConcentrationList = []
    cbaPlotPointsConcentrationList = []
    numberOfPlotPoints = 101
    xaxistitle = 'Concentration of Cytokine Standards (' + concUnitPrefix + ')'
    yaxistitle = 'GeoMFI'
    #Only convert concentrations to molar units if every cytokine has a molar mass in the dict
    allCytokinesHaveMWInDict = True
    for calibration in sortedData:
        cytokines = parseCytokineCSVHeaders(calibration.columns)
        for cytokine in cytokines:
            if cytokine[0] not in completeCytokineMWDict:
                allCytokinesHaveMWInDict = False
                print(cytokine[0])
                print('wat')
    print(allCytokinesHaveMWInDict)
    for calibration in sortedData:
        data = np.array(calibration.values[:, 1:-1], dtype=float)
        cytokines = parseCytokineCSVHeaders(calibration.columns)
        fittingParameters = np.zeros((data.shape[1], 4))
        concLOD = np.zeros((data.shape[1], 4))
        #Initial concentration of all cytokine standards is given by the CBA kit manual as 5000 pg/mL when standards are diluted in 2 mL
        conc = 5000  #pg/mL
        serialDilutionFactor = 2  #1:serialDilutionFactor dilution between each standard well
        #A smaller initial dilution volume (e.g. 0.5 mL instead of 2 mL) increases the initial concentration of the first calibration sample
        initialConc = (conc * 1e-12) / ((initialStandardVolume * 1e-3) / 2)  #g/L; (pg/mL * 1e-12 g/pg)/(1e-3 L/mL)
        #Calibration samples are always diluted by a factor of serialDilutionFactor (so with 12 calibration samples, the last sample is (serialDilutionFactor^-11) times the concentration of the first, which is pure standard (2^0))
        cbaStandardsConcentrations = np.flipud(initialConc * np.power(
            serialDilutionFactor,
            np.linspace(-numberOfCalibrationSamples + 1, 0, numberOfCalibrationSamples)))
        #More x values along the above concentration bounds are sampled to construct the calibration curve. Plot points are extended slightly at the high range to allow visualization of the upper LOD (not accessible with the experimental dilution)
        cbaStandardsConcentrationsPlotPoints = np.flipud(initialConc * np.power(
            2, np.linspace(-numberOfCalibrationSamples + 1, 4, numberOfPlotPoints)))

        cbaStandardsConcentrationMatrix = np.zeros([len(cytokines), cbaStandardsConcentrations.shape[0]])
        cbaStandardsConcentrationPlotPointsMatrix = np.zeros([len(cytokines), cbaStandardsConcentrationsPlotPoints.shape[0]])
        cbaStandardsMFIMatrix = np.zeros([len(cytokines), cbaStandardsConcentrations.shape[0]])
        cbaStandardsMFIPlotPointsMatrix = np.zeros([len(cytokines), cbaStandardsConcentrationsPlotPoints.shape[0]])
        color_list = sns.color_palette(sns.color_palette(), len(cytokines))
        print(cytokines)
        print(data)
        for i, cytokineList in enumerate(cytokines):
            cytokine = cytokineList[0]
            #Amplitude bounded from range/2 to range*2, EC50 bounded from minimum to maximum standard concentration tested, Hill coefficient bounded from 0 to 2, background bounded from 0 to minimum GFI*2
            lowerCurveFitBounds = [(np.max(data[:, i]) - np.min(data[:, i])) / 2,
                                   np.min(cbaStandardsConcentrations), 0, 0]
            upperCurveFitBounds = [(np.max(data[:, i]) - np.min(data[:, i])) * 2,
                                   np.max(cbaStandardsConcentrations), 2,
                                   np.min(data[:, i]) * 2]
            #Use scipy's curve_fit to determine the best Hill equation fit for the data, searching within the bounds given above
            popt, pcov = curve_fit(Hill,
                                   cbaStandardsConcentrations,
                                   np.log10(data[:, i]),
                                   sigma=np.log10(data[:, i]),
                                   bounds=(lowerCurveFitBounds, upperCurveFitBounds))
            rsquared = round(r_squared(cbaStandardsConcentrations, np.log10(data[:, i]), Hill, popt), 3)
            print(rsquared)
            rsquaredList.append(rsquared)
            for j in range(len(popt)):
                #Convert only the EC50 value to the desired units (nM, uM, etc.), and only if the cytokine has a molar mass in the dict
                if j == 1 and allCytokinesHaveMWInDict:
                    fittingParameters[i, j] = np.multiply(popt[j], (concUnit / completeCytokineMWDict[cytokine]))
                #The other values in the 4-parameter logistic equation are tied to the intensity y-value, which doesn't change, or are the Hill coefficient, which is completely separate, so those parameters are kept the same
                else:
                    fittingParameters[i, j] = popt[j]
            #Convert x values of the experimental data points and curve fit points to the desired units (nM, uM, etc.)
            if allCytokinesHaveMWInDict:
                cbaStandardsConcentrationMatrix[i, :] = np.multiply(
                    cbaStandardsConcentrations, (concUnit / completeCytokineMWDict[cytokine]))
                cbaStandardsConcentrationPlotPointsMatrix[i, :] = np.multiply(
                    cbaStandardsConcentrationsPlotPoints, (concUnit / completeCytokineMWDict[cytokine]))
            else:
                cbaStandardsConcentrationMatrix[i, :] = cbaStandardsConcentrations
                cbaStandardsConcentrationPlotPointsMatrix[i, :] = cbaStandardsConcentrationsPlotPoints
            cbaStandardsMFIMatrix[i, :] = data[:, i]
            print(fittingParameters[i, :])
            cbaStandardsMFIPlotPointsMatrix[i, :] = np.power(
                10, Hill(cbaStandardsConcentrationPlotPointsMatrix[i, :], *fittingParameters[i, :]))
            #Plot on log-log scale the experimental points and the curve fit line with previously determined curve fitting parameters
            #plt.loglog(cbaStandardsConcentrations,data[:,i],'o',color=color_list[i,:],label=listOfCytokines[i])
            #plt.loglog(cbaStandardsConcentrationsPlotPoints,np.power(10,Hill(convertedCBAStandardsPlotPoints,*fittingParameters[i,:])))  #'_fit; R2 = '+str(rsquared)

            #Get the LOD for each cytokine calibration curve (i.e. the linear range of the calibration curve)
            backgroundGFI = fittingParameters[i, 3]
            amplitudeGFI = fittingParameters[i, 0]
            #Approximate the LOD by determining concentration values at LOD% and 1-LOD% (3% and 97%) of the curve. Must be applied to log10(curve), as the calibration curve is plotted on a log scale
            LODpercent = 0.03
            #LOD% more than the background GFI is used for the lower LOD GFI
            lowerGFILOD = math.log10(10**((1 + LODpercent) * math.log10(backgroundGFI)))
            #LOD% less than the maximum GFI (background + amplitude) is used for the upper LOD GFI
            upperGFILOD = math.log10(10**((1 - LODpercent) * math.log10(amplitudeGFI + backgroundGFI)))
            #log10(upper/lower GFI LOD) is converted back into normal GFI by raising 10 to its power, then fed into the inverse Hill equation with the current cytokine's fitting parameters to obtain the corresponding concentration values
            lowerConcLOD = InverseHill(lowerGFILOD, fittingParameters[i, :])
            upperConcLOD = InverseHill(upperGFILOD, fittingParameters[i, :])
            #Store GFI and concentration LODs for each cytokine (one row per cytokine)
            concLOD[i, :] = np.array([10**lowerGFILOD, 10**upperGFILOD, lowerConcLOD, upperConcLOD])

        #Reshape the per-cytokine matrices into tidy (standard x cytokine) frames for this kit
        flattenedMatrix = cbaStandardsMFIMatrix.flatten()
        reshapedMatrix = np.reshape(flattenedMatrix, (numberOfCalibrationSamples, len(cytokines)), order='F')
        flattenedMatrix2 = cbaStandardsMFIPlotPointsMatrix.flatten()
        reshapedMatrix2 = np.reshape(flattenedMatrix2, (numberOfPlotPoints, len(cytokines)), order='F')
        flattenedMatrix3 = cbaStandardsConcentrationMatrix.flatten()
        reshapedMatrix3 = np.reshape(flattenedMatrix3, (numberOfCalibrationSamples, len(cytokines)), order='F')
        flattenedMatrix4 = cbaStandardsConcentrationPlotPointsMatrix.flatten()
        reshapedMatrix4 = np.reshape(flattenedMatrix4, (numberOfPlotPoints, len(cytokines)), order='F')
        realCytokineList = []
        for cytokine in cytokines:
            realCytokineList.append(cytokine[0])
        dataValsList = []
        plotPointsList = []
        for j in range(1, numberOfCalibrationSamples + 1):
            dataValsList.append([j])
        for j in range(1, numberOfPlotPoints + 1):
            plotPointsList.append([j])
        dataValsIndex = pd.MultiIndex.from_tuples(dataValsList, names=['Standard'])
        plotPointsIndex = pd.MultiIndex.from_tuples(plotPointsList, names=['Standard'])
        currentCBAStandardsMFIDf = pd.DataFrame(reshapedMatrix, index=dataValsIndex, columns=realCytokineList)
        currentCBAPlotPointsMFIDf = pd.DataFrame(reshapedMatrix2, index=plotPointsIndex, columns=realCytokineList)
        currentCBAStandardsConcentrationDf = pd.DataFrame(reshapedMatrix3, index=dataValsIndex, columns=realCytokineList)
        currentCBAPlotPointsConcentrationDf = pd.DataFrame(reshapedMatrix4, index=plotPointsIndex, columns=realCytokineList)
        currentCBAStandardsMFIDf.columns.name = 'Cytokine'
        currentCBAPlotPointsMFIDf.columns.name = 'Cytokine'
        currentCBAStandardsConcentrationDf.columns.name = 'Cytokine'
        currentCBAPlotPointsConcentrationDf.columns.name = 'Cytokine'
        mic1 = pd.MultiIndex.from_tuples(cytokines, names=['Cytokine'])
        print(cytokines)
        print(mic1)
        fittingParametersDf = pd.DataFrame(fittingParameters, index=mic1,
                                           columns=['Amplitude', 'EC50', 'HillCoeff', 'Background'])
        mic2 = pd.MultiIndex.from_tuples([['MFI', 'Lower'], ['MFI', 'Upper'],
                                          ['Concentration', 'Lower'], ['Concentration', 'Upper']])
        concLODDf = pd.DataFrame(concLOD, index=mic1, columns=mic2)
        concLODList.append(concLODDf)
        fittingParametersList.append(fittingParametersDf)
        cbaStandardsMFIList.append(currentCBAStandardsMFIDf)
        cbaPlotPointsMFIList.append(currentCBAPlotPointsMFIDf)
        cbaStandardsConcentrationList.append(currentCBAStandardsConcentrationDf)
        cbaPlotPointsConcentrationList.append(currentCBAPlotPointsConcentrationDf)

    #fullFittingParametersDf = pd.concat(fittingParametersList,keys=kitNames,names=['Kit Name'])
    #fullConcLODDf = pd.concat(concLODList,keys=kitNames,names=['Kit Name'])
    fullFittingParametersDf = pd.concat(fittingParametersList)
    fullConcLODDf = pd.concat(concLODList)
    print(fullFittingParametersDf)
    print(fullConcLODDf)

    fullCBAStandardsMFIDf = pd.concat(cbaStandardsMFIList, keys=kitNames, names=['Kit Name'], axis=1)
    fullCBAPlotPointsMFIDf = pd.concat(cbaPlotPointsMFIList, keys=kitNames, names=['Kit Name'], axis=1)
    fullCBAStandardsConcentrationDf = pd.concat(cbaStandardsConcentrationList, keys=kitNames, names=['Kit Name'], axis=1)
    fullCBAPlotPointsConcentrationDf = pd.concat(cbaPlotPointsConcentrationList, keys=kitNames, names=['Kit Name'], axis=1)

    fullCBAStandardsList = [fullCBAStandardsMFIDf.stack().stack(),
                            fullCBAStandardsConcentrationDf.stack().stack()]
    fullCBAPlotPointsList = [fullCBAPlotPointsMFIDf.stack().stack(),
                             fullCBAPlotPointsConcentrationDf.stack().stack()]
    fullCBAStandardsDf = pd.concat(fullCBAStandardsList, axis=1, keys=[yaxistitle, xaxistitle])
    fullCBAPlotPointsDf = pd.concat(fullCBAPlotPointsList, axis=1, keys=[yaxistitle, xaxistitle])

    plottingPointsDf = fullCBAPlotPointsDf.reset_index()
    plottingStandardsDf = fullCBAStandardsDf.reset_index()

    numCyt = len(pd.unique(plottingPointsDf['Cytokine']))
    if numCyt <= 10:
        fullpalette = sns.color_palette(sns.color_palette(), numCyt)
    else:
        fullpalette = sns.color_palette('hls', numCyt)
    g = sns.relplot(data=plottingPointsDf, x=xaxistitle, y=yaxistitle, hue='Cytokine',
                    col='Kit Name', kind='line',
                    col_order=pd.unique(plottingPointsDf['Kit Name']),
                    hue_order=pd.unique(plottingPointsDf['Cytokine']),
                    height=10, palette=fullpalette)
    #Plot vertical lines at the lower and upper concentration limits of detection
    colorDict = {}
    for j, cytokine in enumerate(pd.unique(plottingPointsDf['Cytokine'])):
        colorDict[cytokine] = fullpalette[j]
    for axis, kitName in zip(g.axes.flat, pd.unique(plottingPointsDf['Kit Name'])):
        currentpalette = []
        for cytokine in pd.unique(plottingStandardsDf[plottingStandardsDf['Kit Name'] == kitName]['Cytokine']):
            currentColor = colorDict[cytokine]
            currentpalette.append(currentColor)
            cytokineLODValues = fullConcLODDf.loc[cytokine, :]['Concentration']
            axis.axvline(x=cytokineLODValues['Lower'].values, color=currentColor, linestyle=':')
            axis.axvline(x=cytokineLODValues['Upper'].values, color=currentColor, linestyle=':')
        #Overlay the measured standards for this kit on top of the fitted curves
        g2 = sns.scatterplot(data=plottingStandardsDf[plottingStandardsDf['Kit Name'] == kitName],
                             x=xaxistitle, y=yaxistitle, hue='Cytokine', ax=axis,
                             legend=False, palette=currentpalette)
        axis.set_xscale('log')
        axis.set_yscale('log')
    plt.savefig('plots/calibrationCurves-' + folderName + '-' + concUnitPrefix + '.png')
    #Save fitting parameters and LODs of the curve fit for each cytokine
    with open('misc/fittingParameters-' + folderName + '-' + concUnitPrefix + '.pkl', "wb") as f:
        pickle.dump(fullFittingParametersDf, f)
    with open('misc/LODParameters-' + folderName + '-' + concUnitPrefix + '.pkl', "wb") as f:
        pickle.dump(fullConcLODDf, f)
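#Worked example (illustrative only) of the standards ladder computed in calibrateExperiment above:
#with the kit's 5000 pg/mL stock, a 2 mL reconstitution volume (initialStandardVolume = 2) and 12
#standards, initialConc = 5000e-12 / (2e-3 / 2) = 5e-6 g/L, and the ladder spans
#initialConc * 2**-11 up to initialConc * 2**0, highest concentration first after np.flipud.
def _standardsLadderSketch(numberOfCalibrationSamples=12, initialStandardVolume=2):
    conc = 5000  #pg/mL, from the CBA kit manual
    serialDilutionFactor = 2
    initialConc = (conc * 1e-12) / ((initialStandardVolume * 1e-3) / 2)  #g/L
    return np.flipud(initialConc * np.power(
        serialDilutionFactor,
        np.linspace(-numberOfCalibrationSamples + 1, 0, numberOfCalibrationSamples)))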
def createFullDataFrames(folderName,secondPath,experimentNumber,concUnit,concUnitPrefix,dataType):
    #Open the experiment parameter file created by the experiment setup script from user input; it holds the number of conditions and timepoints as well as the condition names
    with open('inputFiles/experimentParameters-'+folderName+'.json') as f:
        experimentParameters = json.load(f)
    numTimePoints = experimentParameters[0][1]
    paired = experimentParameters[6]
    contiguous = experimentParameters[7]
    #Grab plate names (A1, A2, etc.)
    plateNames = experimentParameters[4]
    numPlates = len(plateNames)
    if not paired:
        replicateWise = True
        alternatingPlatewise = experimentParameters[8]
        numPlates*=2
    else:
        replicateWise = experimentParameters[8]
        alternatingPlatewise = False
    processedData = []
    if dataType == 'cyt':
        allRawData,newLevelList = cleanUpFlowjoCSV(plateNames,folderName,dataType)
        finalDataFrame = createAndCombineBaseDataFrames(folderName,allRawData,numPlates,numTimePoints,newLevelList,cytokineHeaderNames,paired,contiguous,replicateWise,alternatingPlatewise)
        with open('semiProcessedData/cytokineGFIPickleFile-'+folderName+'.pkl', "wb") as f:
            pickle.dump(finalDataFrame, f)
        fittingParameters = pickle.load(open('semiProcessedData/fittingParameters-'+folderName+'-'+concUnitPrefix+'.pkl', "rb"))
        LODParameters = pickle.load(open('semiProcessedData/LODParameters-'+folderName+'-'+concUnitPrefix+'.pkl', "rb"))
        #Begin converting the GFI dataframe into the corresponding concentration dataframe
        concentrationList = []
        #Step through the dataframe one cytokine at a time
        for cytokine in pd.unique(finalDataFrame.index.get_level_values(0)):
            #Retrieve LODs for the current cytokine (from the constructed calibration curve)
            lowerGFILOD = LODParameters[cytokine][0]
            upperGFILOD = LODParameters[cytokine][1]
            lowerConcLOD = LODParameters[cytokine][2]
            upperConcLOD = LODParameters[cytokine][3]
            cyt = listOfCytokines.index(cytokine)
            smallConcentrationMatrix = np.zeros(finalDataFrame.loc[cytokine].shape)
            #Loop through every value in the current cytokine's portion of the dataframe
            for i in range(0,finalDataFrame.loc[cytokine].values.shape[0]):
                for j in range(0,finalDataFrame.loc[cytokine].values.shape[1]):
                    currentGFIval = finalDataFrame.loc[cytokine].values[i,j]
                    if currentGFIval > upperGFILOD: #If intensity is greater than the upper GFI LOD
                        currentConcVal = upperConcLOD #Concentration is set to the upper concentration LOD
                    elif currentGFIval <= upperGFILOD and currentGFIval >= lowerGFILOD: #If intensity is between the upper and lower GFI LODs
                        currentConcVal = InverseHill(np.log10(currentGFIval),fittingParameters[cyt,:]) #Use the cytokine's previous Hill fit parameters to obtain the concentration
                    else: #If intensity is less than the background GFI LOD
                        currentConcVal = lowerConcLOD #Concentration is set to the lower concentration LOD
                    smallConcentrationMatrix[i,j] = currentConcVal
            concentrationList.append(smallConcentrationMatrix)
        concentrationMatrix = np.vstack(concentrationList)
        finalDataFrameConcentration = pd.DataFrame(concentrationMatrix,index=finalDataFrame.index,columns=finalDataFrame.columns)
        finalDataFrameConcentration.columns.name = 'Time'
        #Fill in n/a's (caused by undetectable fluorescence) with the lower concentration LOD
        finalDataFrameConcentration.fillna(lowerConcLOD, inplace=True)
        with open('semiProcessedData/cytokineConcentrationPickleFile-'+folderName+'.pkl', "wb") as f:
            pickle.dump(finalDataFrameConcentration, f)
        #Create and save modified Df (original concentration df with various modifications due to experimental error)
        #modifiedDf = returnModifiedDf(experimentNumber,finalDataFrame,dataType)
        #with open('semiProcessedData/cytokineGFIPickleFile-'+folderName+'-modified.pkl', "wb") as f:
        #    pickle.dump(modifiedDf, f)
        modifiedConcentrationDf = returnModifiedDf(experimentNumber,finalDataFrameConcentration,dataType)
        with open('semiProcessedData/cytokineConcentrationPickleFile-'+folderName+'-modified.pkl', "wb") as f:
            pickle.dump(modifiedConcentrationDf, f)
        if secondPathBool:
            with open(secondPath+'/cytokineConcentrationPickleFile-'+folderName+'-modified.pkl', "wb") as f:
                pickle.dump(modifiedConcentrationDf, f)
        finalDataFrame = modifiedConcentrationDf
    if dataType == 'cell':
        allRawData,newLevelList = cleanUpFlowjoCSV(plateNames,folderName,dataType)
        finalDataFrame = createAndCombineBaseDataFrames(folderName,allRawData,numPlates,numTimePoints,newLevelList,cellHeaderNames,paired,contiguous,replicateWise,alternatingPlatewise)
        #Create and save modified Df (original df with various modifications due to experimental error)
        with open('semiProcessedData/cellStatisticPickleFile-'+folderName+'.pkl', "wb") as f:
            pickle.dump(finalDataFrame, f)
        modifiedDataFrame = returnModifiedDf(experimentNumber,finalDataFrame,dataType)
        with open('semiProcessedData/cellStatisticPickleFile-'+folderName+'-modified.pkl', "wb") as f:
            pickle.dump(modifiedDataFrame, f)
        finalDataFrame = modifiedDataFrame
    if dataType == 'singlecell':
        allRawData,newLevelList = cleanUpFlowjoCSV(plateNames,folderName,dataType)
        finalDataFrame = createAndCombineBaseDataFrames(folderName,allRawData,numPlates,numTimePoints,newLevelList,singleCellHeaderNames,paired,contiguous,replicateWise,alternatingPlatewise)
    else:
        pass
    return finalDataFrame
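#The nested loops in createFullDataFrames above clamp each GFI value to the calibration curve's
#linear range before inverting the Hill fit. The same per-cytokine logic could be written in a
#more vectorized form; the sketch below is a hypothetical helper (not used by the pipeline) that
#assumes the same [lowerGFILOD, upperGFILOD, lowerConcLOD, upperConcLOD] ordering and an
#InverseHill that accepts scalar log10(GFI) values, as in the loop above.
def _gfiToConcentrationSketch(gfiMatrix, cytokineFitParams, lods):
    lowerGFILOD, upperGFILOD, lowerConcLOD, upperConcLOD = lods
    #Values below the lower GFI LOD (and NaNs) map to the lower concentration LOD
    conc = np.full(gfiMatrix.shape, lowerConcLOD, dtype=float)
    #Values above the upper GFI LOD map to the upper concentration LOD
    conc[gfiMatrix > upperGFILOD] = upperConcLOD
    #Values inside the linear range are inverted through the Hill fit
    inRange = (gfiMatrix >= lowerGFILOD) & (gfiMatrix <= upperGFILOD)
    for idx in zip(*np.where(inRange)):
        conc[idx] = InverseHill(np.log10(gfiMatrix[idx]), cytokineFitParams)
    return conc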
def produceCBADilutedData(cbaCorrExpNum, folderName, excel_data):
    #Load the CBA correction parameters for this folder: condition levels, experiment names, timepoints, and dilution factors
    fullLevelList = json.load(open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r'))[0]
    names = json.load(open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r'))[1]
    timepoints = json.load(open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r'))[2]
    dilutionFactors = json.load(open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r'))[3]
    calibrateExperiment(folderName, '', 1e9, 'nM',
                        excel_data['NumberOfCBAStandardDilutions'][cbaCorrExpNum - 1],
                        excel_data['CBAStandardDilutedVolume'][cbaCorrExpNum - 1])
    dflist = []
    for levelList, i in zip(fullLevelList, range(len(fullLevelList))):
        plateLetter = upperCase[i]
        matrixList = []
        cytList = [None] * 7
        for conditionList, j in zip(levelList, range(len(levelList))):
            plateNumber = j + 1
            print(len(levelList))
            plate = plateLetter + str(plateNumber)
            values, _ = cleanUpFlowjoCSV([plate], names[i], 'cytcorr')
            values = values[0]
            #Accumulate each cytokine's column across the plates of this experiment
            for cytIndex in range(1, values.shape[1] - 1):
                if j == 0:
                    cytList[cytIndex - 1] = values.iloc[:, cytIndex]
                else:
                    cytList[cytIndex - 1] = pd.concat([cytList[cytIndex - 1], values.iloc[:, cytIndex]])
        cytMatrixList = []
        for cytl in cytList:
            cytmatrix = np.reshape(cytl.values, (len(levelList), timepoints[i]))
            cytMatrixList.append(cytmatrix)
        fullMatrix = np.vstack(cytMatrixList)
        with open('../' + names[i] + '/inputFiles/experimentParameters-' + names[i] + '.json') as f:
            experimentParameters = json.load(f)
        levelNames = experimentParameters[1]
        columnNames = experimentParameters[3]
        #Build a (Cytokine x condition levels) MultiIndex for this experiment's dataframe
        indexList = []
        for cytokine in listOfCytokines:
            for level in fullLevelList[i]:
                indexList.append([cytokine] + level)
        multiIndex = pd.MultiIndex.from_tuples(indexList, names=['Cytokine'] + levelNames)
        df = pd.DataFrame(fullMatrix, index=multiIndex, columns=columnNames)
        df.columns.name = 'Time'
        dflist.append(df)
    dfGFIDict = {}
    dilutionFactorDict = {}
    for name, i in zip(names, range(len(names))):
        dfGFIDict[name] = dflist[i]
        dilutionFactorDict[name] = dilutionFactors[i]
    #Convert each GFI dataframe to concentrations using the calibration curves fit above
    dfConcDict = {}
    for key in dfGFIDict:
        dfgfi = dfGFIDict[key]
        dfconc = calibrateDataFrame(folderName, '', cbaCorrExpNum, 1e9, 'nM', 'cyt', dfgfi)
        if '0404' in key:
            dfconc.iloc[:, 4:8] /= 2
        dfConcDict[key] = dfconc
    with open('semiProcessedData/correctedCytokineDataFrames-' + folderName + '-Concentration.pkl', 'wb') as f:
        pickle.dump(dfConcDict, f)
    #Write each corrected dataframe and its dilution factor into the corresponding experiment folder
    for key in dfConcDict:
        os.chdir('../' + key)
        dfconc = dfConcDict[key]
        dilutionFactor = dilutionFactorDict[key]
        with open('semiProcessedData/correctedCytokineDataFrameAndDilutionFactor-' + key + '.pkl', 'wb') as f:
            print('wat')
            pickle.dump([dfconc, dilutionFactor], f)
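#For reference, produceCBADilutedData reads 'inputFiles/cbaCorrectionParameters-<folderName>.json'
#as a 4-element list: [0] fullLevelList (per-experiment condition level lists), [1] names (the
#sibling experiment folder names), [2] timepoints (number of timepoints per experiment), and
#[3] dilutionFactors (one dilution factor per experiment). The sketch below is an illustrative
#helper (not used by the pipeline) that unpacks the same four fields with a single read instead
#of re-opening the file once per field.
def _loadCBACorrectionParametersSketch(folderName):
    with open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r') as f:
        fullLevelList, names, timepoints, dilutionFactors = json.load(f)
    return fullLevelList, names, timepoints, dilutionFactors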