#Standard-library and third-party imports used below. Project-local helpers referenced in these
#functions (e.g. cleanUpFlowjoCSV, returnMultiIndex, performCommaCheck, decodeBarcodedPlates,
#unpackMultiplexedPlates, reorderDfByInputOrder, createAndCombineBaseDataFrames, returnModifiedDf,
#calibrateDataFrame, Hill, InverseHill, r_squared, parseCytokineCSVHeaders, cellDataProcessing,
#plateRowLetters, plateColumnNumbers, dataTypeLevelNames, listOfCytokines, MWofCytokines,
#completeCytokineMWDict, cytokineHeaderNames, cellHeaderNames, singleCellHeaderNames,
#secondPathBool, upperCase) are assumed to be imported or defined elsewhere in this module.
import glob
import json
import math
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.optimize import curve_fit

def createBaseDataFrame(experimentParameters,folderName,experimentNumber,dataType,layoutDict):
    if experimentParameters['format'] == 'tube':
        fullFormatDf = pickle.load(open('misc/tubeLayout-'+folderName+'-cell.pkl','rb'))
        dfList = []
        levelLabelDict = experimentParameters['levelLabelDict']
        for fileName in os.listdir('inputData/bulkCSVFiles/'):
            if '.csv' in fileName:
                performCommaCheck(fileName)
                bulkTubeCSVFileName = fileName
                columnMultiIndexTuples = cellDataProcessing.parseCellCSVHeaders(pd.read_csv('inputData/bulkCSVFiles/'+bulkTubeCSVFileName).columns)
                columnMultiIndex = pd.MultiIndex.from_tuples(columnMultiIndexTuples,names=['CellType','Marker','Statistic'])
                fullData = pd.read_csv('inputData/bulkCSVFiles/'+bulkTubeCSVFileName,header=0)
                #Drop the trailing unnamed column FlowJo sometimes adds, and the last two summary rows
                if 'Unnamed' in fullData.columns[-1]:
                    data = fullData.iloc[:-2,1:-1].values
                else:
                    data = fullData.iloc[:-2,1:].values
                sampleNames = fullData.iloc[:-2,0].values.ravel()
                sampleIndexStart = fullFormatDf.values.ravel().tolist().index(sampleNames[0])
                sampleIndexEnd = fullFormatDf.values.ravel().tolist().index(sampleNames[-1])
                rowMultiIndex = fullFormatDf.iloc[sampleIndexStart:sampleIndexEnd+1,:].index
                timeDataList = []
                timeSubsets = []
                times = levelLabelDict[list(levelLabelDict.keys())[-1]]
                #Can use sample name file to assign time values
                if 'sampleNameFile.xlsx' in os.listdir('misc') or 'sampleNameFile.csv' in os.listdir('misc'):
                    if 'sampleNameFile.xlsx' in os.listdir('misc'):
                        sampleNameDf = pd.read_excel('misc/sampleNameFile.xlsx')
                    else:
                        sampleNameDf = pd.read_csv('misc/sampleNameFile.csv')
                    if 'Time' in sampleNameDf.columns:
                        for time in times:
                            timeIndices = []
                            for row in range(sampleNameDf.shape[0]):
                                if sampleNameDf[list(levelLabelDict.keys())[-1]].values[row] == time:
                                    timeIndices.append(row)
                            timeSubsets.append(timeIndices)
                    #Otherwise just assume 1 timepoint (HACK NEED TO FIX EVENTUALLY)
                    else:
                        timeSubsets.append(list(range(data.shape[0])))
                #Otherwise just assume 1 timepoint (HACK NEED TO FIX EVENTUALLY)
                else:
                    timeSubsets.append(list(range(data.shape[0])))
                for timeSubset in timeSubsets:
                    dataList = []
                    columnTupleList = []
                    for i,columnTuple in enumerate(columnMultiIndexTuples):
                        ser = pd.Series(data[timeSubset,i],index=rowMultiIndex)
                        dataList.append(ser)
                        columnTupleList.append(tuple(columnTuple))
                    fullExperimentDf = pd.concat(dataList,keys=columnTupleList,names=['CellType','Marker','Statistic'])
                    timeDataList.append(fullExperimentDf)
                #Debug check for repeated index entries
                k = pd.concat(timeDataList,keys=times,names=[list(levelLabelDict.keys())[-1]])
                repeatList = []
                for name in k.index:
                    if name not in repeatList:
                        repeatList.append(name)
                    else:
                        print('Repeated:')
                        print(name)
                partialExperimentDf = pd.concat(timeDataList,keys=times,names=[list(levelLabelDict.keys())[-1]]).unstack(list(levelLabelDict.keys())[-1])
                dfList.append(partialExperimentDf)
        fullExperimentDf = pd.concat(dfList)
    else:
        realDataType = dataType
        if 'barcodingDict' in list(experimentParameters.keys()):
            decodeBarcodedPlates(experimentParameters,folderName,dataType)
        if 'unpackingDict' in list(experimentParameters.keys()):
            unpackMultiplexedPlates(experimentParameters,folderName,dataType)
        #Legacy experiment parameter files compatibility
        if 'paired' in experimentParameters.keys():
            if experimentParameters['paired']:
                numRowPlates = 2
            else:
                numRowPlates = 1
            numColumnPlates = int(experimentParameters['numPlates']/numRowPlates)
        else:
            numRowPlates = 1
            numColumnPlates = experimentParameters['numPlates']
        #Combine plate and well IDs into a single ID val for every single sample in the experiment
        identificationMatrix = np.empty(layoutDict['plateID'].shape,dtype=object)
        for row in range(identificationMatrix.shape[0]):
            for col in range(identificationMatrix.shape[1]):
                wellID = layoutDict['wellID'][row,col]
                plateID = layoutDict['plateID'][row,col]
                fullID = plateID+'-'+wellID
                identificationMatrix[row,col] = fullID
        plateNames = np.unique(layoutDict['plateID'])
        levelLabelDict = experimentParameters['levelLabelDict']
        plateDimensions = experimentParameters['overallPlateDimensions']
        levels = list(levelLabelDict.keys())
        conditionLevels = list(levelLabelDict.keys())[:-1]
        conditionLevelValues = levelLabelDict.copy()
        del conditionLevelValues[list(levelLabelDict.keys())[-1]]
        allLevelValues = experimentParameters['levelLabelDict']
        sortedData,sortedFiles = cleanUpFlowjoCSV(plateNames,folderName,dataType,experimentParameters)
        allRawData,newLevelList = returnMultiIndex(sortedData,sortedFiles,realDataType,folderName)
        dfList = []
        for rawData,plateID in zip(allRawData,plateNames):
            fullTupleList = []
            index = 0
            for row in range(rawData.shape[0]):
                #FlowJo sample numbers are 1-indexed; convert to a 0-indexed well position
                sampleID = int(rawData.iloc[row,0])-1
                wellID = plateRowLetters[int(sampleID/plateDimensions[1])]+str(plateColumnNumbers[sampleID % plateDimensions[1]])
                fullID = plateID+'-'+wellID
                sampleLocation = np.argwhere(identificationMatrix == fullID)[0]
                column = []
                tupleList = []
                for levelID in layoutDict['keys']:
                    levelValueID = layoutDict['keys'][levelID][sampleLocation[0],sampleLocation[1]]
                    if levelValueID != 'blank':
                        level = levels[levelID]
                        levelValue = allLevelValues[level][levelValueID]
                        tupleList.append(levelValue)
                    else:
                        print('wat2')
                if len(tupleList) != 0:
                    fullTupleList.append(tupleList)
                index+=1
            mi = pd.MultiIndex.from_tuples(fullTupleList,names=levels)
            columnSeriesList = []
            columnTupleList = []
            for column,columnTuple in enumerate(newLevelList):
                columnSeries = pd.Series(rawData.values[:,column+1],index=mi)
                columnSeriesList.append(columnSeries)
                columnTupleList.append(tuple(columnTuple))
            platedf = pd.concat(columnSeriesList,axis=0,keys=columnTupleList,names=dataTypeLevelNames[realDataType])
            dfList.append(platedf)
        idx=pd.IndexSlice
        fullExperimentDf = pd.concat(dfList)
        #dfl = [fullExperimentDf.xs([12.0],level=['Time']),fullExperimentDf.xs([60.0],level=['Time']),fullExperimentDf.xs([96.0],level=['Time']),fullExperimentDf.xs([156.0],level=['Time'])]
        #Debug check for repeated index entries
        tempdf = fullExperimentDf.to_frame('temp')
        temp = []
        for row in range(fullExperimentDf.shape[0]):
            name = list(tempdf.iloc[row,:].name)
            if name in temp:
                print(name)
                print(row)
            else:
                temp.append(name)
        #Remove blanks
        for i,level in enumerate(fullExperimentDf.index.names):
            tempLevelValues = pd.unique(fullExperimentDf.index.get_level_values(level))
            if 'Blank' in tempLevelValues:
                fullExperimentDf = fullExperimentDf.drop('Blank',level=i)
        #Second debug check for repeated index entries after blank removal
        temp = []
        temp2 = []
        tempdf = fullExperimentDf.to_frame('wat')
        for row in range(fullExperimentDf.shape[0]):
            name = list(tempdf.iloc[row,:].name)
            if name not in temp:
                temp.append(name)
            else:
                temp2.append(name)
        fullExperimentDf = fullExperimentDf.unstack(list(levelLabelDict.keys())[-1])
    fullExperimentDf = reorderDfByInputOrder(experimentParameters,fullExperimentDf)
    return fullExperimentDf
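#Worked example of the well-ID arithmetic used in createBaseDataFrame above. The sketch below is
#illustrative only and not used by the pipeline; it assumes (as the variable names suggest) that
#plateRowLetters runs 'A'-'H' and plateColumnNumbers runs 1-12 for an 8x12 plate. For example, a
#FlowJo sample number of 14 gives sampleID = 13, row plateRowLetters[13 // 12] = 'B', column
#plateColumnNumbers[13 % 12] = 2, i.e. well 'B2'.
def _wellIDFromSampleNumberSketch(sampleNumber, plateDimensions=(8, 12)):
    rowLetters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
    columnNumbers = list(range(1, 13))
    sampleID = int(sampleNumber) - 1  #FlowJo sample numbers are 1-indexed
    return rowLetters[sampleID // plateDimensions[1]] + str(columnNumbers[sampleID % plateDimensions[1]])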
def calibrateExperiment(folderName,secondPath,concUnit,concUnitPrefix,numberOfCalibrationSamples,initialStandardVolume):
    #Get cytokine calibration curve data
    sortedData,newMultiIndexedObject = cleanUpFlowjoCSV(['Calibration'],folderName,'cyt')
    calibration = sortedData[0]
    data = np.array(calibration.values[:,1:8],dtype=float)
    fittingParameters = np.zeros((np.shape(listOfCytokines)[0],4))
    concLOD = np.zeros((np.shape(listOfCytokines)[0],2))
    #Initial concentration of all cytokine standards is given by the CBA kit manual as 5000 pg/mL when standards are diluted in 2 mL
    conc = 5000 #pg/mL
    serialDilutionFactor = 2 #1:serialDilutionFactor dilution between each standard well
    #A smaller initial dilution volume (e.g. 0.5 mL instead of 2 mL) increases the initial concentration of the first calibration sample
    initialConc = (conc*1e-12)/((initialStandardVolume*1e-3)/2) #g/L; (pg/mL * 1e-12 g/pg)/(1e-3 L/mL)
    #Calibration samples are always diluted by a factor of serialDilutionFactor (so with 12 calibration samples, the last sample is (serialDilutionFactor^-11) times the concentration of the first, which is pure standard (2^0))
    cbaStandardsConcentrations = np.flipud(initialConc*np.power(serialDilutionFactor,np.linspace(-numberOfCalibrationSamples+1,0,numberOfCalibrationSamples)))
    #More x values along the above concentration bounds are sampled to construct the calibration curve. Plot points are extended slightly at the high range to allow visualization of the upper LOD (not accessible with the experimental dilution)
    cbaStandardsConcentrationsPlotPoints = np.flipud(initialConc*np.power(2,np.linspace(-numberOfCalibrationSamples+1,4,101)))

    fig1=plt.figure(num=1,figsize=(10,10))
    plt.gcf().set_facecolor('white')
    color_list = plt.cm.jet(np.linspace(0,1,7))
    ax=fig1.add_subplot(1,1,1)

    concLOD = {}
    for i in range(len(listOfCytokines)):
        #Amplitude bounded from range/2 to range*2, EC50 bounded from minimum to maximum standard concentration tested, Hill coefficient bounded from 0 to 2, background bounded from 0 to minimum GFI*2
        lowerCurveFitBounds = [(np.max(data[:,i])-np.min(data[:,i]))/2,np.min(cbaStandardsConcentrations),0,0]
        upperCurveFitBounds = [(np.max(data[:,i])-np.min(data[:,i]))*2,np.max(cbaStandardsConcentrations),2,np.min(data[:,i])*2]
        #Use scipy's curve_fit to determine the best Hill equation fit for the data, searching within the bounds given above
        popt,pcov = curve_fit(Hill,cbaStandardsConcentrations,np.log10(data[:,i]),sigma=np.log10(data[:,i]),bounds=(lowerCurveFitBounds,upperCurveFitBounds))
        rsquared = round(r_squared(cbaStandardsConcentrations,np.log10(data[:,i]),Hill,popt),3)
        for j in range(len(popt)):
            #Convert only the EC50 value to the desired units (nM, uM, etc.)
            if j == 1:
                fittingParameters[i,j] = np.multiply(popt[j],(concUnit/MWofCytokines[i]))
            #The other values in the 4-parameter logistic equation are tied to the intensity y-value, which doesn't change, or are the Hill coefficient, which is completely separate, so those parameters are kept the same
            else:
                fittingParameters[i,j] = popt[j]

        #Convert x values of the experimental data points and curve fit points to the desired units (nM, uM, etc.)
        convertedCBAStandards = np.multiply(cbaStandardsConcentrations,(concUnit/MWofCytokines[i]))
        convertedCBAStandardsPlotPoints = np.multiply(cbaStandardsConcentrationsPlotPoints,(concUnit/MWofCytokines[i]))
        #Plot the experimental points and the curve fit line (with the previously determined fitting parameters) on a log-log scale
        plt.loglog(convertedCBAStandards,data[:,i],'o',color=color_list[i,:],label=listOfCytokines[i])
        plt.loglog(convertedCBAStandardsPlotPoints,np.power(10,Hill(convertedCBAStandardsPlotPoints,*fittingParameters[i,:])),color=color_list[i,:],label=listOfCytokines[i]+'_fit; R2 = '+str(rsquared))

        #Get the LOD for each cytokine calibration curve (i.e. the linear range of the calibration curve)
        backgroundGFI = fittingParameters[i,3]
        amplitudeGFI = fittingParameters[i,0]
        #Approximate the LOD by determining concentration values at LOD% and 1-LOD% (3% and 97%) of the curve. Must be applied to log10(curve), as the calibration curve is plotted on a log scale
        LODpercent = 0.03
        #LOD% more than the background GFI is used for the lower LOD GFI
        lowerGFILOD = math.log10(10**((1+LODpercent)*math.log10(backgroundGFI)))
        #LOD% less than the maximum GFI (background + amplitude) is used for the upper LOD GFI
        upperGFILOD = math.log10(10**((1-LODpercent)*math.log10(amplitudeGFI+backgroundGFI)))
        #log10(upper/lower GFI LOD) is converted back into normal GFI by raising 10 to its power, then fed into the inverse Hill equation with the current cytokine's fitting parameters to obtain the corresponding concentration values
        lowerConcLOD = InverseHill(lowerGFILOD,fittingParameters[i,:])
        upperConcLOD = InverseHill(upperGFILOD,fittingParameters[i,:])
        listLOD = [10**lowerGFILOD,10**upperGFILOD,lowerConcLOD,upperConcLOD]
        #Create dict with keys as cytokines, values as GFI/concentration LODs
        concLOD[listOfCytokines[i]] = listLOD
        #Plot vertical lines at the lower and upper concentration limits of detection
        plt.axvline(x=lowerConcLOD,color=color_list[i,:],linestyle=':')
        plt.axvline(x=upperConcLOD,color=color_list[i,:],linestyle=':')

    plt.legend(loc=0,numpoints=1)
    plt.ylabel('GeoMFI') #Geometric mean fluorescence intensity of each standard
    plt.xlabel('Concentration of Cytokine Standards ('+concUnitPrefix+')')
    plt.title('Calibration of CBA assay \n'+folderName,fontsize=14)
    plt.close()
    fig1.savefig('fullyProcessedFigures/calibrationCurves-'+folderName+'-'+concUnitPrefix+'.png')
    #Save fitting parameters and LODs of the curve fit for each cytokine
    with open('semiProcessedData/fittingParameters-'+folderName+'-'+concUnitPrefix+'.pkl', "wb") as f:
        pickle.dump(fittingParameters, f)
    with open('semiProcessedData/LODParameters-'+folderName+'-'+concUnitPrefix+'.pkl', "wb") as f:
        pickle.dump(concLOD, f)
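#The calibration routines above and below rely on the project-local helpers Hill, InverseHill,
#and r_squared, which are defined elsewhere in this repository. As a hedged reference only, the
#sketches below (given deliberately different, hypothetical names so they shadow nothing) show a
#4-parameter logistic form that is consistent with how those helpers are called here: Hill returns
#log10(GFI) so it can be fit against np.log10(data), and InverseHill maps a log10(GFI) value back
#to a concentration using the [amplitude, EC50, hillCoeff, background] parameter order implied by
#the curve-fit bounds.
def _hillSketch(x, amplitude, ec50, hillCoeff, background):
    #log10 of a 4-parameter logistic: background + amplitude * x^n / (EC50^n + x^n)
    return np.log10(background + amplitude * np.power(x, hillCoeff) / (np.power(ec50, hillCoeff) + np.power(x, hillCoeff)))

def _inverseHillSketch(logGFI, parameters):
    #Invert the logistic above for a single log10(GFI) value
    amplitude, ec50, hillCoeff, background = parameters
    gfi = 10**logGFI
    return ec50 * np.power((gfi - background) / (amplitude - (gfi - background)), 1.0 / hillCoeff)

def _rSquaredSketch(x, y, func, popt):
    #Coefficient of determination of func(x, *popt) against y
    residuals = y - func(x, *popt)
    return 1 - np.sum(residuals**2) / np.sum((y - np.mean(y))**2)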
def calibrateExperiment(folderName, secondPath, concUnit, concUnitPrefix,
                        numberOfCalibrationSamples, initialStandardVolume):
    #Get cytokine calibration curve data
    tempExperimentParameters = {'overallPlateDimensions': [8, 12]}
    calibrationFileNames = glob.glob('inputData/bulkCSVFiles/Calibration*')
    print(calibrationFileNames)
    calibrationNames = []
    kitNames = []
    for calibrationFileName in calibrationFileNames:
        newName = calibrationFileName.split('.')[0].split('_')[0].split('/')[-1]
        kitNames.append(newName)
    print(kitNames)
    sortedData, sortedFiles = cleanUpFlowjoCSV(kitNames, folderName, 'cyt', tempExperimentParameters)
    #Keep only the portion of each kit name after the hyphen, if present
    for i, newName in enumerate(kitNames):
        if '-' in newName:
            newName2 = newName.split('-')[1]
        else:
            newName2 = newName
        kitNames[i] = newName2
    rsquaredList = []
    concLODList = []
    fittingParametersList = []
    cbaStandardsMFIList = []
    cbaPlotPointsMFIList = []
    cbaStandardsConcentrationList = []
    cbaPlotPointsConcentrationList = []
    numberOfPlotPoints = 101
    xaxistitle = 'Concentration of Cytokine Standards (' + concUnitPrefix + ')'
    yaxistitle = 'GeoMFI'
    #Only convert concentrations to molar units if every cytokine has a molar mass in the dict
    allCytokinesHaveMWInDict = True
    for calibration in sortedData:
        cytokines = parseCytokineCSVHeaders(calibration.columns)
        for cytokine in cytokines:
            if cytokine[0] not in completeCytokineMWDict:
                allCytokinesHaveMWInDict = False
                print(cytokine[0])
                print('wat')
    print(allCytokinesHaveMWInDict)
    for calibration in sortedData:
        data = np.array(calibration.values[:, 1:-1], dtype=float)
        cytokines = parseCytokineCSVHeaders(calibration.columns)
        fittingParameters = np.zeros((data.shape[1], 4))
        concLOD = np.zeros((data.shape[1], 4))
        #Initial concentration of all cytokine standards is given by the CBA kit manual as 5000 pg/mL when standards are diluted in 2 mL
        conc = 5000  #pg/mL
        serialDilutionFactor = 2  #1:serialDilutionFactor dilution between each standard well
        #A smaller initial dilution volume (e.g. 0.5 mL instead of 2 mL) increases the initial concentration of the first calibration sample
        initialConc = (conc * 1e-12) / ((initialStandardVolume * 1e-3) / 2)  #g/L; (pg/mL * 1e-12 g/pg)/(1e-3 L/mL)
        #Calibration samples are always diluted by a factor of serialDilutionFactor (so with 12 calibration samples, the last sample is (serialDilutionFactor^-11) times the concentration of the first, which is pure standard (2^0))
        cbaStandardsConcentrations = np.flipud(initialConc * np.power(
            serialDilutionFactor,
            np.linspace(-numberOfCalibrationSamples + 1, 0, numberOfCalibrationSamples)))
        #More x values along the above concentration bounds are sampled to construct the calibration curve. Plot points are extended slightly at the high range to allow visualization of the upper LOD (not accessible with the experimental dilution)
        cbaStandardsConcentrationsPlotPoints = np.flipud(initialConc * np.power(
            2, np.linspace(-numberOfCalibrationSamples + 1, 4, numberOfPlotPoints)))

        cbaStandardsConcentrationMatrix = np.zeros([len(cytokines), cbaStandardsConcentrations.shape[0]])
        cbaStandardsConcentrationPlotPointsMatrix = np.zeros([len(cytokines), cbaStandardsConcentrationsPlotPoints.shape[0]])
        cbaStandardsMFIMatrix = np.zeros([len(cytokines), cbaStandardsConcentrations.shape[0]])
        cbaStandardsMFIPlotPointsMatrix = np.zeros([len(cytokines), cbaStandardsConcentrationsPlotPoints.shape[0]])
        color_list = sns.color_palette(sns.color_palette(), len(cytokines))
        print(cytokines)
        print(data)
        for i, cytokineList in enumerate(cytokines):
            cytokine = cytokineList[0]
            #Amplitude bounded from range/2 to range*2, EC50 bounded from minimum to maximum standard concentration tested, Hill coefficient bounded from 0 to 2, background bounded from 0 to minimum GFI*2
            lowerCurveFitBounds = [(np.max(data[:, i]) - np.min(data[:, i])) / 2,
                                   np.min(cbaStandardsConcentrations), 0, 0]
            upperCurveFitBounds = [(np.max(data[:, i]) - np.min(data[:, i])) * 2,
                                   np.max(cbaStandardsConcentrations), 2,
                                   np.min(data[:, i]) * 2]
            #Use scipy's curve_fit to determine the best Hill equation fit for the data, searching within the bounds given above
            popt, pcov = curve_fit(Hill,
                                   cbaStandardsConcentrations,
                                   np.log10(data[:, i]),
                                   sigma=np.log10(data[:, i]),
                                   bounds=(lowerCurveFitBounds, upperCurveFitBounds))
            rsquared = round(r_squared(cbaStandardsConcentrations, np.log10(data[:, i]), Hill, popt), 3)
            print(rsquared)
            rsquaredList.append(rsquared)
            for j in range(len(popt)):
                #Convert only the EC50 value to the desired units (nM, uM, etc.), and only if the cytokine has a molar mass in the dict
                if j == 1 and allCytokinesHaveMWInDict:
                    fittingParameters[i, j] = np.multiply(popt[j], (concUnit / completeCytokineMWDict[cytokine]))
                #The other values in the 4-parameter logistic equation are tied to the intensity y-value, which doesn't change, or are the Hill coefficient, which is completely separate, so those parameters are kept the same
                else:
                    fittingParameters[i, j] = popt[j]
            #Convert x values of the experimental data points and curve fit points to the desired units (nM, uM, etc.)
            if allCytokinesHaveMWInDict:
                cbaStandardsConcentrationMatrix[i, :] = np.multiply(
                    cbaStandardsConcentrations, (concUnit / completeCytokineMWDict[cytokine]))
                cbaStandardsConcentrationPlotPointsMatrix[i, :] = np.multiply(
                    cbaStandardsConcentrationsPlotPoints, (concUnit / completeCytokineMWDict[cytokine]))
            else:
                cbaStandardsConcentrationMatrix[i, :] = cbaStandardsConcentrations
                cbaStandardsConcentrationPlotPointsMatrix[i, :] = cbaStandardsConcentrationsPlotPoints
            cbaStandardsMFIMatrix[i, :] = data[:, i]
            print(fittingParameters[i, :])
            cbaStandardsMFIPlotPointsMatrix[i, :] = np.power(
                10, Hill(cbaStandardsConcentrationPlotPointsMatrix[i, :], *fittingParameters[i, :]))
            #Plot on log-log scale the experimental points and the curve fit line with previously determined curve fitting parameters
            #plt.loglog(cbaStandardsConcentrations,data[:,i],'o',color=color_list[i,:],label=listOfCytokines[i])
            #plt.loglog(cbaStandardsConcentrationsPlotPoints,np.power(10,Hill(convertedCBAStandardsPlotPoints,*fittingParameters[i,:])))  #'_fit; R2 = '+str(rsquared)

            #Get the LOD for each cytokine calibration curve (i.e. the linear range of the calibration curve)
            backgroundGFI = fittingParameters[i, 3]
            amplitudeGFI = fittingParameters[i, 0]
            #Approximate the LOD by determining concentration values at LOD% and 1-LOD% (3% and 97%) of the curve. Must be applied to log10(curve), as the calibration curve is plotted on a log scale
            LODpercent = 0.03
            #LOD% more than the background GFI is used for the lower LOD GFI
            lowerGFILOD = math.log10(10**((1 + LODpercent) * math.log10(backgroundGFI)))
            #LOD% less than the maximum GFI (background + amplitude) is used for the upper LOD GFI
            upperGFILOD = math.log10(10**((1 - LODpercent) * math.log10(amplitudeGFI + backgroundGFI)))
            #log10(upper/lower GFI LOD) is converted back into normal GFI by raising 10 to its power, then fed into the inverse Hill equation with the current cytokine's fitting parameters to obtain the corresponding concentration values
            lowerConcLOD = InverseHill(lowerGFILOD, fittingParameters[i, :])
            upperConcLOD = InverseHill(upperGFILOD, fittingParameters[i, :])
            #Store GFI and concentration LODs for each cytokine (one row per cytokine)
            concLOD[i, :] = np.array([10**lowerGFILOD, 10**upperGFILOD, lowerConcLOD, upperConcLOD])

        #Reshape the per-cytokine matrices into tidy (standard x cytokine) frames for this kit
        flattenedMatrix = cbaStandardsMFIMatrix.flatten()
        reshapedMatrix = np.reshape(flattenedMatrix, (numberOfCalibrationSamples, len(cytokines)), order='F')
        flattenedMatrix2 = cbaStandardsMFIPlotPointsMatrix.flatten()
        reshapedMatrix2 = np.reshape(flattenedMatrix2, (numberOfPlotPoints, len(cytokines)), order='F')
        flattenedMatrix3 = cbaStandardsConcentrationMatrix.flatten()
        reshapedMatrix3 = np.reshape(flattenedMatrix3, (numberOfCalibrationSamples, len(cytokines)), order='F')
        flattenedMatrix4 = cbaStandardsConcentrationPlotPointsMatrix.flatten()
        reshapedMatrix4 = np.reshape(flattenedMatrix4, (numberOfPlotPoints, len(cytokines)), order='F')
        realCytokineList = []
        for cytokine in cytokines:
            realCytokineList.append(cytokine[0])
        dataValsList = []
        plotPointsList = []
        for j in range(1, numberOfCalibrationSamples + 1):
            dataValsList.append([j])
        for j in range(1, numberOfPlotPoints + 1):
            plotPointsList.append([j])
        dataValsIndex = pd.MultiIndex.from_tuples(dataValsList, names=['Standard'])
        plotPointsIndex = pd.MultiIndex.from_tuples(plotPointsList, names=['Standard'])
        currentCBAStandardsMFIDf = pd.DataFrame(reshapedMatrix, index=dataValsIndex, columns=realCytokineList)
        currentCBAPlotPointsMFIDf = pd.DataFrame(reshapedMatrix2, index=plotPointsIndex, columns=realCytokineList)
        currentCBAStandardsConcentrationDf = pd.DataFrame(reshapedMatrix3, index=dataValsIndex, columns=realCytokineList)
        currentCBAPlotPointsConcentrationDf = pd.DataFrame(reshapedMatrix4, index=plotPointsIndex, columns=realCytokineList)
        currentCBAStandardsMFIDf.columns.name = 'Cytokine'
        currentCBAPlotPointsMFIDf.columns.name = 'Cytokine'
        currentCBAStandardsConcentrationDf.columns.name = 'Cytokine'
        currentCBAPlotPointsConcentrationDf.columns.name = 'Cytokine'
        mic1 = pd.MultiIndex.from_tuples(cytokines, names=['Cytokine'])
        print(cytokines)
        print(mic1)
        fittingParametersDf = pd.DataFrame(fittingParameters, index=mic1,
                                           columns=['Amplitude', 'EC50', 'HillCoeff', 'Background'])
        mic2 = pd.MultiIndex.from_tuples([['MFI', 'Lower'], ['MFI', 'Upper'],
                                          ['Concentration', 'Lower'], ['Concentration', 'Upper']])
        concLODDf = pd.DataFrame(concLOD, index=mic1, columns=mic2)
        concLODList.append(concLODDf)
        fittingParametersList.append(fittingParametersDf)
        cbaStandardsMFIList.append(currentCBAStandardsMFIDf)
        cbaPlotPointsMFIList.append(currentCBAPlotPointsMFIDf)
        cbaStandardsConcentrationList.append(currentCBAStandardsConcentrationDf)
        cbaPlotPointsConcentrationList.append(currentCBAPlotPointsConcentrationDf)

    #fullFittingParametersDf = pd.concat(fittingParametersList,keys=kitNames,names=['Kit Name'])
    #fullConcLODDf = pd.concat(concLODList,keys=kitNames,names=['Kit Name'])
    fullFittingParametersDf = pd.concat(fittingParametersList)
    fullConcLODDf = pd.concat(concLODList)
    print(fullFittingParametersDf)
    print(fullConcLODDf)

    fullCBAStandardsMFIDf = pd.concat(cbaStandardsMFIList, keys=kitNames, names=['Kit Name'], axis=1)
    fullCBAPlotPointsMFIDf = pd.concat(cbaPlotPointsMFIList, keys=kitNames, names=['Kit Name'], axis=1)
    fullCBAStandardsConcentrationDf = pd.concat(cbaStandardsConcentrationList, keys=kitNames, names=['Kit Name'], axis=1)
    fullCBAPlotPointsConcentrationDf = pd.concat(cbaPlotPointsConcentrationList, keys=kitNames, names=['Kit Name'], axis=1)

    fullCBAStandardsList = [fullCBAStandardsMFIDf.stack().stack(),
                            fullCBAStandardsConcentrationDf.stack().stack()]
    fullCBAPlotPointsList = [fullCBAPlotPointsMFIDf.stack().stack(),
                             fullCBAPlotPointsConcentrationDf.stack().stack()]
    fullCBAStandardsDf = pd.concat(fullCBAStandardsList, axis=1, keys=[yaxistitle, xaxistitle])
    fullCBAPlotPointsDf = pd.concat(fullCBAPlotPointsList, axis=1, keys=[yaxistitle, xaxistitle])

    plottingPointsDf = fullCBAPlotPointsDf.reset_index()
    plottingStandardsDf = fullCBAStandardsDf.reset_index()

    numCyt = len(pd.unique(plottingPointsDf['Cytokine']))
    if numCyt <= 10:
        fullpalette = sns.color_palette(sns.color_palette(), numCyt)
    else:
        fullpalette = sns.color_palette('hls', numCyt)
    g = sns.relplot(data=plottingPointsDf, x=xaxistitle, y=yaxistitle, hue='Cytokine',
                    col='Kit Name', kind='line',
                    col_order=pd.unique(plottingPointsDf['Kit Name']),
                    hue_order=pd.unique(plottingPointsDf['Cytokine']),
                    height=10, palette=fullpalette)
    #Plot vertical lines at the lower and upper concentration limits of detection
    colorDict = {}
    for j, cytokine in enumerate(pd.unique(plottingPointsDf['Cytokine'])):
        colorDict[cytokine] = fullpalette[j]
    for axis, kitName in zip(g.axes.flat, pd.unique(plottingPointsDf['Kit Name'])):
        currentpalette = []
        for cytokine in pd.unique(plottingStandardsDf[plottingStandardsDf['Kit Name'] == kitName]['Cytokine']):
            currentColor = colorDict[cytokine]
            currentpalette.append(currentColor)
            cytokineLODValues = fullConcLODDf.loc[cytokine, :]['Concentration']
            axis.axvline(x=cytokineLODValues['Lower'].values, color=currentColor, linestyle=':')
            axis.axvline(x=cytokineLODValues['Upper'].values, color=currentColor, linestyle=':')
        #Overlay the measured standards for this kit on top of the fitted curves
        g2 = sns.scatterplot(data=plottingStandardsDf[plottingStandardsDf['Kit Name'] == kitName],
                             x=xaxistitle, y=yaxistitle, hue='Cytokine', ax=axis,
                             legend=False, palette=currentpalette)
        axis.set_xscale('log')
        axis.set_yscale('log')
    plt.savefig('plots/calibrationCurves-' + folderName + '-' + concUnitPrefix + '.png')
    #Save fitting parameters and LODs of the curve fit for each cytokine
    with open('misc/fittingParameters-' + folderName + '-' + concUnitPrefix + '.pkl', "wb") as f:
        pickle.dump(fullFittingParametersDf, f)
    with open('misc/LODParameters-' + folderName + '-' + concUnitPrefix + '.pkl', "wb") as f:
        pickle.dump(fullConcLODDf, f)
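#Worked example (illustrative only) of the standards ladder computed in calibrateExperiment above:
#with the kit's 5000 pg/mL stock, a 2 mL reconstitution volume (initialStandardVolume = 2) and 12
#standards, initialConc = 5000e-12 / (2e-3 / 2) = 5e-6 g/L, and the ladder spans
#initialConc * 2**-11 up to initialConc * 2**0, highest concentration first after np.flipud.
def _standardsLadderSketch(numberOfCalibrationSamples=12, initialStandardVolume=2):
    conc = 5000  #pg/mL, from the CBA kit manual
    serialDilutionFactor = 2
    initialConc = (conc * 1e-12) / ((initialStandardVolume * 1e-3) / 2)  #g/L
    return np.flipud(initialConc * np.power(
        serialDilutionFactor,
        np.linspace(-numberOfCalibrationSamples + 1, 0, numberOfCalibrationSamples)))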
def createFullDataFrames(folderName,secondPath,experimentNumber,concUnit,concUnitPrefix,dataType):
    #Open the experiment parameter file created by the experiment setup script from user input; it holds the number of conditions and timepoints as well as the condition names
    with open('inputFiles/experimentParameters-'+folderName+'.json') as f:
        experimentParameters = json.load(f)
    numTimePoints = experimentParameters[0][1]
    paired = experimentParameters[6]
    contiguous = experimentParameters[7]
    #Grab plate names (A1, A2, etc.)
    plateNames = experimentParameters[4]
    numPlates = len(plateNames)
    if not paired:
        replicateWise = True
        alternatingPlatewise = experimentParameters[8]
        numPlates*=2
    else:
        replicateWise = experimentParameters[8]
        alternatingPlatewise = False
    processedData = []
    if dataType == 'cyt':
        allRawData,newLevelList = cleanUpFlowjoCSV(plateNames,folderName,dataType)
        finalDataFrame = createAndCombineBaseDataFrames(folderName,allRawData,numPlates,numTimePoints,newLevelList,cytokineHeaderNames,paired,contiguous,replicateWise,alternatingPlatewise)
        with open('semiProcessedData/cytokineGFIPickleFile-'+folderName+'.pkl', "wb") as f:
            pickle.dump(finalDataFrame, f)
        fittingParameters = pickle.load(open('semiProcessedData/fittingParameters-'+folderName+'-'+concUnitPrefix+'.pkl', "rb"))
        LODParameters = pickle.load(open('semiProcessedData/LODParameters-'+folderName+'-'+concUnitPrefix+'.pkl', "rb"))
        #Begin converting the GFI dataframe into the corresponding concentration dataframe
        concentrationList = []
        #Step through the dataframe one cytokine at a time
        for cytokine in pd.unique(finalDataFrame.index.get_level_values(0)):
            #Retrieve LODs for the current cytokine (from the constructed calibration curve)
            lowerGFILOD = LODParameters[cytokine][0]
            upperGFILOD = LODParameters[cytokine][1]
            lowerConcLOD = LODParameters[cytokine][2]
            upperConcLOD = LODParameters[cytokine][3]
            cyt = listOfCytokines.index(cytokine)
            smallConcentrationMatrix = np.zeros(finalDataFrame.loc[cytokine].shape)
            #Loop through every value in the current cytokine's portion of the dataframe
            for i in range(0,finalDataFrame.loc[cytokine].values.shape[0]):
                for j in range(0,finalDataFrame.loc[cytokine].values.shape[1]):
                    currentGFIval = finalDataFrame.loc[cytokine].values[i,j]
                    if currentGFIval > upperGFILOD: #If intensity is greater than the upper GFI LOD
                        currentConcVal = upperConcLOD #Concentration is set to the upper concentration LOD
                    elif currentGFIval <= upperGFILOD and currentGFIval >= lowerGFILOD: #If intensity is between the upper and lower GFI LODs
                        currentConcVal = InverseHill(np.log10(currentGFIval),fittingParameters[cyt,:]) #Use the cytokine's previous Hill fit parameters to obtain the concentration
                    else: #If intensity is less than the background GFI LOD
                        currentConcVal = lowerConcLOD #Concentration is set to the lower concentration LOD
                    smallConcentrationMatrix[i,j] = currentConcVal
            concentrationList.append(smallConcentrationMatrix)
        concentrationMatrix = np.vstack(concentrationList)
        finalDataFrameConcentration = pd.DataFrame(concentrationMatrix,index=finalDataFrame.index,columns=finalDataFrame.columns)
        finalDataFrameConcentration.columns.name = 'Time'
        #Fill in n/a's (caused by undetectable fluorescence) with the lower concentration LOD
        finalDataFrameConcentration.fillna(lowerConcLOD, inplace=True)
        with open('semiProcessedData/cytokineConcentrationPickleFile-'+folderName+'.pkl', "wb") as f:
            pickle.dump(finalDataFrameConcentration, f)
        #Create and save modified Df (original concentration df with various modifications due to experimental error)
        #modifiedDf = returnModifiedDf(experimentNumber,finalDataFrame,dataType)
        #with open('semiProcessedData/cytokineGFIPickleFile-'+folderName+'-modified.pkl', "wb") as f:
        #    pickle.dump(modifiedDf, f)
        modifiedConcentrationDf = returnModifiedDf(experimentNumber,finalDataFrameConcentration,dataType)
        with open('semiProcessedData/cytokineConcentrationPickleFile-'+folderName+'-modified.pkl', "wb") as f:
            pickle.dump(modifiedConcentrationDf, f)
        if secondPathBool:
            with open(secondPath+'/cytokineConcentrationPickleFile-'+folderName+'-modified.pkl', "wb") as f:
                pickle.dump(modifiedConcentrationDf, f)
        finalDataFrame = modifiedConcentrationDf
    if dataType == 'cell':
        allRawData,newLevelList = cleanUpFlowjoCSV(plateNames,folderName,dataType)
        finalDataFrame = createAndCombineBaseDataFrames(folderName,allRawData,numPlates,numTimePoints,newLevelList,cellHeaderNames,paired,contiguous,replicateWise,alternatingPlatewise)
        #Create and save modified Df (original df with various modifications due to experimental error)
        with open('semiProcessedData/cellStatisticPickleFile-'+folderName+'.pkl', "wb") as f:
            pickle.dump(finalDataFrame, f)
        modifiedDataFrame = returnModifiedDf(experimentNumber,finalDataFrame,dataType)
        with open('semiProcessedData/cellStatisticPickleFile-'+folderName+'-modified.pkl', "wb") as f:
            pickle.dump(modifiedDataFrame, f)
        finalDataFrame = modifiedDataFrame
    if dataType == 'singlecell':
        allRawData,newLevelList = cleanUpFlowjoCSV(plateNames,folderName,dataType)
        finalDataFrame = createAndCombineBaseDataFrames(folderName,allRawData,numPlates,numTimePoints,newLevelList,singleCellHeaderNames,paired,contiguous,replicateWise,alternatingPlatewise)
    else:
        pass
    return finalDataFrame
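#The nested loops in createFullDataFrames above clamp each GFI value to the calibration curve's
#linear range before inverting the Hill fit. The same per-cytokine logic could be written in a
#more vectorized form; the sketch below is a hypothetical helper (not used by the pipeline) that
#assumes the same [lowerGFILOD, upperGFILOD, lowerConcLOD, upperConcLOD] ordering and an
#InverseHill that accepts scalar log10(GFI) values, as in the loop above.
def _gfiToConcentrationSketch(gfiMatrix, cytokineFitParams, lods):
    lowerGFILOD, upperGFILOD, lowerConcLOD, upperConcLOD = lods
    #Values below the lower GFI LOD (and NaNs) map to the lower concentration LOD
    conc = np.full(gfiMatrix.shape, lowerConcLOD, dtype=float)
    #Values above the upper GFI LOD map to the upper concentration LOD
    conc[gfiMatrix > upperGFILOD] = upperConcLOD
    #Values inside the linear range are inverted through the Hill fit
    inRange = (gfiMatrix >= lowerGFILOD) & (gfiMatrix <= upperGFILOD)
    for idx in zip(*np.where(inRange)):
        conc[idx] = InverseHill(np.log10(gfiMatrix[idx]), cytokineFitParams)
    return conc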
def produceCBADilutedData(cbaCorrExpNum, folderName, excel_data):
    #Load the CBA correction parameters for this folder: condition levels, experiment names, timepoints, and dilution factors
    fullLevelList = json.load(open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r'))[0]
    names = json.load(open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r'))[1]
    timepoints = json.load(open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r'))[2]
    dilutionFactors = json.load(open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r'))[3]
    calibrateExperiment(folderName, '', 1e9, 'nM',
                        excel_data['NumberOfCBAStandardDilutions'][cbaCorrExpNum - 1],
                        excel_data['CBAStandardDilutedVolume'][cbaCorrExpNum - 1])
    dflist = []
    for levelList, i in zip(fullLevelList, range(len(fullLevelList))):
        plateLetter = upperCase[i]
        matrixList = []
        cytList = [None] * 7
        for conditionList, j in zip(levelList, range(len(levelList))):
            plateNumber = j + 1
            print(len(levelList))
            plate = plateLetter + str(plateNumber)
            values, _ = cleanUpFlowjoCSV([plate], names[i], 'cytcorr')
            values = values[0]
            #Accumulate each cytokine's column across the plates of this experiment
            for cytIndex in range(1, values.shape[1] - 1):
                if j == 0:
                    cytList[cytIndex - 1] = values.iloc[:, cytIndex]
                else:
                    cytList[cytIndex - 1] = pd.concat([cytList[cytIndex - 1], values.iloc[:, cytIndex]])
        cytMatrixList = []
        for cytl in cytList:
            cytmatrix = np.reshape(cytl.values, (len(levelList), timepoints[i]))
            cytMatrixList.append(cytmatrix)
        fullMatrix = np.vstack(cytMatrixList)
        with open('../' + names[i] + '/inputFiles/experimentParameters-' + names[i] + '.json') as f:
            experimentParameters = json.load(f)
        levelNames = experimentParameters[1]
        columnNames = experimentParameters[3]
        #Build a (Cytokine x condition levels) MultiIndex for this experiment's dataframe
        indexList = []
        for cytokine in listOfCytokines:
            for level in fullLevelList[i]:
                indexList.append([cytokine] + level)
        multiIndex = pd.MultiIndex.from_tuples(indexList, names=['Cytokine'] + levelNames)
        df = pd.DataFrame(fullMatrix, index=multiIndex, columns=columnNames)
        df.columns.name = 'Time'
        dflist.append(df)
    dfGFIDict = {}
    dilutionFactorDict = {}
    for name, i in zip(names, range(len(names))):
        dfGFIDict[name] = dflist[i]
        dilutionFactorDict[name] = dilutionFactors[i]
    #Convert each GFI dataframe to concentrations using the calibration curves fit above
    dfConcDict = {}
    for key in dfGFIDict:
        dfgfi = dfGFIDict[key]
        dfconc = calibrateDataFrame(folderName, '', cbaCorrExpNum, 1e9, 'nM', 'cyt', dfgfi)
        if '0404' in key:
            dfconc.iloc[:, 4:8] /= 2
        dfConcDict[key] = dfconc
    with open('semiProcessedData/correctedCytokineDataFrames-' + folderName + '-Concentration.pkl', 'wb') as f:
        pickle.dump(dfConcDict, f)
    #Write each corrected dataframe and its dilution factor into the corresponding experiment folder
    for key in dfConcDict:
        os.chdir('../' + key)
        dfconc = dfConcDict[key]
        dilutionFactor = dilutionFactorDict[key]
        with open('semiProcessedData/correctedCytokineDataFrameAndDilutionFactor-' + key + '.pkl', 'wb') as f:
            print('wat')
            pickle.dump([dfconc, dilutionFactor], f)
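#For reference, produceCBADilutedData reads 'inputFiles/cbaCorrectionParameters-<folderName>.json'
#as a 4-element list: [0] fullLevelList (per-experiment condition level lists), [1] names (the
#sibling experiment folder names), [2] timepoints (number of timepoints per experiment), and
#[3] dilutionFactors (one dilution factor per experiment). The sketch below is an illustrative
#helper (not used by the pipeline) that unpacks the same four fields with a single read instead
#of re-opening the file once per field.
def _loadCBACorrectionParametersSketch(folderName):
    with open('inputFiles/cbaCorrectionParameters-' + folderName + '.json', 'r') as f:
        fullLevelList, names, timepoints, dilutionFactors = json.load(f)
    return fullLevelList, names, timepoints, dilutionFactors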