Esempi in Python per getRowsWithUniqColumn, esempi in Python per rFunctions.getRowsWithUniqColumn

Esempio n. 1

0

Mostra file

File: test_rFunctions.py Progetto: davidmam/pyMSA

    def test_getRowsWithUniqColumn(self):
        # Verifies rFunctions.getRowsWithUniqColumn on a three-row frame in
        # which column 'test1' holds the value 12 in both row 1 and row 2.
        # The expected frame consists of the first and the third input row.
        columnNames = ('test1', 'test2', 'test3')

        def orderColumns(frame):
            # The dict a data frame is built from is not ordered, so the
            # columns are ordered first to make sure that column 'test2' is
            # always at the same position.
            positions = R.IntVector([rFunctions.index(frame, name) for name in columnNames])
            return frame.rx[True, R.r['with'](frame, R.r['order'](positions))]

        def flatten(frame):
            # Two data frames can't be compared directly, so every column is
            # reduced to [column name, first value, second value].
            return [[entry[0], entry[1][0], entry[1][1]] for entry in frame.iteritems()]

        expectedSubset = orderColumns(R.DataFrame({
            'test1': R.IntVector((12, 15)),
            'test2': R.IntVector((32, 12)),
            'test3': R.IntVector((3, 26)),
        }))

        # note that test1 has 12 in row 1 and row 2
        testFrame = orderColumns(R.DataFrame({
            'test1': R.IntVector((12, 12, 15)),
            'test2': R.IntVector((32, 4, 12)),
            'test3': R.IntVector((3, 12, 26)),
        }))

        # only two of the three rows are expected to remain, because test1
        # had 12 twice
        actualDfSubset = rFunctions.getRowsWithUniqColumn(testFrame, 'test1')

        self.assertEqual(flatten(expectedSubset), flatten(actualDfSubset))

Esempio n. 2

0

Mostra file

File: test_rFunctions.py Progetto: npklein/pyMSA

    def test_getRowsWithUniqColumn(self):
        # Builds a three-row data frame whose 'test1' column contains 12 twice,
        # runs rFunctions.getRowsWithUniqColumn on it and compares the result
        # with a hand-built two-row frame (input rows 1 and 3).
        columns = ('test1', 'test2', 'test3')

        expectedSubset = R.DataFrame({
            'test1': R.IntVector((12, 15)),
            'test2': R.IntVector((32, 12)),
            'test3': R.IntVector((3, 26)),
        })
        # The dict the data frame is made from is not ordered, so the frame is
        # ordered first to make sure that column 'test2' is always at the same
        # position.
        expectedIndices = R.IntVector(
            [rFunctions.index(expectedSubset, column) for column in columns])
        expectedColumnOrder = R.r['order'](expectedIndices)
        expectedSubset = expectedSubset.rx[
            True, R.r['with'](expectedSubset, expectedColumnOrder)]

        # note that test1 has 12 in row 1 and row 2
        testFrame = R.DataFrame({
            'test1': R.IntVector((12, 12, 15)),
            'test2': R.IntVector((32, 4, 12)),
            'test3': R.IntVector((3, 12, 26)),
        })
        # same column ordering as applied to the expected frame above
        testIndices = R.IntVector(
            [rFunctions.index(testFrame, column) for column in columns])
        testColumnOrder = R.r['order'](testIndices)
        testFrame = testFrame.rx[True, R.r['with'](testFrame, testColumnOrder)]

        # only two of the three rows are expected to remain, because test1
        # had 12 twice
        actualDfSubset = rFunctions.getRowsWithUniqColumn(testFrame, 'test1')

        # Two data frames can't be compared directly, so only their values are
        # compared: per column the name plus the first and second value.
        expectedResultList = [
            [item[0], item[1][0], item[1][1]]
            for item in expectedSubset.iteritems()
        ]
        actualResultList = [
            [item[0], item[1][0], item[1][1]]
            for item in actualDfSubset.iteritems()
        ]

        self.assertEqual(expectedResultList, actualResultList)

Esempio n. 3

0

Mostra file

File: test_rPlotGenerics.py Progetto: npklein/pyMSA

    def test_histogram(self):
        # Exercises Plots.histogram twice: once with a single vector and all
        # keyword arguments (the png must exist afterwards), and once with
        # three vectors (passes as long as no error is raised).

        ###TEST 1 HISTOGRAM###
        histogramPng = testFolder + 'test_histogram.png'
        # if the output file already exists, remove it
        if os.path.exists(histogramPng):
            os.remove(histogramPng)

        # reading in a csv file, separated by tabs
        csvData = rFunctions.readCsvFile(testFolder + 'feature.csv',
                                         sep='\t',
                                         head=True,
                                         na='N/A')
        # get only the rows with unique ids and put them in a new data frame
        csvUniqID = rFunctions.getRowsWithUniqColumn(csvData, 'id')
        # Get the intensity column of the de-duplicated frame. The vector used
        # to be read from csvData, which silently ignored the unique-id
        # filtering done on the line above.
        intensityVector = csvUniqID[rFunctions.index(csvUniqID, 'intensity')]
        logIntensityVector = rFunctions.takeLog(intensityVector, 10)

        # using all possible **kwargs arguments to test if they are all parsed correctly
        self.plots.histogram(histogramPng,
                             logIntensityVector,
                             plotArgs={'labels': True},
                             width=400,
                             height=400,
                             title='test #features per intensity',
                             ylab='# of test features')
        # if after this the output does not exist, fail the test
        if not os.path.exists(histogramPng):
            self.fail(
                testFolder +
                'test_histogram.png does not exist. File not written out correctly'
            )
        else:
            os.remove(histogramPng)

        ###TEST 3 HISTOGRAMS
        # if the test doesn't give an error it succeeded
        outpng = testFolder + 'testOverlapHistogram.png'
        # remove a stale file to make sure that the test isn't passing when
        # the method doesn't work, but the file already exists
        if os.path.exists(outpng):
            os.remove(outpng)
        vector1 = R.IntVector((0, 2, 2, 3, 3, 3, 4, 4, 5))
        vector2 = R.IntVector((2, 4, 4, 5, 5, 5, 6, 6, 7))
        vector3 = R.IntVector((4, 6, 6, 7, 7, 7, 8, 8, 9))
        plots = rPlots.Plots()
        plots.histogram(outpng, vector1, vector2, vector3)
        R.r['dev.off']()
        # clean up the generated plot
        if os.path.exists(outpng):
            os.remove(outpng)

Esempio n. 4

0

Mostra file

File: test_rPlotGenerics.py Progetto: npklein/pyMSA

    def test_boxplotFormulae(self):
        # Merges the unique-id feature rows with the precursor table, replaces
        # the merged 'intensity' column by the rounded log10 intensity and
        # checks that Plots.boxplotFormulae writes its png file.
        plotPath = testFolder + 'boxplotFormulae.png'

        # start clean: if the output file already exists, remove it
        if os.path.exists(plotPath):
            os.remove(plotPath)

        featDataframe = rFunctions.readCsvFile(testFolder + 'feature.csv')
        featDataframeUniq = rFunctions.getRowsWithUniqColumn(featDataframe, 'id')
        precursorPerFeatureDataframe = rFunctions.readCsvFile(
            testFolder + 'feature_precursor.csv', head=True, sep='\t')
        mergedFeatureDataframe = R.r['merge'](featDataframeUniq,
                                              precursorPerFeatureDataframe)

        # rounded log10 of the intensities of the unique-id frame ...
        uniqIntensities = featDataframeUniq[
            rFunctions.index(featDataframeUniq, 'intensity')]
        roundedLogIntensities = R.r['round'](
            rFunctions.takeLog(uniqIntensities, 10))
        # ... written over the 'intensity' column of the merged frame
        mergedFeatureDataframe[
            rFunctions.index(mergedFeatureDataframe, 'intensity')
        ] = roundedLogIntensities

        vector1 = mergedFeatureDataframe[
            rFunctions.index(mergedFeatureDataframe, 'X..precursors')]
        vector2 = mergedFeatureDataframe[
            rFunctions.index(mergedFeatureDataframe, 'intensity')]

        self.plots.boxplotFormulae(plotPath,
                                   vector1,
                                   vector2,
                                   mergedFeatureDataframe,
                                   title='MS/MS per feature per intensity',
                                   ylab='# of MS/MS per feature',
                                   xlab='Rounded log10 of intensity')
        R.r['dev.off']()

        if os.path.exists(plotPath):
            # the plot was written: remove it again
            os.remove(plotPath)
        else:
            # if after plotting the output does not exist, fail the test
            self.fail(
                testFolder +
                'boxplotFormulae.png does not exist. File not written out correctly'
            )

Esempio n. 5

0

Mostra file

File: test_rPlotGenerics.py Progetto: davidmam/pyMSA

    def test_boxplotFormulae(self):
        # Plots the 'X..precursors' column against the rounded log10 intensity
        # of the merged feature/precursor data and checks that the png file is
        # written out.
        pngPath = testFolder+'boxplotFormulae.png'
        # if the output file already exists, remove it
        if os.path.exists(pngPath):
            os.remove(pngPath)

        featDataframe = rFunctions.readCsvFile(testFolder+'feature.csv')
        featDataframeUniq = rFunctions.getRowsWithUniqColumn(featDataframe, 'id')
        precursorPerFeatureDataframe = rFunctions.readCsvFile(testFolder+'feature_precursor.csv', head=True, sep='\t')
        mergedFeatureDataframe = R.r['merge'](featDataframeUniq, precursorPerFeatureDataframe)

        # overwrite the merged 'intensity' column with the rounded log10 of
        # the intensities taken from the unique-id frame
        intensityOfUniq = featDataframeUniq[rFunctions.index(featDataframeUniq, 'intensity')]
        roundedLog = R.r['round'](rFunctions.takeLog(intensityOfUniq, 10))
        mergedFeatureDataframe[rFunctions.index(mergedFeatureDataframe, 'intensity')] = roundedLog

        vector1 = mergedFeatureDataframe[rFunctions.index(mergedFeatureDataframe, 'X..precursors')]
        vector2 = mergedFeatureDataframe[rFunctions.index(mergedFeatureDataframe, 'intensity')]

        plotLabels = {
            'title': 'MS/MS per feature per intensity',
            'ylab': '# of MS/MS per feature',
            'xlab': 'Rounded log10 of intensity',
        }
        self.plots.boxplotFormulae(pngPath, vector1, vector2, mergedFeatureDataframe, **plotLabels)
        R.r['dev.off']()

        if os.path.exists(pngPath):
            # the plot exists: remove it again
            os.remove(pngPath)
        else:
            # if after plotting the output does not exist, fail the test
            self.fail(testFolder+'boxplotFormulae.png does not exist. File not written out correctly')

Esempio n. 6

0

Mostra file

File: test_rPlotGenerics.py Progetto: davidmam/pyMSA

    def test_histogram(self):
        # Exercises Plots.histogram twice: once with a single vector and all
        # keyword arguments (the png must exist afterwards), and once with
        # three vectors (passes as long as no error is raised).

        ###TEST 1 HISTOGRAM###
        histogramPng = testFolder+'test_histogram.png'
        # if the output file already exists, remove it
        if os.path.exists(histogramPng):
            os.remove(histogramPng)

        # reading in a csv file, separated by tabs
        csvData = rFunctions.readCsvFile(testFolder+'feature.csv', sep = '\t', head=True, na='N/A')
        # get only the rows with unique ids and put them in a new data frame
        csvUniqID = rFunctions.getRowsWithUniqColumn(csvData, 'id')
        # Get the intensity column of the de-duplicated frame. The vector used
        # to be read from csvData, which silently ignored the unique-id
        # filtering done on the line above.
        intensityVector = csvUniqID[rFunctions.index(csvUniqID, 'intensity')]
        logIntensityVector = rFunctions.takeLog(intensityVector, 10)

        # using all possible **kwargs arguments to test if they are all parsed correctly
        self.plots.histogram(histogramPng, logIntensityVector, plotArgs={'labels':True}, width=400, height=400, title='test #features per intensity',  ylab = '# of test features')
        # if after this the output does not exist, fail the test
        if not os.path.exists(histogramPng):
            self.fail(testFolder+'test_histogram.png does not exist. File not written out correctly')
        else:
            os.remove(histogramPng)


        ###TEST 3 HISTOGRAMS
        # if the test doesn't give an error it succeeded
        outpng = testFolder+'testOverlapHistogram.png'
        # remove a stale file to make sure that the test isn't passing when
        # the method doesn't work, but the file already exists
        if os.path.exists(outpng):
            os.remove(outpng)
        vector1 = R.IntVector((0,2,2,3,3,3,4,4,5))
        vector2 = R.IntVector((2,4,4,5,5,5,6,6,7))
        vector3 = R.IntVector((4,6,6,7,7,7,8,8,9))
        plots = rPlots.Plots()
        plots.histogram(outpng,vector1,vector2,vector3)
        R.r['dev.off']()
        # clean up the generated plot
        if os.path.exists(outpng):
            os.remove(outpng)