Ejemplo n.º 1
0
    def test_custom_var_name(self):
        result5 = melt(self.df, var_name=self.var_name)
        self.assertEqual(result5.columns.tolist(), ['var', 'value'])

        result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name)
        self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value'])

        result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name)
        self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var',
                                                    'value'])

        result8 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A',
                       var_name=self.var_name)
        self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var',
                                                    'value'])

        result9 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                       var_name=self.var_name)
        expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                               'id2': self.df['id2'].tolist() * 2,
                               self.var_name: ['A'] * 10 + ['B'] * 10,
                               'value': (self.df['A'].tolist() +
                                         self.df['B'].tolist())},
                              columns=['id1', 'id2', self.var_name, 'value'])
        tm.assert_frame_equal(result9, expected9)
Ejemplo n.º 2
0
    def test_custom_var_name(self):
        result5 = melt(self.df, var_name=self.var_name)
        self.assertEqual(result5.columns.tolist(), ['var', 'value'])

        result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name)
        self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value'])

        result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name)
        self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var',
                                                    'value'])

        result8 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A',
                       var_name=self.var_name)
        self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var',
                                                    'value'])

        result9 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                       var_name=self.var_name)
        expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                               'id2': self.df['id2'].tolist() * 2,
                               self.var_name: ['A'] * 10 + ['B'] * 10,
                               'value': (self.df['A'].tolist() +
                                         self.df['B'].tolist())},
                              columns=['id1', 'id2', self.var_name, 'value'])
        tm.assert_frame_equal(result9, expected9)
Ejemplo n.º 3
0
    def test_custom_value_name(self):
        result10 = melt(self.df, value_name=self.value_name)
        self.assertEqual(result10.columns.tolist(), ['variable', 'val'])

        result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name)
        self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val'])

        result12 = melt(self.df, id_vars=['id1', 'id2'],
                        value_name=self.value_name)
        self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable',
                                                     'val'])

        result13 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A',
                        value_name=self.value_name)
        self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable',
                                                     'val'])

        result14 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                        value_name=self.value_name)
        expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                                'id2': self.df['id2'].tolist() * 2,
                                'variable': ['A'] * 10 + ['B'] * 10,
                                self.value_name: (self.df['A'].tolist() +
                                                  self.df['B'].tolist())},
                               columns=['id1', 'id2', 'variable',
                                        self.value_name])
        tm.assert_frame_equal(result14, expected14)
Ejemplo n.º 4
0
    def test_custom_value_name(self):
        result10 = melt(self.df, value_name=self.value_name)
        self.assertEqual(result10.columns.tolist(), ['variable', 'val'])

        result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name)
        self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val'])

        result12 = melt(self.df, id_vars=['id1', 'id2'],
                        value_name=self.value_name)
        self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable',
                                                     'val'])

        result13 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A',
                        value_name=self.value_name)
        self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable',
                                                     'val'])

        result14 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                        value_name=self.value_name)
        expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                                'id2': self.df['id2'].tolist() * 2,
                                'variable': ['A'] * 10 + ['B'] * 10,
                                self.value_name: (self.df['A'].tolist() +
                                                  self.df['B'].tolist())},
                               columns=['id1', 'id2', 'variable',
                                        self.value_name])
        tm.assert_frame_equal(result14, expected14)
Ejemplo n.º 5
0
def test_melt():
    """Smoke-test ``melt`` with zero, one and two identifier columns.

    The melted frames are discarded; the test only requires that each call
    completes without raising.
    """
    frame = tm.makeTimeDataFrame()[:10]
    # Derive 0/1 identifier columns from the sign of 'A' and 'B'.
    for id_column, source_column in (('id1', 'A'), ('id2', 'B')):
        frame[id_column] = (frame[source_column] > 0).astype(int)

    for extra_kwargs in ({}, {'id_vars': ['id1']}, {'id_vars': ['id1', 'id2']}):
        melt(frame, **extra_kwargs)
Ejemplo n.º 6
0
def test_melt():
    """Smoke-test ``melt`` with zero, one and two identifier columns.

    The melted frames are discarded; the test only requires that each call
    completes without raising.
    """
    frame = tm.makeTimeDataFrame()[:10]
    # Derive 0/1 identifier columns from the sign of 'A' and 'B'.
    for id_column, source_column in (('id1', 'A'), ('id2', 'B')):
        frame[id_column] = (frame[source_column] > 0).astype(int)

    for extra_kwargs in ({}, {'id_vars': ['id1']}, {'id_vars': ['id1', 'id2']}):
        melt(frame, **extra_kwargs)
Ejemplo n.º 7
0
    def test_default_col_names(self):
        result = melt(self.df)
        self.assertEqual(result.columns.tolist(), ['variable', 'value'])

        result1 = melt(self.df, id_vars=['id1'])
        self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value'])

        result2 = melt(self.df, id_vars=['id1', 'id2'])
        self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value'])
Ejemplo n.º 8
0
    def test_default_col_names(self):
        result = melt(self.df)
        self.assertEqual(result.columns.tolist(), ['variable', 'value'])

        result1 = melt(self.df, id_vars=['id1'])
        self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value'])

        result2 = melt(self.df, id_vars=['id1', 'id2'])
        self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value'])
Ejemplo n.º 9
0
    def test_default_col_names(self):
        result = melt(self.df)
        self.assertEqual(result.columns.tolist(), ["variable", "value"])

        result1 = melt(self.df, id_vars=["id1"])
        self.assertEqual(result1.columns.tolist(), ["id1", "variable", "value"])

        result2 = melt(self.df, id_vars=["id1", "id2"])
        self.assertEqual(result2.columns.tolist(), ["id1", "id2", "variable", "value"])
Ejemplo n.º 10
0
    def test_value_vars(self):
        result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A')
        self.assertEqual(len(result3), 10)

        result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'])
        expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                               'id2': self.df['id2'].tolist() * 2,
                               'variable': ['A']*10 + ['B']*10,
                               'value': self.df['A'].tolist() + self.df['B'].tolist()},
                              columns=['id1', 'id2', 'variable', 'value'])                  
        tm.assert_frame_equal(result4, expected4)
Ejemplo n.º 11
0
    def test_value_vars(self):
        result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A')
        self.assertEqual(len(result3), 10)

        result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'])
        expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                               'id2': self.df['id2'].tolist() * 2,
                               'variable': ['A']*10 + ['B']*10,
                               'value': self.df['A'].tolist() + self.df['B'].tolist()},
                              columns=['id1', 'id2', 'variable', 'value'])                  
        tm.assert_frame_equal(result4, expected4)
Ejemplo n.º 12
0
    def test_tuple_vars_fail_with_multiindex(self):
        """Verify ``melt`` rejects tuple ``id_vars``/``value_vars`` on MultiIndex columns.

        When the columns are a MultiIndex and a bare tuple (rather than a
        list of tuples) is passed for either argument, ``melt`` must raise a
        ``ValueError`` whose message mentions ``MultiIndex``.
        """
        column_a = ('A', 'a')
        column_b = ('B', 'b')
        wrapped_a = [column_a]
        wrapped_b = [column_b]

        # Each pairing has a bare tuple in at least one position.
        invalid_pairings = (
            (column_a, wrapped_b),
            (wrapped_a, column_b),
            (column_a, column_b),
        )
        for id_vars, value_vars in invalid_pairings:
            with tm.assertRaisesRegexp(ValueError, r'MultiIndex'):
                melt(self.df1, id_vars=id_vars, value_vars=value_vars)
Ejemplo n.º 13
0
    def test_tuple_vars_fail_with_multiindex(self):
        """Verify ``melt`` rejects tuple ``id_vars``/``value_vars`` on MultiIndex columns.

        When the columns are a MultiIndex and a bare tuple (rather than a
        list of tuples) is passed for either argument, ``melt`` must raise a
        ``ValueError`` whose message mentions ``MultiIndex``.
        """
        column_a = ('A', 'a')
        column_b = ('B', 'b')
        wrapped_a = [column_a]
        wrapped_b = [column_b]

        # Each pairing has a bare tuple in at least one position.
        invalid_pairings = (
            (column_a, wrapped_b),
            (wrapped_a, column_b),
            (column_a, column_b),
        )
        for id_vars, value_vars in invalid_pairings:
            with tm.assertRaisesRegexp(ValueError, r'MultiIndex'):
                melt(self.df1, id_vars=id_vars, value_vars=value_vars)
Ejemplo n.º 14
0
    def test_value_vars(self):
        result3 = melt(self.df, id_vars=["id1", "id2"], value_vars="A")
        self.assertEqual(len(result3), 10)

        result4 = melt(self.df, id_vars=["id1", "id2"], value_vars=["A", "B"])
        expected4 = DataFrame(
            {
                "id1": self.df["id1"].tolist() * 2,
                "id2": self.df["id2"].tolist() * 2,
                "variable": ["A"] * 10 + ["B"] * 10,
                "value": self.df["A"].tolist() + self.df["B"].tolist(),
            },
            columns=["id1", "id2", "variable", "value"],
        )
        tm.assert_frame_equal(result4, expected4)
Ejemplo n.º 15
0
    def test_method_signatures(self):
        tm.assert_frame_equal(self.df.melt(),
                              melt(self.df))

        tm.assert_frame_equal(self.df.melt(id_vars=['id1', 'id2'],
                                           value_vars=['A', 'B']),
                              melt(self.df,
                                   id_vars=['id1', 'id2'],
                                   value_vars=['A', 'B']))

        tm.assert_frame_equal(self.df.melt(var_name=self.var_name,
                                           value_name=self.value_name),
                              melt(self.df,
                                   var_name=self.var_name,
                                   value_name=self.value_name))

        tm.assert_frame_equal(self.df1.melt(col_level=0),
                              melt(self.df1, col_level=0))
Ejemplo n.º 16
0
    def test_vars_work_with_multiindex(self):
        expected = DataFrame({
            ('A', 'a'): self.df1[('A', 'a')],
            'CAP': ['B'] * len(self.df1),
            'low': ['b'] * len(self.df1),
            'value': self.df1[('B', 'b')],
        }, columns=[('A', 'a'), 'CAP', 'low', 'value'])

        result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')])
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 17
0
    def test_custom_var_and_value_name(self):

        result15 = melt(self.df,
                        var_name=self.var_name,
                        value_name=self.value_name)
        self.assertEqual(result15.columns.tolist(), ['var', 'val'])

        result16 = melt(self.df,
                        id_vars=['id1'],
                        var_name=self.var_name,
                        value_name=self.value_name)
        self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val'])

        result17 = melt(self.df,
                        id_vars=['id1', 'id2'],
                        var_name=self.var_name,
                        value_name=self.value_name)
        self.assertEqual(result17.columns.tolist(),
                         ['id1', 'id2', 'var', 'val'])

        result18 = melt(df,
                        id_vars=['id1', 'id2'],
                        value_vars='A',
                        var_name=self.var_name,
                        value_name=self.value_name)
        self.assertEqual(result18.columns.tolist(),
                         ['id1', 'id2', 'var', 'val'])

        result19 = melt(self.df,
                        id_vars=['id1', 'id2'],
                        value_vars=['A', 'B'],
                        var_name=self.var_name,
                        value_name=self.value_name)
        expected19 = DataFrame(
            {
                'id1': self.df['id1'].tolist() * 2,
                'id2': self.df['id2'].tolist() * 2,
                var_name: ['A'] * 10 + ['B'] * 10,
                value_name: self.df['A'].tolist() + self.df['B'].tolist()
            },
            columns=['id1', 'id2', self.var_name, self.value_name])
        tm.assert_frame_equal(result19, expected19)
Ejemplo n.º 18
0
    def test_vars_work_with_multiindex(self):
        expected = DataFrame(
            {
                ('A', 'a'): self.df1[('A', 'a')],
                'CAP': ['B'] * len(self.df1),
                'low': ['b'] * len(self.df1),
                'value': self.df1[('B', 'b')],
            },
            columns=[('A', 'a'), 'CAP', 'low', 'value'])

        result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')])
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 19
0
    def test_custom_var_and_value_name(self):

        result15 = melt(self.df, var_name=self.var_name, value_name=self.value_name)
        self.assertEqual(result15.columns.tolist(), ["var", "val"])

        result16 = melt(self.df, id_vars=["id1"], var_name=self.var_name, value_name=self.value_name)
        self.assertEqual(result16.columns.tolist(), ["id1", "var", "val"])

        result17 = melt(self.df, id_vars=["id1", "id2"], var_name=self.var_name, value_name=self.value_name)
        self.assertEqual(result17.columns.tolist(), ["id1", "id2", "var", "val"])

        result18 = melt(
            self.df, id_vars=["id1", "id2"], value_vars="A", var_name=self.var_name, value_name=self.value_name
        )
        self.assertEqual(result18.columns.tolist(), ["id1", "id2", "var", "val"])

        result19 = melt(
            self.df, id_vars=["id1", "id2"], value_vars=["A", "B"], var_name=self.var_name, value_name=self.value_name
        )
        expected19 = DataFrame(
            {
                "id1": self.df["id1"].tolist() * 2,
                "id2": self.df["id2"].tolist() * 2,
                self.var_name: ["A"] * 10 + ["B"] * 10,
                self.value_name: self.df["A"].tolist() + self.df["B"].tolist(),
            },
            columns=["id1", "id2", self.var_name, self.value_name],
        )
        tm.assert_frame_equal(result19, expected19)

        df20 = self.df.copy()
        df20.columns.name = "foo"
        result20 = melt(df20)
        self.assertEqual(result20.columns.tolist(), ["foo", "value"])
Ejemplo n.º 20
0
    def test_custom_var_and_value_name(self):

        result15 = melt(self.df, var_name=self.var_name,
                        value_name=self.value_name)
        self.assertEqual(result15.columns.tolist(), ['var', 'val'])

        result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name,
                        value_name=self.value_name)
        self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val'])

        result17 = melt(self.df, id_vars=['id1', 'id2'],
                        var_name=self.var_name, value_name=self.value_name)
        self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val'
                                                     ])

        result18 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A',
                        var_name=self.var_name, value_name=self.value_name)
        self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val'
                                                     ])

        result19 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                        var_name=self.var_name, value_name=self.value_name)
        expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                                'id2': self.df['id2'].tolist() * 2,
                                self.var_name: ['A'] * 10 + ['B'] * 10,
                                self.value_name: (self.df['A'].tolist() +
                                                  self.df['B'].tolist())},
                               columns=['id1', 'id2', self.var_name,
                                        self.value_name])
        tm.assert_frame_equal(result19, expected19)

        df20 = self.df.copy()
        df20.columns.name = 'foo'
        result20 = melt(df20)
        self.assertEqual(result20.columns.tolist(), ['foo', 'value'])
Ejemplo n.º 21
0
    def test_value_vars_types(self):
        # GH 15348
        expected = DataFrame({'id1': self.df['id1'].tolist() * 2,
                              'id2': self.df['id2'].tolist() * 2,
                              'variable': ['A'] * 10 + ['B'] * 10,
                              'value': (self.df['A'].tolist() +
                                        self.df['B'].tolist())},
                             columns=['id1', 'id2', 'variable', 'value'])

        for type_ in (tuple, list, np.array):
            result = melt(self.df, id_vars=['id1', 'id2'],
                          value_vars=type_(('A', 'B')))
            tm.assert_frame_equal(result, expected)
Ejemplo n.º 22
0
Archivo: PPI.py Proyecto: cbxx/Phantom
    def getGetSurfaceBins(self, gridspacing=10.0):
        """
        Bin the surface atoms on a cubic grid and compute the mean ``probPPI`` per bin.

        Rows of ``self._df`` flagged ``surfaceAtom == True`` are assigned to
        grid cells by their ``x``/``y``/``z`` values; cells are then
        tentatively merged with their lower neighbours, and the per-bin mean
        of ``probPPI`` is returned in descending order.  As a side effect the
        columns ``bin_x``, ``bin_y``, ``bin_z`` and ``bin`` are written to
        ``self._df``.

        Args:
            gridspacing: The grid spacing to use in Angstrom (default 10.0)

        Returns:
            The mean PPI probability for each bin.  The original description
            calls this a dataframe; the code selects the single ``probPPI``
            column, so it is presumably a Series -- TODO confirm.

        Raises:
            Exception: if ``self._df`` has no ``probNPPI`` column.
        """
        self._logger.info("Calculating probabilities per bin")
        # NOTE(review): the guard looks for 'probNPPI' while everything below
        # reads 'probPPI', and the message names getSurfaceBins although this
        # method is getGetSurfaceBins -- confirm both are intended.
        if not 'probNPPI' in self._df.columns:
            raise Exception('predictPPIs has to be called before getSurfaceBins')

        # Per-axis min/max of the surface-atom coordinates, rounded outwards,
        # give one shared set of bin edges for all three axes.
        minMax = self._df.loc[self._df.surfaceAtom==True, ['x', 'y', 'z']].describe().loc[['min', 'max'],:].transpose()
        minVal = np.floor(minMax['min']).min()
        maxVal = np.ceil(minMax['max']).max()
        r = np.arange(minVal, maxVal+gridspacing, gridspacing)
        p = partial(np.digitize, bins=r)
        # Digitize each coordinate column, then build a 'x_y_z' bin label per
        # surface-atom row.
        self._df[['bin_x', 'bin_y', 'bin_z']] = self._df.loc[self._df.surfaceAtom==True, ['x', 'y', 'z']].apply(p)
        self._df['bin'] = self._df[self._df.surfaceAtom==True].apply(lambda r: '%i_%i_%i' % (r['bin_x'], r['bin_y'], r['bin_z']), axis=1)
        # unify bins
        #tmpdf = self._df[self._df.surfaceAtom==True].sort('probPPI', ascending=False).groupby(['bin', 'chain', 'resi']).first().reset_index()
        # Keep the first row of every (chain, resi, bin) group.
        melted = melt(self._df.loc[self._df.surfaceAtom==True, ['chain', 'resi', 'resn', 'probPPI', 'bin']], id_vars=['chain', 'resi', 'bin', 'resn'], value_name='probPPI')
        tmpdf = melted.groupby(['chain', 'resi', 'bin']).first().reset_index()

        # try to increase bin size
        # NOTE(review): xrange is a Python 2 builtin -- this block presumably
        # targets Python 2; confirm before running under Python 3.
        for bin_x in xrange(1, len(r)):
            for bin_y in xrange(1, len(r)):
                for bin_z in xrange(1, len(r)):
                    meid = '%i_%i_%i' % (bin_x, bin_y, bin_z)
                    me = tmpdf[tmpdf.bin == meid]            
            
                    # mesum is assigned but not read afterwards.
                    mesum = me['probPPI'].sum()
                    memean = me['probPPI'].mean()
                    if len(me) > 0 and memean > 0.0:
                        # Neighbours one step lower along each axis.
                        prevBins = [(bin_x -  1, bin_y, bin_z),
                                    (bin_x, bin_y -  1, bin_z),
                                    (bin_x, bin_y, bin_z -  1)]
                        for prevBin in prevBins:
                            binid = '%i_%i_%i' % prevBin
                            b = tmpdf[tmpdf.bin == binid]
                            # bsum is assigned but not read afterwards.
                            bsum = b['probPPI'].sum()
                            if len(b) > 0:
                                # NOTE(review): this divides the sum of the two
                                # means by the combined row count; the unused
                                # mesum/bsum suggest a pooled mean
                                # (mesum + bsum) / (len(me) + len(b)) may have
                                # been intended -- confirm.
                                combinedMean = (memean + b['probPPI'].mean()) / (len(me) + len(b))                    
                                if combinedMean >= memean:
                                    # Relabel the neighbour's rows into this bin.
                                    tmpdf.loc[(tmpdf.bin == binid), 'bin'] = meid
            
        # NOTE(review): reset_index() is called directly on the groupby
        # object, and Series.sort(ascending=...) is presumably the in-place
        # sort of an older pandas -- verify against the pandas version in use.
        bin_means = tmpdf.groupby(['chain', 'resi']).reset_index().groupby('bin').mean()['probPPI'].copy()
        bin_means.sort(ascending=False)
        return bin_means
Ejemplo n.º 23
0
    def test_custom_var_name(self):
        result5 = melt(self.df, var_name=self.var_name)
        self.assertEqual(result5.columns.tolist(), ["var", "value"])

        result6 = melt(self.df, id_vars=["id1"], var_name=self.var_name)
        self.assertEqual(result6.columns.tolist(), ["id1", "var", "value"])

        result7 = melt(self.df, id_vars=["id1", "id2"], var_name=self.var_name)
        self.assertEqual(result7.columns.tolist(), ["id1", "id2", "var", "value"])

        result8 = melt(self.df, id_vars=["id1", "id2"], value_vars="A", var_name=self.var_name)
        self.assertEqual(result8.columns.tolist(), ["id1", "id2", "var", "value"])

        result9 = melt(self.df, id_vars=["id1", "id2"], value_vars=["A", "B"], var_name=self.var_name)
        expected9 = DataFrame(
            {
                "id1": self.df["id1"].tolist() * 2,
                "id2": self.df["id2"].tolist() * 2,
                self.var_name: ["A"] * 10 + ["B"] * 10,
                "value": self.df["A"].tolist() + self.df["B"].tolist(),
            },
            columns=["id1", "id2", self.var_name, "value"],
        )
        tm.assert_frame_equal(result9, expected9)
Ejemplo n.º 24
0
    def test_custom_value_name(self):
        result10 = melt(self.df, value_name=self.value_name)
        self.assertEqual(result10.columns.tolist(), ["variable", "val"])

        result11 = melt(self.df, id_vars=["id1"], value_name=self.value_name)
        self.assertEqual(result11.columns.tolist(), ["id1", "variable", "val"])

        result12 = melt(self.df, id_vars=["id1", "id2"], value_name=self.value_name)
        self.assertEqual(result12.columns.tolist(), ["id1", "id2", "variable", "val"])

        result13 = melt(self.df, id_vars=["id1", "id2"], value_vars="A", value_name=self.value_name)
        self.assertEqual(result13.columns.tolist(), ["id1", "id2", "variable", "val"])

        result14 = melt(self.df, id_vars=["id1", "id2"], value_vars=["A", "B"], value_name=self.value_name)
        expected14 = DataFrame(
            {
                "id1": self.df["id1"].tolist() * 2,
                "id2": self.df["id2"].tolist() * 2,
                "variable": ["A"] * 10 + ["B"] * 10,
                self.value_name: self.df["A"].tolist() + self.df["B"].tolist(),
            },
            columns=["id1", "id2", "variable", self.value_name],
        )
        tm.assert_frame_equal(result14, expected14)
Ejemplo n.º 25
0
    def test_value_vars_types(self):
        # GH 15348
        expected = DataFrame(
            {
                'id1': self.df['id1'].tolist() * 2,
                'id2': self.df['id2'].tolist() * 2,
                'variable': ['A'] * 10 + ['B'] * 10,
                'value': (self.df['A'].tolist() + self.df['B'].tolist())
            },
            columns=['id1', 'id2', 'variable', 'value'])

        for type_ in (tuple, list, np.array):
            result = melt(self.df,
                          id_vars=['id1', 'id2'],
                          value_vars=type_(('A', 'B')))
            tm.assert_frame_equal(result, expected)
Ejemplo n.º 26
0
        # clean the whole, but not the parts
        rho, p1 = na_spearmanr(concatted_uncleaned_together['mean'], whole_clean['mean'])
        row['Whole Cleaned'] = rho

        rho, p1 = na_spearmanr(agg['mean'], assoc['jaccard'])
        row['Association Norms (Indiv)'] = rho

        combined_assoc = combine_measures(assoc[['compound', 'const', 'jaccard']]).sort('compound')
        rho, p1 = na_spearmanr(combined_assoc['jaccard'], whole_clean['mean'])
        row['Association Norms (Whole)'] = rho

        results.append(row)

    results = pd.DataFrame(results)
    output = melt(results, id_vars=parameters)
    output.to_csv(sys.stdout, index=False)

    # # produce plots
    # from rplots import line_plot
    # import operator
    # for p in parameters:
    #     other_params = parameters - set([p])
    #     if other_params:
    #         experiment = reduce(operator.and_, [output[op].isnull() for op in other_params])
    #         experiment = output[experiment]
    #     else:
    #         experiment = output
    #     line_plot("graphs/" + p + ".pdf", experiment, p, 'value', 'variable',
    #             ylab='Resulting Correlation',
    #             colorname="Eval Method")
Ejemplo n.º 27
0
 def time_melt_dataframe(self):
     """Melt ``self.df`` keeping ``id1`` and ``id2`` as identifier columns.

     The melted frame is discarded; judging by the ``time_`` prefix this is
     presumably a timing benchmark body.
     """
     identifier_columns = ['id1', 'id2']
     melt(self.df, id_vars=identifier_columns)
Ejemplo n.º 28
0
def test_melt():
    """Exercise ``melt`` with default and custom variable/value column names.

    For each naming variant, ``melt`` is called with every
    ``id_vars``/``value_vars`` combination (which only has to run without
    raising), and the result of melting ``'A'`` and ``'B'`` together is
    compared against a hand-built frame.
    """
    frame = tm.makeTimeDataFrame()[:10]
    frame['id1'] = (frame['A'] > 0).astype(np.int64)
    frame['id2'] = (frame['B'] > 0).astype(np.int64)

    var_name = 'var'
    value_name = 'val'

    def check_naming(naming_kwargs, variable_label, value_label):
        # The first four calls are smoke checks; their results are discarded.
        melt(frame, **naming_kwargs)
        melt(frame, id_vars=['id1'], **naming_kwargs)
        melt(frame, id_vars=['id1', 'id2'], **naming_kwargs)
        melt(frame, id_vars=['id1', 'id2'], value_vars='A', **naming_kwargs)
        melted = melt(frame, id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                      **naming_kwargs)

        # Identifiers repeat twice; 'A' values are followed by 'B' values.
        expected = DataFrame(
            {
                'id1': frame['id1'].tolist() * 2,
                'id2': frame['id2'].tolist() * 2,
                variable_label: ['A'] * 10 + ['B'] * 10,
                value_label: frame['A'].tolist() + frame['B'].tolist(),
            },
            columns=['id1', 'id2', variable_label, value_label])
        tm.assert_frame_equal(melted, expected)

    # Default column names
    check_naming({}, 'variable', 'value')

    # Supply custom name for the 'variable' column
    check_naming({'var_name': var_name}, var_name, 'value')

    # Supply custom name for the 'value' column
    check_naming({'value_name': value_name}, 'variable', value_name)

    # Supply custom names for the 'variable' and 'value' columns
    check_naming({'var_name': var_name, 'value_name': value_name},
                 var_name, value_name)
Ejemplo n.º 29
0
 def test_col_level(self):
     res1 = melt(self.df1, col_level=0)
     res2 = melt(self.df1, col_level='CAP')
     self.assertEqual(res1.columns.tolist(), ['CAP', 'value'])
     self.assertEqual(res2.columns.tolist(), ['CAP', 'value'])
Ejemplo n.º 30
0
 def test_top_level_method(self):
     result = melt(self.df)
     self.assertEqual(result.columns.tolist(), ['variable', 'value'])
Ejemplo n.º 31
0
        rho, p1 = na_spearmanr(concatted_uncleaned_together['mean'],
                               whole_clean['mean'])
        row['Whole Cleaned'] = rho

        rho, p1 = na_spearmanr(agg['mean'], assoc['jaccard'])
        row['Association Norms (Indiv)'] = rho

        combined_assoc = combine_measures(
            assoc[['compound', 'const', 'jaccard']]).sort('compound')
        rho, p1 = na_spearmanr(combined_assoc['jaccard'], whole_clean['mean'])
        row['Association Norms (Whole)'] = rho

        results.append(row)

    results = pd.DataFrame(results)
    output = melt(results, id_vars=parameters)
    output.to_csv(sys.stdout, index=False)

    # # produce plots
    # from rplots import line_plot
    # import operator
    # for p in parameters:
    #     other_params = parameters - set([p])
    #     if other_params:
    #         experiment = reduce(operator.and_, [output[op].isnull() for op in other_params])
    #         experiment = output[experiment]
    #     else:
    #         experiment = output
    #     line_plot("graphs/" + p + ".pdf", experiment, p, 'value', 'variable',
    #             ylab='Resulting Correlation',
    #             colorname="Eval Method")
Ejemplo n.º 32
0
 def test_col_level(self):
     res1 = melt(self.df1, col_level=0)
     res2 = melt(self.df1, col_level="CAP")
     self.assertEqual(res1.columns.tolist(), ["CAP", "value"])
     self.assertEqual(res1.columns.tolist(), ["CAP", "value"])
Ejemplo n.º 33
0
 def test_custom_var_and_value_name(self):
     self.df.columns.name = 'foo'
     result20 = melt(self.df)
     self.assertEqual(result20.columns.tolist(), ['foo', 'value'])
Ejemplo n.º 34
0
def test_melt():
    """Exercise ``melt`` with default and custom variable/value labels.

    The first ten rows of ``tm.makeTimeDataFrame()`` get two integer
    identifier columns, 'id1' and 'id2', flagging where columns 'A' and 'B'
    are positive.  For each naming scenario every ``id_vars``/``value_vars``
    combination has its output column labels checked, and the fully
    specified call is additionally compared with a hand-built frame.
    """
    df = tm.makeTimeDataFrame()[:10]
    df['id1'] = (df['A'] > 0).astype(int)
    df['id2'] = (df['B'] > 0).astype(int)

    var_name = 'var'
    value_name = 'val'

    def expected_frame(var_col, value_col):
        # Long-format frame expected for id_vars=['id1', 'id2'] and
        # value_vars=['A', 'B']: the id columns repeated once per melted
        # column, followed by the variable labels and the stacked values.
        return DataFrame({'id1': df['id1'].tolist() * 2,
                          'id2': df['id2'].tolist() * 2,
                          var_col: ['A'] * 10 + ['B'] * 10,
                          value_col: df['A'].tolist() + df['B'].tolist()},
                         columns=['id1', 'id2', var_col, value_col])

    naming_cases = [
        # (extra melt keyword arguments, variable label, value label)
        ({}, 'variable', 'value'),
        ({'var_name': var_name}, var_name, 'value'),
        ({'value_name': value_name}, 'variable', value_name),
        ({'var_name': var_name, 'value_name': value_name},
         var_name, value_name),
    ]

    for kwargs, var_col, value_col in naming_cases:
        tail = [var_col, value_col]

        # These results used to be computed and then ignored; check at least
        # that the output columns are the id columns followed by the labels.
        result = melt(df, **kwargs)
        assert result.columns.tolist() == tail

        result = melt(df, id_vars=['id1'], **kwargs)
        assert result.columns.tolist() == ['id1'] + tail

        result = melt(df, id_vars=['id1', 'id2'], **kwargs)
        assert result.columns.tolist() == ['id1', 'id2'] + tail

        # A scalar value_vars is accepted as well as a list.
        result = melt(df, id_vars=['id1', 'id2'], value_vars='A', **kwargs)
        assert result.columns.tolist() == ['id1', 'id2'] + tail

        result = melt(df, id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                      **kwargs)
        tm.assert_frame_equal(result, expected_frame(var_col, value_col))
Ejemplo n.º 35
0
 def test_col_level(self):
     res1 = melt(self.df1, col_level=0)
     res2 = melt(self.df1, col_level='CAP')
     self.assertEqual(res1.columns.tolist(), ['CAP', 'value'])
     self.assertEqual(res2.columns.tolist(), ['CAP', 'value'])
Ejemplo n.º 36
0
 def test_custom_var_and_value_name(self):
     self.df.columns.name = 'foo'
     result20 = melt(self.df)
     self.assertEqual(result20.columns.tolist(), ['foo', 'value'])
Ejemplo n.º 37
0
 def time_melt_dataframe(self):
     """Melt ``self.df`` keeping 'id1' and 'id2' as identifier columns.

     The melted frame is discarded and nothing is returned.
     """
     identifier_columns = ['id1', 'id2']
     melt(self.df, id_vars=identifier_columns)
Ejemplo n.º 38
0
import pandas as pd
from pandas.core.reshape import melt
import sys
import argparse

# setup argument parsing
parser = argparse.ArgumentParser(description='Melt data.')
# nargs='+' makes argparse collect one or more whitespace-separated values
# into a list, so the help text describes that form (it used to claim the
# list was comma-separated, which argparse would read as a single name).
parser.add_argument('--ids', nargs='+',
                    help='one or more column names, separated by spaces')
args = parser.parse_args()

# read csv from stdin into a dataframe
df = pd.read_csv(sys.stdin, low_memory=False)

# reshape to long format, keeping the --ids columns as identifier columns;
# when --ids is not given, args.ids is None and no id columns are passed
df = melt(df, id_vars=args.ids)

# output dataframe to stdout
df.to_csv(sys.stdout, index=False)