def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ['var', 'value']) result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name) self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value']) result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name) self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', 'value']) result8 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', var_name=self.var_name) self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', 'value']) result9 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], var_name=self.var_name) expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, self.var_name: ['A'] * 10 + ['B'] * 10, 'value': (self.df['A'].tolist() + self.df['B'].tolist())}, columns=['id1', 'id2', self.var_name, 'value']) tm.assert_frame_equal(result9, expected9)
def test_custom_value_name(self): result10 = melt(self.df, value_name=self.value_name) self.assertEqual(result10.columns.tolist(), ['variable', 'val']) result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name) self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val']) result12 = melt(self.df, id_vars=['id1', 'id2'], value_name=self.value_name) self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', 'val']) result13 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', value_name=self.value_name) self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val']) result14 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], value_name=self.value_name) expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A'] * 10 + ['B'] * 10, self.value_name: (self.df['A'].tolist() + self.df['B'].tolist())}, columns=['id1', 'id2', 'variable', self.value_name]) tm.assert_frame_equal(result14, expected14)
def test_melt():
    """Melt a small frame with zero, one and two id columns.

    Each result's column labels are verified: the id columns come first,
    followed by melt's default ``'variable'`` and ``'value'`` labels.
    """
    df = tm.makeTimeDataFrame()[:10]
    # Derive two integer id columns from the sign of the data columns.
    df['id1'] = (df['A'] > 0).astype(int)
    df['id2'] = (df['B'] > 0).astype(int)

    molten1 = melt(df)
    assert molten1.columns.tolist() == ['variable', 'value']

    molten2 = melt(df, id_vars=['id1'])
    assert molten2.columns.tolist() == ['id1', 'variable', 'value']

    molten3 = melt(df, id_vars=['id1', 'id2'])
    assert molten3.columns.tolist() == ['id1', 'id2', 'variable', 'value']
def test_default_col_names(self): result = melt(self.df) self.assertEqual(result.columns.tolist(), ['variable', 'value']) result1 = melt(self.df, id_vars=['id1']) self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value']) result2 = melt(self.df, id_vars=['id1', 'id2']) self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value'])
def test_default_col_names(self): result = melt(self.df) self.assertEqual(result.columns.tolist(), ["variable", "value"]) result1 = melt(self.df, id_vars=["id1"]) self.assertEqual(result1.columns.tolist(), ["id1", "variable", "value"]) result2 = melt(self.df, id_vars=["id1", "id2"]) self.assertEqual(result2.columns.tolist(), ["id1", "id2", "variable", "value"])
def test_value_vars(self): result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A') self.assertEqual(len(result3), 10) result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B']) expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A']*10 + ['B']*10, 'value': self.df['A'].tolist() + self.df['B'].tolist()}, columns=['id1', 'id2', 'variable', 'value']) tm.assert_frame_equal(result4, expected4)
def test_tuple_vars_fail_with_multiindex(self):
    """Check that a bare tuple for id_vars/value_vars is rejected.

    melt should fail with an informative error message if the columns
    have a MultiIndex and a tuple (rather than a list of tuples) is
    passed for ``id_vars`` or ``value_vars``.
    """
    col_a = ('A', 'a')
    col_b = ('B', 'b')

    # Every pairing has a bare tuple on at least one side.
    bad_pairings = [
        (col_a, [col_b]),
        ([col_a], col_b),
        (col_a, col_b),
    ]
    for ids, values in bad_pairings:
        with tm.assertRaisesRegexp(ValueError, r'MultiIndex'):
            melt(self.df1, id_vars=ids, value_vars=values)
def test_value_vars(self): result3 = melt(self.df, id_vars=["id1", "id2"], value_vars="A") self.assertEqual(len(result3), 10) result4 = melt(self.df, id_vars=["id1", "id2"], value_vars=["A", "B"]) expected4 = DataFrame( { "id1": self.df["id1"].tolist() * 2, "id2": self.df["id2"].tolist() * 2, "variable": ["A"] * 10 + ["B"] * 10, "value": self.df["A"].tolist() + self.df["B"].tolist(), }, columns=["id1", "id2", "variable", "value"], ) tm.assert_frame_equal(result4, expected4)
def test_method_signatures(self): tm.assert_frame_equal(self.df.melt(), melt(self.df)) tm.assert_frame_equal(self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B']), melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'])) tm.assert_frame_equal(self.df.melt(var_name=self.var_name, value_name=self.value_name), melt(self.df, var_name=self.var_name, value_name=self.value_name)) tm.assert_frame_equal(self.df1.melt(col_level=0), melt(self.df1, col_level=0))
def test_vars_work_with_multiindex(self): expected = DataFrame({ ('A', 'a'): self.df1[('A', 'a')], 'CAP': ['B'] * len(self.df1), 'low': ['b'] * len(self.df1), 'value': self.df1[('B', 'b')], }, columns=[('A', 'a'), 'CAP', 'low', 'value']) result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')]) tm.assert_frame_equal(result, expected)
def test_custom_var_and_value_name(self): result15 = melt(self.df, var_name=self.var_name, value_name=self.value_name) self.assertEqual(result15.columns.tolist(), ['var', 'val']) result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, value_name=self.value_name) self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) result17 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name, value_name=self.value_name) self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val']) result18 = melt(df, id_vars=['id1', 'id2'], value_vars='A', var_name=self.var_name, value_name=self.value_name) self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val']) result19 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) expected19 = DataFrame( { 'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, var_name: ['A'] * 10 + ['B'] * 10, value_name: self.df['A'].tolist() + self.df['B'].tolist() }, columns=['id1', 'id2', self.var_name, self.value_name]) tm.assert_frame_equal(result19, expected19)
def test_vars_work_with_multiindex(self): expected = DataFrame( { ('A', 'a'): self.df1[('A', 'a')], 'CAP': ['B'] * len(self.df1), 'low': ['b'] * len(self.df1), 'value': self.df1[('B', 'b')], }, columns=[('A', 'a'), 'CAP', 'low', 'value']) result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')]) tm.assert_frame_equal(result, expected)
def test_custom_var_and_value_name(self): result15 = melt(self.df, var_name=self.var_name, value_name=self.value_name) self.assertEqual(result15.columns.tolist(), ["var", "val"]) result16 = melt(self.df, id_vars=["id1"], var_name=self.var_name, value_name=self.value_name) self.assertEqual(result16.columns.tolist(), ["id1", "var", "val"]) result17 = melt(self.df, id_vars=["id1", "id2"], var_name=self.var_name, value_name=self.value_name) self.assertEqual(result17.columns.tolist(), ["id1", "id2", "var", "val"]) result18 = melt( self.df, id_vars=["id1", "id2"], value_vars="A", var_name=self.var_name, value_name=self.value_name ) self.assertEqual(result18.columns.tolist(), ["id1", "id2", "var", "val"]) result19 = melt( self.df, id_vars=["id1", "id2"], value_vars=["A", "B"], var_name=self.var_name, value_name=self.value_name ) expected19 = DataFrame( { "id1": self.df["id1"].tolist() * 2, "id2": self.df["id2"].tolist() * 2, self.var_name: ["A"] * 10 + ["B"] * 10, self.value_name: self.df["A"].tolist() + self.df["B"].tolist(), }, columns=["id1", "id2", self.var_name, self.value_name], ) tm.assert_frame_equal(result19, expected19) df20 = self.df.copy() df20.columns.name = "foo" result20 = melt(df20) self.assertEqual(result20.columns.tolist(), ["foo", "value"])
def test_custom_var_and_value_name(self): result15 = melt(self.df, var_name=self.var_name, value_name=self.value_name) self.assertEqual(result15.columns.tolist(), ['var', 'val']) result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, value_name=self.value_name) self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) result17 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name, value_name=self.value_name) self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val' ]) result18 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', var_name=self.var_name, value_name=self.value_name) self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val' ]) result19 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, self.var_name: ['A'] * 10 + ['B'] * 10, self.value_name: (self.df['A'].tolist() + self.df['B'].tolist())}, columns=['id1', 'id2', self.var_name, self.value_name]) tm.assert_frame_equal(result19, expected19) df20 = self.df.copy() df20.columns.name = 'foo' result20 = melt(df20) self.assertEqual(result20.columns.tolist(), ['foo', 'value'])
def test_value_vars_types(self): # GH 15348 expected = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A'] * 10 + ['B'] * 10, 'value': (self.df['A'].tolist() + self.df['B'].tolist())}, columns=['id1', 'id2', 'variable', 'value']) for type_ in (tuple, list, np.array): result = melt(self.df, id_vars=['id1', 'id2'], value_vars=type_(('A', 'B'))) tm.assert_frame_equal(result, expected)
def getGetSurfaceBins(self, gridspacing=10.0):
    """Bin the surface atoms on a cubic grid and average probPPI per bin.

    Adds the columns ``bin_x``, ``bin_y``, ``bin_z`` and ``bin`` to
    ``self._df``, then reassigns rows from neighbouring bins into the
    current bin when the combined score does not fall below the current
    bin's mean.

    Args:
        gridspacing: The grid spacing to use in Angstrom (default 10.0)

    Returns:
        The 'probPPI' column of the per-bin means, sorted in descending
        order.

    Raises:
        Exception: if ``self._df`` has no 'probNPPI' column.
    """
    self._logger.info("Calculating probabilities per bin")
    # NOTE(review): the guard looks for 'probNPPI' while everything below
    # reads 'probPPI' -- confirm this is not a typo.
    if not 'probNPPI' in self._df.columns:
        raise Exception('predictPPIs has to be called before getSurfaceBins')
    # Per-axis min/max of the surface-atom coordinates (rows x/y/z, columns min/max).
    minMax = self._df.loc[self._df.surfaceAtom==True, ['x', 'y', 'z']].describe().loc[['min', 'max'],:].transpose()
    # One shared range for all three axes: the smallest floor to the largest ceiling.
    minVal = np.floor(minMax['min']).min()
    maxVal = np.ceil(minMax['max']).max()
    # Bin edges, spaced by gridspacing; the extra step keeps maxVal inside the range.
    r = np.arange(minVal, maxVal+gridspacing, gridspacing)
    p = partial(np.digitize, bins=r)
    # Bin index per axis, computed for surface atoms only.
    self._df[['bin_x', 'bin_y', 'bin_z']] = self._df.loc[self._df.surfaceAtom==True, ['x', 'y', 'z']].apply(p)
    # Bin id string "x_y_z"; the lambda's parameter ``r`` is a row and shadows the edge array above.
    self._df['bin'] = self._df[self._df.surfaceAtom==True].apply(lambda r: '%i_%i_%i' % (r['bin_x'], r['bin_y'], r['bin_z']), axis=1)
    # unify bins
    #tmpdf = self._df[self._df.surfaceAtom==True].sort('probPPI', ascending=False).groupby(['bin', 'chain', 'resi']).first().reset_index()
    # Long format: 'probPPI' is the only non-id column, and its values land in a column again named 'probPPI'.
    melted = melt(self._df.loc[self._df.surfaceAtom==True, ['chain', 'resi', 'resn', 'probPPI', 'bin']], id_vars=['chain', 'resi', 'bin', 'resn'], value_name='probPPI')
    # Keep the first row of each (chain, resi, bin) group.
    tmpdf = melted.groupby(['chain', 'resi', 'bin']).first().reset_index()
    # try to increase bin size
    # ``xrange`` means this code targets Python 2.
    for bin_x in xrange(1, len(r)):
        for bin_y in xrange(1, len(r)):
            for bin_z in xrange(1, len(r)):
                meid = '%i_%i_%i' % (bin_x, bin_y, bin_z)
                me = tmpdf[tmpdf.bin == meid]
                # mesum is computed but not used below.
                mesum = me['probPPI'].sum()
                memean = me['probPPI'].mean()
                if len(me) > 0 and memean > 0.0:
                    # The three neighbours one step back along each axis.
                    prevBins = [(bin_x - 1, bin_y, bin_z), (bin_x, bin_y - 1, bin_z), (bin_x, bin_y, bin_z - 1)]
                    for prevBin in prevBins:
                        binid = '%i_%i_%i' % prevBin
                        b = tmpdf[tmpdf.bin == binid]
                        # bsum is computed but not used below.
                        bsum = b['probPPI'].sum()
                        if len(b) > 0:
                            # NOTE(review): this divides the sum of the two *means* by the
                            # combined row count; mesum/bsum may have been intended -- confirm.
                            combinedMean = (memean + b['probPPI'].mean()) / (len(me) + len(b))
                            if combinedMean >= memean:
                                # Reassign the neighbour's rows to the current bin.
                                tmpdf.loc[(tmpdf.bin == binid), 'bin'] = meid
    # NOTE(review): reset_index() is called directly on the groupby result;
    # verify this works on the pandas version in use.
    bin_means = tmpdf.groupby(['chain', 'resi']).reset_index().groupby('bin').mean()['probPPI'].copy()
    # Sorts bin_means in place (old pandas API), highest mean first.
    bin_means.sort(ascending=False)
    return bin_means
def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ["var", "value"]) result6 = melt(self.df, id_vars=["id1"], var_name=self.var_name) self.assertEqual(result6.columns.tolist(), ["id1", "var", "value"]) result7 = melt(self.df, id_vars=["id1", "id2"], var_name=self.var_name) self.assertEqual(result7.columns.tolist(), ["id1", "id2", "var", "value"]) result8 = melt(self.df, id_vars=["id1", "id2"], value_vars="A", var_name=self.var_name) self.assertEqual(result8.columns.tolist(), ["id1", "id2", "var", "value"]) result9 = melt(self.df, id_vars=["id1", "id2"], value_vars=["A", "B"], var_name=self.var_name) expected9 = DataFrame( { "id1": self.df["id1"].tolist() * 2, "id2": self.df["id2"].tolist() * 2, self.var_name: ["A"] * 10 + ["B"] * 10, "value": self.df["A"].tolist() + self.df["B"].tolist(), }, columns=["id1", "id2", self.var_name, "value"], ) tm.assert_frame_equal(result9, expected9)
def test_custom_value_name(self): result10 = melt(self.df, value_name=self.value_name) self.assertEqual(result10.columns.tolist(), ["variable", "val"]) result11 = melt(self.df, id_vars=["id1"], value_name=self.value_name) self.assertEqual(result11.columns.tolist(), ["id1", "variable", "val"]) result12 = melt(self.df, id_vars=["id1", "id2"], value_name=self.value_name) self.assertEqual(result12.columns.tolist(), ["id1", "id2", "variable", "val"]) result13 = melt(self.df, id_vars=["id1", "id2"], value_vars="A", value_name=self.value_name) self.assertEqual(result13.columns.tolist(), ["id1", "id2", "variable", "val"]) result14 = melt(self.df, id_vars=["id1", "id2"], value_vars=["A", "B"], value_name=self.value_name) expected14 = DataFrame( { "id1": self.df["id1"].tolist() * 2, "id2": self.df["id2"].tolist() * 2, "variable": ["A"] * 10 + ["B"] * 10, self.value_name: self.df["A"].tolist() + self.df["B"].tolist(), }, columns=["id1", "id2", "variable", self.value_name], ) tm.assert_frame_equal(result14, expected14)
def test_value_vars_types(self): # GH 15348 expected = DataFrame( { 'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A'] * 10 + ['B'] * 10, 'value': (self.df['A'].tolist() + self.df['B'].tolist()) }, columns=['id1', 'id2', 'variable', 'value']) for type_ in (tuple, list, np.array): result = melt(self.df, id_vars=['id1', 'id2'], value_vars=type_(('A', 'B'))) tm.assert_frame_equal(result, expected)
# clean the whole, but not the parts rho, p1 = na_spearmanr(concatted_uncleaned_together['mean'], whole_clean['mean']) row['Whole Cleaned'] = rho rho, p1 = na_spearmanr(agg['mean'], assoc['jaccard']) row['Association Norms (Indiv)'] = rho combined_assoc = combine_measures(assoc[['compound', 'const', 'jaccard']]).sort('compound') rho, p1 = na_spearmanr(combined_assoc['jaccard'], whole_clean['mean']) row['Association Norms (Whole)'] = rho results.append(row) results = pd.DataFrame(results) output = melt(results, id_vars=parameters) output.to_csv(sys.stdout, index=False) # # produce plots # from rplots import line_plot # import operator # for p in parameters: # other_params = parameters - set([p]) # if other_params: # experiment = reduce(operator.and_, [output[op].isnull() for op in other_params]) # experiment = output[experiment] # else: # experiment = output # line_plot("graphs/" + p + ".pdf", experiment, p, 'value', 'variable', # ylab='Resulting Correlation', # colorname="Eval Method")
def time_melt_dataframe(self):
    """Melt ``self.df`` keeping 'id1' and 'id2' as id columns; the result is discarded."""
    id_columns = ['id1', 'id2']
    melt(self.df, id_vars=id_columns)
def _check_melt_names(df, var_label, value_label, **names):
    """Melt *df* with the naming keywords in *names* and verify every call shape.

    *var_label* and *value_label* are the labels the variable and value
    columns are expected to carry in each result.
    """
    ids = ['id1', 'id2']
    tail = [var_label, value_label]
    n_rows = len(df)

    # Column labels: id columns first, then the variable and value labels.
    assert melt(df, **names).columns.tolist() == tail
    assert melt(df, id_vars=['id1'], **names).columns.tolist() == ['id1'] + tail
    assert melt(df, id_vars=ids, **names).columns.tolist() == ids + tail

    # A single value column yields one output row per input row.
    single = melt(df, id_vars=ids, value_vars='A', **names)
    assert single.columns.tolist() == ids + tail
    assert len(single) == n_rows

    # Two value columns: ids repeat twice, values are A stacked on top of B.
    result = melt(df, id_vars=ids, value_vars=['A', 'B'], **names)
    expected = DataFrame({'id1': df['id1'].tolist() * 2,
                          'id2': df['id2'].tolist() * 2,
                          var_label: ['A'] * n_rows + ['B'] * n_rows,
                          value_label: df['A'].tolist() + df['B'].tolist()},
                         columns=ids + tail)
    tm.assert_frame_equal(result, expected)


def test_melt():
    """Exercise melt with default and custom variable/value column names.

    Every call shape is checked, for each of the four naming combinations.
    """
    df = tm.makeTimeDataFrame()[:10]
    # Derive two integer id columns from the sign of the data columns.
    df['id1'] = (df['A'] > 0).astype(np.int64)
    df['id2'] = (df['B'] > 0).astype(np.int64)

    var_name = 'var'
    value_name = 'val'

    # Default column names
    _check_melt_names(df, 'variable', 'value')

    # Supply custom name for the 'variable' column
    _check_melt_names(df, var_name, 'value', var_name=var_name)

    # Supply custom name for the 'value' column
    _check_melt_names(df, 'variable', value_name, value_name=value_name)

    # Supply custom names for the 'variable' and 'value' columns
    _check_melt_names(df, var_name, value_name,
                      var_name=var_name, value_name=value_name)
def test_col_level(self): res1 = melt(self.df1, col_level=0) res2 = melt(self.df1, col_level='CAP') self.assertEqual(res1.columns.tolist(), ['CAP', 'value']) self.assertEqual(res2.columns.tolist(), ['CAP', 'value'])
def test_top_level_method(self): result = melt(self.df) self.assertEqual(result.columns.tolist(), ['variable', 'value'])
rho, p1 = na_spearmanr(concatted_uncleaned_together['mean'], whole_clean['mean']) row['Whole Cleaned'] = rho rho, p1 = na_spearmanr(agg['mean'], assoc['jaccard']) row['Association Norms (Indiv)'] = rho combined_assoc = combine_measures( assoc[['compound', 'const', 'jaccard']]).sort('compound') rho, p1 = na_spearmanr(combined_assoc['jaccard'], whole_clean['mean']) row['Association Norms (Whole)'] = rho results.append(row) results = pd.DataFrame(results) output = melt(results, id_vars=parameters) output.to_csv(sys.stdout, index=False) # # produce plots # from rplots import line_plot # import operator # for p in parameters: # other_params = parameters - set([p]) # if other_params: # experiment = reduce(operator.and_, [output[op].isnull() for op in other_params]) # experiment = output[experiment] # else: # experiment = output # line_plot("graphs/" + p + ".pdf", experiment, p, 'value', 'variable', # ylab='Resulting Correlation', # colorname="Eval Method")
def test_col_level(self): res1 = melt(self.df1, col_level=0) res2 = melt(self.df1, col_level="CAP") self.assertEqual(res1.columns.tolist(), ["CAP", "value"]) self.assertEqual(res1.columns.tolist(), ["CAP", "value"])
def test_custom_var_and_value_name(self): self.df.columns.name = 'foo' result20 = melt(self.df) self.assertEqual(result20.columns.tolist(), ['foo', 'value'])
def _check_melt_names(df, var_label, value_label, **names):
    """Melt *df* with the naming keywords in *names* and verify every call shape.

    *var_label* and *value_label* are the labels the variable and value
    columns are expected to carry in each result.
    """
    ids = ['id1', 'id2']
    tail = [var_label, value_label]
    n_rows = len(df)

    # Column labels: id columns first, then the variable and value labels.
    assert melt(df, **names).columns.tolist() == tail
    assert melt(df, id_vars=['id1'], **names).columns.tolist() == ['id1'] + tail
    assert melt(df, id_vars=ids, **names).columns.tolist() == ids + tail

    # A single value column yields one output row per input row.
    single = melt(df, id_vars=ids, value_vars='A', **names)
    assert single.columns.tolist() == ids + tail
    assert len(single) == n_rows

    # Two value columns: ids repeat twice, values are A stacked on top of B.
    result = melt(df, id_vars=ids, value_vars=['A', 'B'], **names)
    expected = DataFrame({'id1': df['id1'].tolist() * 2,
                          'id2': df['id2'].tolist() * 2,
                          var_label: ['A'] * n_rows + ['B'] * n_rows,
                          value_label: df['A'].tolist() + df['B'].tolist()},
                         columns=ids + tail)
    tm.assert_frame_equal(result, expected)


def test_melt():
    """Exercise melt with default and custom variable/value column names.

    Every call shape is checked, for each of the four naming combinations.
    """
    df = tm.makeTimeDataFrame()[:10]
    # Pin the id columns to 64-bit: the expected frames are built from
    # Python ints, and plain ``int`` can be a narrower dtype on some
    # platforms, which would fail the frame comparison on dtype alone.
    df['id1'] = (df['A'] > 0).astype('int64')
    df['id2'] = (df['B'] > 0).astype('int64')

    var_name = 'var'
    value_name = 'val'

    # Default column names
    _check_melt_names(df, 'variable', 'value')

    # Supply custom name for the 'variable' column
    _check_melt_names(df, var_name, 'value', var_name=var_name)

    # Supply custom name for the 'value' column
    _check_melt_names(df, 'variable', value_name, value_name=value_name)

    # Supply custom names for the 'variable' and 'value' columns
    _check_melt_names(df, var_name, value_name,
                      var_name=var_name, value_name=value_name)
"""Melt a CSV read from stdin into long format and write it to stdout."""
import argparse
import sys

import pandas as pd


def melt_stream(source, sink, id_vars=None):
    """Read a CSV from *source*, melt it, and write the result to *sink*.

    Args:
        source: path or file-like object accepted by ``pandas.read_csv``.
        sink: path or file-like object accepted by ``DataFrame.to_csv``.
        id_vars: column names to keep as identifier columns; ``None``
            melts every column.
    """
    # read csv into a dataframe
    df = pd.read_csv(source, low_memory=False)
    # ``pd.melt`` is the public entry point; the old
    # ``pandas.core.reshape`` import path is internal and has moved.
    df = pd.melt(df, id_vars=id_vars)
    # output dataframe without the row index
    df.to_csv(sink, index=False)


def main(argv=None):
    """Parse ``--ids`` from *argv* (default: ``sys.argv``) and melt stdin to stdout."""
    # setup argument parsing
    parser = argparse.ArgumentParser(description='Melt data.')
    # nargs='+' collects one or more whitespace-separated names.
    parser.add_argument('--ids', help='space-separated list of column names',
                        nargs='+')
    args = parser.parse_args(argv)
    melt_stream(sys.stdin, sys.stdout, id_vars=args.ids)


if __name__ == '__main__':
    main()