# Example no. 1
0
# Alter values in one column based on values in another column (changes occur in place).
# Use .loc for boolean-mask assignment; the legacy .ix indexer was removed in pandas 1.0.
df.loc[df["column_x"] == 5, "column_y"] = 1

df.loc[df.column_x == "string_value", "column_y"] = "new_string_value"

# Transpose the data frame (i.e. rows become columns, columns become rows).
df.T

# String methods are accessed via 'str'.
df.column_y.str.upper()  # converts to uppercase
# Pass the boolean False (not the string 'False') so missing values are
# reported as "does not contain" and the result stays a boolean series.
df.column_y.str.contains('value', na=False)  # checks for a substring

# Convert a string column to the datetime format.
df['time_column'] = pd.to_datetime(df.time_column)
df.time_column.dt.hour  # datetime format exposes convenient attributes
(df.time_column.max() - df.time_column.min()).days  # also allows you to do datetime "math"
df[df.time_column > pd.Timestamp(2014, 1, 1)]  # boolean filtering with datetime format

# Setting and then removing an index; resetting the index can help remove
# hierarchical indexes while preserving the table in its basic structure.
df.set_index('time_column', inplace=True)
df.reset_index(inplace=True)

# Count the occurrences of each value, ordered by the value itself
# (the values become the index of the counts series).
df.column_y.value_counts().sort_index()

# Change the data type of a column.
df['column_x'] = df.column_x.astype('float')

# change the data type of a column when reading in a file,
# e.g. pd.read_csv(path, dtype={'column_x': float})
# Alter values in one column based on values in another column (changes occur in place).
# Use .loc for boolean-mask assignment; the legacy .ix indexer was removed in pandas 1.0.

df.loc[df["column_x"] == 5, "column_y"] = 1

df.loc[df.column_x == "string_value", "column_y"] = "string_value"

# Transpose the data frame (i.e. rows become columns, columns become rows).
df.T

# String methods are accessed via 'str'.
df.column_y.str.upper()  # converts to uppercase
# Pass the boolean False (not the string 'False') so missing values are
# reported as "does not contain" and the result stays a boolean series.
df.column_y.str.contains(
    'value', na=False)  # checks for a substring

# Convert a string column to the datetime format.
df['time_column'] = pd.to_datetime(df.time_column)
df.time_column.dt.hour  # datetime format exposes convenient attributes
(df.time_column.max() -
 df.time_column.min()).days  # also allows you to do datetime "math"
df[df.time_column > pd.Timestamp(
    2014, 1, 1)]  # boolean filtering with datetime format

# Setting and then removing an index; resetting the index can help remove
# hierarchical indexes while preserving the table in its basic structure.
df.set_index('time_column', inplace=True)
df.reset_index(inplace=True)

# Count the occurrences of each value, ordered by the value itself
# (the values become the index of the counts series).
df.column_y.value_counts().sort_index()

# Change the data type of a column.
df['column_x'] = df.column_x.astype('float')