Exemple #1
0
    
#     predictions = []
#     for _, passenger in data.iterrows():
        
#         if(passenger['Sex'] == 'female' or (passenger['Age']<10 and passenger['Sex']== 'male')):
#         	predictions.append(1)
#         else:
#         	predictions.append(0)
    
#     # Return our predictions
#     return pd.Series(predictions)

# # # Make the predictions
# predictions = predictions_2(data)
#print accuracy_score(outcomes, predictions)
survival_stats(data, outcomes, 'Pclass',["Sex == 'male'"])
# survival_stats(data, outcomes, 'Pclass', ["Sex == 'male'"])
def predictions_3(data):
    """ Model with two features: 
            - Predict a passenger survived if they are female.
            - Predict a passenger survived if they are male and younger than 10. """
    
    predictions = []
    for _, passenger in data.iterrows():
        #if(passenger['Sex'] == 'male' or (passenger['Age'] >= 10 and passenger['Age'] <=20  and passenger['Age']>=30) or (passenger["SibSp"]>2)  or passenger['Fare'] < 7):
    	if(passenger['Sex'] == 'female'):
            if(passenger['SibSp']<=2):
                predictions.append(1)
            else:
                predictions.append(0)    
    	if((passenger['Sex'] == 'male')):
    predictions = []
    for _, passenger in data.iterrows():
        
        # Predict the survival of 'passenger'
        predictions.append(0)
    
    # Return our predictions
    return pd.Series(predictions)

# Make the predictions
predictions = predictions_0(data_left)

# A single pre-formatted argument in call form prints the same text on both
# Python 2 and Python 3 (the bare `print a, b` statement is Python 2 only).
print("accuracy if no passanger survived: {0}".format(accuracy_score(survived_real, predictions)))

# Passenger survival statistics for the 'sex' feature.
# NOTE(review): predictions_1 further down reads passenger['Sex'] (capitalised);
# confirm that 'sex' is really the column name present in data_left.
survival_stats(data_left, survived_real, 'sex')

## Prediction: female passengers survive
def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
        # Remove the 'pass' statement below 
        # and write your prediction conditions here
        if (passenger['Sex'] == 'female'):
            predictions.append(1)
        else:
            predictions.append(0)
Exemple #3
0
# Load the dataset
in_file = 'titanic_data.csv'
full_data = pd.read_csv(in_file)

# Show the first few passenger records in the dataset
# display(full_data.head())

# Remove the 'Survived' feature from the dataset and store it in a new variable.
# It also serves as the target we want to predict.
outcomes = full_data['Survived']
data = full_data.drop('Survived', axis=1)

# Survived: whether the passenger survived (0 = no, 1 = yes)
# Pclass: socio-economic class (1 = upper class, 2 = middle class, 3 = lower class)
# Name: name of the passenger
# Sex: sex of the passenger (male, female)
# Age: age of the passenger (may be NaN)
# SibSp: number of siblings and spouses the passenger had aboard (0, 1, 2, ...)
# Parch: number of parents and children the passenger had aboard (0, 1, ...)
# Ticket: ticket number of the passenger
# Fare: fare the passenger paid for the ticket
# Cabin: cabin number of the passenger (may be NaN)
# Embarked: port of embarkation (C = Cherbourg, Q = Queenstown, S = Southampton)

# survival_stats(data, outcomes, 'Sex')
# survival_stats(data, outcomes, 'Age', ["Sex == 'male'"])
# survival_stats(data, outcomes, 'Age', ["Sex == 'male'", "Age < 18"])
survival_stats(data, outcomes, 'Fare', ["Sex == 'female'", "Pclass == 3"])
# survival_stats(data, outcomes, 'Embarked', ["Sex == 'female'"])
# survival_stats(data, outcomes, 'Fare', ["Sex == 'female'", "Embarked == 'S'"])
Exemple #4
0
#
#        # Predict the survival of 'passenger'
#        pass_predictions.append(1)
#
#    # Return our predictions
#    return pd.Series(pass_predictions)
#
## Make the predictions
#pass_predictions = predictions_1(data)
#
#
#
#print ('Failure Prediction:-',accuracy_score(outcome, predictions))
#print ('Success Prediction:-',accuracy_score(outcome, pass_predictions))

survival_stats(data, outcome, 'Sex')


def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    #from titanic_visualizations import filter_data
    #print(filter_data(data,"Sex == 'female'"))
    predictions = []
    for _, passenger in data.iterrows():

        # Remove the 'pass' statement below
        # and write your prediction conditions here
        #pass

        predictions.append(1)
Exemple #5
0
# In[22]:


# Report the accuracy of the current predictions. The single-argument call
# form prints identically on Python 2 and Python 3.
print(accuracy_score(outcomes, predictions))


# **Answer:** *The predictions have an accuracy of 61.62%, assuming none of the passengers survived.*

# ***
# Let's take a look at whether the feature **Sex** has any indication of survival rates among passengers using the `survival_stats` function. This function is defined in the `titanic_visualizations.py` Python script included with this project. The first two parameters passed to the function are the RMS Titanic data and passenger survival outcomes, respectively. The third parameter indicates which feature we want to plot survival statistics across.  
# Run the code cell below to plot the survival outcomes of passengers based on their sex.

# In[51]:


survival_stats(data, outcomes, 'Pclass')


# Examining the survival statistics, a large majority of males did not survive the ship sinking. However, a majority of females *did* survive the ship sinking. Let's build on our previous prediction: If a passenger was female, then we will predict that they survived. Otherwise, we will predict the passenger did not survive.  
# Fill in the missing code below so that the function will make this prediction.  
# **Hint:** You can access the values of each feature for a passenger like a dictionary. For example, `passenger['Sex']` is the sex of the passenger.

# In[19]:


def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
def predictions_0(data):
    """Baseline model: predict 0 (did not survive) for every passenger.

    Iterates over the rows of `data` via `iterrows()` and returns a
    `pd.Series` holding one 0 per row, indexed 0..n-1.
    """
    # One zero per row; the row contents are never inspected.
    return pd.Series([0 for _ in data.iterrows()])


# Baseline predictions from predictions_0.
predictions = predictions_0(data)

# The single-argument call form prints identically on Python 2 and Python 3
# (the bare `print x` statement is a syntax error on Python 3).
print(accuracy_score(outcomes, predictions))

# Survival statistics across the 'Sex' feature.
survival_stats(data, outcomes, 'Sex')


def predictions_1(data):
    """One-feature model: predict 1 (survived) when 'Sex' is "female", else 0.

    Iterates over the rows of `data` via `iterrows()` and returns a
    `pd.Series` of 0/1 predictions, one per row, indexed 0..n-1.
    """
    survived = [
        1 if row['Sex'] == "female" else 0
        for _, row in data.iterrows()
    ]
    return pd.Series(survived)
data_tst.drop(col_drop, inplace=True, axis=1)

# -----------------------------------------------------------------------------
# Data Exploration (before or after NaN treatment?)
# -----------------------------------------------------------------------------

# Let's try before NaN treatment.
# That probably works for simple stats, but might be a problem if you apply
# DT's, for example.

# First look at marginals.
# For supervised learning problems, leverage the helpers from the MLND
# titanic project.

# NOTE(review): machine-specific absolute path; the import below only resolves
# where this directory exists.
sys.path.append(
    r'C:\Users\rghiglia\Documents\ML_ND\titanic_survival_exploration')
from titanic_visualizations import survival_stats
survival_stats(data_trn, y_trn, 'Sex')
# TODO: extract the viz code and make it more generic, at least with a single
# condition.

# Grouping
feat = 'Sex'
df_grp = data_trn.groupby(feat)
display(np.round(df_grp.describe(), 1))

# What we really want to know is just the feature against survivorship.
df_tmp = pd.concat([data_trn[feat], y_trn], axis=1)
df_grp = df_tmp.groupby([df_tmp[col_tgt], df_tmp[feat]]).count()
display(df_grp)

df_tmp = pd.concat([data_trn, y_trn], axis=1)
df_grp = df_tmp.groupby([feat, col_tgt]).count()
# Take the first count column by position and pivot the target level into
# columns. `.iloc` replaces `.ix`, which was removed in pandas 1.0; the two
# agree here as long as the column labels are not integers (assumed -- confirm).
df_grp = df_grp.iloc[:, 0].unstack()
# *Using the RMS Titanic data, how accurate would a prediction be that none of the passengers survived?*  
# **Hint:** Run the code cell below to see the accuracy of this prediction.

# In[44]:

# Report the accuracy of the current predictions. The single-argument call
# form prints identically on Python 2 and Python 3.
print(accuracy_score(outcomes, predictions))


# **Answer:** Predictions have an accuracy of 61.62%.

# Let's take a look at whether the feature **Sex** has any indication of survival rates among passengers using the `survival_stats` function. This function is defined in the `titanic_visualizations.py` Python script included with this project. The first two parameters passed to the function are the RMS Titanic data and passenger survival outcomes, respectively. The third parameter indicates which feature we want to plot survival statistics across.  
# Run the code cell below to plot the survival outcomes of passengers based on their sex.

# In[45]:

survival_stats(data, outcomes, 'Sex')


# Examining the survival statistics, a large majority of males did not survive the ship sinking. However, a majority of females *did* survive the ship sinking. Let's build on our previous prediction: If a passenger was female, then we will predict that they survived. Otherwise, we will predict the passenger did not survive.  
# Fill in the missing code below so that the function will make this prediction.  
# **Hint:** You can access the values of each feature for a passenger like a dictionary. For example, `passenger['Sex']` is the sex of the passenger.

# In[46]:

def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
        if(passenger['Sex']=='female'):
# *Using the RMS Titanic data, how accurate would a prediction be that none of the passengers survived?*
# **Hint:** Run the code cell below to see the accuracy of this prediction.

# In[18]:

print(accuracy_score(outcomes, predictions))

# **Answer:** Predictions have an accuracy of 61.62%.

# ***
# Let's take a look at whether the feature **Sex** has any indication of survival rates among passengers using the `survival_stats` function. This function is defined in the `titanic_visualizations.py` Python script included with this project. The first two parameters passed to the function are the RMS Titanic data and passenger survival outcomes, respectively. The third parameter indicates which feature we want to plot survival statistics across.
# Run the code cell below to plot the survival outcomes of passengers based on their sex.

# In[20]:

survival_stats(data, outcomes, 'Sex')

# Examining the survival statistics, a large majority of males did not survive the ship sinking. However, a majority of females *did* survive the ship sinking. Let's build on our previous prediction: If a passenger was female, then we will predict that they survived. Otherwise, we will predict the passenger did not survive.
# Fill in the missing code below so that the function will make this prediction.
# **Hint:** You can access the values of each feature for a passenger like a dictionary. For example, `passenger['Sex']` is the sex of the passenger.

# In[41]:


def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """

    predictions = []
    for _, passenger in data.iterrows():
    predictions = []
    for _, passenger in data.iterrows():
        
        # Predict the survival of 'passenger'
        predictions.append(0)
    
    # Return our predictions
    return pd.Series(predictions)

# Make the predictions
predictions = predictions_0(data)

# The single-argument call form prints identically on Python 2 and Python 3
# (the bare `print x` statement is a syntax error on Python 3).
print(accuracy_score(outcomes, predictions))

# Survival statistics across the 'Sex' feature.
survival_stats(data, outcomes, 'Sex')

def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
        # Remove the 'pass' statement below 
        # and write your prediction conditions here
        if passenger['Sex'] == "female":
            predictions.append(1)
        else:
            predictions.append(0)
data_tst.drop(col_drop, inplace=True, axis=1)


# -----------------------------------------------------------------------------
# Data Exploration (before or after NaN treatment?)
# -----------------------------------------------------------------------------

# Let's try before NaN treatment.
# That probably works for simple stats, but might be a problem if you apply
# DT's, for example.

# First look at marginals.
# For supervised learning problems, leverage the helpers from the MLND
# titanic project.

# NOTE(review): machine-specific absolute path; the import below only resolves
# where this directory exists.
sys.path.append(r'C:\Users\rghiglia\Documents\ML_ND\titanic_survival_exploration')
from titanic_visualizations import survival_stats
survival_stats(data_trn, y_trn, 'Sex')
# TODO: extract the viz code and make it more generic, at least with a single
# condition.

# Grouping
feat = 'Sex'
df_grp = data_trn.groupby(feat)
display(np.round(df_grp.describe(), 1))

# What we really want to know is just the feature against survivorship.
df_tmp = pd.concat([data_trn[feat], y_trn], axis=1)
df_grp = df_tmp.groupby([df_tmp[col_tgt], df_tmp[feat]]).count()
display(df_grp)

df_tmp = pd.concat([data_trn, y_trn], axis=1)
df_grp = df_tmp.groupby([feat, col_tgt]).count()
# Take the first count column by position and pivot the target level into
# columns. `.iloc` replaces `.ix`, which was removed in pandas 1.0; the two
# agree here as long as the column labels are not integers (assumed -- confirm).
df_grp = df_grp.iloc[:, 0].unstack()
Exemple #12
0
# *Using the RMS Titanic data, how accurate would a prediction be that none of the passengers survived?*  
# **Hint:** Run the code cell below to see the accuracy of this prediction.

# In[5]:

# Report the accuracy of the current predictions. The single-argument call
# form prints identically on Python 2 and Python 3.
print(accuracy_score(outcomes, predictions))


# ###### **Answer:** 61.62%

# Let's take a look at whether the feature **Sex** has any indication of survival rates among passengers using the `survival_stats` function. This function is defined in the `titanic_visualizations.py` Python script included with this project. The first two parameters passed to the function are the RMS Titanic data and passenger survival outcomes, respectively. The third parameter indicates which feature we want to plot survival statistics across.  
# Run the code cell below to plot the survival outcomes of passengers based on their sex.

# In[6]:

survival_stats(data, outcomes, 'Sex')


# Examining the survival statistics, a large majority of males did not survive the ship sinking. However, a majority of females *did* survive the ship sinking. Let's build on our previous prediction: If a passenger was female, then we will predict that they survived. Otherwise, we will predict the passenger did not survive.  
# Fill in the missing code below so that the function will make this prediction.  
# **Hint:** You can access the values of each feature for a passenger like a dictionary. For example, `passenger['Sex']` is the sex of the passenger.

# In[7]:

def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
def predictions_0(data):
    """Baseline model that predicts 0 (did not survive) for every row.

    The rows of `data` are walked with `iterrows()` only to count them; the
    result is a `pd.Series` of that many zeros, indexed 0..n-1.
    """
    row_count = sum(1 for _ in data.iterrows())
    all_zero = [0] * row_count
    return pd.Series(all_zero)

# Baseline predictions from predictions_0.
predictions = predictions_0(data)


# The single-argument call form prints identically on Python 2 and Python 3
# (the bare `print x` statement is a syntax error on Python 3).
print(accuracy_score(outcomes, predictions))

# Survival statistics across the 'Sex' feature.
survival_stats(data, outcomes, 'Sex')


def predictions_1(data):
    """Predict survival from sex alone: 1 when 'Sex' is "female", otherwise 0.

    Walks the rows of `data` with `iterrows()` and returns the 0/1 labels as
    a `pd.Series`, one per row, indexed 0..n-1.
    """
    labels = []
    for _, row in data.iterrows():
        is_female = row['Sex'] == "female"
        # int() turns the comparison result into the 0/1 label.
        labels.append(int(is_female))
    return pd.Series(labels)
            predictions.append(1)
        elif passenger['Sex'] == 'male' and passenger['Age'] < 10:
            predictions.append(1)
        else:
            predictions.append(0)

    # Return our predictions
    return pd.Series(predictions)


# Make the predictions
predictions = predictions_2(data)

#print accuracy_score(outcomes, predictions)

survival_stats(data, outcomes, 'Age', ["Sex == 'male'", "Age < 10"])


def predictions_3(data):
    """ Model with multiple features. Makes a prediction with an accuracy of at least 80%. """

    predictions = []
    for _, passenger in data.iterrows():

        # Remove the 'pass' statement below
        # and write your prediction conditions here
        if passenger['Sex'] == 'female' and passenger['Pclass'] < 3:
            predictions.append(1)
        elif passenger['Age'] < 13 and passenger['Sibsp'] < 3:
            predictions.append(1)
        else:
        # 预测 'passenger' 的生还率
        predictions.append(0)

    # Return our predictions
    # 返回预测结果
    return pd.Series(predictions)

# Make the predictions
predictions = predictions_0(data)


# The single-argument call form prints identically on Python 2 and Python 3
# (the bare `print x` statement is a syntax error on Python 3).
print(accuracy_score(outcomes, predictions))


# Survival statistics across the 'Sex' feature.
survival_stats(data, outcomes, 'Sex')


def predictions_1(data):
    """ Model with one feature:
            - Predict a passenger survived if they are female. """

    predictions = []
    for _, passenger in data.iterrows():

        # Remove the 'pass' statement below
        # 移除下方的 'pass' 声明
        # and write your prediction conditions here
        # 输入你自己的预测条件
        # pass
        if passenger['Sex'] == "female":
# In[7]:


print (accuracy_score(outcomes, predictions))


# **Answer:** Predictions have an accuracy of 61.62%.

# Let's take a look at whether the feature **Sex** has any indication of survival rates among passengers using the `survival_stats` function. This function is defined in the `titanic_visualizations.py` Python script included with this project. The first two parameters passed to the function are the RMS Titanic data and passenger survival outcomes, respectively. The third parameter indicates which feature we want to plot survival statistics across.  
# Run the code cell below to plot the survival outcomes of passengers based on their sex.

# In[103]:


survival_stats(data, outcomes, 'Sex')


# Examining the survival statistics, a large majority of males did not survive the ship sinking. However, a majority of females *did* survive the ship sinking. Let's build on our previous prediction: If a passenger was female, then we will predict that they survived. Otherwise, we will predict the passenger did not survive.  
# Fill in the missing code below so that the function will make this prediction.  
# **Hint:** You can access the values of each feature for a passenger like a dictionary. For example, `passenger['Sex']` is the sex of the passenger.

# In[14]:


def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
# **Hint:** Run the code cell below to see the accuracy of this prediction.

# In[27]:

# Report the accuracy of the current predictions. The single-argument call
# form prints identically on Python 2 and Python 3.
print(accuracy_score(outcomes, predictions))


# **Answer:** Predictions have an accuracy of 61.62%.

# ***
# Let's take a look at whether the feature **Sex** has any indication of survival rates among passengers using the `survival_stats` function. This function is defined in the `titanic_visualizations.py` Python script included with this project. The first two parameters passed to the function are the RMS Titanic data and passenger survival outcomes, respectively. The third parameter indicates which feature we want to plot survival statistics across.  
# Run the code cell below to plot the survival outcomes of passengers based on their sex.

# In[28]:

survival_stats(data, outcomes, 'Sex')


# Examining the survival statistics, a large majority of males did not survive the ship sinking. However, a majority of females *did* survive the ship sinking. Let's build on our previous prediction: If a passenger was female, then we will predict that they survived. Otherwise, we will predict the passenger did not survive.  
# Fill in the missing code below so that the function will make this prediction.  
# **Hint:** You can access the values of each feature for a passenger like a dictionary. For example, `passenger['Sex']` is the sex of the passenger.

# In[31]:

def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
Exemple #18
0
    
#     predictions = []
#     for _, passenger in data.iterrows():
        
#         if(passenger['Sex'] == 'female' or (passenger['Age']<10 and passenger['Sex']== 'male')):
#         	predictions.append(1)
#         else:
#         	predictions.append(0)
    
#     # Return our predictions
#     return pd.Series(predictions)

# # # Make the predictions
# predictions = predictions_2(data)
#print accuracy_score(outcomes, predictions)
survival_stats(data, outcomes, 'Age',["Sex == 'male'"])
# survival_stats(data, outcomes, 'Pclass', ["Sex == 'male'"])
def predictions_3(data):
    """ Model with two features: 
            - Predict a passenger survived if they are female.
            - Predict a passenger survived if they are male and younger than 10. """
    
    predictions = []
    for _, passenger in data.iterrows():
        if(passenger['Sex'] == 'male'):
            if(passenger['Pclass'] == 2):
                if(passenger['Age'] >=12):
                    predictions.append(0)    
                else:
                    predictions.append(1)  
            else: