import turicreate as tc

# Read the CSV at mangabox/manga_fav.csv and open it in the interactive
# explorer.
favorites = tc.SFrame.read_csv('mangabox/manga_fav.csv')
favorites.explore()

# Split per user so that the evaluation set contains held-out items for
# users, keyed by the 'uid' / 'manga_id' columns.
train_set, test_set = tc.recommender.util.random_split_by_user(
    favorites,
    user_id='uid',
    item_id='manga_id',
)

print("#### train #####")
recommender = tc.recommender.create(
    train_set,
    user_id='uid',
    item_id='manga_id',
)

print("#### eval #####")
evaluation = recommender.evaluate(test_set)

# Overall precision/recall table: print up to 18 rows and 3 columns, then
# plot precision against recall.
overall_pr = evaluation['precision_recall_overall']
overall_pr.print_rows(num_rows=18, num_columns=3)
tc.show(
    overall_pr['recall'],
    overall_pr['precision'],
    xlabel='recall',
    ylabel='precision',
)

# Persist the trained model to disk.
recommender.save('recommend.model')
import matplotlib.pyplot as plt
import turicreate

# Importing data
sales = turicreate.SFrame(
    'https://courses.cs.washington.edu/courses/cse416/18sp/notebooks/Philadelphia_Crime_Rate_noNA.csv'
)

# Exploring the data
# The house price in a town is correlated with the crime rate of that town.
# Low crime towns tend to be associated with higher house prices and vice
# versa.
turicreate.show(sales['CrimeRate'], sales['HousePrice'])

# Fit the regression model using crime as the feature
crime_model = turicreate.linear_regression.create(
    sales,
    target='HousePrice',
    features=['CrimeRate'],
    validation_set=None,
    verbose=False,
)

# Let's see what our fit looks like: observed points as dots, fitted values
# as a line.
plt.plot(
    sales['CrimeRate'], sales['HousePrice'], '.',
    sales['CrimeRate'], crime_model.predict(sales), '-',
)
plt.show()

# Remove Center City and redo the analysis
# Center City is the one observation with an extremely high crime rate, yet
# house prices are not very low. This point does not follow the trend of the
# rest of the data very well. A question is how much including Center City is
# influencing our fit on the other datapoints. Let's remove this datapoint and
# see what happens.
sales_noCC = sales[sales['MilesPhila'] != 0.0]
turicreate.show(sales_noCC['CrimeRate'], sales_noCC['HousePrice'])

# Fitting a regression model for sales_noCC
# NOTE(review): the original statement was cut off after `sales_noCC,`. The
# remaining arguments below mirror the first fit above so the two models are
# comparable -- confirm against the original notebook.
crime_model_noCC = turicreate.linear_regression.create(
    sales_noCC,
    target='HousePrice',
    features=['CrimeRate'],
    validation_set=None,
    verbose=False,
)
import matplotlib.pyplot as plt
import turicreate

sales = turicreate.SFrame('home_data.sframe')

# Displaying imported data in graphical view
sales.show()

# Displaying data in tabular view
print(sales[:])

# Plotting sqft_living (x) vs price (y) for rows 1..4999
turicreate.show(sales[1:5000]['sqft_living'], sales[1:5000]['price'])

## Simple linear regression model
# Splitting data into training data and testing data (fixed seed so the
# split is reproducible).
training_data, testing_data = sales.random_split(0.75, seed=0)

# Defining our linear regression model.
# `features` is documented as a list of column names, so the single feature
# is wrapped in a list rather than passed as a bare string.
sqft_model = turicreate.linear_regression.create(
    training_data,
    target='price',
    features=['sqft_living'],
)

# Evaluate the quality of the model: the mean test price gives a scale
# against which to read the evaluation metrics.
print(testing_data['price'].mean())
print(sqft_model.evaluate(testing_data))
# A bare attribute expression shows nothing outside a notebook cell, so the
# coefficients are printed explicitly.
print(sqft_model.coefficients)

# Exploring a little further
# `%matplotlib inline` is IPython-only syntax; this is the equivalent call
# that is valid Python (it still requires running under IPython/Jupyter).
get_ipython().run_line_magic('matplotlib', 'inline')

# NOTE(review): the original plt.plot(...) call was cut off after the '.'
# format string. Only the visible scatter series is plotted here; a second
# series (likely the model's predictions) may have followed -- confirm
# against the original notebook.
plt.plot(testing_data['sqft_living'], testing_data['price'], '.')
import matplotlib.pyplot as plt

# Render matplotlib figures inline (requires an IPython/Jupyter session).
get_ipython().run_line_magic('matplotlib', 'inline')

# Test-set observations as dots and the model's predictions as a line, both
# plotted against Age. `test_set` and `model` come from earlier cells that
# are not visible here.
plt.plot(
    test_set['Age'], test_set['Purchased'], '.',
    test_set['Age'], model.predict(test_set), '-',
)

# Feature columns for the second model.
my_features = ['Gender', 'EstimatedSalary']

# Visualise the selected feature columns together.
data[my_features].show()

# Plot each selected feature against the target, in list order.
# `t` is presumably the turicreate module alias bound in an earlier cell --
# TODO confirm.
for feature_name in my_features:
    t.show(data[feature_name], data['Purchased'])

# Fit a linear regression on the selected features.
my_features_model = t.linear_regression.create(
    training_set,
    target='Purchased',
    features=my_features,
)

print(my_features)