def test_name_parse1(self):
    """Check that ``main.clean_data`` drops the trailing ``_390`` from a name.

    Two inputs are exercised with type ``2`` and the 'Authentic Jobs'
    provider: a name with a single underscore, and a name with several
    underscores where only the last ``_390`` segment is expected to go.
    """
    type_ = 2

    sub = {'name': 'lmao_390', 'image': 'x', 'provider': 'Authentic Jobs'}
    res = main.clean_data(type_, sub)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(res['name'], 'lmao')

    # multiple underscores
    sub2 = {
        'name': 'lmao_lmao_lmao_390',
        'image': 'x',
        'provider': 'Authentic Jobs',
    }
    res2 = main.clean_data(type_, sub2)
    self.assertEqual(res2['name'], 'lmao_lmao_lmao')
def excutor_task(self, myqueue):
    """Consume tasks from ``myqueue`` forever and run each of them.

    Every truthy item taken from the queue is printed and handed to
    ``main.clean_data().run``. A falsy item makes the loop pause for one
    second. Any exception raised while fetching or running a task is
    printed and the loop carries on, so this method never returns.
    """
    while True:
        try:
            job = myqueue.get()
            if not job:
                # Nothing usable was dequeued: back off briefly.
                time.sleep(1)
                continue
            # Switch the task to the executing state (currently only printed).
            print(job)
            # Perform the task action.
            main.clean_data().run(job)
        except Exception as exc:
            print('执行任务异常:', exc)
def excutor_task(self, myqueue):
    """Run ``main.clean_data().run`` in ten threads and wait for all of them.

    Args:
        myqueue: Accepted for interface compatibility; this method does not
            read it.

    Returns:
        None, once every started thread has been joined.
    """
    thread_all = []
    for _ in range(10):
        # Pass the bound method itself. Writing ``run()`` here would execute
        # the work in the calling thread and hand its return value to Thread
        # as the target, so nothing would run concurrently.
        t = threading.Thread(target=main.clean_data().run)
        t.start()
        thread_all.append(t)

    for thr in thread_all:
        thr.join()
def train_model(train_path='./data/train.csv'):
    """Fit a model on the training CSV and return it.

    The CSV is loaded with pandas, the ``'Survived'`` column is popped off
    as the target, and the remaining columns are passed through
    ``main.clean_data`` and ``main.one_hot_encode`` before fitting the
    object returned by ``main.build_model()``.

    Args:
        train_path: Location of the training CSV. Defaults to
            ``'./data/train.csv'``, the path that used to be hard-coded,
            so existing ``train_model()`` calls behave as before.

    Returns:
        The model object after ``fit(features, target)`` has been called.
    """
    features = pd.read_csv(train_path)
    target = features.pop('Survived')
    features = main.one_hot_encode(main.clean_data(features))

    model = main.build_model()
    model.fit(features, target)
    return model
def test_image3(self):
    """Check that a ``'null'`` image is replaced for the 'Github Jobs' provider.

    ``main.clean_data`` is called with type ``2``; the resulting image must
    no longer be the literal string ``'null'`` and must equal the expected
    replacement URL.
    """
    sub = {'name': 'x', 'image': 'null', 'provider': 'Github Jobs'}
    type_ = 2
    res = main.clean_data(type_, sub)

    # Dedicated assert methods show the offending value when they fail.
    self.assertNotEqual(res['image'], 'null')
    self.assertEqual(
        res['image'],
        'https://pbs.twimg.com/profile_images/625760778554093568/dM7xD4SQ_400x400.png',
    )
def test_image1(self):
    """Check that Udemy image URLs have ``125_H`` swapped for ``480x270``.

    The same input dict is passed to ``main.clean_data`` twice, first with
    type ``1`` and then with type ``0``; both results must contain the
    rewritten image URL.
    """
    sub = {
        'name': 'x',
        'image': 'https://udemy-images.udemy.com/course/125_H/364426_2991_5.jpg',
        'provider': 'Udemy',
    }
    type_ = 1
    type_2 = 0

    res = main.clean_data(type_, sub)
    self.assertNotIn('125_H', res['image'])
    # The expected value is derived from ``sub`` after the call, exactly as
    # the original assertion did.
    self.assertEqual(res['image'], sub['image'].replace('125_H', '480x270'))

    res2 = main.clean_data(type_2, sub)
    self.assertNotIn('125_H', res2['image'])
    self.assertEqual(res2['image'], sub['image'].replace('125_H', '480x270'))
def index(): if request.method == 'POST': f = request.files['file'] if f and validate_files(f): model = train_model() test = pd.read_csv(f) ids = test['PassengerId'] test = main.clean_data(test) test = main.one_hot_encode(test) result = pd.DataFrame({'PassengerId': ids, 'Survived': model.predict(test)}) return result.to_html(index = False) return '''
def test_image2(self):
    """Check image clean-up for the 'Authentic Jobs' provider (type ``2``).

    Two cases are covered:

    * an image URL with a second ``https://...amazonaws.com`` host glued
      onto the CDN host must come back with that second host removed;
    * a ``company-blank.png`` placeholder must be replaced by the expected
      default image URL.
    """
    sub = {
        'name': 'x_',
        'image': 'https://d2fcz5no062gar.cloudfront.nethttps://authenticjobs.s3.amazonaws.com/uploads/logos/lbvf6cci6jno2f4tzl2nsoip4xoam1n9/thumb/logo.png',
        'provider': 'Authentic Jobs',
    }
    sub_empty = {
        'name': 'x_',
        'image': 'https://d2fcz5no062gar.cloudfront.net/uploads/logos/lbvf6cci6jno2f4tzl2nsoip4xoam1n9/thumb/company-blank.png',
        'provider': 'Authentic Jobs',
    }
    type_ = 2

    res = main.clean_data(type_, sub)
    # At most one "https://" may remain, i.e. splitting yields <= 2 parts.
    self.assertLessEqual(len(res['image'].split("https://")), 2)
    self.assertEqual(
        res['image'],
        sub['image'].replace('https://authenticjobs.s3.amazonaws.com', ''),
    )

    res2 = main.clean_data(type_, sub_empty)
    self.assertNotIn('company-blank.png', res2['image'])
    self.assertEqual(
        res2['image'], 'https://i.vimeocdn.com/portrait/3831018_300x300')
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import tree
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score

plt.style.use('ggplot')

if __name__ == '__main__':
    # Load the churn training set and run it through clean_data, passing the
    # two date column names and the per-role threshold mapping.
    data = clean_data(
        pd.read_csv("data/churn_train.csv"),
        ['last_trip_date', 'signup_date'],
        thresh_dict={"driver": 5, "passenger": 5},
    )

    # 'churn' is the target; every other column except the two date
    # columns is used as a feature.
    y = data.pop('churn')
    X = data.drop(['last_trip_date', 'signup_date'], axis=1)

    # Hold out 20% of the rows, with a fixed seed so the split is repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # fit() returns the estimator itself, so construction and fitting chain.
    clf = tree.DecisionTreeClassifier(
        criterion='gini', max_depth=8, min_samples_leaf=2,
    ).fit(X_train, y_train)

    feature_names = X_train.columns
kde=True, bins=75, ax=axs[0], color='steelblue') ax1.set_xlabel('Normal Sale Price') ax1.set_ylabel('Frequency', size=12) ax2 = sns.distplot(df['SalePrice_Log'], kde=True, bins=75, ax=axs[1], color='steelblue') ax2.set_xlabel('Log Transformed Sale Price') ax2.set_ylabel('Frequency', size=12) plt.tight_layout() plt.subplots_adjust(top=0.9) plt.show() if __name__ == '__main__': #Load Data train, test = load_data() # Plot Correlation Matrix of all features train = clean_data(train, dummy=False) # plot_correlation_matrix(train) scatter_matrix(train) # Log-Transform SalePrice train['SalePrice_Log'] = np.log1p(train["SalePrice"]) saleprice_dist(train)
print("-" * 55) for param, vals in parameter_grid.items(): print("{0:<20s} | {1:<8s} | {2}".format(str(param), str(best_params[param]), str(vals))) return best_params, model_best if __name__ == "__main__": churn_df = pd.read_csv('data/churn.csv') churn_test_df = pd.read_csv('data/churn_test.csv') churn_train_df = pd.read_csv('data/churn_train.csv') churn_df = clean_data(churn_df, ['last_trip_date', 'signup_date'], thresh_dict={ "driver": 5, "passenger": 5 }) churn_test_df = clean_data(churn_test_df, ['last_trip_date', 'signup_date'], thresh_dict={ "driver": 5, "passenger": 5 }) churn_train_df = clean_data(churn_train_df, ['last_trip_date', 'signup_date'], thresh_dict={ "driver": 5, "passenger": 5 })
ax.set_title(f"Feature importances - {type(model).__name__}") ax.set_xlabel("Feature", fontsize=16) ax.set_ylabel("Feature importance", fontsize=16) plt.tight_layout() plt.savefig(out_filepath) plt.show() return if __name__ == '__main__': churn_df = pd.read_csv('data/churn.csv') churn_test_df = pd.read_csv('data/churn_test.csv') churn_train_df = pd.read_csv('data/churn_train.csv') churn_df = clean_data(churn_df, ['last_trip_date', 'signup_date']) churn_test_df = clean_data(churn_test_df, ['last_trip_date', 'signup_date']) churn_train_df = clean_data(churn_train_df, ['last_trip_date', 'signup_date']) churn_train_df.drop(['last_trip_date', 'signup_date'], axis=1, inplace=True) churn_test_df.drop(['last_trip_date', 'signup_date'], axis=1, inplace=True) y = churn_train_df.pop('churn').values X = churn_train_df.values X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.80)