def main():
    """Main entry point."""
    # Load the data
    raw_data = pd.read_csv(os.path.join(config.dataset_path, 'charging_pile.csv'),
                           index_col='id')

    # Split the dataset
    train_data, test_data = train_test_split(raw_data, test_size=1 / 4, random_state=10)

    # Inspect the data
    utils.inspect_dataset(train_data, test_data)

    # Feature engineering
    print('\n===================== Feature Engineering =====================')
    # Build the train/test matrices
    X_train, y_train = utils.transform_data(train_data)
    X_test, y_test = utils.transform_data(test_data)

    # Modeling and validation
    print('\n===================== Modeling & Validation =====================')
    model_name_param_dict = {'kNN': [5, 11, 15],
                             'LR': [0.1, 1, 10]}

    # DataFrame for comparing results
    results_df = pd.DataFrame(columns=['Accuracy (%)', 'Time (s)'],
                              index=list(model_name_param_dict.keys()))
    results_df.index.name = 'Model'

    for model_name, param_range in model_name_param_dict.items():
        _, best_acc, mean_duration = utils.train_test_model(
            X_train, y_train, X_test, y_test, param_range, model_name)
        results_df.loc[model_name, 'Accuracy (%)'] = best_acc * 100
        results_df.loc[model_name, 'Time (s)'] = mean_duration
    results_df.to_csv(os.path.join(config.output_path, 'model_comparison.csv'))

    # Compare models and results
    print('\n===================== Model Comparison =====================')
    plt.figure(figsize=(10, 4))
    ax1 = plt.subplot(1, 2, 1)
    results_df.plot(y=['Accuracy (%)'], kind='bar', ylim=[60, 100],
                    ax=ax1, title='Accuracy (%)', legend=False)
    ax2 = plt.subplot(1, 2, 2)
    results_df.plot(y=['Time (s)'], kind='bar',
                    ax=ax2, title='Time (s)', legend=False)
    plt.tight_layout()
    plt.savefig(os.path.join(config.output_path, 'pred_results.png'))
    plt.show()
def predict(self, test_x):
    ret = []
    # The original looped over range(len(test_x) - 1) and skipped the last example
    for idx in range(len(test_x)):
        x = np.zeros(self.dimension)
        for token_id in transform_data(test_x[idx]):
            # Offset by 3 to map token ids into [0, dimension), matching the
            # companion perceptron code and keeping indices in bounds
            if 2 < token_id < self.dimension + 3:
                if self.binary:
                    x[token_id - 3] = 1
                else:
                    x[token_id - 3] += 1
        # Sum of per-feature log terms for the positive / negative class
        y1 = 0
        y2 = 0
        pos = np.multiply(x, self.x_pos_prob)
        neg = np.multiply(x, self.x_neg_prob)
        for p in np.nditer(pos):
            if p > 0:
                y1 += math.log(p, 2)
        for p in np.nditer(neg):
            if p > 0:
                y2 += math.log(p, 2)
        ret.append(1 if y1 >= y2 else -1)
    return ret
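# The log-space comparison in predict() above avoids floating-point underflow:
# rather than comparing products of many small per-feature probabilities, it
# compares sums of their logs. A self-contained toy illustration (all numbers
# hypothetical, not taken from the classifier above):
import math

probs_pos = [0.01] * 200    # 200 tiny per-feature likelihoods
probs_neg = [0.009] * 200

product_pos = math.prod(probs_pos)                  # underflows to 0.0
product_neg = math.prod(probs_neg)                  # underflows to 0.0
log_pos = sum(math.log(p, 2) for p in probs_pos)    # finite and comparable
log_neg = sum(math.log(p, 2) for p in probs_neg)

print(product_pos == product_neg == 0.0)   # True: direct products can't be ranked
print(log_pos >= log_neg)                  # True: log sums still rank the classes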
def sentiment():
    sentence = request.args.get('sentence')
    record = {'data': sentence}
    data, _ = transform_data(record, TEXT, LABEL)
    prediction = net(data).argmax(dim=1).item()
    result = 'positive' if prediction == 0 else 'negative'
    return jsonify({'data': result, 'status_code': 200})
def cal_new_w(self, tr_xi, label):
    x = np.zeros(self.dimension)
    for token_id in transform_data(tr_xi):
        if 2 < token_id < self.dimension + 3:
            if self.binary:
                x[token_id - 3] = 1
            else:
                x[token_id - 3] += 1
    y = np.inner(self.w, x) + self.b
    # Standard perceptron rule: update only on a misclassified example.
    # The original updated the bias unconditionally, and before computing y.
    if (label == 1 and y < 0) or (label == -1 and y >= 0):
        self.b = self.b + self.rate * label
        self.w = self.w + x * label * self.rate
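# A minimal driver sketch for cal_new_w above, assuming a hypothetical
# Perceptron class that stores w, b, rate, dimension and binary, and a
# train_set of (raw_example, label) pairs with labels in {1, -1}:
def train_perceptron(model, train_set, epochs=5):
    # One online update per example, repeated for a few epochs
    for _ in range(epochs):
        for raw_example, label in train_set:
            model.cal_new_w(raw_example, label)
    return model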
def evaluate(model, df):
    result = {'correct': 0, 'wrong': 0}
    df_len = df.shape[0]
    for i in range(df_len):
        record = df.loc[i, :].to_dict()
        data, label = transform_data(record, TEXT, LABEL)
        score = model(data)
        if score.argmax(dim=1) == label:
            result['correct'] += 1
        else:
            result['wrong'] += 1
    print(f"Classification accuracy of model ({model.__class__.__name__}) "
          f"is {result['correct'] / df_len}")
def predict():
    if request.method == 'GET':
        Pclass = request.args.get('Pclass')
        Age = request.args.get('Age')
        Sex = request.args.get('Sex')
        Parch = request.args.get('Parch')
        raw_data = pd.DataFrame({'Pclass': [Pclass], 'Age': [Age],
                                 'Sex': [Sex], 'Parch': [Parch]})
        transf_data = transform_data(raw_data)
        prediction = get_prediction(transf_data)
        # Take the predicted probability of the positive class for the
        # first (only) row
        prediction = prediction[0][1].item()
        data = {'prediction': prediction}
        return jsonify(data)
    else:
        return jsonify({'error': 'Only GET requests possible'})
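# A hedged usage sketch for the endpoint above, assuming the app is served
# locally on port 5000 and the view is routed at '/predict' (the route
# decorator is not shown in the snippet):
import requests

resp = requests.get('http://localhost:5000/predict',
                    params={'Pclass': 3, 'Age': 22, 'Sex': 'male', 'Parch': 0})
print(resp.json())   # e.g. {'prediction': 0.27} (illustrative value only)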
def predict(self, test_x):
    ret = []
    for idx in range(len(test_x)):
        x = np.zeros(self.dimension)
        for token_id in transform_data(test_x[idx]):
            if 2 < token_id < self.dimension + 3:
                if self.binary:
                    x[token_id - 3] = 1
                else:
                    x[token_id - 3] += 1
        y = np.inner(self.w, x) + self.b
        ret.append(1 if y >= 0 else -1)
    return ret
def cal_new_w(self, tr_xi, label):
    x = np.zeros(self.dimension)
    for token_id in transform_data(tr_xi):
        if 2 < token_id < self.dimension + 3:
            if self.binary:
                x[token_id - 3] = 1
            else:
                x[token_id - 3] += 1
    y = np.inner(self.w, x) + self.b
    if (label == 1 and y >= 0) or (label == -1 and y < 0):
        # Correctly classified: the current weights survive one more round
        self.survival = self.survival + 1
    else:
        # Misclassified: fold the update into a survival-weighted average
        self.b = self.b + self.rate * label / (self.survival + 1)
        tw = self.w + x * float(label) * self.rate
        self.w = (self.w * self.survival + tw) / (self.survival + 1)
        self.survival = 1
def __getitem__(self, i):
    # Read image
    image = Image.open(self.images[i], mode='r')
    image = image.convert('RGB')

    # Read objects in this image (bounding boxes, labels, difficulties)
    objects = self.objects[i]
    boxes = torch.FloatTensor(objects['boxes'])            # (n_objects, 4)
    labels = torch.LongTensor(objects['labels'])           # (n_objects)
    difficulties = torch.ByteTensor(objects['difficulties'])  # (n_objects)

    # Discard difficult objects, if desired
    if not self.keep_difficult:
        # Boolean mask; indexing with `1 - difficulties` is deprecated and
        # errors on recent PyTorch versions
        keep = difficulties == 0
        boxes = boxes[keep]
        labels = labels[keep]
        difficulties = difficulties[keep]

    # Apply transformations
    image, boxes, labels, difficulties = transform_data(image, boxes, labels,
                                                        difficulties, split=self.split)

    return image, boxes, labels, difficulties
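# Because each image carries a variable number of boxes, this dataset needs a
# custom collate function when batched. A minimal sketch, assuming the class
# above is named PascalVOCDataset (name and constructor args hypothetical)
# and that transform_data resizes images to a fixed size so they stack:
import torch
from torch.utils.data import DataLoader

def collate_fn(batch):
    # Stack images into one tensor; keep per-image box/label lists ragged
    images = torch.stack([item[0] for item in batch], dim=0)
    boxes = [item[1] for item in batch]
    labels = [item[2] for item in batch]
    difficulties = [item[3] for item in batch]
    return images, boxes, labels, difficulties

dataset = PascalVOCDataset(split='train')  # hypothetical constructor
loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)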
async def handle_webhook(payload: Dict[str, Any], type: str):
    engine = get_engine(connection_string())

    # Only look at allowlisted webhooks
    if type not in ACCEPTABLE_WEBHOOKS:
        return {"statusCode": 200, "body": f"not processing {type}"}

    # Marshal JSON into SQL-able data
    objects = extract_github_objects(payload, type)

    print("Writing", ", ".join([n for n, o in objects]))

    with engine.connect() as conn:
        for tablename, obj in objects:
            # Some of the data is not already in the right form (e.g. dates
            # and lists), so fix that up here
            obj = transform_data(obj)
            model_data = [tablename] + [column(k) for k in obj.keys()]
            model = table(*model_data)
            upsert(conn, model, obj)

    return {"statusCode": 200, "body": "ok"}
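# The transform_data call above is what fixes up non-SQL-native values. A
# hedged sketch of one plausible implementation (the real one is not shown
# in this snippet): ISO-8601 timestamp strings become datetimes, and lists
# or nested dicts become JSON text.
import json
from datetime import datetime

def transform_data_sketch(obj):
    out = {}
    for key, value in obj.items():
        if isinstance(value, str) and value.endswith('Z'):
            # GitHub timestamps look like '2021-01-01T00:00:00Z' (assumption)
            out[key] = datetime.strptime(value, '%Y-%m-%dT%H:%M:%SZ')
        elif isinstance(value, (list, dict)):
            out[key] = json.dumps(value)
        else:
            out[key] = value
    return out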
async def handle_webhook(payload: Dict[str, Any], type: str):
    engine = get_engine(connection_string())

    # Marshal JSON into SQL-able data
    objects = extract_github_objects(payload, type)

    print("Writing", ", ".join([n for n, o in objects]))

    with engine.connect() as conn:
        for tablename, obj in objects:
            # Some of the data is not already in the right form (e.g. dates
            # and lists), so fix that up here
            obj = transform_data(obj)
            model_data = [tablename] + [column(k) for k in obj.keys()]
            model = table(*model_data)

            if tablename not in existing_schema:
                print(f"Skipping write of {tablename} since it doesn't exist in hardcoded schema")
                continue

            # Remove non-existent fields
            newdata = {}
            for key, value in obj.items():
                if key in existing_schema[tablename]:
                    newdata[key] = value
                else:
                    print(f"Dropping key '{key}' with value '{value}' since it doesn't exist in table {tablename}")
            obj = newdata

            upsert(conn, model, obj)

    return {"statusCode": 200, "body": "ok"}
def main():
    """Main entry point."""
    # Load the data
    raw_data = pd.read_csv(os.path.join(config.dataset_path, 'zoo.csv'),
                           usecols=config.all_cols)

    # Split the dataset
    train_data, test_data = train_test_split(raw_data, test_size=1 / 4, random_state=10)

    # Inspect the data
    # utils.inspect_dataset(train_data, test_data)

    # Feature engineering
    print('\n===================== Feature Engineering =====================')
    X_train, X_test = utils.transform_data(train_data, test_data)

    # Labels
    y_train = train_data[config.label_col].values
    y_test = test_data[config.label_col].values

    # Modeling and validation
    print('\n===================== Modeling & Validation =====================')
    sclf = StackingClassifier(classifiers=[KNeighborsClassifier(), SVC(), DecisionTreeClassifier()],
                              meta_classifier=LogisticRegression())
    model_name_param_dict = {
        'kNN': (KNeighborsClassifier(), {'n_neighbors': [5, 25, 55]}),
        'LR': (LogisticRegression(), {'C': [0.01, 1, 100]}),
        'SVM': (SVC(), {'C': [0.01, 1, 100]}),
        'DT': (DecisionTreeClassifier(), {'max_depth': [50, 100, 150]}),
        'Stacking': (sclf, {'kneighborsclassifier__n_neighbors': [5, 25, 55],
                            'svc__C': [0.01, 1, 100],
                            'decisiontreeclassifier__max_depth': [50, 100, 150],
                            'meta-logisticregression__C': [0.01, 1, 100]}),
        'AdaBoost': (AdaBoostClassifier(), {'n_estimators': [50, 100, 150, 200]}),
        'GBDT': (GradientBoostingClassifier(), {'learning_rate': [0.01, 0.1, 1, 10, 100]}),
        'RF': (RandomForestClassifier(), {'n_estimators': [100, 150, 200, 250]}),
    }

    # DataFrame for comparing results
    results_df = pd.DataFrame(columns=['Accuracy (%)', 'Time (s)'],
                              index=list(model_name_param_dict.keys()))
    results_df.index.name = 'Model'

    for model_name, (model, param_range) in model_name_param_dict.items():
        _, best_acc, mean_duration = utils.train_test_model(X_train, y_train, X_test, y_test,
                                                            model_name, model, param_range)
        results_df.loc[model_name, 'Accuracy (%)'] = best_acc * 100
        results_df.loc[model_name, 'Time (s)'] = mean_duration
    results_df.to_csv(os.path.join(config.output_path, 'model_comparison.csv'))

    # Compare models and results
    print('\n===================== Model Comparison =====================')
    plt.figure(figsize=(10, 4))
    ax1 = plt.subplot(1, 2, 1)
    results_df.plot(y=['Accuracy (%)'], kind='bar', ylim=[60, 100],
                    ax=ax1, title='Accuracy (%)', legend=False)
    ax2 = plt.subplot(1, 2, 2)
    results_df.plot(y=['Time (s)'], kind='bar',
                    ax=ax2, title='Time (s)', legend=False)
    plt.tight_layout()
    plt.savefig(os.path.join(config.output_path, 'pred_results.png'))
    plt.show()
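# The 'kneighborsclassifier__n_neighbors' style keys above follow mlxtend's
# convention for tuning a StackingClassifier with GridSearchCV: base learners
# are keyed by their lowercased class name, the meta learner by
# 'meta-<classname>' (in the mlxtend version this code targets). A minimal
# standalone sketch of that pattern:
from mlxtend.classifier import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

sclf = StackingClassifier(classifiers=[KNeighborsClassifier(), SVC()],
                          meta_classifier=LogisticRegression())
grid = GridSearchCV(estimator=sclf,
                    param_grid={'kneighborsclassifier__n_neighbors': [5, 25],
                                'meta-logisticregression__C': [0.01, 1]},
                    cv=3)
# grid.fit(X_train, y_train)  # with the X_train/y_train produced in main()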
def add_neg(self, tr_xi):
    # Accumulate per-token counts for the negative class
    for token_id in transform_data(tr_xi):
        if 2 < token_id < self.dimension + 3:
            self.x_dict_neg[token_id] += 1