def solve(taboo_list, distance_matrix, tour):
    """Grow ``tour`` by cheapest insertion until every node is taboo.

    While ``taboo_list.size`` differs from the number of rows of
    ``distance_matrix``, a ``NearestNeighborVoting`` ensemble elects the next
    candidate node, which is then inserted into ``tour`` at the slot that
    lengthens the tour the least.

    Args:
        taboo_list: Object exposing ``.size``; it is handed to the ensemble
            and replaced by ``ensemble.taboo_list`` after each vote.
        distance_matrix: Square, indexable distance matrix (``m[a][b]``)
            with a ``.shape`` attribute.
        tour: List of node indices. It is modified in place.

    Returns:
        A ``(tour, distance)`` tuple where ``distance`` is the sum of the
        distances between consecutive entries of ``tour``.

    Raises:
        ValueError: If an insertion is required while ``tour`` holds fewer
            than two nodes (there is no edge to insert into).
    """
    # The ensemble only ever receives this copy, never the caller's matrix.
    working_matrix = np.copy(distance_matrix)

    while taboo_list.size != distance_matrix.shape[0]:
        ensemble = NearestNeighborVoting(working_matrix, taboo_list)
        ensemble.voting()
        taboo_list = ensemble.taboo_list
        working_matrix = ensemble.distance_matrix
        candidate = ensemble.candidate

        def insertion_gain(slot):
            # Extra length caused by replacing edge (prev -> nxt) with
            # (prev -> candidate -> nxt). The removed edge must be looked up
            # by *node* (tour[slot]), not by the slot number itself.
            prev, nxt = tour[slot - 1], tour[slot]
            return (
                distance_matrix[prev][candidate]
                + distance_matrix[candidate][nxt]
                - distance_matrix[prev][nxt]
            )

        # Pick the slot label directly instead of going through
        # Series.argmin(), whose result is a 0-based position on current
        # pandas and would therefore be off by one against slots 1..len-1.
        best_slot = min(range(1, len(tour)), key=insertion_gain)
        tour.insert(best_slot, candidate)

    # Total length of the finished tour (0 for tours with fewer than 2 nodes).
    distance = sum(
        distance_matrix[here][there] for here, there in zip(tour, tour[1:])
    )
    return tour, distance
def _find_price(self, date_: date):
    """Return the rounded "Close" price for ``date_``.

    If ``date_`` is not present in the index of ``self._prices``, the index
    entry with the smallest absolute difference to it is used instead. The
    result is rounded to ``self._precison`` digits (attribute name spelled
    as it is defined on the instance).
    """
    index = self._prices.index
    stamp = Timestamp(date_)

    if stamp not in index:
        # Fall back to the closest available timestamp on either side.
        gaps = Series(index - stamp).abs()
        nearest_pos = gaps.argmin()
        stamp = index[nearest_pos]

    return round(self._prices.loc[stamp, "Close"], self._precison)
def voting(self):
    """Elect the column of ``self.distance_matrix`` with the lowest sum.

    The first column and the column at index ``shape[0] - 1`` are overwritten
    with 999999999 before the vote, which effectively keeps them from having
    the lowest total. The winning column index is stored in
    ``self.candidate``, its column is overwritten with the same value, and
    the index is pushed onto ``self.taboo_list``.
    """
    matrix = self.distance_matrix
    node_count = matrix.shape[0]

    # Bar the first and last columns from winning.
    for barred in (0, node_count - 1):
        matrix[:, barred] = 999999999

    # One "vote" per column: the sum of all entries in that column.
    columns = list(range(node_count))
    totals = [np.sum(matrix[:, col]) for col in columns]
    ballot = Series(data=totals, index=columns)

    self.candidate = ballot.argmin()

    # Overwrite the winner's column, then record the winner as taboo.
    matrix[:, self.candidate] = 999999999
    self.taboo_list.push(self.candidate)
def test_numpy_argmin(self):
    # See GH#16830
    # np.argmin on a Series and Series.argmin must both agree with
    # np.argmin on the underlying array, and ``out=`` must be rejected.
    values = np.arange(1, 11)
    ser = Series(values, index=values)
    expected = np.argmin(values)

    via_numpy = np.argmin(ser)
    assert via_numpy == expected

    via_method = ser.argmin()
    assert via_method == expected

    with pytest.raises(ValueError, match="the 'out' parameter is not supported"):
        np.argmin(ser, out=values)
def test_numpy_argmin_deprecated(self):
    # See GH#16830
    values = np.arange(1, 11)
    ser = Series(values, index=values)
    out_msg = "the 'out' parameter is not supported"

    # The deprecation of Series.argmin also causes a deprecation
    # warning when calling np.argmin. This behavior is temporary
    # until the implementation of Series.argmin is corrected.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        via_numpy = np.argmin(ser)
    assert via_numpy == 1

    # argmin is aliased to idxmin, so the result is the label 1.
    with tm.assert_produces_warning(FutureWarning):
        via_method = ser.argmin()
    assert via_method == 1

    # The unsupported ``out`` argument must still raise, under the warning.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        with pytest.raises(ValueError, match=out_msg):
            np.argmin(ser, out=values)
## fit with 5-fold CV to choose n_components n_splits=5 kf = KFold(n_splits=n_splits, shuffle=True, random_state=0) mse = Series(dtype=float) for i in np.arange(1, 31): pls = PLSRegression(n_components=i) score = cross_val_score(pls, scale.transform(X_train), Y_train, cv=kf, scoring='neg_mean_squared_error').mean() mse.loc[i] = -score ## show CV results and best model fig, ax = plt.subplots(clear=True, num=1, figsize=(5,3)) mse.plot(ylabel='Mean Squared Error', xlabel='Number of Components', title=f"PLS Regression with {n_splits}-fold CV", ax=ax) best = mse.index[mse.argmin()] ax.plot(best, mse.loc[best], "or") ax.legend(['MSE', f"best={best}"]) plt.savefig(os.path.join(imgdir, 'pls.jpg')) plt.show() # evaluate train and test mse model = PLSRegression(n_components=best).fit(X_train, Y_train) name = f"PLS Regression" test[name] = mean_squared_error(Y_test, model.predict(X_test)) train[name] = mean_squared_error(Y_train, model.predict(X_train)) final_models[name] = model DataFrame({'name': name, 'train': np.sqrt(train[name]), 'test': np.sqrt(test[name])}, index=['RMSE']) # Ridge Regression
np.set_printoptions(threshold=175) print np.unique(in_df.PoliceDistrict) in_df.PoliceDistrict.value_counts() # Freq policeDistricts_list = np.unique(in_df.PoliceDistrict) dist_resp_times = SR(np.zeros(len(policeDistricts_list))) for i in policeDistricts_list: mask = (in_df.PoliceDistrict == i) & (response_time1 > 0) dist_resp_times[i] = response_time1[mask].mean() print 'District ', i, print_response_time('Mean', dist_resp_times[i]) print 'Longest avg. resp time ', dist_resp_times.max( ), 'for District', dist_resp_times.argmax() print 'Shortest avg. resp time ', min( dist_resp_times), 'for District', dist_resp_times.argmin() print_response_time('Difference in avg.', max(dist_resp_times) - min(dist_resp_times)) #------------------------------------------------------------------------------ # Event types that occur more often in a district print np.unique(in_df.Type_) in_df.Type_.value_counts() ## 21 has the highest freq in_df.TypeText[in_df.Type_ == '21'] ## Corresponds to 'COMPLAINT OTHER' in_df.TypeText.value_counts() tmp_df1 = in_df.groupby(['PoliceDistrict', 'Type_', 'TypeText']).size().reset_index(name='Times') for i in policeDistricts_list: