def sequence_interpolate(sequence: Sequence, fit_method="cubic", strip_details=True):
    """Interpolate undefined (NaN) values in *sequence* with ``scipy.interpolate.interp1d``.

    :param sequence: the input Sequence, possibly containing undefined values.
    :param fit_method: interpolation kind, passed straight through to
                       ``interp1d`` (e.g. 'linear', 'cubic').
    :param strip_details: if True, the result keeps only timestamps and values;
                          otherwise name, step and labels are carried over.
    :return: a new Sequence whose values are all defined.
    :raises ValueError: if every value of the sequence is undefined.
    """
    def _with_details(timestamps, values):
        # Rebuild a Sequence that preserves the metadata of the input sequence.
        return Sequence(timestamps=timestamps,
                        values=values,
                        name=sequence.name,
                        step=sequence.step,
                        labels=sequence.labels)

    filled_sequence = tidy_up_sequence(sequence)
    has_defined = [_valid_value(v) for v in filled_sequence.values]
    if all(has_defined):
        # Nothing to interpolate.
        if strip_details:
            return filled_sequence
        return _with_details(filled_sequence.timestamps, filled_sequence.values)

    if not any(has_defined):
        raise ValueError("All of sequence values are undefined.")

    y_raw = np.array(filled_sequence.values)
    x_new, y_new, na_param = _init_interpolate_param(filled_sequence)

    # Split the points into defined samples (fed to interp1d) and the
    # indices of missing ones that will be filled afterwards.
    x_nona, y_nona, na_index = [], [], []
    for i, (x, y) in enumerate(zip(x_new, y_new)):
        if _valid_value(y):
            x_nona.append(x)
            y_nona.append(y)
        else:
            na_index.append(i)

    fit_func = interp1d(x_nona, y_nona, kind=fit_method)
    y_new = fit_func(x_new)

    # Write the interpolated values back into the raw array.  The indices are
    # shifted because the leading NaN run was cut off before fitting.
    head_offset = len(na_param.head_na_index)
    for i in na_index:
        y_raw[i + head_offset] = y_new[i]
    # Leading/trailing NaN runs cannot be interpolated (they are outside the
    # fitted range), so pad them with the nearest defined value instead.
    y_raw[na_param.head_na_index] = na_param.head_start_nona_value
    y_raw[na_param.tail_na_index] = na_param.tail_start_nona_value

    if strip_details:
        return Sequence(timestamps=filled_sequence.timestamps, values=y_raw)
    return _with_details(filled_sequence.timestamps, y_raw)
def fetchone(self):
    """Pop and return the next cached Sequence, or an empty Sequence when
    the cache is exhausted.

    The underlying fetch runs lazily, exactly once.  Testing ``is None``
    (rather than truthiness, as the previous ``self.rv or ...`` did)
    prevents a re-fetch after the cached list has been drained to an
    empty — and hence falsy — list.
    NOTE(review): assumes ``self.rv`` is initialized to None — confirm
    against the constructor.
    """
    if self.rv is None:
        self.rv = self._real_fetching_action()
    # If iterator has un-popped elements then return it,
    # otherwise return empty of the sequence.
    try:
        return self.rv.pop(0)
    except IndexError:
        return Sequence()
def test_sequence():
    """Exercise Sequence construction, indexing, slicing and iteration."""
    no_one_seq = Sequence(range(0, 0), range(0, 0))
    assert str(no_one_seq) == 'Sequence[None](0){}'
    assert len(no_one_seq) == 0

    s1_tms = (10, 20, 30, 40, 50)
    s1_vls = (1, 2, 3, 4, 5)
    s1 = Sequence(s1_tms, s1_vls)
    assert s1.timestamps == (10, 20, 30, 40, 50)
    assert s1.values == (1, 2, 3, 4, 5)
    assert len(s1) == 5
    assert s1[30] == 3
    assert s1[1] is None

    sub1 = s1[20, 40]  # (20, 30, 40), (2, 3, 4)
    assert sub1.timestamps == (20, 30, 40)
    assert sub1.values == (2, 3, 4)
    assert len(sub1) == 3

    sub_non = sub1[100, 111]
    assert len(sub_non) == 0
    assert sub_non.timestamps == tuple()
    assert sub_non.values == tuple()

    sub2 = s1[40, 80]
    assert len(sub2) == 2
    sub3 = sub2[40, 40]
    assert len(sub3) == 1
    sub4 = sub2[80, 80]
    assert len(sub4) == 0
    assert sub2.values == (4, 5)
    # Fix: the original `assert sub2[40] == 4, sub2[50] == 5` used the comma
    # form of assert, which made `sub2[50] == 5` the (never checked) assert
    # message.  Split into two real assertions.
    assert sub2[40] == 4
    assert sub2[50] == 5

    # Non-monotonic timestamps must be rejected with ValueError.
    e = None
    try:
        Sequence((1, 2, 3, 4, 4, 5), (10, 20, 30, 40, 30, 20))
    except ValueError as _:
        e = _
    assert isinstance(e, ValueError)

    # test iterator for sequence
    for i, (t, v) in enumerate(s1):
        assert t == s1_tms[i] and v == s1_vls[i]
def _real_fetching_action(self):
    """Build one Sequence per mocked host and return them as a list."""
    hosts = ('192.168.1.100:1234', '192.168.1.101:5678', '192.168.1.102:1111')
    return [
        Sequence(timestamps=timestamps,
                 values=values,
                 name=metric_name,
                 labels={'from_instance': host})
        for host in hosts
    ]
def test_save_xxx():
    """Exercise dai.save_forecast_sequence and dai.save_slow_queries."""
    forecast_seq = Sequence(tuple(range(0, 100)), tuple(range(100, 200)))
    dai.save_forecast_sequence('127.0.0.1', 'test_metric', forecast_seq)

    slow_query = SlowQuery(
        db_host='10.90.5.172',
        db_port=1234,
        schema_name='test_schema',
        db_name='test_db',
        query='select sleep(100);',
        start_timestamp=1000,
        duration_time=2,
        hit_rate=0.90,
        fetch_rate=1000,
        cpu_time=100,
        data_io_time=100
    )
    slow_query.add_cause(RootCause.get('LOCK_CONTENTION'))
    # Saving the same object several times must be accepted.
    dai.save_slow_queries([slow_query] * 3)
def tidy_up_sequence(sequence):
    """Fill up missing values for sequence and align sequence's timestamps.

    After tidying, consecutive timestamps differ by exactly ``sequence.step``;
    gaps of one step or more are filled with NaN placeholders.

    :param sequence: the input Sequence.
    :return: a new, aligned Sequence; the input is returned untouched when
             its step is non-positive (alignment is undefined then).
    """
    if sequence.step <= 0:
        return sequence

    def estimate_error(a, b):
        # Relative deviation of the real interval *a* from the expected step *b*.
        return (a - b) / b

    timestamps = list(sequence.timestamps)
    values = list(sequence.values)

    i = 1
    while i < len(timestamps):
        real_interval = timestamps[i] - timestamps[i - 1]
        error = estimate_error(real_interval, sequence.step)
        if error < 0:
            # The interval is shorter than one step (including the case of a
            # non-increasing timestamp): drop the previous point to keep the
            # spacing monotonic, preserving its value if it was the only
            # defined one of the pair.
            if not _valid_value(values[i - 1]):
                values[i - 1] = values[i]
            timestamps.pop(i)
            values.pop(i)
            i -= 1  # We have removed an element so we have to decrease the cursor.
        elif error == 0:
            pass  # Exactly one step apart: everything is normal, skipping.
        elif 0 < error < 1:
            # Less than one extra step off: snap the timestamp onto the grid.
            timestamps[i] = timestamps[i - 1] + sequence.step
        else:
            # A whole step (or more) is missing: insert a NaN placeholder.
            next_ = timestamps[i - 1] + sequence.step
            timestamps.insert(i, next_)
            values.insert(i, float('nan'))
        i += 1

    return Sequence(timestamps, values)
configs.set('slow_sql_threshold', 'load_average_rate_limit', '0.5') query_feature._get_threshold = mock.Mock(side_effect=lambda x: configs.getfloat('slow_sql_threshold', x)) simple_slow_sql_dict = {'from_instance': '127.0.0.1:5432', 'datname': 'database1', 'schema': 'public', 'query': 'select count(*) from schema1.table1', 'start_time': '1640139690000', 'finish_time': '1640139700000', 'hit_rate': '0.988', 'fetch_rate': '0.99', 'cpu_time': '14200', 'data_io_time': '1231243', 'unique_query_id': '12432453234', 'sort_count': '13', 'sort_mem_used': '12.43', 'sort_spill_count': '3', 'hash_count': '0', 'hash_mem_used': '0', 'hash_spill_count': '0', 'lock_wait_count': '0', 'lwlock_wait_count': '0', 'n_returned_rows': '1', 'n_tuples_returned': '100000', 'n_tuples_fetched': '0', 'n_tuples_inserted': '0', 'n_tuples_updated': '0', 'n_tuples_deleted': 0} simple_slow_sql_seq = Sequence(timestamps=[1640139695000], values=[101], name='pg_sql_statement_history_exec_time', step=5, labels=simple_slow_sql_dict) complex_slow_sql_dict = {'from_instance': '127.0.0.1:5432', 'datname': 'database1', 'schema': 'public', 'query': 'update schema1.table1 set age=30 where id=3', 'start_time': '1640139690000', 'finish_time': '1640139700000', 'hit_rate': '0.899', 'fetch_rate': '0.99', 'cpu_time': '14200', 'data_io_time': '1231243', 'unique_query_id': '12432453234', 'sort_count': '0', 'sort_mem_used': '0', 'sort_spill_count': '0', 'hash_count': '0', 'hash_mem_used': '0', 'hash_spill_count': '0', 'lock_wait_count': '2', 'lwlock_wait_count': '3', 'n_returned_rows': '100000', 'n_tuples_returned': '100000', 'n_tuples_fetched': '100000', 'n_tuples_inserted': '0', 'n_tuples_updated': '100000', 'n_tuples_deleted': 0} complex_slow_sql_seq = Sequence(timestamps=[1640139695000], values=[101], name='pg_sql_statement_history_exec_time', step=5,
'indexrelname': 'table_index1' } gaussdb_qps_by_instance_dict = {'instance': '127.0.0.1:5432'} pg_connections_max_conn_dict = {'instance': '127.0.0.1:5432'} pg_connections_used_conn_dict = {'instance': '127.0.0.1:5432'} os_disk_iops_dict = {'instance': '127.0.0.1:5432'} os_disk_ioutils_dict = {'instance': '127.0.0.1:5432', 'device': 'sdm-0'} os_cpu_iowait_dict = {'instance': '127.0.0.1:5432'} os_disk_iocapacity_dict = {'instance': '127.0.0.1:5432'} os_cpu_usage_rate_dict = {'instance': '127.0.0.1:5432'} os_mem_usage_dict = {'instance': '127.0.0.1:5432'} node_load1_dict = {'instance': '127.0.0.1:5432'} pg_class_relsize_seq = Sequence(timestamps=(1640139695000, ), values=(1000, ), name='pg_class_relsize', step=5, labels=pg_class_relsize_dict) pg_lock_sql_locked_times_seq = Sequence(timestamps=(1640139695000, ), values=(1000, ), name='pg_lock_sql_locked_times', step=5, labels=pg_lock_sql_locked_times_dict) pg_tables_expansion_rate_dead_rate_seq = Sequence( timestamps=(1640139695000, 1640139700000, 1640139705000), values=(0.1, 0.2, 0.3), name='pg_tables_expansion_rate_dead_rate', step=5, labels=pg_tables_expansion_rate_dead_rate_dict)
# You may obtain a copy of Mulan PSL v2 at: # # http://license.coscl.org.cn/MulanPSL2 # # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. from sklearn.svm import SVR from dbmind.common.algorithm.forecasting.simple_forecasting import SupervisedModel from dbmind.common.algorithm.forecasting import ForecastingFactory from dbmind.common.types import Sequence linear_seq = Sequence(tuple(range(1, 10)), tuple(range(1, 10))) def roughly_compare(list1, list2, threshold=1): if len(list1) != len(list2): return False for v1, v2 in zip(list1, list2): if abs(v1 - v2) > threshold: return False return True def test_linear_regression(): linear = ForecastingFactory.get_instance('linear') linear.fit(linear_seq) result = linear.forecast(10)