def sequence_interpolate(sequence: Sequence,
                         fit_method="cubic",
                         strip_details=True):
    """Fill undefined values of *sequence* using scipy's ``interp1d``.

    The sequence is first tidied (timestamps aligned, gaps padded with NaN),
    then the defined points are fitted with an interpolator of kind
    *fit_method* and the missing interior points are replaced by the fitted
    values.  Leading/trailing runs of undefined values, which cannot be
    interpolated, are padded with the nearest defined value.

    :param sequence: the Sequence to repair.
    :param fit_method: interpolation kind passed to ``scipy.interpolate.interp1d``
                       (e.g. ``"linear"``, ``"cubic"``).
    :param strip_details: when True, return a bare Sequence (timestamps and
                          values only); otherwise carry over name, step and
                          labels from the input sequence.
    :raises ValueError: if every value of the sequence is undefined.
    """
    filled_sequence = tidy_up_sequence(sequence)
    has_defined = [_valid_value(v) for v in filled_sequence.values]

    if all(has_defined):
        # Nothing to interpolate.
        if strip_details:
            return filled_sequence
        return Sequence(timestamps=filled_sequence.timestamps,
                        values=filled_sequence.values,
                        name=sequence.name,
                        step=sequence.step,
                        labels=sequence.labels)

    if not any(has_defined):
        raise ValueError("All of sequence values are undefined.")

    y_raw = np.array(filled_sequence.values)
    x_new, y_new, na_param = _init_interpolate_param(filled_sequence)

    # Split the interpolation window into defined points (used to fit) and
    # the indices of missing points (to be filled in below).
    x_nona, y_nona, na_index = [], [], []
    for i, value in enumerate(y_new):
        if _valid_value(value):
            x_nona.append(x_new[i])
            y_nona.append(value)
        else:
            na_index.append(i)

    fit_func = interp1d(x_nona, y_nona, kind=fit_method)
    y_fitted = fit_func(x_new)

    # Write the interpolated values back into the raw array; indices are
    # offset by the leading-NaN run that was excluded from the window.
    for i in na_index:
        y_raw[i + len(na_param.head_na_index)] = y_fitted[i]

    # Leading/trailing NaN runs cannot be interpolated; pad with the
    # nearest defined value on each side.
    y_raw[na_param.head_na_index] = na_param.head_start_nona_value
    y_raw[na_param.tail_na_index] = na_param.tail_start_nona_value

    if strip_details:
        return Sequence(timestamps=filled_sequence.timestamps, values=y_raw)
    return Sequence(timestamps=filled_sequence.timestamps,
                    values=y_raw,
                    name=sequence.name,
                    step=sequence.step,
                    labels=sequence.labels)
# Example #2
 def fetchone(self):
     """Return the next buffered Sequence, or an empty Sequence when drained.

     The backlog is fetched lazily exactly once.  Testing for ``None``
     (rather than falsiness, as the previous ``self.rv = self.rv or ...``
     did) prevents a drained (empty-list) buffer from triggering a fresh
     fetch, which made the empty-Sequence exhaustion path unreachable.

     NOTE(review): assumes ``self.rv`` is initialized to ``None`` before the
     first call — confirm against the class constructor.
     """
     if self.rv is None:
         self.rv = self._real_fetching_action()
     # If the buffer still has un-popped elements then return the next one,
     # otherwise return an empty Sequence to signal exhaustion.
     try:
         return self.rv.pop(0)
     except IndexError:
         return Sequence()
# Example #3
def test_sequence():
    """Exercise Sequence construction, indexing, slicing, and iteration."""
    no_one_seq = Sequence(range(0, 0), range(0, 0))
    assert str(no_one_seq) == 'Sequence[None](0){}'
    assert len(no_one_seq) == 0

    s1_tms = (10, 20, 30, 40, 50)
    s1_vls = (1, 2, 3, 4, 5)
    s1 = Sequence(s1_tms, s1_vls)
    assert s1.timestamps == (10, 20, 30, 40, 50)
    assert s1.values == (1, 2, 3, 4, 5)
    assert len(s1) == 5
    assert s1[30] == 3
    assert s1[1] is None

    sub1 = s1[20, 40]  # (20, 30, 40), (2, 3, 4)
    assert sub1.timestamps == (20, 30, 40)
    assert sub1.values == (2, 3, 4)

    assert len(sub1) == 3

    sub_non = sub1[100, 111]
    assert len(sub_non) == 0
    assert sub_non.timestamps == tuple()
    assert sub_non.values == tuple()
    sub2 = s1[40, 80]
    assert len(sub2) == 2
    sub3 = sub2[40, 40]
    assert len(sub3) == 1
    sub4 = sub2[80, 80]
    assert len(sub4) == 0
    assert sub2.values == (4, 5)

    # Previously written as `assert sub2[40] == 4, sub2[50] == 5`, where the
    # second comparison was only the assert *message* and never checked.
    assert sub2[40] == 4
    assert sub2[50] == 5

    # Non-monotonic timestamps must be rejected.
    e = None
    try:
        Sequence((1, 2, 3, 4, 4, 5), (10, 20, 30, 40, 30, 20))
    except ValueError as _:
        e = _
    assert isinstance(e, ValueError)

    # test iterator for sequence
    for i, (t, v) in enumerate(s1):
        assert t == s1_tms[i] and v == s1_vls[i]
# Example #4
 def _real_fetching_action(self):
     """Build one Sequence per monitored host instance.

     NOTE(review): ``timestamps``, ``values`` and ``metric_name`` are free
     names resolved from the enclosing module — not visible in this chunk.
     """
     hosts = ('192.168.1.100:1234', '192.168.1.101:5678',
              '192.168.1.102:1111')
     return [
         Sequence(timestamps=timestamps,
                  values=values,
                  name=metric_name,
                  labels={'from_instance': host})
         for host in hosts
     ]
# Example #5
def test_save_xxx():
    """Smoke-test the dai save helpers: forecast sequences and slow queries."""
    target_host = '127.0.0.1'
    metric = 'test_metric'

    forecast_seq = Sequence(tuple(range(0, 100)), tuple(range(100, 200)))
    dai.save_forecast_sequence(target_host, metric, forecast_seq)

    query = SlowQuery(db_host='10.90.5.172',
                      db_port=1234,
                      schema_name='test_schema',
                      db_name='test_db',
                      query='select sleep(100);',
                      start_timestamp=1000,
                      duration_time=2,
                      hit_rate=0.90,
                      fetch_rate=1000,
                      cpu_time=100,
                      data_io_time=100)
    query.add_cause(RootCause.get('LOCK_CONTENTION'))
    # Saving duplicates must be accepted.
    dai.save_slow_queries([query, query, query])
def tidy_up_sequence(sequence):
    """Fill up missing values for a sequence and align its timestamps.

    Walks the timestamps assuming consecutive points should be spaced by
    ``sequence.step``:

    - a negative gap (timestamp going backwards) removes a point to keep the
      series monotonic;
    - a small positive drift (less than one step) snaps the timestamp onto
      the step grid;
    - a gap of one full step or more inserts a NaN placeholder to be
      interpolated later.

    Returns the input unchanged when its step is not positive.
    """
    if sequence.step <= 0:
        return sequence

    def estimate_error(a, b):
        # Relative deviation of the real interval from the expected step.
        return (a - b) / b

    timestamps = list(sequence.timestamps)
    values = list(sequence.values)

    i = 1
    while i < len(timestamps):
        real_interval = timestamps[i] - timestamps[i - 1]
        error = estimate_error(real_interval, sequence.step)
        if error < 0:
            # The current timestamp is lesser than the previous one.
            # We should remove one to keep monotonic, preferring to keep a
            # defined value over an undefined one.
            if not _valid_value(values[i - 1]):
                values[i - 1] = values[i]
            timestamps.pop(i)
            values.pop(i)
            i -= 1  # We have removed an element so we have to decrease the cursor.
        elif error == 0:
            # Everything is normal, skipping.  (Was a bare string literal,
            # which is a no-op statement, not a comment.)
            pass
        elif 0 < error < 1:
            # Align the current timestamp onto the step grid.
            timestamps[i] = timestamps[i - 1] + sequence.step
        else:
            # Fill up the missing value with NaN; the inserted point is
            # re-examined against the original one on the next iteration.
            next_ = timestamps[i - 1] + sequence.step
            timestamps.insert(i, next_)
            values.insert(i, float('nan'))
        i += 1

    return Sequence(timestamps, values)
# Lower the load-average threshold so the feature under test trips deterministically.
configs.set('slow_sql_threshold', 'load_average_rate_limit', '0.5')

# Route threshold lookups through the test config instead of production settings.
query_feature._get_threshold = mock.Mock(side_effect=lambda x: configs.getfloat('slow_sql_threshold', x))


# Labels describing a "simple" slow query (a full-table count with a high hit
# rate and no lock/sort/hash activity).
simple_slow_sql_dict = {'from_instance': '127.0.0.1:5432', 'datname': 'database1', 'schema': 'public',
                        'query': 'select count(*) from schema1.table1', 'start_time': '1640139690000',
                        'finish_time': '1640139700000', 'hit_rate': '0.988', 'fetch_rate': '0.99', 'cpu_time': '14200',
                        'data_io_time': '1231243', 'unique_query_id': '12432453234', 'sort_count': '13',
                        'sort_mem_used': '12.43', 'sort_spill_count': '3', 'hash_count': '0', 'hash_mem_used': '0',
                        'hash_spill_count': '0', 'lock_wait_count': '0', 'lwlock_wait_count': '0',
                        'n_returned_rows': '1', 'n_tuples_returned': '100000', 'n_tuples_fetched': '0',
                        'n_tuples_inserted': '0', 'n_tuples_updated': '0', 'n_tuples_deleted': 0}
# Single-point sequence carrying the simple slow query's execution time metric.
simple_slow_sql_seq = Sequence(timestamps=[1640139695000],
                               values=[101],
                               name='pg_sql_statement_history_exec_time',
                               step=5,
                               labels=simple_slow_sql_dict)

# Labels describing a "complex" slow query (a bulk update with lock and
# lwlock waits and a lower hit rate).
complex_slow_sql_dict = {'from_instance': '127.0.0.1:5432', 'datname': 'database1', 'schema': 'public',
                         'query': 'update schema1.table1 set age=30 where id=3', 'start_time': '1640139690000',
                         'finish_time': '1640139700000', 'hit_rate': '0.899', 'fetch_rate': '0.99', 'cpu_time': '14200',
                         'data_io_time': '1231243', 'unique_query_id': '12432453234', 'sort_count': '0',
                         'sort_mem_used': '0', 'sort_spill_count': '0', 'hash_count': '0', 'hash_mem_used': '0',
                         'hash_spill_count': '0', 'lock_wait_count': '2', 'lwlock_wait_count': '3',
                         'n_returned_rows': '100000', 'n_tuples_returned': '100000', 'n_tuples_fetched': '100000',
                         'n_tuples_inserted': '0', 'n_tuples_updated': '100000', 'n_tuples_deleted': 0}
# Single-point sequence carrying the complex slow query's execution time
# metric.  The original literal was truncated mid-expression in this chunk;
# it is reconstructed here by parallel with simple_slow_sql_seq above.
complex_slow_sql_seq = Sequence(timestamps=[1640139695000],
                                values=[101],
                                name='pg_sql_statement_history_exec_time',
                                step=5,
                                labels=complex_slow_sql_dict)

# NOTE(review): the following label dicts were lost in the same truncation
# and are referenced by the fixture sequences below; reconstructed minimally
# from their usages — confirm against the original file.
pg_class_relsize_dict = {'instance': '127.0.0.1:5432', 'relname': 'table1',
                         'indexrelname': 'table_index1'}
pg_lock_sql_locked_times_dict = {'instance': '127.0.0.1:5432'}
pg_tables_expansion_rate_dead_rate_dict = {'instance': '127.0.0.1:5432',
                                           'relname': 'table1'}
# Minimal label sets for the system/database metrics consumed by the tests;
# each pins the metric to the same monitored instance.
gaussdb_qps_by_instance_dict = {'instance': '127.0.0.1:5432'}
pg_connections_max_conn_dict = {'instance': '127.0.0.1:5432'}
pg_connections_used_conn_dict = {'instance': '127.0.0.1:5432'}
os_disk_iops_dict = {'instance': '127.0.0.1:5432'}
os_disk_ioutils_dict = {'instance': '127.0.0.1:5432', 'device': 'sdm-0'}
os_cpu_iowait_dict = {'instance': '127.0.0.1:5432'}
os_disk_iocapacity_dict = {'instance': '127.0.0.1:5432'}
os_cpu_usage_rate_dict = {'instance': '127.0.0.1:5432'}
os_mem_usage_dict = {'instance': '127.0.0.1:5432'}
node_load1_dict = {'instance': '127.0.0.1:5432'}

# Fixture sequences pairing the label dicts with sample metric values.
# NOTE(review): pg_class_relsize_dict, pg_lock_sql_locked_times_dict and
# pg_tables_expansion_rate_dead_rate_dict are expected to be defined earlier
# in the file (their definitions are not visible in this chunk).
pg_class_relsize_seq = Sequence(timestamps=(1640139695000, ),
                                values=(1000, ),
                                name='pg_class_relsize',
                                step=5,
                                labels=pg_class_relsize_dict)

pg_lock_sql_locked_times_seq = Sequence(timestamps=(1640139695000, ),
                                        values=(1000, ),
                                        name='pg_lock_sql_locked_times',
                                        step=5,
                                        labels=pg_lock_sql_locked_times_dict)

pg_tables_expansion_rate_dead_rate_seq = Sequence(
    timestamps=(1640139695000, 1640139700000, 1640139705000),
    values=(0.1, 0.2, 0.3),
    name='pg_tables_expansion_rate_dead_rate',
    step=5,
    labels=pg_tables_expansion_rate_dead_rate_dict)
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
from sklearn.svm import SVR

from dbmind.common.algorithm.forecasting.simple_forecasting import SupervisedModel
from dbmind.common.algorithm.forecasting import ForecastingFactory
from dbmind.common.types import Sequence


# An identity series (value == timestamp, 1..9): a trivially learnable
# fixture for the linear forecasting model exercised below.
linear_seq = Sequence(tuple(range(1, 10)), tuple(range(1, 10)))


def roughly_compare(list1, list2, threshold=1):
    """Return True when both sequences have the same length and every pair
    of corresponding elements differs by at most *threshold*."""
    if len(list1) != len(list2):
        return False
    return all(abs(lhs - rhs) <= threshold for lhs, rhs in zip(list1, list2))


def test_linear_regression():
    linear = ForecastingFactory.get_instance('linear')
    linear.fit(linear_seq)
    result = linear.forecast(10)