コード例 #1
0
ファイル: test_lttb.py プロジェクト: javiljoen/lttb.py
def test_downsample_with_default_validators_raises_error_with_multiple_messages():
    """Expect one ValueError whose message joins three validation failures.

    The input is deliberately broken in three ways: it has three columns,
    its first column repeats a value, and it contains a NaN.
    """
    bad_points = np.random.standard_normal((4, 3))  # one column too many
    bad_points[:, 0] = [1, 2, 2, 3]  # x repeats, so not strictly increasing
    bad_points[2, 1] = np.nan  # a missing y value

    expected_message = ("data does not have 2 columns; "
                        "data contains NaN values; "
                        "first column is not strictly increasing")

    with pytest.raises(ValueError) as exc:
        lttb.downsample(bad_points, 3)

    assert exc.match(expected_message)
コード例 #2
0
ファイル: test_lttb.py プロジェクト: jpgill86/lttb.py
def test_downsampling():
    """Downsample the CSV fixture to 100 points and check the output shape."""
    table = np.genfromtxt('tests/timeseries.csv', delimiter=',', names=True)
    # Pair the named X and Y columns into an (n, 2) array of points.
    points = np.array([table['X'], table['Y']]).T
    reduced = lttb.downsample(points, 100)
    assert reduced.shape == (100, 2)
コード例 #3
0
def load_data(graph, data_slice=None):
    """Build one scatter trace per series named in ``graph``.

    Reads the module-level ``DATA`` array (column 0 holds POSIX timestamps,
    since it is fed to ``datetime.fromtimestamp``) and ``FILENAME``.

    Args:
        graph: iterable of series names. Each name is matched
            case-insensitively against the module-level ``labels`` and
            shifted by ``LABEL_OFFSET`` to find its column in ``DATA``.
        data_slice: ``None`` to use every row, otherwise a pair
            ``(lower, upper)`` of timestamp bounds. Rows are taken from the
            first one whose timestamp is above ``lower`` up to, but not
            including, the last one whose timestamp is below ``upper``.

    Returns:
        A tuple ``(traces, filename)`` where ``traces`` is a list of dicts
        with keys ``x``, ``y``, ``type``, ``mode`` and ``name``.

    Raises:
        ValueError: if a name in ``graph`` is not found in ``labels``.
        IndexError: if no row satisfies one of the ``data_slice`` bounds.
    """
    app_logger.info('building graphs')
    # Lazy %-style arguments: formatting only happens if the record is emitted.
    app_logger.info('load_data DATA.shape %r %r', DATA.shape, DATA.length)
    history = DATA
    filename = FILENAME

    if data_slice is None:
        data_slice = range(DATA.length)
    else:
        start = np.where(history[:, 0] > data_slice[0])[0][0]
        stop = np.where(history[:, 0] < data_slice[1])[0][-1]
        # NOTE(review): range() excludes ``stop``, so the last matching row
        # is dropped. Kept as in the original -- confirm this is intended.
        data_slice = range(start, stop)

    def format_times(stamps):
        # Render POSIX timestamps as local-time strings for the plot axis.
        return [
            datetime.datetime.fromtimestamp(d).strftime('%y-%m-%d %H:%M:%S')
            for d in stamps
        ]

    # More than 1000 selected rows are reduced to exactly 1000 with LTTB.
    use_lttb = len(data_slice) > 1000
    # When downsampling, every trace gets its own x values below, so the
    # full-resolution labels are only needed in the non-LTTB case.
    x = [] if use_lttb else format_times(history[data_slice, 0])

    # Loop-invariant: lower-case the labels once, not once per series.
    lower_labels = [label.lower() for label in labels]

    newdata = []
    for name in graph:
        # LABEL_OFFSET could be 2 if there is a human readable date column.
        idx = lower_labels.index(name.lower()) + LABEL_OFFSET
        if use_lttb:
            app_logger.debug('data_slice len %d', len(data_slice))
            hdata = np.column_stack(
                (history[data_slice, 0], history[data_slice, idx]))
            app_logger.debug('hdata.shape %r', hdata.shape)
            downsize = lttb.downsample(hdata, n_out=1000)
            assert downsize.shape == (1000, 2)
            x = format_times(downsize[:, 0])
            y = list(downsize[:, 1])
        else:
            y = list(history[data_slice, idx])

        newdata.append({
            'x': x,
            'y': y,
            'type': 'scatter',
            'mode': 'markers+lines',
            'name': '%s' % name
        })
    app_logger.info('done building graphs')
    return newdata, filename
コード例 #4
0
ファイル: plotter.py プロジェクト: zorzr/TSL
    def process_series(self):
        """Return the series to draw, downsampled when they are too long.

        If the first series in ``self.draw_set`` has at most ``N_MAX`` rows,
        ``self.draw_set`` is returned unchanged. Otherwise every series is
        reduced to ``N_MAX`` points with LTTB and returned as a new list of
        ``pd.Series`` that keep their original names.
        """
        if self.draw_set[0].shape[0] <= N_MAX:
            return self.draw_set

        def shrink(series):
            # Pair index and values as (n, 2) points, then rebuild a Series.
            points = np.array([series.index, series]).T
            reduced = lttb.downsample(points, N_MAX)
            return pd.Series(reduced[:, 1], index=reduced[:, 0],
                             name=series.name)

        return [shrink(series) for series in self.draw_set]
コード例 #5
0
def create_per_profile_df(root_dir: str, organisms: List[str]):
    """Assemble one precision-recall table covering every profile.

    For each profile whose ``cm.P`` exceeds ``config.min_hmmer_hits``, the
    precision-recall curve is de-duplicated on recall, downsampled with LTTB
    to at most ``config.downsample`` points, and stored with the matching
    e-values. The per-profile frames are concatenated, sorted by profile,
    clan, recall and precision, and re-indexed from zero.
    """
    confusion_matrices = _prof_confusion_matrices(root_dir, organisms, False)
    clans = pfam.Clans()

    frames = []
    for profile, cm in progress(confusion_matrices.items()):
        pr = cm.pr_curve
        if cm.P <= config.min_hmmer_hits:
            continue

        scores = cm.sample_scores
        assert len(pr.recall) == len(scores)

        # Keep the first occurrence of each distinct recall value.
        recall, first_seen = np.unique(pr.recall, return_index=True)
        precision = pr.precision[first_seen]
        scores = scores[first_seen]

        target = min(config.downsample, len(recall))
        points = np.stack((recall, precision), axis=1)
        sampled = lttb.downsample(points, n_out=target)

        # Locate each retained recall value to pick up its e-value.
        kept = np.searchsorted(recall, sampled[:, 0])
        scores = scores[kept]

        df = pd.DataFrame()
        df[config.label.recall] = sampled[:, 0]
        df[config.label.precision] = sampled[:, 1]
        df["profile"] = profile
        df["clan"] = _clan_name(clans.get(profile))
        df[config.label.auc] = pr.auc
        df[config.label.hmmer_hits] = cm.P
        df["e-value"] = scores
        frames.append(df)

    sort_keys = [
        "profile",
        "clan",
        config.label.recall,
        config.label.precision,
    ]
    combined = pd.concat(frames).sort_values(sort_keys)
    return combined.reset_index(drop=True)
コード例 #6
0
ファイル: plotter.py プロジェクト: zorzr/TSL
    def process_zoom(self, xlim):
        """Return the series restricted to ``xlim``, downsampled if needed.

        With a truthy ``self.timestamp`` the bounds are mapped to row
        positions by ``get_nearest_index``. Otherwise they are treated as
        integer positions, clamped to ``0`` and to the length of the first
        series. Windows wider than ``N_MAX`` rows are reduced to ``N_MAX``
        points with LTTB.
        """
        if self.timestamp:
            lo = get_nearest_index(xlim[0], self.timestamp)
            hi = get_nearest_index(xlim[1], self.timestamp)
        else:
            lo = max(int(xlim[0]), 0)
            hi = min(int(xlim[1]) + 1, len(self.draw_set[0]))

        window = [df.iloc[lo:hi] for df in self.draw_set]
        if hi - lo <= N_MAX:
            return window

        def shrink(series):
            # Pair index and values as (n, 2) points, then rebuild a Series.
            points = np.array([series.index, series]).T
            reduced = lttb.downsample(points, N_MAX)
            return pd.Series(reduced[:, 1], index=reduced[:, 0],
                             name=series.name)

        return [shrink(series) for series in window]
コード例 #7
0
def create_per_organism_df(root_dir: str, organisms: List[str],
                           downsample: int):
    """Assemble one precision-recall table covering every organism.

    For each organism the precision-recall curve is de-duplicated on recall,
    trimmed to start at the first recall value above 0.005 (left untrimmed if
    there is none), downsampled with LTTB to ``downsample`` points, and
    stored with the matching e-values. The per-organism frames are
    concatenated and re-indexed from zero.
    """
    frames = []
    for organism in progress(organisms):
        result, cm = _fetch_organism(root_dir, organism)

        pr = cm.pr_curve
        scores = cm.sample_scores
        assert len(pr.recall) == len(scores)

        # Keep the first occurrence of each distinct recall value.
        recall, first_seen = np.unique(pr.recall, return_index=True)
        precision = pr.precision[first_seen]
        scores = scores[first_seen]

        # Drop the leading points whose recall is not above 0.005.
        above = np.where(recall > 0.005)[0]
        first = above[0] if len(above) != 0 else 0
        recall = recall[first:]
        precision = precision[first:]
        scores = scores[first:]

        points = np.stack((recall, precision), axis=1)
        sampled = lttb.downsample(points, n_out=downsample)

        # Locate each retained recall value to pick up its e-value.
        kept = np.searchsorted(recall, sampled[:, 0])
        scores = scores[kept]

        df = pd.DataFrame()
        df[config.label.recall] = sampled[:, 0]
        df[config.label.precision] = sampled[:, 1]
        df["organism"] = organism
        df[config.label.auc] = pr.auc
        df[config.label.hmmer_hits] = cm.P
        df["domain"] = result.accession.domain
        df["e-value"] = scores
        frames.append(df)

    combined = pd.concat(frames)
    return combined.reset_index(drop=True)
コード例 #8
0
    def lttb_ops(x: pd.DataFrame):
        """Split ``x`` into one single-column frame per column.

        Frames with at most ``n_out`` rows (``n_out`` comes from the
        enclosing scope) are split without resampling. Longer frames are
        downsampled column by column with LTTB: the index is cast to int64
        for the calculation and cast back to ``datetime64[ns]`` on output.
        """
        if x.shape[0] <= n_out:
            # Short enough already: only split, keeping the original index.
            return [pd.DataFrame(index=x.index, data=x[col])
                    for col in x.columns]

        # Work on a copy so the caller's index is left untouched.
        frame = x.copy()
        frame.index = frame.index.astype("int64")

        def shrink(col):
            points = np.array((frame.index.values, frame[col])).T
            reduced = lttb.downsample(points, n_out)
            return pd.DataFrame(
                index=reduced[:, 0].astype('datetime64[ns]'),
                data={col: reduced[:, 1]})

        return [shrink(col) for col in frame.columns]
コード例 #9
0
ファイル: test_lttb.py プロジェクト: javiljoen/lttb.py
def test_invalid_n_out_raises_error(n_out):
    """Downsampling six valid points with this ``n_out`` raises ValueError."""
    valid_points = gen_valid_data(6)

    with pytest.raises(ValueError):
        lttb.downsample(valid_points, n_out)
コード例 #10
0
ファイル: test_lttb.py プロジェクト: javiljoen/lttb.py
def test_downsampling_random_data_retains_variation(data, n_out):
    """The downsampled y values keep at least 95% of the input variance."""
    assume(n_out <= len(data))
    reduced = lttb.downsample(data, n_out)
    input_variance = np.var(data[:, 1])
    output_variance = np.var(reduced[:, 1])
    minimum_expected = 0.95 * input_variance
    assert output_variance >= minimum_expected
コード例 #11
0
ファイル: test_lttb.py プロジェクト: javiljoen/lttb.py
def test_downsampled_random_data_is_correct_shape(data, n_out):
    """The downsampled output has ``n_out`` rows and two columns."""
    assume(n_out <= len(data))
    expected_shape = (n_out, 2)
    reduced = lttb.downsample(data, n_out)
    assert reduced.shape == expected_shape
コード例 #12
0
ファイル: test_lttb.py プロジェクト: javiljoen/lttb.py
def test_downsampling_test_data_retains_variation(n_out):
    """The downsampled fixture keeps a y variance of at least 29.5."""
    reduced = lttb.downsample(load_test_data(), n_out)
    output_variance = np.var(reduced[:, 1])
    assert output_variance >= 29.5  # var(data) == 30.9968
コード例 #13
0
ファイル: test_lttb.py プロジェクト: javiljoen/lttb.py
def test_downsampled_test_data_is_correct_shape(n_out):
    """The downsampled fixture has ``n_out`` rows and two columns."""
    expected_shape = (n_out, 2)
    reduced = lttb.downsample(load_test_data(), n_out)
    assert reduced.shape == expected_shape