コード例 #1
0
ファイル: cobras_kshape.py プロジェクト: liujiang96/cobras
    def split_superinstance(self, si, k):
        """Split super-instance ``si`` into child super-instances with k-Shape.

        The rows of ``self.data`` selected by ``si.indices`` are clustered with
        ``kshape``. Every resulting cluster that contains at least one index
        from ``self.train_indices`` becomes a new ``SuperInstance_kShape``.
        Clusters without any training index are merged into the new
        super-instance whose SBD centroid is closest to theirs.

        Side effect: ``si.children`` is set to the returned list.

        NOTE: if none of the clusters contains a training index, there is no
        merge target and ``closest_train`` stays ``None``, so the merge step
        raises ``AttributeError``.

        :param si: the super-instance to split (its ``indices`` are read)
        :param k: second argument forwarded to ``kshape``
        :return: list of the newly created super-instances
        """
        # the actual splitting
        pred = kshape(self.data[si.indices, :], k)

        # Build the membership lookup once instead of scanning
        # self.train_indices for every single index of every cluster.
        train_lookup = set(self.train_indices)

        # making sure that all of the super-instances contain at least one training instance
        # super-instances without training instance are merged with the closest one that does contain a
        # training instance
        training = []
        no_training = []

        for new_si_centroid, new_si_idx in pred:
            # go from super instance indices to global ones
            cur_indices = [si.indices[idx] for idx in new_si_idx]

            if any(x in train_lookup for x in cur_indices):
                training.append(SuperInstance_kShape(self.data, cur_indices, self.train_indices, new_si_centroid, si))
            else:
                no_training.append((cur_indices, new_si_centroid))

        for indices, centroid in no_training:
            # sets of indices without a training point are merged with their closest super-instance
            # closeness is based on the SBD centroid
            closest_train = None
            closest_train_dist = np.inf
            for training_si in training:
                # only the first element of the _sbd result (the distance) is needed
                cur_dist, _ = _sbd(training_si.sbd_centroid, centroid)
                if cur_dist < closest_train_dist:
                    closest_train_dist = cur_dist
                    closest_train = training_si
            closest_train.indices.extend(indices)

        si.children = training

        return training
コード例 #2
0
ファイル: run.py プロジェクト: adam-dziedzic/kshape
def run():
    """Parse the command line, build the input matrix, run k-Shape and time it.

    Steps:
      1. Parse ``sys.argv`` with the module-level ``parser`` and echo the options.
      2. Build ``x``: a random matrix when ``--sourcedata`` is ``"random"``,
         otherwise the train/validation/test splits returned by
         ``load_time_series.load_data`` stacked vertically. In the latter case
         the cluster count is taken from the number of distinct training labels,
         overriding the command-line value.
      3. Cast ``x`` to the requested dtype.
      4. Dispatch to the GPU, torch-on-CPU or numpy implementation and print
         the elapsed wall-clock time (and the result when ``args.print`` is set).

    Exits with status 1 (via ``sys.exit``) on an invalid dtype, when a GPU is
    requested but CUDA is unavailable, on an unknown device, or on an unknown
    framework for the CPU device.
    """
    args = parser.parse_args(sys.argv[1:])
    print("number of clusters: ", args.clusters)
    print(time_series_number, ": ", args.number)
    print(time_series_length, ": ", args.length)
    print(framework_help, ": ", args.framework)
    print("selected device: ", args.device)
    print("selected data type: ", args.type)
    print("selected source of data: ", args.sourcedata)

    clusters = args.clusters

    if args.sourcedata == "random":
        x = np.random.rand(args.number, args.length)
    else:
        datasets = load_time_series.load_data(args.sourcedata)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]

        x = np.vstack((train_set_x, valid_set_x, test_set_x))
        print("loaded ", x.shape[0], " data points of length ", x.shape[1])

        # for a labelled dataset the number of classes replaces --clusters
        clusters = len(np.unique(train_set_y))
        print("clusters: ", clusters)

    try:
        x = x.astype(dtype=args.type, copy=False)
    except TypeError as err:
        print(err)
        print("Error: ", datatype_help, " got: ", args.type)
        sys.stdout.flush()
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``)
        sys.exit(1)

    # fail early, before the timer starts, when the GPU cannot be used
    if args.device == gpu:
        if torch.cuda.is_available():
            print("CUDA is available via PyTorch")
        else:
            print(
                "CUDA is not available via PyTorch, please install cuda and libcudnn from NVIDIA"
            )
            sys.exit(1)

    result = None
    start = time.time()
    if args.device == gpu:
        result = core_gpu.kshape_gpu(x=x, k=clusters, device="cuda")
    elif args.device == cpu:
        if args.framework == torch_lib:
            result = core_gpu.kshape_gpu(x=x, k=clusters, device=cpu)
        elif args.framework == numpy_lib:
            result = core.kshape(x=x, k=clusters)
        else:
            # previously an unknown framework fell through silently and a
            # meaningless timing for ``result = None`` was reported
            print("Error: ", framework_help)
            sys.exit(1)
    else:
        print("Error: ", device_help)
        sys.exit(1)

    print("elapsed time, ", time.time() - start, ",sec")
    if args.print:
        print(result)
コード例 #3
0
def _prepare_kshape(data: pd.DataFrame, cluster_num: int) -> List[Tuple]:
    """Run ``kshape`` on ``data`` and return its result unchanged.

    Parameters
    ----------
    data: pd.DataFrame
        Forwarded to ``kshape`` as its first argument.
    cluster_num: int
        Forwarded to ``kshape`` as its second argument
        (presumably the number of clusters to form).

    Returns
    -------
    List[Tuple]
        Exactly what ``kshape`` returns.

    """
    clustering = kshape(data, cluster_num)
    return clustering
コード例 #4
0
from kshape.core import kshape, zscore

# Toy input: four series of length four, one series per row.
time_series = [
    [1, 2, 3, 4],
    [0, 1, 2, 3],
    [0, 1, 2, 3],
    [1, 2, 2, 3],
]
cluster_num = 2

# z-normalise along axis 1 first, then cluster the normalised series
normalized_series = zscore(time_series, axis=1)
clusters = kshape(normalized_series, cluster_num)
コード例 #5
0
ファイル: kshape_correlator.py プロジェクト: str4h4t/c_core
#def data_plotter(data):

if __name__ == '__main__':
    # Script entry point: load a pickled data set, cluster its series with
    # k-Shape and print the confusion matrix against the 'osid' labels.
    #
    # NOTE: pickle.load can execute arbitrary code while deserialising;
    # only point this at a file from a trusted source.
    with open(
            'C://Users//k_mathin//PycharmProjects//Masters//ciena_trials//Kamal//data//vodafone_data_clusters_filtered.pkl',
            'rb') as f:
        data_set = pickle.load(f)
    data = np.asarray(list(data_set['data']))
    #data = data[:,:15]
    print(data.shape[0])
    label_data = np.asarray(data_set['osid'])
    # integer code per label; the number of distinct labels is the cluster count
    labels, levels = pd.factorize(label_data)
    shelves = np.asarray(data_set['shelf'])
    cluster_num = levels.shape[0]
    print(cluster_num)
    clusters = kshape(zscore(data, axis=1), cluster_num)
    #clusters = kshape(data,cluster_num)

    # Predicted label of a row = position of the first cluster whose member
    # list (clusters[j][1]) contains the row index.
    y_pred = []
    for i in range(data.shape[0]):
        for j in range(cluster_num):
            if i in clusters[j][1]:
                y_pred.append(j)
                # stop at the first match; the former ``continue`` was a no-op
                # and kept scanning the remaining clusters
                break

    # compute the confusion matrix once and reuse it for printing
    conf = conf_mat(labels, y_pred)

    print(conf)
    print("done")
コード例 #6
0
def kshape_clusters(arr, cluster_num, ax=1):
    """Cluster ``arr`` with k-Shape and pass the clusters to ``apply_clusters``.

    ``arr`` is z-normalised with ``zscore(arr, ax)`` before clustering; the
    un-normalised ``arr`` is what ``apply_clusters`` receives together with
    the clusters.

    :param arr: input handed to ``zscore`` and later to ``apply_clusters``
    :param cluster_num: second argument of ``kshape``
    :param ax: second positional argument of ``zscore`` (default 1)
    :return: the value returned by ``apply_clusters``
    """
    # imported lazily, so the dependency is only needed when this is called
    from kshape.core import kshape, zscore

    normalized = zscore(arr, ax)
    return apply_clusters(kshape(normalized, cluster_num), arr)
コード例 #7
0
ファイル: example.py プロジェクト: str4h4t/c_core
from kshape.core import kshape, zscore

# Toy input: four series of length five, one series per row.
time_series = [
    [1, 2, 3, 4, 5],
    [0, 1, 2, 3, 4],
    [3, 2, 1, 0, -1],
    [1, 2, 2, 3, 3],
]
cluster_num = 2

# NOTE(review): zscore is called without an explicit axis here, so its
# default applies -- confirm that this normalises each series (row) as intended.
normalized_series = zscore(time_series)
clusters = kshape(normalized_series, cluster_num)
print(clusters)
コード例 #8
0
ファイル: main.py プロジェクト: arminalgln/waveform
#%%
# Table read from a pickle file; cluster_show() reads its 'id' and 'cause' columns.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
causes = pd.read_pickle('data/causes.pkl')


def cluster_show(cluster_rep, cluster_id):
    """Plot the ' In' signal of every event belonging to one cluster.

    For each positional row index in ``cluster_id`` the event id is looked up
    in the module-level ``causes`` table, an ``Event`` is built for it with
    the module-level ``start`` and ``end``, and its ``' In'`` column is drawn.
    The legend shows the ``'cause'`` values of the same rows.

    :param cluster_rep: cluster representative; currently not used
    :param cluster_id: positional row indices into ``causes``
    """
    for row_pos in cluster_id:
        event_id = causes.iloc[row_pos]['id']
        event = Event(event_id, start, end)
        plt.plot(list(event.data[' In'].values))

    legend_labels = list(causes.iloc[cluster_id]['cause'])
    plt.legend(legend_labels)
    plt.show()


cluster_num = 6
# z-normalise along axis 1, then cluster into ``cluster_num`` groups
clusters = kshape(zscore(I_ns, axis=1), cluster_num)

# Each entry of ``clusters`` is indexed as [0] -> representative, [1] -> member rows.
for i in range(cluster_num):
    representative = clusters[i][0]
    member_rows = clusters[i][1]
    print(causes.iloc[member_rows], '\n', '----------------------')
    cluster_show(representative, member_rows)

#%%
import statsmodels.api as sm

dta = sm.datasets.co2.load_pandas().data
# deal with missing values. see issue
# Assign the interpolated column back instead of calling
# interpolate(inplace=True) on ``dta.co2``: an in-place call on a column
# obtained by attribute access is chained assignment and does not update
# ``dta`` when pandas Copy-on-Write is active.
dta['co2'] = dta['co2'].interpolate()

# NOTE(review): ``id`` shadows the builtin of the same name; it is kept
# because other code may refer to this module-level name.
id = whole_events[100]
start = 0
end = -1
e = Event(id, start, end)