Ejemplo n.º 1
0
    def HiddenSpaceGenerator(cls, X, n_components):
        """Train a HiddenMarkovModel on per-series game statistics.

        The rows of ``X`` are split into one observation sequence per series
        and an HMM is fitted on those sequences. Note that this does not
        return a list of features; it returns the fitted HMM, which can be
        used to generate hidden-state features and also for classification.

        Arguments:
            X: the input features. Column 0 of every row is the series index;
                the remaining columns are the observations. Rows belonging to
                the same series are expected to be adjacent, because a new
                sequence is started every time the index changes.
            n_components: the number of hidden states to initialize.

        Returns:
            Whatever ``HiddenMarkovModel.from_samples`` returns for the
            collected sequences.

        NOTE(review): the previous docstring stated that pomegranate
        discretizes continuous variables with K-means, but the emission
        passed below is ``MultivariateGaussianDistribution`` -- confirm which
        description is accurate.
        """
        # Series ids are never negative, so -1 guarantees that the first row
        # opens a new series.
        last_series_idx = -1

        # Collect the observation rows of each series first and stack them
        # once per series; stacking after every single row re-copies the
        # whole series each time.
        rows_per_series = []
        for row in X:
            if row[0] != last_series_idx:
                last_series_idx = row[0]
                rows_per_series.append([])
            rows_per_series[-1].append(row[1:])

        # Each stack starts from the same empty (0, n_features) seed array as
        # before so the resulting arrays keep the dtype they always had.
        _X = [
            np.vstack([np.full((0, X.shape[1] - 1), None, dtype=None)] + rows)
            for rows in rows_per_series
        ]

        # now train an HMM to the data
        return HiddenMarkovModel.from_samples(MultivariateGaussianDistribution,
                                              n_components, _X)
Ejemplo n.º 2
0
def hmm(df, emissions, n_states, algorithm):
    """Fit an HMM with multivariate Gaussian emissions on selected columns.

    Arguments:
        df: object indexable by ``emissions`` whose selection exposes
            ``to_numpy()`` (presumably a pandas DataFrame).
        emissions: column selection used as the observation matrix.
        n_states: number of hidden states (passed as ``n_components``).
        algorithm: training algorithm name forwarded to ``from_samples``.

    Returns:
        The model produced by ``HiddenMarkovModel.from_samples``.
    """
    fit_arguments = {
        "distribution": MultivariateGaussianDistribution,
        "n_components": n_states,
        "X": df[emissions].to_numpy(),
        "algorithm": algorithm,
        "verbose": True,
    }
    return HiddenMarkovModel.from_samples(**fit_arguments)
Ejemplo n.º 3
0
def hmm(df, num_states):
    """Segment ``df["value"]`` with a Gaussian HMM and label each row.

    A ``HiddenMarkovModel`` with ``NormalDistribution`` emissions is fitted on
    the ``value`` column, the most likely state of every row is predicted,
    and the states are renumbered so that a larger state number means a
    larger mean signal. Rows whose ``value`` is NaN get a NaN state.

    Arguments:
        df: DataFrame with a numeric ``value`` column. It is modified in
            place (a ``state`` column is added) and also returned.
        num_states: number of hidden states to fit.

    Returns:
        The same ``df`` with the additional ``state`` column.
    """
    vals = df["value"].values
    model = HiddenMarkovModel.from_samples(NormalDistribution,
                                           X=[vals],
                                           n_components=num_states)
    states = model.predict(vals)

    # Rename states to increase with mean signal. The new label of a state is
    # its *rank* among the per-state means; argsort alone yields the sorting
    # permutation, which is only equal to the rank for trivial orderings, so
    # argsort is applied twice.
    state_means = df["value"].groupby(states).mean()
    ranks = np.argsort(np.argsort(state_means.to_numpy()))
    relabel = dict(zip(state_means.index, ranks))
    df["state"] = [relabel[s] for s in states]

    # Blank out the state wherever the signal itself is missing. ``where``
    # writes the result back as a whole column instead of relying on chained
    # assignment, which may operate on a temporary copy.
    df["state"] = df["state"].where(df["value"].notna())
    return df
Ejemplo n.º 4
0
def generate(
    genre_folder: str,
    bpm: int,
    beats: int,
    steps: int,
    onset: str,
    components: int,
    regex: str,
    output: str,
    include: bool,
):
    """
    This command generates a new unique beat, based on the audio files in the given input folder.
    """
    # NOTE: the docstring above is kept verbatim because it may be surfaced
    # as command help text; the details live in these comments instead.
    #
    # genre_folder / regex / include: forwarded to util.read_audio_files to
    #     select the input audio files.
    # onset: name of the onset algorithm (lower-cased before lookup).
    # beats, steps: grid size; the generated sequence has beats * steps items.
    # components: number of hidden states of the HMM.
    # bpm, output: tempo of the rendered beat and the path it is saved to.

    audios = util.read_audio_files(include, Path(genre_folder), regex)

    sequences, samples = util.create_knowledge_base(
        audios, OnsetAlgorithm(onset.lower()), beats, steps
    )

    # Create the model
    model: HiddenMarkovModel = HiddenMarkovModel.from_samples(
        DiscreteDistribution,
        n_components=components,
        X=sequences,
        algorithm="viterbi",
        verbose=True,
        name="groover",
    )

    # Draw the new beat from the trained model. The sampled sequence used to
    # be overwritten with sequences[0] right away, so the saved beat was a
    # copy of the first input sequence and the model was never used.
    sequence = model.sample(length=beats * steps)
    print(sequence)
    print(
        "BPM: {}, Beats: {}, Steps:{}, Onset Algorithm: {}".format(
            bpm, beats, steps, onset
        )
    )

    # Save the beat
    util.create_beat(sequence, samples, bpm, beats, steps).save(Path(output))
Ejemplo n.º 5
0
Archivo: many.py Proyecto: kirilman/nir
def run_test(arg, k):
    """Generate one labelled sequence and fit a discrete HMM on it.

    Arguments:
        arg: mapping providing the keys ``type``, ``N``, ``alpha``,
            ``n_comp``, ``norm_params`` and ``dir``.
        k: seed passed to ``np.random.seed`` before anything is generated.

    Returns:
        A ``(model, sequence)`` tuple: the HMM fitted with the ``labeled``
        algorithm and the generated observation sequence.
    """
    np.random.seed(k)

    # Read every setting up front, in the same order as always, so a missing
    # key is reported before any work is done.
    exp_type, N, alpha = arg['type'], arg['N'], arg['alpha']
    n_comp, norm_params = arg['n_comp'], arg['norm_params']
    _save_dir = arg['dir']  # read but not used in this function

    generated = generator.Sequence(N, alpha, type=exp_type, params=norm_params)
    state_labels = [myutils.rename_state(state) for state in generated.path]

    fitted = HiddenMarkovModel.from_samples(
        DiscreteDistribution,
        n_components=n_comp,
        X=[generated.sequence],
        labels=[state_labels],
        algorithm='labeled',
    )
    return fitted, generated.sequence
Ejemplo n.º 6
0
    def fit_hmm(self,
                signal_arrays,
                state_vectors,
                distribution,
                state_transition_threshold=1e-4,
                **kwargs):
        """Fit a labelled HMM to signals and their per-sample state vectors.

        Arguments:
            signal_arrays: iterable of arrays, one per recording.
            state_vectors: iterable of integer state vectors aligned with
                ``signal_arrays``. Negative states are folded onto their
                positive counterpart; samples with state 0 are dropped.
            distribution: emission distribution forwarded to
                ``HiddenMarkovModel.from_samples``.
            state_transition_threshold: when greater than zero the fitted
                model is passed through ``_sparsify_hmm`` with this value.
            **kwargs: forwarded verbatim to ``from_samples``.

        Returns:
            The fitted model, sparsified if the threshold is positive.
        """
        # Artefact states (negative sign) are bunched together with their
        # corresponding "clean" states.
        folded_states = [np.abs(vec) for vec in state_vectors]

        # Drop the 'undefined' samples (state 0) from signals and states.
        # TODO: let pomegranate handle that
        defined_masks = [vec != 0 for vec in folded_states]
        kept_signals = [
            arr[mask] for arr, mask in zip(signal_arrays, defined_masks)
        ]
        kept_states = [
            vec[mask] for vec, mask in zip(folded_states, defined_masks)
        ]

        # The labels handed to pomegranate are the string form of each state.
        labels = [list(map(str, vec)) for vec in kept_states]

        # One state name per distinct remaining state, in sorted order.
        state_names = list(map(str, np.unique(np.concatenate(kept_states))))

        # The HMM is fitted on the transformed signals.
        signals = [self.transform(arr) for arr in kept_signals]

        fitted = HiddenMarkovModel.from_samples(
            distribution=distribution,
            n_components=len(state_names),
            X=signals,
            labels=labels,
            algorithm='labeled',
            state_names=state_names,
            **kwargs)

        if state_transition_threshold > 0.:
            return _sparsify_hmm(fitted, state_transition_threshold)
        return fitted
Ejemplo n.º 7
0
    def fit(self, X, y=None):
        """Fit the HMM on ``X`` and compute one decision score per sequence.

        Arguments:
            X: input data; it is run through ``self._check_and_preprocess``
                and its ``shape[0]`` sizes the score array.
            y: unused.

        Side effects:
            Sets ``self.hmmmodel`` and ``self.decision_scores_`` (the
            negated log-probability of each preprocessed sequence), then
            calls ``self._process_decision_scores()``.
        """
        sequences = self._check_and_preprocess(X, True)

        training_options = {
            "algorithm": "baum-welch",
            "n_jobs": 8,
            "verbose": self.verbose,
            "batches_per_epoch": 20,
            "max_iterations": self.max_iterations,
        }
        self.hmmmodel = HiddenMarkovModel.from_samples(
            NormalDistribution, self.n_states, sequences, **training_options)
        self.hmmmodel.bake()

        # Score = negative log-likelihood of the sequence under the model.
        self.decision_scores_ = np.zeros(X.shape[0])
        for position, observed in enumerate(sequences):
            log_likelihood = self.hmmmodel.log_probability(observed)
            self.decision_scores_[position] = -log_likelihood

        self._process_decision_scores()
0
    def fit(self, data):
        """
        Train the HMM, i.e. learn transition and emission probabilities.

        Arguments:
            data: list of SMILES; every entry is split into its individual
                characters before training.

        Returns:
            self, with ``self.model`` set and ``self.fitted`` switched on.
        """
        # One symbol sequence per SMILES string.
        symbol_sequences = list(map(list, data))

        training_options = dict(
            n_components=self.n_components,
            end_state=True,
            X=symbol_sequences,
            init='kmeans||',
            verbose=self.verbose,
            n_jobs=self.n_jobs,
            max_iterations=self.epochs,
            batches_per_epoch=self.batches_per_epoch,
            random_state=self.seed,
        )
        self.model = HiddenMarkovModel.from_samples(DiscreteDistribution,
                                                    **training_options)
        self.fitted = True
        return self
Ejemplo n.º 9
0
def create_casas7_HMM_with_prepared_train_and_test_based_on_seq_of_activities(
        train_set, list_of_persons_in_train, test_set,
        list_of_persons_in_test):
    '''
    create a single HMM for all of persons
    train_set = an ndarray that has train_set for each person separately
    test_set = 
    '''

    #concatinate train_sets and test_sets of all of people
    number_of_persons = len(train_set)
    final_train_set = train_set[0]
    final_test_set = test_set[0]
    final_train_set_labels = list_of_persons_in_train[0]
    final_test_set_labels = list_of_persons_in_test[0]
    #print(type(final_train_set) , type(train_set) , type(train_set[1]))
    for per in range(1, number_of_persons):
        final_train_set = np.concatenate((final_train_set, train_set[per]),
                                         axis=0)
        final_test_set = np.concatenate((final_test_set, test_set[per]),
                                        axis=0)
        final_train_set_labels = np.concatenate(
            (final_train_set_labels, list_of_persons_in_train[per]), axis=0)
        final_test_set_labels = np.concatenate(
            (final_test_set_labels, list_of_persons_in_test[per]), axis=0)

    #r = np.shape(final_train_set)
    #for i in range(r[0]):
    #    print(np.shape(final_train_set[i]))
    #final_train_set = np.array([[1,2,3,0,0] , [1,2,0,0,0]], dtype = np.ndarray)
    #final_train_set_labels = np.array([1,2] , dtype= np.ndarray)
    print(type(final_train_set[11]), np.shape(final_train_set[11]))
    print(final_train_set[0:2])
    model = HiddenMarkovModel.from_samples(
        DiscreteDistribution,
        n_components=2,
        X=final_train_set,
        labels=final_train_set_labels,
        algorithm='labeled'
    )  # according to my tests :D n_components is number of hidden states
    print(model)
    #return 0
    #test
    '''predicted_labels = np.zeros_like(actual_labels)
Ejemplo n.º 10
0
def create_hmm_from_sample(file_address):
    """Fit a 3-state HMM on a small hard-coded sample and print it.

    The training data is three identical 2x3 ``int32`` arrays; both the data
    and the fitted model are printed. Nothing is returned.

    Arguments:
        file_address: currently unused -- the code that loaded data from
            this address was disabled. Kept so existing callers still work.

    NOTE(review): ``MultivariateDistribution`` is passed as the emission
    distribution exactly as before; confirm that this is the intended class.
    """
    # Three separate (not shared) arrays, one per training sequence.
    data = [
        np.array([[1, 2, 3], [1, 2, 3]], np.int32) for _ in range(3)
    ]
    print(data)

    model = HiddenMarkovModel.from_samples(
        MultivariateDistribution, n_components=3, X=data
    )  # according to my tests :D n_components is number of hidden states

    print(model)
Ejemplo n.º 11
0
def create_casas7_hmm(file_address, has_activity):
    """Fit a 5-state discrete HMM on sequences from a CSV file and report it.

    Arguments:
        file_address: path handed to the CSV reader.
        has_activity: selects the reader --
            ``read_sequence_based_CSV_file_with_activity`` when true,
            ``read_sequence_based_CSV_file_without_activity`` otherwise.

    Returns:
        None. The model and several of its properties are printed and the
        model is plotted. If training raises ``KeyError`` the error is
        reported and the function returns early; previously it carried on
        with a placeholder string and failed later on ``model.plot()``.
    """
    if has_activity:
        list_of_data, list_of_persons, _ = read_sequence_based_CSV_file_with_activity(
            file_address=file_address,
            has_header=True,
            separate_data_based_on_persons=False)
    else:
        list_of_data, list_of_persons = read_sequence_based_CSV_file_without_activity(
            file_address=file_address,
            has_header=True,
            separate_data_based_on_persons=False)

    try:
        model = HiddenMarkovModel.from_samples(DiscreteDistribution,
                                               n_components=5,
                                               X=list_of_data,
                                               algorithm='baum-welch')
    except KeyError as err:
        print('there is an exception')
        print(repr(err))
        # Without a model there is nothing left to report.
        return

    print(model)
    print("np.shape(list_of_data):", np.shape(list_of_data))

    model.plot()
    print("dense_transition_matrix:", model.dense_transition_matrix())
    print("edge_count:", model.edge_count())
    print("edges:", model.edges)
    print("name:", model.name)
    print("state_count:", model.state_count())
    print(model.thaw())
Ejemplo n.º 12
0
def run(arg, k):
    """Fit HMMs on a normal and an anomalous signal, then log and plot them.

    Two sequences are generated with ``generator.Sequence`` (one from the
    normal parameters, one from the anomalous ones) and one HMM is fitted on
    each. The log-probabilities of both signals under the normal model are
    written to ``<cwd>/<dir>/log_<k>.txt`` together with both model
    distributions, and a figure is saved to ``<cwd>/<dir>/plot<k>.png``.
    Further diagnostics are printed to stdout.

    Arguments:
        arg: mapping providing the keys ``type``, ``N``, ``alpha``,
            ``n_comp``, ``norm_params``, ``an_params``, ``dir``, ``mean``,
            ``varience``, ``anomal_mean`` and ``anomal_varience``.
        k: run number; seeds ``np.random`` and is used in the file names.

    Returns:
        None.
    """
    np.random.seed(k)
    exp_type = arg['type']
    N = arg['N']
    alpha = arg['alpha']
    n_comp = arg['n_comp']
    norm_params = arg['norm_params']
    an_params = arg['an_params']
    save_dir = arg['dir']
    mean = arg['mean']
    variance = arg['varience']
    anomal_mean = arg['anomal_mean']
    anomal_variance = arg['anomal_varience']
    norm_gen = generator.Sequence(N,
                                  alpha,
                                  type=exp_type,
                                  params=norm_params,
                                  mean=mean,
                                  variance=variance)
    norm_signal = norm_gen.sequence

    an_gen = generator.Sequence(N,
                                alpha,
                                type=exp_type,
                                params=an_params,
                                mean=anomal_mean,
                                variance=anomal_variance)

    an_signal = an_gen.sequence

    an_labels = list(map(myutils.rename_state, an_gen.path))
    labels = list(map(myutils.rename_state, norm_gen.path))
    if exp_type == 'continue':
        model = HiddenMarkovModel.from_samples(NormalDistribution,
                                               n_components=n_comp,
                                               X=[norm_signal],
                                               labels=[labels],
                                               algorithm='labeled')

        an_model = HiddenMarkovModel.from_samples(NormalDistribution,
                                                  n_components=n_comp,
                                                  X=[an_signal],
                                                  labels=[an_labels],
                                                  algorithm='labeled')
    else:
        model = HiddenMarkovModel.from_samples(DiscreteDistribution,
                                               n_components=n_comp,
                                               X=[norm_signal],
                                               labels=[labels],
                                               algorithm='labeled')

        # NOTE(review): unlike the other three models this one is fitted
        # without labels -- confirm that this is intentional.
        an_model = HiddenMarkovModel.from_samples(DiscreteDistribution,
                                                  n_components=n_comp,
                                                  X=[an_signal])

    l1 = model.log_probability(norm_signal)
    l2 = model.log_probability(an_signal)
    cdir = os.getcwd()
    path = cdir + '/' + save_dir
    # Create the output directory if needed. An already existing directory is
    # fine; any other failure is raised here instead of being swallowed and
    # resurfacing as a confusing error from open() below.
    os.makedirs(path, exist_ok=True)
    with open(path + '/log_' + str(k) + '.txt', 'w') as log_file:
        out = myutils.print_model_distribution(model)
        log_file.write(out)
        out = myutils.print_model_distribution(an_model)
        log_file.write(out)
        log_file.write('l_normal = {} l_anomal = {}'.format(l1, l2))

    fig_sub = plt.figure(figsize=(16, 5.9))

    # Narrow left panel: the two log-probabilities as single points.
    ax2 = fig_sub.add_axes([0.12, 0.1, 0.07, 0.8])
    ax2.plot([1] * len([l1]), l1, 'b.', markersize=12)
    ax2.plot([1] * len([l2]), l2, 'r.', markersize=12)

    ax2.set_ylabel('log probability')
    ax2.set_xticks([0.95, 1, 1.05])
    ax2.set_xticklabels(['', '', ''])

    # Wide right panel: both signals over time.
    ax = fig_sub.add_axes([0.24, 0.1, 0.74, 0.8])
    ax.plot(norm_signal, 'b', label='Normal')  # Error in the colors
    ax.plot(an_signal, 'r', label='Abnormal')
    ax.set_xlabel('Time', )
    plt.legend(loc=1)
    plt.tight_layout()
    plt.savefig(path + '/plot' + str(k) + '.png', dpi=180)

    plt.close()
    print(' {}, {}'.format(l1, l2))
    print('На аномальной')

    # Same two signals, now scored by the model fitted on the anomalous one.
    l1 = an_model.log_probability(norm_signal)
    l2 = an_model.log_probability(an_signal)
    print(' {}, {}'.format(l1, l2))
    print(' Норма\n {}'.format(model.predict_proba(an_signal)))
Ejemplo n.º 13
0
    anomal_variance = variance

    write_log(N, N_train, norm_params, mean, variance, an_params, anomal_mean,
              anomal_variance)

    sequence = generator.Sequence(N_train,
                                  alpha,
                                  type='continue',
                                  params=norm_params,
                                  mean=mean,
                                  variance=variance)
    normal_signal = sequence.sequence
    labels = list(map(myutils.rename_state, sequence.path))
    model = HiddenMarkovModel.from_samples(NormalDistribution,
                                           n_components=n_comp,
                                           X=[normal_signal],
                                           labels=[labels],
                                           algorithm='labeled')
    # model = HiddenMarkovModel.from_samples(GeneralMixtureModel, n_components = n_comp, X = [normal_signal],
    #                                     labels = [labels], algorithm='labeled')
    # fig = plt.figure(num = 1000, figsize=(15,4))
    # plt.plot(normal_signal,'b')
    # plt.plot([x / 3 for x in sequence.path], 'r')
    # plt.savefig('Graphs/path.png')
    # plt.close('all')

    pool = Pool(N_pool)
    with open('model.txt', 'w') as file:
        out = print_model_distribution(model)
        file.write(out)
    params = [
Ejemplo n.º 14
0
            anormal_seq[start:stop] = [new_s] * (stop - start)
    #     n_count = 5
    #     anormal_seq[20:20+n_count] = ['b']*n_count

    # print('Длина нормальной ',len(normal_seq),', аномальной ', len(anormal_seq))
    #Модель

    # model_hmm = MarkovChain.from_samples([normal_seq]);
        gc.collect()
        print(normal_seq[-5:])
        labels = list(map(myutils.rename_state, sequence.path))
        # plt.plot(normal_seq)
        # # break
        model_hmm = HiddenMarkovModel.from_samples(DiscreteDistribution,
                                                   n_components=len(alpha),
                                                   X=[normal_seq],
                                                   labels=[labels],
                                                   algorithm='labeled')
        # model_hmm = HiddenMarkovModel.from_samples(DiscreteDistribution,n_components = len(alpha),X=[normal_seq]);

        # model_hmm.bake()
        experiment_discret(model=model_hmm,
                           normal_seq=normal_seq[:N],
                           anormal_seq=anormal_seq,
                           N=N,
                           num_launch=i)

        # Вывод в файл
        file.write(str(i) + '\n')

        if isinstance(model_hmm, HiddenMarkovModel):
Ejemplo n.º 15
0
# Cluster the word vectors into at most 32 mixture components and assign each
# vector to a component.
# NOTE(review): `wv` is defined outside this excerpt; presumably a word-vector
# model exposing `.vectors` and `.vocab` -- confirm.
mixture = BayesianGaussianMixture(n_components=32)
mixture.fit(wv.vectors)

labels = mixture.predict(wv.vectors)
# Histogram of how many vectors were assigned to each component.
plt.hist(labels, bins=32)

# <codecell>
# Lookup table: word -> component label of that word's vector.
word_to_label = {}

for word in wv.vocab:
    # The vocab entry's `index` is used as the row of that word in `labels`.
    idx = wv.vocab[word].index
    word_to_label[word] = labels[idx]


def _text_to_seq(text):
    """Return the labels of the words in *text* as a NumPy array.

    Every word is looked up in the module-level ``word_to_label`` table; a
    word missing from the table raises ``KeyError``.
    """
    word_labels = []
    for token in text:
        word_labels.append(word_to_label[token])
    return np.array(word_labels)


# One label sequence per text.
# NOTE(review): `texts` is defined outside this excerpt; presumably an
# iterable of tokenized texts -- confirm.
sequences = [_text_to_seq(text) for text in texts]

# <codecell>
# Fit a 16-state HMM on the label sequences. The labels are component ids,
# yet a NormalDistribution emission is used for now (see the TODO below).
hmm = HiddenMarkovModel.from_samples(
    NormalDistribution,  # TODO: identify discrete distribution
    n_components=16,
    X=sequences)

# <codecell>
# Inspect the learned transition matrix in dense form.
hmm.dense_transition_matrix()

# <codecell>