def HiddenSpaceGenerator(cls, X, n_components):
    """Fit a hidden Markov model to per-series game statistics.

    The rows of ``X`` are split into one sequence per series: column 0 holds
    the series id, and a new sequence starts every time that id differs from
    the id of the previous row. The remaining columns are the features of
    that timestep/game.

    Note that this does not return a list of features. It returns the fitted
    HMM, which can be used to derive hidden-state features and can also be
    used for classification.

    Arguments:
        X: the input features (2-D NumPy array, series id in column 0)
        n_components: the number of hidden states to initialize

    Returns:
        The model built by ``HiddenMarkovModel.from_samples`` with
        ``MultivariateGaussianDistribution`` emissions.

    NOTE(review): the earlier docstring stated that continuous variables are
    discretized via k-means; the call below requests Gaussian emissions.
    Confirm which behaviour is intended.
    """
    if len(X) == 0:
        # nothing to split; hand an empty sequence list to pomegranate
        series = []
    else:
        series_ids = X[:, 0]
        # contiguous copy so every per-series slice below is contiguous too
        features = np.ascontiguousarray(X[:, 1:])
        # row positions where the series id changes, i.e. where a new
        # sequence begins (position 0 is implicitly a start)
        starts = np.flatnonzero(series_ids[1:] != series_ids[:-1]) + 1
        series = np.split(features, starts)

    # now train an HMM on the per-series sequences
    return HiddenMarkovModel.from_samples(MultivariateGaussianDistribution,
                                          n_components, series)
def hmm(df, emissions, n_states, algorithm):
    """Fit a multivariate-Gaussian HMM to the emission columns of ``df``.

    Arguments:
        df: data frame holding one observation per row
        emissions: column label(s) selecting the emission variables
        n_states: number of hidden states to fit
        algorithm: training algorithm name forwarded to pomegranate

    Returns:
        The model returned by ``HiddenMarkovModel.from_samples``.
    """
    observations = df[emissions].to_numpy()
    model = HiddenMarkovModel.from_samples(
        distribution=MultivariateGaussianDistribution,
        n_components=n_states,
        # from_samples expects a collection of sequences; the frame is a
        # single sequence, so wrap it instead of letting every row be read
        # as its own sequence.
        X=[observations],
        algorithm=algorithm,
        verbose=True,
    )
    return model
def hmm(df, num_states):
    """Segment ``df["value"]`` with a Gaussian HMM and label every row.

    A ``NormalDistribution`` HMM with ``num_states`` states is fitted to the
    ``value`` column, the most likely state of each row is predicted, and the
    states are renumbered so that a larger state number means a larger mean
    ``value``. Rows whose ``value`` is NaN get a NaN state.

    Arguments:
        df: data frame with a numeric ``value`` column; modified in place
        num_states: number of hidden states to fit

    Returns:
        ``df`` with an added (or overwritten) ``state`` column.
    """
    vals = df["value"].values
    model = HiddenMarkovModel.from_samples(NormalDistribution, X=[vals],
                                           n_components=num_states)
    states = np.asarray(model.predict(vals))

    # Rename states to increase with mean signal. The rank of each state is
    # looked up by its label, so the mapping stays correct for any number of
    # states and when some state is never predicted.
    state_means = df["value"].groupby(states).mean()
    rank_of_state = {
        state: rank
        for rank, state in enumerate(state_means.sort_values().index)
    }
    df["state"] = [rank_of_state[s] for s in states]

    # Blank out rows without a value; done on the column as a whole rather
    # than through chained indexing, which may write to a temporary copy.
    df["state"] = df["state"].where(df["value"].notna())
    return df
def generate(
    genre_folder: str,
    bpm: int,
    beats: int,
    steps: int,
    onset: str,
    components: int,
    regex: str,
    output: str,
    include: bool,
):
    """
    This command generates a new unique beat, based on the audio files in the given input folder.
    """
    # NOTE: the docstring is left untouched in case it is surfaced as CLI
    # help text; parameter notes live in comments instead.
    #   genre_folder / regex / include: forwarded to util.read_audio_files
    #   beats, steps: grid size; the generated beat has beats * steps slots
    #   onset: name of the onset algorithm (lower-cased before lookup)
    #   components: number of hidden states of the model
    #   bpm, output: tempo and destination path of the saved beat
    audios = util.read_audio_files(include, Path(genre_folder), regex)
    sequences, samples = util.create_knowledge_base(
        audios, OnsetAlgorithm(onset.lower()), beats, steps
    )

    # Create the model
    model: HiddenMarkovModel = HiddenMarkovModel.from_samples(
        DiscreteDistribution,
        n_components=components,
        X=sequences,
        algorithm="viterbi",
        verbose=True,
        name="groover",
    )

    # Use the sampled sequence. It used to be overwritten with sequences[0],
    # which discarded the model output and replayed the first training
    # sequence instead of a generated beat.
    sequence = model.sample(length=beats * steps)
    print(sequence)

    print(f"BPM: {bpm}, Beats: {beats}, Steps:{steps}, Onset Algorithm: {onset}")

    # Save the beat
    util.create_beat(sequence, samples, bpm, beats, steps).save(Path(output))
def run_test(arg, k):
    """Fit a labeled discrete HMM on one generated sequence.

    Arguments:
        arg: mapping with the keys 'type', 'N', 'alpha', 'n_comp',
            'norm_params' and 'dir'
        k: seed passed to ``np.random.seed`` before generating

    Returns:
        Tuple ``(model, sequence)``: the fitted model and the generated
        observation sequence it was trained on.
    """
    np.random.seed(k)

    kind = arg['type']
    length = arg['N']
    alpha = arg['alpha']
    state_count = arg['n_comp']
    params = arg['norm_params']
    _save_dir = arg['dir']  # read like the other settings, not used here

    generated = generator.Sequence(length, alpha, type=kind, params=params)
    # one state label per generated step, derived from the hidden path
    path_labels = [myutils.rename_state(state) for state in generated.path]

    fitted = HiddenMarkovModel.from_samples(
        DiscreteDistribution,
        n_components=state_count,
        X=[generated.sequence],
        labels=[path_labels],
        algorithm='labeled',
    )
    return fitted, generated.sequence
def fit_hmm(self, signal_arrays, state_vectors, distribution,
            state_transition_threshold=1e-4, **kwargs):
    """Fit a labeled HMM to the transformed signals.

    Arguments:
        signal_arrays: iterable of signal arrays, one per recording
        state_vectors: iterable of per-sample state codes matching the
            signal arrays; a code of 0 marks an undefined sample
        distribution: emission distribution handed to pomegranate
        state_transition_threshold: when greater than zero, the fitted
            model is passed through ``_sparsify_hmm`` with this threshold
        **kwargs: forwarded to ``HiddenMarkovModel.from_samples``

    Returns:
        The fitted model, sparsified if the threshold is positive.
    """
    # Taking the absolute value merges every negative state code (artefact
    # variant) with the positive code of the same magnitude.
    magnitudes = [np.abs(vec) for vec in state_vectors]

    # Drop 'undefined' samples (code 0) from signals and states alike.
    # TODO: let pomegranate handle that
    defined = [vec != 0 for vec in magnitudes]
    signal_arrays = [arr[mask] for arr, mask in zip(signal_arrays, defined)]
    state_vectors = [vec[mask] for vec, mask in zip(magnitudes, defined)]

    # Labels are given to pomegranate as strings.
    labels = [list(map(str, vec)) for vec in state_vectors]

    # State names: the distinct codes that remain, in sorted order.
    distinct_states = np.unique(np.concatenate(state_vectors))
    state_names = [str(state) for state in distinct_states]

    # Fit the HMM states to the transformed signals.
    signals = [self.transform(arr) for arr in signal_arrays]
    model = HiddenMarkovModel.from_samples(distribution=distribution,
                                           n_components=len(state_names),
                                           X=signals,
                                           labels=labels,
                                           algorithm='labeled',
                                           state_names=state_names,
                                           **kwargs)

    if state_transition_threshold > 0.:
        return _sparsify_hmm(model, state_transition_threshold)
    return model
def fit(self, X, y=None):
    """Train the HMM on ``X`` and score every training sequence.

    Arguments:
        X: training data; ``X.shape[0]`` sizes the score array
        y: accepted but not used

    Side effects:
        Sets ``self.hmmmodel`` to the fitted model and
        ``self.decision_scores_`` to the negated log-probability of each
        preprocessed sequence, then calls ``_process_decision_scores``.
    """
    sequences = self._check_and_preprocess(X, True)

    self.hmmmodel = HiddenMarkovModel.from_samples(
        NormalDistribution,
        self.n_states,
        sequences,
        algorithm="baum-welch",
        n_jobs=8,
        verbose=self.verbose,
        batches_per_epoch=20,
        max_iterations=self.max_iterations,
    )
    self.hmmmodel.bake()

    # A lower likelihood under the fitted model gives a higher score.
    self.decision_scores_ = np.zeros(X.shape[0])
    scores = self.decision_scores_
    for position, seq in enumerate(sequences):
        scores[position] = -self.hmmmodel.log_probability(seq)

    self._process_decision_scores()
def fit(self, data):
    """
    Fits a discrete HMM to SMILES strings, learning transition and
    emission probabilities.

    Arguments:
        data: list of SMILES; each one is split into its characters

    Returns:
        self, with ``model`` set and ``fitted`` set to True
    """
    # every SMILES string becomes a sequence of single-character symbols
    symbol_sequences = list(map(list, data))

    self.model = HiddenMarkovModel.from_samples(
        DiscreteDistribution,
        n_components=self.n_components,
        end_state=True,
        X=symbol_sequences,
        init='kmeans||',
        verbose=self.verbose,
        n_jobs=self.n_jobs,
        max_iterations=self.epochs,
        batches_per_epoch=self.batches_per_epoch,
        random_state=self.seed,
    )
    self.fitted = True
    return self
def create_casas7_HMM_with_prepared_train_and_test_based_on_seq_of_activities( train_set, list_of_persons_in_train, test_set, list_of_persons_in_test): ''' create a single HMM for all of persons train_set = an ndarray that has train_set for each person separately test_set = ''' #concatinate train_sets and test_sets of all of people number_of_persons = len(train_set) final_train_set = train_set[0] final_test_set = test_set[0] final_train_set_labels = list_of_persons_in_train[0] final_test_set_labels = list_of_persons_in_test[0] #print(type(final_train_set) , type(train_set) , type(train_set[1])) for per in range(1, number_of_persons): final_train_set = np.concatenate((final_train_set, train_set[per]), axis=0) final_test_set = np.concatenate((final_test_set, test_set[per]), axis=0) final_train_set_labels = np.concatenate( (final_train_set_labels, list_of_persons_in_train[per]), axis=0) final_test_set_labels = np.concatenate( (final_test_set_labels, list_of_persons_in_test[per]), axis=0) #r = np.shape(final_train_set) #for i in range(r[0]): # print(np.shape(final_train_set[i])) #final_train_set = np.array([[1,2,3,0,0] , [1,2,0,0,0]], dtype = np.ndarray) #final_train_set_labels = np.array([1,2] , dtype= np.ndarray) print(type(final_train_set[11]), np.shape(final_train_set[11])) print(final_train_set[0:2]) model = HiddenMarkovModel.from_samples( DiscreteDistribution, n_components=2, X=final_train_set, labels=final_train_set_labels, algorithm='labeled' ) # according to my tests :D n_components is number of hidden states print(model) #return 0 #test '''predicted_labels = np.zeros_like(actual_labels)
def create_hmm_from_sample(file_address):
    """Fit a 3-state HMM to a small hard-coded sample and print it.

    The sample is three identical 2x3 ``int32`` arrays. Both the sample and
    the fitted model are printed; nothing is returned.

    Arguments:
        file_address: currently unused; kept so existing callers keep
            working (reading the data from a CSV file was disabled)

    NOTE(review): ``MultivariateDistribution`` is passed as the emission
    distribution exactly as before; confirm it is the intended class.
    """
    # Only this final sample was ever used; the earlier throw-away
    # assignments to ``data`` and the commented-out experiments were removed.
    data = [np.array([[1, 2, 3], [1, 2, 3]], np.int32) for _ in range(3)]
    print(data)

    # n_components is the number of hidden states
    model = HiddenMarkovModel.from_samples(MultivariateDistribution,
                                           n_components=3,
                                           X=data)
    print(model)
def create_casas7_hmm(file_address, has_activity):
    """Fit a 5-state discrete HMM to the sequences in a CSV file and report it.

    The file is read with the reader matching ``has_activity``, the model is
    trained with Baum-Welch, and the model, its plot, transition matrix,
    edges, name and state count are printed. Nothing is returned.

    Arguments:
        file_address: path of the CSV file to read
        has_activity: True when the file carries an activity column, which
            selects the ``..._with_activity`` reader

    Raises:
        KeyError: re-raised from training after the message is printed.
            Previously the error was swallowed and the function then failed
            on ``model.plot()`` because the model was still a placeholder
            string.
    """
    if has_activity:
        list_of_data, list_of_persons, _ = read_sequence_based_CSV_file_with_activity(
            file_address=file_address,
            has_header=True,
            separate_data_based_on_persons=False)
    else:
        list_of_data, list_of_persons = read_sequence_based_CSV_file_without_activity(
            file_address=file_address,
            has_header=True,
            separate_data_based_on_persons=False)

    # list_of_persons is only needed by the labeled-training alternative:
    # HiddenMarkovModel.from_samples(DiscreteDistribution, n_components=2,
    #     X=list_of_data, labels=list_of_persons, algorithm='labeled')
    try:
        model = HiddenMarkovModel.from_samples(DiscreteDistribution,
                                               n_components=5,
                                               X=list_of_data,
                                               algorithm='baum-welch')
    except KeyError:
        print('there is an exception')
        # there is no model to report on, so surface the real cause
        raise

    print(model)
    print("np.shape(list_of_data):", np.shape(list_of_data))
    model.plot()
    print("dense_transition_matrix:", model.dense_transition_matrix())
    print("edge_count:", model.edge_count())
    print("edges:", model.edges)
    print("name:", model.name)
    print("state_count:", model.state_count())
    print(model.thaw())
def run(arg, k):
    """Fit HMMs to a normal and an anomalous generated signal, then log and plot.

    Two signals are generated (normal first, anomalous second, after seeding
    NumPy with ``k``) and one model is fitted to each. The log-probabilities
    of both signals under the normal model are written to
    ``<cwd>/<arg['dir']>/log_<k>.txt`` together with both model
    descriptions, and a figure is saved as ``plot<k>.png`` in the same
    directory. Scores under both models are also printed.

    Arguments:
        arg: mapping with the keys 'type', 'N', 'alpha', 'n_comp',
            'norm_params', 'an_params', 'dir', 'mean', 'varience',
            'anomal_mean' and 'anomal_varience' (key spelling as used by
            the callers)
        k: random seed; also used in the output file names

    Returns:
        None
    """
    np.random.seed(k)
    exp_type = arg['type']
    N = arg['N']
    alpha = arg['alpha']
    n_comp = arg['n_comp']
    norm_params = arg['norm_params']
    an_params = arg['an_params']
    mean = arg['mean']
    variance = arg['varience']
    anomal_mean = arg['anomal_mean']
    anomal_variance = arg['anomal_varience']

    # Generation order matters: both draw from the seeded global RNG.
    norm_gen = generator.Sequence(N, alpha, type=exp_type, params=norm_params,
                                  mean=mean, variance=variance)
    norm_signal = norm_gen.sequence
    an_gen = generator.Sequence(N, alpha, type=exp_type, params=an_params,
                                mean=anomal_mean, variance=anomal_variance)
    an_signal = an_gen.sequence

    an_labels = list(map(myutils.rename_state, an_gen.path))
    labels = list(map(myutils.rename_state, norm_gen.path))

    if exp_type == 'continue':
        model = HiddenMarkovModel.from_samples(NormalDistribution,
                                               n_components=n_comp,
                                               X=[norm_signal],
                                               labels=[labels],
                                               algorithm='labeled')
        an_model = HiddenMarkovModel.from_samples(NormalDistribution,
                                                  n_components=n_comp,
                                                  X=[an_signal],
                                                  labels=[an_labels],
                                                  algorithm='labeled')
    else:
        model = HiddenMarkovModel.from_samples(DiscreteDistribution,
                                               n_components=n_comp,
                                               X=[norm_signal],
                                               labels=[labels],
                                               algorithm='labeled')
        # NOTE(review): unlike the other three fits, this one is trained
        # without labels; kept as is, confirm that it is intentional.
        an_model = HiddenMarkovModel.from_samples(DiscreteDistribution,
                                                  n_components=n_comp,
                                                  X=[an_signal])

    l1 = model.log_probability(norm_signal)
    l2 = model.log_probability(an_signal)

    path = os.getcwd() + '/' + arg['dir']
    # Create the output directory if needed. An existing directory is fine;
    # any other failure is reported here instead of being silently ignored
    # and resurfacing at the open() below.
    os.makedirs(path, exist_ok=True)

    with open(path + '/log_' + str(k) + '.txt', 'w') as file:
        file.write(myutils.print_model_distribution(model))
        file.write(myutils.print_model_distribution(an_model))
        file.write('l_normal = {} l_anomal = {}'.format(l1, l2))
        # file.write(str(an_model.to_json()))

    fig_sub = plt.figure(figsize=(16, 5.9))

    # Narrow left panel: the two log-probabilities at x = 1.
    ax2 = fig_sub.add_axes([0.12, 0.1, 0.07, 0.8])
    ax2.plot([1], l1, 'b.', markersize=12)
    ax2.plot([1], l2, 'r.', markersize=12)
    ax2.set_ylabel('log probability')
    ax2.set_xticks([0.95, 1, 1.05])
    ax2.set_xticklabels(['', '', ''])

    # Wide right panel: the two signals over time.
    ax = fig_sub.add_axes([0.24, 0.1, 0.74, 0.8])
    # NOTE (translated from the original author's comment): mistake in the colours
    ax.plot(norm_signal, 'b', label='Normal')
    ax.plot(an_signal, 'r', label='Abnormal')
    ax.set_xlabel('Time', )
    plt.legend(loc=1)
    plt.tight_layout()
    plt.savefig(path + '/plot' + str(k) + '.png', dpi=180)
    plt.close()

    print(' {}, {}'.format(l1, l2))
    print('На аномальной')
    # Same two signals, now scored by the model fitted on the anomalous one.
    l1 = an_model.log_probability(norm_signal)
    l2 = an_model.log_probability(an_signal)
    print(' {}, {}'.format(l1, l2))
    print(' Норма\n {}'.format(model.predict_proba(an_signal)))
anomal_variance = variance write_log(N, N_train, norm_params, mean, variance, an_params, anomal_mean, anomal_variance) sequence = generator.Sequence(N_train, alpha, type='continue', params=norm_params, mean=mean, variance=variance) normal_signal = sequence.sequence labels = list(map(myutils.rename_state, sequence.path)) model = HiddenMarkovModel.from_samples(NormalDistribution, n_components=n_comp, X=[normal_signal], labels=[labels], algorithm='labeled') # model = HiddenMarkovModel.from_samples(GeneralMixtureModel, n_components = n_comp, X = [normal_signal], # labels = [labels], algorithm='labeled') # fig = plt.figure(num = 1000, figsize=(15,4)) # plt.plot(normal_signal,'b') # plt.plot([x / 3 for x in sequence.path], 'r') # plt.savefig('Graphs/path.png') # plt.close('all') pool = Pool(N_pool) with open('model.txt', 'w') as file: out = print_model_distribution(model) file.write(out) params = [
anormal_seq[start:stop] = [new_s] * (stop - start) # n_count = 5 # anormal_seq[20:20+n_count] = ['b']*n_count # print('Длина нормальной ',len(normal_seq),', аномальной ', len(anormal_seq)) #Модель # model_hmm = MarkovChain.from_samples([normal_seq]); gc.collect() print(normal_seq[-5:]) labels = list(map(myutils.rename_state, sequence.path)) # plt.plot(normal_seq) # # break model_hmm = HiddenMarkovModel.from_samples(DiscreteDistribution, n_components=len(alpha), X=[normal_seq], labels=[labels], algorithm='labeled') # model_hmm = HiddenMarkovModel.from_samples(DiscreteDistribution,n_components = len(alpha),X=[normal_seq]); # model_hmm.bake() experiment_discret(model=model_hmm, normal_seq=normal_seq[:N], anormal_seq=anormal_seq, N=N, num_launch=i) # Вывод в файл file.write(str(i) + '\n') if isinstance(model_hmm, HiddenMarkovModel):
# Cluster the word vectors with a 32-component Bayesian Gaussian mixture and
# plot how many vectors fall into each component.
mixture = BayesianGaussianMixture(n_components=32)
mixture.fit(wv.vectors)
labels = mixture.predict(wv.vectors)
plt.hist(labels, bins=32)

# <codecell>

# Map every vocabulary word to the component label predicted for its vector.
# Assumes wv.vocab[word].index is the row of that word in wv.vectors
# (TODO confirm against the word-vector library in use).
word_to_label = {}
for word in wv.vocab:
    idx = wv.vocab[word].index
    word_to_label[word] = labels[idx]


def _text_to_seq(text):
    """Return the component labels of the words in ``text`` as a NumPy array.

    Raises ``KeyError`` for a word that is missing from ``word_to_label``.
    """
    return np.array([word_to_label[word] for word in text])


# One label sequence per text.
sequences = [_text_to_seq(text) for text in texts]

# <codecell>

# Fit a 16-state HMM to the label sequences.
hmm = HiddenMarkovModel.from_samples(
    NormalDistribution,  # TODO: identify discrete distribution (the observations are component labels)
    n_components=16,
    X=sequences)

# <codecell>

hmm.dense_transition_matrix()

# <codecell>