def __init__(self, N, M, params=None, writeout=False, brown_init_path=None, x_dict=None, approx=False,
             dirname=None):
    """
    Build the model through the parent constructor, then switch inference to sum-product.

    :param N: passed positionally to the parent constructor
    :param M: passed positionally to the parent constructor
    :param params: forwarded unchanged to the parent constructor
    :param writeout: forwarded unchanged to the parent constructor
    :param brown_init_path: forwarded unchanged to the parent constructor
    :param x_dict: forwarded unchanged to the parent constructor
    :param approx: handed to SumProduct as its ``approximate`` argument; not forwarded to the parent
    :param dirname: forwarded unchanged to the parent constructor
    """
    # Everything except ``approx`` belongs to the parent class.
    parent_kwargs = {
        "params": params,
        "writeout": writeout,
        "brown_init_path": brown_init_path,
        "x_dict": x_dict,
        "dirname": dirname,
    }
    super().__init__(N, M, **parent_kwargs)

    # sum-product instead of forward-backward; assigned after the parent
    # constructor so it replaces whatever inference object the parent set up.
    self.inference = SumProduct(approximate=approx)
def __init__(self, N, M, R=None, params=None, writeout=False, brown_init_path=None, x_dict=None, approx=False,
             dirname=None, omit_class_cond=False, omit_emis_cond=False):
    """
    Allocate parameter and count arrays, initialise or load the parameters, and set up inference.

    :param N: number of states
    :param M: number of observation symbols
    :param R: number of dep relations (relation-specific HMTM only)
    :param params: numpy objects, in this order:
                    -initial_probs
                    -transition_probs
                    -final_probs
                    -emission_probs
                   When falsy, parameters are initialised by this constructor instead.
    :param writeout: save hmm details to a file
    :param brown_init_path: if given (and no params), parameters are initialised through
                            initialize_brown_params; x_dict is then mandatory
    :param x_dict: vocabulary passed to initialize_brown_params together with brown_init_path
    :param approx: passed to SumProduct as its ``approximate`` argument
    :param dirname: output directory name; mandatory
    :param omit_class_cond: do not condition the class variable on the relation variable
    :param omit_emis_cond: do not condition the output/emission variable on the relation variable
    :raises SystemExit: if dirname is None, or if brown_init_path is given without x_dict
                        while no params are supplied
    """
    # An output directory is required; stop before any attribute is set.
    if dirname is None:
        sys.exit("Output dirname not given.")

    self.dirname = dirname
    self.N = N
    self.start_N = None  # for split-merge
    self.M = M
    self.R = R
    self.omit_class_cond = omit_class_cond
    self.omit_emis_cond = omit_emis_cond

    # Zero-filled single-precision ('f') tables. The trailing relation axis is
    # present only when the corresponding variable is conditioned on the relation.
    if not self.omit_class_cond:
        self.initial_probs = np.zeros((N, R), dtype='f')
        self.transition_probs = np.zeros((N, N, R), dtype='f')
        self.final_probs = np.zeros((N, R), dtype='f')
    else:
        # initial state probability vector
        self.initial_probs = np.zeros(N, dtype='f')
        self.transition_probs = np.zeros((N, N), dtype='f')
        self.final_probs = np.zeros(N, dtype='f')

    emission_shape = (M, N) if self.omit_emis_cond else (M, N, R)
    self.emission_probs = np.zeros(emission_shape, dtype='f')

    # params_fixed_type: random init or trained init; set by experimental script
    self.params_fixed_path = self.params_fixed_type = None

    self.brown_init_path = brown_init_path

    if params:
        # Caller supplied parameters: adopt them, cast to float32 without
        # copying when they already have that dtype.
        try:
            (self.initial_probs, self.transition_probs,
             self.final_probs, self.emission_probs) = params
            for table in ("initial_probs", "transition_probs", "final_probs", "emission_probs"):
                setattr(self, table, getattr(self, table).astype('f', copy=False))
            self.params_exist = True
        except ValueError:
            # NOTE(review): only a message is printed here; params_exist is
            # left unset and construction continues — confirm this is intended.
            print("Number of provided model parameters not right.")
    else:
        if brown_init_path is not None:
            if x_dict is None:
                sys.exit("wordrep vocab missing")
            self.initialize_brown_params(self.brown_init_path, x_dict, dist_even=True)
        else:
            self.initialize_params()
        self.params_exist = False

    # for updates in em_multiprocess
    self.total_ll = 0.0

    # Count matrices; use 64 dtype here to avoid overflow (np.zeros default dtype).
    # NOTE(review): these always carry the relation axis R, even when the
    # omit_* flags drop it from the probability tables — confirm against the
    # code that accumulates the counts.
    self.initial_counts = np.zeros((self.N, self.R))
    self.transition_counts = np.zeros((self.N, self.N, self.R))
    self.final_counts = np.zeros((self.N, self.R))
    self.emission_counts = np.zeros((self.M, self.N, self.R))

    # storing log likelihoods per iteration
    self.lls = []

    self.sanity_check_init()

    self.inference = SumProduct(approximate=approx)

    # Run/data bookkeeping; left as None by the constructor.
    self.max_iter = self.n_proc = self.n_sent = None
    self.data_name = self.data_n_tokens = None

    # online EM:
    self.minibatch_size = self.alpha = self.a = self.permute = None

    self.posttypes = None
    self.hmm_type = None

    self.writeout = writeout