コード例 #1
0
 def __init__(self,
              N,
              M,
              params=None,
              writeout=False,
              brown_init_path=None,
              x_dict=None,
              approx=False,
              dirname=None):
     """Run the parent constructor, then install a SumProduct inference object.

     ``N`` and ``M`` are passed positionally to the parent constructor;
     ``params``, ``writeout``, ``brown_init_path``, ``x_dict`` and ``dirname``
     are forwarded to it as keyword arguments. ``approx`` is not forwarded:
     it is handed to ``SumProduct`` as its ``approximate`` argument.
     """
     # Everything the parent constructor receives by keyword.
     parent_options = {
         "params": params,
         "writeout": writeout,
         "brown_init_path": brown_init_path,
         "x_dict": x_dict,
         "dirname": dirname,
     }
     super().__init__(N, M, **parent_options)

     # sum-product instead of forward-backward
     self.inference = SumProduct(approximate=approx)
コード例 #2
0
ファイル: hmrtm.py プロジェクト: dkaushik96/hmm-reps
    def __init__(self, N, M, R=None, params=None, writeout=False, brown_init_path=None, x_dict=None, approx=False,
                 dirname=None, omit_class_cond=False, omit_emis_cond=False):
        """
        Set up model dimensions, parameter matrices, count matrices and the inference object.

        :param N: number of states
        :param M: number of observation symbols
        :param R: number of dep relations (relation-specific HMTM only)
        :param params: optional sequence of exactly four numpy objects, in this order:
          -initial_probs
          -transition_probs
          -final_probs
          -emission_probs
          When falsy, the parameters are initialized by the model itself.
        :param writeout: save hmm details to a file
        :param brown_init_path: when given (and params is falsy), parameters are initialized through
          initialize_brown_params using this path instead of initialize_params
        :param x_dict: vocabulary handed to initialize_brown_params; must not be None when
          brown_init_path is used
        :param approx: passed to SumProduct as its ``approximate`` argument
        :param dirname: output dirname; mandatory, the process exits when it is None
        :param omit_class_cond: do not condition the class variable on the relation variable
        :param omit_emis_cond: do not condition the output/emission variable on the relation variable
        :raises ValueError: if params does not unpack into exactly four items
        """
        if dirname is None:
            sys.exit("Output dirname not given.")
        self.dirname = dirname
        self.N = N
        self.start_N = None  # for split-merge
        self.M = M
        self.R = R
        self.omit_class_cond = omit_class_cond
        self.omit_emis_cond = omit_emis_cond

        # Zero-filled parameter matrices ('f' dtype); the relation axis R is
        # dropped when the class variable is not conditioned on the relation.
        if self.omit_class_cond:
            self.initial_probs = np.zeros(N, 'f')
            self.transition_probs = np.zeros([N, N], 'f')
            self.final_probs = np.zeros(N, 'f')
        else:
            self.initial_probs = np.zeros([N, R], 'f')
            self.transition_probs = np.zeros([N, N, R], 'f')
            self.final_probs = np.zeros([N, R], 'f')

        # Same rule for the emission matrix, controlled by its own flag.
        if self.omit_emis_cond:
            self.emission_probs = np.zeros([M, N], 'f')
        else:
            self.emission_probs = np.zeros([M, N, R], 'f')

        self.params_fixed_path = None
        self.params_fixed_type = None  # random init or trained init; set by experimental script
        self.brown_init_path = brown_init_path

        if not params:
            # No parameters supplied: let the model initialize them.
            if brown_init_path is None:
                self.initialize_params()
            else:
                if x_dict is None:
                    sys.exit("wordrep vocab missing")
                self.initialize_brown_params(self.brown_init_path, x_dict, dist_even=True)
            self.params_exist = False
        else:
            # Only the unpacking is guarded, so that a wrong number of items is
            # reported as such and unrelated conversion errors are not mislabelled.
            try:
                initial_probs, transition_probs, final_probs, emission_probs = params
            except ValueError as err:
                # Fail loudly: continuing would leave all-zero parameters and
                # no params_exist attribute on the instance.
                raise ValueError("Number of provided model parameters not right.") from err
            # copy=False avoids a copy when the array already has the 'f' dtype.
            self.initial_probs = initial_probs.astype('f', copy=False)
            self.transition_probs = transition_probs.astype('f', copy=False)
            self.final_probs = final_probs.astype('f', copy=False)
            self.emission_probs = emission_probs.astype('f', copy=False)
            self.params_exist = True

        # for updates in em_multiprocess
        self.total_ll = 0.0
        # Count matrices; use 64 dtype here to avoid overflow
        # NOTE(review): these always carry the R axis, regardless of
        # omit_class_cond / omit_emis_cond, and so need R to be an integer
        # -- confirm this is intended.
        self.initial_counts = np.zeros([self.N, self.R])
        self.transition_counts = np.zeros([self.N, self.N, self.R])
        self.final_counts = np.zeros([self.N, self.R])
        self.emission_counts = np.zeros([self.M, self.N, self.R])

        # storing log likelihoods per iteration
        self.lls = []

        self.sanity_check_init()
        self.inference = SumProduct(approximate=approx)

        # Run settings; left unset here.
        self.max_iter = None
        self.n_proc = None
        self.n_sent = None
        self.data_name = None
        self.data_n_tokens = None
        # online EM:
        self.minibatch_size = None
        self.alpha = None
        self.a = None
        self.permute = None

        self.posttypes = None

        self.hmm_type = None

        self.writeout = writeout