def predict(self, a_data):
    """Determine senses of discourse connectives.

    This is a memory-optimized version of the prediction function: the
    trained models are unpickled one at a time, each one is handed the
    shared score workbench, and it is released again before the next model
    is loaded.  Due to these optimizations, however, it does not support
    the judge model.

    Args:
      a_data (list): input data to be analyzed; its first element is the
        list of relations, the whole object is additionally passed to
        every model's ``batch_predict()``

    Returns:
      void: updates input set in place (the predicted sense is appended
        to the ``SENSE`` list of every relation)

    Raises:
      RuntimeError: if no paths to trained models are known

    """
    if not self.model_paths:
        raise RuntimeError(
            "No paths to trained models are provided to make predictions.")
    rels = a_data[0]
    # normalize input relations
    self._preprocess_rels(rels)
    # allocate space for predictions: one row per relation, one column per
    # sense class
    self.wbench = np.zeros((len(rels), len(self.cls2idx)))
    try:
        # iterate over each trained model and let it add its predictions
        # to the workbench
        for ipath in self.model_paths:
            print("ipath = {:s}".format(ipath).encode(ENCODING),
                  file=sys.stderr)
            # NOTE(review): the models are unpickled from ``ipath``; only
            # model files from a trusted source should be loaded here
            with open(ipath, "rb") as ifile:
                imodel = load(ifile)
            imodel.batch_predict(rels, a_data, self.wbench)
            # drop the model before the next one is loaded to keep the
            # peak memory usage low
            del imodel
            gc.collect()
        # make final judgements: pick the best-scoring class per relation
        for i, irel in enumerate(rels):
            isense = self.idx2cls[int(np.argmax(self.wbench[i]))]
            irel[SENSE].append(SHORT2FULL.get(isense, isense))
    finally:
        # free memory occupied by the workbench, even if loading a model or
        # predicting has failed half-way
        self.wbench = None
        gc.collect()
    # postprocess input relations
    self._postprocess_rels(rels)
def _sense2idx(self, a_rels):
    """Convert symbolic senses to vectors.

    Every relation's list of sense labels is replaced by a vector with one
    component per known class (``self.cls2idx``).  The components of the
    relation's senses share the total mass of 1 equally, all other
    components are 0.

    Args:
      a_rels (list): list of discourse relations

    Returns:
      void:

    Raises:
      KeyError: if a sense is not present in ``self.cls2idx``

    Note:
      updates ``a_rels`` in place; a relation without any senses gets an
      all-zero vector

    """
    n_senses = len(self.cls2idx)
    for irel in a_rels:
        vsense = np.zeros(n_senses)
        for isense in irel[SENSE]:
            # map abbreviated sense names to their full form
            isense = SHORT2FULL.get(isense, isense)
            vsense[self.cls2idx[isense]] = 1
        total = vsense.sum()
        # normalizing an empty sense list would compute 0/0 and fill the
        # whole vector with NaN's, so only divide when something was set
        if total:
            vsense /= total
        irel[SENSE] = vsense
def train(self, a_train_data, a_type=DFLT_MODEL_TYPE,
          a_path=DFLT_MODEL_PATH, a_dev_data=None,
          a_grid_search=False, a_w2v=False, a_lstsq=False):
    """Train specified model(s) on the provided data.

    The requested models are trained one after another; each of them is
    dumped to ``a_path + '.' + <class name>`` and released right away, and
    finally this object itself is dumped to ``a_path``.

    Args:
      a_train_data (list or None): training set (its first element is the
        list of relations)
      a_type (int): bit mask of the models to be trained (combination of
        ``MJR``, ``WANG``, ``XGBOOST``, ``SVD``, and ``LSTM``)
      a_path (str): path for storing the model
      a_dev_data (list or None): development set
      a_grid_search (bool): use grid search in order to determine
        hyper-parameters of the model
      a_w2v (bool): use word2vec embeddings
      a_lstsq (bool): use least squares method

    Returns:
      void:

    Raises:
      RuntimeError: if ``a_type`` is 0

    Note:
      converts the senses of the training and development relations to
      vectors in place

    """
    if a_type == 0:
        raise RuntimeError("No model type specified.")
    if a_dev_data is None:
        a_dev_data = ([], {})
    # initialize models
    if a_type & MJR:
        from dsenser.major import MajorSenser
        self.models.append(MajorSenser())
    if a_type & WANG:
        from dsenser.wang import WangSenser
        self.models.append(WangSenser(a_grid_search=a_grid_search))
    if a_type & XGBOOST:
        from dsenser.xgboost import XGBoostSenser
        self.models.append(XGBoostSenser(a_grid_search=a_grid_search))
    # NN models have to go last, since we are pruning the parses for them
    # to free some memory
    nn_used = False
    if a_type & SVD:
        from dsenser.svd import SVDSenser
        # since we cannot differentiate SVD yet, we can only use word2vec
        # embeddings
        if not a_w2v or a_lstsq:
            print("SVD senser does not support task-specific embeddings "
                  "and least squares yet.", file=sys.stderr)
        self.models.append(SVDSenser(a_w2v=True, a_lstsq=False,
                                     a_max_iters=256))
        nn_used = True
    if a_type & LSTM:
        from dsenser.lstm import LSTMSenser
        self.models.append(LSTMSenser(a_w2v, a_lstsq))
        nn_used = True
    # remember all possible senses and explicit connectives
    for irel in chain(a_train_data[0], a_dev_data[0]):
        for isense in irel[SENSE]:
            isense = SHORT2FULL.get(isense, isense)
            if isense not in self.cls2idx:
                # a new class gets the next free index
                new_idx = len(self.cls2idx)
                self.cls2idx[isense] = new_idx
                self.idx2cls[new_idx] = isense
        if irel[TYPE] == EXPLICIT:
            self.econn.add(
                self._normalize_conn(irel[CONNECTIVE][RAW_TEXT]))
        else:
            irel[CONNECTIVE][RAW_TEXT] = ""
    # convert sense classes to indices
    self._sense2idx(a_train_data[0])
    self._sense2idx(a_dev_data[0])
    # train the models one by one; their predictions are not collected,
    # since the judge is currently not used (memory optimization)
    model_dir = os.path.dirname(a_path)
    data_pruned = False
    # plain index loop: the slot of every model is cleared as soon as the
    # model is dumped, and no iterator should keep the model alive
    for idx in range(len(self.models)):
        model = self.models[idx]
        model_path = a_path + '.' + model.__class__.__name__
        if nn_used and not data_pruned:
            from dsenser.svd import SVDSenser
            from dsenser.lstm import LSTMSenser
            # prune the data once, right before the first NN model
            if isinstance(model, (LSTMSenser, SVDSenser)):
                a_train_data = self._prune_data(*a_train_data)
                a_dev_data = self._prune_data(*a_dev_data)
                data_pruned = True
        # -1 means do not make predictions for the judge, hence no
        # prediction matrices are passed either
        model.train(a_train_data, a_dev_data, len(self.cls2idx),
                    -1, None, None)
        self._dump(model, model_path)
        self.model_paths.append(os.path.relpath(model_path, model_dir))
        # release the trained model
        self.models[idx] = model = None
        gc.collect()
    # dump model (clean the model list before)
    self.models = []
    self._dump(self, a_path)
def train(self, a_train_data, a_type=DFLT_MODEL_TYPE,
          a_path=DFLT_MODEL_PATH, a_dev_data=None,
          a_w2v=False, a_lstsq=False):
    """Train specified model(s) on the provided data.

    Every requested model is trained, dumped to
    ``a_path + '.' + <class name>``, and released before the next model is
    processed; at the end, this object itself is dumped to ``a_path``.

    Args:
      a_train_data (list or None): training set (its first element is the
        list of relations)
      a_type (int): bit mask of the models to be trained (combination of
        ``MJR``, ``WANG``, ``XGBOOST``, ``SVD``, and ``LSTM``)
      a_path (str): path for storing the model
      a_dev_data (list or None): development set
      a_w2v (bool): use word2vec embeddings
      a_lstsq (bool): use least squares method

    Returns:
      void:

    Raises:
      RuntimeError: if ``a_type`` is 0

    Note:
      converts the senses of the training and development relations to
      vectors in place

    """
    if a_type == 0:
        raise RuntimeError("No model type specified.")
    if a_dev_data is None:
        a_dev_data = ([], {})
    # initialize
    if a_type & MJR:
        from dsenser.major import MajorSenser
        self.models.append(MajorSenser())
    if a_type & WANG:
        from dsenser.wang import WangSenser
        self.models.append(WangSenser())
    if a_type & XGBOOST:
        from dsenser.xgboost import XGBoostSenser
        self.models.append(XGBoostSenser())
    # NN models have to go last, since we are pruning the parses for them
    # to free some memory
    nn_used = False
    if a_type & SVD:
        from dsenser.svd import SVDSenser
        # since we cannot differentiate SVD yet, we can only use word2vec
        # embeddings
        if not a_w2v or a_lstsq:
            print("SVD senser does not support task-specific embeddings "
                  "and least squares yet.", file=sys.stderr)
        svd_senser = SVDSenser(a_w2v=True, a_lstsq=False, a_max_iters=256)
        self.models.append(svd_senser)
        nn_used = True
    if a_type & LSTM:
        from dsenser.lstm import LSTMSenser
        self.models.append(LSTMSenser(a_w2v, a_lstsq))
        nn_used = True
    # remember all possible senses and explicit connectives
    for irel in chain(a_train_data[0], a_dev_data[0]):
        for isense in irel[SENSE]:
            isense = SHORT2FULL.get(isense, isense)
            if isense in self.cls2idx:
                continue
            # a new class gets the next free index
            next_idx = len(self.cls2idx)
            self.cls2idx[isense] = next_idx
            self.idx2cls[next_idx] = isense
        if irel[TYPE] == EXPLICIT:
            raw_conn = irel[CONNECTIVE][RAW_TEXT]
            self.econn.add(self._normalize_conn(raw_conn))
        else:
            irel[CONNECTIVE][RAW_TEXT] = ""
    # convert sense classes to indices
    self._sense2idx(a_train_data[0])
    self._sense2idx(a_dev_data[0])
    # train the models one by one; their predictions are not collected,
    # since the judge is currently not used (memory optimization)
    n_classes_dir = os.path.dirname(a_path)
    data_pruned = False
    # plain index loop: the slot of every model is cleared as soon as the
    # model is dumped, and no iterator should keep the model alive
    for pos in range(len(self.models)):
        senser = self.models[pos]
        senser_path = a_path + '.' + senser.__class__.__name__
        if nn_used and not data_pruned:
            from dsenser.svd import SVDSenser
            from dsenser.lstm import LSTMSenser
            # prune the data once, right before the first NN model
            if isinstance(senser, (LSTMSenser, SVDSenser)):
                a_train_data = self._prune_data(*a_train_data)
                a_dev_data = self._prune_data(*a_dev_data)
                data_pruned = True
        # -1 means do not make predictions for the judge, hence no
        # prediction matrices are passed either
        senser.train(a_train_data, a_dev_data, len(self.cls2idx),
                     -1, None, None)
        self._dump(senser, senser_path)
        self.model_paths.append(
            os.path.relpath(senser_path, n_classes_dir))
        # release the trained model
        self.models[pos] = senser = None
        gc.collect()
    # dump model (clean the model list before)
    self.models = []
    self._dump(self, a_path)