def __init__(self, obj_type: str,
             pairing_func: Callable[[CorpusComponent], str],
             pos_label_func: Callable[[CorpusComponent], bool],
             neg_label_func: Callable[[CorpusComponent], bool],
             pair_mode: str = "random",
             pair_id_attribute_name: str = "pair_id",
             pair_id_feat_name=None,
             label_attribute_name: str = "pair_obj_label",
             label_feat_name=None,
             pair_orientation_attribute_name: str = "pair_orientation",
             pair_orientation_feat_name=None):
    """
    Store the pairing configuration on the instance.

    :param obj_type: must be one of "speaker", "utterance" or "conversation"
    :param pairing_func: function mapping a Corpus object to the value it is paired on
    :param pos_label_func: function checking whether an object is a positive instance
    :param neg_label_func: function checking whether an object is a negative instance
    :param pair_mode: pairing mode, default: "random"
    :param pair_id_attribute_name: metadata attribute name for the pair id, default: "pair_id"
    :param pair_id_feat_name: deprecated alias of pair_id_attribute_name; takes precedence when given
    :param label_attribute_name: metadata attribute name for the label, default: "pair_obj_label"
    :param label_feat_name: deprecated alias of label_attribute_name; takes precedence when given
    :param pair_orientation_attribute_name: metadata attribute name for the pair orientation,
        default: "pair_orientation"
    :param pair_orientation_feat_name: deprecated alias of pair_orientation_attribute_name;
        takes precedence when given
    """
    assert obj_type in ("speaker", "utterance", "conversation")

    self.obj_type = obj_type
    self.pairing_func = pairing_func
    self.pos_label_func = pos_label_func
    self.neg_label_func = neg_label_func
    self.pair_mode = pair_mode

    # A legacy *_feat_name argument, when supplied, overrides its replacement.
    self.pair_id_attribute_name = (
        pair_id_feat_name if pair_id_feat_name is not None else pair_id_attribute_name
    )
    self.label_attribute_name = (
        label_feat_name if label_feat_name is not None else label_attribute_name
    )
    self.pair_orientation_attribute_name = (
        pair_orientation_feat_name
        if pair_orientation_feat_name is not None
        else pair_orientation_attribute_name
    )

    # (value passed in, legacy parameter name, replacement parameter name)
    legacy_params = (
        (pair_id_feat_name, 'pair_id_feat_name', 'pair_id_attribute_name'),
        (label_feat_name, 'label_feat_name', 'label_attribute_name'),
        (pair_orientation_feat_name, 'pair_orientation_feat_name',
         'pair_orientation_attribute_name'),
    )
    for legacy_value, legacy_name, replacement_name in legacy_params:
        if legacy_value is not None:
            deprecation(f"Pairer's {legacy_name} parameter", replacement_name)
def __init__(self, obj_type: str, pred_feats: List[str], clf=None,
             pair_id_attribute_name: str = "pair_id",
             pair_id_feat_name=None,
             label_attribute_name: str = "pair_obj_label",
             label_feat_name=None,
             pair_orientation_attribute_name: str = "pair_orientation",
             pair_orientation_feat_name=None):
    """
    Store the paired-prediction configuration on the instance.

    :param obj_type: must be one of "speaker", "utterance" or "conversation"
    :param pred_feats: list of feature names, stored as self.pred_feats
    :param clf: classifier to use; when None, a Pipeline of StandardScaler(with_mean=False)
        followed by LogisticRegression(solver='liblinear') is created
    :param pair_id_attribute_name: metadata attribute name for the pair id, default: "pair_id"
    :param pair_id_feat_name: deprecated alias of pair_id_attribute_name; takes precedence when given
    :param label_attribute_name: metadata attribute name for the label, default: "pair_obj_label"
    :param label_feat_name: deprecated alias of label_attribute_name; takes precedence when given
    :param pair_orientation_attribute_name: metadata attribute name for the pair orientation,
        default: "pair_orientation"
    :param pair_orientation_feat_name: deprecated alias of pair_orientation_attribute_name;
        takes precedence when given
    """
    assert obj_type in ("speaker", "utterance", "conversation")
    self.obj_type = obj_type

    if clf is None:
        # No model supplied: fall back to the default scaled logistic regression.
        clf = Pipeline([
            ("standardScaler", StandardScaler(with_mean=False)),
            ("logreg", LogisticRegression(solver='liblinear')),
        ])
    self.clf = clf
    self.pred_feats = pred_feats

    # A legacy *_feat_name argument, when supplied, overrides its replacement.
    self.pair_id_attribute_name = (
        pair_id_feat_name if pair_id_feat_name is not None else pair_id_attribute_name
    )
    self.label_attribute_name = (
        label_feat_name if label_feat_name is not None else label_attribute_name
    )
    self.pair_orientation_attribute_name = (
        pair_orientation_feat_name
        if pair_orientation_feat_name is not None
        else pair_orientation_attribute_name
    )

    # (value passed in, legacy parameter name, replacement parameter name)
    legacy_params = (
        (pair_id_feat_name, 'pair_id_feat_name', 'pair_id_attribute_name'),
        (label_feat_name, 'label_feat_name', 'label_attribute_name'),
        (pair_orientation_feat_name, 'pair_orientation_feat_name',
         'pair_orientation_attribute_name'),
    )
    for legacy_value, legacy_name, replacement_name in legacy_params:
        if legacy_value is not None:
            deprecation(f"PairedPrediction's {legacy_name} parameter", replacement_name)
def __init__(
    self,
    obj_type: str,
    pred_feats: List[str],
    labeller: Callable[[CorpusComponent], bool] = lambda x: True,
    clf=None,
    clf_attribute_name: str = "prediction",
    clf_feat_name=None,
    clf_prob_attribute_name: str = "pred_score",
    clf_prob_feat_name=None,
):
    """
    Store the classifier configuration on the instance.

    :param obj_type: stored as self.obj_type
    :param pred_feats: list of feature names, stored as self.pred_feats
    :param labeller: function mapping a Corpus object to a bool; defaults to always True
    :param clf: classifier to use; when None, a Pipeline of StandardScaler(with_mean=False)
        followed by LogisticRegression(solver='liblinear') is created and a notice is printed
    :param clf_attribute_name: attribute name for predictions, default: "prediction"
    :param clf_feat_name: deprecated alias of clf_attribute_name; takes precedence when given
    :param clf_prob_attribute_name: attribute name for prediction scores, default: "pred_score"
    :param clf_prob_feat_name: deprecated alias of clf_prob_attribute_name; takes precedence
        when given
    """
    self.pred_feats = pred_feats
    self.labeller = labeller
    self.obj_type = obj_type

    if clf is None:
        scaler_step = ("standardScaler", StandardScaler(with_mean=False))
        logreg_step = ("logreg", LogisticRegression(solver='liblinear'))
        clf = Pipeline([scaler_step, logreg_step])
        print(
            "Initialized default classification model (standard scaled logistic regression)."
        )
    self.clf = clf

    # A legacy *_feat_name argument, when supplied, overrides its replacement.
    self.clf_attribute_name = (
        clf_feat_name if clf_feat_name is not None else clf_attribute_name
    )
    self.clf_prob_attribute_name = (
        clf_prob_feat_name if clf_prob_feat_name is not None else clf_prob_attribute_name
    )

    # (value passed in, legacy parameter name, replacement parameter name)
    legacy_params = (
        (clf_feat_name, 'clf_feat_name', 'clf_attribute_name'),
        (clf_prob_feat_name, 'clf_prob_feat_name', 'clf_prob_attribute_name'),
    )
    for legacy_value, legacy_name, replacement_name in legacy_params:
        if legacy_value is not None:
            deprecation(f"Classifier's {legacy_name} parameter", replacement_name)
def get_info(self, key):
    """
    Deprecated accessor (see retrieve_meta()): look up <key> in this object's metadata.

    :param key: name of attribute
    :return: the value stored under <key>, or None when <key> is absent
    """
    deprecation("get_info()", "retrieve_meta()")
    value = self.meta.get(key, None)
    return value
def set_info(self, key, value):
    """
    Deprecated mutator (see add_meta()): store <value> under <key> in this object's metadata.

    :param key: name of attribute
    :param value: value to set
    :return: None
    """
    deprecation("set_info()", "add_meta()")
    metadata = self.meta
    metadata[key] = value
def __init__(self, prefix_len: int = 10, min_convo_len: int = 10,
             vector_name: str = "hyperconvo", feat_name=None,
             invalid_val: float = np.nan):
    """
    Store the HyperConvo configuration on the instance.

    :param prefix_len: stored as self.prefix_len (presumably the number of leading
        utterances considered per conversation -- confirm against the transformer code)
    :param min_convo_len: stored as self.min_convo_len (presumably the minimum
        conversation length to process -- confirm against the transformer code)
    :param vector_name: name under which results are stored, default: "hyperconvo"
    :param feat_name: deprecated alias of vector_name; takes precedence when given
    :param invalid_val: value stored as self.invalid_val, default: NaN
    """
    self.prefix_len = prefix_len
    self.min_convo_len = min_convo_len

    # The legacy feat_name overrides vector_name only when it was actually
    # supplied. (The previous condition was inverted, which left
    # self.vector_name as None by default and ignored a supplied feat_name.)
    self.vector_name = vector_name if feat_name is None else feat_name
    if feat_name is not None:
        deprecation("HyperConvo's feat_name parameter", "vector_name")

    self.invalid_val = invalid_val
def __init__(self, obj_type: str,
             score_func: Callable[[CorpusComponent], Union[int, float]],
             score_attribute_name: str = "score",
             score_feat_name=None,
             rank_attribute_name: str = "rank",
             rank_feat_name=None):
    """
    Store the ranking configuration on the instance.

    :param obj_type: stored as self.obj_type
    :param score_func: function mapping a Corpus object to a numeric score
    :param score_attribute_name: attribute name for the score, default: "score"
    :param score_feat_name: deprecated alias of score_attribute_name; takes precedence when given
    :param rank_attribute_name: attribute name for the rank, default: "rank"
    :param rank_feat_name: deprecated alias of rank_attribute_name; takes precedence when given
    """
    self.obj_type = obj_type
    self.score_func = score_func

    # A legacy *_feat_name argument, when supplied, overrides its replacement.
    self.score_attribute_name = (
        score_feat_name if score_feat_name is not None else score_attribute_name
    )
    self.rank_attribute_name = (
        rank_feat_name if rank_feat_name is not None else rank_attribute_name
    )

    # (value passed in, legacy parameter name, replacement parameter name)
    legacy_params = (
        (score_feat_name, 'score_feat_name', 'score_attribute_name'),
        (rank_feat_name, 'rank_feat_name', 'rank_attribute_name'),
    )
    for legacy_value, legacy_name, replacement_name in legacy_params:
        if legacy_value is not None:
            deprecation(f"Ranker's {legacy_name} parameter", replacement_name)
def get_usernames(self) -> List[str]:
    """Deprecated (see get_speaker_ids()): list the ids of all speakers in the Conversation.

    The returned values can be used in calls to get_speaker() to retrieve
    specific speakers. Provides no ordering guarantees for the list.

    :return: a list of speaker ids (formerly called usernames)
    """
    deprecation("get_usernames()", "get_speaker_ids()")
    if self._speaker_ids is None:
        # First call: compute and cache the set of speaker ids.
        # Read speaker.id rather than the deprecated speaker.name alias, which
        # returns the same value but would emit an additional deprecation
        # warning for every utterance. Building the set in one expression also
        # avoids leaving a partially filled cache behind if a lookup fails.
        self._speaker_ids = {
            self._owner.get_utterance(ut_id).speaker.id
            for ut_id in self._utterance_ids
        }
    return list(self._speaker_ids)
def __init__(self, obj_type: str,
             pairing_func: Callable[[CorpusComponent], str],
             pos_label_func: Callable[[CorpusComponent], bool],
             neg_label_func: Callable[[CorpusComponent], bool],
             pair_mode: str = "random",
             pair_id_attribute_name: str = "pair_id",
             pair_id_feat_name=None,
             label_attribute_name: str = "pair_obj_label",
             label_feat_name=None,
             pair_orientation_attribute_name: str = "pair_orientation",
             pair_orientation_feat_name=None):
    """
    :param obj_type: type of Corpus object to pair; must be one of "speaker", "utterance"
        or "conversation"
    :param pairing_func: the Corpus object characteristic to pair on, e.g. to pair on the
        first 10 characters of a well-structured id, use lambda obj: obj.id[:10]
    :param pos_label_func: the function to check if the object is a positive instance
    :param neg_label_func: the function to check if the object is a negative instance
    :param pair_mode: 'random': pick a single positive and negative object pair randomly
        (default), 'maximize': pick the maximum number of positive and negative object pairs
        possible randomly, or 'first': pick the first positive and negative object pair found.
    :param pair_id_attribute_name: metadata attribute name to use in annotating object with
        pair id, default: "pair_id". The value is determined by the output of pairing_func.
        If pair_mode is 'maximize', the value is the output of pairing_func + "_[i]", where i
        is the ith pair extracted from a given context.
    :param pair_id_feat_name: deprecated alias of pair_id_attribute_name; takes precedence
        when given
    :param label_attribute_name: metadata attribute name to use in annotating object with
        whether it is positive or negative, default: "pair_obj_label"
    :param label_feat_name: deprecated alias of label_attribute_name; takes precedence
        when given
    :param pair_orientation_attribute_name: metadata attribute name to use in annotating
        object with pair orientation, default: "pair_orientation"
    :param pair_orientation_feat_name: deprecated alias of pair_orientation_attribute_name;
        takes precedence when given
    """
    assert obj_type in ("speaker", "utterance", "conversation")

    self.obj_type = obj_type
    self.pairing_func = pairing_func
    self.pos_label_func = pos_label_func
    self.neg_label_func = neg_label_func
    self.pair_mode = pair_mode

    # A legacy *_feat_name argument, when supplied, overrides its replacement.
    self.pair_id_attribute_name = (
        pair_id_feat_name if pair_id_feat_name is not None else pair_id_attribute_name
    )
    self.label_attribute_name = (
        label_feat_name if label_feat_name is not None else label_attribute_name
    )
    self.pair_orientation_attribute_name = (
        pair_orientation_feat_name
        if pair_orientation_feat_name is not None
        else pair_orientation_attribute_name
    )

    # (value passed in, legacy parameter name, replacement parameter name)
    legacy_params = (
        (pair_id_feat_name, 'pair_id_feat_name', 'pair_id_attribute_name'),
        (label_feat_name, 'label_feat_name', 'label_attribute_name'),
        (pair_orientation_feat_name, 'pair_orientation_feat_name',
         'pair_orientation_attribute_name'),
    )
    for legacy_value, legacy_name, replacement_name in legacy_params:
        if legacy_value is not None:
            deprecation(f"Pairer's {legacy_name} parameter", replacement_name)
def __init__(self, forecast_attribute_name: str = "prediction",
             forecast_feat_name=None,
             forecast_prob_attribute_name: str = "score",
             forecast_prob_feat_name=None):
    """
    :param forecast_attribute_name: name for DataFrame column containing predictions,
        default: "prediction"
    :param forecast_feat_name: deprecated alias of forecast_attribute_name; takes precedence
        when given
    :param forecast_prob_attribute_name: name for column containing prediction scores,
        default: "score"
    :param forecast_prob_feat_name: deprecated alias of forecast_prob_attribute_name; takes
        precedence when given
    """
    # A legacy *_feat_name argument, when supplied, overrides its replacement.
    if forecast_feat_name is None:
        self.forecast_attribute_name = forecast_attribute_name
    else:
        self.forecast_attribute_name = forecast_feat_name

    if forecast_prob_feat_name is None:
        self.forecast_prob_attribute_name = forecast_prob_attribute_name
    else:
        self.forecast_prob_attribute_name = forecast_prob_feat_name

    # Warn about each legacy parameter only after both attributes are set.
    if forecast_feat_name is not None:
        deprecation("Forecaster's forecast_feat_name parameter",
                    'forecast_attribute_name')
    if forecast_prob_feat_name is not None:
        deprecation("Forecaster's forecast_prob_feat_name parameter",
                    'forecast_prob_attribute_name')
def iter_users(self, selector=lambda speaker: True):
    """
    Deprecated alias of iter_speakers(); emits a deprecation notice and delegates.

    :param selector: passed through to iter_speakers(); defaults to a function
        that returns True for every speaker
    :return: whatever iter_speakers(selector) returns
    """
    deprecation("iter_users()", "iter_speakers()")
    speakers = self.iter_speakers(selector)
    return speakers
def _set_name(self, value: str):
    """
    Setter behind the deprecated speaker.name attribute.

    Emits a deprecation notice pointing to speaker.id, then writes self._id.

    :param value: the new id
    :return: None
    """
    deprecation("speaker.name", "speaker.id")
    new_id = value
    self._id = new_id
def _get_name(self):
    """
    Getter behind the deprecated speaker.name attribute.

    Emits a deprecation notice pointing to speaker.id, then reads self._id.

    :return: the value of self._id
    """
    deprecation("speaker.name", "speaker.id")
    current_id = self._id
    return current_id
def _set_root(self, value: str):
    """
    Setter behind the deprecated utterance.root attribute.

    Emits a deprecation notice pointing to utterance.conversation_id, then
    assigns that attribute.

    :param value: the new conversation id
    :return: None
    """
    deprecation("utterance.root", "utterance.conversation_id")
    new_conversation_id = value
    self.conversation_id = new_conversation_id
def _get_root(self):
    """
    Getter behind the deprecated utterance.root attribute.

    Emits a deprecation notice pointing to utterance.conversation_id, then
    reads that attribute.

    :return: the value of self.conversation_id
    """
    deprecation("utterance.root", "utterance.conversation_id")
    conversation_id = self.conversation_id
    return conversation_id
def get_user(self, speaker_id: str):
    """
    Deprecated alias of get_speaker(); emits a deprecation notice and delegates.

    :param speaker_id: id passed through to get_speaker()
    :return: whatever get_speaker(speaker_id) returns
    """
    deprecation("get_user()", "get_speaker()")
    speaker = self.get_speaker(speaker_id)
    return speaker
def averages_by_user(self):
    """
    Deprecated (see averages_by_speaker()): compute the mean score per speaker.

    Iterates over self.items(), where each value is a mapping of scores, and
    averages the values of that mapping.

    :return: dict mapping each key of this object to the mean of its scores
    """
    deprecation("averages_by_user()", "averages_by_speaker()")
    averages = {}
    for speaker, scores in self.items():
        total = sum(scores.values())
        averages[speaker] = total / len(scores)
    return averages
def __init__(self, *args, **kwargs):
    """
    Deprecated constructor: emits a notice recommending the Speaker class, then
    forwards all arguments unchanged to the parent class initializer.

    :param args: positional arguments for the parent initializer
    :param kwargs: keyword arguments for the parent initializer
    """
    deprecation("The User class", "the Speaker class")
    # Warn first, then initialize exactly as the parent class does.
    super().__init__(
        *args,
        **kwargs,
    )