class AbsoluteRelativeSpinBox(QWidget):
    """Composite spin box that swaps between a float and an integer editor.

    Two editors are stacked in a ``QStackedLayout``: a ``QDoubleSpinBox``
    (index 0) and a ``QSpinBox`` (index 1).  When the double editor is
    raised above 1 the widget switches to the integer editor; when the
    integer editor reaches 0 the widget switches back to the double editor.

    Signals:
        editingFinished: emitted when editing finishes in the double editor
            with a value <= 1, or in the integer editor with a value > 0.
        valueChanged: emitted whenever the double editor's value changes.
            Changes of the integer editor do not emit it.
    """

    editingFinished = pyqtSignal()
    valueChanged = pyqtSignal()

    def __init__(self, *args, **kwargs):
        """Build both editors and apply the initial value.

        Positional arguments are forwarded to ``QWidget``.  Only the
        ``value`` keyword is read (default ``0.``); an ``int`` selects the
        integer editor, anything else the double editor.
        """
        super().__init__(*args)
        layout = QStackedLayout(self)

        self.double_spin = QDoubleSpinBox()
        self.double_spin.valueChanged.connect(self.double_value_changed)
        self.double_spin.editingFinished.connect(self.double_editing_finished)
        layout.addWidget(self.double_spin)

        self.int_spin = QSpinBox()
        self.int_spin.setMaximum(10 ** 4)
        self.int_spin.valueChanged.connect(self.int_value_changed)
        self.int_spin.editingFinished.connect(self.int_editing_finished)
        layout.addWidget(self.int_spin)

        self.setValue(kwargs.get('value', 0.))

    def double_value_changed(self):
        """Switch to the integer editor once the double value exceeds 1."""
        if self.double_spin.value() > 1:
            self.layout().setCurrentIndex(1)
            # QSpinBox.setValue only accepts an int; passing the raw float
            # raises TypeError on current Python/PyQt combinations.
            self.int_spin.setValue(int(self.double_spin.value()))

        self.valueChanged.emit()

    def double_editing_finished(self):
        """Forward ``editingFinished`` while the double value is <= 1."""
        if self.double_spin.value() <= 1.:
            self.editingFinished.emit()

    def int_value_changed(self):
        """Switch back to the double editor when the integer value hits 0."""
        if self.int_spin.value() == 0:
            self.layout().setCurrentIndex(0)
            self.double_spin.setValue(1. - self.double_spin.singleStep())
            # There is no need to emit valueChanged signal.

    def int_editing_finished(self):
        """Forward ``editingFinished`` while the integer value is > 0."""
        if self.int_spin.value() > 0:
            self.editingFinished.emit()

    def value(self):
        """Return the value of the editor that is currently displayed.

        Reading from the visible editor avoids returning a stale integer
        value after ``setValue`` was called with a float.
        """
        if self.layout().currentIndex() == 1:
            return self.int_spin.value()
        return self.double_spin.value()

    def setValue(self, value):
        """Show the editor matching the type of ``value`` and set it.

        An ``int`` is shown in the integer editor; any other value is shown
        in the double editor.
        """
        if isinstance(value, int):
            self.layout().setCurrentIndex(1)
            self.int_spin.setValue(value)
        else:
            self.layout().setCurrentIndex(0)
            self.double_spin.setValue(value)

    def setSingleStep(self, step):
        """Set the step of the double editor for a ``float``, else of the
        integer editor."""
        if isinstance(step, float):
            self.double_spin.setSingleStep(step)
        else:
            self.int_spin.setSingleStep(step)
class PCA(BaseEditor):
    """Editor for the number of components passed to ``ProjectPCA``."""

    def __init__(self, parent=None, **kwargs):
        """Create a form with a single "Components:" spin box."""
        super().__init__(parent, **kwargs)
        self.setLayout(QVBoxLayout())
        self.n_components = 10

        form = QFormLayout()
        self.cspin = QSpinBox(minimum=1, value=self.n_components)
        self.cspin.valueChanged[int].connect(self.setC)
        self.cspin.editingFinished.connect(self.edited)

        form.addRow("Components:", self.cspin)
        self.layout().addLayout(form)

    def setParameters(self, params):
        """Apply ``params["n_components"]`` (default 10).

        The value goes through ``setC`` so the spin box (which ``__repr__``
        reads) is updated and ``changed`` is emitted, rather than only
        overwriting the attribute.
        """
        self.setC(params.get("n_components", 10))

    def parameters(self):
        """Return the current settings as ``{"n_components": int}``."""
        return {"n_components": self.n_components}

    def setC(self, n_components):
        """Store a new component count, sync the spin box, emit ``changed``.

        Does nothing when the value is unchanged.
        """
        if self.n_components != n_components:
            self.n_components = n_components
            self.cspin.setValue(n_components)
            self.changed.emit()

    @staticmethod
    def createinstance(params):
        """Build a ``ProjectPCA`` from ``params`` (``n_components``: 10)."""
        n_components = params.get("n_components", 10)
        return ProjectPCA(n_components=n_components)

    def __repr__(self):
        return "Components: {}".format(self.cspin.value())
class PCA(BaseEditor):
    """Editor widget holding the ``n_components`` setting for ``ProjectPCA``."""

    def __init__(self, parent=None, **kwargs):
        """Lay out one spin box labelled "Components:" (minimum 1)."""
        super().__init__(parent, **kwargs)
        self.setLayout(QVBoxLayout())
        self.n_components = 10

        form = QFormLayout()
        self.cspin = QSpinBox(minimum=1, value=self.n_components)
        self.cspin.valueChanged[int].connect(self.setC)
        self.cspin.editingFinished.connect(self.edited)

        form.addRow("Components:", self.cspin)
        self.layout().addLayout(form)

    def setParameters(self, params):
        """Load ``n_components`` from ``params``, falling back to 10.

        Delegates to ``setC``: assigning the attribute directly would leave
        the spin box showing the old value and would not emit ``changed``.
        """
        self.setC(params.get("n_components", 10))

    def parameters(self):
        """Return a dict with the single key ``"n_components"``."""
        return {"n_components": self.n_components}

    def setC(self, n_components):
        """Update the stored count and the spin box; emit ``changed``.

        A value equal to the current one is ignored.
        """
        if self.n_components != n_components:
            self.n_components = n_components
            self.cspin.setValue(n_components)
            self.changed.emit()

    @staticmethod
    def createinstance(params):
        """Return ``ProjectPCA`` configured from ``params``."""
        n_components = params.get("n_components", 10)
        return ProjectPCA(n_components=n_components)

    def __repr__(self):
        return "Components: {}".format(self.cspin.value())
class CUR(BaseEditor):
    """Editor exposing the ``rank`` and ``max_error`` settings of ``ProjectCUR``."""

    def __init__(self, parent=None, **kwargs):
        """Create the "Rank:" and "Relative error:" spin boxes."""
        super().__init__(parent, **kwargs)
        outer = QVBoxLayout()
        self.setLayout(outer)

        self.rank = 10
        self.max_error = 1

        # Integer editor for the rank.
        self.rspin = QSpinBox(minimum=2, maximum=1000000, value=self.rank)
        self.rspin.valueChanged[int].connect(self.setR)
        self.rspin.editingFinished.connect(self.edited)

        # Floating point editor for the relative error.
        self.espin = QDoubleSpinBox(
            minimum=0.1, maximum=100.0, singleStep=0.1,
            value=self.max_error)
        self.espin.valueChanged[float].connect(self.setE)
        self.espin.editingFinished.connect(self.edited)

        rows = QFormLayout()
        rows.addRow("Rank:", self.rspin)
        rows.addRow("Relative error:", self.espin)
        outer.addLayout(rows)

    def setParameters(self, params):
        """Apply ``rank`` (default 10) and ``max_error`` (default 1)."""
        rank = params.get("rank", 10)
        self.setR(rank)
        max_error = params.get("max_error", 1)
        self.setE(max_error)

    def parameters(self):
        """Return the current settings as a dict."""
        return {"rank": self.rank, "max_error": self.max_error}

    def setR(self, rank):
        """Store a new rank, sync its spin box and emit ``changed``."""
        if self.rank == rank:
            return
        self.rank = rank
        self.rspin.setValue(rank)
        self.changed.emit()

    def setE(self, max_error):
        """Store a new error bound, sync its spin box and emit ``changed``."""
        if self.max_error == max_error:
            return
        self.max_error = max_error
        self.espin.setValue(max_error)
        self.changed.emit()

    @staticmethod
    def createinstance(params):
        """Build a ``ProjectCUR`` from ``params`` using the same defaults."""
        return ProjectCUR(rank=params.get("rank", 10),
                          max_error=params.get("max_error", 1))

    def __repr__(self):
        shown_rank = self.rspin.value()
        shown_error = self.espin.value()
        return "Rank: {}, Relative error: {}".format(shown_rank, shown_error)
class CUR(BaseEditor):
    """Editor for the ``rank`` and ``max_error`` parameters of ``ProjectCUR``."""

    def __init__(self, parent=None, **kwargs):
        """Create a form with "Rank:" and "Relative error:" spin boxes."""
        super().__init__(parent, **kwargs)
        self.setLayout(QVBoxLayout())
        self.rank = 10
        self.max_error = 1

        form = QFormLayout()
        # QSpinBox defaults to a maximum of 99, which would silently clamp
        # any larger rank handed to setR/setParameters; raise the limit.
        self.rspin = QSpinBox(minimum=2, maximum=1000000, value=self.rank)
        self.rspin.valueChanged[int].connect(self.setR)
        self.rspin.editingFinished.connect(self.edited)
        self.espin = QDoubleSpinBox(
            minimum=0.1, maximum=100.0, singleStep=0.1,
            value=self.max_error)
        self.espin.valueChanged[float].connect(self.setE)
        self.espin.editingFinished.connect(self.edited)

        form.addRow("Rank:", self.rspin)
        form.addRow("Relative error:", self.espin)
        self.layout().addLayout(form)

    def setParameters(self, params):
        """Apply ``rank`` (default 10) and ``max_error`` (default 1)."""
        self.setR(params.get("rank", 10))
        self.setE(params.get("max_error", 1))

    def parameters(self):
        """Return the current settings as ``{"rank", "max_error"}``."""
        return {"rank": self.rank, "max_error": self.max_error}

    def setR(self, rank):
        """Store a new rank, sync the spin box and emit ``changed``.

        Does nothing when the value is unchanged.
        """
        if self.rank != rank:
            self.rank = rank
            self.rspin.setValue(rank)
            self.changed.emit()

    def setE(self, max_error):
        """Store a new error bound, sync the spin box and emit ``changed``.

        Does nothing when the value is unchanged.
        """
        if self.max_error != max_error:
            self.max_error = max_error
            self.espin.setValue(max_error)
            self.changed.emit()

    @staticmethod
    def createinstance(params):
        """Build a ``ProjectCUR`` from ``params`` using the same defaults."""
        rank = params.get("rank", 10)
        max_error = params.get("max_error", 1)
        return ProjectCUR(rank=rank, max_error=max_error)

    def __repr__(self):
        return "Rank: {}, Relative error: {}".format(self.rspin.value(),
                                                     self.espin.value())
class FilteringModule(MultipleMethodModule):
    """Module that configures the token filters of a preprocessing pipeline.

    Each method constant doubles as the grid row its controls are placed in.
    ``DocFreq`` and ``DummyDocFreq`` both map to ``FrequencyFilter``: the
    former row carries the relative range, the latter the absolute range,
    and the title widget of the ``DummyDocFreq`` row is hidden.
    """

    Stopwords, Lexicon, Regexp, DocFreq, DummyDocFreq, MostFreq = range(6)
    Methods = {Stopwords: StopwordsFilter,
               Lexicon: LexiconFilter,
               Regexp: RegexpFilter,
               DocFreq: FrequencyFilter,
               DummyDocFreq: FrequencyFilter,
               MostFreq: MostFrequentTokensFilter}
    DEFAULT_METHODS = [Stopwords]
    DEFAULT_LANG = "English"
    DEFAULT_NONE = None
    # Built from raw pieces so no invalid escape sequences remain; only the
    # quote characters need a regular literal.  The resulting string is
    # identical to the previous non-raw literal.
    DEFAULT_PATTERN = (
        r"\.|,|:|;|!|\?|\(|\)|\||\+|"
        "'|\"|‘|’|“|”|'|"
        r"\’|…|\-|–|—|\$|&|\*|>|<|\/|\[|\]"
    )
    DEFAULT_FREQ_TYPE = 0  # 0 - relative freq, 1 - absolute freq
    DEFAULT_REL_START, DEFAULT_REL_END, REL_MIN, REL_MAX = 0.1, 0.9, 0, 1
    DEFAULT_ABS_START, DEFAULT_ABS_END, ABS_MIN, ABS_MAX = 1, 10, 0, 10000
    DEFAULT_N_TOKEN = 100

    def __init__(self, parent=None, **kwargs):
        """Create the controls for every filter and place them in the grid."""
        super().__init__(parent, **kwargs)
        self.__sw_lang = self.DEFAULT_LANG
        self.__sw_file = self.DEFAULT_NONE
        self.__lx_file = self.DEFAULT_NONE
        self.__pattern = self.DEFAULT_PATTERN
        self.__freq_type = self.DEFAULT_FREQ_TYPE
        self.__rel_freq_st = self.DEFAULT_REL_START
        self.__rel_freq_en = self.DEFAULT_REL_END
        self.__abs_freq_st = self.DEFAULT_ABS_START
        self.__abs_freq_en = self.DEFAULT_ABS_END
        self.__n_token = self.DEFAULT_N_TOKEN
        self.__invalidated = False

        # Stopwords: language combo plus a file loader.
        self.__combo = ComboBox(
            self, [_DEFAULT_NONE] + StopwordsFilter.supported_languages(),
            self.__sw_lang, self.__set_language
        )
        self.__sw_loader = FileLoader()
        self.__sw_loader.set_file_list()
        self.__sw_loader.activated.connect(self.__sw_loader_activated)
        self.__sw_loader.file_loaded.connect(self.__sw_invalidate)

        # Lexicon: file loader only.
        self.__lx_loader = FileLoader()
        self.__lx_loader.set_file_list()
        self.__lx_loader.activated.connect(self.__lx_loader_activated)
        self.__lx_loader.file_loaded.connect(self.__lx_invalidate)

        # Regexp: line edit validated by RegexpFilter.validate_regexp.
        validator = RegexpFilter.validate_regexp
        self.__edit = ValidatedLineEdit(self.__pattern, validator)
        self.__edit.editingFinished.connect(self.__edit_finished)

        # Document frequency: radio buttons choose relative (id 0) or
        # absolute (id 1) ranges.
        rel_freq_rb = QRadioButton("相对:")
        abs_freq_rb = QRadioButton("绝对:")
        self.__freq_group = group = QButtonGroup(self, exclusive=True)
        group.addButton(rel_freq_rb, 0)
        group.addButton(abs_freq_rb, 1)
        group.buttonClicked.connect(self.__freq_group_clicked)
        group.button(self.__freq_type).setChecked(True)

        self.__rel_range_spins = RangeDoubleSpins(
            self.__rel_freq_st, 0.05, self.__rel_freq_en, self.REL_MIN,
            self.REL_MAX, self.__set_rel_freq_start,
            self.__set_rel_freq_end, self.__rel_spins_edited
        )
        self.__abs_range_spins = RangeSpins(
            self.__abs_freq_st, 1, self.__abs_freq_en, self.ABS_MIN,
            self.ABS_MAX, self.__set_abs_freq_start,
            self.__set_abs_freq_end, self.__abs_spins_edited
        )

        # Most frequent tokens: number of tokens to keep.
        self.__spin_n = QSpinBox(
            minimum=1, maximum=10**6, value=self.__n_token)
        self.__spin_n.editingFinished.connect(self.__spin_n_edited)
        self.__spin_n.valueChanged.connect(self.changed)

        self.layout().addWidget(self.__combo, self.Stopwords, 1)
        self.layout().addWidget(self.__sw_loader.file_combo,
                                self.Stopwords, 2, 1, 2)
        self.layout().addWidget(self.__sw_loader.browse_btn,
                                self.Stopwords, 4)
        self.layout().addWidget(self.__sw_loader.load_btn,
                                self.Stopwords, 5)
        self.layout().addWidget(self.__lx_loader.file_combo,
                                self.Lexicon, 2, 1, 2)
        self.layout().addWidget(self.__lx_loader.browse_btn,
                                self.Lexicon, 4)
        self.layout().addWidget(self.__lx_loader.load_btn,
                                self.Lexicon, 5)
        self.layout().addWidget(self.__edit, self.Regexp, 1, 1, 5)
        spins = self.__rel_range_spins.spins()
        self.layout().addWidget(rel_freq_rb, self.DocFreq, 1)
        self.layout().addWidget(spins[0], self.DocFreq, 2)
        self.layout().addWidget(spins[1], self.DocFreq, 3)
        spins = self.__abs_range_spins.spins()
        self.layout().addWidget(abs_freq_rb, self.DummyDocFreq, 1)
        self.layout().addWidget(spins[0], self.DummyDocFreq, 2)
        self.layout().addWidget(spins[1], self.DummyDocFreq, 3)
        # Hide the widget in column 0 of the DummyDocFreq row.
        title = self.layout().itemAtPosition(self.DummyDocFreq, 0).widget()
        title.hide()
        self.layout().addWidget(self.__spin_n, self.MostFreq, 1)
        self.layout().setColumnStretch(3, 1)

    def __sw_loader_activated(self):
        """Record the selected stopword file; emit ``edited`` if in use."""
        self.__sw_file = self.__sw_loader.get_current_file()
        self.changed.emit()
        if self.Stopwords in self.methods:
            self.edited.emit()

    def __sw_invalidate(self):
        """React to the stopword file being (re)loaded."""
        if self.Stopwords in self.methods and self.__sw_file:
            # NOTE(review): a fresh random value presumably makes the
            # parameter dict differ so consumers see a change - confirm.
            self.__invalidated = random.random()
            self.edited.emit()

    def __lx_loader_activated(self):
        """Record the selected lexicon file; emit ``edited`` if in use."""
        self.__lx_file = self.__lx_loader.get_current_file()
        self.changed.emit()
        if self.Lexicon in self.methods:
            self.edited.emit()

    def __lx_invalidate(self):
        """React to the lexicon file being (re)loaded."""
        if self.Lexicon in self.methods and self.__lx_file:
            # NOTE(review): see __sw_invalidate for the random value.
            self.__invalidated = random.random()
            self.edited.emit()

    def __edit_finished(self):
        """Commit the pattern from the line edit when it has changed."""
        pattern = self.__edit.text()
        if self.__pattern != pattern:
            self.__set_pattern(pattern)
            if self.Regexp in self.methods:
                self.edited.emit()

    def __freq_group_clicked(self):
        """Commit the frequency type chosen with the radio buttons."""
        i = self.__freq_group.checkedId()
        if self.__freq_type != i:
            self.__set_freq_type(i)
            if self.DocFreq in self.methods:
                self.edited.emit()

    def __rel_spins_edited(self):
        """Emit ``edited`` when the relative range is the active one."""
        if self.DocFreq in self.methods and self.__freq_type == 0:
            self.edited.emit()

    def __abs_spins_edited(self):
        """Emit ``edited`` when the absolute range is the active one."""
        if self.DocFreq in self.methods and self.__freq_type == 1:
            self.edited.emit()

    def __spin_n_edited(self):
        """Commit the token count from the spin box when it has changed."""
        n = self.__spin_n.value()
        if self.__n_token != n:
            self.__set_n_tokens(n)
            if self.MostFreq in self.methods:
                self.edited.emit()

    def setParameters(self, params: Dict):
        """Load all filter settings from ``params``, using class defaults
        for missing keys, and clear the invalidation marker."""
        super().setParameters(params)
        self.__set_language(params.get("language", self.DEFAULT_LANG))
        self.__set_sw_path(params.get("sw_path", self.DEFAULT_NONE),
                           params.get("sw_list", []))
        self.__set_lx_path(params.get("lx_path", self.DEFAULT_NONE),
                           params.get("lx_list", []))
        self.__set_pattern(params.get("pattern", self.DEFAULT_PATTERN))
        self.__set_freq_type(params.get("freq_type", self.DEFAULT_FREQ_TYPE))
        self.__set_rel_freq_range(
            params.get("rel_start", self.DEFAULT_REL_START),
            params.get("rel_end", self.DEFAULT_REL_END)
        )
        self.__set_abs_freq_range(
            params.get("abs_start", self.DEFAULT_ABS_START),
            params.get("abs_end", self.DEFAULT_ABS_END)
        )
        self.__set_n_tokens(params.get("n_tokens", self.DEFAULT_N_TOKEN))
        self.__invalidated = False

    def __set_language(self, language: str):
        """Store a new stopword language and update the combo box."""
        if self.__sw_lang != language:
            self.__sw_lang = language
            self.__combo.setCurrentText(language)
            self.changed.emit()
            if self.Stopwords in self.methods:
                self.edited.emit()

    def __set_sw_path(self, path: RecentPath,
                      paths: List[RecentPath] = None):
        """Install the recent stopword paths and select ``path``.

        ``paths`` defaults to a fresh empty list; a shared mutable default
        would be stored on the loader and aliased between calls.
        """
        self.__sw_loader.recent_paths = [] if paths is None else paths
        self.__sw_loader.set_file_list()
        self.__sw_loader.set_current_file(_to_abspath(path))
        self.__sw_file = self.__sw_loader.get_current_file()

    def __set_lx_path(self, path: RecentPath,
                      paths: List[RecentPath] = None):
        """Install the recent lexicon paths and select ``path``.

        ``paths`` defaults to a fresh empty list; a shared mutable default
        would be stored on the loader and aliased between calls.
        """
        self.__lx_loader.recent_paths = [] if paths is None else paths
        self.__lx_loader.set_file_list()
        self.__lx_loader.set_current_file(_to_abspath(path))
        self.__lx_file = self.__lx_loader.get_current_file()

    def __set_pattern(self, pattern: str):
        """Store a new regexp pattern and show it in the line edit."""
        if self.__pattern != pattern:
            self.__pattern = pattern
            self.__edit.setText(pattern)
            self.changed.emit()

    def __set_freq_type(self, freq_type: int):
        """Store the frequency type and check the matching radio button."""
        if self.__freq_type != freq_type:
            self.__freq_type = freq_type
            self.__freq_group.button(self.__freq_type).setChecked(True)
            self.changed.emit()

    def __set_rel_freq_range(self, start: float, end: float):
        """Store the relative range and push it to the spin boxes."""
        self.__set_rel_freq_start(start)
        self.__set_rel_freq_end(end)
        self.__rel_range_spins.set_range(start, end)

    def __set_rel_freq_start(self, n: float):
        """Store the lower relative bound; emit ``changed`` on change."""
        if self.__rel_freq_st != n:
            self.__rel_freq_st = n
            self.changed.emit()

    def __set_rel_freq_end(self, n: float):
        """Store the upper relative bound; emit ``changed`` on change."""
        if self.__rel_freq_en != n:
            self.__rel_freq_en = n
            self.changed.emit()

    def __set_abs_freq_range(self, start: int, end: int):
        """Store the absolute range and push it to the spin boxes."""
        self.__set_abs_freq_start(start)
        self.__set_abs_freq_end(end)
        self.__abs_range_spins.set_range(start, end)

    def __set_abs_freq_start(self, n: int):
        """Store the lower absolute bound; emit ``changed`` on change."""
        if self.__abs_freq_st != n:
            self.__abs_freq_st = n
            self.changed.emit()

    def __set_abs_freq_end(self, n: int):
        """Store the upper absolute bound; emit ``changed`` on change."""
        if self.__abs_freq_en != n:
            self.__abs_freq_en = n
            self.changed.emit()

    def __set_n_tokens(self, n: int):
        """Store the number of tokens to keep and update the spin box."""
        if self.__n_token != n:
            self.__n_token = n
            self.__spin_n.setValue(n)
            self.changed.emit()

    def parameters(self) -> Dict:
        """Return the base-class parameters extended with filter settings."""
        params = super().parameters()
        params.update({"language": self.__sw_lang,
                       "sw_path": self.__sw_file,
                       "sw_list": self.__sw_loader.recent_paths,
                       "lx_path": self.__lx_file,
                       "lx_list": self.__lx_loader.recent_paths,
                       "pattern": self.__pattern,
                       "freq_type": self.__freq_type,
                       "rel_start": self.__rel_freq_st,
                       "rel_end": self.__rel_freq_en,
                       "abs_start": self.__abs_freq_st,
                       "abs_end": self.__abs_freq_en,
                       "n_tokens": self.__n_token,
                       "invalidated": self.__invalidated})
        return params

    @staticmethod
    def createinstance(params: Dict) -> List[BaseTokenFilter]:
        """Build the filters selected in ``params["methods"]``.

        Filters are appended in the fixed order Stopwords, Lexicon, Regexp,
        DocFreq, MostFreq.  For DocFreq the relative bounds are used when
        ``freq_type`` is 0 and the absolute bounds otherwise.
        """
        def map_none(s):
            # The "none" combo entry is passed to the filter as "".
            return "" if s == _DEFAULT_NONE else s

        methods = params.get("methods", FilteringModule.DEFAULT_METHODS)
        filters = []
        if FilteringModule.Stopwords in methods:
            lang = params.get("language", FilteringModule.DEFAULT_LANG)
            path = params.get("sw_path", FilteringModule.DEFAULT_NONE)
            filters.append(StopwordsFilter(
                language=map_none(lang), path=_to_abspath(path)
            ))
        if FilteringModule.Lexicon in methods:
            path = params.get("lx_path", FilteringModule.DEFAULT_NONE)
            filters.append(LexiconFilter(path=_to_abspath(path)))
        if FilteringModule.Regexp in methods:
            pattern = params.get("pattern", FilteringModule.DEFAULT_PATTERN)
            filters.append(RegexpFilter(pattern=pattern))
        if FilteringModule.DocFreq in methods:
            if params.get("freq_type", FilteringModule.DEFAULT_FREQ_TYPE) == 0:
                st = params.get("rel_start", FilteringModule.DEFAULT_REL_START)
                end = params.get("rel_end", FilteringModule.DEFAULT_REL_END)
            else:
                st = params.get("abs_start", FilteringModule.DEFAULT_ABS_START)
                end = params.get("abs_end", FilteringModule.DEFAULT_ABS_END)
            filters.append(FrequencyFilter(min_df=st, max_df=end))
        if FilteringModule.MostFreq in methods:
            n = params.get("n_tokens", FilteringModule.DEFAULT_N_TOKEN)
            filters.append(MostFrequentTokensFilter(keep_n=n))
        return filters

    def __repr__(self):
        texts = []
        for method in self.methods:
            if method == self.Stopwords:
                append = f"Language: {self.__sw_lang}, " \
                         f"File: {_to_abspath(self.__sw_file)}"
            elif method == self.Lexicon:
                append = f"File: {_to_abspath(self.__lx_file)}"
            elif method == self.Regexp:
                append = f"{self.__pattern}"
            elif method == self.DocFreq:
                if self.__freq_type == 0:
                    append = f"[{self.__rel_freq_st}, {self.__rel_freq_en}]"
                else:
                    append = f"[{self.__abs_freq_st}, {self.__abs_freq_en}]"
            elif method == self.MostFreq:
                append = f"{self.__n_token}"
            else:
                # No description exists for other methods (DummyDocFreq);
                # skip them instead of reusing a stale or unbound `append`.
                continue
            texts.append(f"{self.Methods[method].name} ({append})")
        return ", ".join(texts)
class StandardizeEditor(ScBaseEditor):
    """Editor for the optional lower/upper clipping bounds of ``Standardize``."""

    DEFAULT_LOWER_CLIP = False
    DEFAULT_UPPER_CLIP = False
    DEFAULT_LOWER_BOUND = -10
    DEFAULT_UPPER_BOUND = 10

    def __init__(self, parent=None, **kwargs):
        """Create a "Clipping" group with a check box and spin box per bound."""
        super().__init__(parent, **kwargs)
        self._lower_bound = self.DEFAULT_LOWER_BOUND
        self._upper_bound = self.DEFAULT_UPPER_BOUND

        outer = QVBoxLayout()
        self.setLayout(outer)

        group_box = QGroupBox(title="Clipping", flat=True)
        rows = QFormLayout()

        # Lower bound controls.
        self.lower_check = QCheckBox("Lower Bound: ")
        self.lower_check.clicked.connect(self.edited)
        self.lower_spin = QSpinBox(
            minimum=-99, maximum=0, value=self._lower_bound)
        self.lower_spin.valueChanged[int].connect(self._set_lower_bound)
        self.lower_spin.editingFinished.connect(self.edited)

        # Upper bound controls.
        self.upper_check = QCheckBox("Upper Bound: ")
        self.upper_check.clicked.connect(self.edited)
        self.upper_spin = QSpinBox(value=self._upper_bound)
        self.upper_spin.valueChanged[int].connect(self._set_upper_bound)
        self.upper_spin.editingFinished.connect(self.edited)

        rows.addRow(self.lower_check, self.lower_spin)
        rows.addRow(self.upper_check, self.upper_spin)
        group_box.setLayout(rows)
        outer.addWidget(group_box)

    def _set_lower_bound(self, x):
        """Store a new lower bound, sync the spin box, emit ``changed``."""
        if self._lower_bound == x:
            return
        self._lower_bound = x
        self.lower_spin.setValue(x)
        self.changed.emit()

    def _set_upper_bound(self, x):
        """Store a new upper bound, sync the spin box, emit ``changed``."""
        if self._upper_bound == x:
            return
        self._upper_bound = x
        self.upper_spin.setValue(x)
        self.changed.emit()

    def setParameters(self, params):
        """Load check states and bounds from ``params`` with class defaults."""
        self.lower_check.setChecked(
            params.get("lower_clip", self.DEFAULT_LOWER_CLIP))
        self._set_lower_bound(
            params.get("lower", self.DEFAULT_LOWER_BOUND))

        self.upper_check.setChecked(
            params.get("upper_clip", self.DEFAULT_UPPER_CLIP))
        self._set_upper_bound(
            params.get("upper", self.DEFAULT_UPPER_BOUND))

    def parameters(self):
        """Return the check states and bounds as a dict."""
        settings = {}
        settings["lower_clip"] = self.lower_check.isChecked()
        settings["lower"] = self._lower_bound
        settings["upper_clip"] = self.upper_check.isChecked()
        settings["upper"] = self._upper_bound
        return settings

    @staticmethod
    def createinstance(params):
        """Build ``Standardize``; a bound is ``None`` unless its clip flag
        is set in ``params``."""
        cls = StandardizeEditor
        lower = (params.get("lower", cls.DEFAULT_LOWER_BOUND)
                 if params.get("lower_clip", cls.DEFAULT_LOWER_CLIP)
                 else None)
        upper = (params.get("upper", cls.DEFAULT_UPPER_BOUND)
                 if params.get("upper_clip", cls.DEFAULT_UPPER_CLIP)
                 else None)
        return Standardize(lower, upper)

    def __repr__(self):
        described = []
        if self.lower_check.isChecked():
            lower_value = self.lower_spin.value()
            described.append("Lower Bound: {}".format(lower_value))
        if self.upper_check.isChecked():
            upper_value = self.upper_spin.value()
            described.append("Upper Bound: {}".format(upper_value))
        if not described:
            return "No Clipping"
        return ", ".join(described)