def _classify_nodes(self, node):
    """Run the link-node classifier on ``node`` and tag it on a match.

    Only the ``"link_node_classifier"`` entry of ``self._classifiers`` is
    consulted here; no other label is assigned by this method.

    When the classifier returns a truthy result, ``Utils.add_class`` is
    called with the class name ``'dlinks'``. Nothing is returned.
    """
    link_classifier = self._classifiers["link_node_classifier"]
    if not link_classifier.classify(node):
        return
    Utils.add_class(node, 'dlinks')
def _mark_link_containers(self, node):
    """Tag ``node`` according to how link-heavy its content is.

    Reads the precomputed feature mapping ``self._all_features[node]`` and
    the thresholds ``"link_threshold"`` / ``"short_link_threshold"`` from
    ``self._config``:

    * Nodes with no text, or whose ``link_length / text_length`` ratio does
      not exceed ``link_threshold``, are left untouched.
    * Link-heavy nodes whose ``short_link_count / link_count`` ratio does
      not exceed ``short_link_threshold`` receive the class ``'dlst'``.
    * The remaining nodes are passed to ``self._shrink_nav_node`` and then
      receive ``'dnav'`` (exactly one short link), ``'dnavb'`` (more than
      three) or ``'dnavg'`` (otherwise), after the same class name has been
      handed to ``self._replace_child_class`` for the node's children.

    Nothing is returned.
    """
    stats = self._all_features[node]

    # The positivity check comes first so the division below cannot hit zero.
    text_len = stats["text_length"]
    if not text_len > 0:
        return
    link_density = float(stats['link_length']) / text_len
    if not link_density > self._config["link_threshold"]:
        return

    # NOTE(review): 'link_count' is assumed non-zero for link-heavy nodes;
    # a zero here would raise ZeroDivisionError -- confirm with the feature
    # extractor.
    short_ratio = float(stats['short_link_count']) / stats['link_count']
    if not short_ratio > self._config["short_link_threshold"]:
        Utils.add_class(node, 'dlst')
        return

    self._shrink_nav_node(node)

    # Looked up only after shrinking: the helper is opaque from here and may
    # touch the feature mapping, so the count is not cached earlier.
    short_links = stats['short_link_count']
    if short_links == 1:
        nav_class, replace_kwargs = 'dnav', {}
    elif short_links > 3:
        nav_class, replace_kwargs = 'dnavb', {'new_class_name': 'dnavg'}
    else:
        nav_class, replace_kwargs = 'dnavg', {}

    # Children are rewritten before the node itself is tagged.
    self._replace_child_class(node, nav_class, **replace_kwargs)
    Utils.add_class(node, nav_class)
def _mark_link_containers(self, node):
    """Assign a link-container class to ``node`` based on its features.

    The feature mapping comes from ``self._all_features[node]``; the two
    thresholds come from ``self._config``. A node is considered only when it
    has text and its ``link_length / text_length`` ratio exceeds
    ``"link_threshold"``. Such a node is tagged ``'dlst'`` unless its
    ``short_link_count / link_count`` ratio exceeds
    ``"short_link_threshold"``, in which case it is shrunk via
    ``self._shrink_nav_node`` and tagged with one of the ``'dnav*'`` classes
    chosen by the number of short links.

    Nothing is returned.
    """
    # TODO: document what the 'dnav' / 'dnavb' / 'dnavg' / 'dlst' classes
    # mean downstream; different kinds of link containers get different
    # classes, but the intent is not recorded here.
    feats = self._all_features[node]

    # Short-circuit: the ratio is evaluated only when there is some text.
    is_link_heavy = (
        feats["text_length"] > 0
        and float(feats['link_length']) / feats["text_length"]
        > self._config["link_threshold"]
    )
    if not is_link_heavy:
        return

    # NOTE(review): assumes 'link_count' is non-zero once the node is
    # link-heavy -- confirm against the feature extractor.
    mostly_short_links = (
        float(feats['short_link_count']) / feats['link_count']
        > self._config["short_link_threshold"]
    )
    if not mostly_short_links:
        Utils.add_class(node, 'dlst')
        return

    self._shrink_nav_node(node)

    # Read after the shrink call on purpose; the helper's effect on the
    # feature mapping is not visible from this method.
    short_total = feats['short_link_count']
    if short_total == 1:
        # Exactly one short link: children lose 'dnav' (presumably it is
        # replaced by the helper's default, an empty class name -- verify),
        # and the node itself becomes 'dnav'.
        self._replace_child_class(node, 'dnav')
        own_class = 'dnav'
    elif short_total > 3:
        # More than three short links: children tagged 'dnavb' are
        # re-tagged 'dnavg', and the node itself becomes 'dnavb'.
        self._replace_child_class(node, 'dnavb', new_class_name='dnavg')
        own_class = 'dnavb'
    else:
        # Zero, two or three short links.
        self._replace_child_class(node, 'dnavg')
        own_class = 'dnavg'
    Utils.add_class(node, own_class)