def preprocess_data(graph: nx.Graph, shortest_paths_path: Union[str, Path]):
    """Load shortest paths and relabel the graph's nodes with integer ids.

    The nodes are numbered 0..N-1 in the order ``graph.nodes()`` yields them.
    The graph is copied into a plain ``nx.DiGraph`` (if directed) or
    ``nx.Graph`` (otherwise) before relabelling.

    Args:
        graph: Graph whose nodes are to be relabelled.
        shortest_paths_path: Location handed to ``serializer.load`` to read
            the shortest paths.

    Returns:
        A ``(graph, graph_dists, mapping)`` tuple: the relabelled graph, the
        result of ``nodes_to_mtx(shortest_paths, mapping)`` (presumably a
        distance matrix indexed by the new ids -- TODO confirm), and the
        original-node -> integer-id mapping.
    """
    shortest_paths = serializer.load(shortest_paths_path)

    # One consecutive integer id per node, in node-iteration order.
    node_ids = np.arange(0, graph.number_of_nodes())
    mapping = dict(zip(graph.nodes(), node_ids))

    # Rebuild as a plain (Di)Graph that matches the input's directedness.
    graph_cls = nx.DiGraph if nx.is_directed(graph) else nx.Graph
    graph = nx.relabel_nodes(graph_cls(graph), mapping)

    graph_dists = nodes_to_mtx(shortest_paths, mapping)
    return graph, graph_dists, mapping
def prepare_aspect_graph(
    experiment_paths: ExperimentPaths,
) -> Tuple[Graph, ExperimentPaths]:
    """Load the aspect hierarchical tree and convert it for graph-tool use.

    Steps: load the tree from ``experiment_paths.aspect_hierarchical_tree``,
    log ``MIN_DEGREE`` to MLflow, drop every node whose degree is greater
    than ``MIN_DEGREE``, convert the result with ``networkx_2_graph_tool``,
    remove self loops and reindex the edges.

    Args:
        experiment_paths: Paths of the current experiment's artifacts.

    Returns:
        A tuple of the converted graph wrapped in ``Graph`` and the unchanged
        ``experiment_paths``.
    """
    tree_path = experiment_paths.aspect_hierarchical_tree
    # Log the artifact that is actually loaded; the message used to name the
    # aspect-to-aspect graph path although the hierarchical tree is read.
    logger.info(f"Load aspect hierarchical tree - {tree_path}")
    aspect_graph = serializer.load(tree_path)

    mlflow.log_param("min_aspect_graph_degree", MIN_DEGREE)
    # NOTE(review): nodes with degree ABOVE MIN_DEGREE are removed, while the
    # logged parameter name suggests a minimum-degree filter -- confirm that
    # the comparison direction is intended.
    remove = [
        node
        for node, degree in dict(aspect_graph.degree()).items()
        if degree > MIN_DEGREE
    ]

    # Node count before and after the degree filter.
    print(f'nodes: {len(aspect_graph.nodes())}')
    aspect_graph.remove_nodes_from(remove)
    print(f'nodes: {len(aspect_graph.nodes())}')

    aspect_graph = networkx_2_graph_tool(
        aspect_graph, node_name_property="aspect_name"
    )
    remove_self_loops(aspect_graph)
    aspect_graph.reindex_edges()
    return Graph(aspect_graph), experiment_paths
# Streamlit page: pick a results directory and display its aspect
# hierarchical tree.

# Every entry directly under RESULTS_PATH, keyed by its name.
results_dirs = {entry.name: entry for entry in RESULTS_PATH.glob('*')}

st.sidebar.title('Which dataset do you want to analyze?')
# The alphabetically last entry is preselected.
results_dir_name = st.sidebar.selectbox(
    '',
    sorted(results_dirs),
    index=len(results_dirs) - 1,
)

st.header('Available files and directories for:')
st.info(results_dir_name)
st.write([child.name for child in results_dirs[results_dir_name].glob('*')])

paths = ExperimentPaths('', RESULTS_PATH / results_dir_name)

st.header('Discourse Trees Data Frame structure')
# Disabled: sample rows of the discourse trees data frame.
# discourse_tree_df_cache = st.cache(serializer.load)
# discourse_tree_df = discourse_tree_df_cache(paths.discourse_trees_df)
# st.write(discourse_tree_df.sample(5))

aspect_sentiments = dict(serializer.load(paths.aspect_sentiments))
st.header('Aspect sentiments')
# Disabled: dump of the aspect sentiments.
# st.write(aspect_sentiments)

# Disabled: rendering of the aspect-to-aspect graph.
# arrg = serializer.load(paths.aspect_to_aspect_graph)
# arrg_dot = nx.nx_pydot.to_pydot(arrg)
# st.graphviz_chart(arrg_dot.to_string())

# Render the aspect hierarchical tree as a Graphviz chart.
aht = serializer.load(paths.aspect_hierarchical_tree)
aht_dot = nx.nx_pydot.to_pydot(aht)
st.graphviz_chart(aht_dot.to_string())
from tqdm import tqdm

from aspects.aspects.aspects_graph_builder import Aspect2AspectGraph
from aspects.data_io import serializer
from aspects.utilities import settings

# Results directory of the Cell Phones and Accessories reviews.
REVIEWS_RESULTS = (
    settings.DEFAULT_OUTPUT_PATH.parent.parent
    / 'results'
    / 'reviews_Cell_Phones_and_Accessories'
)

# Disabled alternative loaders:
# aspects_per_edu = serializer.load((REVIEWS_RESULTS / 'aspects_per_edu').as_posix())
# edus = serializer.load('results/reviews_Cell_Phones_and_Accessories/raw_edu_list')
# documents_info = serializer.load((REVIEWS_RESULTS / 'documents_info').as_posix())

with open((REVIEWS_RESULTS / 'aspects_per_edu.json').as_posix(), 'r') as f:
    aspects_per_edu = json.load(f)

aspect_relations = serializer.load(
    (REVIEWS_RESULTS / 'edu_dependency_rules').as_posix()
)
aspect_graph_builder = Aspect2AspectGraph(aspects_per_edu=aspects_per_edu)


def _iter_aspect_rules(relations, builder):
    """Yield ``(aspect_left, aspect_right, weight)`` for every aspect pair.

    Each relation is a sequence of 4-tuples whose first two items are passed
    to ``builder.aspects_iterator`` and whose last item is the weight; the
    third item is ignored. Pairs with equal left and right aspects are
    skipped.
    """
    for relation in relations:
        for edu_1, edu_2, _, weight in relation:
            for aspect_left, aspect_right in builder.aspects_iterator(edu_1, edu_2):
                if aspect_left == aspect_right:
                    continue
                yield aspect_left, aspect_right, weight


aspect_rules = list(
    _iter_aspect_rules(tqdm(aspect_relations.values()), aspect_graph_builder)
)

# Persist the rules as a three-column CSV next to the other results.
df = pd.DataFrame(aspect_rules, columns=['id1', 'id2', 'weight'])
df.to_csv(REVIEWS_RESULTS / 'aspect-rules.csv', index=False)
from aspects.graph.networkx.calculate_shortest_paths import calculate_shortest_paths_lengths from aspects.embeddings.graph.utils import preprocess_data, calculate_reconstruction_metrics from aspects.utilities.settings import DEFAULT_OUTPUT_PATH MAX_NUMBER_OF_NODES = 500 if __name__ == '__main__': data_path = DEFAULT_OUTPUT_PATH / 'reviews_Cell_Phones_and_Accessories-50000-docs' / 'our' dataset = torch.load( (data_path / 'aspect_2_aspect_graph-en_core_web_lg.en_core_web_lg.dataset' ).as_posix()) graph_path = data_path / 'aspect_2_aspect_graph.pkl' graph = serializer.load(graph_path) # sorted_nodes = sorted(list(graph.degree()), key=lambda node_degree_pair: node_degree_pair[1], reverse=True) # top_nodes = list(pluck(0, sorted_nodes[:MAX_NUMBER_OF_NODES])) # graph = graph.subgraph(top_nodes) shortest_paths_path = graph_path.with_suffix('.shortest_paths.pkl') if not shortest_paths_path.exists(): calculate_shortest_paths_lengths(graph, shortest_paths_path) graph, graph_dists, mapping = preprocess_data(graph, shortest_paths_path) model = torch.load( (data_path / 'aspect_2_aspect_graph-en_core_web_lg.en_core_web_lg.model' ).as_posix()) print(
def _setup_link_parse_tree_189(self):
    """Load the tree at ``settings.SAMPLE_TREE_189`` into ``self.link_tree``."""
    sample_tree_path = settings.SAMPLE_TREE_189.as_posix()
    self.link_tree = serializer.load(sample_tree_path)