def preprocess_data(graph: nx.Graph, shortest_paths_path: Union[str, Path]):
    """Load shortest paths and relabel the graph's nodes to consecutive integers.

    Args:
        graph: Source graph. It is wrapped in a fresh ``nx.DiGraph`` (when
            directed) or ``nx.Graph`` (otherwise) before being relabelled.
        shortest_paths_path: Location passed to ``serializer.load`` to read
            the shortest-paths data.

    Returns:
        A ``(graph, graph_dists, mapping)`` tuple: the relabelled graph, the
        result of ``nodes_to_mtx(shortest_paths, mapping)``, and the dict
        that maps every original node to its integer index.
    """
    shortest_paths = serializer.load(shortest_paths_path)

    # Node i (in graph.nodes() iteration order) gets index i from np.arange.
    node_indices = np.arange(0, graph.number_of_nodes())
    mapping = dict(zip(list(graph.nodes()), node_indices))

    # Keep directedness, but normalise to the plain networkx base classes.
    graph_cls = nx.DiGraph if nx.is_directed(graph) else nx.Graph
    relabelled = nx.relabel_nodes(graph_cls(graph), mapping)

    graph_dists = nodes_to_mtx(shortest_paths, mapping)
    return relabelled, graph_dists, mapping
# Example no. 2
# 0
def prepare_aspect_graph(
    experiment_paths: ExperimentPaths, ) -> Tuple[Graph, ExperimentPaths]:
    """Load the aspect hierarchical tree, prune it and wrap it as a ``Graph``.

    Steps performed:
      1. Load the networkx graph stored at
         ``experiment_paths.aspect_hierarchical_tree``.
      2. Log ``MIN_DEGREE`` to mlflow as ``min_aspect_graph_degree``.
      3. Remove every node whose degree is strictly greater than
         ``MIN_DEGREE`` (node counts are printed before and after).
      4. Convert to graph-tool via ``networkx_2_graph_tool``, drop self
         loops and reindex the edges.

    Args:
        experiment_paths: Paths object providing ``aspect_hierarchical_tree``.

    Returns:
        ``(Graph(aspect_graph), experiment_paths)`` - the wrapped, pruned
        graph together with the unchanged paths object.
    """
    tree_path = experiment_paths.aspect_hierarchical_tree
    # The message used to print ``aspect_to_aspect_graph`` although that file
    # is never read here; report the path that is actually loaded.
    logger.info(f"Load aspect hierarchical tree - {tree_path}")
    aspect_graph = serializer.load(tree_path)

    mlflow.log_param("min_aspect_graph_degree", MIN_DEGREE)
    # NOTE(review): nodes with degree *above* MIN_DEGREE are dropped, while the
    # logged parameter is called "min_..." - confirm this direction is intended.
    remove = [
        node for node, degree in dict(aspect_graph.degree()).items()
        if degree > MIN_DEGREE
    ]
    print(f'nodes: {len(aspect_graph.nodes())}')
    aspect_graph.remove_nodes_from(remove)
    print(f'nodes: {len(aspect_graph.nodes())}')

    aspect_graph = networkx_2_graph_tool(aspect_graph,
                                         node_name_property="aspect_name")
    remove_self_loops(aspect_graph)
    # Reindex after the self-loop removal above.
    aspect_graph.reindex_edges()
    return Graph(aspect_graph), experiment_paths
# Index every entry found directly under RESULTS_PATH by its base name.
results_dirs = {entry.name: entry for entry in RESULTS_PATH.glob('*')}

# Sidebar picker over the sorted names; the last one is pre-selected.
st.sidebar.title('Which dataset do you want to analyze?')
dataset_names = sorted(results_dirs.keys())
results_dir_name = st.sidebar.selectbox(
    '',
    dataset_names,
    index=len(results_dirs) - 1,
)

# List what the chosen results directory contains.
st.header('Available files and directories for:')
st.info(results_dir_name)
selected_dir = results_dirs[results_dir_name]
st.write([child.name for child in selected_dir.glob('*')])
paths = ExperimentPaths('', RESULTS_PATH / results_dir_name)

st.header('Discourse Trees Data Frame structure')
# Disabled view: sample of the discourse-trees data frame.
# discourse_tree_df_cache = st.cache(serializer.load)
# discourse_tree_df = discourse_tree_df_cache(paths.discourse_trees_df)
# st.write(discourse_tree_df.sample(5))

# The sentiments are loaded, but rendering them is currently disabled.
aspect_sentiments = dict(serializer.load(paths.aspect_sentiments))
st.header('Aspect sentiments')
# st.write(aspect_sentiments)

# Disabled view: aspect-to-aspect graph rendered through pydot.
# arrg = serializer.load(paths.aspect_to_aspect_graph)
# arrg_dot = nx.nx_pydot.to_pydot(arrg)
# st.graphviz_chart(arrg_dot.to_string())

# Render the aspect hierarchical tree as a Graphviz chart.
aht = serializer.load(paths.aspect_hierarchical_tree)
aht_dot = nx.nx_pydot.to_pydot(aht)
aht_dot_source = aht_dot.to_string()
st.graphviz_chart(aht_dot_source)
# Example no. 4
# 0
from tqdm import tqdm

from aspects.aspects.aspects_graph_builder import Aspect2AspectGraph
from aspects.data_io import serializer
from aspects.utilities import settings

# Results folder for the Cell Phones and Accessories reviews, located two
# levels above the default output path.
REVIEWS_RESULTS = (
    settings.DEFAULT_OUTPUT_PATH.parent.parent
    / 'results'
    / 'reviews_Cell_Phones_and_Accessories'
)

# Earlier, serializer-based loading kept for reference:
# aspects_per_edu = serializer.load((REVIEWS_RESULTS / 'aspects_per_edu').as_posix())
# edus = serializer.load('results/reviews_Cell_Phones_and_Accessories/raw_edu_list')
# documents_info = serializer.load((REVIEWS_RESULTS / 'documents_info').as_posix())

# Aspects per EDU are read from the JSON export.
aspects_per_edu_path = REVIEWS_RESULTS / 'aspects_per_edu.json'
with open(aspects_per_edu_path.as_posix(), 'r') as aspects_file:
    aspects_per_edu = json.load(aspects_file)

dependency_rules_path = REVIEWS_RESULTS / 'edu_dependency_rules'
aspect_relations = serializer.load(dependency_rules_path.as_posix())

aspect_graph_builder = Aspect2AspectGraph(aspects_per_edu=aspects_per_edu)

# Collect (left aspect, right aspect, weight) triples for every aspect pair
# yielded for the two EDUs of a relation entry, skipping identical pairs.
aspect_rules = []
for relation in tqdm(aspect_relations.values()):
    for edu_1, edu_2, _, weight in relation:
        aspect_pairs = aspect_graph_builder.aspects_iterator(edu_1, edu_2)
        aspect_rules.extend(
            (left, right, weight)
            for left, right in aspect_pairs
            if left != right
        )

# Write the collected rules as a CSV without the index column.
df = pd.DataFrame(aspect_rules, columns=['id1', 'id2', 'weight'])
df.to_csv(REVIEWS_RESULTS / 'aspect-rules.csv', index=False)
# Example no. 5
# 0
from aspects.graph.networkx.calculate_shortest_paths import calculate_shortest_paths_lengths
from aspects.embeddings.graph.utils import preprocess_data, calculate_reconstruction_metrics
from aspects.utilities.settings import DEFAULT_OUTPUT_PATH

MAX_NUMBER_OF_NODES = 500

if __name__ == '__main__':
    data_path = DEFAULT_OUTPUT_PATH / 'reviews_Cell_Phones_and_Accessories-50000-docs' / 'our'

    dataset = torch.load(
        (data_path /
         'aspect_2_aspect_graph-en_core_web_lg.en_core_web_lg.dataset'
         ).as_posix())

    graph_path = data_path / 'aspect_2_aspect_graph.pkl'
    graph = serializer.load(graph_path)

    # sorted_nodes = sorted(list(graph.degree()), key=lambda node_degree_pair: node_degree_pair[1], reverse=True)
    # top_nodes = list(pluck(0, sorted_nodes[:MAX_NUMBER_OF_NODES]))
    # graph = graph.subgraph(top_nodes)

    shortest_paths_path = graph_path.with_suffix('.shortest_paths.pkl')
    if not shortest_paths_path.exists():
        calculate_shortest_paths_lengths(graph, shortest_paths_path)
    graph, graph_dists, mapping = preprocess_data(graph, shortest_paths_path)

    model = torch.load(
        (data_path /
         'aspect_2_aspect_graph-en_core_web_lg.en_core_web_lg.model'
         ).as_posix())
    print(
 def _setup_link_parse_tree_189(self):
     """Load the tree stored at ``settings.SAMPLE_TREE_189`` into ``self.link_tree``."""
     sample_tree_path = settings.SAMPLE_TREE_189.as_posix()
     self.link_tree = serializer.load(sample_tree_path)