def topologically_sort_vtables(all_vtables: dict, type_: str) -> List[int]:
    graph = Graph()
    for name, vtables in all_vtables[type_].items():
        classes = list(dict.fromkeys(reversed(vtables)))
        for i in range(len(classes) - 1):
            graph.add_edge(classes[i + 1], classes[i])
    return graph.topological_sort()
def __init__(self, fname):
    """ 2013-07-24: Create a graph of the file """
    self.astGraph = Graph()
    self.processFile(fname)
def main() -> None:
    parser = argparse.ArgumentParser(
        description="Shows AI classes with non-trivial class hierarchies.")
    parser.add_argument("--type", help="AI class type to visualise",
                        choices=["Action", "AI", "Behavior", "Query"],
                        required=True)
    parser.add_argument(
        "--out-names",
        help="Path to which a vtable -> name map will be written",
        required=True)
    args = parser.parse_args()

    all_vtables = ai_common.get_vtables()

    graph = Graph()
    reverse_graph = Graph()
    build_graph(all_vtables, args.type, graph, reverse_graph)

    interesting_nodes = set()
    node_colors = dict()
    colors = [
        "#c7dcff", "#ffc7c7", "#ceffc7", "#dcc7ff", "#fffdc9", "#c9fff3",
        "#ffe0cc", "#ffcffe", "#96a8ff"
    ]
    components = graph.find_connected_components()
    num_nontrivial_cc = 0
    for i, comp in enumerate(components):
        if len(comp) == 2:
            continue
        for node in comp:
            node_colors[node] = colors[i % len(colors)]
        num_nontrivial_cc += 1
        interesting_nodes |= set(comp)

    print("digraph {")
    print("node [shape=rectangle]")
    for u in graph.nodes:
        if u not in interesting_nodes:
            continue
        for v in graph.nodes[u]:
            shape_u = "shape=component," if "[V]" not in u else ""
            shape_v = "shape=component," if "[V]" not in v else ""
            print(f'"{u}" [{shape_u}style=filled, fillcolor="{node_colors[u]}"]')
            print(f'"{v}" [{shape_v}style=filled, fillcolor="{node_colors[v]}"]')
            print(f'"{u}" -> "{v}"')
    print("}")
    print(f"# {len(components)} connected components")
    print(f"# {num_nontrivial_cc} non-trivial connected components")

    yaml.add_representer(int, lambda dumper, data: yaml.ScalarNode(
        'tag:yaml.org,2002:int', f"{data:#x}"), Dumper=yaml.CSafeDumper)
    with Path(args.out_names).open("w") as f:
        yaml.dump(_known_vtables, f, Dumper=yaml.CSafeDumper)
def getDepGraph(moddict):
    depGraph = Graph()
    # build dependency graph
    for mod in moddict:
        deps = set(moddict[mod].getDependencies()["dependencies"])
        assert len(deps) <= 0 or deps.issubset(moddict.keys())
        for dep in deps:
            depGraph.addEdge(dep, mod)
    return depGraph
def create_graph(self, schedule):
    if os.path.isfile(self.graph_path):
        self.graph = GraphDecoder().load_from_file(self.graph_path)
    else:
        self.graph = Graph()
        self.create_stations(schedule.stops)
        self.create_sections(schedule)
        GraphEncoder().save_to_file(self.graph, self.graph_path)
    print('Stations: {}, Sections: {}'.format(len(self.graph.stations),
                                              len(self.graph.sections)))
def index():
    api_url = "https://api.stackexchange.com/2.2/questions?page=1&pagesize=100&order=desc&sort=activity&site=datascience"
    file_url = ".//resources//"
    extract = ExtractData(api_url, file_url)
    graph = Graph()
    extract.extractData()
    graph.createGraph(extract.stack_exchange_tags)
    if request.method == 'GET':
        return render_template('index.html', show_tag=False)
    else:
        query = request.form.to_dict()
        associated_tags = graph.findNeighborsOfaTag(str(query['query']).lower())
        return redirect(url_for('tag_results', tags=associated_tags))
def _build_model(self, config: dict, data_shape: tuple, num_classes: int) -> tuple:
    """
    Build the network model, optimizer, loss function and learning rate
    scheduler given the config.

    :param config: hyperparameter configuration
    :return: tuple (model, loss function, optimizer, learning rate scheduler)
    """
    skeleton_edges, center_joint = import_dataset_constants(
        self._base_config.dataset, ["skeleton_edges", "center_joint"])
    graph = Graph(skeleton_edges, center_joint=center_joint)
    # https://pytorch.org/docs/stable/generated/torch.nn.Module.html
    # noinspection PyPep8Naming
    Model = import_model(self._base_config.model)
    model = Model(data_shape, num_classes, graph, mode=self._base_config.mode,
                  **self._base_config.model_args).cuda()
    loss_function = torch.nn.CrossEntropyLoss().cuda()
    optimizer = session_helper.create_optimizer(config["optimizer"], model,
                                                config["base_lr"],
                                                **config["optimizer_args"])
    lr_scheduler = session_helper.create_learning_rate_scheduler(
        config["lr_scheduler"], optimizer, **config["lr_scheduler_args"])
    return model, loss_function, optimizer, lr_scheduler
def train_model_old(config, train_set, test_set, num_training_samples,
                    num_test_samples):
    graph = Graph(skeleton_edges, is_directed=True)
    shape = (config.batch_size, 3, 300, 25, 2)
    model = create_model(config, graph, shape)
    model.compile(optimizer=keras.optimizers.SGD(learning_rate=config.base_lr,
                                                 momentum=0.9, nesterov=True),
                  loss=keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=["accuracy", "top_k_categorical_accuracy"])
    model.summary()

    lr_scheduler = keras.optimizers.schedules.PiecewiseConstantDecay(
        config.steps,
        [config.base_lr**i for i in range(1, len(config.steps) + 2)])
    callbacks = [
        keras.callbacks.LearningRateScheduler(lr_scheduler),
        keras.callbacks.TensorBoard(
            os.path.join(config.log_path,
                         time.strftime("training_%Y_%m_%d-%H_%M_%S")),
            profile_batch="200,250"),
        keras.callbacks.ModelCheckpoint(
            os.path.join(config.checkpoint_path, "weights.{epoch:02d}.h5"),
            save_best_only=True)
    ]
    model.fit(train_set, epochs=config.epochs, validation_data=test_set,
              callbacks=callbacks)
def get_skeleton_imu_fusion_graph(skeleton_graph: Graph, imu_enhanced_mode: str,
                                  num_imu_joints: int, **kwargs):
    # create new edges for imu data
    new_edges = []
    if imu_enhanced_mode == "append_center":
        # append imu joints to skeleton center joint
        center_joint = kwargs.get("center_joint", skeleton_graph.center_joint)
        new_edges.extend((skeleton_graph.num_vertices + i, center_joint)
                         for i in range(num_imu_joints))
    elif imu_enhanced_mode == "append_right":
        # append imu joints to skeleton right wrist and right hip
        right_wrist_joint = kwargs["right_wrist_joint"]
        right_hip_joint = kwargs["right_hip_joint"]
        for i in range(num_imu_joints):
            new_edges.append((skeleton_graph.num_vertices + i, right_wrist_joint))
            new_edges.append((skeleton_graph.num_vertices + i, right_hip_joint))
    else:
        raise ValueError("Unsupported imu_enhanced_mode: " + imu_enhanced_mode)

    if kwargs.get("interconnect_imu_joints", False):
        for i in range(num_imu_joints):
            for j in range(i + 1, num_imu_joints):
                new_edges.append((skeleton_graph.num_vertices + i,
                                  skeleton_graph.num_vertices + j))

    return skeleton_graph.with_new_edges(new_edges)
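# Hypothetical usage sketch for the fusion graph above (the call and its
# keyword arguments are assumptions based on this function's signature, not a
# verified API): append two IMU joints to the skeleton's center joint and
# interconnect them.
# fused = get_skeleton_imu_fusion_graph(skeleton_graph, "append_center",
#                                       num_imu_joints=2,
#                                       interconnect_imu_joints=True)
# This adds edges (num_vertices, center), (num_vertices + 1, center) and
# (num_vertices, num_vertices + 1) before calling with_new_edges().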
def build_imu_graph(data_shape: tuple, num_signals: int = 0,
                    temporal_back_connections: int = 1,
                    inter_signal_back_connections=False) -> Graph:
    sequence_length, num_signals_0 = data_shape
    assert num_signals == 0 or (num_signals_0 % num_signals) == 0
    if num_signals == 0:
        num_signals = num_signals_0
    num_vertices = sequence_length * num_signals
    graph_edges = []
    for i in range(0, num_vertices, num_signals):
        # spatial connections (connections between all values at a single time step)
        # IMU data is in form (sequence_length = [N + 1], num_signals = [M + 1])
        # with samples TnSm and 0 <= n <= N; 0 <= m <= M
        # memory layout for vertices will therefore be:
        # T0S0, T0S1, T0S2, ..., T0SM, T1S0, T1S1, ..., T1SM, ..., TNSM
        for j in range(num_signals):
            for k in range(j + 1, num_signals):
                graph_edges.append((i + j, i + k))
                graph_edges.append((i + k, i + j))
        # temporal back connections
        for j in range(min(i // num_signals, temporal_back_connections)):
            for k in range(num_signals):
                for m in range(num_signals):
                    if k == m or inter_signal_back_connections:
                        graph_edges.append((i - num_signals * (j + 1) + k, i + m))
    return Graph(graph_edges, num_vertices)
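# Small worked example of the vertex layout described above (illustrative
# addition; assumes build_imu_graph and the project Graph class are available
# in this module). For data_shape = (2, 2) the vertices are 0 = T0S0,
# 1 = T0S1, 2 = T1S0, 3 = T1S1, so the generated edges are the spatial pairs
# within each time step plus one temporal back connection per signal:
# (0, 1), (1, 0), (2, 3), (3, 2), (0, 2), (1, 3)
imu_graph = build_imu_graph(data_shape=(2, 2))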
def main():
    g = Graph()
    v1 = Matrix([[2, 1]]).T
    v2 = Matrix([[1, 1]]).T
    g.add_vector(v1)
    g.add_vector(v2)

    p1 = v1.copy()
    p2 = orthogonal(p1, v2)
    g.add_vector(p1, color='tab:blue')
    g.add_vector(p2, color='tab:blue')

    n1 = normalize(p1)
    n2 = normalize(p2)
    g.add_vector(n1, color='tab:green')
    g.add_vector(n2, color='tab:green')
    g.show()
def get_dataset(filepath):
    with open(filepath, 'r', encoding="utf-8") as fp:
        reader = csv.reader(fp)
        source = []
        for row in reader:
            source.append(row)
    source = source[1:]

    def prepare(source):
        print("preparing data...")
        positive_graph = nx.Graph()
        negative_graph = nx.Graph()
        for row in source:
            u = row[1].lower()
            v = row[2].lower()
            w = float(row[3])
            # if w < 0:
            #     print(u + "---" + v + "---" + str(w))
            if w > 0:
                positive_graph.add_edge(u, v, weight=w)
            if w < 0:
                negative_graph.add_edge(u, v, weight=w)
        print("positive_number_of_nodes: " + str(positive_graph.number_of_nodes()))
        print("positive_number_of_edges: " + str(positive_graph.number_of_edges()))
        print("negative_number_of_nodes: " + str(negative_graph.number_of_nodes()))
        print("negative_number_of_edges: " + str(negative_graph.number_of_edges()))
        return positive_graph, negative_graph

    positive_graph, negative_graph = prepare(source)
    my_graph = Graph(positive_graph, negative_graph)
    print("getting triplets...")
    del source
    triplets = my_graph.get_triplets()
    vocab = my_graph.vocab.getnode2id()
    return triplets, vocab
def __init__(self, data_shape, num_classes: int, graph, **kwargs):
    super().__init__()
    num_layers = kwargs.get("num_layers", 10)
    edges = kwargs["rgb_patch_groups_edges"]
    edges = [tuple(map(int, edge.split(", "))) for edge in edges]
    graph = Graph(edges)
    self.agcn = agcn.Model(data_shape["rgb"], num_classes, graph,
                           num_layers=num_layers,
                           without_fc=kwargs.get("without_fc", False))
def build_graph(all_vtables: dict, type_: str, graph: Graph, reverse_graph: Graph):
    for name, vtables in all_vtables[type_].items():
        classes = [name] + list(reversed(vtables))
        # Each class has at least one parent, so the -1 is fine.
        for i in range(len(classes) - 1):
            from_ = classes[i]
            to_ = classes[i + 1]
            # Skip base classes to reduce noise.
            if to_ in BaseClasses:
                break
            reverse_graph.add_edge(to_, from_)

    guess_vtable_names(reverse_graph)

    for name, vtables in all_vtables[type_].items():
        classes = [name] + list(reversed(vtables))
        for i in range(len(classes) - 1):
            if classes[i + 1] in BaseClasses:
                break
            from_ = get_name_for_vtable(classes[i])
            to_ = get_name_for_vtable(classes[i + 1])
            graph.add_edge(from_, to_)
import numpy as np
from util.graph import Graph

A = np.array([[1, 2, 2, 1], [2, 2, 4, 4]])
B = np.array([[4, 4, 4, 4], [0, 0, 0, 0]])
C = np.array([[0, 1], [-1, 0]])
D = np.array([[-1, 0], [0, 1]])

g = Graph()
g.add_shape(A)
g.add_shape(A - B, color="tab:blue")
g.add_shape(3 * A, color="tab:green")
g.add_shape(C @ A, color="tab:orange")
g.add_shape(D @ A, color="tab:purple")
g.show()
import numpy as np
from util.graph import Graph
from sympy import *

u = np.array([-3, 1])
v = np.array([1, -1 / 3])

# Solve Ax=0 where A = (u v)
A = np.array([u, v, [0, 0]]).T
A, _ = Matrix(A).rref()
A = np.array(A, dtype='float')
print(A)

k, c = symbols('k c')
e = k - (1 / 3) * c
print(solveset(e, c))  # FiniteSet(3.0*k)

# Show that any value of k results in ku + cv = the 0 vector
k = 1
c = 3.0 * k
print(k * u + c * v)  # the 0 vector

g = Graph()
g.add_vector(u, color='tab:blue')
g.add_vector(v, color='tab:green')
g.show()
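# Quick check (illustrative addition): with c = 3k the combination vanishes
# for any k, confirming that u and v are linearly dependent.
for k in (2, -5, 0.5):
    print(k * u + (3.0 * k) * v)  # [0. 0.] each time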
lr_scheduler = keras.optimizers.schedules.PiecewiseConstantDecay(
    config.steps,
    [config.base_lr**i for i in range(1, len(config.steps) + 2)])
callbacks = [
    keras.callbacks.LearningRateScheduler(lr_scheduler),
    keras.callbacks.TensorBoard(
        os.path.join(config.log_path,
                     time.strftime("training_%Y_%m_%d-%H_%M_%S")),
        profile_batch="200,250"),
    keras.callbacks.ModelCheckpoint(
        os.path.join(config.checkpoint_path, "weights.{epoch:02d}.h5"),
        save_best_only=True)
]
model.fit(train_set, epochs=config.epochs, validation_data=test_set,
          callbacks=callbacks)


if __name__ == "__main__":
    cf = get_config()
    setattr(cf, "kernel_regularizer", keras.regularizers.l2(cf.weight_decay))
    graph = Graph(skeleton_edges, is_directed=True)
    model = create_model(cf, graph, data_shape)
    training_procedure = ModelTraining(cf, model, *load_data(cf))
    training_procedure.start()
class Simulation:
    def __init__(self):
        self.gtfs_path = os.path.join(os.path.dirname(__file__), '..', 'data')
        self.graph_path = os.path.join(os.path.dirname(__file__), '..', 'data',
                                       'graph.json')
        self.database_path = os.path.join(os.path.dirname(__file__), '..', 'data',
                                          'database')
        self.trains = []
        self.earliest_time = datetime.timedelta.max
        self.latest_time = datetime.timedelta.min

    def time_to_iteration(self, time):
        seconds = (time - self.earliest_time).total_seconds()
        return int(seconds / 60)

    def create_database_from_gtfs(self, path):
        if os.path.isfile(self.database_path):
            schedule = pygtfs.Schedule(self.database_path)
        else:
            schedule = pygtfs.Schedule(self.database_path)
            pygtfs.append_feed(schedule, path)
        return schedule

    def create_sections(self, schedule):
        n = 1
        r = len(schedule.routes)
        for route in schedule.routes:
            sys.stdout.write('\rCreating Graph: Route {} of {}'.format(n, r))
            sys.stdout.flush()
            n += 1
            for trip in route.trips:
                stops = []
                for stop_time in trip.stop_times:
                    stops.append([stop_time.stop_sequence, stop_time.stop_id])
                stops = sorted(stops, key=lambda x: x[0])
                for i in range(0, len(stops) - 2):
                    first_station = self.graph.get_station_by_id(stops[i][1])
                    second_station = self.graph.get_station_by_id(stops[i + 1][1])
                    if not self.graph.section_existing(first_station, second_station):
                        self.graph.create_section(first_station, second_station)
        print()

    def create_stations(self, stops):
        for stop in stops:
            if not self.graph.station_existing(stop.stop_id):
                self.graph.create_station(stop.stop_id, stop.stop_name)

    def create_graph(self, schedule):
        if os.path.isfile(self.graph_path):
            self.graph = GraphDecoder().load_from_file(self.graph_path)
        else:
            self.graph = Graph()
            self.create_stations(schedule.stops)
            self.create_sections(schedule)
            GraphEncoder().save_to_file(self.graph, self.graph_path)
        print('Stations: {}, Sections: {}'.format(len(self.graph.stations),
                                                  len(self.graph.sections)))

    def create_trains_from_trips(self, schedule):
        n = 1
        r = len(schedule.routes)
        for routes in schedule.routes:
            for trip in routes.trips:
                sys.stdout.write('\rCreating Trains: Route {} of {}'.format(n, r))
                sys.stdout.flush()
                self.trains.append(Train(trip))
                for stop_time in trip.stop_times:
                    if self.earliest_time > stop_time.arrival_time:
                        self.earliest_time = stop_time.arrival_time
                    if self.latest_time < stop_time.departure_time:
                        self.latest_time = stop_time.departure_time
            n += 1
        print()

    def create_event(self, time, sender, receiver):
        iteration = self.time_to_iteration(time)
        if iteration < 0:
            return None
        event = Event(iteration, sender, receiver)
        self.event_queue[iteration].append(event)

    def find_station(self, name):
        for station in self.graph.stations:
            if station.stop_name == name:
                return station
        return None

    def print_progress(self, station, time):
        d = len(station.collected_data.keys())  # amount of data at destination station
        o = len(self.graph.sections)  # overall amount of data
        n = station.stop_name  # name of destination station
        sys.stdout.write(
            '\r {} of {} section information has/have reached {} after {} min, collected: {}'
            .format(d, o, n, time, station.collected_data))
        sys.stdout.flush()

    def create_event_queue(self):
        total_iterations = self.time_to_iteration(self.latest_time)  # iterations = minutes
        print("Simulation will have {} steps".format(total_iterations))
        self.event_queue = [[] for n in range(total_iterations)]
        for train in self.trains:
            if train is None:
                continue
            for arrival in train.arrivals:
                station = self.graph.get_station_by_id(arrival[1])
                self.create_event(arrival[0], train, station)
            for departure in train.departures:
                station = self.graph.get_station_by_id(departure[1])
                self.create_event(departure[0], station, train)
            for on_section in train.on_section:
                first_station = self.graph.get_station_by_id(on_section[0][1])
                second_station = self.graph.get_station_by_id(on_section[1][1])
                section = self.graph.get_section(first_station, second_station)
                time = on_section[0][0] + datetime.timedelta(minutes=1)
                self.create_event(time, train, section)
                self.create_event(time, section, train)

    def run_event_queue(self):
        destination_station = None
        while destination_station is None:
            # destination = input("Please enter data destination station (x for abort): ")
            destination = 'Frankfurt(Main)Hbf'
            if destination == "x":
                print('Simulation aborted')
                return
            destination_station = self.find_station(destination)
        print(destination_station)
        for event_list in self.event_queue:
            if event_list == []:
                continue
            time = event_list[0].iteration
            for event in event_list:
                event.call()
            self.print_progress(destination_station, time)

    def main(self):
        # setup simulation from gtfs file
        now = time.time()
        schedule = self.create_database_from_gtfs(self.gtfs_path)
        print('Creating Database took {} seconds'.format(time.time() - now))
        now = time.time()
        self.create_graph(schedule)
        print('Creating Graph object took {} seconds'.format(time.time() - now))
        now = time.time()
        self.create_trains_from_trips(schedule)
        print('Creating Train objects took {} seconds'.format(time.time() - now))
        now = time.time()
        event_queue = self.create_event_queue()
        print('Creating event queue took {} seconds'.format(time.time() - now))
        now = time.time()
        self.run_event_queue()
        print('Running simulation took {} seconds'.format(time.time() - now))
m, _ = m.rref()
k = m.row(0)[2]
c = m.row(1)[2]
print('k and c, respectively:')
pprint([k, c])

print('ii')
u = Matrix([1, 2]).T  # first column of the original matrix
v = Matrix([-1, 1]).T  # second column of the original matrix
# We want a vector [3 2] in terms of u and v, so we use k and c
v1 = k.subs([(a, 3), (b, 2)]) * u
v2 = c.subs([(a, 3), (b, 2)]) * v
v1 = np.array(v1, dtype=float)[0]
v2 = np.array(v2, dtype=float)[0]
vv = v1 + v2
print('v1', v1)
print('v2', v2)
print('vv', vv)

u = np.array(u, dtype=float).T
v = np.array(v, dtype=float).T
g = Graph()
g.add_vector(v1, color='tab:blue')
g.add_vector(v2, color='tab:green')
g.add_vector(vv, color='tab:red')
g.add_vector(u, color='tab:orange')
g.add_vector(v, color='tab:olive')
g.show()
from util.graph import Graph
import pandas as pd
from util.significance import test_significance
from util.plot import add_binary_jitter, get_binary_distribution, plot_binary_distribution, plot_normal_distributions
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.interactive(False)

g = Graph()


def calc_network_overlaps(g):
    nominations = g.get_nominations()
    raw_coord_pairs = []
    for index, nomination in nominations.iterrows():
        if (not nomination['source'].is_volunteer
                and not nomination['target'].is_volunteer) or (
                    nomination['target'].is_volunteer
                    and nomination['source'].is_volunteer):
            coord_pair = {
                'network_overlap':
                    nomination['source'].get_network_overlap_with(nomination['target']),
                'completed':
                    1 if nomination['target'].completed_challenge() else 0
            }
            raw_coord_pairs.append(coord_pair)
from util.graph import Graph
from util.showing_functions import *
from util.path_finding import *
import time

graph = Graph({
    'A': ['B', 'C'],
    'B': ['D', 'A'],
    'C': ['D', 'A'],
    'D': ['E', 'F', 'B', 'C'],
    'E': ['D'],
    'F': ['D']
})

print()
print('The graph')
print(graph)
print()

print()
print('The Breadth-First Search')
start_time = time.time()
show_sorted_traversal(graph.bfs('A'))
print("--- %s milliseconds ---" % ((time.time() - start_time) * 1000))
print()

print()
print('The shortest path between two vertices')
start_time = time.time()
shortest_path('A', 'E', graph.bfs('A'))
print()
import numpy as np
from util.graph import Graph

v = np.array([3, 1])

g = Graph()
g.add_vector(0.5 * v)
g.add_vector(2 * v)
g.add_vector(3 * v)
g.add_vector(-v)
g.show()
import numpy as np
from util.graph import Graph

A = np.array([[2, 2, 0], [0, 3, 0]])
print(A)

g = Graph()
for v in A.T:
    g.add_vector(v)

A *= 2
print(A)
for v in A.T:
    g.add_vector(v, color='tab:blue')

g.show()
from timeit import timeit
import os
import shutil

TESTS = 10
data_structures = [
    Storage.queue, Storage.stack, Storage.multi_queue, Storage.multi_stack
]
parameters = [True, False]
graph_path = "../test_graphs"
complete_path = "../complete_tests"
results = "../results/"

for g in os.listdir(graph_path):
    print("Working on graph %s" % g)
    test_graph = Graph.read_graph(graph_path + '/' + g)

    print("Starting Edmond-Karp Test...", end=" ")
    with open(results + 'edmond_karp_results', 'a') as f:
        ek_test_instance = ek(test_graph)
        f.write(g + ',')
        f.write(str(timeit(ek_test_instance.max_flow, number=TESTS) / TESTS))
        f.write('\n')
    print("done")

    print("Starting Goldberg-Tarjan Test...", end=" ")
    with open(results + 'goldberg_tarjan_results', 'a') as f:
        gt_test_instance = gt(test_graph)
        f.write(g + ',')
        f.write(str(timeit(gt_test_instance.max_flow, number=TESTS) / TESTS))
        f.write('\n')
    print("done")

    for data_structure in data_structures:
def testGrap(self):
    g = Graph()
    # g.addNode("test")
    g.addNode("b")
    g.addEdge("test1", "b")
    g.addEdge("test2", "b")
    g.addEdge("root", "test2")
    g.addEdge("c", "test2")
    # g.addEdge("b", "c")
    print(g.toDot())
    print(g.dfs())

    list1 = ['physics', 'chemistry', 1997, 2000]
    print("list2[1:]: ", list1[1:])
from util.graph import Graph
from util.goldberg_tarjan import Goldberg_Tarjan as gt

g = Graph.read_graph('../test_graphs/test0-c50-n300-e600.graph')
my_gt = gt(g)
my_gt.max_flow()
def main():
    g = Graph()
    v1 = Matrix([[3, 0]]).T
    v2 = Matrix([[1, 2]]).T
    g.add_vector(v1, color='tab:green')
    g.add_vector(v2, color='tab:red')

    p1 = v1.copy()
    p2 = orthogonal(p1, v2)
    pprint([p1, p2])
    g.add_vector(p1, color='tab:blue')  # same as v1 (green) so it hides it
    g.add_vector(p2, color='tab:orange')
    g.show()
import numpy as np
from util.graph import Graph

A = np.array([
    [1, 0.2],
    [0, 1]
])
F = np.array([
    [1, 1, 2, 2, 1.4, 1.4, 2, 2, 1.4, 1.4],
    [1, 3, 3, 2.6, 2.6, 2, 2, 1.6, 1.6, 1]
])

g = Graph()
g.add_shape(F)

result = A @ F
print(result)
g.add_shape(result, color="tab:blue")
g.show()
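# Quick check (illustrative addition): A is a horizontal shear, so each
# column [x, y] of F maps to [x + 0.2 * y, y]; e.g. the corner [1, 3]
# maps to [1.6, 3].
print(A @ np.array([1, 3]))  # [1.6 3. ]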
import numpy as np
from util.graph import Graph

u = np.array([-3, 2])
v = np.array([-2, -3])

g = Graph()
g.add_vector(u)
g.add_vector(v)
g.show()
x, y = symbols('x y')
eq1 = Eq(2 * x - y, 0)
eq2 = Eq(-x + 2 * y, 3)
pprint(eq1)
pprint(eq2)
result = linsolve([eq1, eq2], [x, y])
print('x, y:', pretty(result))

# row "picture"
# See an intersection at [1, 2]
g_row = plot(solve(eq1, y)[0], show=False, line_color='tab:blue',
             xlim=[-5, 5], ylim=[-5, 5])
g_row.append(plot(solve(eq2, y)[0], show=False, line_color='tab:blue')[0])
g_row.show()

# col "picture"
g_col = Graph()
M, b = linear_eq_to_matrix([eq1, eq2], x, y)

# Show the column vectors in green
u = np.array(M.col(0)).T[0]
v = np.array(M.col(1)).T[0]
g_col.add_vector(u, color='tab:green')
g_col.add_vector(v, color='tab:green')

# Show [0, 3] in orange (2x the second column, added to the first)
w = 2 * v
g_col.add_vector(w, start=u, color='tab:orange')
g_col.show()
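# Quick numeric check of the column picture (illustrative addition): with
# x = 1 and y = 2, one times the first column plus two times the second
# column reproduces the right-hand side [0, 3].
print(1 * u + 2 * v)  # [0 3]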
import numpy as np
from util.graph import Graph

u = np.array([2, -1])

g = Graph()
g.add_vector(u)
g.add_vector(-u)
g.add_vector(2 * u)
g.add_vector(3 * u)
g.add_vector(-2 * u)
g.show()
import numpy as np
from util.graph import Graph
from sympy import Matrix

u = np.array([-1, 1])
v = np.array([2, 3])

# Solve Ax=0 where A = (u v)
A = np.array([u, v, [0, 0]]).T
A, _ = Matrix(A).rref()
A = np.array(A)
print(A)
# u and v are linearly independent because the only solution is k = c = 0

g = Graph()
g.add_vector(u, color='b')
g.add_vector(v, color='g')
g.show()
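# Quick check (illustrative addition): the 2x2 matrix with columns u and v
# has full column rank, so the only solution of k*u + c*v = 0 is k = c = 0.
print(np.linalg.matrix_rank(np.array([u, v]).T))  # 2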
class AstXmlParser:
    def __init__(self, fname):
        """ 2013-07-24: Create a graph of the file """
        self.astGraph = Graph()
        self.processFile(fname)

    def getGraph(self):
        return self.astGraph

    def processElement(self, xxs, projectName):
        """ 2013-07-22: process one <file> element from the output """
        filename = self._filename(xxs)
        for el in self._elements(xxs):
            edges = self._edges(el)
            name = self._elementName(el)
            kind = self._kind(el)
            node = Node(name, edge_list=edges, meta={"kind": kind})
            self.astGraph.add(node)

    def processFile(self, filename):
        """ 2013-08-17: Iterative xml parsing, adapted from:
        http://www.ibm.com/developerworks/library/x-hiperfparse/ """
        projectName = self._projectName(filename)
        stack = []
        context = etree.iterparse(filename, events=("start", "end"), tag=["file"])
        for event, elem in context:
            if event == "start":
                stack.append(elem.tag)
            elif stack.pop() != elem.tag:
                log.error("Unexpected empty stack during xml parsing")
            else:
                try:
                    self.processElement(elem, projectName)
                except Exception as exc:
                    log.error("Error during xml parsing: %s" % exc)
                if len(stack) == 0:
                    # element is safe to delete
                    elem.clear()
                    while elem.getprevious() is not None:
                        del elem.getparent()[0]
        del context

    def _edges(self, xxs, supertype=None):
        """ 2013-07-22: Edges are to the supertypes of a node,
        supertypes may have type arguments """
        edges = []
        supers = False
        for el in xxs.xpath("./supertypes/supertype"):
            supers = True
            name = el.xpath("./@name")[0]
            edge = Edge(name)
            edges.append(edge)
            # Get all type arguments to the supertype
            # This call should bypass the first for loop (supertypes)
            # and instead enter the second (type-args)
            for e in self._edges(el, supertype=name):
                edges.append(e)
        for el in xxs.xpath("./type-args/type"):
            if supers:
                log.error("Element %s has both supertypes and type arguments on the same level." % self._elementName(xxs))
                supers = False
            name = el.xpath("./@name")[0]
            if supertype is None:
                log.error("Type argument %s has no supertype." % name)
            edge = Edge(name, label=supertype)
            edges.append(edge)
        return edges

    def _elementName(self, xxs):
        return xxs.xpath("./expanded-name/@name")[0]

    def _elements(self, xxs):
        return xxs.xpath(".//element")

    def _filename(self, xxs):
        return xxs.xpath("./@local-name")[0]

    def _kind(self, xxs):
        """ 2013-08-17: Examples of kinds: CLASS, TYPE, PACKAGE """
        return xxs.xpath("./kind/text()")[0]

    def _projectName(self, filename):
        """ 2013-07-23: Get the value of the 'project' attribute in the
        <files> tag (there should only be one <files> tag) """
        _, elem = next(etree.iterparse(filename, tag="files"))
        results = elem.xpath("./@project")
        if results != []:
            return results[0]
        else:
            return "UNDEF"

    def _typeArguments(self, xxs):
        """ 2013-07-22: Return the type arguments of an ast node.
        If there's a node N<K,V>, the type arguments will be [K,V].
        For node N<M<K>>, type arguments are [M,K] """
        return xxs.xpath("./expanded-name/type-args//type/@name")