def render_task(arg):
    """
    Worker task that renders or copies a single source file into the target
    tree. It needs TARG and j2env set properly (done inside render()), also
    when it is run on a sub-process.

    :param arg: ``(filename, root)`` tuple, where ``root`` is the directory
        (relative to the current working directory) containing ``filename``.

    What happens depends on the file:

    * ``*.html``: rendered as a template and written UTF-8 encoded with a
      trailing newline; names starting with "_" are skipped.
    * symlink: recreated in the target, replacing an already existing link.
    * anything else: hardlinked into the target, replacing an already
      existing file so the task can run again on a populated target tree.
    """
    fn, root = arg
    src = join(root, fn)
    dst = normpath(join("..", TARG, src))
    lvl = root.count(os.sep)
    log("processing/f: %s" % src, nl=False)

    if fn.endswith(".html"):
        # we ignore html files starting with "_" (e.g. language specific templates)
        if fn.startswith("_"):
            return

        # assume it's a template and process it
        tmpl = j2env.get_template(src)
        # category is the file name without extension, cut at the first "-"
        c = fn.rsplit(".", 1)[0].split("-", 1)[0]
        content = tmpl.render(level=lvl, filename=fn, category=c)
        with open(dst, "wb") as output:
            output.write(content.encode("utf-8"))
            output.write(b"\n")

    elif islink(src):
        # we have a symlink, copy it
        if islink(dst):
            os.remove(dst)
        os.symlink(os.readlink(src), dst)

    else:
        # all other files, hardlink them
        # os.link() refuses to overwrite, so drop a stale target first
        # (islink() also catches a dangling symlink that exists() misses)
        if islink(dst) or exists(dst):
            os.remove(dst)
        os.link(src, dst)
def copy_aux_files():
    """
    This copies auxiliary files into the output file tree.
    For example, the *.bib files from the publications subdirectory.

    Files are hardlinked, and an already existing target is replaced.
    The working directory is temporarily switched to the publications
    directory and is switched back even when linking fails.
    """
    log("copying auxiliary files")

    def relink(src, dst):
        # os.link() refuses to overwrite, so drop a stale target first
        if exists(dst):
            os.remove(dst)
        os.link(src, dst)

    # publication files
    PUB = "publications"
    os.chdir(PUB)
    try:
        for bib in glob("*.bib"):
            relink(bib, normpath(join("..", TARG_FILES, bib)))
    finally:
        # never leave the process stranded inside the publications directory
        os.chdir("..")

    # contributors for the devmap
    for xml in [join("conf", "geocode.xml"), join("conf", "contributors.xml")]:
        relink(xml, join(TARG, "res", basename(xml)))

    # mirror_manager.py files
    for mm in ["metalink.helper", "torrent.helper", "mirror_list"]:
        relink(join("scripts", mm), join(TARG, mm))
def copy_aux_files():
    """
    This copies auxiliary files into the output file tree.
    For example, the *.bib files from the publications subdirectory.

    Every ``*.bib`` file in the publications directory is hardlinked into
    TARG_FILES. The working directory is switched to the publications
    directory for the duration of the copy and switched back afterwards,
    also when linking fails.
    """
    log("copying auxiliary files")

    PUB = "publications"
    os.chdir(PUB)
    try:
        for bib in glob("*.bib"):
            # "..": the target path is relative to the parent of PUB
            dst = normpath(join("..", TARG_FILES, bib))
            os.link(bib, dst)
    finally:
        # do not leave the process stranded inside the publications directory
        os.chdir("..")
def index_changelogs():
    """
    Return the names of all ``changelogs/*.txt`` files, highest sort key first.

    A file name has the form ``<name>-<version>.txt``. The sort key is a flag
    (0 when ``<name>`` is "pre", 1 otherwise) followed by the integer
    components of the dotted version. The result is a reverse iterator, not
    a list.
    """
    log("indexing changelogs")

    def version_key(filename):
        # exactly one "-" is expected, anything else raises ValueError
        name, version = splitext(filename)[0].split("-")
        flag = 0 if name == "pre" else 1
        return [flag] + [int(part) for part in version.split(".")]

    names = []
    for path in glob(join("changelogs", "*.txt")):
        names.append(basename(path))
    names.sort(key=version_key)
    return reversed(names)
def index_changelogs():
    """
    List the changelog files in ``changelogs/`` in descending key order.

    Only the base names of ``*.txt`` files are returned, as a reverse
    iterator. Names look like ``<name>-<version>.txt``; files whose
    ``<name>`` is "pre" sort below all others, and within each group the
    dotted version is compared number by number.
    """
    log("indexing changelogs")

    def sortlogs(fn):
        stem, _ext = splitext(basename(fn))
        # unpacking fails with ValueError unless there is exactly one "-"
        name, version = stem.split("-")
        key = [int(name != "pre")]
        key += [int(number) for number in version.split(".")]
        return key

    pattern = join("changelogs", "*.txt")
    ordered = sorted((basename(path) for path in glob(pattern)), key=sortlogs)
    return reversed(ordered)
def normalize(sizes: dict, min_val: float = None,
              max_val: float = None) -> dict:
    """Linearly rescales the values of ``sizes`` into [min_val, max_val].

    :param sizes: mapping of key -> numeric size.
    :param min_val: value the smallest size is mapped to.
    :param max_val: value the largest size is mapped to.
    :return: new dict with the same keys and rescaled values. ``sizes``
        itself is returned untouched when either bound is None.
        An empty mapping yields an empty dict, and when all sizes are equal
        every key is mapped to ``min_val``.
    """
    if min_val is None or max_val is None:
        log("Sizes not normalized")
        return sizes

    if not sizes:
        # nothing to scale; np.min/np.max would raise on an empty list
        return {}

    values = list(sizes.values())
    sizes_min = np.min(values)
    sizes_max = np.max(values)
    span = sizes_max - sizes_min

    if span == 0:
        # all sizes are equal: scaling would divide by zero and yield NaN
        return dict.fromkeys(sizes, min_val)

    target_span = max_val - min_val
    return {
        key: min_val + (value - sizes_min) * target_span / span
        for key, value in sizes.items()
    }
def get_raw_weights():
    """Counts, for every game, how many users in users_master.json own it.

    The result is cached on ``self.raw_weights``; when that cache is already
    filled it is returned without reading the file. Users without a 'g'
    entry, or whose games string the encoder rejects with an AssertionError,
    are skipped.
    """
    cached = self.raw_weights
    if cached is not None:
        return cached

    with open(self.users_master_json_path) as f:
        json_users = json.load(f)
    total_users = len(json_users)

    encoder = Encoder()
    games_raw_weights = {}
    for i, user in enumerate(json_users):
        # progress report every 10000 users
        if i % 10000 == 0:
            log('Progress: {}/{} ({:.3f}%)'.format(
                i, total_users, i / total_users * 100))

        try:
            games_alpha = user['g']
        except KeyError:
            continue

        try:
            games_arr = encoder.decode_games_string(games_alpha)
        except AssertionError:
            continue

        for game in games_arr:
            games_raw_weights[game] = games_raw_weights.get(game, 0) + 1

    self.raw_weights = games_raw_weights
    return games_raw_weights
def _build_dicts(self, json_users: list) -> tuple:
    """Given the json_master file, this method creates user:games and
    game:users dicts for further transformation.

    Users for which ``_get_games_array`` raises AssertionError (no games)
    are left out of both dicts.

    :param json_users: List of users and their games (raw input).
    :return: (users_dict, games_dict) tuple. users_dict maps a user name to
        that user's games; games_dict maps a game to the list of names of
        the users owning it.
    """
    log('Building users_dict & games_dict...')
    users_dict = {}
    owners_by_game = {}
    total_users = len(json_users)

    for i, user in enumerate(json_users):
        # progress report every 10000 users
        if i % 10000 == 0:
            log('progress: {}/{}: {:.3f}%'.format(
                i, total_users, i / total_users * 100))

        try:
            user_games = self._get_games_array(user)
        except AssertionError:  # raised if user has no games
            continue

        user_name = self._get_user_name(user)
        users_dict[user_name] = user_games
        for game in user_games:
            # The first owner must be stored as a one-element set.
            # set(user_name) would split the name into single characters.
            owners_by_game.setdefault(game, set()).add(user_name)

    # sets were only needed for de-duplication; callers get plain lists
    games_dict = {
        game: list(owners) for game, owners in owners_by_game.items()
    }
    return users_dict, games_dict
def copy_aux_files():
    """
    This copies auxiliary files into the output file tree.
    For example, the *.bib files from the publications subdirectory.

    Files are hardlinked. A target that already exists is removed first, so
    the function can run again on a populated output tree. The working
    directory is switched to the publications directory while the *.bib
    files are linked and is switched back even when linking fails.
    """
    log("copying auxiliary files")

    # publication files
    PUB = "publications"
    os.chdir(PUB)
    try:
        for bib in glob("*.bib"):
            dst = normpath(join("..", TARG_FILES, bib))
            # os.link() raises FileExistsError on an existing target
            if exists(dst):
                os.remove(dst)
            os.link(bib, dst)
    finally:
        os.chdir("..")

    # (source, destination) pairs, relative to the original directory
    links = []
    # contributors for the devmap
    for name in ["geocode.xml", "contributors.xml"]:
        links.append((join("conf", name), join(TARG, "res", name)))
    # mirror_manager.py files
    for mm in ["metalink.helper", "torrent.helper", "mirror_list"]:
        links.append((join("scripts", mm), join(TARG, mm)))

    for src, dst in links:
        if exists(dst):
            os.remove(dst)
        os.link(src, dst)
def render():
    """
    Render the whole source tree into the target tree.

    Steps, in order:

    1. make sure the "www" directory exists and log the configuration,
    2. change into SRC and set up the global jinja2 environment ``j2env``
       (template directories, config globals, custom filters),
    3. walk the source tree: mirror directories (and directory symlinks)
       into TARG, then hand each file to render_task(),
    4. change back to the parent directory and call copy_aux_files().

    Sub-trees whose first path component is listed in IGNORE_PATHS are not
    processed.
    """
    global j2env

    if not exists("www"):
        os.mkdir("www")

    log("config: {} version {} @ {}, {} mirrors and {} spkgs".format(
        config["sage"], config["version"], config["releasedate"],
        len(mirrors), len(packages["spkg"])))

    # everything is now rooted in the src directory
    os.chdir(SRC)

    tmpl_dirs = [join("..", _) for _ in ["publications", "templates", "src"]]
    j2loader = j2.FileSystemLoader(tmpl_dirs)
    j2env = j2.Environment(loader=j2loader, undefined=j2.StrictUndefined)
    j2env.globals.update(config)
    j2env.filters["prefix"] = filter_prefix
    j2env.filters["markdown"] = filter_markdown

    IGNORE_PATHS = ["old"]

    # NOTE: parallel rendering is disabled (bad error handling), so no
    # worker pool is created here; an unused pool would only spawn idle
    # processes that are never closed. To re-enable, create an mp.Pool()
    # *after* the global vars above are set and use
    # pool.map(render_task, [(_, root) for _ in filenames]).

    for root, paths, filenames in os.walk("."):
        # check if we ignore a branch in a sub-tree
        root_split = root.split(os.sep)
        if len(root_split) > 1 and root_split[1] in IGNORE_PATHS:
            continue

        # path need to exist in the target before we copy and process the files
        for path in paths:
            src = join(root, path)
            dst = normpath(join("..", TARG, src))
            log("processing/d: %s" % src, nl=False)

            # we have to take care of symlinks here, too!
            if islink(src):
                # replace a link left over from an earlier run, the same way
                # render_task() does for file symlinks
                if islink(dst):
                    os.remove(dst)
                os.symlink(os.readlink(src), dst)
            elif not exists(dst):
                os.makedirs(dst)

        for fn in filenames:
            render_task((fn, root))

    log("processing: done", nl=False)
    os.chdir("..")

    copy_aux_files()
def get_graph(self, std_coefficient=4.5, min_neighbours=5,
              **kwargs) -> nx.Graph:
    """Returns NetworkX graph of games.

    1. An adjacency matrix is calculated for all games.
    2. Adjacency matrix is filtered - low adjacency pairs are discarded.
       For each game, filter threshold is calculated as:
       MEAN + std_coefficient * STANDARD_DEVIATION
    3. A graph is constructed.

    The graph is cached on ``self.graph``: once it has been built, later
    calls return the cached graph and ignore their arguments.

    :param std_coefficient: standard deviation coefficient in filter
        threshold calculation.
    :type std_coefficient: float
    :param min_neighbours: minimal number of neighbours a game should have.
    :type min_neighbours: int
    :param kwargs: passed through to ``get_filtered_adjacency_matrix``.
    :return: NetworkX.Graph of games.
    """
    if self.graph is not None:
        return self.graph

    mtx, ind = self.get_filtered_adjacency_matrix(
        std_coefficient=std_coefficient,
        min_neighbours=min_neighbours,
        **kwargs)

    log('Building graph...')
    nonzero = np.count_nonzero(mtx, axis=1)
    log('Non-zero elements in filtered adjacency matrix: '
        'median: {}, mean: {:.3f}, std: {:.3f}'.format(
            np.median(nonzero), np.mean(nonzero), np.std(nonzero)))

    # from_numpy_matrix() was removed in NetworkX 3.0; from_numpy_array()
    # is the supported replacement
    graph = nx.from_numpy_array(mtx)

    # nodes are numbered by matrix row; relabel them with the entries of ind
    mapping = dict(zip(graph.nodes(), ind))
    labeled_graph = nx.relabel_nodes(graph, mapping)
    log('Graph: # of nodes: {}, # of edges: {}'.format(
        len(labeled_graph.nodes()), len(labeled_graph.edges())))

    self.graph = labeled_graph
    return labeled_graph
def set_node_sizes(self, min_value=10, max_value=100,
                   func=lambda x: (math.log(x + 1, 5) + 1)**2):
    """Calculates node sizes and stores them as the 'size' node attribute.

    :param min_value: size assigned to the smallest node after scaling.
    :param max_value: size assigned to the largest node after scaling.
    :param func: Function to perform on node's weight.
        (Weight is number of game owners.)
    """

    def get_raw_weights():
        """Reads users_master.json file and returns weights of all games
        (number of users who have the game.) The result is cached on
        ``self.raw_weights``."""
        if self.raw_weights is not None:
            return self.raw_weights

        with open(self.users_master_json_path) as f:
            json_users = json.load(f)
        total_users = len(json_users)

        encoder = Encoder()
        games_raw_weights = {}
        for i, user in enumerate(json_users):
            # progress report every 10000 users
            if i % 10000 == 0:
                log('Progress: {}/{} ({:.3f}%)'.format(
                    i, total_users, i / total_users * 100))
            try:
                games_alpha = user['g']
            except KeyError:
                continue
            try:
                games_arr = encoder.decode_games_string(games_alpha)
            except AssertionError:
                continue
            for game in games_arr:
                games_raw_weights[game] = games_raw_weights.get(game, 0) + 1

        self.raw_weights = games_raw_weights
        return games_raw_weights

    def normalize(sizes: dict, min_val: float = None,
                  max_val: float = None) -> dict:
        """Linearly rescales the values of ``sizes`` into
        [min_val, max_val]; returns ``sizes`` untouched when a bound is
        None."""
        if min_val is None or max_val is None:
            log("Sizes not normalized")
            return sizes
        if not sizes:
            # np.min/np.max would raise on an empty list
            return {}

        values = list(sizes.values())
        sizes_min = np.min(values)
        span = np.max(values) - sizes_min
        if span == 0:
            # all sizes equal: scaling would divide by zero and yield NaN
            return dict.fromkeys(sizes, min_val)

        target_span = max_val - min_val
        return {
            key: min_val + (value - sizes_min) * target_span / span
            for key, value in sizes.items()
        }

    log("Calculating games weights...")
    sizes = {
        game: func(owners) for game, owners in get_raw_weights().items()
    }

    graph_node_sizes = {}
    for node in self.graph.nodes():
        try:
            graph_node_sizes[node] = sizes[int(node)]
        except (KeyError, ValueError):
            # unknown game, or a node label that is not a number
            log("Node {} not found in users_master.json file!".format(
                node))
            graph_node_sizes[node] = 1

    sizes_normalized = normalize(graph_node_sizes, min_value, max_value)
    nx.set_node_attributes(self.graph, sizes_normalized, 'size')
    log("Calculating games weights... Complete")
def write_graph(self):
    """Stores ``self.graph`` in GML format at ``self.graph_path``."""
    graph, target_path = self.graph, self.graph_path
    nx.write_gml(graph, target_path)
    log("Graph written successfully")
def read_graph(self) -> nx.Graph:
    """Loads the GML file at ``self.graph_path`` and returns the graph.

    The loaded graph is only returned; ``self.graph`` is not touched.
    """
    loaded_graph = nx.read_gml(self.graph_path)
    log("Graph loaded successfully")
    return loaded_graph
def normalize_to_color_space(pos: np.ndarray, min_val=0.25, max_val=0.9,
                             loss=0.1) -> np.ndarray:
    """Returns copy of position array normalized to be used as color.

    This algorithm is dumb, but calculations are cheap, so we can afford it!
    Each column is standardized, divided by a candidate factor, mapped so
    that [-1, 1] lands on [min_val, max_val] and clipped to that range.
    A fixed grid of factors is tried and the result whose clipped ratio is
    closest to ``loss`` is returned (the first one on ties).

    :param pos: position np.ndarray (two-dimensional)
    :param min_val: expected minimum value
    :param max_val: expected maximum value
    :param loss: expected loss of transformation - ratio of values clipped
        to lower or upper bound.
        Lower loss -> less clipping, values more pushed together
        Higher loss -> more clipping, values less pushed together
    :return: normalized position np.ndarray
    """
    # these do not depend on the candidate factor, so compute them once
    centered = pos - np.mean(pos, axis=0)
    spread = np.std(pos, axis=0)
    # a constant column has zero deviation; dividing by it would give NaN,
    # so leave such a column at its (zero) centered value instead
    spread = np.where(spread == 0, 1.0, spread)
    value_range = max_val - min_val
    cell_count = pos.shape[0] * pos.shape[1]

    def _normalize_pass(std_factor) -> tuple:
        """Returns (normalized positions, loss) tuple."""
        norm = centered / (spread * std_factor)
        # map [-1, 1] onto [min_val, max_val]
        norm += 1
        norm *= value_range
        norm /= 2
        norm += min_val

        # clip to the bounds and remember how many values were affected
        too_low = norm < min_val
        norm[too_low] = min_val
        too_high = norm > max_val
        norm[too_high] = max_val

        clipped_total = np.sum(too_low) + np.sum(too_high)
        return norm, clipped_total / cell_count

    best_result = None
    best_loss = None
    for factor in np.arange(0.1, 2, 0.025):
        candidate, candidate_loss = _normalize_pass(factor)
        # strict "<" keeps the earliest candidate when several are equally
        # close to the requested loss
        if (best_result is None
                or abs(loss - candidate_loss) < abs(loss - best_loss)):
            best_result = candidate
            best_loss = candidate_loss

    log('best loss:', best_loss)
    log('best result: mean: {:.3f} std: {:.3f}'.format(
        np.mean(best_result), np.std(best_result)))
    return best_result
def set_node_names(self):
    """Queries Steam's API to gather names for games present in the graph.

    The names are stored as the 'name' node attribute. A node whose id is
    not found in Steam's app list gets its own id (as a string) as name.
    If the request fails or the response cannot be interpreted, a message
    is logged and the graph is left unchanged.
    """
    # TODO offline json backup
    log("Querying Steam to get app names...")
    request = "https://api.steampowered.com/ISteamApps/GetAppList/v2/"

    try:
        # without a timeout requests waits forever on a stalled connection
        response = requests.get(request, timeout=30)
    except (requests.Timeout, TimeoutError):
        log('Steam API: timeout error')
        return
    except requests.RequestException as error:
        # connection errors, invalid responses, too many redirects, ...
        log('Steam API: request failed: {}'.format(error))
        return

    try:
        app_list = json.loads(response.text)['applist']['apps']
    except (ValueError, KeyError, TypeError):
        # not JSON at all, or JSON without the expected structure
        log("Steam API: wrong response")
        return

    apps = {}
    for app in app_list:
        try:
            apps[int(app['appid'])] = app['name']
        except (KeyError, TypeError, ValueError):
            log("error with app: {}".format(app))

    names_dict = {}
    for node in self.graph.nodes():
        try:
            name = apps[int(node)]
        except (KeyError, ValueError):
            # unknown app id, or a node label that is not a number
            log("name not found for game: {}".format(node))
            name = str(node)
        names_dict[node] = name

    nx.set_node_attributes(self.graph, names_dict, 'name')
    log("Querying Steam to get app names... Complete")
if games_str == "" or games_str == "error": raise AssertionError games_arr = Encoder().decode_games_string(games_str) return games_arr @staticmethod def _get_user_name(user) -> str: try: return user['p'] except KeyError: return 'NoName' if __name__ == '__main__': json_path = "test_users_master.json" graph_path = "test_master_graph.gml" gb = GraphBuilder(json_master_path=json_path) kwargs = { 'trim_min_users': 50, 'trim_optimal_users': 80, 'trim_optimal_number_of_games_per_user': 40 } G = gb.get_graph(std_coefficient=7, **kwargs) nx.write_gml(G, graph_path) log('Saved graph to ' + graph_path) gb.plot_info()
""" from subprocess import check_output, call, CalledProcessError try: check_output(["which", "xdotool"]) except CalledProcessError: print("You seem to not have installed 'xdotool'") print("$ sudo apt-get install xdotool") else: # firefox or chrome? for browser in ["Chrome", "Mozilla Firefox", "Chromium"]: try: check_output(["xdotool", "search", "--name", "%s" % browser]) except CalledProcessError: continue print("RELOAD ==> detected '%s' and sending Shift+Ctrl+R" % browser) call([ 'xdotool', 'search', "--name", "%s" % browser, 'key', '--clearmodifiers', 'ctrl+shift+r' ]) break else: print("==> sorry, could not find your browser?") if __name__ == '__main__': render() if len(sys.argv) >= 2 and sys.argv[-1] == "reload": reload() log('Finished')
""" This little helper will shift-reload the open tab in your web-browser via ctrl-shift-F5 -- but only if you have xdotool installed! """ from subprocess import check_output, call, CalledProcessError try: check_output(["which", "xdotool"]) except CalledProcessError: print("You seem to not have installed 'xdotool'") print("$ sudo apt-get install xdotool") else: # firefox or chrome? for browser in ["Chrome", "Mozilla Firefox", "Chromium"]: try: check_output(["xdotool", "search", "--name", "%s" % browser]) except CalledProcessError: continue print("RELOAD ==> detected '%s' and sending Shift+Ctrl+R" % browser) call(['xdotool', 'search', "--name", "%s" % browser, 'key', '--clearmodifiers', 'ctrl+shift+r']) break else: print("==> sorry, could not find your browser?") if __name__ == '__main__': render() if len(sys.argv) >= 2 and sys.argv[-1] == "reload": reload() log('Finished')
def _trim_games_dict(users_dict: dict, games_dict: dict, **kwargs) -> dict:
    """Trims a games_dict to a minimal size.

    The source games_dict might have hundreds of thousands of users, and not
    all of them add meaningful information about relationships between
    games. This function drops as many users as possible while still leaving
    every kept game a sizable and diverse pool of users.

    Games with fewer than ``trim_min_users`` users are dropped. Games with
    fewer than ``trim_optimal_users`` users keep all of their users. Larger
    games keep only users that are already kept for some other game, topped
    up to ``trim_optimal_users`` with the users whose number of games is
    closest to ``trim_optimal_number_of_games_per_user``.

    :param users_dict: raw users_dict as created by _build_dicts()
    :type users_dict: dict
    :param games_dict: raw games_dict as created by _build_dicts()
    :type games_dict: dict
    :param kwargs: optional settings: ``trim_min_users`` (default 25),
        ``trim_optimal_users`` (default 100) and
        ``trim_optimal_number_of_games_per_user`` (default 20).
    :return: trimmed games_dict.
    """
    log('Trimming games dict...')
    min_users = kwargs.pop('trim_min_users', 25)
    optimal_users = kwargs.pop('trim_optimal_users', 100)
    optimal_number_of_games = kwargs.pop(
        'trim_optimal_number_of_games_per_user', 20)

    # users of mid-sized games are always kept
    saved_users = set()
    for owners in games_dict.values():
        if min_users <= len(owners) < optimal_users:
            saved_users.update(owners)

    def distance_from_optimum(user):
        return abs(optimal_number_of_games - len(users_dict[user]))

    result_dict = {}
    for game, owners in games_dict.items():
        owner_count = len(owners)
        if owner_count < min_users:
            continue
        if owner_count < optimal_users:
            result_dict[game] = owners
            continue

        owner_set = set(owners)
        missing = optimal_users - len(owner_set & saved_users)
        if missing > 0:
            # closest to the optimal number of games first
            candidates = sorted(owner_set - saved_users,
                                key=distance_from_optimum)
            saved_users.update(candidates[:missing])
        result_dict[game] = list(owner_set & saved_users)

    log('Trimmed games_dict: Total users remaining:', len(saved_users))
    log('Trimmed games_dict: Total games remaining:', len(result_dict))

    # sanity check: every saved user must appear under at least one game
    check = set().union(*result_dict.values())
    assert len(check) == len(saved_users)
    return result_dict
def set_node_colors(self, method='tsne_3d'):
    """Colorizes graph nodes and stores the result as the 'color' node
    attribute.

    The graph's adjacency matrix is embedded into three dimensions, the
    embedding is normalized into a color range and converted with
    ``self._to_hex``.

    :param method: embedding to use, 'tsne_3d' or 'pca_3d'. For any other
        value no colors are set.
    :return: None
    """
    log("Colorizing game nodes...")

    def normalize_to_color_space(pos: np.ndarray, min_val=0.25, max_val=0.9,
                                 loss=0.1) -> np.ndarray:
        """Returns copy of position array normalized to be used as color.

        This algorithm is dumb, but calculations are cheap, so we can
        afford it!

        :param pos: position np.ndarray (two-dimensional)
        :param min_val: expected minimum value
        :param max_val: expected maximum value
        :param loss: expected loss of transformation - ratio of values
            clipped to lower or upper bound.
            Lower loss -> less clipping, values more pushed together
            Higher loss -> more clipping, values less pushed together
        :return: normalized position np.ndarray
        """
        # these do not depend on the candidate factor: compute them once
        centered = pos - np.mean(pos, axis=0)
        spread = np.std(pos, axis=0)
        # a constant column has zero deviation; dividing by it gives NaN
        spread = np.where(spread == 0, 1.0, spread)
        value_range = max_val - min_val
        cell_count = pos.shape[0] * pos.shape[1]

        def _normalize_pass(std_factor) -> tuple:
            """Returns (normalized positions, loss) tuple."""
            norm = centered / (spread * std_factor)
            # map [-1, 1] onto [min_val, max_val]
            norm += 1
            norm *= value_range
            norm /= 2
            norm += min_val

            # clip to the bounds and remember how many values were affected
            too_low = norm < min_val
            norm[too_low] = min_val
            too_high = norm > max_val
            norm[too_high] = max_val

            clipped_total = np.sum(too_low) + np.sum(too_high)
            return norm, clipped_total / cell_count

        best_result = None
        best_loss = None
        for factor in np.arange(0.1, 2, 0.025):
            candidate, candidate_loss = _normalize_pass(factor)
            # strict "<" keeps the earliest candidate on ties
            if (best_result is None
                    or abs(loss - candidate_loss) < abs(loss - best_loss)):
                best_result = candidate
                best_loss = candidate_loss

        log('best loss:', best_loss)
        log('best result: mean: {:.3f} std: {:.3f}'.format(
            np.mean(best_result), np.std(best_result)))
        return best_result

    # to_numpy_matrix() was removed in NetworkX 3.0; to_numpy_array()
    # returns a plain ndarray
    mtx = nx.to_numpy_array(self.graph)

    embedding = None
    if method == 'tsne_3d':
        tsne = TSNE(n_components=3, verbose=True)
        embedding = tsne.fit_transform(mtx)
    elif method == 'pca_3d':
        pca = PCA(n_components=3)
        embedding = pca.fit_transform(mtx)

    if embedding is not None:
        normalized = normalize_to_color_space(embedding)
        hex_values = self._to_hex(normalized)
        hex_values_dict = dict(zip(list(self.graph.nodes()), hex_values))
        nx.set_node_attributes(self.graph, hex_values_dict, 'color')

    log("Colorizing game nodes... Complete")