def __init__(self):
    self.dict = self.init_db()
    self.finished = []
    # read user
    p = util.get_root("user", "codeforces")
    entries = os.listdir(p)
    self.finished = entries
def main():
    cache = get_cache()
    failed_uris = get_failed()
    parse_failed_uris = get_parse_failed()
    uris = cache.keys()
    peak_missing = [uri for uri in uris if LISTENERPEAK not in cache[uri]]
    peak_missing = set(peak_missing) - failed_uris
    # XXX: fetch_stream_infos is the same for each root url
    peak_missing = {get_root(uri) for uri in peak_missing}
    peak_missing = set(peak_missing) - parse_failed_uris

    pool = Pool(PROCESSES)
    try:
        pfunc = fetch_stream_infos
        for i, res in enumerate(pool.imap_unordered(pfunc, peak_missing)):
            uri, streams = res
            # checkpoint the cache every 1000 entries
            if (i + 1) % 1000 == 0:
                set_cache(cache)
            print("%d/%d %s -> %d new streams" % (
                i + 1, len(peak_missing), uri, len(streams)))
            if not streams:
                parse_failed_uris.add(uri)
            # add newly found uris to the cache + listener counts
            for stream in streams:
                peak = str(int(stream.peak))
                current = str(int(stream.current))
                uri = stream.stream
                if uri not in cache:
                    cache[uri] = {}
                if LISTENERPEAK in cache[uri]:
                    cache[uri][LISTENERPEAK].append(peak)
                else:
                    cache[uri][LISTENERPEAK] = [peak]
                if LISTENERCURRENT in cache[uri]:
                    cache[uri][LISTENERCURRENT].append(current)
                else:
                    cache[uri][LISTENERCURRENT] = [current]
    except Exception as e:
        print(e)
    finally:
        set_parse_failed(parse_failed_uris)
        set_cache(cache)
        pool.terminate()
        pool.join()
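# A minimal, self-contained sketch of the checkpointing pattern above:
# stream results from a worker pool with imap_unordered and persist the
# cache every N items, so a crash loses at most one batch. fetch_stub,
# CACHE_PATH and save_cache are illustrative stand-ins, not names from
# the original.
import json
from multiprocessing import Pool

CACHE_PATH = "cache.json"
SAVE_EVERY = 1000


def fetch_stub(uri):
    # stand-in for fetch_stream_infos: return the uri and a fake result
    return uri, [len(uri)]


def save_cache(cache):
    with open(CACHE_PATH, "w") as f:
        json.dump(cache, f)


def run(uris):
    cache = {}
    with Pool(4) as pool:
        for i, (uri, streams) in enumerate(pool.imap_unordered(fetch_stub, uris)):
            cache[uri] = streams
            if (i + 1) % SAVE_EVERY == 0:
                save_cache(cache)  # periodic checkpoint
    save_cache(cache)              # final flush


if __name__ == "__main__":
    run(["http://example.com/%d" % n for n in range(10)])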
def __init__(self):
    self.dict = self.init_db()
    self.finishes = []
    self.flasks = []
    # read user
    p = util.get_root("user", "leetcode")
    entries = os.listdir(p)
    for k in entries:
        if k.endswith(".cpp"):
            self.finishes.append(k)
        elif k.endswith(".md"):
            self.flasks.append(k)
def find_shortest_path(start_node: int):
    nodes = {
        index: (sys.maxsize, [index], UNVISITED)
        for index in neighbors.keys()
    }
    nodes[start_node] = (0, [start_node], FRONTIER)
    round = 0
    directory = os.path.realpath(
        os.path.join(get_root(), 'output/shortest', str(start_node)))
    if not os.path.exists(directory):
        os.mkdir(directory)
    with open(os.path.join(directory, str(round)), 'w',
              encoding='utf-8') as file:
        for key, value in nodes.items():
            file.write(f'{key},{value}\n')
    current = 0
    print(f'Searching from node {start_node}')
    print(f'Start {datetime.now()}')
    while True:
        done = True
        current_file = os.path.join(directory, str(round))
        print(current_file)
        job = ShortestPath(args=[current_file])
        with job.make_runner() as runner:
            runner.run()
            round += 1
            with open(os.path.join(directory, str(round)), 'w',
                      encoding='utf-8') as f:
                for key, row in job.parse_output(runner.cat_output()):
                    dist, path, state = row
                    f.write(f'{key},({dist}, {path}, {state})\n')
                    if state == UNVISITED or state == FRONTIER:
                        done = False
                    if state == FRONTIER:
                        current += 1
        print(f'{round} completed')
        if current == 0:
            print('No more nodes on the frontier, stopping')
            break
        current = 0
        if done:
            break
    print(f'Done {datetime.now()}')
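# The rounds above implement a level-synchronous breadth-first expansion:
# every node is UNVISITED, on the FRONTIER, or VISITED, and each MapReduce
# round moves the frontier one hop outward. A minimal in-memory sketch of
# the same state machine (the toy graph is illustrative):
import sys

UNVISITED, FRONTIER, VISITED = 0, 1, 2


def bfs_shortest_paths(graph, start):
    # dist, path, state per node -- the same triple the rounds serialize
    nodes = {n: [sys.maxsize, [n], UNVISITED] for n in graph}
    nodes[start] = [0, [start], FRONTIER]
    while True:
        frontier = [n for n, (_, _, s) in nodes.items() if s == FRONTIER]
        if not frontier:
            break  # no more nodes on the frontier, stopping
        for n in frontier:
            dist, path, _ = nodes[n]
            for m in graph[n]:
                if dist + 1 < nodes[m][0]:
                    nodes[m] = [dist + 1, path + [m], FRONTIER]
            nodes[n][2] = VISITED
    return nodes


# toy graph: 0 -> 1 -> 2, 0 -> 2
print(bfs_shortest_paths({0: [1, 2], 1: [2], 2: []}, 0))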
def get_tree_log_prob(self,
                      tree,
                      obs_embedding=None,
                      previous_sample_embedding=None,
                      inference_hidden=None,
                      obs=None):
    """Log probability of tree given obs.

    Args:
        tree: list or string
        obs_embedding: tensor [obs_embedding_dim]
        previous_sample_embedding: tensor [sample_embedding_dim]
        inference_hidden: tensor [inference_hidden_dim]
        obs: sentence (list of strings) or ys (torch.tensor of shape [100])

    Returns: log_prob (scalar tensor)"""

    if obs_embedding is None:
        obs_embedding = self.get_obs_embedding(obs)
    if previous_sample_embedding is None:
        previous_sample_embedding = torch.zeros(
            (self.sample_embedding_dim, ))
    if inference_hidden is None:
        inference_hidden = torch.zeros((self.inference_hidden_dim, ))

    if isinstance(tree, list):
        non_terminal = tree[0]
        sample_address_embedding = util.get_sample_address_embedding(
            non_terminal, self.grammar['non_terminals'])
        inference_gru_output = self.get_inference_gru_output(
            obs_embedding, previous_sample_embedding,
            sample_address_embedding, inference_hidden)
        subtrees = tree[1:]
        production = [util.get_root(subtree) for subtree in subtrees]
        production_index = util.get_production_index(
            non_terminal, production, self.grammar['productions'])
        sample_embedding = self.get_sample_embedding(production_index)
        logits = self.get_logits_from_inference_gru_output(
            inference_gru_output, non_terminal)
        dist = Categorical(logits=logits)
        log_prob = dist.log_prob(torch.tensor(production_index))
        subtree_log_probs = [
            self.get_tree_log_prob(subtree, obs_embedding,
                                   sample_embedding, inference_gru_output)
            for subtree in subtrees
        ]
        return log_prob + sum(subtree_log_probs)
    else:
        return torch.zeros(())
def get_tree_log_prob(self, tree):
    """Log probability of tree.

    Args:
        tree: list of lists or string

    Returns: scalar tensor
    """
    if isinstance(tree, list):
        non_terminal = tree[0]
        subtrees = tree[1:]
        production = [util.get_root(subtree) for subtree in subtrees]
        production_index = util.get_production_index(
            non_terminal, production, self.grammar['productions'])
        dist = Categorical(logits=self.production_logits[non_terminal])
        log_prob = dist.log_prob(torch.tensor(production_index))
        subtree_log_probs = [
            self.get_tree_log_prob(subtree) for subtree in subtrees
        ]
        return log_prob + sum(subtree_log_probs)
    else:
        return torch.zeros(())
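# A minimal, self-contained sketch of the recursion above for a toy PCFG:
# the log probability of a tree is the log probability of the production
# chosen at its root plus the log probabilities of its subtrees; leaves
# (strings) contribute zero. The toy grammar and the get_root stand-in are
# illustrative, not the project's util module or grammar.
import torch
from torch.distributions import Categorical

# toy grammar: S -> [NP, VP] | [VP]
productions = {'S': [['NP', 'VP'], ['VP']]}
production_logits = {'S': torch.zeros(2)}  # uniform over both productions


def get_root(tree):
    # stand-in for util.get_root: a subtree's root symbol, or the string itself
    return tree[0] if isinstance(tree, list) else tree


def get_tree_log_prob(tree):
    if isinstance(tree, list):
        non_terminal, subtrees = tree[0], tree[1:]
        production = [get_root(t) for t in subtrees]
        production_index = productions[non_terminal].index(production)
        dist = Categorical(logits=production_logits[non_terminal])
        log_prob = dist.log_prob(torch.tensor(production_index))
        return log_prob + sum(get_tree_log_prob(t) for t in subtrees)
    return torch.zeros(())  # terminal: probability 1


print(get_tree_log_prob(['S', 'NP', 'VP']))  # log(0.5)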
    # tail of create_app: fallback routes, static files, and the
    # configuration POST endpoint
    @app.errorhandler(404)
    def not_found(e):
        return send_file(os.path.join(static_folder, "index.html"))

    @app.route("/api/static/<path:path>")
    def send_static(path):
        return send_from_directory("static", path)

    @app.route("/api/conf", methods=["POST"])
    def post_set_conf():
        some_json = request.get_json()
        try:
            for i in some_json:
                for j in some_json[i]:
                    config.set_conf(section=i, element=j,
                                    value=some_json[i][j])
        except Exception:
            _, error, _ = sys.exc_info()
            return jsonify({"Error": "{err}".format(err=error)}), 500
        return jsonify(some_json), 200

    return app


if __name__ == "__main__":
    CONF_PATH = os.path.join(util.get_root(), "configuration.ini")
    conf = Configuration(CONF_PATH)
    STATIC_PATH = os.path.join(util.get_root(),
                               conf.get_conf("Client", "static-files-path"))
    APP = create_app(conf, STATIC_PATH, None)
    APP.run(debug=True)
import sys
import os
import logging
import json
from datetime import datetime
from enum import Enum
from typing import Callable

from ax_listener import AXFrame
from db_interface import TelemetryDB

import util

if getattr(sys, 'frozen', False):
    sys.path.append(os.path.join(util.get_root(), 'src'))


class TelemetryFrame:
    """ Data structure for the output of the telemetry listener that is sent
        to the repository. """

    def __init__(self, packet_timestamp: datetime, fields):
        self.timestamp = packet_timestamp
        self.fields = fields

    def __repr__(self):
        # __init__ only sets timestamp and fields, so only report those
        return "Timestamp: {}; fields: {}".format(self.timestamp, self.fields)


class TimestampType(Enum):
    """ Enum of the supported timestamp types. """
    unix = "unix_timestamp"
import csv
import os
import sys

from util import get_root

csv_filename = sys.argv[1]

with open(os.path.join(get_root(), 'output', csv_filename), 'r',
          encoding='utf-8') as f:
    reader = csv.reader(f, delimiter='|')
    sorted_list = sorted(reader, key=lambda x: int(x[1]), reverse=True)

with open(os.path.join(get_root(), 'output', f'sorted_{csv_filename}'), 'w',
          encoding='utf-8') as f:
    writer = csv.writer(f, delimiter='|')
    for row in sorted_list:
        writer.writerow(row)
from typing import List
import os

import pygame

import util

root = util.get_root()


class Text:
    def __init__(self, start: List[int], text: str, font, color):
        self.start = start
        self.text = font.render(text, True, color)

    def render(self, display):
        display.blit(self.text, self.start)


class FileText(Text):
    def __init__(self, start, text: str, font: str, size, color):
        font = pygame.font.Font(font, size)
        Text.__init__(self, start, text, font, color)


class ExoText(FileText):
    def __init__(self, start, text: str, size, color):
        FileText.__init__(self, start, text,
                          os.path.join(root, "fonts/Exo/regular.ttf"),
                          size, color)


class ExoTextLight(FileText):
    def __init__(self, start, text: str, size, color):
        FileText.__init__(self, start, text,
                          os.path.join(root, "fonts/Exo/light.ttf"),
                          size, color)
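# A minimal sketch of using the classes above: initialize pygame, render an
# ExoText onto a display, and flip. The window size, text, and event loop are
# illustrative, and the font file is assumed to exist under fonts/Exo/.
import pygame

pygame.init()
display = pygame.display.set_mode((640, 480))
label = ExoText([20, 20], "Hello", 32, (255, 255, 255))

running = True
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
    display.fill((0, 0, 0))
    label.render(display)
    pygame.display.flip()
pygame.quit()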
import json
import os
import sys

from util import get_root

filename = sys.argv[1]

with open(os.path.join(get_root(), 'output', filename), 'r',
          encoding='utf-8') as f:
    degrees = json.load(f)

with open(os.path.join(get_root(), 'output',
                       os.path.splitext(filename)[0] + '.csv'), 'w',
          encoding='utf-8') as f:
    f.write('x,y\n')
    for key, value in sorted(degrees.items(), key=lambda x: int(x[0])):
        f.write(f'{key},{value}\n')
import csv
import os
import sys

from util import get_root

filename = sys.argv[1]

with open(os.path.join(get_root(), 'output', filename), 'r',
          encoding='utf-8') as f:
    reader = csv.reader(f, delimiter='|')
    total_links = 0
    count = 0
    min_degree = sys.maxsize
    max_degree = -1
    for _, degree in reader:
        degree = int(degree)
        total_links += degree
        count += 1
        if degree < min_degree:
            min_degree = degree
        if degree > max_degree:
            max_degree = degree

with open(os.path.join(get_root(), 'output', f'statistics_{filename}'), 'w',
          encoding='utf-8') as f:
    f.write(f'Total pages: {count}\n')
    f.write(f'Total links: {total_links}\n')
def create_app(config: Configuration, tnc_pool: TNCPool,
               sids_relay: SIDSRelay) -> Flask:
    """ Creates a flask app for the api. """
    log = logging.getLogger(__name__)
    static_folder = os.path.join(
        util.get_root(), config.get_conf("Client", "static-files-path"))
    app = Flask(__name__, static_url_path="", static_folder=static_folder)
    CORS(app)

    if not config.get_conf("Client", "debug-log"):
        server_log = logging.getLogger("werkzeug")
        server_log.setLevel(logging.WARN)

    # swagger specific
    swagger_url = "/api/docs"
    api_url = "/api/static/swagger.yaml"
    swaggerui_blueprint = get_swaggerui_blueprint(
        swagger_url,
        api_url,
        config={"app_name": "Estcube 2 Telemetry API"}
    )
    app.register_blueprint(swaggerui_blueprint, url_prefix=swagger_url)
    # end swagger specific

    @app.route("/api/sids/status", methods=["GET"])
    def get_sids_status():
        return jsonify(sids_relay.get_status()), 200

    @app.route("/api/sids/toggle", methods=["POST"])
    def toggle_relay():
        response_json = request.get_json()
        current_relay_status = response_json["Mission Control"]["relay-enabled"]
        config.set_conf(section="Mission Control", element="relay-enabled",
                        value=current_relay_status)
        if current_relay_status:
            threading.Thread(target=sids_relay.relay_unrelayed_packets,
                             daemon=True).start()
        return response_json, 200

    @app.route("/api/tnc/<name>/status", methods=["GET"])
    def get_tnc_connection_check(name: str):
        if tnc_pool is None:
            return jsonify({"error": "TNC Pool is not defined."}), 500
        res = tnc_pool.check_tnc(name)
        return jsonify({"name": name, "status": res.name}), 200

    @app.route("/api/tnc/Main/start", methods=["POST"])
    def post_tnc_main_start():
        if tnc_pool is None:
            return jsonify({"error": "TNC Pool is not defined."}), 500
        tnc_pool.connect_main_tnc()
        return "", 204

    @app.route("/api/tnc/<name>/stop", methods=["POST"])
    def post_tnc_connection_stop(name: str):
        if tnc_pool is None:
            return jsonify({"error": "TNC Pool is not defined."}), 500
        tnc_pool.stop_tnc(name)
        return "", 204

    @app.route("/api/conf", methods=["GET"])
    def getconf():
        """ Returns the whole current configuration object. """
        res = config.get_all_conf()
        return jsonify(res)

    @app.route("/api/conf/constraints", methods=["GET"])
    def get_constraints():
        """ Returns all of the constraints for the configuration. """
        constrs = config.get_constraints()
        return jsonify(constrs)

    @app.route("/api/conf/full", methods=["GET"])
    def get_full_conf():
        res = config.get_conf_with_constraints()
        return res

    @app.route("/", methods=["GET"])
    def get_index():
        return send_file(os.path.join(static_folder, "index.html"))

    @app.errorhandler(404)
    def not_found(e):
        return send_file(os.path.join(static_folder, "index.html"))

    @app.route("/api/static/<path:path>")
    def send_static(path):
        return send_from_directory("static", path)

    @app.route("/api/conf", methods=["POST"])
    def post_set_conf():
        some_json = request.get_json()
        try:
            for i in some_json:
                for j in some_json[i]:
                    config.set_conf(section=i, element=j,
                                    value=some_json[i][j])
        except Exception:
            _, error, _ = sys.exc_info()
            return jsonify({"Error": "{err}".format(err=error)}), 500
        return jsonify(some_json), 200

    return app
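# A self-contained sketch of exercising the conf GET/POST round trip above
# with Flask's built-in test client; the in-memory conf dict stands in for
# the project's Configuration object and is illustrative only.
from flask import Flask, jsonify, request


def make_demo_app():
    app = Flask(__name__)
    conf = {"Client": {"frontend-port": "5000"}}

    @app.route("/api/conf", methods=["GET"])
    def get_conf():
        return jsonify(conf)

    @app.route("/api/conf", methods=["POST"])
    def set_conf():
        body = request.get_json()
        # merge each posted section into the stored configuration
        for section in body:
            conf.setdefault(section, {}).update(body[section])
        return jsonify(body), 200

    return app


client = make_demo_app().test_client()
client.post("/api/conf", json={"Client": {"frontend-port": "8080"}})
print(client.get("/api/conf").get_json())  # {'Client': {'frontend-port': '8080'}}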
import json
import os

import matplotlib.pyplot as plt
import numpy as np

from util import get_root

with open(os.path.join(get_root(), 'output', 'degrees.json'), 'r',
          encoding='utf-8') as f:
    degrees = json.load(f)

xs = [int(x) for x in degrees.keys()]
ys = list(degrees.values())

fig, ax = plt.subplots(1, 1, figsize=(20, 10))
ax.scatter(xs, ys)
ax.set_xlabel('Degree')
ax.set_ylabel('Frequency')
ax.set_title('Degree distribution')
ax.set_xticks(np.arange(0, max(xs), step=200))
fig.savefig('degree_distribution.png')

fig, ax = plt.subplots(1, 1, figsize=(20, 10))
ax.loglog(xs, ys, 'o')
ax.set_xlabel('Degree')
ax.set_ylabel('Frequency')
ax.set_title('Log-log degree distribution')
fig.savefig('degree_distribution_loglog.png')
import sys
import os

from util import get_filename, get_root

# DTU's HPC won't install mrjob. Cloned repo and placed it locally
sys.path.insert(0, os.path.join(get_root(), 'mrjob'))

from mrjob.job import MRJob
from mrjob.step import MRStep
import mrjob.compat

from collections import Counter
import csv

data_path = os.path.join(get_root(), 'data')


class AverageInDegree(MRJob):
    def mapper(self, _, page):
        with open(os.path.join(data_path, 'links', get_filename(page)),
                  mode='r', encoding='utf-8') as f:
            for link in f:
                yield link.strip(), 1

    def reducer_sum(self, link, values):
        yield 'link_in', sum(values)

    def reducer_freq(self, link, values):
        yield 'degrees', dict(Counter(values))

    def steps(self):
        return [
            MRStep(mapper=self.mapper, reducer=self.reducer_sum),
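# The snippet above is cut off inside steps(). As a complete reference for
# the multi-step MRStep pattern it uses, here is a self-contained two-step
# job: step 1 counts occurrences per key, step 2 histograms those counts.
# The job name and the word-count input are illustrative, not the original.
from collections import Counter
from mrjob.job import MRJob
from mrjob.step import MRStep


class DegreeHistogram(MRJob):
    # step 1: count how many times each word occurs
    def mapper_count(self, _, line):
        for word in line.split():
            yield word, 1

    def reducer_count(self, word, ones):
        yield word, sum(ones)

    # step 2: histogram the counts (how many words occur k times)
    def mapper_hist(self, word, count):
        yield count, 1

    def reducer_hist(self, count, ones):
        yield count, sum(ones)

    def steps(self):
        return [
            MRStep(mapper=self.mapper_count, reducer=self.reducer_count),
            MRStep(mapper=self.mapper_hist, reducer=self.reducer_hist),
        ]


if __name__ == '__main__':
    DegreeHistogram.run()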
def find_shortest_path(start_node: int):
    subgraph_no_counter = 0
    nodes = {index: (sys.maxsize, UNVISITED) for index in neighbors.keys()}
    nodes[start_node] = (subgraph_no_counter, FRONTIER)
    round = 0
    directory = os.path.realpath(
        os.path.join(get_root(), 'output/subgraphs', str(start_node)))
    if not os.path.exists(directory):
        os.mkdir(directory)
    current_file = os.path.join(directory, 'data')
    with open(current_file, 'w', encoding='utf-8') as file:
        for key, value in nodes.items():
            file.write(f'{key},{value}\n')
    current = 0
    print(f'Searching from node {start_node}')
    print(f'Start {datetime.now()}')
    while True:
        while True:
            done = True
            print(current_file)
            job = Subgraphs(args=[current_file])
            with job.make_runner() as runner:
                runner.run()
                round += 1
                with open(current_file, 'w', encoding='utf-8') as f:
                    for key, row in job.parse_output(runner.cat_output()):
                        subgraph_no, state = row
                        f.write(f'{key},({subgraph_no}, {state})\n')
                        if state == UNVISITED or state == FRONTIER:
                            done = False
                        if state == FRONTIER:
                            current += 1
            print(f'{round} completed')
            if current == 0:
                print('No more nodes on the frontier, stopping')
                break
            current = 0
            if done:
                break
        subgraph_no_counter += 1
        all_subgraphs_found = True
        with open(current_file, 'r', encoding='utf-8') as f:
            for line in f:
                m = re.match(regex, str(line))
                key = int(m.group('key'))
                state = int(m.group('state'))
                if state == UNVISITED:
                    print(line)
                    # use a separate handle so we don't shadow the read handle
                    with open(current_file, 'a', encoding='utf-8') as out:
                        out.write(
                            f'{key},({subgraph_no_counter}, {FRONTIER})\n')
                    all_subgraphs_found = False
                    break
        if all_subgraphs_found:
            break
    print(f'Done {datetime.now()}')
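# The outer loop above labels connected subgraphs: run the BFS rounds until
# the frontier empties, then seed the next UNVISITED node with a new
# subgraph number. A minimal in-memory sketch of the same idea (the toy
# graph is illustrative):
def label_subgraphs(graph):
    labels = {}
    subgraph_no = 0
    for seed in graph:
        if seed in labels:
            continue
        frontier = [seed]
        while frontier:          # expand until the frontier empties
            nxt = []
            for n in frontier:
                if n in labels:
                    continue
                labels[n] = subgraph_no
                nxt.extend(graph[n])
            frontier = nxt
        subgraph_no += 1         # next unvisited seed starts a new subgraph
    return labels


# two components: {0, 1} and {2}
print(label_subgraphs({0: [1], 1: [0], 2: []}))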
import os
import sys

root = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
# DTU's HPC won't install mrjob. Cloned repo and placed it locally
sys.path.insert(0, os.path.join(root, 'mrjob'))

from mrjob.job import MRJob

from util import get_filename, get_root
import re
from datetime import datetime

UNVISITED = 0
FRONTIER = 1
VISITED = 2

indexes = {}
with open(os.path.join(get_root(), 'data', 'pages.txt'), 'r',
          encoding='utf-8') as f:
    counter = 0
    for line in f:
        indexes[counter] = line.rstrip('\n')
        counter += 1

neighbors = {}
with open(os.path.join(get_root(), 'data', 'graph_file'), 'r',
          encoding='utf-8') as file:
    for line in file:
        key, values = line.split(':')
        values = values[1:-3]
        if values == '':
def main(argv):
    """ Main loop function. """
    # Parse command line options
    opts, args = getopt(argv, "vc:")
    conf_path = None
    for opt, arg in opts:
        if opt == "-c":
            conf_path = arg
    if conf_path is None:
        # Default conf path
        conf_path = "../configuration.ini"

    # Create the configuration object
    conf = Configuration(conf_path)

    # Set up logging
    if not conf.get_conf("Client", "debug-log"):
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.DEBUG)
    _logger = logging.getLogger(__name__)
    if not os.path.isdir("../logs"):
        os.mkdir("../logs")
    formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
    handler = logging.FileHandler("../logs/system_logs.log")
    handler.setFormatter(formatter)
    logging.getLogger('').addHandler(handler)
    _logger.info("Using configuration from: %s", conf_path)

    # Create the database object
    db_loc = os.path.join(util.get_root(),
                          conf.get_conf("Client", "database"))
    database = TelemetryDB(db_loc)
    database.init_db()

    # Update grafana and kaitai configurations
    if conf.get_conf("Client", "automatic-updating"):
        Updater(conf).checkForUpdates()

    # Build the other components.
    ax_listener = AXListener(conf)
    sids_relay = SIDSRelay(conf, database)
    telemetry_listener = TelemetryListener(database)
    file_logger = FileLogger(conf, conf.get_conf("Client", "logs"), "log")

    # Create the flask app and start it in a forked process.
    port = conf.get_conf("Client", "frontend-port")

    # Set the handler for SIGTERM, so we can exit a bit more gracefully.
    signal.signal(signal.SIGTERM, terminate_handler)

    # Hook the callbacks to the ax_listener.
    ax_listener.add_callback(database.insert_ax_frame)
    ax_listener.add_callback(sids_relay.relay)
    ax_listener.add_callback(file_logger.log_ax_frame)
    ax_listener.add_callback(telemetry_listener.receive)

    tnc_pool = TNCPool(conf, ax_listener)
    tnc_pool.connect_main_tnc()

    api_app = api.create_app(conf, tnc_pool, sids_relay)

    # We set the daemon option to True, so that the client will quit once
    # the other threads have finished, because we don't have a good way of
    # stopping the Flask app properly.
    api_thread = Thread(target=api_app.run, kwargs={"port": port},
                        daemon=True)
    api_thread.start()
    _logger.info("For the GUI open localhost:%s", port)

    try:
        # On Windows, the KeyboardInterrupt doesn't break the join.
        if platform.system() == "Windows":
            while api_thread.is_alive():
                api_thread.join(2)
        else:
            api_thread.join()
    except (KeyboardInterrupt, SystemExit):
        pass
    finally:
        tnc_pool.cleanup()
import sys
import os

from util import get_filename, get_root

# DTU's HPC won't install mrjob. Cloned repo and placed it locally
sys.path.insert(0, os.path.join(get_root(), 'mrjob'))

from mrjob.job import MRJob

data_path = os.path.join(get_root(), 'data')


class DegreeIn(MRJob):
    def mapper(self, _, page):
        with open(os.path.join(data_path, 'links', get_filename(page)),
                  mode='r', encoding='utf-8') as f:
            for line in f:
                yield line.rstrip('\n'), 1

    def reducer(self, key, values):
        yield key, sum(values)


names_file = os.path.join(data_path, 'pages.txt')
nodes = sum(1 for _ in open(names_file))
job = DegreeIn(args=[names_file, '--jobconf', 'nodes=' + str(nodes)])
with job.make_runner() as runner:
    runner.run()
    with open(os.path.join(get_root(), 'output', 'degree_in.csv'), 'w',