def __prepare(self):
    """Set up everything a probing run needs before browsing starts.

    Creates the flume output and backup directories, opens (or creates)
    the local database, and launches the flume agent and the tstat
    capture process.  Exits the interpreter when PhantomJS cannot be
    found or tstat fails to start.
    """
    # One timestamped backup directory per probe run.
    now = time.time()
    stamp = datetime.datetime.fromtimestamp(now).strftime('%Y%m%d-%H%M%S')
    self.backup_dir = os.path.join(self.config.get_base_configuration()['backupdir'], stamp)
    logger.info("Launching the probe...")
    if not os.path.isdir(self.config.get_flume_configuration()['outdir']):
        os.makedirs(self.config.get_flume_configuration()['outdir'])
    if not os.path.isdir(self.backup_dir):
        os.makedirs(self.backup_dir)
    self.tstat_out_file = self.config.get_database_configuration()['tstatfile']
    self.harfile = self.config.get_database_configuration()['harfile']
    try:
        self.launcher = PJSLauncher(self.config)
    except PhantomjsNotFoundError:
        logger.error("PhantomJS browser not found. Exiting.")
        sys.exit(-1)
    logger.debug('Backup dir set at: %s' % self.backup_dir)
    # A failing probe-id lookup means the local DB is not initialised
    # yet: gather location info and create the schema from scratch.
    try:
        self.dbcli = DBClient(self.config)
        self.dbcli.get_probe_id()
        logger.info("Probe data already stored.")
    except sqlite3.OperationalError:
        self.loc_info = utils.get_location()
        if not self.loc_info:
            logger.warning("No info on location retrieved.")
        else:
            # Double up single quotes so the values are safe inside
            # SQL string literals.
            for field in ['city', 'region']:
                self.loc_info[field] = self.loc_info[field].replace("'", "''")
        self.dbcli = DBClient(self.config, self.loc_info, create=True)
    # Flume is optional: without it, results go straight to the server.
    try:
        self.flumemanager = FlumeManager(self.config)
        self.flumemanager.start_flume()
        self.flumepid = self.flumemanager.check_flume()
        logger.info("Flume started: pid = {}".format(self.flumepid))
    except FlumeNotFoundError:
        self.flumemanager = None
        logger.warning("Flume not found, sending to server instead.")
    self.pjs_config = self.config.get_phantomjs_configuration()
    self.tstatmanager = TstatManager(self.config)
    try:
        self.tstatmanager.start_capture()
        logger.info("Tstat.check_tstat. ret = {}".format(self.tstatmanager.check_tstat()))
        logger.info("start.out process launched")
    except AttributeError:
        logger.error("Unable to start tstat process. Quit.")
        sys.exit(-1)
    logger.info("Ready")
def set_score_percentile(org_grouped_by_type):
    """Compute each organization's score percentile within its own group
    and store it into the database.

    Only organizations with a valid credit score take part in the
    ranking; an organization whose score is unavailable keeps a Null
    percentile in the database.
    """
    dbclient = DBClient()
    for score_heap in org_grouped_by_type.values():
        group_size = len(score_heap)
        rank = 0
        # heappop drains the heap in ascending score order, so the
        # running rank over the group size yields the percentile.
        while score_heap:
            rank += 1
            _score, electronic_id = heappop(score_heap)
            dbclient.update_score_percentile(round(float(rank) / group_size, 2), electronic_id)
def __prepare(self):
    """Set up everything a probing run needs before browsing starts.

    Creates the flume output and backup directories, opens (or creates)
    the local database, and launches the flume agent and the tstat
    capture process.  Exits the interpreter when PhantomJS cannot be
    found or tstat fails to start.
    """
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d-%H%M%S')
    self.backup_dir = os.path.join(self.config.get_base_configuration()['backupdir'], st)
    logger.info("Launching the probe...")
    if not os.path.isdir(self.config.get_flume_configuration()['outdir']):
        os.makedirs(self.config.get_flume_configuration()['outdir'])
    if not os.path.isdir(self.backup_dir):
        os.makedirs(self.backup_dir)
    self.tstat_out_file = self.config.get_database_configuration()['tstatfile']
    self.harfile = self.config.get_database_configuration()['harfile']
    try:
        self.launcher = PJSLauncher(self.config)
    except PhantomjsNotFoundError:
        logger.error("PhantomJS browser not found. Exiting.")
        sys.exit(-1)
    logger.debug('Backup dir set at: %s' % self.backup_dir)
    try:
        self.dbcli = DBClient(self.config)
        self.dbcli.get_probe_id()
        logger.info("Probe data already stored.")
    # FIX: was a blanket `except Exception`, which masked any error
    # (including programming mistakes) as "database not initialised".
    # Only a missing/uninitialised sqlite database should trigger the
    # creation path, matching the sibling implementation in this file.
    except sqlite3.OperationalError:
        self.loc_info = utils.get_location()
        if not self.loc_info:
            logger.warning("No info on location retrieved.")
        else:
            # FIX: escape single quotes so city/region are safe inside
            # SQL string literals (the sibling implementation does this).
            for k in ['city', 'region']:
                self.loc_info[k] = self.loc_info[k].replace("'", "''")
        self.dbcli = DBClient(self.config, self.loc_info, create=True)
    # Flume is optional: without it, results go straight to the server.
    try:
        self.flumemanager = FlumeManager(self.config)
        self.flumemanager.start_flume()
        self.flumepid = self.flumemanager.check_flume()
        logger.info("Flume started: pid = {}".format(self.flumepid))
    except FlumeNotFoundError:
        self.flumemanager = None
        logger.warning("Flume not found, sending to server instead.")
    self.pjs_config = self.config.get_phantomjs_configuration()
    self.tstatmanager = TstatManager(self.config)
    try:
        self.tstatmanager.start_capture()
        logger.info("start.out process launched")
    except AttributeError:
        logger.error("Unable to start tstat process. Quit.")
        sys.exit(-1)
    logger.info("Ready")
def scan_source_data(source):
    """Scan the nonprofit data sources and store the interesting
    records into the database.

    ``source`` maps index URLs to their prefixes; every parsed
    organization is upserted, and the ones with a valid credit score
    are ranked per organization type afterwards.
    """
    dbclient = DBClient()
    # Credit scores per organization type; each value is a heap of
    # (score, electronic_id) tuples so the group stays ordered.
    org_grouped_by_type = {}
    for url, prefix in source.items():
        for org in jsonparser.parse_json_index(url, prefix, LIMIT):
            dbclient.upsert(org)
            if org['cy_credit_score'] is None:
                # Stored, but excluded from the percentile ranking.
                continue
            score_heap = org_grouped_by_type.setdefault(org['organization_type'], [])
            heappush(score_heap, (org['cy_credit_score'], org['electronic_id']))
    set_score_percentile(org_grouped_by_type)
class PhantomProbe():
    """A single probing session: browse a URL through PhantomJS while
    tstat captures traffic, then run active measurements, diagnose the
    session and ship the results (via flume, or directly to the server).
    """

    def __init__(self, conffile, url):
        self.config = Configuration(conffile)
        self.url = url
        # FIX: defaults must be assigned *before* __prepare(); the old
        # order reset the flume pid recorded by __prepare back to None.
        self.diagnosis = {}
        self.flumepid = None
        self.__prepare()

    def __prepare(self):
        """Create working dirs, open the DB and start flume and tstat.

        Exits the interpreter when PhantomJS cannot be found or tstat
        fails to start.
        """
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d-%H%M%S')
        self.backup_dir = os.path.join(self.config.get_base_configuration()['backupdir'], st)
        logger.info("Launching the probe...")
        if not os.path.isdir(self.config.get_flume_configuration()['outdir']):
            os.makedirs(self.config.get_flume_configuration()['outdir'])
        if not os.path.isdir(self.backup_dir):
            os.makedirs(self.backup_dir)
        self.tstat_out_file = self.config.get_database_configuration()['tstatfile']
        self.harfile = self.config.get_database_configuration()['harfile']
        try:
            self.launcher = PJSLauncher(self.config)
        except PhantomjsNotFoundError:
            logger.error("PhantomJS browser not found. Exiting.")
            sys.exit(-1)
        logger.debug('Backup dir set at: %s' % self.backup_dir)
        # A failing probe-id lookup means the local DB is not
        # initialised yet: gather location info and create it.
        try:
            self.dbcli = DBClient(self.config)
            self.dbcli.get_probe_id()
            logger.info("Probe data already stored.")
        except sqlite3.OperationalError:
            self.loc_info = utils.get_location()
            if not self.loc_info:
                logger.warning("No info on location retrieved.")
            else:
                # Double up single quotes so the values are safe inside
                # SQL string literals.
                for k in ['city', 'region']:
                    self.loc_info.update({k: self.loc_info[k].replace("'", "''")})
            self.dbcli = DBClient(self.config, self.loc_info, create=True)
        # Flume is optional: without it, results go straight to the server.
        try:
            self.flumemanager = FlumeManager(self.config)
            self.flumemanager.start_flume()
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Flume started: pid = {}".format(self.flumepid))
        except FlumeNotFoundError:
            self.flumemanager = None
            logger.warning("Flume not found, sending to server instead.")
        self.pjs_config = self.config.get_phantomjs_configuration()
        self.tstatmanager = TstatManager(self.config)
        try:
            self.tstatmanager.start_capture()
            logger.info("Tstat.check_tstat. ret = {}".format(self.tstatmanager.check_tstat()))
            logger.info("start.out process launched")
        except AttributeError:
            logger.error("Unable to start tstat process. Quit.")
            sys.exit(-1)
        logger.info("Ready")

    def browse(self):
        """Browse ``self.url`` with PhantomJS while tstat captures traffic.

        Returns the inserted session id on success, ``False`` when
        passive measurements could not be retrieved; exits the process
        on unrecoverable browser/tstat errors.
        """
        try:
            stats = self.launcher.browse_url(self.url)
            logger.info("Got data from browse_url")
        except AttributeError:
            logger.error("Problems in browser thread. Aborting session...")
            logger.error("Forcing tstat to stop.")
            if not self.tstatmanager.stop_capture():
                logger.error("Unable to stop tstat.")
            sys.exit("Problems in browser thread. Aborting session...")
        if not stats:
            logger.warning('Problem in session to [%s].. skipping' % self.url)
            utils.clean_tmp_files(self.backup_dir, [self.tstat_out_file, self.harfile], self.url, True)
            sys.exit("Problems in stats collecting. Quitting...")
        if not os.path.exists(self.tstat_out_file):
            logger.error('tstat outfile missing. Check your network configuration.')
            sys.exit("tstat outfile missing. Check your network configuration.")
        #testbed
        #logger.debug("Sleeping 10 sec")
        #time.sleep(10)
        #end testbed
        if not self.tstatmanager.stop_capture():
            logger.error("Unable to stop tstat.")
        else:
            logger.info("tstat successfully stopped.")
        inserted_sid = self.dbcli.load_to_db(stats)
        logger.info('Ended browsing to %s' % self.url)
        self.passive = self.dbcli.pre_process_raw_table()
        if not self.passive:
            logger.error("Unable to retrieve passive measurements.")
            logger.error("Check if Tstat is running properly.")
            logger.error("Quitting.")
            return False
        utils.clean_tmp_files(self.backup_dir, [self.tstat_out_file, self.harfile], self.url, False)
        logger.debug('Saved backup files.')
        return inserted_sid

    def execute(self):
        """Run the full session: browse, active measurements, diagnosis."""
        inserted_sid = self.browse()
        if inserted_sid:
            monitor = ActiveMonitor(self.config, self.dbcli)
            self.active = monitor.run_active_measurement()
            logger.debug('Ended Active probing to url %s' % (self.url))
            # Active probing leaves traceroute dumps in the CWD.
            for tracefile in [f for f in os.listdir('.') if f.endswith('.traceroute')]:
                os.remove(tracefile)
            diag_manager = LocalDiagnosisManager(self.dbcli, self.url)
            self.diagnosis = diag_manager.run_diagnosis(inserted_sid)
            self.send_results()
        else:
            self.diagnosis = {"Warning": "Unable to perform browsing"}

    def send_results(self):
        """Export measurements to CSV, hand them off and pack backups."""
        jc = JSONClient(self.config, self.dbcli)
        measurements = jc.prepare_data()
        to_update = [el['sid'] for el in measurements]
        csv_path_fname_list = jc.save_csv_files(measurements)
        if self.flumemanager:
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Waiting for flume to stop...[{}]".format(self.flumepid))
            time.sleep(5)
            if self.flumepid:
                self.flumemanager.stop_flume(self.flumepid)
            else:
                logger.error("Unable to stop flume")
        else:
            logger.info("Sending data to server...")
            # FIXME
            #try:
            #    jc.send_csv()
            #except TimeoutError:
            #    logger.error("Timeout in server connection")
            #    pass
            #finally:
            #    logger.info("Done.")
        self.dbcli.update_sent(to_update)
        try:
            for csv_path_fname in csv_path_fname_list:
                shutil.copyfile(csv_path_fname, os.path.join(self.backup_dir, os.path.basename(csv_path_fname)))
        except FileNotFoundError:
            pass
        logger.info("Packing backups...")
        # FIX: the archive used to be (re)created inside the os.walk loop
        # and the tree removed while still being walked.  Decide once
        # whether there is anything to pack, then archive and clean up.
        if any(files for _, _, files in os.walk(self.backup_dir)):
            tar = tarfile.open("%s.tar.gz" % self.backup_dir, "w:gz")
            tar.add(self.backup_dir)
            tar.close()
            logger.info('tar.gz backup file created.')
            shutil.rmtree(self.backup_dir)
        logger.info('Done.')

    def get_result(self):
        """Return the diagnosis produced by the last execute() run."""
        return self.diagnosis
class PhantomProbe():
    """A single probing session: browse a URL through PhantomJS while
    tstat captures traffic, then run active measurements, diagnose the
    session and ship the results (via flume, or directly to the server).
    """

    def __init__(self, conffile, url):
        self.config = Configuration(conffile)
        self.url = url
        # FIX: defaults must be assigned *before* __prepare(); the old
        # order reset the flume pid recorded by __prepare back to None.
        self.diagnosis = {}
        self.flumepid = None
        self.__prepare()

    def __prepare(self):
        """Create working dirs, open the DB and start flume and tstat.

        Exits the interpreter when PhantomJS cannot be found or tstat
        fails to start.
        """
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d-%H%M%S')
        self.backup_dir = os.path.join(self.config.get_base_configuration()['backupdir'], st)
        logger.info("Launching the probe...")
        if not os.path.isdir(self.config.get_flume_configuration()['outdir']):
            os.makedirs(self.config.get_flume_configuration()['outdir'])
        if not os.path.isdir(self.backup_dir):
            os.makedirs(self.backup_dir)
        self.tstat_out_file = self.config.get_database_configuration()['tstatfile']
        self.harfile = self.config.get_database_configuration()['harfile']
        try:
            self.launcher = PJSLauncher(self.config)
        except PhantomjsNotFoundError:
            logger.error("PhantomJS browser not found. Exiting.")
            sys.exit(-1)
        logger.debug('Backup dir set at: %s' % self.backup_dir)
        try:
            self.dbcli = DBClient(self.config)
            self.dbcli.get_probe_id()
            logger.info("Probe data already stored.")
        # FIX: was a blanket `except Exception`, which masked any error
        # as "database not initialised"; only a missing/uninitialised
        # sqlite database should trigger the creation path.
        except sqlite3.OperationalError:
            self.loc_info = utils.get_location()
            if not self.loc_info:
                logger.warning("No info on location retrieved.")
            else:
                # FIX: escape single quotes so city/region are safe inside
                # SQL string literals (the sibling implementation does this).
                for k in ['city', 'region']:
                    self.loc_info[k] = self.loc_info[k].replace("'", "''")
            self.dbcli = DBClient(self.config, self.loc_info, create=True)
        # Flume is optional: without it, results go straight to the server.
        try:
            self.flumemanager = FlumeManager(self.config)
            self.flumemanager.start_flume()
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Flume started: pid = {}".format(self.flumepid))
        except FlumeNotFoundError:
            self.flumemanager = None
            logger.warning("Flume not found, sending to server instead.")
        self.pjs_config = self.config.get_phantomjs_configuration()
        self.tstatmanager = TstatManager(self.config)
        try:
            self.tstatmanager.start_capture()
            logger.info("start.out process launched")
        except AttributeError:
            logger.error("Unable to start tstat process. Quit.")
            sys.exit(-1)
        logger.info("Ready")

    def browse(self):
        """Browse ``self.url`` with PhantomJS while tstat captures traffic.

        Returns ``True`` on success, ``False`` when passive measurements
        could not be retrieved; exits the process on unrecoverable
        browser/tstat errors.
        """
        try:
            stats = self.launcher.browse_url(self.url)
        except AttributeError:
            logger.error("Problems in browser thread. Aborting session...")
            logger.error("Forcing tstat to stop.")
            if not self.tstatmanager.stop_capture():
                logger.error("Unable to stop tstat.")
            sys.exit("Problems in browser thread. Aborting session...")
        if not stats:
            logger.warning('Problem in session to [%s].. skipping' % self.url)
            utils.clean_tmp_files(self.backup_dir, [self.tstat_out_file, self.harfile], self.url, True)
            sys.exit("Problems in stats collecting. Quitting...")
        if not os.path.exists(self.tstat_out_file):
            logger.error('tstat outfile missing. Check your network configuration.')
            sys.exit("tstat outfile missing. Check your network configuration.")
        if not self.tstatmanager.stop_capture():
            logger.error("Unable to stop tstat.")
        else:
            logger.info("tstat successfully stopped.")
        self.dbcli.load_to_db(stats)
        logger.info('Ended browsing to %s' % self.url)
        self.passive = self.dbcli.pre_process_raw_table()
        if not self.passive:
            logger.error("Unable to retrieve passive measurements.")
            logger.error("Check if Tstat is running properly.")
            logger.error("Quitting.")
            return False
        utils.clean_tmp_files(self.backup_dir, [self.tstat_out_file, self.harfile], self.url, False)
        logger.debug('Saved backup files.')
        return True

    def execute(self):
        """Run the full session: browse, active measurements, diagnosis."""
        if self.browse():
            monitor = ActiveMonitor(self.config, self.dbcli)
            self.active = monitor.run_active_measurement()
            logger.debug('Ended Active probing to url %s' % (self.url))
            # Active probing leaves traceroute dumps in the CWD.
            for tracefile in [f for f in os.listdir('.') if f.endswith('.traceroute')]:
                os.remove(tracefile)
            diag_manager = LocalDiagnosisManager(self.dbcli, self.url)
            self.diagnosis = diag_manager.run_diagnosis(self.passive, self.active)
            self.send_results()
        else:
            self.diagnosis = {"Warning": "Unable to perform browsing"}

    def send_results(self):
        """Export measurements to CSV, hand them off and pack backups."""
        jc = JSONClient(self.config, self.dbcli)
        measurements = jc.prepare_data()
        to_update = [el['sid'] for el in measurements]
        csv_path_fname_list = jc.save_csv_files(measurements)
        if self.flumemanager:
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Waiting for flume to stop...[{}]".format(self.flumepid))
            time.sleep(5)
            if self.flumepid:
                self.flumemanager.stop_flume(self.flumepid)
            else:
                logger.error("Unable to stop flume")
        else:
            logger.info("Sending data to server...")
            try:
                jc.send_csv()
            except TimeoutError:
                # Best effort: data stays marked as sent and is kept in
                # the local backups below.
                logger.error("Timeout in server connection")
            finally:
                logger.info("Done.")
        self.dbcli.update_sent(to_update)
        try:
            for csv_path_fname in csv_path_fname_list:
                shutil.copyfile(csv_path_fname, os.path.join(self.backup_dir, os.path.basename(csv_path_fname)))
        except FileNotFoundError:
            pass
        logger.info("Packing backups...")
        # FIX: the archive used to be (re)created inside the os.walk loop
        # and the tree removed while still being walked.  Decide once
        # whether there is anything to pack, then archive and clean up.
        if any(files for _, _, files in os.walk(self.backup_dir)):
            tar = tarfile.open("%s.tar.gz" % self.backup_dir, "w:gz")
            tar.add(self.backup_dir)
            tar.close()
            logger.info('tar.gz backup file created.')
            shutil.rmtree(self.backup_dir)
        logger.info('Done.')

    def get_result(self):
        """Return the diagnosis produced by the last execute() run."""
        return self.diagnosis
""" This script is the main file for our web application based on Flask framework. It contains the URL routing and business logic to handle valid requests. """ import logging import json from db.dbclient import DBClient from flask import Flask from flask import render_template from flask import request from flask import jsonify mysql_client = DBClient() app = Flask(__name__) @app.route('/') def index(): return render_template('index.html') @app.route('/score', methods=['POST']) def get_score(): key = request.form['key'] if key.isdigit(): key = int(key) else: key = key results = mysql_client.query_by_key_metrics(key)