def __prepare(self):
    """Set up everything the probe needs before it can run.

    In order: derive a timestamped backup directory, make sure the Flume
    output directory and the backup directory exist, read the tstat/HAR
    settings, create the PhantomJS launcher, obtain a database client,
    try to start Flume and finally start the tstat capture.

    The process is terminated with ``sys.exit(-1)`` when PhantomJS is not
    found or when the tstat capture cannot be started. A missing Flume is
    tolerated: ``self.flumemanager`` is then left as ``None``.
    """
    # The backup directory is named after the current local time.
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d-%H%M%S')
    self.backup_dir = os.path.join(self.config.get_base_configuration()['backupdir'], st)
    logger.info("Launching the probe...")
    # Create the working directories if they are not there yet.
    if not os.path.isdir(self.config.get_flume_configuration()['outdir']):
        os.makedirs(self.config.get_flume_configuration()['outdir'])
    if not os.path.isdir(self.backup_dir):
        os.makedirs(self.backup_dir)
    # NOTE(review): presumably file paths, judging by the key names - confirm.
    self.tstat_out_file = self.config.get_database_configuration()['tstatfile']
    self.harfile = self.config.get_database_configuration()['harfile']
    # The browser launcher is mandatory: give up if PhantomJS is missing.
    try:
        self.launcher = PJSLauncher(self.config)
    except PhantomjsNotFoundError:
        logger.error("PhantomJS browser not found. Exiting.")
        sys.exit(-1)
    logger.debug('Backup dir set at: %s' % self.backup_dir)
    # Try to reuse an existing probe id; on any failure fall back to
    # building the client with create=True and the retrieved location.
    try:
        self.dbcli = DBClient(self.config)
        self.dbcli.get_probe_id()
        logger.info("Probe data already stored.")
    except Exception:
        self.loc_info = utils.get_location()
        if not self.loc_info:
            logger.warning("No info on location retrieved.")
        self.dbcli = DBClient(self.config, self.loc_info, create=True)
    # Flume is optional: without it flumemanager is None and, per the log
    # message below, data is sent to the server instead.
    try:
        self.flumemanager = FlumeManager(self.config)
        self.flumemanager.start_flume()
        self.flumepid = self.flumemanager.check_flume()
        logger.info("Flume started: pid = {}".format(self.flumepid))
    except FlumeNotFoundError:
        self.flumemanager = None
        logger.warning("Flume not found, sending to server instead.")
    self.pjs_config = self.config.get_phantomjs_configuration()
    self.tstatmanager = TstatManager(self.config)
    # The tstat capture is mandatory: an AttributeError from start_capture()
    # is treated as "could not start" and ends the process.
    try:
        self.tstatmanager.start_capture()
        logger.info("start.out process launched")
    except AttributeError:
        logger.error("Unable to start tstat process. Quit.")
        sys.exit(-1)
    logger.info("Ready")
def set_score_percentile(org_grouped_by_type):
    """Compute and store the score percentile of every queued organization.

    ``org_grouped_by_type`` maps a group key to a heap (as maintained by
    ``heappush``) of ``(score, electronic_id)`` tuples. Each heap is drained
    in ascending order; the organization popped at 1-based position ``rank``
    out of ``group_size`` entries gets the percentile
    ``round(rank / group_size, 2)``, which is written through
    ``DBClient.update_score_percentile``.

    Only organizations present in the heaps are updated, so the ranking is
    computed exclusively among the entries the caller queued. The heaps are
    emptied as a side effect.
    """
    client = DBClient()
    for heap in org_grouped_by_type.values():
        # Capture the size up front: the heap shrinks while it is drained.
        group_size = len(heap)
        rank = 0
        while heap:
            rank += 1
            # Only the id is needed; the score merely determines the order.
            _, electronic_id = heappop(heap)
            client.update_score_percentile(
                round(float(rank) / group_size, 2), electronic_id)
def scan_source_data(source):
    """Scan the nonprofit data sources and store the data of interest.

    ``source`` maps each index URL to its prefix. Every organization yielded
    by ``jsonparser.parse_json_index`` is upserted into the database. The
    ones that carry a credit score are also collected, grouped by
    organization type, and handed to ``set_score_percentile`` once all
    sources have been scanned.
    """
    client = DBClient()
    # organization type -> heap of (credit score, electronic id) tuples.
    heaps_by_type = {}
    for url, prefix in source.items():
        for org in jsonparser.parse_json_index(url, prefix, LIMIT):
            client.upsert(org)
            credit_score = org['cy_credit_score']
            # Organizations without a score take no part in the ranking.
            if credit_score is None:
                continue
            # The first organization of a type gets a fresh heap created.
            heap = heaps_by_type.setdefault(org['organization_type'], [])
            heappush(heap, (credit_score, org['electronic_id']))
    set_score_percentile(heaps_by_type)
""" This script is the main file for our web application based on Flask framework. It contains the URL routing and business logic to handle valid requests. """ import logging import json from db.dbclient import DBClient from flask import Flask from flask import render_template from flask import request from flask import jsonify mysql_client = DBClient() app = Flask(__name__) @app.route('/') def index(): return render_template('index.html') @app.route('/score', methods=['POST']) def get_score(): key = request.form['key'] if key.isdigit(): key = int(key) else: key = key results = mysql_client.query_by_key_metrics(key)