Code example #1
    def __prepare(self):
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d-%H%M%S')
        self.backup_dir = os.path.join(self.config.get_base_configuration()['backupdir'], st)
        logger.info("Launching the probe...")
        if not os.path.isdir(self.config.get_flume_configuration()['outdir']):
            os.makedirs(self.config.get_flume_configuration()['outdir'])
        if not os.path.isdir(self.backup_dir):
            os.makedirs(self.backup_dir)
        self.tstat_out_file = self.config.get_database_configuration()['tstatfile']
        self.harfile = self.config.get_database_configuration()['harfile']
        try:
            self.launcher = PJSLauncher(self.config)
        except PhantomjsNotFoundError:
            logger.error("PhantomJS browser not found. Exiting.")
            sys.exit(-1)

        logger.debug('Backup dir set at: %s' % self.backup_dir)
        try:
            self.dbcli = DBClient(self.config)
            self.dbcli.get_probe_id()
            logger.info("Probe data already stored.")
        except sqlite3.OperationalError:
            self.loc_info = utils.get_location()
            if not self.loc_info:
                logger.warning("No info on location retrieved.")
            else:
                for k in ['city', 'region']:
                    self.loc_info.update({k: self.loc_info[k].replace("'", "''")})
            self.dbcli = DBClient(self.config, self.loc_info, create=True)

        try:
            self.flumemanager = FlumeManager(self.config)
            self.flumemanager.start_flume()
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Flume started: pid = {}".format(self.flumepid))
        except FlumeNotFoundError:
            self.flumemanager = None
            logger.warning("Flume not found, sending to server instead.")
        self.pjs_config = self.config.get_phantomjs_configuration()
        self.tstatmanager = TstatManager(self.config)
        try:
            self.tstatmanager.start_capture()
            logger.info("Tstat.check_tstat. ret = {}".format(self.tstatmanager.check_tstat()))
            logger.info("start.out process launched")
        except AttributeError:
            logger.error("Unable to start tstat process. Quit.")
            sys.exit(-1)
        logger.info("Ready")
Code example #2
def set_score_percentile(org_grouped_by_type):
    """This function computes the score percentile of each organization within
       its own group, and store it into the database. For organization whose credit
       score is unavailable, its percentile will be Null. The computation will only
       be done among organization will valid score.
    """
    dbclient = DBClient()
    for orgs_queue in org_grouped_by_type.values():
        total_count = len(orgs_queue)
        index = 1
        while len(orgs_queue) != 0:
            score, electronic_id = heappop(orgs_queue)
            percentile = round(float(index) / total_count, 2)
            index += 1
            dbclient.update_score_percentile(percentile, electronic_id)
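
The percentile logic above relies on heappop returning the lowest score first, so within each group the weakest organization ends up with percentile 1/N and the strongest with 1.0. A minimal standalone sketch of that arithmetic, with invented scores and ids and the database call replaced by a print, could look like this:

# Illustrative sketch only: the scores and electronic_id values are invented,
# and dbclient.update_score_percentile is replaced by a print statement.
from heapq import heappush, heappop

queue = []
for score, electronic_id in [(92, 'org-a'), (55, 'org-b'), (77, 'org-c')]:
    heappush(queue, (score, electronic_id))

total_count = len(queue)
index = 1
while queue:
    score, electronic_id = heappop(queue)
    percentile = round(float(index) / total_count, 2)
    index += 1
    print(electronic_id, score, percentile)
# Expected output: org-b 55 0.33 / org-c 77 0.67 / org-a 92 1.0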
Code example #3
    def __prepare(self):
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d-%H%M%S')
        self.backup_dir = os.path.join(self.config.get_base_configuration()['backupdir'], st)
        logger.info("Launching the probe...")
        if not os.path.isdir(self.config.get_flume_configuration()['outdir']):
            os.makedirs(self.config.get_flume_configuration()['outdir'])
        if not os.path.isdir(self.backup_dir):
            os.makedirs(self.backup_dir)
        self.tstat_out_file = self.config.get_database_configuration()['tstatfile']
        self.harfile = self.config.get_database_configuration()['harfile']
        try:
            self.launcher = PJSLauncher(self.config)
        except PhantomjsNotFoundError:
            logger.error("PhantomJS browser not found. Exiting.")
            sys.exit(-1)

        logger.debug('Backup dir set at: %s' % self.backup_dir)
        try:
            self.dbcli = DBClient(self.config)
            self.dbcli.get_probe_id()
            logger.info("Probe data already stored.")
        except Exception:
            self.loc_info = utils.get_location()
            if not self.loc_info:
                logger.warning("No info on location retrieved.")
            self.dbcli = DBClient(self.config, self.loc_info, create=True)

        try:
            self.flumemanager = FlumeManager(self.config)
            self.flumemanager.start_flume()
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Flume started: pid = {}".format(self.flumepid))
        except FlumeNotFoundError:
            self.flumemanager = None
            logger.warning("Flume not found, sending to server instead.")
        self.pjs_config = self.config.get_phantomjs_configuration()
        self.tstatmanager = TstatManager(self.config)
        try:
            self.tstatmanager.start_capture()
            logger.info("start.out process launched")
        except AttributeError:
            logger.error("Unable to start tstat process. Quit.")
            sys.exit(-1)
        logger.info("Ready")
Code example #4
def scan_source_data(source):
    """This function scan the the nonprofit data source and store interested data
       into the database.
    """
    dbclient = DBClient()
    # This dict stores the credit score of each nonprofit organization, grouped by organization type.
    org_grouped_by_type = {}

    for url, prefix in source.items():
        for org in jsonparser.parse_json_index(url, prefix, LIMIT):
            dbclient.upsert(org)

            # We only care about organizations with a valid score here.
            if org['cy_credit_score'] is not None:
                # If an organization type is encountered for the first time, create a list for it.
                if org['organization_type'] not in org_grouped_by_type:
                    org_grouped_by_type[org['organization_type']] = []
                # Use a priority queue (min-heap) to store (score, id) tuples so each group stays ordered by score.
                priority_queue = org_grouped_by_type[org['organization_type']]
                heappush(priority_queue, (org['cy_credit_score'], org['electronic_id']))

    set_score_percentile(org_grouped_by_type)
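
scan_source_data expects source to map each index URL to the prefix handed to jsonparser.parse_json_index, with LIMIT as a module-level cap on how many records are read. A hypothetical invocation, with placeholder URL, prefix, and LIMIT that are not taken from the original project, might look like this:

# Hypothetical driver for scan_source_data above; the URL, prefix, and LIMIT
# value are placeholders, not values from the original project.
LIMIT = 100

SOURCES = {
    'https://example.org/nonprofits/index.json': 'example-prefix',
}

if __name__ == '__main__':
    scan_source_data(SOURCES)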
Code example #5
class PhantomProbe:

    def __init__(self, conffile, url):
        self.config = Configuration(conffile)
        self.url = url
        self.diagnosis = {}
        self.flumepid = None
        self.__prepare()

    def __prepare(self):
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d-%H%M%S')
        self.backup_dir = os.path.join(self.config.get_base_configuration()['backupdir'], st)
        logger.info("Launching the probe...")
        if not os.path.isdir(self.config.get_flume_configuration()['outdir']):
            os.makedirs(self.config.get_flume_configuration()['outdir'])
        if not os.path.isdir(self.backup_dir):
            os.makedirs(self.backup_dir)
        self.tstat_out_file = self.config.get_database_configuration()['tstatfile']
        self.harfile = self.config.get_database_configuration()['harfile']
        try:
            self.launcher = PJSLauncher(self.config)
        except PhantomjsNotFoundError:
            logger.error("PhantomJS browser not found. Exiting.")
            sys.exit(-1)

        logger.debug('Backup dir set at: %s' % self.backup_dir)
        try:
            self.dbcli = DBClient(self.config)
            self.dbcli.get_probe_id()
            logger.info("Probe data already stored.")
        except sqlite3.OperationalError:
            self.loc_info = utils.get_location()
            if not self.loc_info:
                logger.warning("No info on location retrieved.")
            else:
                for k in ['city', 'region']:
                    self.loc_info.update({k: self.loc_info[k].replace("'", "''")})
            self.dbcli = DBClient(self.config, self.loc_info, create=True)

        try:
            self.flumemanager = FlumeManager(self.config)
            self.flumemanager.start_flume()
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Flume started: pid = {}".format(self.flumepid))
        except FlumeNotFoundError:
            self.flumemanager = None
            logger.warning("Flume not found, sending to server instead.")
        self.pjs_config = self.config.get_phantomjs_configuration()
        self.tstatmanager = TstatManager(self.config)
        try:
            self.tstatmanager.start_capture()
            logger.info("Tstat.check_tstat. ret = {}".format(self.tstatmanager.check_tstat()))
            logger.info("start.out process launched")
        except AttributeError:
            logger.error("Unable to start tstat process. Quit.")
            sys.exit(-1)
        logger.info("Ready")

    def browse(self):
        try:
            stats = self.launcher.browse_url(self.url)
            logger.info("Got data from browse_url")
        except AttributeError:
            logger.error("Problems in browser thread. Aborting session...")
            logger.error("Forcing tstat to stop.")
            if not self.tstatmanager.stop_capture():
                logger.error("Unable to stop tstat.")
            sys.exit("Problems in browser thread. Aborting session...")
        if not stats:
            logger.warning('Problem in session to [%s].. skipping' % self.url)
            utils.clean_tmp_files(self.backup_dir, [self.tstat_out_file, self.harfile], self.url, True)
            sys.exit("Problems in stats collecting. Quitting...")
        if not os.path.exists(self.tstat_out_file):
            logger.error('tstat outfile missing. Check your network configuration.')
            sys.exit("tstat outfile missing. Check your network configuration.")

        #testbed
        #logger.debug("Sleeping 10 sec")
        #time.sleep(10)
        #end testbed
        if not self.tstatmanager.stop_capture():
            logger.error("Unable to stop tstat.")
        else:
            logger.info("tstat successfully stopped.")

        inserted_sid = self.dbcli.load_to_db(stats)
        logger.info('Ended browsing to %s' % self.url)
        self.passive = self.dbcli.pre_process_raw_table()
        if not self.passive:
            logger.error("Unable to retrieve passive measurements.")
            logger.error("Check if Tstat is running properly.")
            logger.error("Quitting.")
            return False
        utils.clean_tmp_files(self.backup_dir, [self.tstat_out_file, self.harfile], self.url, False)
        logger.debug('Saved backup files.')
        return inserted_sid

    def execute(self):
        inserted_sid = self.browse()
        if inserted_sid:
            monitor = ActiveMonitor(self.config, self.dbcli)
            self.active = monitor.run_active_measurement()
            logger.debug('Ended Active probing to url %s' % (self.url))
            for tracefile in [f for f in os.listdir('.') if f.endswith('.traceroute')]:
                os.remove(tracefile)
            l = LocalDiagnosisManager(self.dbcli, self.url)
            self.diagnosis = l.run_diagnosis(inserted_sid)
            self.send_results()
        else:
            self.diagnosis = {"Warning": "Unable to perform browsing"}

    def send_results(self):
        jc = JSONClient(self.config, self.dbcli)
        measurements = jc.prepare_data()
        to_update = [el['sid'] for el in measurements]
        csv_path_fname_list = jc.save_csv_files(measurements)
        if self.flumemanager:
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Waiting for flume to stop...[{}]".format(self.flumepid))
            time.sleep(5)
            if self.flumepid:
                self.flumemanager.stop_flume(self.flumepid)
            else:
                logger.error("Unable to stop flume")
        else:
            logger.info("Sending data to server...")
            # FIXME
            #try:
            #    jc.send_csv()
            #except TimeoutError:
            #    logger.error("Timeout in server connection")
            #    pass
            #finally:
            #    logger.info("Done.")
        self.dbcli.update_sent(to_update)
        try:
            for csv_path_fname in csv_path_fname_list:
                shutil.copyfile(csv_path_fname, os.path.join(self.backup_dir, os.path.basename(csv_path_fname)))
        except FileNotFoundError:
            pass

        logger.info("Packing backups...")
        for root, _, files in os.walk(self.backup_dir):
            if len(files) > 0:
                tar = tarfile.open("%s.tar.gz" % self.backup_dir, "w:gz")
                tar.add(self.backup_dir)
                tar.close()
                logger.info('tar.gz backup file created.')
        shutil.rmtree(self.backup_dir)
        logger.info('Done.')
        
    def get_result(self):
        return self.diagnosis
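
Based on the constructor and the public methods shown above, a caller builds the probe from a configuration file and a target URL, runs execute(), and reads the diagnosis via get_result(). A hedged usage sketch, in which the configuration file name and URL are placeholders:

# Hypothetical usage of the PhantomProbe class above; 'probe.conf' and the
# target URL are placeholders, not values from the original project.
if __name__ == '__main__':
    probe = PhantomProbe('probe.conf', 'https://www.example.com')
    probe.execute()             # browse, run active measurements, send results
    print(probe.get_result())   # diagnosis dict, or a warning if browsing failed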
Code example #6
class PhantomProbe:

    def __init__(self, conffile, url):
        self.config = Configuration(conffile)
        self.url = url
        self.diagnosis = {}
        self.flumepid = None
        self.__prepare()

    def __prepare(self):
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d-%H%M%S')
        self.backup_dir = os.path.join(self.config.get_base_configuration()['backupdir'], st)
        logger.info("Launching the probe...")
        if not os.path.isdir(self.config.get_flume_configuration()['outdir']):
            os.makedirs(self.config.get_flume_configuration()['outdir'])
        if not os.path.isdir(self.backup_dir):
            os.makedirs(self.backup_dir)
        self.tstat_out_file = self.config.get_database_configuration()['tstatfile']
        self.harfile = self.config.get_database_configuration()['harfile']
        try:
            self.launcher = PJSLauncher(self.config)
        except PhantomjsNotFoundError:
            logger.error("PhantomJS browser not found. Exiting.")
            sys.exit(-1)

        logger.debug('Backup dir set at: %s' % self.backup_dir)
        try:
            self.dbcli = DBClient(self.config)
            self.dbcli.get_probe_id()
            logger.info("Probe data already stored.")
        except Exception:
            self.loc_info = utils.get_location()
            if not self.loc_info:
                logger.warning("No info on location retrieved.")
            self.dbcli = DBClient(self.config, self.loc_info, create=True)

        try:
            self.flumemanager = FlumeManager(self.config)
            self.flumemanager.start_flume()
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Flume started: pid = {}".format(self.flumepid))
        except FlumeNotFoundError:
            self.flumemanager = None
            logger.warning("Flume not found, sending to server instead.")
        self.pjs_config = self.config.get_phantomjs_configuration()
        self.tstatmanager = TstatManager(self.config)
        try:
            self.tstatmanager.start_capture()
            logger.info("start.out process launched")
        except AttributeError:
            logger.error("Unable to start tstat process. Quit.")
            sys.exit(-1)
        logger.info("Ready")

    def browse(self):
        try:
            stats = self.launcher.browse_url(self.url)
        except AttributeError:
            logger.error("Problems in browser thread. Aborting session...")
            logger.error("Forcing tstat to stop.")
            if not self.tstatmanager.stop_capture():
                logger.error("Unable to stop tstat.")
            sys.exit("Problems in browser thread. Aborting session...")
        if not stats:
            logger.warning('Problem in session to [%s].. skipping' % self.url)
            utils.clean_tmp_files(self.backup_dir, [self.tstat_out_file, self.harfile], self.url, True)
            sys.exit("Problems in stats collecting. Quitting...")
        if not os.path.exists(self.tstat_out_file):
            logger.error('tstat outfile missing. Check your network configuration.')
            sys.exit("tstat outfile missing. Check your network configuration.")
            
        if not self.tstatmanager.stop_capture():
            logger.error("Unable to stop tstat.")
        else:
            logger.info("tstat successfully stopped.")

        self.dbcli.load_to_db(stats)
        logger.info('Ended browsing to %s' % self.url)
        self.passive = self.dbcli.pre_process_raw_table()
        if not self.passive:
            logger.error("Unable to retrieve passive measurements.")
            logger.error("Check if Tstat is running properly.")
            logger.error("Quitting.")
            return False
        utils.clean_tmp_files(self.backup_dir, [self.tstat_out_file, self.harfile], self.url, False)
        logger.debug('Saved backup files.')
        return True

    def execute(self):
        if self.browse():
            monitor = ActiveMonitor(self.config, self.dbcli)
            self.active = monitor.run_active_measurement()
            logger.debug('Ended Active probing to url %s' % (self.url))
            for tracefile in [f for f in os.listdir('.') if f.endswith('.traceroute')]:
                os.remove(tracefile)
            l = LocalDiagnosisManager(self.dbcli, self.url)
            self.diagnosis = l.run_diagnosis(self.passive, self.active)
            self.send_results()
        else:
            self.diagnosis = {"Warning": "Unable to perform browsing"}

    def send_results(self):
        jc = JSONClient(self.config, self.dbcli)
        measurements = jc.prepare_data()
        to_update = [el['sid'] for el in measurements]
        csv_path_fname_list = jc.save_csv_files(measurements)
        if self.flumemanager:
            self.flumepid = self.flumemanager.check_flume()
            logger.info("Waiting for flume to stop...[{}]".format(self.flumepid))
            time.sleep(5)
            if self.flumepid:
                self.flumemanager.stop_flume(self.flumepid)
            else:
                logger.error("Unable to stop flume")
        else:
            logger.info("Sending data to server...")
            try:
                jc.send_csv()
            except TimeoutError:
                logger.error("Timeout in server connection")
                pass
            finally:
                logger.info("Done.")
        self.dbcli.update_sent(to_update)
        try:
            for csv_path_fname in csv_path_fname_list:
                shutil.copyfile(csv_path_fname, os.path.join(self.backup_dir, os.path.basename(csv_path_fname)))
        except FileNotFoundError:
            pass

        logger.info("Packing backups...")
        for root, _, files in os.walk(self.backup_dir):
            if len(files) > 0:
                tar = tarfile.open("%s.tar.gz" % self.backup_dir, "w:gz")
                tar.add(self.backup_dir)
                tar.close()
                logger.info('tar.gz backup file created.')
        shutil.rmtree(self.backup_dir)
        logger.info('Done.')
        
    def get_result(self):
        return self.diagnosis
Code example #7
"""
This script is the main file for our web application, built on the Flask framework.
It contains the URL routing and business logic to handle valid requests.
"""

import logging
import json
from db.dbclient import DBClient
from flask import Flask
from flask import render_template
from flask import request
from flask import jsonify

mysql_client = DBClient()
app = Flask(__name__)


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/score', methods=['POST'])
def get_score():
    key = request.form['key']
    if key.isdigit():
        key = int(key)

    results = mysql_client.query_by_key_metrics(key)