Example #1
def main():
    ds = Datastore()
    print('immediate packages')
    ds.get_imm_packages()
    ds.get_seen_info()
    ds.get_decision_info()
    ds.save()
Example #2
    def test_readfile(self):
        config.PCAP_ROOT = os.path.join(config.BASE_DIR,
                                        'honeypatckBenattackTest/sysdig')
        config.DATA_SOURCE = 65
        config.NUM_BENIGN_CLASSES = 12
        config.SYSDIG = './sysdigtest'

        #eventTrace = sysdigparser.readfileHoneyPatch( 5, 156 )

        #print str(eventTrace.getId())

        for traceId in range(12, 30):
            #traceId = 5
            traceStart = 0
            traceEnd = 3

            webpage = Datastore.getWebpagesHoneyPatchSysdig(
                [traceId], traceStart,
                traceEnd)  # bug, files missing 157, 158, ...
            webpageTest = webpage[0]
            webpageList = [webpageTest]

            postCountermeasureOverhead = 0

            for w in webpageList:
                for trace in w.getTraces():
                    print 'ben/attack id: ' + str(trace.getId()) + '. trace id: ' + str(trace.getTraceIndex())
                    traceWithCountermeasure = trace
                    postCountermeasureOverhead += traceWithCountermeasure.getBandwidth()

            print 'num of syscalls: ' + str(postCountermeasureOverhead)
            print '--------------'
    def apply(self):
        # print 'applying countermeasure'
        # print('WEBPAGE', int(self.trace.webpage))

        if self.dst_trace is None:
            self.open_db_connection()

            src_clust = self.get_site_cluster(int(self.trace.webpage))

            dst_clust = cluster_distances[src_clust][self.D - 1]
            # print('DST-CLUST', dst_clust)

            alg = self.params['CLUSTERING_ALGORITHM']
            self.cur.execute(
                'SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1'
                .format(alg), (dst_clust, ))
            selected_site_id = int(round(self.cur.fetchone()[0], 0))
            # print('SELECTED-SITE', selected_site_id)

            sample_trace = Datastore.get_trace(site_id=selected_site_id)
            # print sample_trace
            self.dst_trace = sample_trace

        # print self.trace.get_sizes_str()
        # print self.dst_trace.get_sizes_str()
        print 'Morphing: {} -> {}'.format(self.trace, self.dst_trace),
        self.morph_trace(self.trace, self.dst_trace)
        print '\t{:.2f}'.format(self.new_trace.size * 1.0 /
                                max(1, self.trace.size) - 1)
Example #4
    def getNonMonitoredTarget(webpageTrainSource):

        if config.BUILD_ALL_WEBPAGES:
            webpageTrain = Datastore.getWebpagesWangTor([101], 1, 2)

            webpageTrain = webpageTrain[0]
            config.ALL_WEBPAGES.append(webpageTrain)

            config.BUILD_ALL_WEBPAGES = False

            config.LARGEST_WEBPAGE = webpageTrain

        #print "src id: " + str(webpageTrainSource.getId())
        #print "largest target id: " + str(config.LARGEST_WEBPAGE.getId())
        #print "---"
        return config.LARGEST_WEBPAGE
    def __init__(self, handle):
        activity.Activity.__init__(self, handle)
        logger.debug("test")

        # Configure the toolbox
        toolbox = activity.ActivityToolbox(self)
        activity_toolbar = toolbox.get_activity_toolbar()
        activity_toolbar.keep.props.visible = False
        activity_toolbar.share.props.visible = False
        self.set_toolbox(toolbox)
        toolbox.show()

        # Create a data store and the app
        datastore = Datastore("127.0.0.1:8080")
        backend = BackEnd(datastore, self.get_activity_root())
        main = SortingPanel(datastore, backend)
        widget = main.get_widget()

        # pack
        self.set_canvas(widget)
        widget.grab_focus()
        widget.modify_font(pango.FontDescription("sans %d" % style.zoom(10)))
Example #6
'Forbidden' (403) error message. The message did not show up on Windows. Also, on Solaris 5.10 the separate components
worked fine; only the combination seems to fail.
PROBLEM SOLVED - The error occurs when the proxy server is set. The check on Public Cognos needs to be done on the
internal network, so execute this script before setting the proxy server.
"""
from Datastore import Datastore
from PublicCognos import PublicCognos
from lib import my_env

# Initialize Environment
projectname = "mowdr"
modulename = my_env.get_modulename(__file__)
config = my_env.get_inifile(projectname, __file__)
my_log = my_env.init_loghandler(config, modulename)
my_log.info('Start Application')
ds = Datastore(config)
for indic_id in ds.get_indicator_ids():
    if not ds.check_resource(indic_id, "cognos"):
        indicatorname = ds.get_indicator_value(indic_id, "title")[0][0]
        # Verify if the Cognos URL exists on PublicCognos. Load it if it does.
        pc_url = PublicCognos(indicatorname)  # Get my PublicCognos URL Object
        # Check if Cognos Public URL exists
        if pc_url.check_if_cognos_report_exists():
            # get redirect_file and redirect_page.
            redirect_file, redirect_url = pc_url.redirect2cognos_page(
                indic_id, config)
            # Add the Cognos URL to the indicators table. The Cognos Resource ID (id_cognos) is not available as long
            # as the package has not been created.
            ds.insert_indicator(indic_id, 'url_cognos', redirect_url)
my_log.info("End Application")
Example #7
This script will create a dcat_ap catalog file for the MOW Dataroom Open Data.
"""

from Datastore import Datastore
from datetime import datetime
from Ftp_Handler import Ftp_Handler
from lib import my_env
from xml.etree.ElementTree import ElementTree, Element, SubElement

# Initialize Environment
projectname = "vea_od"
modulename = my_env.get_modulename(__file__)
config = my_env.get_inifile(projectname, __file__)
my_log = my_env.init_loghandler(config, modulename)
my_log.info('Start Application')
ds = Datastore(config)
store = config['xmlns']['store']
lang = {'xml:lang': 'nl'}

# Define URI's for resources
catalog_uri = store + 'dr_catalog'
publ_uri = store + 'organisatie'  # Publisher
contact_uri = store + 'contact'  # ContactPoint


def get_license(el):
    """
    This method will add the license resource to the specified element
    :param el: element to which the license resource needs to be added
    :return lic_res: License Resource object
    """
Example #8
    """
    # Create new array with attribute and resource fields
    attrib_res_fields = {}
    for key in attrib_od_fields.keys():
        new_key = key + "_" + resource
        attrib_res_fields[new_key] = attrib_od_fields[key]
    handle_attributes(source, target, action, attrib_res_fields)
    return


# Initialize Environment
projectname = "mowdr"
modulename = my_env.get_modulename(__file__)
config = my_env.get_inifile(projectname)
my_env.init_logfile(config, modulename)
ds = Datastore(config)
logging.info('\n\n\nStart Application')
# all_attribs = ds.get_all_attribs()
logging.info("Handle Main Attributes on Dataset")
populate_attribs_main()
logging.info("Handle Extra Attributes on Dataset")
populate_attribs_extra()
logging.info("Handle Main Attributes that are populated from ckan")
populate_attribs_main_ckan()
resources = my_env.get_resource_types()
resource_files = my_env.get_resource_type_file()
for resource_name in resources:
    logging.info("Handle Resource Attributes for resource %s", resource_name)
    populate_attribs_resource(resource_name)
    logging.info("Handle Resource Attributes for resource %s from Open Data",
                 resource_name)
Example #9
                        if not os.path.exists(WangOpenWorldKnnfolder):
                            os.mkdir(WangOpenWorldKnnfolder)
                        else:
                            shutil.rmtree(WangOpenWorldKnnfolder)  # delete and remake folder
                            os.mkdir(WangOpenWorldKnnfolder)

                        # batch folder
                        os.mkdir(WangOpenWorldKnnfolder + '/' + 'batch')

                    for webpageId in webpageIds:
                        if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3 or config.DATA_SOURCE == 4:
                            if config.COVARIATE_SHIFT == 0:
                                # Normal case
                                webpageTrain = Datastore.getWebpagesLL(
                                    [webpageId],
                                    seed - config.NUM_TRAINING_TRACES, seed)
                                webpageTest = Datastore.getWebpagesLL(
                                    [webpageId], seed,
                                    seed + config.NUM_TESTING_TRACES)
                            else:
                                # span time training/testing

                                endSlideTrain = config.START_SLIDE_TRAIN + config.NUM_TRAINING_TRACES
                                webpageTrain = Datastore.getWebpagesLL(
                                    [webpageId], config.START_SLIDE_TRAIN,
                                    endSlideTrain)
                                #webpageTest  = Datastore.getWebpagesLL( [webpageId], len(config.DATA_SET)-config.NUM_TESTING_TRACES, len(config.DATA_SET) )
                                # a span of config.COVARIATE_SHIFT days
                                #webpageTest  = Datastore.getWebpagesLL( [webpageId], config.NUM_TRAINING_TRACES+config.COVARIATE_SHIFT, config.NUM_TRAINING_TRACES+config.COVARIATE_SHIFT+config.NUM_TESTING_TRACES)
                                webpageTest = Datastore.getWebpagesLL(
Example #10
class FileHandler:
    def __init__(self, config):
        self.config = config
        self.ds = Datastore(config)
        self.ckan = CKANConnector(self.config, self.ds)
        self.ftp = Ftp_Handler(self.config)

    def url_in_db(self, file):
        """
        Remove the url attribute for this resource.
        If the file does not contain 'empty', then calculate the URL of the file and set the result in the indicators table.
        :param file:
        :return:
        """
        logging.debug('Add/Remove file %s to indicators table.', file)
        indic_id = my_env.indic_from_file(file)
        attribute = my_env.attr_from_file('url', file)
        # Always remove attribute for this indicator. Then no insert / update logic is required.
        self.ds.remove_indicator_attribute(indic_id, attribute)
        if 'empty' not in file:
            # Calculate URL
            ftp_home = self.config['FTPServer']['ftp_home']
            # Add FTP Subdirectory (if any)
            ftpdir = self.config['FTPServer']['dir']
            if len(ftpdir) > 0:
                dirname = ftpdir + '/'
            else:
                dirname = ''
            url = ftp_home + '/' + dirname + file
            # Add URL to indicator table.
            self.ds.insert_indicator(indic_id, attribute, url)
        return

    def size_of_file(self, handledir, file):
        """
        Remove the size attribute for this resource.
        If the file does not contain 'empty', then calculate the size of the file and set the result in the indicators table.
        :param handledir: Current directory of the file.
        :param file:
        :return:
        """
        logging.debug('Add/Remove filesize %s to indicators table.', file)
        indic_id = my_env.indic_from_file(file)
        attribute = my_env.attr_from_file('size', file)
        # Always remove attribute for this indicator. Then no insert / update logic is required.
        self.ds.remove_indicator_attribute(indic_id, attribute)
        if 'empty' not in file:
            # Calculate size of file
            filename = os.path.join(handledir, file)
            size = os.path.getsize(filename)
            # Add size of file to indicator table.
            self.ds.insert_indicator(indic_id, attribute, size)
        return

    def load_metadata(self, metafile, indic_id):
        """
        Read the file with metadata and add or replace the information in the indicators table. This procedure will
        populate all fields that come from the 'Dataroom'.
        Call the function to populate the dataset if this is a new dataset or an update of the dataset.
        A prerequisite for this call is that the dataset already exists.
        Cognos Add / Remove needs to be added here.

        :param metafile: pointer to the file with metadata.

        :param indic_id: Indicator ID

        :return:
        """
        # TODO: Add URL for 'bijsluiter' to database
        log_msg = "In load_metadata for file " + metafile
        logging.debug(log_msg)
        try:
            tree = Et.parse(metafile)
        except:  # catch all errors for now, try to be more specific in the future.
            e = sys.exc_info()[1]
            ec = sys.exc_info()[0]
            log_msg = "Error during parsing metafile xml: %s %s"
            logging.critical(log_msg, e, ec)
            return
        root = tree.getroot()
        # metadata is available, get list of attributes from Dataroom Application and required for Dataset Page.
        # First collect all attribute names in list attrib_names.
        attrib_names = []
        attribs = self.ds.get_attribs_source('Dataroom')
        for row in attribs:
            attrib_names.append(row[0])
        # Then remove information from Dataroom for Dataset for this indicator ID.
        for attrib_name in attrib_names:
            self.ds.remove_indicator_attribute(indic_id, attrib_name)
        # indicatorname = ""
        # Add variable data from indicator metadata xml to indicator table.
        for child in root:
            # First get child text
            if child.text:
                child_text = child.text.strip()
            else:
                # Metadata entry does not have a value (key only).
                child_text = '(niet ingevuld)'
            # Then see how to handle this text depending on the attribute
            if child.tag in attrib_names:
                # Metadata entry exists as an attribute
                self.ds.insert_indicator(indic_id, child.tag, child_text)
                # Some metadata fields will be used more than once in Open Data set.
                # The 'notes' field is a copy of 'definitie'.
                if child.tag.lower() == 'definitie':
                    self.ds.insert_indicator(indic_id, 'notes', child_text)
            # The 'title' field will be used for the Dataset and all resources and gets special treatment.
            elif child.tag.lower() == 'title':
                # Set Title for cijfers, commentaar and Cognos report (to do).
                indicatorname = child_text
                name_cijfersxml = child_text + " - cijfers (XML)"
                name_cijferstable = child_text + " - cijfers (Tabel)"
                name_commentaar = child_text + " - commentaar"
                name_cognos = indicatorname + " - cognos"
                self.ds.insert_indicator(indic_id, 'title', indicatorname)
                self.ds.insert_indicator(indic_id, 'name_cijfersxml',
                                         name_cijfersxml)
                self.ds.insert_indicator(indic_id, 'name_commentaar',
                                         name_commentaar)
                self.ds.insert_indicator(indic_id, 'name_cijferstable',
                                         name_cijferstable)
                self.ds.insert_indicator(indic_id, 'name_cognos', name_cognos)
            elif child.tag != 'id':
                log_msg = "Found Dataroom Attribute **" + child.tag + "** not required for Open Data Dataset"
                logging.warning(log_msg)

        # Add fixed information from 'OpenData' section in Config file to indicator table.
        additional_attribs = [
            'description_cijfersxml', 'format_cijfersxml', 'tdt_cijfersxml',
            'description_cijferstable', 'format_cijferstable',
            'tdt_cijferstable', 'description_commentaar', 'format_commentaar',
            'tdt_commentaar', 'description_cognos', 'format_cognos',
            'tdt_cognos', 'bijsluiter', 'dcat_ap_profile', 'license_id',
            'author_name', 'author_email', 'maintainer_name',
            'maintainer_email', 'language'
        ]
        for add_attrib in additional_attribs:
            self.ds.insert_indicator(indic_id, add_attrib,
                                     self.config['OpenData'][add_attrib])

        # Now check if dataset exist already: is there an ID available in the indicators table for this indicator.
        values_lst = self.ds.get_indicator_value(indic_id, 'id')
        upd_pkg = "NOK"
        # I want to have 0 or 1 rows in the list
        if len(values_lst) == 0:
            log_msg = "Open Data dataset is not registered for Indicator ID %s, this should have been done"
            logging.error(log_msg, indic_id)
        elif len(values_lst) == 1:
            log_msg = "Open Data dataset exists for Indicator ID %s, no need to create nor to complain about too many"
            logging.info(log_msg, indic_id)
            upd_pkg = "OK"
        else:
            log_msg = "Multiple Open Data dataset links found for Indicator ID %s, please review"
            logging.warning(log_msg, indic_id)
        if upd_pkg == "OK":
            self.ckan.update_package(indic_id)
        return True

    def process_input_directory(self):
        """
        Function to scan input directory for new files in groups. First group contains the resource files commentaar,
        cijfersXML and cijfersTable.
        The second group of files is the metadata files.
        In the first group of files, the file is moved first. Then, if the file contains the string 'empty', the file
        is removed from the FTP site since it can no longer be available for external parties, and the resource
        information is removed from CKAN.
        If the file is valid information (does not contain the string 'empty'), the file is loaded onto the FTP site.
        In both cases the size of the file and the URL are calculated and handled: added to the database, or removed
        from the database if the filename contains 'empty'.
        Then the second group of files is handled: the metadata. The file is moved first. Then, if the dataset exists
        on the Open Data platform and the string contains 'empty' or cijfersxml does not exist, the update_package
        method is called to mark the package as private on Open Data.
        Else (the dataset does not yet exist, or cijfersxml does exist so a dataset package must be created) the
        load_metadata method is called.

        :return:
        """
        # Get ckan connection first
        scandir = self.config['Main']['scandir']
        handledir = self.config['Main']['handledir']
        log_msg = "Scan %s for files"
        logging.debug(log_msg, scandir)
        # Don't use os.listdir in the for loop since I'll move files and the loop would get confused.
        # Extract the file list first for the cijfersXML, cijfersTable or commentaar types. Cognos is also a known
        # resource type, but no files are expected for it, so no problem in leaving this in.
        type_list = my_env.get_resource_types()
        filelist = [
            file for file in os.listdir(scandir)
            if my_env.type_from_file(file) in type_list
        ]
        for file in filelist:
            log_msg = "Filename: %s"
            logging.debug(log_msg, file)
            my_env.move_file(
                file, scandir,
                handledir)  # Move file done in own function, such a hassle...
            if 'empty' in file:
                # remove_file handles paths, empty in filename, ...
                self.ftp.remove_file(file=file)
                # Strip empty from filename
                filename = re.sub(r'empty\.', '', file)
                indic_id = my_env.indic_from_file(filename)
                res_type = my_env.type_from_file(filename)
                self.ckan.remove_resource(indic_id, res_type)
            else:
                self.ftp.load_file(file=os.path.join(handledir, file))
            self.size_of_file(handledir, file)
            self.url_in_db(file)
        # Now handle meta-data
        filelist = [file for file in os.listdir(scandir) if 'metadata' in file]
        for file in filelist:
            # At least one update, so set the flag for dcat_ap create. If there is any change a new metafile is
            # required, so there is no need to have the create in the block above.
            open(os.path.join(scandir, "dcat_ap_create"), 'w').close()
            log_msg = "Filename: %s"
            logging.debug(log_msg, file)
            my_env.move_file(
                file, scandir,
                handledir)  # Move file done in own function, such a hassle...
            # Get indic_id before adding pathname to filename.
            indic_id = my_env.indic_from_file(file)
            filename = os.path.join(handledir, file)
            # Rework logic.
            # If dataset does not exist, then it needs to be created here (not in load_metadata)
            if not self.ckan.check_dataset(indic_id):
                self.ckan.create_package(indic_id)
            # If cijfersxml does not exist or metadata file has empty string, then set package to private.
            if 'empty' in file or not self.ckan.check_resource(
                    indic_id, 'cijfersxml'):
                # Required and sufficient reason to set package to private.
                # I'm sure that package ID exist.
                values_lst = self.ds.get_indicator_value(indic_id, 'id')
                self.ckan.set_pkg_private(values_lst[0][0])
            else:
                # Dataset package does not yet exist or new valid resource file available and cijfersxml exist.
                self.load_metadata(filename, indic_id)
        return

    def add_cognos_resources(self):
        """
        This procedure will find all indicators for which a Cognos report is available but the resource is not yet
        published on the Open Dataset.
        The resource will then be published on the Open Dataset.
        :return:
        """
        logging.debug("In add_cognos_resources")
        for indic_id in self.ds.get_indicator_cognos_urls():
            if not self.ds.check_resource_published(indic_id, "cognos"):
                logging.info(
                    "Cognos URL available, but not yet on Open Dataset for ID {0}"
                    .format(str(indic_id)))
                self.ckan.update_package(indic_id)
        trainingSet = []
        testingSet = []

        targetWebpage = None
        traintracesofWebsite = []
        testtracesofWebsite = []

        if config.CLASSIFIER == config.GLOVE_CLASSIFIER or config.CLASSIFIER == config.GLOVE_CLASSIFIER2:
            getModelData(webpageIds,runID)

        tempRunID = runID

        for webpageId in webpageIds:
            if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3:
                webpageTrain = Datastore.getWebpagesLL([webpageId], seed - config.NUM_TRAINING_TRACES, seed)
                webpageTest = Datastore.getWebpagesLL([webpageId], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2:
                webpageTrain = Datastore.getWebpagesHerrmann([webpageId], seed - config.NUM_TRAINING_TRACES, seed)
                webpageTest = Datastore.getWebpagesHerrmann([webpageId], seed, seed + config.NUM_TESTING_TRACES)

            webpageTrain = webpageTrain[0]
            webpageTest = webpageTest[0]
            # print webpageTrain
            # print webpageTrain.getHistogram()
            if targetWebpage == None:
                targetWebpage = webpageTrain

            preCountermeasureOverhead += webpageTrain.getBandwidth()
            preCountermeasureOverhead += webpageTest.getBandwidth()
            #print preCountermeasureOverhead
Example #12
def run_morphing():
    run_id, countermeasure_params, classifier_params = read_arguments()

    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    countermeasure.initialize()
    countermeasure = countermeasure()
    set_params(countermeasure, countermeasure_params)

    conn = mdb.connect('localhost', config.MYSQL_USER, config.MYSQL_PASSWD,
                       'Harrmann')

    def select_random_site(cluster, algorithm='PAM10'):
        c = conn.cursor()
        c.execute(
            'SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1'
            .format(algorithm), (cluster, ))
        return c.fetchone()[0]

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))

        # Selecting Sample Webpages
        src_clust = 4
        d = 7
        k = config.BUCKET_SIZE
        pt = config.NUM_TRAINING_TRACES
        pT = config.NUM_TESTING_TRACES
        alg = 'PAM10'
        dst_clust = config.cluster_distances[src_clust][d - 1]
        print('cluster: {} -> {}'.format(src_clust, dst_clust))
        conn = mdb.connect('localhost', config.MYSQL_USER, config.MYSQL_PASSWD,
                           'Harrmann')
        cur = conn.cursor()
        # cur.execute('SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1'.format(alg), (dst_clust,))
        # D_site = cur.fetchone()[0]
        # web_pages = [D_site]
        web_pages = []
        cur.execute(
            'SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT {}'
            .format(alg, k), (src_clust, ))
        for s in cur.fetchall():
            web_pages.append(s[0])
        print('Webpages:', web_pages)

        training = []
        testing = []
        rl = {'size': 0, 'time': 0}
        ov = {'size': 0, 'time': 0}

        site_n = len(web_pages)
        for site_i, wp in enumerate(web_pages):
            if site_i % 10 == 0:
                print('--> progress: {}/{}'.format(site_i, site_n))
            t = Datastore.get_trace(site_id=wp, limit=pt + pT, multi=True)
            for i, trace in enumerate(t):
                countermeasure.dst_trace = None
                morphed_trace = countermeasure.apply_to_trace(trace)
                rl['size'] += trace.getBandwidth()
                ov['size'] += morphed_trace.getBandwidth()
                if i < pT:
                    testing.append(morphed_trace)
                elif i < pT + pt:
                    training.append(morphed_trace)
                else:
                    break

        print('Overhead:\n\tsize: {}, {:.0f}%\n\ttime: N/A'.format(
            *calc_overhead(ov['size'], rl['size'])))

        print('Classifying...')
        training_set = [classifier.traceToInstance(t) for t in training]
        testing_set = [classifier.traceToInstance(t) for t in testing]
        cl = classifier.classify(str(run_index), training_set, testing_set)
        report_summary(cl,
                       classifier=classifier,
                       countermeasure=countermeasure)
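
calc_overhead is not shown in this excerpt. A plausible reading, given the 'size: {}, {:.0f}%' format string above, is that it returns the absolute extra bandwidth and the percentage increase; the sketch below is an assumption, not the project's implementation.

def calc_overhead(modified_size, original_size):
    # Hypothetical helper matching the format string above: returns
    # (extra bytes, percentage increase relative to the original size).
    extra = modified_size - original_size
    pct = 100.0 * extra / max(1, original_size)
    return extra, pct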
Example #13
        webpageIds = webpageIds[0:config.BUCKET_SIZE]

        seed = random.randint( startIndex, endIndex )

        preCountermeasureOverhead = 0
        postCountermeasureOverhead = 0

        classifier     = intToClassifier(config.CLASSIFIER)
        countermeasure = intToCountermeasure(config.COUNTERMEASURE)

        trainingSet = []
        testingSet  = []

        targetWebpage = None

        webpageAll = Datastore.getWebpagesLL( webpageIds, 0, len(config.DATA_SET) )
        #print_triplets(webpageAll)
        for webpageId in webpageIds:
            if config.DATA_SOURCE == 0:
                webpageTrain = Datastore.getWebpagesLL( [webpageId], seed-config.NUM_TRAINING_TRACES, seed )
                webpageTest  = Datastore.getWebpagesLL( [webpageId], seed, seed+config.NUM_TESTING_TRACES )
            elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2:
                webpageTrain = Datastore.getWebpagesHerrmann( [webpageId], seed-config.NUM_TRAINING_TRACES, seed )
                webpageTest  = Datastore.getWebpagesHerrmann( [webpageId], seed, seed+config.NUM_TESTING_TRACES )

            webpageTrain = webpageTrain[0]
            webpageTest = webpageTest[0]

            if targetWebpage == None:
                targetWebpage = webpageTrain
def run():
    run_id, countermeasure_params, classifier_params = read_arguments()

    output_filename_list = [
        'results',
        'k' + str(config.BUCKET_SIZE),
        'c' + str(config.COUNTERMEASURE),
        'd' + str(config.DATA_SOURCE),
        'C' + str(config.CLASSIFIER),
        'N' + str(config.TOP_N),
        't' + str(config.NUM_TRAINING_TRACES),
        'T' + str(config.NUM_TESTING_TRACES),
    ]
    output_filename = os.path.join(config.OUTPUT_DIR, '.'.join(output_filename_list))

    if not os.path.exists(config.CACHE_DIR):
        os.mkdir(config.CACHE_DIR)

    if not os.path.exists(output_filename + '.output'):
        banner = ['accuracy', 'overhead', 'timeElapsedTotal', 'timeElapsedClassifier']
        f = open(output_filename + '.output', 'w')
        f.write(','.join(banner))
        f.close()
    if not os.path.exists(output_filename + '.debug'):
        f = open(output_filename + '.debug', 'w')
        f.close()

    # Data-set Selection
    training_set_size = config.NUM_TRAINING_TRACES
    testing_set_size = config.NUM_TESTING_TRACES
    if config.DATA_SOURCE == 0:
        dataset_size = len(config.DATA_SET)
        start_index = config.NUM_TRAINING_TRACES
        end_index = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        dataset_size = 160
        max_traces_per_website_h = 160
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        dataset_size = 18
        max_traces_per_website_h = 18
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    else:
        error('Invalid data-source id:', config.DATA_SOURCE)
        return 3

    # Checking Training-set and Test-set Sizes
    info('|dataset|={}\t|training-set|={}, |testing-set|={}'.format(dataset_size, training_set_size, testing_set_size))
    if training_set_size + testing_set_size > dataset_size:
        print('[ERROR] t+T is larger than data-set size!')
        print('\tThe data-set is divided into two parts: Training set (t) and Testing set (T), so t+T must be ')
        print('\tless than or equal to the total number of data in data-set.')
        sys.exit(4)

    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    if issubclass(countermeasure, CounterMeasure):
        countermeasure.initialize()
        countermeasure = countermeasure()  # also instantiating
        new_style_cm = True
    else:
        new_style_cm = False
    countermeasure_params = countermeasure_params.split(',')
    for p in countermeasure_params:
        if not p or not p.strip():
            continue
        try:
            attr, val = p.strip().split('=', 1)
        except ValueError:
            error('Invalid parameter:', p)
            return 3
        try:
            val = int(val)
        except ValueError:
            pass
        if new_style_cm:
            countermeasure.set_param(attr, val)
        else:
            setattr(countermeasure, attr, val)

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))

        # Select a sample of size k from websites 1..N
        webpage_ids = list(range(0, config.TOP_N - 1))  # materialize so random.shuffle also works on Python 3
        random.shuffle(webpage_ids)
        webpage_ids = webpage_ids[0:config.BUCKET_SIZE]
        seed = random.randint(start_index, end_index)
        info('selected webpages:', webpage_ids)

        training_set = []
        testing_set = []
        target_webpage = None

        actual_bandwidth = 0
        modified_bandwidth = 0
        actual_timing = 0
        modified_timing = 0

        for page_id in webpage_ids:
            print('.', end='')
            sys.stdout.flush()

            # Sampling From Data-source
            if config.DATA_SOURCE == 0:
                webpage_train = Datastore.getWebpagesLL([page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesLL([page_id], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE in [1, 2]:
                webpage_train = Datastore.getWebpagesHerrmann([page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesHerrmann([page_id], seed, seed + config.NUM_TESTING_TRACES)
            else:
                error('Invalid data-source id:', config.DATA_SOURCE)
                return 3

            # Selecting Targets
            webpage_train = webpage_train[0]
            webpage_test = webpage_test[0]
            if target_webpage is None:
                target_webpage = webpage_train
            print(webpage_test, webpage_train)

            # Accounting
            actual_bandwidth += webpage_train.getBandwidth()
            actual_bandwidth += webpage_test.getBandwidth()

            # Train Countermeasure
            metadata = None
            if new_style_cm:
                countermeasure.train(src_page=webpage_train, target_page=target_webpage)
            else:
                if countermeasure in [DirectTargetSampling, WrightStyleMorphing]:
                    metadata = countermeasure.buildMetadata(webpage_train, target_webpage)

            # Applying Countermeasure (and feeding data to classifier)
            for i, w in enumerate([webpage_train, webpage_test]):
                for trace in w.getTraces():
                    actual_timing += trace.get_total_time()
                    # print(trace.get_total_time(), '-', end='')

                    if countermeasure:
                        if new_style_cm:
                            modified_trace = countermeasure.apply_to_trace(trace)
                        else:
                            if countermeasure in [DirectTargetSampling, WrightStyleMorphing]:
                                if w.getId() != target_webpage.getId():
                                    modified_trace = countermeasure.applyCountermeasure(trace, metadata)
                                else:
                                    modified_trace = trace
                            else:
                                modified_trace = countermeasure.applyCountermeasure(trace)
                    else:
                        modified_trace = trace

                    # Overhead Accounting
                    modified_bandwidth += modified_trace.getBandwidth()
                    modified_timing += modified_trace.get_total_time()
                    # print(modified_trace.get_total_time())

                    instance = classifier.traceToInstance(modified_trace)
                    if instance:
                        if i == 0:  # train-page
                            training_set.append(instance)
                        elif i == 1:  # test-page
                            testing_set.append(instance)

        # Classification
        print('')
        classification_start_time = time.time()
        cl = classifier.classify(run_id, training_set, testing_set)
        run_end_time = time.time()
        run_total_time = run_end_time - run_start_time
        classification_total_time = run_end_time - classification_start_time
        report_summary(cl, output_filename=output_filename,
                       classifier=classifier, countermeasure=countermeasure)

    return 0
def run_morphing():
    run_id, countermeasure_params, classifier_params = read_arguments()

    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    countermeasure.initialize()
    countermeasure = countermeasure()
    set_params(countermeasure, countermeasure_params)

    conn = mdb.connect('localhost', config.MYSQL_USER, config.MYSQL_PASSWD, 'Harrmann')

    def select_random_site(cluster, algorithm='PAM10'):
        c = conn.cursor()
        c.execute('SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1'.format(algorithm), (cluster,))
        return c.fetchone()[0]

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))

        # Selecting Sample Webpages
        src_clust = 4
        d = 7
        k = config.BUCKET_SIZE
        pt = config.NUM_TRAINING_TRACES
        pT = config.NUM_TESTING_TRACES
        alg = 'PAM10'
        dst_clust = config.cluster_distances[src_clust][d - 1]
        print('cluster: {} -> {}'.format(src_clust, dst_clust))
        conn = mdb.connect('localhost', config.MYSQL_USER, config.MYSQL_PASSWD, 'Harrmann')
        cur = conn.cursor()
        # cur.execute('SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1'.format(alg), (dst_clust,))
        # D_site = cur.fetchone()[0]
        # web_pages = [D_site]
        web_pages = []
        cur.execute('SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT {}'.format(alg, k), (src_clust,))
        for s in cur.fetchall():
            web_pages.append(s[0])
        print('Webpages:', web_pages)

        training = []
        testing = []
        rl = {'size': 0, 'time': 0}
        ov = {'size': 0, 'time': 0}

        site_n = len(web_pages)
        for site_i, wp in enumerate(web_pages):
            if site_i % 10 == 0:
                print('--> progress: {}/{}'.format(site_i, site_n))
            t = Datastore.get_trace(site_id=wp, limit=pt+pT, multi=True)
            for i, trace in enumerate(t):
                countermeasure.dst_trace = None
                morphed_trace = countermeasure.apply_to_trace(trace)
                rl['size'] += trace.getBandwidth()
                ov['size'] += morphed_trace.getBandwidth()
                if i < pT:
                    testing.append(morphed_trace)
                elif i < pT + pt:
                    training.append(morphed_trace)
                else:
                    break

        print('Overhead:\n\tsize: {}, {:.0f}%\n\ttime: N/A'.format(*calc_overhead(ov['size'], rl['size'])))

        print('Classifying...')
        training_set = [classifier.traceToInstance(t) for t in training]
        testing_set = [classifier.traceToInstance(t) for t in testing]
        cl = classifier.classify(str(run_index), training_set, testing_set)
        report_summary(cl, classifier=classifier, countermeasure=countermeasure)
Example #16
    def __init__(self, config):
        self.config = config
        self.ds = Datastore(config)
        self.ckan = CKANConnector(self.config, self.ds)
        self.ftp = Ftp_Handler(self.config)
                    if not os.path.exists(WangOpenWorldKnnfolder):
                        os.mkdir(WangOpenWorldKnnfolder)
                    else:
                        shutil.rmtree(WangOpenWorldKnnfolder)  # delete and remake folder
                        os.mkdir(WangOpenWorldKnnfolder)

                    # batch folder
                    os.mkdir(WangOpenWorldKnnfolder + '/' + 'batch')

                for webpageId in webpageIds:
                    if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3 or config.DATA_SOURCE == 4:
                        if config.COVARIATE_SHIFT == 0:
                            # Normal case
                            webpageTrain = Datastore.getWebpagesLL(
                                [webpageId], seed - config.NUM_TRAINING_TRACES,
                                seed)
                            webpageTest = Datastore.getWebpagesLL(
                                [webpageId], seed,
                                seed + config.NUM_TESTING_TRACES)
                        else:
                            # span time training/testing
                            webpageTrain = Datastore.getWebpagesLL(
                                [webpageId], 0, config.NUM_TRAINING_TRACES)
                            #webpageTest  = Datastore.getWebpagesLL( [webpageId], len(config.DATA_SET)-config.NUM_TESTING_TRACES, len(config.DATA_SET) )
                            # a span of config.COVARIATE_SHIFT days
                            webpageTest = Datastore.getWebpagesLL(
                                [webpageId], config.NUM_TRAINING_TRACES +
                                config.COVARIATE_SHIFT,
                                config.NUM_TRAINING_TRACES +
                                config.COVARIATE_SHIFT +
        seed = random.randint( startIndex, endIndex )

        preCountermeasureOverhead = 0
        postCountermeasureOverhead = 0

        classifier     = intToClassifier(config.CLASSIFIER)
        countermeasure = intToCountermeasure(config.COUNTERMEASURE)

        trainingSet = []
        testingSet  = []

        targetWebpage = None

        for webpageId in webpageIds:
            if config.DATA_SOURCE == 0:
                webpageTrain = Datastore.getWebpagesLL( [webpageId], seed-config.NUM_TRAINING_TRACES, seed )
                webpageTest  = Datastore.getWebpagesLL( [webpageId], seed, seed+config.NUM_TESTING_TRACES )
            elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2:
                webpageTrain = Datastore.getWebpagesHerrmann( [webpageId], seed-config.NUM_TRAINING_TRACES, seed )
                webpageTest  = Datastore.getWebpagesHerrmann( [webpageId], seed, seed+config.NUM_TESTING_TRACES )

            webpageTrain = webpageTrain[0]
            webpageTest = webpageTest[0]

            if targetWebpage == None:
                targetWebpage = webpageTrain

            preCountermeasureOverhead  += webpageTrain.getBandwidth()
            preCountermeasureOverhead  += webpageTest.getBandwidth()

            metadata = None
Example #19
def run():
    run_id, countermeasure_params, classifier_params = read_arguments()

    output_filename_list = [
        'results',
        'k' + str(config.BUCKET_SIZE),
        'c' + str(config.COUNTERMEASURE),
        'd' + str(config.DATA_SOURCE),
        'C' + str(config.CLASSIFIER),
        'N' + str(config.TOP_N),
        't' + str(config.NUM_TRAINING_TRACES),
        'T' + str(config.NUM_TESTING_TRACES),
    ]
    output_filename = os.path.join(config.OUTPUT_DIR,
                                   '.'.join(output_filename_list))

    if not os.path.exists(config.CACHE_DIR):
        os.mkdir(config.CACHE_DIR)

    if not os.path.exists(output_filename + '.output'):
        banner = [
            'accuracy', 'overhead', 'timeElapsedTotal', 'timeElapsedClassifier'
        ]
        f = open(output_filename + '.output', 'w')
        f.write(','.join(banner))
        f.close()
    if not os.path.exists(output_filename + '.debug'):
        f = open(output_filename + '.debug', 'w')
        f.close()

    # Data-set Selection
    training_set_size = config.NUM_TRAINING_TRACES
    testing_set_size = config.NUM_TESTING_TRACES
    if config.DATA_SOURCE == 0:
        dataset_size = len(config.DATA_SET)
        start_index = config.NUM_TRAINING_TRACES
        end_index = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        dataset_size = 160
        max_traces_per_website_h = 160
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        dataset_size = 18
        max_traces_per_website_h = 18
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    else:
        error('Invalid data-source id:', config.DATA_SOURCE)
        return 3

    # Checking Training-set and Test-set Sizes
    info('|dataset|={}\t|training-set|={}, |testing-set|={}'.format(
        dataset_size, training_set_size, testing_set_size))
    if training_set_size + testing_set_size > dataset_size:
        print('[ERROR] t+T is larger than data-set size!')
        print(
            '\tThe data-set is divided into two parts: Training set (t) and Testing set (T), so t+T must be '
        )
        print('\tless than or equal to the total number of data in data-set.')
        sys.exit(4)

    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    if issubclass(countermeasure, CounterMeasure):
        countermeasure.initialize()
        countermeasure = countermeasure()  # also instantiating
        new_style_cm = True
    else:
        new_style_cm = False
    countermeasure_params = countermeasure_params.split(',')
    for p in countermeasure_params:
        if not p or not p.strip():
            continue
        try:
            attr, val = p.strip().split('=', 1)
        except ValueError:
            error('Invalid parameter:', p)
            return 3
        try:
            val = int(val)
        except ValueError:
            pass
        if new_style_cm:
            countermeasure.set_param(attr, val)
        else:
            setattr(countermeasure, attr, val)

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))

        # Select a sample of size k from websites 1..N
        webpage_ids = list(range(0, config.TOP_N - 1))  # materialize so random.shuffle also works on Python 3
        random.shuffle(webpage_ids)
        webpage_ids = webpage_ids[0:config.BUCKET_SIZE]
        seed = random.randint(start_index, end_index)
        info('selected webpages:', webpage_ids)

        training_set = []
        testing_set = []
        target_webpage = None

        actual_bandwidth = 0
        modified_bandwidth = 0
        actual_timing = 0
        modified_timing = 0

        for page_id in webpage_ids:
            print('.', end='')
            sys.stdout.flush()

            # Sampling From Data-source
            if config.DATA_SOURCE == 0:
                webpage_train = Datastore.getWebpagesLL(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesLL(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE in [1, 2]:
                webpage_train = Datastore.getWebpagesHerrmann(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesHerrmann(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            else:
                error('Invalid data-source id:', config.DATA_SOURCE)
                return 3

            # Selecting Targets
            webpage_train = webpage_train[0]
            webpage_test = webpage_test[0]
            if target_webpage is None:
                target_webpage = webpage_train
            print(webpage_test, webpage_train)

            # Accounting
            actual_bandwidth += webpage_train.getBandwidth()
            actual_bandwidth += webpage_test.getBandwidth()

            # Train Countermeasure
            metadata = None
            if new_style_cm:
                countermeasure.train(src_page=webpage_train,
                                     target_page=target_webpage)
            else:
                if countermeasure in [
                        DirectTargetSampling, WrightStyleMorphing
                ]:
                    metadata = countermeasure.buildMetadata(
                        webpage_train, target_webpage)

            # Applying Countermeasure (and feeding data to classifier)
            for i, w in enumerate([webpage_train, webpage_test]):
                for trace in w.getTraces():
                    actual_timing += trace.get_total_time()
                    # print(trace.get_total_time(), '-', end='')

                    if countermeasure:
                        if new_style_cm:
                            modified_trace = countermeasure.apply_to_trace(
                                trace)
                        else:
                            if countermeasure in [
                                    DirectTargetSampling, WrightStyleMorphing
                            ]:
                                if w.getId() != target_webpage.getId():
                                    modified_trace = countermeasure.applyCountermeasure(
                                        trace, metadata)
                                else:
                                    modified_trace = trace
                            else:
                                modified_trace = countermeasure.applyCountermeasure(
                                    trace)
                    else:
                        modified_trace = trace

                    # Overhead Accounting
                    modified_bandwidth += modified_trace.getBandwidth()
                    modified_timing += modified_trace.get_total_time()
                    # print(modified_trace.get_total_time())

                    instance = classifier.traceToInstance(modified_trace)
                    if instance:
                        if i == 0:  # train-page
                            training_set.append(instance)
                        elif i == 1:  # test-page
                            testing_set.append(instance)

        # Classification
        print('')
        classification_start_time = time.time()
        cl = classifier.classify(run_id, training_set, testing_set)
        run_end_time = time.time()
        run_total_time = run_end_time - run_start_time
        classification_total_time = run_end_time - classification_start_time
        report_summary(cl,
                       output_filename=output_filename,
                       classifier=classifier,
                       countermeasure=countermeasure)

    return 0
def getModelData(webpageIds,runID):
    countermeasure = intToCountermeasure(config.COUNTERMEASURE)
    traintracesofWebsite = []
    targetWebpage = None
    if config.DATA_SOURCE == 0:
        startIndex = config.GLOVE_OPTIONS['ModelTraceNum']
        endIndex = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        maxTracesPerWebsiteH = 160
        startIndex = config.GLOVE_OPTIONS['ModelTraceNum']
        endIndex = maxTracesPerWebsiteH - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        maxTracesPerWebsiteH = 18
        startIndex = config.GLOVE_OPTIONS['ModelTraceNum']
        endIndex = maxTracesPerWebsiteH - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 3:
        config.DATA_SET = config.DATA_SET_ANDROID_TOR
        startIndex = config.GLOVE_OPTIONS['ModelTraceNum']
        endIndex = len(config.DATA_SET) - config.NUM_TESTING_TRACES
        config.PCAP_ROOT = os.path.join(config.BASE_DIR, 'pcap-logs-Android-Tor-Grouping')
    seed = random.randint(startIndex, endIndex)

    for webpageId in webpageIds:
            if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3:
                webpageTrain = Datastore.getWebpagesLL([webpageId], seed - config.GLOVE_OPTIONS['ModelTraceNum'], seed)
            elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2:
                webpageTrain = Datastore.getWebpagesHerrmann([webpageId], seed - config.GLOVE_OPTIONS['ModelTraceNum'], seed)

            webpageTrain = webpageTrain[0]

            # print webpageTrain
            # print webpageTrain.getHistogram()
            if targetWebpage == None:
                targetWebpage = webpageTrain

            metadata = None
            if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING]:
                metadata = countermeasure.buildMetadata(webpageTrain, targetWebpage)

            i = 0

            for w in [webpageTrain]:
                for trace in w.getTraces():
                    if countermeasure:
                        if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING]:
                            if w.getId() != targetWebpage.getId():
                                traceWithCountermeasure = countermeasure.applyCountermeasure(trace, metadata)
                            else:
                                traceWithCountermeasure = trace
                        else:
                            traceWithCountermeasure = countermeasure.applyCountermeasure(trace)
                    else:
                        traceWithCountermeasure = trace
                    if i == 0:
                        traintracesofWebsite.append(traceWithCountermeasure)


    generateModel(traintracesofWebsite, runID)
Example #21
print('starting')

debug = os.environ['IS_DEBUG'] == 'true'
dryRun = False  #os.environ['DRY_RUN'] == 'true'
messageHost = 'localhost' if debug else os.environ['RABBIT_ADDR']

print('messageHost: %s' % messageHost)

connection = pika.BlockingConnection(
    pika.ConnectionParameters(host=messageHost, heartbeat=20))
channel = connection.channel()

channel.queue_declare(queue='frame_jobs', durable=True)

datastore = Datastore()

imageParser = ImageParser()

print("openface ready")

if dryRun:
    print("Dry run enabled")

# print(" [x] Sent 'Hello World!'")


def onFrameJob(ch, method, properties, body):
    msg = json.loads(body)

    videoId = msg['videoId']
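
The excerpt stops inside onFrameJob. For context, a worker like this is usually wired to the queue roughly as follows; this is a sketch assuming pika >= 1.0 (where the keyword is on_message_callback), and the prefetch value and ack placement are assumptions, not taken from this project.

# Sketch of the usual consumer wiring (assumes pika >= 1.0).
channel.basic_qos(prefetch_count=1)  # hand out one frame job at a time
channel.basic_consume(queue='frame_jobs', on_message_callback=onFrameJob)
channel.start_consuming()

# Inside onFrameJob, the message would normally be acknowledged once the
# frame has been processed and stored:
#     ch.basic_ack(delivery_tag=method.delivery_tag)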
Example #22
'''
Created on Mar 7, 2011

@author: cgueret
'''
from Backend import BackEnd
from Datastore import Datastore
import os

if __name__ == '__main__':
    # Create a data store
    datastore = Datastore("127.0.0.1:8080")
    backend = BackEnd(datastore, ".")

    for file in os.listdir('items'):
        print 'Add %s' % file
        backend.add_item('items/%s' % file)
    #backend.add_item("chair.jpg")
Example #23
    def test_readfile(self):
        '''
        # By Dyer
        actualTrace = pcapparser.readfile( month=3, day=14, hour=22, webpageId=8 )

        expectedTrace = Trace(8)
        expectedTrace.addPacket( Packet( Packet.UP  , 0  , 148 ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 0  , 100 ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 0  , 52  ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 3  , 500 ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 18 , 244 ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 35 , 436 ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 75 , 52  ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 118, 292 ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 158, 52  ) )
        '''

        # By Khaled
        config.PCAP_ROOT = os.path.join(config.BASE_DIR, 'honeypatckBenattackTest/net')
        config.DATA_SOURCE = 64  # HoneyPatch pcap dataset
        config.NUM_BENIGN_CLASSES = 12

        for traceId in range(12,33):
            #traceId = 5
            traceStart = 0
            traceEnd = 3

            # training
            print 'training'
            webpage = Datastore.getWebpagesHoneyPatch([traceId], traceStart, traceEnd)
            webpageTest = webpage[0]
            webpageList = [webpageTest]

            postCountermeasureOverhead = 0

            for w in webpageList:
                for trace in w.getTraces():
                    traceWithCountermeasure = trace

                    postCountermeasureOverhead += traceWithCountermeasure.getBandwidth()

            print 'Bandwidth = ' + str(postCountermeasureOverhead)

            # testing
            print 'testing'
            webpage = Datastore.getWebpagesHoneyPatchSomePackets([traceId], traceStart, traceEnd)
            webpageTest = webpage[0]
            webpageList = [webpageTest]

            postCountermeasureOverhead = 0

            for w in webpageList:
                for trace in w.getTraces():
                    traceWithCountermeasure = trace

                    postCountermeasureOverhead += traceWithCountermeasure.getBandwidth()

            print 'Bandwidth = ' + str(postCountermeasureOverhead)

            print '------------'