import os
import random

# Project-local names assumed importable in the original module:
# config, Datastore, intToCountermeasure, generateModel.

def getModelData(webpageIds, runID):
    countermeasure = intToCountermeasure(config.COUNTERMEASURE)
    traintracesofWebsite = []
    targetWebpage = None

    # Pick the index window to sample traces from, per data source.
    if config.DATA_SOURCE == 0:
        startIndex = config.GLOVE_OPTIONS['ModelTraceNum']
        endIndex = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        maxTracesPerWebsiteH = 160
        startIndex = config.GLOVE_OPTIONS['ModelTraceNum']
        endIndex = maxTracesPerWebsiteH - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        maxTracesPerWebsiteH = 18
        startIndex = config.GLOVE_OPTIONS['ModelTraceNum']
        endIndex = maxTracesPerWebsiteH - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 3:
        config.DATA_SET = config.DATA_SET_ANDROID_TOR
        startIndex = config.GLOVE_OPTIONS['ModelTraceNum']
        endIndex = len(config.DATA_SET) - config.NUM_TESTING_TRACES
        config.PCAP_ROOT = os.path.join(config.BASE_DIR, 'pcap-logs-Android-Tor-Grouping')
    else:
        raise ValueError('Invalid data-source id: %s' % config.DATA_SOURCE)

    # 'seed' is a random split point: the ModelTraceNum traces before it
    # are used to build the model.
    seed = random.randint(startIndex, endIndex)

    for webpageId in webpageIds:
        if config.DATA_SOURCE in (0, 3):
            webpageTrain = Datastore.getWebpagesLL([webpageId], seed - config.GLOVE_OPTIONS['ModelTraceNum'], seed)
        else:  # data sources 1 and 2 (Herrmann)
            webpageTrain = Datastore.getWebpagesHerrmann([webpageId], seed - config.GLOVE_OPTIONS['ModelTraceNum'], seed)

        webpageTrain = webpageTrain[0]

        # The first webpage seen becomes the morphing target.
        if targetWebpage is None:
            targetWebpage = webpageTrain

        metadata = None
        if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING]:
            metadata = countermeasure.buildMetadata(webpageTrain, targetWebpage)

        for trace in webpageTrain.getTraces():
            if countermeasure:
                if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING]:
                    if webpageTrain.getId() != targetWebpage.getId():
                        traceWithCountermeasure = countermeasure.applyCountermeasure(trace, metadata)
                    else:
                        traceWithCountermeasure = trace
                else:
                    traceWithCountermeasure = countermeasure.applyCountermeasure(trace)
            else:
                traceWithCountermeasure = trace
            traintracesofWebsite.append(traceWithCountermeasure)

    generateModel(traintracesofWebsite, runID)
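
For context, a minimal sketch of how this helper might be invoked; the id range and run id below are illustrative assumptions, not part of the original:

# Illustrative only: build a model over the first BUCKET_SIZE site ids.
sampleIds = list(range(config.BUCKET_SIZE))
getModelData(sampleIds, runID=0)
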
        targetWebpage = None
        traintracesofWebsite = []
        testtracesofWebsite = []

        if config.CLASSIFIER in (config.GLOVE_CLASSIFIER, config.GLOVE_CLASSIFIER2):
            getModelData(webpageIds, runID)

        tempRunID = runID

        for webpageId in webpageIds:
            if config.DATA_SOURCE in (0, 3):
                webpageTrain = Datastore.getWebpagesLL([webpageId], seed - config.NUM_TRAINING_TRACES, seed)
                webpageTest = Datastore.getWebpagesLL([webpageId], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE in (1, 2):
                webpageTrain = Datastore.getWebpagesHerrmann([webpageId], seed - config.NUM_TRAINING_TRACES, seed)
                webpageTest = Datastore.getWebpagesHerrmann([webpageId], seed, seed + config.NUM_TESTING_TRACES)

            webpageTrain = webpageTrain[0]
            webpageTest = webpageTest[0]

            if targetWebpage is None:
                targetWebpage = webpageTrain

            # Bandwidth accounting before the countermeasure is applied.
            preCountermeasureOverhead += webpageTrain.getBandwidth()
            preCountermeasureOverhead += webpageTest.getBandwidth()

            metadata = None
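
The seed-based indexing above splits each site's traces into adjacent windows: training traces occupy [seed - t, seed) and testing traces [seed, seed + T). A small worked sketch with illustrative numbers (t and T stand in for NUM_TRAINING_TRACES and NUM_TESTING_TRACES; none of these values are from the original):

import random

t, T, total = 16, 4, 160             # e.g. the 160-trace Herrmann source
seed = random.randint(t, total - T)
train_window = (seed - t, seed)      # training traces: indices [seed - t, seed)
test_window = (seed, seed + T)       # testing traces: indices [seed, seed + T)
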
Example #3
import os
import sys
import time
import random

# Project-local names assumed importable here: config, Datastore, CounterMeasure,
# DirectTargetSampling, WrightStyleMorphing, read_arguments, int_to_classifier,
# int_to_countermeasure, info, error, and report_summary.

def run():
    run_id, countermeasure_params, classifier_params = read_arguments()

    output_filename_list = [
        'results',
        'k' + str(config.BUCKET_SIZE),
        'c' + str(config.COUNTERMEASURE),
        'd' + str(config.DATA_SOURCE),
        'C' + str(config.CLASSIFIER),
        'N' + str(config.TOP_N),
        't' + str(config.NUM_TRAINING_TRACES),
        'T' + str(config.NUM_TESTING_TRACES),
    ]
    output_filename = os.path.join(config.OUTPUT_DIR,
                                   '.'.join(output_filename_list))

    if not os.path.exists(config.CACHE_DIR):
        os.mkdir(config.CACHE_DIR)

    if not os.path.exists(output_filename + '.output'):
        banner = [
            'accuracy', 'overhead', 'timeElapsedTotal', 'timeElapsedClassifier'
        ]
        with open(output_filename + '.output', 'w') as f:
            f.write(','.join(banner))
    if not os.path.exists(output_filename + '.debug'):
        with open(output_filename + '.debug', 'w'):
            pass  # just create the empty debug file

    # Data-set Selection
    training_set_size = config.NUM_TRAINING_TRACES
    testing_set_size = config.NUM_TESTING_TRACES
    if config.DATA_SOURCE == 0:
        dataset_size = len(config.DATA_SET)
        start_index = config.NUM_TRAINING_TRACES
        end_index = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        dataset_size = 160
        max_traces_per_website_h = 160
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        dataset_size = 18
        max_traces_per_website_h = 18
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    else:
        error('Invalid data-source id:', config.DATA_SOURCE)
        return 3

    # Checking Training-set and Test-set Sizes
    info('|dataset|={}\t|training-set|={}, |testing-set|={}'.format(
        dataset_size, training_set_size, testing_set_size))
    if training_set_size + testing_set_size > dataset_size:
        print('[ERROR] t+T is larger than the data-set size!')
        print('\tThe data-set is split into a training set (t) and a testing set (T),')
        print('\tso t+T must not exceed the total number of traces per site.')
        sys.exit(4)

    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    # Guard: int_to_countermeasure may return None or a legacy class, for
    # which a bare issubclass() would raise a TypeError.
    if isinstance(countermeasure, type) and issubclass(countermeasure, CounterMeasure):
        countermeasure.initialize()
        countermeasure = countermeasure()  # also instantiating
        new_style_cm = True
    else:
        new_style_cm = False
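    # New-style countermeasures subclass CounterMeasure and are driven through
    # initialize()/set_param()/train()/apply_to_trace(); legacy ones are used
    # as plain classes via buildMetadata()/applyCountermeasure() below.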
    countermeasure_params = countermeasure_params.split(',')
    for p in countermeasure_params:
        if not p or not p.strip():
            continue
        try:
            attr, val = p.strip().split('=', 1)
        except ValueError:
            error('Invalid parameter:', p)
            return 3
        try:
            val = int(val)
        except ValueError:
            pass
        if new_style_cm:
            countermeasure.set_param(attr, val)
        else:
            setattr(countermeasure, attr, val)
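
    # Illustrative example (the parameter string is an assumption): passing
    # "size=1500,mode=pad" makes the loop above set countermeasure.size = 1500
    # (int-parsed) and countermeasure.mode = 'pad'.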

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))

        # Select a sample of size k from websites 1..N
        # list() so random.shuffle works on Python 3; covers ids 0..TOP_N-1.
        webpage_ids = list(range(config.TOP_N))
        random.shuffle(webpage_ids)
        webpage_ids = webpage_ids[:config.BUCKET_SIZE]
        seed = random.randint(start_index, end_index)
        info('selected webpages:', webpage_ids)

        training_set = []
        testing_set = []
        target_webpage = None

        actual_bandwidth = 0
        modified_bandwidth = 0
        actual_timing = 0
        modified_timing = 0

        for page_id in webpage_ids:
            print('.', end='')
            sys.stdout.flush()

            # Sampling From Data-source
            if config.DATA_SOURCE == 0:
                webpage_train = Datastore.getWebpagesLL(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesLL(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE in [1, 2]:
                webpage_train = Datastore.getWebpagesHerrmann(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesHerrmann(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            else:
                error('Invalid data-source id:', config.DATA_SOURCE)
                return 3

            # Selecting Targets
            webpage_train = webpage_train[0]
            webpage_test = webpage_test[0]
            if target_webpage is None:
                target_webpage = webpage_train
            print(webpage_test, webpage_train)

            # Accounting
            actual_bandwidth += webpage_train.getBandwidth()
            actual_bandwidth += webpage_test.getBandwidth()

            # Train Countermeasure
            metadata = None
            if new_style_cm:
                countermeasure.train(src_page=webpage_train,
                                     target_page=target_webpage)
            else:
                if countermeasure in [
                        DirectTargetSampling, WrightStyleMorphing
                ]:
                    metadata = countermeasure.buildMetadata(
                        webpage_train, target_webpage)

            # Applying Countermeasure (and feeding data to classifier)
            for i, w in enumerate([webpage_train, webpage_test]):
                for trace in w.getTraces():
                    actual_timing += trace.get_total_time()
                    # print(trace.get_total_time(), '-', end='')

                    if countermeasure:
                        if new_style_cm:
                            modified_trace = countermeasure.apply_to_trace(
                                trace)
                        else:
                            if countermeasure in [
                                    DirectTargetSampling, WrightStyleMorphing
                            ]:
                                if w.getId() != target_webpage.getId():
                                    modified_trace = countermeasure.applyCountermeasure(
                                        trace, metadata)
                                else:
                                    modified_trace = trace
                            else:
                                modified_trace = countermeasure.applyCountermeasure(
                                    trace)
                    else:
                        modified_trace = trace

                    # Overhead Accounting
                    modified_bandwidth += modified_trace.getBandwidth()
                    modified_timing += modified_trace.get_total_time()
                    # print(modified_trace.get_total_time())

                    instance = classifier.traceToInstance(modified_trace)
                    if instance:
                        if i == 0:  # train-page
                            training_set.append(instance)
                        elif i == 1:  # test-page
                            testing_set.append(instance)

        # Classification
        print('')
        classification_start_time = time.time()
        cl = classifier.classify(run_id, training_set, testing_set)
        run_end_time = time.time()
        run_total_time = run_end_time - run_start_time
        classification_total_time = run_end_time - classification_start_time
        report_summary(cl,
                       output_filename=output_filename,
                       classifier=classifier,
                       countermeasure=countermeasure)

    return 0
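
A minimal sketch of a script entry point for this driver (not part of the captured example); run()'s integer returns double as process exit codes:

if __name__ == '__main__':
    sys.exit(run())
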
        postCountermeasureOverhead = 0

        classifier = intToClassifier(config.CLASSIFIER)
        countermeasure = intToCountermeasure(config.COUNTERMEASURE)

        trainingSet = []
        testingSet = []

        targetWebpage = None

        for webpageId in webpageIds:
            if config.DATA_SOURCE == 0:
                webpageTrain = Datastore.getWebpagesLL([webpageId], seed - config.NUM_TRAINING_TRACES, seed)
                webpageTest = Datastore.getWebpagesLL([webpageId], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE in (1, 2):
                webpageTrain = Datastore.getWebpagesHerrmann([webpageId], seed - config.NUM_TRAINING_TRACES, seed)
                webpageTest = Datastore.getWebpagesHerrmann([webpageId], seed, seed + config.NUM_TESTING_TRACES)

            webpageTrain = webpageTrain[0]
            webpageTest = webpageTest[0]

            if targetWebpage is None:
                targetWebpage = webpageTrain

            preCountermeasureOverhead += webpageTrain.getBandwidth()
            preCountermeasureOverhead += webpageTest.getBandwidth()

            metadata = None
            if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING]:
                metadata = countermeasure.buildMetadata(webpageTrain, targetWebpage)
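
For reference, the pre/post counters in these fragments typically feed a bandwidth-overhead percentage. A minimal sketch, assuming postCountermeasureOverhead is accumulated from the modified traces the same way preCountermeasureOverhead is from the originals:

# Illustrative only: relative bandwidth added by the countermeasure.
if preCountermeasureOverhead > 0:
    overheadPct = 100.0 * (postCountermeasureOverhead - preCountermeasureOverhead) / preCountermeasureOverhead
    print('bandwidth overhead: %.1f%%' % overheadPct)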