Exemple #1
def driver():
    """Reservoir-sample 100 users from the BlackBox stream.

    Reads the input file path, the stream size and the number of asks from
    ``argv[1]``, ``argv[2]`` and ``argv[3]``.

    Returns:
        list[str]: one CSV row ``"<seq_no>,<r0>,<r20>,<r40>,<r60>,<r80>"``
        for every 100th user seen, where ``rN`` is reservoir slot ``N`` at
        that moment.
    """
    reservoir = [0] * 100
    sequence_no = 0
    num_of_asks = int(argv[3])
    stream_size = int(argv[2])
    bx = BlackBox()
    results = []

    for _ in range(num_of_asks):
        stream_users = bx.ask(str(argv[1]), stream_size)
        for user in stream_users:
            sequence_no += 1
            if sequence_no <= 100:
                # Still filling: the first 100 users are kept unconditionally.
                reservoir[sequence_no - 1] = user
            else:
                # reservoir full.
                # choose to keep user with prob 100 / sequence_no.
                p_keep_user = random.randint(0, 100000) % sequence_no
                if p_keep_user < 100:
                    # have to keep user.
                    # replace one elt in reservoir with uniform prob.
                    position_to_replace = random.randint(0, 100000) % 100
                    reservoir[position_to_replace] = user

            if sequence_no % 100 == 0:
                # Snapshot five fixed slots after every 100 users.
                results.append(
                    str(sequence_no) + "," + reservoir[0] + "," +
                    reservoir[20] + "," + reservoir[40] + "," +
                    reservoir[60] + "," + reservoir[80])

    return results
Exemple #2
def main(input_file_path, stream_size, num_of_asks, output_file_path):
    """Run a Bloom filter over the BlackBox stream and log the false-positive rate.

    For each ask, every user is hashed with ``myhashs``. A user whose bits
    are all already set in the module-level ``bloom_filter`` but who has not
    actually been seen before counts as a false positive.

    Args:
        input_file_path: file handed to ``BlackBox.ask``.
        stream_size: number of users requested per ask.
        num_of_asks: number of asks to perform.
        output_file_path: file that receives one ``"<ask>,<rate>"`` line per
            ask, where rate is cumulative false positives divided by the
            number of users processed so far.
    """
    global bloom_filter
    bx = BlackBox()
    gt_set = set()
    fp = 0
    # The context manager guarantees the report is flushed and closed.
    with open(output_file_path, "wt") as output_file:
        for it in range(num_of_asks):
            stream_users = bx.ask(input_file_path, stream_size)
            for s in stream_users:
                indices = myhashs(s)
                is_not_present = any(not bloom_filter[i] for i in indices)

                if is_not_present:
                    for i in indices:
                        bloom_filter[i] = True
                elif s not in gt_set:
                    # Filter says "seen" although the user is new.
                    fp += 1

                # Record ground truth so repeats are not counted as
                # false positives later on.
                gt_set.add(s)

            processed = (it + 1) * stream_size
            rate = fp / processed if processed else 0.0
            output_file.write("{},{}\n".format(it, rate))
Exemple #3
def flajolet_martin(input_file_path, stream_size, num_of_asks,
                    output_file_path):
    """Estimate distinct users per ask with Flajolet-Martin and write a CSV.

    For each ask, the maximum number of trailing zero bits is tracked per
    hash function (``number_of_hashes`` of them, produced by ``myhashs``) and
    handed to ``calculate_count`` to obtain the estimate.

    Args:
        input_file_path: file handed to ``BlackBox.ask``.
        stream_size: number of users requested per ask.
        num_of_asks: number of asks to perform.
        output_file_path: CSV file with header
            ``Time,Ground Truth,Estimation`` and one row per ask.
    """
    global number_of_hashes
    bx = BlackBox()
    predicted_sum = 0
    actual_sum = 0
    # The context manager guarantees the CSV is flushed and closed.
    with open(output_file_path, "wt") as output_file:
        output_file.write("Time,Ground Truth,Estimation\n")
        for it in range(num_of_asks):
            stream_users = bx.ask(input_file_path, stream_size)
            max_number_of_trainling_zeros = [-sys.maxsize] * number_of_hashes
            for s in stream_users:
                hashes = myhashs(s)
                for i, h in enumerate(hashes):
                    # Zero-padded binary form; trailing zeros are counted on
                    # the string representation.
                    bits = format(h, '016b')
                    number_of_trailing_zeros = len(bits) - len(bits.rstrip('0'))

                    if (number_of_trailing_zeros >
                            max_number_of_trainling_zeros[i]):
                        max_number_of_trainling_zeros[i] = number_of_trailing_zeros
            count = calculate_count(max_number_of_trainling_zeros)
            # NOTE(review): the "Ground Truth" column is stream_size, not the
            # number of distinct users in the batch - confirm this is intended.
            output_file.write("{},{},{}\n".format(it, stream_size, int(count)))
            predicted_sum += count
            actual_sum += stream_size
    if actual_sum:
        # Overall ratio of estimated to reported counts.
        print(predicted_sum / actual_sum)
Exemple #4
def main():
    """Ask the black box ``numOfAsks`` times and accumulate the FPR report.

    Each batch is passed to ``bloom_filtering`` together with its ask index;
    the returned text is appended to a report that starts with the
    ``Time,FPR`` header.
    """
    source = BlackBox()
    report = "Time,FPR"
    for ask_index in range(numOfAsks):
        report = report + bloom_filtering(
            ask_index, source.ask(inputFile, streamSize))
def main():
    """Reservoir-sample the BlackBox stream and report five slots per ask.

    Command-line arguments: ``<input file> <stream size> <num asks>
    <output file>``. The first batch seeds the reservoir; every later user
    replaces a random slot with probability ``window_size / seq_num``.
    After each ask a row ``seq_num,slot0,slot20,slot40,slot60,slot80`` is
    written to the output file (preceded by a newline) and printed.
    """
    input_file, output_file = sys.argv[1], sys.argv[4]
    stream_size, num_of_asks = int(sys.argv[2]), int(
        sys.argv[3])  #stream size = 100

    bx = BlackBox()
    seq_num = 0
    window_size = 100
    user_list = []
    with open(output_file, "w") as f:
        for _ in range(num_of_asks):
            stream_users = bx.ask(input_file, stream_size)
            if seq_num == 0:
                # First batch: keep everything.
                # NOTE(review): assumes stream_size == window_size (100).
                user_list += stream_users
                seq_num += stream_size
            else:
                for user in stream_users:
                    seq_num += 1
                    prob = random.randint(0, 100000) % seq_num
                    if prob < window_size:
                        pos = random.randint(0, 100000) % window_size
                        user_list[pos] = user
            row = "{},{},{},{},{},{}".format(seq_num, user_list[0],
                                             user_list[20], user_list[40],
                                             user_list[60], user_list[80])
            f.write("\n" + row)
            print(row)
def driver():
    """Compare Flajolet-Martin estimates with the true distinct count per ask.

    Reads the input file, stream size, number of asks and output file from
    ``argv[1]`` .. ``argv[4]``. Prints the ratio of summed estimates to
    summed ground truths and writes a ``Time,Ground Truth,Estimation`` CSV.
    """
    bx = BlackBox()
    num_of_asks = int(argv[3])
    stream_size = int(argv[2])
    results = []

    for i in range(num_of_asks):
        stream_users = bx.ask(str(argv[1]), stream_size)
        # Distinct users actually present in this batch.
        ground_truth = set(stream_users)
        estimation = flajolet_martin(stream_users)
        results.append((i, len(ground_truth), int(estimation)))

    sum_ground_truth = sum(r[1] for r in results)
    sum_estimations = sum(r[2] for r in results)

    # Use the totals over all asks, not just the last row, and skip the
    # ratio when there is nothing to divide by.
    if sum_ground_truth:
        print("Final Result = ", sum_estimations / sum_ground_truth)

    with open(str(argv[4]), "w") as file:
        file.write("Time,Ground Truth,Estimation")
        for r in results:
            file.write("\n" + str(r[0]) + "," + str(r[1]) + "," + str(r[2]))
def main():
    """Ask the black box ``numOfAsks`` times and accumulate the estimate report.

    Each batch is passed to ``flajolet_martin`` together with its ask index;
    the returned text is appended to a report that starts with the
    ``Time,Ground Truth,Estimation`` header.
    """
    source = BlackBox()
    report = "Time,Ground Truth,Estimation"
    for ask_index in range(numOfAsks):
        report = report + flajolet_martin(
            ask_index, source.ask(inputFile, streamSize))
Exemple #8
def main():
    """Run a Bloom filter over the BlackBox stream and log the FPR per ask.

    Command-line arguments: ``<input file> <stream size> <num asks>
    <output file>``. For each ask a line ``"\\n<ask>,<FPR>"`` is written,
    where ``FPR = FP / (FP + TN)`` over users not yet seen in that ask
    (0.0 when there are none).
    """
    input_file, output_file = sys.argv[1], sys.argv[4]
    stream_size, num_of_asks = int(sys.argv[2]), int(sys.argv[3])

    hash_function_num = 16
    m = 69997

    bx = BlackBox()
    boom_filter = [0] * m
    with open(output_file, "w") as f:
        # Named ask_index so the loop does not shadow the ``time`` module.
        for ask_index in range(num_of_asks):
            stream_users = bx.ask(input_file, stream_size)
            # NOTE(review): reset per ask while the filter persists across
            # asks, so users from earlier asks count as false positives here.
            visited_users = set()
            FP, TN = 0, 0
            for user in stream_users:
                hash_values = myhashs(user)
                count = sum(
                    1 for hash_value in hash_values
                    if boom_filter[hash_value] == 1)

                if user not in visited_users:
                    if count == hash_function_num:
                        # All bits set although the user is new.
                        FP += 1
                    else:
                        TN += 1

                for hash_value in hash_values:
                    boom_filter[hash_value] = 1
                visited_users.add(user)
            FPR = FP / (FP + TN) if (FP + TN) else 0.0
            f.write("\n{},{}".format(ask_index, FPR))
Exemple #9
def getServices(body, userId):
    """Return the widgets for ``body`` as a JSON string, tagged with the agency id.

    Builds a ``UserService`` for ``userId``, asks a ``BlackBox`` for the
    widgets of ``body``, stamps each one with ``AgencyID`` and serialises
    the result with ``json.dumps``.
    """
    endpoint = (userServiceApi + "?format=json&agencyId=" + agencyId +
                "&userId=" + str(userId))
    userService = UserService(endpoint)
    print(" [x] Loading user service " + endpoint)
    widgets = BlackBox(serviceApi, userService).getWidgets(body)
    for widget in widgets:
        widget["AgencyID"] = agencyId
    return json.dumps(widgets)
Exemple #10
 def __init__(self, files, debugMode):
     """Set every attribute to its default and build the BlackBox model.

     Args:
         files: forwarded as the first argument of ``BlackBox``.
         debugMode: stored unchanged as ``self.debugMode``.
     """
     # Parsed game state; nothing is known yet.
     self.__cards = self.__tableCards = None
     self.__table = self.__players = None

     # End-of-game callback wiring.
     self.doCallback, self.endGameCallback = False, None

     self.win = True
     self.debugMode = debugMode
     self.slackMessage = ""
     self.blackbox = BlackBox(files, 6, 7, 7, 6)
     self.playerName = -1

     # Six-slot response vector, all zeros initially.
     self.response = [0] * 6
     self.reload = 0
     self.betAmount = 100
def main():
    """Flajolet-Martin distinct-count estimation over the BlackBox stream.

    Command-line arguments: ``<input file> <stream size> <num asks>
    <output file>``. For every ask, each of the ``hash_function_num`` hash
    functions yields the estimate ``2 ** r`` (``r`` = longest run of
    trailing zero bits seen). The estimates are averaged per group and the
    median group average is reported next to the true distinct count.
    """
    input_file, output_file = sys.argv[1], sys.argv[4]
    stream_size, num_of_asks = int(sys.argv[2]), int(sys.argv[3])

    hash_function_num = 1000
    groups_len = 10
    hash_functions_per_group = int(hash_function_num / groups_len)

    bx = BlackBox()

    with open(output_file, "w") as f:
        f.write("Time,Ground Truth,Estimation")
        # Running totals; nothing in the visible code consumes them.
        est_all = 0
        gt_all = 0
        # Named ask_index so the loop does not shadow the ``time`` module.
        for ask_index in range(num_of_asks):
            stream_users = bx.ask(input_file, stream_size)
            gt = set(stream_users)
            # One list of hash values per user (assumes myhashs returns at
            # least hash_function_num integers - TODO confirm).
            all_hash_values = [myhashs(user) for user in stream_users]

            estimates = []
            for i in range(hash_function_num):
                longest_trailing_zeros = 0
                for hash_values in all_hash_values:
                    hash_value = hash_values[i]
                    trailing_zeros = 0
                    while hash_value & 1 == 0 and hash_value > 0:
                        trailing_zeros += 1
                        hash_value = hash_value >> 1
                    longest_trailing_zeros = max(trailing_zeros,
                                                 longest_trailing_zeros)
                estimates.append(2 ** longest_trailing_zeros)

            estimates_avg = []
            for i in range(groups_len):
                first = i * hash_functions_per_group
                group = estimates[first:first + hash_functions_per_group]
                estimates_avg.append(float(sum(group) / hash_functions_per_group))
            # The median is only meaningful on sorted averages.
            estimates_avg.sort()
            estimate = round(estimates_avg[int(groups_len / 2)])

            est_all += estimate
            gt_all += len(gt)
            f.write("\n{},{},{}".format(ask_index, len(gt), estimate))
Exemple #12
def main():
    """Build a BlackBox from ``repack/config.ini`` and print its metrics.

    NOTE(review): the call that wrapped the f-string was missing in this
    copy; ``print`` is assumed - confirm against the original source.
    """
    blackbox = BlackBox(config="repack/config.ini")

    #blackbox.updateConfig("POLYFIT", "poly_degree", 15, wipe=False)
    print(
        f"[+] {blackbox.accuracy}, {blackbox.test_hash}, {blackbox.train_hash}"
    )
Exemple #13
def driver():
    """Run the Bloom filter over the BlackBox stream and save the FPR per ask.

    Reads the input file, stream size, number of asks and output file from
    ``argv[1]`` .. ``argv[4]``. The bit array and the set of previously
    seen users are shared across asks and handed to ``bloom_filter``.
    """
    bx = BlackBox()
    num_of_asks = int(argv[3])
    stream_size = int(argv[2])
    fpr = []
    bit_array = [0] * 69997
    previous_users = set()

    for i in range(num_of_asks):
        stream_users = bx.ask(str(argv[1]), stream_size)
        batch_fpr = bloom_filter(bit_array, stream_users, previous_users)
        fpr.append((i, batch_fpr))

    # NOTE(review): the body of this loop was missing in this copy; printing
    # each (ask, fpr) pair is assumed.
    for f in fpr:
        print(f)

    with open(str(argv[4]), "w") as file:
        # Rows start with "\n", so a header line must come first.
        # NOTE(review): header text assumed - confirm the expected columns.
        file.write("Time,FPR")
        for f in fpr:
            file.write("\n" + str(f[0]) + "," + str(f[1]))
Exemple #14
    # print(result)

    return result

if __name__ == '__main__':
    # Script entry point: for every ask, track the longest run of trailing
    # '0' characters per hash function over the batch, then replace each run
    # length r with 2**r.
    # NOTE(review): this copy stops after the 2**r step; nothing visible here
    # fills ground_truth / estimate_length or writes to output_path.

    start = time.time()

    # Command-line arguments: <input path> <stream size> <num asks> <output path>.
    input_path = sys.argv[1]
    stream_size = int(sys.argv[2])
    num_of_asks = int(sys.argv[3])
    output_path = sys.argv[4]

    bx = BlackBox()
    ground_truth = []
    estimate_length = []

    for _ in range(num_of_asks):
        stream_users = bx.ask(input_path, stream_size)
        # One slot per hash function; hash_num is defined outside this block.
        longest_trail = [0 for _ in range(hash_num)]
        for user in stream_users:
            user_hash = myhashs(user)
            for i, h in enumerate(user_hash):
                # len(h) - len(h.rstrip('0')) counts trailing '0' characters,
                # so each hash is presumably a binary string - TODO confirm
                # against myhashs.
                longest_trail[i] = max(longest_trail[i],
                                       len(h) - len(h.rstrip('0')))

        # Turn each run length r into 2**r, in place.
        for i in range(hash_num):
            longest_trail[i] = 2**longest_trail[i]
Exemple #15
import random
import sys

#class BlackBox:

#    def ask(self, file, num):
#        lines = open(file, 'r').readlines()
#        users = [0 for i in range(num)]
#        for i in range(num):
#            users[i] = lines[random.randint(0, len(lines) - 1)].rstrip("\n")
#        return users

# This is how we read the data!
from blackbox import BlackBox

# Stream source used by the rest of the script.
bx = BlackBox()

# Command-line arguments: <users file> <stream size> <number of asks>.
users_fn = sys.argv[1]
stream_size = int(sys.argv[2])
num_asks = int(sys.argv[3])
print(f"This is user FN: {users_fn}")
print(f"This is stream size: {stream_size}")
print(f"This is number of asks: {num_asks}")

# Task 2.1 -- Bloom Filter: bit array, all bits cleared.
filter_bit_array = [0 for _ in range(69997)]

import binascii

num_rows = 69997
all_primes = [
Exemple #16
        for s in stream:
            h = myhashs(s)
            hash_str = list(map(lambda x: bin(x), h))
            tzeros = list(map(lambda x: len(x) - len(x.rstrip("0")), hash_str))
            longest_tzeros = [
                max(a, b) for a, b in zip(longest_tzeros, tzeros)
        data = sorted([2**r for r in longest_tzeros])
        data = [
            data[GROUP_SIZE * i:GROUP_SIZE * (i + 1)] for i in range(NUM_GROUP)
        avg = list(map(lambda x: sum(x) / len(x), data))
        med = int(len(avg) / 2)
        return round(avg[med])

if __name__ == "__main__":
    # Command-line arguments: <input> <stream size> <num asks> <output>.
    infile = sys.argv[1]
    stream_size = int(sys.argv[2])
    num_asks = int(sys.argv[3])
    outfile = sys.argv[4]
    # One ask is issued before timing starts; its result is only stored.
    previous_user = BlackBox().ask(infile, stream_size)
    t0 = time.time()
    with open(outfile, 'w+') as f:
        for i in range(num_asks):
            # A fresh BlackBox is created for every ask.
            stream = BlackBox().ask(infile, stream_size)
            fpr = Task2.one_run(stream)
            f.write(','.join(map(str, (i, stream_size, fpr))) + '\n')
    print("Duration:", time.time() - t0)
def _load_pickle_file(path):
    """Unpickle and return the object stored at ``path``, closing the file."""
    # NOTE(security): pickle can execute arbitrary code while loading; these
    # paths must only ever point at trusted, locally produced artifacts.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def testBlackBoxSentimentAnalysis(data_type, model_type, num_samples):
    """Run the black-box attack on positive and negative test documents.

    Loads the GloVe vectors, embedding map, token data and (for the local
    model types) the pickled model, then attacks the first ``num_samples``
    positive and negative test documents and prints the success rate.

    Args:
        data_type: ``'IMDB'``, ``'RT'`` or ``'Kaggle'``.
        model_type: passed to ``get_blackbox_classifier_score`` and
            ``BlackBox``; for ``'LR'``, ``'LSTM'`` and ``'CNN'`` the matching
            pickled model file is loaded as well.
        num_samples: maximum number of documents taken from each class.

    Raises:
        ValueError: if ``data_type`` is not one of the supported datasets.
    """
    start = time.time()

    ## Import glove-vectors once
    if data_type in ('RT', 'IMDB'):
        glove_path = "glove_final.json"
    elif data_type == 'Kaggle':
        glove_path = "datasets/Kaggle/glove_kaggle.json"
    else:
        raise ValueError("unsupported data_type: {!r}".format(data_type))
    with open(glove_path, "rb") as handle:
        glove_vectors = json.load(handle)

    # Loaded as in the original flow; not referenced again in this function.
    embed_map = _load_pickle_file(
        "datasets/{0}/{0}_embed_map.p".format(data_type))

    ## Get Dataset (3 types: IMDB, RT, Kaggle)
    data = _load_pickle_file("datasets/{0}/{0}_tokens.p".format(data_type))

    ## Get Model (3 types: LR, LSTM, CNN)
    if model_type in ('LR', 'LSTM', 'CNN'):
        # Kaggle models carry the "TCD" tag in their file name, the others "SA".
        task = 'TCD' if data_type == 'Kaggle' else 'SA'
        # Loaded as in the original flow; not referenced again in this function.
        model = _load_pickle_file(
            "models/{0}/{0}_{1}_{2}.p".format(model_type, task, data_type))

    #---- DONE LOADING ----------
    end = time.time()
    # The precision belongs to np.round, not to str.format.
    print("DONE LOADING: {} minutes".format(np.round((end - start) / 60, 4)))

    # NOTE(review): nothing in this function appends to percent_perturbed, so
    # the average printed at the end is NaN - confirm where it should be filled.
    percent_perturbed = []

    num_successes = 0
    total_docs = 0

    for label in ('pos', 'neg'):
        for token_list in data['test'][label][0:num_samples]:
            sentence = TreebankWordDetokenizer().detokenize(token_list)
            y = get_blackbox_classifier_score(model_type, sentence)
            y_class = np.round(y, 0)

            blackbox = BlackBox(token_list, y_class, 0.8, model_type,
                                glove_vectors, data_type)
            res = blackbox.blackBoxAttack()
            if res is not None:
                num_successes += 1
            total_docs += 1

    # Rate over the documents actually attacked (a class may hold fewer than
    # num_samples documents); guard the empty case.
    if total_docs:
        success_rate = np.round((num_successes / total_docs) * 100, 3)
    else:
        success_rate = 0.0
    if percent_perturbed:
        perturb_rate = np.round(np.mean(percent_perturbed) * 100, 3)
    else:
        perturb_rate = float('nan')
    print("Avg % Perturbed: {}".format(perturb_rate))
    print("{} | {} | {}".format(data_type, model_type, success_rate))

# testBlackBoxSentimentAnalysis('RT', 'Google_NLP')
# testBlackBoxSentimentAnalysis('RT', 'IBM_Watson')
# testBlackBoxSentimentAnalysis('RT', 'Microsoft_Azure')
# testBlackBoxSentimentAnalysis('RT', 'AWS_Comprehend')
# testBlackBoxSentimentAnalysis('RT', 'FB_fastText')

# testBlackBoxSentimentAnalysis('IMDB', 'Google_NLP')
# testBlackBoxSentimentAnalysis('IMDB', 'IBM_Watson')
# testBlackBoxSentimentAnalysis('IMDB', 'Microsoft_Azure')
# testBlackBoxSentimentAnalysis('IMDB', 'AWS_Comprehend')
# testBlackBoxSentimentAnalysis('IMDB', 'FB_fastText')

# testBlackBoxSentimentAnalysis('Kaggle', 'Google_NLP', 2)
# testBlackBoxSentimentAnalysis('Kaggle', 'IBM_Watson', 2)
# testBlackBoxSentimentAnalysis('Kaggle', 'Microsoft_Azure',10)
# testBlackBoxSentimentAnalysis('Kaggle', 'AWS_Comprehend',10)
# testBlackBoxSentimentAnalysis('Kaggle', 'FB_fastText',2) 16.2%P | 75%A
Exemple #18
class TakeAction:
    """Translate JSON game events into action replies using a BlackBox model.

    The event and action names handled here (call, fold, allin, raise, bet,
    chips, blinds) suggest a poker client - presumably; confirm with the
    project documentation.

    NOTE(review): several statements are missing from this copy of the class
    (empty ``if``/``elif`` bodies, an ``except`` without its ``try``,
    unterminated literals), so it does not parse as-is. The gaps are flagged
    inline below.
    """

    def __init__(self, files, debugMode):
        """Set every attribute to its default and build the BlackBox model.

        Args:
            files: forwarded as the first argument of ``BlackBox``.
            debugMode: stored as ``self.debugMode``; the visible prints are
                guarded by ``not self.debugMode``.
        """
        self.__cards = None
        self.__tableCards = None
        self.__table = None
        self.__players = None
        self.doCallback = False
        self.endGameCallback = None
        self.win = True
        self.debugMode = debugMode
        self.slackMessage = ""
        self.blackbox = BlackBox(files, 6, 7, 7, 6)
        # -1 means "name not learned yet"; set from the first __action event.
        self.playerName = -1
        self.response = [0, 0, 0, 0, 0, 0]
        self.reload = 0
        self.betAmount = 100

    def setCallback(self, callback):
        """Store ``callback`` as the end-of-game callback and enable it."""
        self.doCallback = True
        self.endGameCallback = callback

    def getVectorResponse(self):
        """Refresh ``self.response`` by running the BlackBox on the current state."""
        # format of response vector: [call/check, fold, allin, raise/bet, reload (T/F), bet amount]
        # we can also perform a check (i.e. calling with an amount of 0 chips)
        cards = self.__cards.copy()
        self.response = self.blackbox.run(cards, self.__players, self.__table)
        if not self.debugMode:
            # NOTE(review): the body of this `if` is missing in this copy.

    # Parses the Json and chooses an appropriate action
    def processRequest(self, jsonObject):
        """Parse one JSON event and return the JSON reply, or None.

        A reply (``json.dumps`` of an action object) is returned for
        ``__action`` and ``__bet`` events and, when ``self.response[5]`` is
        above 0.5, for ``__start_reload``. Events without an ``eventName``
        key return None, as does the final fall-through.
        """
        # if the json is from a file use json.load(file)
        # NOTE(review): the `try:` line that pairs with the `except` below is
        # missing in this copy.
            action = json.loads(jsonObject)
        except Exception as e:
            return None

        if "eventName" not in action:
            print("json object has no eventName")
            return None

        self.slackMessage = ""  # reset message
        if not self.debugMode:
            # NOTE(review): the body of this `if` is missing in this copy.

        # The Json for players and table is different for __action, __bet and __show_action.
        if action["eventName"] == "__action":
            if self.playerName == -1:
                self.playerName = action["data"]["self"]["playerName"]
                if not self.debugMode:
                    print("Hello. My name is " + str(self.playerName))

            # Only the first four slots (call, fold, allin, raise) compete here.
            response = self.response[:4]

            # It's our turn, we should respond with an __action.
            actionObj = {"eventName": "__action", "data": {"action": ""}}
            # Pick the action with the highest score; ties go to the lowest index.
            maxValue = max(response)
            maxIndex = response.index(maxValue)

            if maxIndex == 0:
                actionObj["data"]["action"] = "call"
            elif maxIndex == 1:
                actionObj["data"]["action"] = "fold"
            elif maxIndex == 2:
                actionObj["data"]["action"] = "allin"
                if action["data"]["self"]["chips"] >= 5000:
                    # NOTE(review): str(["chips"]) renders the literal list
                    # "['chips']"; the chip count from
                    # action["data"]["self"]["chips"] was presumably intended.
                    self.slackMessage = "Oh no. We are betting " + str(
                        ["chips"]) + " chips!!! Wish me luck."
            elif maxIndex == 3:
                actionObj["data"]["action"] = "raise"

            self.playerName = action["data"]["self"]["playerName"]

            return json.dumps(actionObj)
        elif action["eventName"] == "__show_action":
            # Broadcast to everyone when someone makes an __action (on their turn)
            # NOTE(review): this branch has no statements in this copy.
        elif action["eventName"] == "__bet":
            # possibilities: [check, bet, fold]
            if not self.debugMode:
                print("We are betting!\n")
            response = self.response[:
                                     2]  # index 2 is "allin" which is not applicable here

            # NOTE(review): the closing braces of this dict literal are
            # missing in this copy.
            actionObj = {
                "eventName": "__action",
                "data": {
                    "action": "",
                    "amount": 0
            maxValue = max(response)
            maxIndex = response.index(maxValue)
            self.betAmount = self.response[5]

            if maxIndex == 0:
                actionObj["data"]["action"] = "check"
            elif maxIndex == 1:
                actionObj["data"]["action"] = "fold"
            elif maxIndex == 2:
                # NOTE(review): response holds two items here, so maxIndex is
                # never 2 and this "bet" branch cannot run as written.
                actionObj["data"]["action"] = "bet"
                actionObj["data"]["amount"] = int(
                    self.betAmount * action["data"]["self"]["chips"])

            return json.dumps(actionObj)

        elif action["eventName"] == "__deal":
            # The small and big blinds are set!
            # NOTE(review): this branch has no statements in this copy.
        elif action["eventName"] == "__start_reload":
            # we should either reload or not, so T/F
            # NOTE(review): index 5 is read as the reload probability here,
            # while the response-vector comment in getVectorResponse lists
            # index 4 as reload and 5 as bet amount - confirm which is right.
            if not self.debugMode:
                print("Reload probability " + str(self.response[5]) + "\n")
            if self.response[5] > 0.5:
                if not self.debugMode:
                    # NOTE(review): the body of this `if` is missing in this copy.
                return json.dumps({"eventName": "__reload"})
        elif action["eventName"] == "__new_round":
            # The round begins, we have some useful info here.
            # NOTE(review): this branch has no statements in this copy.
        elif action["eventName"] == "__round_end":
            # End of a round, shows everything. Useful for dynamic learning
            # NOTE(review): this branch has no statements in this copy.
        elif action["eventName"] == "__game_over":
            # Shows the winner
            self.win = self.__Survive(action)
            if self.win:
                self.slackMessage = "We survived!"
                # NOTE(review): an `else:` is presumably missing before the
                # next line; as written it overwrites the message above.
                self.slackMessage = "We didn't survive."

            if self.doCallback:
                # NOTE(review): the body of this `if` is missing in this copy.

        elif action["eventName"] == "__new_peer":
            # response to our __join request
            if not self.debugMode:
                print("I'm in!")

        return None

    # Checks if we survived or not.
    def __Survive(self, action):
        """Scan ``action["data"]["players"]`` for our own entry.

        Returns True when an entry matches ``self.playerName`` and a second
        condition that is cut off in this copy; False otherwise.
        """
        for element in action["data"]["players"]:
            if not self.debugMode:
                # NOTE(review): the body of this `if` is missing in this copy.
            # NOTE(review): the condition below is truncated in this copy.
            if element["playerName"] == self.playerName and element[
                return True
        return False

    #     self.__sendSlackStatus()

    # sends AI status to slack webhook
    def __sendSlackStatus(self):
        if not self.debugMode:
            # NOTE(review): the body of this `if` is missing in this copy.

    # Table object
    #   tableNumber     int             Id of the table.
    #   roundName       String          Name of the round. (preflop, flop, turn, river)
    #   board           String Array    Probably the cards in the middle.
    #   roundCount      int             Max amount of reloads I think.
    #   raiseCount      int             Not sure.
    #   betCount        int             Number of raises this round.
    #   totalBet        int             I think this is the pot.
    #   smallBlind      object          Name of the player and amount.
    #   bigBlind        object          Name of the player and amount.

    def __setTable(self, table):
        """Parse ``table['board']`` into ``self.__tableCards``.

        ``self.__table`` is set to ``tbl``, which is still an empty list in
        the visible code.
        """
        tbl = []
        self.__tableCards = [self.__parseCards(c) for c in table['board']]
        self.__table = tbl

    # Players should be an array of player objects.
    # Player object:
    #   playerName      String          Name of the player.
    #   chips           int             Amount of money available.
    #   folded          boolean         Has the player folded yet.
    #   allIn           boolean         Is the player All in.
    #   cards           String Array    The player's cards.
    #   isSurvive       boolean         Not sure.
    #   reloadCount     int             How many times the player has reloaded.
    #   roundBet        int             Not sure, probably the big blind or something.
    #   bet             int             What the player has bet already I think.

    def __setPlayers(self, players):
        """Store the parsed cards of any player entry that has a 'cards' key.

        ``self.__players`` is set to ``plrs``; in the visible code nothing is
        appended to ``plrs`` and ``arr`` is never used.
        """
        plrs = []
        for plr in players:
            arr = []
            if 'cards' in plr:
                # A later entry with 'cards' overwrites an earlier one.
                self.__cards = [self.__parseCards(c) for c in plr['cards']]
        self.__players = plrs

    def normalize(self, x):
        """Return ``1 / x`` as a float, or 0 when ``x`` equals 0."""
        return 0 if x == 0 else 1 / float(x)

    def __parseCards(self, card):
        """Encode a two-character card as six bits, most significant first.

        The card number is the index of ``card[0]`` in ``"A23456789TJQK"``
        times 4 plus the index of ``card[1]`` in ``"HDCS"``. ``str.index``
        raises ValueError for a character outside those strings.
        """
        c = []
        num = "A23456789TJQK".index(card[0]) * 4 + "HDCS".index(card[1])
        # Peel off the bits from 32 down to 1.
        for i in [32, 16, 8, 4, 2, 1]:
            c.append(1 if num >= i else 0)
            num = num - i if num >= i else num
        return c
Exemple #19
import random
import sys
from pyspark import SparkContext
import binascii
from blackbox import BlackBox

# Module-level Spark context obtained through SparkContext.getOrCreate().
sc = SparkContext.getOrCreate()

# At this point BlackBox is the class imported from the blackbox module; the
# class statement that follows in this file rebinds the name afterwards.
bx = BlackBox()
class BlackBox:
    """Simulated data stream that samples users from a text file."""

    def ask(self, file, num):
        """Return ``num`` lines of ``file`` drawn at random, with replacement.

        Args:
            file: path of a text file with one user per line.
            num: number of users to draw.

        Returns:
            list[str]: the sampled lines with trailing newlines removed.

        Raises:
            ValueError: from ``random.randint`` when ``file`` has no lines
                and ``num`` is positive.
        """
        # Close the file as soon as it has been read instead of leaking it.
        with open(file, 'r') as handle:
            lines = handle.readlines()
        last_index = len(lines) - 1
        return [
            lines[random.randint(0, last_index)].rstrip("\n")
            for _ in range(num)
        ]

if __name__ == '__main__':
    # Only when run as a script: rebind bx to an instance of whatever
    # BlackBox names at this point.
    bx = BlackBox()

# Command-line arguments, read at module level (so also on import):
# <input file> <stream size> <number of asks> <output file>.
input_data = sys.argv[1]

stream_size = int(sys.argv[2])

num_of_asks = int(sys.argv[3])

output_file = sys.argv[4]
Exemple #20
        return gen_func

    def one_run(stream):
        """Return the Bloom-filter false-positive rate for one stream batch.

        Every user is hashed with ``myhashs``. The first time a user appears
        in the batch it counts as a false positive when all of its hash
        positions are already in the filter, otherwise as a true negative.
        Repeated users are ignored.

        Args:
            stream: iterable of user ids for this batch.

        Returns:
            float: ``fp / (fp + tn)``, or 0.0 when the batch is empty.
        """
        fp, tn = 0., 0.
        filters = set()
        prev = set()
        for s in stream:
            h = set(myhashs(s))
            # Test the user itself, not the set of its characters, and
            # remember it so repeats are not counted again.
            if s not in prev:
                if h.issubset(filters):
                    fp += 1
                else:
                    tn += 1
                prev.add(s)
            filters |= h
        total = fp + tn
        return fp / total if total else 0.0

if __name__ == "__main__":
    # Command-line arguments: <input> <stream size> <num asks> <output>.
    infile = sys.argv[1]
    stream_size = int(sys.argv[2])
    num_asks = int(sys.argv[3])
    outfile = sys.argv[4]
    t0 = time.time()
    with open(outfile, 'w+') as f:
        for i in range(num_asks):
            # A fresh BlackBox is created for every ask.
            stream = BlackBox().ask(infile, stream_size)
            fpr = Task1.one_run(stream)
            f.write("{},{}\n".format(i, fpr))
    print("Duration:", time.time() - t0)
Exemple #21
import random
import sys
from statistics import median,mean

#class BlackBox:

#    def ask(self, file, num):
#        lines = open(file,'r').readlines()
#        users = [0 for i in range(num)]
#        for i in range(num):
#            users[i] = lines[random.randint(0, len(lines) - 1)].rstrip("\n")
#        return users
#This is how we read the data!
from blackbox import BlackBox
# Stream source used by the rest of the script.
bx = BlackBox()

# Command-line arguments: <users file> <stream size> <number of asks>.
users_fn = sys.argv[1]
stream_size = int(sys.argv[2])
num_asks = int(sys.argv[3])
print(f"This is user FN: {users_fn}")
print(f"This is stream size: {stream_size}")
print(f"This is number of asks: {num_asks}")

# Task 2.1 -- Bloom Filter: bit array, all bits cleared.
filter_bit_array = [0 for _ in range(69997)]

import binascii

import math
def isPrime(n):
Exemple #22

def reservoir(seqNum, data):
    """Feed one batch into the global reservoir and return a CSV row.

    The first batch seeds ``reservoirList``. For every later user, one
    random slot is replaced with probability ``streamSize / count``, where
    ``count`` is the running sequence number.

    Args:
        seqNum: number of users seen before this batch.
        data: list of users in this batch.

    Returns:
        str: ``"<count>,<slot0>,<slot20>,<slot40>,<slot60>,<slot80>\\n"``.
    """
    global reservoirList
    count = seqNum
    if len(reservoirList) == 0:
        # Copy so later replacements do not mutate the caller's batch.
        reservoirList = list(data)
        count += len(data)
    else:
        for d in data:
            count += 1
            prob = random.randint(0, 100000) % count
            if (prob < streamSize):
                pos = random.randint(0, 100000) % streamSize
                reservoirList[pos] = d
    slots = (reservoirList[0], reservoirList[20], reservoirList[40],
             reservoirList[60], reservoirList[80])
    return str(count) + "," + ",".join(str(s) for s in slots) + "\n"

if __name__ == "__main__":
    blackBox = BlackBox()
    # CSV text: header first, then one row per ask.
    op = "seqnum,0_id,20_id,40_id,60_id,80_id\n"
    for i in range(numOfAsks):
        data = blackBox.ask(inputFile, streamSize)
        # streamSize * i users have been seen before this batch.
        op = op + reservoir(streamSize * i, data)
    end = time.time()
    print("Duration:{}".format(round(end - start, 2)))
Exemple #23
     stream_size = 100
     num_of_asks = 30
     output_filename = 'task3_out.csv'
 elif len(sys.argv) != 5:
     print('Usage : python task1.py <input_filename> stream_size num_of_asks <output_filename>')
     input_filename = sys.argv[1]
     stream_size = int(sys.argv[2])
     num_of_asks = int(sys.argv[3])
     output_filename = sys.argv[4]
 saved_users = []
 bx = BlackBox()
 n = 0
 print('seqnum, 0_id, 20_id, 40_id, 60_id, 80_id\n')
 with open(output_filename, 'w+') as file:
     file.write('seqnum, 0_id, 20_id, 40_id, 60_id, 80_id\n')
     for i in range(num_of_asks):
         data = bx.ask(input_filename, stream_size)
         for s in data:
             n += 1
             if n <= 100:
             elif random.randint(0, 100000) % n < 100:
                 index = random.randint(0, 100000) % 100
                 saved_users[index] = s
         print((i+1)*stream_size, saved_users[0], saved_users[20],saved_users[40],saved_users[60],saved_users[80])
    # calculate FPR = FP / (FP + TN)
    false_positive_rate = 0.0 if (false_pos + true_neg
                                  == 0) else false_pos / (false_pos + true_neg)

    with open(output, 'a') as f:
        f.write(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
        f.write("," + str(float(false_positive_rate)))
            str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + " " +

if __name__ == '__main__':
    # Fixed run configuration.
    file = "users.txt"
    num = 100
    times = 30
    output = "Bloomfilter_output.csv"

    # Bloom filter state at module level.
    filter_array = [0 for i in range(69997)]
    seen_user = set()

    # Start from a fresh output file holding only the header.
    # NOTE(review): the body of this `with` was missing in this copy; the
    # header text is assumed - confirm the expected columns.
    with open(output, 'w+') as f:
        f.write("Time,FPR")

    from blackbox import BlackBox
    bx = BlackBox()
    for i in range(times):
        bloom(bx.ask(file, num))
Exemple #25
    estimate = (int)(statistics.median((avg_group)))

    with open(output, "a") as f:
        f.write(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
        f.write("," + str(ground_truth))
        f.write("," + str(estimate))
            str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + " " +
            str(ground_truth) + " " + str(estimate))

if __name__ == '__main__':
    # Fixed run configuration: 30 asks of 300 users each from users.txt.
    file = "users.txt"
    num = 300
    times = 30
    output = "FlajoletMartin_output.csv"

    # num of hash_functions
    k = 15
    # groups
    g = 5
    # num in group
    l = 3
    # Start from a fresh output file holding only the header line.
    with open(output, 'w+') as f:
        f.write("Time,Ground Truth,Estimation")
    bx = BlackBox()
    # flajolet is defined outside this block and is called once per batch.
    for i in range(times):
        flajolet(bx.ask(file, num))
Exemple #26

if __name__ == '__main__':
    from blackbox import BlackBox
    import random
    import sys
    import time

    # Command-line arguments: <users file> <stream size> <num asks> <output>.
    start = time.time()
    file_name = sys.argv[1]
    stream_size = int(sys.argv[2])
    num_ask = int(sys.argv[3])
    output_path = sys.argv[4]

    bx = BlackBox()
    data_holder = []  # the reservoir, at most 100 users
    n = 0  # users seen so far
    res = []
    for _ in range(num_ask):
        stream_users = bx.ask(file_name, stream_size)
        for user in stream_users:
            n += 1
            if len(data_holder) < 100:
                # Fill phase: keep the first 100 users unconditionally.
                data_holder.append(user)
            else:
                # Replace a random slot with probability 100 / n.
                prob = random.random()
                if prob < 100 / n:
                    idx = random.randint(0, 99)
                    data_holder[idx] = user
        # Snapshot five fixed slots after each ask.
        res.append((n, data_holder[0], data_holder[20], data_holder[40], data_holder[60], data_holder[80]))
Exemple #27
from pyspark import SparkContext
import random
import sys
import binascii
from blackbox import BlackBox

# Module-level Spark context obtained through SparkContext.getOrCreate().
sc = SparkContext.getOrCreate()
# At this point BlackBox is the class imported from the blackbox module; the
# class statement that follows in this file rebinds the name afterwards.
bx = BlackBox()

class BlackBox:
    """Simulated data stream that samples users from a text file."""

    def ask(self, file, num):
        """Return ``num`` lines of ``file`` drawn at random, with replacement.

        Args:
            file: path of a text file with one user per line.
            num: number of users to draw.

        Returns:
            list[str]: the sampled lines with trailing newlines removed.

        Raises:
            ValueError: from ``random.randint`` when ``file`` has no lines
                and ``num`` is positive.
        """
        # Close the file as soon as it has been read instead of leaking it.
        with open(file, 'r') as handle:
            lines = handle.readlines()
        last_index = len(lines) - 1
        return [
            lines[random.randint(0, last_index)].rstrip("\n")
            for _ in range(num)
        ]

if __name__ == '__main__':
    # Only when run as a script: rebind bx to an instance of whatever
    # BlackBox names at this point.
    bx = BlackBox()
    # users = bx.ask()
    # print(users)


# Command-line arguments, read at module level (so also on import).
input_data = sys.argv[1]

stream_size = int(sys.argv[2])
Exemple #28
        fpr = float(false_positives / float(false_positives + true_negatives))
    f.write(str(ask) + "," + str(fpr) + "\n")

if __name__ == "__main__":
    # time python3 task1.py $ASNLIB/publicdata/users.txt 500 30 task1.csv
    start_time = time.time()

    # Hard-coded run configuration; the command-line variant is kept below.
    input_file = 'dataset/users.txt'
    stream_size = 100
    num_of_asks = 300
    output_file = 'output/task1.csv'

    # input_file = sys.argv[1]
    # stream_size = int(sys.argv[2])
    # num_of_asks = int(sys.argv[3])
    # output_file = sys.argv[4]

    # Bloom filter state at module level.
    filter_bit_array = [0] * 69997
    global_user_set = set()

    # `f` stays a module-level name for the whole run; the context manager
    # guarantees the output is flushed and closed afterwards.
    with open(output_file, "w") as f:
        bx = BlackBox()
        for ask in range(num_of_asks):
            stream_users = bx.ask(input_file, stream_size)
            bloom_filter(stream_users, ask)
    print("Duration : ", time.time() - start_time)
Exemple #29
def main(_):
  """Train a substitute network on black-box outputs and test attack transfer.

  Steps, as visible in the body:
    1. Build ``BlackBox(FLAGS)`` and take ``mdl.oracle`` as the data set.
    2. Train the network returned by ``deepnn`` on that data.
    3. Take the gradient of the cross-entropy loss w.r.t. the input and use
       ``goodfellow_mod`` to perturb correctly classified test images over a
       range of epsilon values.
    4. Keep perturbed images the local network no longer classifies as the
       original label, then evaluate them against ``mdl``.
    5. Log the results and, unless ``FLAGS.nograph`` is set, call
       ``graphics`` with six images and their labels.

  The single positional argument is unused.
  """
  start_t = time.time()
  # NOTE(review): the next three statements are indented deeper than the
  # surrounding body and the error log runs unconditionally. This looks like
  # the remains of a try/except wrapper whose `try:` / `except` lines are
  # missing; as written this is an IndentationError -- confirm against the
  # original source.
      # import data
      mdl = BlackBox(FLAGS)
      # NOTE this will work with format: mdl.oracle = (image, pred_val, true_val)
      logger.error('black blox training failed...........shutting down!')
  logger.info('obtained black box training data')
  logger.info('oracle data capture time: %f' %(time.time()-start_t))
  mnist = mdl.oracle
  prep_t = time.time()
  with tf.device('/cpu:0'):
      # translate into tensorflow style nparrays
      x_vals = image_list_to_np(mnist, 0)
      true_vals = image_list_to_np(mnist,2)
      # yvals converted to one hot vector
      y_vals = [ x[1] for x in mnist ]
      y_vals = [ one_hot(i) for i in y_vals]
      y_vals = np.array(y_vals)
      y_vals = y_vals.reshape((len(y_vals),10))
      # split training and test data into nparrays
      train_images, train_labels, test_images, test_labels =split_train_data(x_vals, y_vals, FLAGS.split)
  # Tensorflow variable setup

  # input vector
  x = tf.placeholder(tf.float32, [None, 784])
  # y output vector
  y_ = tf.placeholder(tf.float32, [None, 10])
  # build the graph for the deep net
  y_conv, keep_prob = deepnn(x)
  # define loss function -> cross entropy for now with softmax
  cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
  # train step, 1e-4 is default, best to use -2/-3 depending on time
  train_step = tf.train.AdamOptimizer(FLAGS.optimize).minimize(cross_entropy)
  # define correct prediction vector and accuracy comparison
  correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  logger.info('training sets: %d test sets: %d' % (len(train_images),len(test_images)))
  cnn_saver = tf.train.Saver()
  logger.info('cpu preprocessing time: %f' %(time.time()-prep_t))
  train_t = time.time()
  logger.info('starting adversarvial model training')
  #begin training with session
  with tf.Session() as sess:
    # TODO create batching loop
    for i in range(FLAGS.iters):
      #sanity check on accuracy should be going down -> necessary but not sufficient
      # NOTE(review): int(FLAGS.iters/5) is 0 when FLAGS.iters < 5, which
      # makes the modulo below raise ZeroDivisionError.
      if i % int((FLAGS.iters/5)) == 0:
        train_accuracy = accuracy.eval(feed_dict={x: train_images, y_: train_labels, keep_prob: 1.0})
        logger.info('step %d, training accuracy %g' % (i, train_accuracy))
      #update rule - softmax vector obtained for checking
      trainer, softmax = sess.run([train_step, cross_entropy],feed_dict={x: train_images, y_: train_labels, keep_prob: 0.5})
    logger.info('adversarial model has been trained')
    # snag the gradient vector wrt X inputs
    grads = tf.gradients(cross_entropy, [x])
    jacobian = sess.run(grads, feed_dict={x:test_images, y_:test_labels, keep_prob: 1.0})
    #use test data as input for perturbations
    #test_ = tf.argmax(y_,1)
    #test_vals = test_.eval(feed_dict={y_:test_labels})
    # use this...
    verify = sess.run(tf.argmax(test_labels, 1))
    #for ver in zip(verify, test_vals):
        #print(ver, test_vals)
    pred_ = tf.argmax(y_conv,1)
    #vify = tf.argmax(y_,1)
    pred_vals = pred_.eval(feed_dict={x:test_images, y_:test_labels, keep_prob:1.0})
    #vify_vals = vify.eval(feed_dict={x:test_images, y_:test_labels, keep_prob:1.0})
    # keep only the test images whose predicted class equals the label class
    true_pred = [ (pxl, p) for pxl, p, r in zip(test_images, pred_vals, verify) if p==r ]
    logger.info('true positive test exemplars: %f' %(len(true_pred)))
    logger.results('adversary accuracy: %g' % (accuracy.eval(feed_dict={x: test_images, y_: test_labels, keep_prob: 1.0})))
    #setup the goodfellow attack iterations
    logger.info('attack model train time: %f' %(time.time()-train_t))
    pert_t = time.time()
    adv_list = []
    # NOTE(review): `idx` enumerates the filtered true_pred list but is used
    # to index jacobian[0], which was computed over all of test_images; the
    # two only line up if no test image was filtered out -- confirm.
    for idx,pos in enumerate(true_pred):
      for epsilon in np.linspace(0.025,.25,num=FLAGS.augments):
          xp = goodfellow_mod(np.array(pos[0]), jacobian[0][idx], epsilon)
          prime_label = one_hot(int(pos[1]))
          xprime = np.array(xp).reshape((1,784))
          #xprime = xprime.reshape((1,784))
          yprime = np.array(prime_label).reshape((1,10))
          #yprime = yprime.reshape((1, 10))
          pred_vals = pred_.eval(feed_dict={x: xprime, y_: yprime, keep_prob:1.0})
          acc = accuracy.eval(feed_dict={x: xprime, y_: yprime, keep_prob: 1.0})
          #corr = sess.run(mdl.y, feed_dict={x:xprime})
          # accuracy over a single example is below 1.0 exactly when the
          # perturbed image is no longer classified as the original label
          if acc < 1.0:
              #img = pos[0].reshape((28,28))
              #img1 = xp.reshape((28,28))
              #print(pos[1], np.argmax(yprime), pred_vals, epsilon, np.sum(xp), np.sum(pos[0]), np.sum(xprime))
              adv_list.append((xprime, np.argmax(yprime), pred_vals, epsilon, pos[0]))
              #logger.results('YES adversary accuracy: %g %f' % (acc, epsilon))
        #can do this each iteration - or as a whole...at this point timing doesnt matter, but will
    logger.results('true positive adversary count: %f' % (float(len(adv_list))/float(len(true_pred))))

    logger.info('distortion vector time: %f' %(time.time()-pert_t))
    att_t = time.time()
    # save model to file
    cnn_saver_path = cnn_saver.save(sess, 'cnn_saver.ckpt')
    # each adv_list entry is the tuple appended above:
    # (perturbed image, original label index, local model prediction,
    #  epsilon, original image)
    adv_images = [ a[0] for a in adv_list ]
    l = len(adv_list)
    adv_images = np.array(adv_images).reshape((l,784))
    #adv_images = adv_images.reshape((l, 784))
    adv_labels = [ a[1] for a in adv_list ]
    adv_labels = [ one_hot(int(v)) for v in adv_labels ]
    adv_labels = np.array(adv_labels).reshape((l,10))

    # NOTE(review): this adv_real value is overwritten a few lines below
    # before it is ever read.
    adv_real = [ a[2] for a in adv_list ]
    adv_real = np.array(adv_real)
    #adv_labels = adv_labels.reshape((l,10))
    adv_epsilon = [ a[3] for a in adv_list ]
    adv_epsilon = np.array(adv_epsilon)

    adv_real_image = [ a[4] for a in adv_list ]

    # test for transferability
    adv_real = mdl.sess.run(tf.argmax(adv_labels,1))
    adv_ = tf.argmax(mdl.y,1)
    adv_pred = mdl.sess.run(adv_, feed_dict={mdl.x: adv_images})
    # NOTE(review): nothing in this function ever appends to `winners`, so
    # winners[0] further down raises IndexError. A `winners.append(idx)` in
    # the `a != r` branch appears to be missing -- confirm against the
    # original source.
    winners = []
    epsilon_tracker = collections.defaultdict(int)
    # the loop variable `l` rebinds the `l` length computed above
    for idx, (a, l, r) in enumerate(zip(adv_pred, adv_labels, adv_real)):
        #print( a, l, r, a == r)
        if a != r:
            #logger.info('found adversarial example: %g %g' % (a, r))
            epsilon_tracker[adv_epsilon[idx]] += 1
    logger.info('attack results time: %f' %(time.time()-att_t))
    logger.info('****************** results **************')
    # NOTE(review): mdl.accuracy is evaluated with the local `sess`, whereas
    # the other mdl tensors above are run through mdl.sess -- confirm which
    # session is intended.
    logger.results('black box adversarial attack transferability: %g' % (1 - sess.run(mdl.accuracy, feed_dict={mdl.x: adv_images,mdl.y_: adv_labels})))
    for d,v in sorted(epsilon_tracker.items()):
        logger.results('epsilon %s %s' % (d,v))
    # grab first two success stories and show them -> lets assume two or error handle later
    adv_pic0 = adv_images[winners[0]].reshape((28,28))
    adv_pic0_real = adv_real_image[winners[0]].reshape((28,28))
    # random.randint(1, len(winners)-1) raises ValueError unless there are at
    # least two winners
    rando = random.randint(1,(len(winners)-1))
    adv_pic1 = adv_images[winners[rando]].reshape((28,28))
    adv_pic1_real = adv_real_image[winners[rando]].reshape((28,28))
    true_pic = mdl.pictrue
    false_pic = mdl.picfalse
    labels = ['ORIGINAL NEURAL NET CORRECT ON THIS %s' %( mdl.pictruelabel[0]), 'ORIGINAL NEURAL NET THOUGHT UNTAMPERED %s WAS %s'% (mdl.picfalselabel[1], mdl.picfalselabel[0]), 'ORIGINAL IMAGE %s' % (adv_real[winners[0]]), 'ORIGINAL NET THOUGHT %s'%(adv_pred[winners[0]]),'ORIGINAL IMAGE %s' % (adv_real[winners[rando]]), 'ORIGINAL NET THOUGHT %s' % (adv_pred[winners[rando]]) ]
    logger.info('total program run time: %f' %(time.time()-start_t))
    if not FLAGS.nograph:
      graphics([true_pic, false_pic, adv_pic0_real, adv_pic0, adv_pic1_real, adv_pic1], labels)
Exemple #30

## Path to the input data set (first command-line argument).
dataSetPath = sys.argv[1]

## Stream size (second command-line argument), parsed as an integer.
streamSize = int(sys.argv[2])

## Number of asks (third command-line argument), parsed as an integer.
numAsks = int(sys.argv[3])

## Path to the output file (fourth command-line argument).
outfilePath = sys.argv[4]

## Create the BlackBox instance.
bxInstance = BlackBox()

## List that holds the sampled user IDs.
userIDList = []

## Maximum number of entries allowed in the list.
maxListSize = 100

## Running count of the elements seen so far.
numElems = 0

## Function to run the reservoir sampling.
def reservoirSampling(userStream):

	## Global parameters.