Exemple #1
0
def trendCurves():
    """Re-run the trend-curve experiment for a mixed-users model and analyse it.

    Any previous output at the experiment path is removed first, the
    simulation is then executed through ``run`` and finally
    ``Analysis.trendCurves`` is invoked on the freshly written file.
    """
    mixedModel = MixedUsersModel()
    outputFile = spamModelFolder + mixedModel.id
    settings = dict(
        model=mixedModel,
        addUsersMethod=User.addUsersUsingRatio,
        analysisMethods=[(Analysis.trendCurves, 1)],
        ratio={'normal': 0.985, 'spammer': 0.015},
        experimentFileName=outputFile
    )
    # Start from a clean slate so results of an earlier run are not mixed in.
    GeneralMethods.runCommand('rm -rf %s' % outputFile)
    run(**settings)
    Analysis.trendCurves(experimentFileName=outputFile)
Exemple #2
0
def performanceWithSpamFilteringForLatestMessages(generateData):
    """Sweep the spammer share and measure/plot spamness of the latest-message rankings.

    For 10 iterations and spammer shares 0.001 .. 0.050 (step 0.001):

    * ``generateData`` truthy: delete the experiment file and run a 10-step
      simulation that writes ranking-quality measurements to it.
    * ``generateData`` falsy: read the experiment files back, average the
      ``'spammmess'`` values per ranking method and spammer share, and save
      the plot to ``performanceWithSpamFilteringForLatestMessages.png``.

    Args:
        generateData: selects between the simulation and the plotting mode.
    """
    # The sweep is identical for every iteration, so it is built once.
    spammerPercentages = [step * 0.001 for step in range(1, 51)]
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for spammerPercentage in spammerPercentages:
            experimentFileName = spamModelFolder+'performanceWithSpamFilteringForLatestMessages/%s/%0.3f'%(iteration,spammerPercentage)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(experimentFileName)
            if generateData:
                model = MixedUsersModel()
                conf = {
                    'model': model,
                    'numberOfTimeSteps': 10,
                    'addUsersMethod': User.addUsersUsingRatio,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
                    'rankingMethods': [RankingModel.latestMessages, RankingModel.latestMessagesSpamFiltered],
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before the run writes a fresh file.
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
                run(**conf)
            else:
                tempData = defaultdict(list)
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        tempData[ranking_id]+=data['spammmess'][ranking_id]
                experimentData[iteration][spammerPercentage]=tempData
    if generateData:
        return

    # realDataY[ranking_id][x] collects one mean per iteration for share x.
    realDataY = defaultdict(dict)
    for iteration in experimentData:
        perShare = experimentData[iteration]
        dataX = sorted(perShare)
        dataY = defaultdict(list)
        for perct in dataX:
            for ranking_id, values in perShare[perct].items():
                dataY[ranking_id].append(np.mean(values))
        for ranking_id in dataY:
            # NOTE(review): zip pairs positionally; this assumes every share
            # reported a value for this ranking_id -- confirm.
            for x, y in zip(dataX, dataY[ranking_id]):
                realDataY[ranking_id].setdefault(x, []).append(y)
    # dataX/dataY still hold the values of the last iteration processed above;
    # they only provide the x axis and the set of rankings to draw.
    for ranking_id in dataY:
        plt.plot(dataX, [np.mean(realDataY[ranking_id][x]) for x in dataX], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
    plt.xlabel('Percentage of Spammers', fontsize=16, fontweight='bold')
    plt.ylabel('Spamness', fontsize=16, fontweight='bold')
    plt.legend(loc=2)
    plt.xlim(xmax=0.05)
    plt.savefig('performanceWithSpamFilteringForLatestMessages.png')
    plt.clf()
Exemple #3
0
def main(args):
    """Prepare output folders, a TensorFlow session and a gym env, then train.

    Args:
        args: parsed command-line namespace. Attributes read here are
            ``algo``, ``model_name``, ``env``, ``episodes``, ``rand_seed``,
            ``gui`` and, for the non-DQN algorithms, ``BATCH_SIZE``.

    Raises:
        AssertionError: if ``args.algo`` is not one of the supported
            algorithms. The check runs before any folder, session or
            environment is created.
    """
    supported_algorithms = ['dqn', 'ddqn', 'a2c', 'pompdp']
    # Fail fast, and list *all* valid choices: the message has a single
    # placeholder, so the names are joined instead of unpacked into format().
    assert args.algo in supported_algorithms, (
        "Unsupported Algorithm! Please choose a supported one: {}".format(
            ', '.join(supported_algorithms)))

    # --- saving paths ---
    output_dir = "logs"
    if args.model_name is None:
        t = time.strftime('%Y-%m-%d_%H_%M_%S_%z')
        model_name = "env_{}_algo_{}_ep_{}_{}".format(args.env, args.algo,
                                                      args.episodes, t)
        print("[*] created model folder: {}".format(model_name))
        model_dir = '{}/{}'.format(output_dir, model_name)
    else:
        # An explicit model name is used as the model directory itself.
        model_name = args.model_name
        print("[*] proceeding to load model: {}".format(model_name))
        model_dir = model_name

    image_dir = '{}/images'.format(model_dir)
    checkpoints_dir = '{}/checkpoints'.format(model_dir)
    for path in [output_dir, model_dir, image_dir, checkpoints_dir]:
        # makedirs also creates missing parents and tolerates existing dirs.
        os.makedirs(path, exist_ok=True)

    # --- tf session definitions ---
    tf.reset_default_graph()
    tf.random.set_random_seed(args.rand_seed)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess = tf.Session(config=config)

    # --- load env ---
    print("[*] attempting to load {} env".format(args.env))
    env = gym.make(args.env)
    print("[*] success")

    # --- main loop ---
    if args.algo in ['dqn', 'ddqn']:
        run(sess=sess,
            env=env,
            algo=args.algo,
            checkpoints_dir=checkpoints_dir,
            n_episodes=args.episodes,
            gui=args.gui)
    else:
        # Every other supported algorithm is handled by the A2C runner.
        run_a2c(sess=sess,
                env=env,
                algo=args.algo,
                checkpoints_dir=checkpoints_dir,
                n_episodes=args.episodes,
                gui=args.gui,
                BATCH_SIZE=args.BATCH_SIZE)
Exemple #4
0
def performanceWithSpamDetection(generateData):
    """Measure/plot spamness over time for several spam-detection ratios.

    For 10 iterations and detection ratios 0.0, 0.4 and 0.9 (spammer share
    fixed at 0.015):

    * ``generateData`` truthy: delete the experiment file and run a 100-step
      simulation that writes ranking-quality measurements to it.
    * ``generateData`` falsy: read the files back, average the
      ``'spammmess'`` values per detection ratio, time step and ranking
      method, and save one figure per spam-filtered ranking method
      (``performanceWithSpamDetection_<ranking_id>.png``).

    Args:
        generateData: selects between the simulation and the plotting mode.
    """
    experimentData = defaultdict(dict)
    ratios = [0.0,0.4,0.9]
    marker = dict([(0.0, 's'), (0.4, 'o'), (0.9, 'd')])
    spammerPercentages = [0.015, 0.015, 0.015]
    for iteration in range(10):
        for spamDetectionRatio, spammerPercentage in zip(ratios, spammerPercentages):
            experimentFileName = spamModelFolder+'performanceWithSpamDetection/%s/%0.3f'%(iteration,spamDetectionRatio)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(experimentFileName)
            if generateData:
                model = MixedUsersModel()
                conf = {
                    'model': model,
                    'numberOfTimeSteps': 100,
                    'addUsersMethod': User.addUsersUsingRatioWithSpamDetection,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
                    'rankingMethods': [RankingModel.latestMessages, RankingModel.latestMessagesSpamFiltered, RankingModel.popularMessages, RankingModel.popularMessagesSpamFiltered],
                    'spamDetectionRatio': spamDetectionRatio,
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before the run writes a fresh file.
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
                run(**conf)
            else:
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        timeStep = data['currentTimeStep']
                        if timeStep not in experimentData[spamDetectionRatio]:
                            experimentData[spamDetectionRatio][timeStep]=defaultdict(list)
                        experimentData[spamDetectionRatio][timeStep][ranking_id]+=data['spammmess'][ranking_id]
    if generateData:
        return

    sdr = {}
    for spamDetectionRatio in sorted(experimentData.keys()):
        dataToPlot = defaultdict(list)
        # Sorted explicitly: the [10:] slices below are positional and are only
        # meaningful when the time steps are in ascending order.
        for timeUnit in sorted(experimentData[spamDetectionRatio]):
            dataToPlot['x'].append(timeUnit)
            for ranking_id in experimentData[spamDetectionRatio][timeUnit]:
                dataToPlot[ranking_id].append(np.mean(experimentData[spamDetectionRatio][timeUnit][ranking_id]))
        sdr[spamDetectionRatio]=dataToPlot
    for ranking_id in [RankingModel.LATEST_MESSAGES_SPAM_FILTERED, RankingModel.POPULAR_MESSAGES_SPAM_FILTERED]:
        for spamDetectionRatio in ratios:
            print('%s %s' % (ranking_id, spamDetectionRatio))
            series = sdr[spamDetectionRatio]
            dataY = smooth(series[ranking_id],8)[:len(series['x'])]
            # Drop the first 10 points and shift the axis so it starts at 0.
            dataX, dataY = series['x'][10:], dataY[10:]
            shiftedX = [x-10 for x in dataX]
            print('x %s' % (shiftedX,))
            print('%s %s' % (ranking_id, dataY))
            if spamDetectionRatio==0.0:
                curveLabel = '%s'%(labels[ranking_id])
            else:
                curveLabel = '%s (%d'%(labels[ranking_id].replace('Filtering', 'Detection'),spamDetectionRatio*100)+'%)'
            plt.plot(shiftedX, dataY, label=curveLabel, lw=1, marker=marker[spamDetectionRatio])
        plt.ylim(ymin=0, ymax=1)
        plt.xlim(xmin=0, xmax=75)
        plt.legend()
        plt.xlabel('Time', fontsize=16, fontweight='bold')
        plt.ylabel('Spamness', fontsize=16, fontweight='bold')
        savefig('performanceWithSpamDetection_%s.png'%ranking_id)
        plt.clf()
Exemple #5
0
def performanceAsPercentageOfGlobalSpammerVaries(generateData):
    """Sweep the global-payload share of spam and measure/plot spamness.

    For 10 iterations and global-payload shares 0.1 .. 1.0 (step 0.1), with
    the user ratio fixed at 0.985 normal / 0.015 spammer:

    * ``generateData`` truthy: delete the experiment file and run a 10-step
      simulation that writes ranking-quality measurements to it.
    * ``generateData`` falsy: read the files back, average the
      ``'spammmess'`` values per ranking method and share, and save the plot
      to ``performanceAsPercentageOfGlobalSpammerVaries.png``. Only ranking
      ids present in ``labels`` are drawn.

    Args:
        generateData: selects between the simulation and the plotting mode.
    """
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for step in range(1,11):
            # Separate name for the scaled value instead of rebinding the loop variable.
            spammerPercentage = step*0.1
            experimentFileName = spamModelFolder+'performanceAsPercentageOfGlobalSpammerVaries/%s/%0.3f'%(iteration,spammerPercentage)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(experimentFileName)
            if generateData:
                model = MixedUsersModel()
                conf = {
                    'model': model,
                    'numberOfTimeSteps': 10,
                    'addUsersMethod': User.addUsersUsingRatio,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 0.985, 'spammer': 0.015},
                    'spamRatio': {'localPayloads': 1-spammerPercentage, 'globalPayloads': spammerPercentage},
                    'noOfGlobalSpammerPayloads': 10,
                    'rankingMethods': [RankingModel.latestMessages, RankingModel.latestMessagesDuplicatesRemoved, RankingModel.popularMessages],
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before the run writes a fresh file.
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
                run(**conf)
            else:
                tempData = defaultdict(list)
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        tempData[ranking_id]+=data['spammmess'][ranking_id]
                experimentData[iteration][spammerPercentage]=tempData
    if generateData:
        return

    # realDataY[ranking_id][x] collects one mean per iteration for share x.
    realDataY = defaultdict(dict)
    for iteration in experimentData:
        perShare = experimentData[iteration]
        dataX = sorted(perShare)
        dataY = defaultdict(list)
        for perct in dataX:
            for ranking_id, values in perShare[perct].items():
                dataY[ranking_id].append(np.mean(values))
        for ranking_id in dataY:
            # NOTE(review): zip pairs positionally; this assumes every share
            # reported a value for this ranking_id -- confirm.
            for x, y in zip(dataX, dataY[ranking_id]):
                realDataY[ranking_id].setdefault(x, []).append(y)
    # dataX/dataY still hold the values of the last iteration processed above;
    # they only provide the x axis and the set of rankings to draw.
    for ranking_id in dataY:
        if ranking_id in labels:
            plt.plot(dataX, [np.mean(realDataY[ranking_id][x]) for x in dataX], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
    plt.xlabel('Percentage of Spammers Using Group Strategy', fontsize=16, fontweight='bold')
    plt.ylabel('Spamness', fontsize=16, fontweight='bold')
    plt.legend(loc=4)
    plt.savefig('performanceAsPercentageOfGlobalSpammerVaries.png')
    plt.clf()
Exemple #6
0
def performanceAsNoOfGlobalPayloadsVary(generateData):
    """Sweep the number of global spammer payloads and measure/plot spamness.

    For 10 iterations and payload counts 1 .. 499, with the user ratio fixed
    at 0.985 normal / 0.015 spammer:

    * ``generateData`` truthy: delete the experiment file and run a 10-step
      simulation that writes ranking-quality measurements to it.
    * ``generateData`` falsy: read the files back, average the
      ``'spammmess'`` values per ranking method and payload count, and save a
      log-x plot to ``performanceAsNoOfGlobalPayloadsVary.png``. Only ranking
      ids present in ``labels`` are drawn.

    ``Spammer.globalPayloads`` is reset to ``None`` before every experiment.

    Args:
        generateData: selects between the simulation and the plotting mode.
    """
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for noOfGlobalSpammerPayloads in range(1,500):
            Spammer.globalPayloads = None
            experimentFileName = spamModelFolder+'performanceAsNoOfGlobalPayloadsVary/%s/%0.3f'%(iteration,noOfGlobalSpammerPayloads)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(experimentFileName)
            if generateData:
                model = MixedUsersModel()
                conf = {
                    'model': model,
                    'numberOfTimeSteps': 10,
                    'addUsersMethod': User.addUsersUsingRatio,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 0.985, 'spammer': 0.015},
                    'noOfGlobalSpammerPayloads': noOfGlobalSpammerPayloads,
                    'rankingMethods': [RankingModel.latestMessages, RankingModel.latestMessagesDuplicatesRemoved, RankingModel.popularMessages],
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before the run writes a fresh file.
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
                run(**conf)
            else:
                tempData = defaultdict(list)
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        tempData[ranking_id]+=data['spammmess'][ranking_id]
                experimentData[iteration][noOfGlobalSpammerPayloads]=tempData
    if generateData:
        return

    # realDataY[ranking_id][x] collects one mean per iteration for count x.
    realDataY = defaultdict(dict)
    for iteration in experimentData:
        perCount = experimentData[iteration]
        dataX = sorted(perCount)
        dataY = defaultdict(list)
        for perct in dataX:
            for ranking_id, values in perCount[perct].items():
                dataY[ranking_id].append(np.mean(values))
        for ranking_id in dataY:
            # NOTE(review): zip pairs positionally; this assumes every count
            # reported a value for this ranking_id -- confirm.
            for x, y in zip(dataX, dataY[ranking_id]):
                realDataY[ranking_id].setdefault(x, []).append(y)
    # dataX/dataY still hold the values of the last iteration processed above;
    # they only provide the x axis and the set of rankings to draw.
    for ranking_id in dataY:
        if ranking_id in labels:
            means = [np.mean(realDataY[ranking_id][x]) for x in dataX]
            # The first 20 points are kept raw; only the tail is smoothed.
            dy = means[:20] + list(smooth(means[20:]))
            plt.semilogx(dataX, dy[:len(dataX)], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
    plt.xlabel('Payloads Per Spam Group', fontsize=15, fontweight='bold')
    plt.ylabel('Spamness', fontsize=15, fontweight='bold')
    plt.legend(loc=4)
    plt.savefig('performanceAsNoOfGlobalPayloadsVary.png')
    plt.clf()
Exemple #7
0
def performanceAsSpammerPayloadVaries(generateData):
    """Sweep the number of payloads per spammer and measure/plot spamness.

    For 10 iterations and payload counts 1 .. 10, with the user ratio fixed
    at 0.985 normal / 0.015 spammer:

    * ``generateData`` truthy: delete the experiment file and run a 10-step
      simulation that writes ranking-quality measurements to it.
    * ``generateData`` falsy: read the files back, average the
      ``'spammmess'`` values per ranking method and payload count, and save
      the plot to ``performanceAsSpammerPayloadVaries.png``. Only ranking ids
      present in ``labels`` are drawn.

    Args:
        generateData: selects between the simulation and the plotting mode.
    """
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for spammerPayload in range(1,11):
            experimentFileName = spamModelFolder+'performanceAsSpammerPayloadVaries/%s/%0.3f'%(iteration,spammerPayload)
            if generateData:
                model = MixedUsersModel()
                conf = {
                    'model': model,
                    'numberOfTimeSteps': 10,
                    'addUsersMethod': User.addUsersUsingRatio,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 0.985, 'spammer': 0.015},
                    'noOfPayloadsPerSpammer': spammerPayload,
                    'rankingMethods': [RankingModel.latestMessages, RankingModel.latestMessagesDuplicatesRemoved, RankingModel.popularMessages],
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before the run writes a fresh file.
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
                run(**conf)
            else:
                tempData = defaultdict(list)
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        tempData[ranking_id]+=data['spammmess'][ranking_id]
                experimentData[iteration][spammerPayload]=tempData
    if generateData:
        return

    # realDataY[ranking_id][x] collects one mean per iteration for count x.
    realDataY = defaultdict(dict)
    for iteration in experimentData:
        perPayload = experimentData[iteration]
        dataX = sorted(perPayload)
        dataY = defaultdict(list)
        for perct in dataX:
            # .items() works on Python 2 and 3 alike (.iteritems() is Python 2 only).
            for ranking_id, values in perPayload[perct].items():
                dataY[ranking_id].append(np.mean(values))
        for ranking_id in dataY:
            # NOTE(review): zip pairs positionally; this assumes every count
            # reported a value for this ranking_id -- confirm.
            for x, y in zip(dataX, dataY[ranking_id]):
                realDataY[ranking_id].setdefault(x, []).append(y)
    # dataX/dataY still hold the values of the last iteration processed above;
    # they only provide the x axis and the set of rankings to draw.
    for ranking_id in dataY:
        if ranking_id in labels:
            plt.plot(dataX, [np.mean(realDataY[ranking_id][x]) for x in dataX], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
    plt.xlabel('No. of Spam Payload', fontsize=16, fontweight='bold')
    plt.ylabel('Spamness', fontsize=16, fontweight='bold')
    plt.legend(prop=prop, loc='upper center', bbox_to_anchor=(0.5, 1.12), ncol=3, fancybox=True, shadow=False)
    plt.savefig('performanceAsSpammerPayloadVaries.png')
    plt.clf()
def run_model(data, meta):
    """Run the model named by ``CFG.model_name`` on ``data.model_data``.

    ``meta`` is accepted but not used by this function. Nothing is returned.
    """
    model_name = CFG.model_name
    payload = data.model_data
    models.run(model_name, data=payload, number=42)
Exemple #9
0
#!/usr/local/bin/python
# -*- coding: utf-8 -*-

import models

# Script entry point: call models.run() only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    models.run()
Exemple #10
0
def performanceWithSpamDetectionVaryingPercentageOfSpammers(generateData):
    """Sweep spammer share and spam-detection ratio and measure/plot spamness.

    For 10 iterations, spammer shares 0.01 and 0.05 .. 1.0 (step 0.05), and
    detection ratios 0.0, 0.4 and 0.9:

    * ``generateData`` truthy: delete the experiment file and run a 10-step
      simulation that writes ranking-quality measurements to it.
    * ``generateData`` falsy: read the files back, pool the ``'spammmess'``
      values of all iterations per detection ratio, spammer share and
      ranking method, and save one figure per (unfiltered) ranking method.

    Args:
        generateData: selects between the simulation and the plotting mode.
    """
    experimentData = defaultdict(dict)
    ratios = [0.0,0.4,0.9]
    marker = dict([(0.0, 's'), (0.4, 'o'), (0.9, 'd')])
    # The sweep is identical for every iteration, so it is built once.
    spammer_percentages = [0.01] + [step * 0.05 for step in range(1, 21)]
    for iteration in range(10):
        for spammerPercentage in spammer_percentages:
            # Every detection ratio is evaluated at the same spammer share.
            for spamDetectionRatio in ratios:
                experimentFileName = spamModelFolder+'performanceWithSpamDetectionVaryingPercentageOfSpammers/%s/%0.3f/%0.3f'%(iteration,spammerPercentage, spamDetectionRatio)
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(experimentFileName)
                if generateData:
                    model = MixedUsersModel()
                    conf = {
                        'model': model,
                        'numberOfTimeSteps': 10,
                        'addUsersMethod': User.addUsersUsingRatioWithSpamDetection,
                        'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                        'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
                        'rankingMethods': [RankingModel.latestMessages, RankingModel.latestMessagesSpamFiltered, RankingModel.popularMessages, RankingModel.popularMessagesSpamFiltered],
                        'spamDetectionRatio': spamDetectionRatio,
                        'experimentFileName': experimentFileName,
                    }
                    # Remove stale output before the run writes a fresh file.
                    GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
                    run(**conf)
                else:
                    tempData = defaultdict(list)
                    for data in FileIO.iterateJsonFromFile(experimentFileName):
                        for ranking_id in data['spammmess']:
                            tempData[ranking_id]+=data['spammmess'][ranking_id]
                    # Values of all iterations are pooled per (ratio, share).
                    pooled = experimentData[spamDetectionRatio].setdefault(spammerPercentage, defaultdict(list))
                    for ranking_id in tempData:
                        pooled[ranking_id]+=tempData[ranking_id]
    if generateData:
        return

    sdr = {}
    for spamDetectionRatio in sorted(experimentData.keys()):
        dataToPlot = defaultdict(list)
        for spammerPercentage in spammer_percentages:
            dataToPlot['x'].append(spammerPercentage)
            for ranking_id in experimentData[spamDetectionRatio][spammerPercentage]:
                dataToPlot[ranking_id].append(np.mean(experimentData[spamDetectionRatio][spammerPercentage][ranking_id]))
        sdr[spamDetectionRatio]=dataToPlot
    for ranking_id in [RankingModel.LATEST_MESSAGES, RankingModel.POPULAR_MESSAGES]:
        for spamDetectionRatio in ratios:
            print('%s %s' % (ranking_id, spamDetectionRatio))
            series = sdr[spamDetectionRatio]
            dataX = series['x']
            dataY = series[ranking_id][:len(dataX)]
            if spamDetectionRatio==0.0:
                curveLabel = '%s'%(labels[ranking_id])
            else:
                curveLabel = '%s after spam detection (%d'%(labels[ranking_id].replace('Filtering', 'Detection'),spamDetectionRatio*100)+'%)'
            plt.plot(dataX, dataY, label=curveLabel, lw=1, marker=marker[spamDetectionRatio])
        plt.legend(loc=4)
        # NOTE(review): the x axis holds spammer shares, yet the label says
        # 'Time' -- looks like a copy/paste leftover; confirm before changing.
        plt.xlabel('Time', fontsize=16, fontweight='bold')
        plt.ylabel('Spamness', fontsize=16, fontweight='bold')
        # NOTE(review): absolute, user-specific output path -- confirm whether
        # it should be relative like in the sibling experiments.
        savefig('/Users/krishnakamath/Dropbox/temp/performanceWithSpamDetectionVaryingPercentageOfSpammers_%s.png'%ranking_id)
        plt.clf()