def trendCurves():
    """Run the mixed-users simulation once and hand its output to
    Analysis.trendCurves.

    The experiment file lives at ``spamModelFolder + model.id``. Any existing
    file at that path is removed before ``run`` is called, and the same path
    is then passed to ``Analysis.trendCurves``.
    """
    mixedModel = MixedUsersModel()
    outputFile = spamModelFolder + mixedModel.id
    settings = dict(
        model=mixedModel,
        addUsersMethod=User.addUsersUsingRatio,
        analysisMethods=[(Analysis.trendCurves, 1)],
        ratio={'normal': 0.985, 'spammer': 0.015},
        experimentFileName=outputFile,
    )
    # Start from a clean slate: drop whatever a previous run left behind.
    GeneralMethods.runCommand('rm -rf %s' % outputFile)
    run(**settings)
    Analysis.trendCurves(experimentFileName=outputFile)
def performanceWithSpamFilteringForLatestMessages(generateData):
    """Generate, or plot, ranking quality as the spammer fraction varies.

    Ten iterations are performed over spammer fractions 0.001 .. 0.050
    (step 0.001). Each (iteration, fraction) pair has its own experiment file
    under ``spamModelFolder``.

    generateData -- when true, each experiment file is deleted and
        regenerated through ``run``; nothing is plotted. When false, the
        existing files are read, the values stored under each record's
        'spammmess' key are averaged per ranking id, and the resulting curves
        are saved to 'performanceWithSpamFilteringForLatestMessages.png'.
    """
    spammerFractions = [step * 0.001 for step in range(1, 51)]
    collected = defaultdict(dict)
    for iteration in range(10):
        for spammerFraction in spammerFractions:
            experimentFileName = spamModelFolder + (
                'performanceWithSpamFilteringForLatestMessages/%s/%0.3f'
                % (iteration, spammerFraction))
            print(experimentFileName)
            if generateData:
                settings = {
                    'model': MixedUsersModel(),
                    'numberOfTimeSteps': 10,
                    'addUsersMethod': User.addUsersUsingRatio,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 1 - spammerFraction,
                              'spammer': spammerFraction},
                    'rankingMethods': [RankingModel.latestMessages,
                                       RankingModel.latestMessagesSpamFiltered],
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before regenerating it.
                GeneralMethods.runCommand('rm -rf %s' % experimentFileName)
                run(**settings)
            else:
                valuesByRanking = defaultdict(list)
                for record in FileIO.iterateJsonFromFile(experimentFileName):
                    for rankingId, values in record['spammmess'].items():
                        valuesByRanking[rankingId] += values
                collected[iteration][spammerFraction] = valuesByRanking

    if generateData:
        return

    # meansByRanking[rankingId][x] -> one mean per iteration.
    meansByRanking = defaultdict(dict)
    for iteration in collected:
        perFraction = collected[iteration]
        xValues = sorted(perFraction)
        iterationMeans = defaultdict(list)
        for fraction in xValues:
            for rankingId, values in perFraction[fraction].items():
                iterationMeans[rankingId].append(np.mean(values))
        for rankingId in iterationMeans:
            for x, y in zip(xValues, iterationMeans[rankingId]):
                meansByRanking[rankingId].setdefault(x, []).append(y)

    # The ranking ids and x values of the last iteration drive the plot.
    for rankingId in iterationMeans:
        averaged = [np.mean(meansByRanking[rankingId][x]) for x in xValues]
        plt.plot(xValues, averaged, label=labels[rankingId], lw=1,
                 marker=RankingModel.marker[rankingId])
    plt.xlabel('Percentage of Spammers', fontsize=16, fontweight='bold')
    plt.ylabel('Spamness', fontsize=16, fontweight='bold')
    plt.legend(loc=2)
    plt.xlim(xmax=0.05)
    plt.savefig('performanceWithSpamFilteringForLatestMessages.png')
    plt.clf()
def main(args):
    """Prepare output folders, a TF session and a gym env, then start training.

    Args:
        args: parsed options object. The attributes read here are
            ``model_name``, ``env``, ``algo``, ``episodes``, ``rand_seed``,
            ``gui`` and, for the non-DQN branch, ``BATCH_SIZE``.

    Raises:
        AssertionError: if ``args.algo`` is not a supported algorithm.
    """
    # --- saving paths ---
    output_dir = "logs"
    if args.model_name is None:
        # Fresh run: derive a unique folder name from the settings + timestamp.
        t = time.strftime('%Y-%m-%d_%H_%M_%S_%z')
        model_name = "env_{}_algo_{}_ep_{}_{}".format(
            args.env, args.algo, args.episodes, t)
        print("[*] created model folder: {}".format(model_name))
        model_dir = '{}/{}'.format(output_dir, model_name)
    else:
        # A model name was supplied: it is used as the model directory as-is.
        model_name = args.model_name
        print("[*] proceeding to load model: {}".format(model_name))
        model_dir = model_name
    image_dir = '{}/images'.format(model_dir)
    checkpoints_dir = '{}/checkpoints'.format(model_dir)
    for path in [output_dir, model_dir, image_dir, checkpoints_dir]:
        if not os.path.exists(path):
            # makedirs rather than mkdir so a nested, user-supplied model
            # path does not fail on a missing intermediate directory.
            os.makedirs(path)

    # --- tf session definitions ---
    tf.reset_default_graph()
    tf.random.set_random_seed(args.rand_seed)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess = tf.Session(config=config)

    # --- load env ---
    print("[*] attempting to load {} env".format(args.env))
    env = gym.make(args.env)
    print("[*] success")

    supported_algorithms = ['dqn', 'ddqn', 'a2c', 'pompdp']
    # The message lists every supported algorithm; the previous
    # "{}".format(*supported_algorithms) only ever showed the first one.
    assert args.algo in supported_algorithms, (
        "Unsupported Algorithm! Please choose a supported one: {}".format(
            ', '.join(supported_algorithms)))

    # --- main loop ---
    if args.algo in ['dqn', 'ddqn']:
        run(sess=sess, env=env, algo=args.algo,
            checkpoints_dir=checkpoints_dir, n_episodes=args.episodes,
            gui=args.gui)
    else:
        run_a2c(sess=sess, env=env, algo=args.algo,
                checkpoints_dir=checkpoints_dir, n_episodes=args.episodes,
                gui=args.gui, BATCH_SIZE=args.BATCH_SIZE)
def performanceWithSpamDetection(generateData):
    """Generate, or plot, ranking quality over time for three spam-detection
    ratios (0.0, 0.4 and 0.9).

    Ten iterations are performed per ratio; each (iteration, ratio) pair has
    its own experiment file under ``spamModelFolder``.

    generateData -- when true, each experiment file is deleted and
        regenerated through ``run`` (100 time steps, spammer ratio 0.015);
        nothing is plotted. When false, the files are read, the values under
        each record's 'spammmess' key are pooled per time step and ranking id
        across iterations, averaged, smoothed, and one figure per
        spam-filtered ranking is written via ``savefig``.
    """
    ratios = [0.0, 0.4, 0.9]
    marker = {0.0: 's', 0.4: 'o', 0.9: 'd'}
    spammerPercentage = 0.015  # identical for every detection ratio
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for spamDetectionRatio in ratios:
            experimentFileName = spamModelFolder + (
                'performanceWithSpamDetection/%s/%0.3f'
                % (iteration, spamDetectionRatio))
            print(experimentFileName)
            if generateData:
                conf = {
                    'model': MixedUsersModel(),
                    'numberOfTimeSteps': 100,
                    'addUsersMethod': User.addUsersUsingRatioWithSpamDetection,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 1 - spammerPercentage,
                              'spammer': spammerPercentage},
                    'rankingMethods': [
                        RankingModel.latestMessages,
                        RankingModel.latestMessagesSpamFiltered,
                        RankingModel.popularMessages,
                        RankingModel.popularMessagesSpamFiltered],
                    'spamDetectionRatio': spamDetectionRatio,
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before regenerating it.
                GeneralMethods.runCommand('rm -rf %s' % experimentFileName)
                run(**conf)
            else:
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id, values in data['spammmess'].items():
                        perTimeStep = experimentData[spamDetectionRatio]
                        timeStep = data['currentTimeStep']
                        if timeStep not in perTimeStep:
                            perTimeStep[timeStep] = defaultdict(list)
                        perTimeStep[timeStep][ranking_id] += values

    if generateData:
        return

    sdr = {}
    for spamDetectionRatio in sorted(experimentData):
        dataToPlot = defaultdict(list)
        # Walk the time steps in chronological order: the smoothing window,
        # the [10:] slice and the x axis below all assume it, and plain dict
        # iteration does not guarantee it.
        for timeUnit in sorted(experimentData[spamDetectionRatio]):
            dataToPlot['x'].append(timeUnit)
            for ranking_id in experimentData[spamDetectionRatio][timeUnit]:
                dataToPlot[ranking_id].append(np.mean(
                    experimentData[spamDetectionRatio][timeUnit][ranking_id]))
        sdr[spamDetectionRatio] = dataToPlot

    for ranking_id in [RankingModel.LATEST_MESSAGES_SPAM_FILTERED,
                       RankingModel.POPULAR_MESSAGES_SPAM_FILTERED]:
        for spamDetectionRatio in ratios:
            print('%s %s' % (ranking_id, spamDetectionRatio))
            series = sdr[spamDetectionRatio]
            dataY = smooth(series[ranking_id], 8)[:len(series['x'])]
            # Skip the first 10 time steps and re-base the x axis at zero.
            dataX, dataY = series['x'][10:], dataY[10:]
            shiftedX = [x - 10 for x in dataX]
            print('x %s' % (shiftedX,))
            print('%s %s' % (ranking_id, dataY))
            if spamDetectionRatio == 0.0:
                label = '%s' % (labels[ranking_id])
            else:
                label = '%s (%d' % (
                    labels[ranking_id].replace('Filtering', 'Detection'),
                    spamDetectionRatio * 100) + '%)'
            plt.plot(shiftedX, dataY, label=label, lw=1,
                     marker=marker[spamDetectionRatio])
        plt.ylim(ymin=0, ymax=1)
        plt.xlim(xmin=0, xmax=75)
        plt.legend()
        plt.xlabel('Time', fontsize=16, fontweight='bold')
        plt.ylabel('Spamness', fontsize=16, fontweight='bold')
        savefig('performanceWithSpamDetection_%s.png' % ranking_id)
        plt.clf()
def performanceAsPercentageOfGlobalSpammerVaries(generateData):
    """Generate, or plot, ranking quality as the 'globalPayloads' share of
    the spam ratio goes from 0.1 to 1.0 in steps of 0.1.

    Ten iterations are performed; each (iteration, share) pair has its own
    experiment file under ``spamModelFolder``.

    generateData -- when true, each experiment file is deleted and
        regenerated through ``run``; nothing is plotted. When false, the
        files are read, the values under each record's 'spammmess' key are
        averaged per ranking id, and the curves (only for ranking ids present
        in ``labels``) are saved to
        'performanceAsPercentageOfGlobalSpammerVaries.png'.
    """
    collected = defaultdict(dict)
    for iteration in range(10):
        for step in range(1, 11):
            globalShare = step * 0.1
            experimentFileName = spamModelFolder + (
                'performanceAsPercentageOfGlobalSpammerVaries/%s/%0.3f'
                % (iteration, globalShare))
            print(experimentFileName)
            if generateData:
                settings = {
                    'model': MixedUsersModel(),
                    'numberOfTimeSteps': 10,
                    'addUsersMethod': User.addUsersUsingRatio,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 0.985, 'spammer': 0.015},
                    'spamRatio': {'localPayloads': 1 - globalShare,
                                  'globalPayloads': globalShare},
                    'noOfGlobalSpammerPayloads': 10,
                    'rankingMethods': [
                        RankingModel.latestMessages,
                        RankingModel.latestMessagesDuplicatesRemoved,
                        RankingModel.popularMessages],
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before regenerating it.
                GeneralMethods.runCommand('rm -rf %s' % experimentFileName)
                run(**settings)
            else:
                valuesByRanking = defaultdict(list)
                for record in FileIO.iterateJsonFromFile(experimentFileName):
                    for rankingId, values in record['spammmess'].items():
                        valuesByRanking[rankingId] += values
                collected[iteration][globalShare] = valuesByRanking

    if generateData:
        return

    # meansByRanking[rankingId][x] -> one mean per iteration.
    meansByRanking = defaultdict(dict)
    for iteration in collected:
        perShare = collected[iteration]
        xValues = sorted(perShare)
        iterationMeans = defaultdict(list)
        for share in xValues:
            for rankingId, values in perShare[share].items():
                iterationMeans[rankingId].append(np.mean(values))
        for rankingId in iterationMeans:
            for x, y in zip(xValues, iterationMeans[rankingId]):
                meansByRanking[rankingId].setdefault(x, []).append(y)

    # The ranking ids and x values of the last iteration drive the plot.
    for rankingId in iterationMeans:
        if rankingId not in labels:
            continue
        averaged = [np.mean(meansByRanking[rankingId][x]) for x in xValues]
        plt.plot(xValues, averaged, label=labels[rankingId], lw=1,
                 marker=RankingModel.marker[rankingId])
    plt.xlabel('Percentage of Spammers Using Group Strategy', fontsize=16,
               fontweight='bold')
    plt.ylabel('Spamness', fontsize=16, fontweight='bold')
    plt.legend(loc=4)
    plt.savefig('performanceAsPercentageOfGlobalSpammerVaries.png')
    plt.clf()
def performanceAsNoOfGlobalPayloadsVary(generateData):
    """Generate, or plot, ranking quality as 'noOfGlobalSpammerPayloads'
    goes from 1 to 499.

    Ten iterations are performed; each (iteration, payload count) pair has
    its own experiment file under ``spamModelFolder``.
    ``Spammer.globalPayloads`` is reset to None before every pair, in both
    modes.

    generateData -- when true, each experiment file is deleted and
        regenerated through ``run``; nothing is plotted. When false, the
        files are read, the values under each record's 'spammmess' key are
        averaged per ranking id, and the curves (only for ranking ids present
        in ``labels``) are drawn on a log-x axis and saved to
        'performanceAsNoOfGlobalPayloadsVary.png'.
    """
    collected = defaultdict(dict)
    for iteration in range(10):
        for payloadCount in range(1, 500):
            Spammer.globalPayloads = None
            experimentFileName = spamModelFolder + (
                'performanceAsNoOfGlobalPayloadsVary/%s/%0.3f'
                % (iteration, payloadCount))
            print(experimentFileName)
            if generateData:
                settings = {
                    'model': MixedUsersModel(),
                    'numberOfTimeSteps': 10,
                    'addUsersMethod': User.addUsersUsingRatio,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 0.985, 'spammer': 0.015},
                    'noOfGlobalSpammerPayloads': payloadCount,
                    'rankingMethods': [
                        RankingModel.latestMessages,
                        RankingModel.latestMessagesDuplicatesRemoved,
                        RankingModel.popularMessages],
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before regenerating it.
                GeneralMethods.runCommand('rm -rf %s' % experimentFileName)
                run(**settings)
            else:
                valuesByRanking = defaultdict(list)
                for record in FileIO.iterateJsonFromFile(experimentFileName):
                    for rankingId, values in record['spammmess'].items():
                        valuesByRanking[rankingId] += values
                collected[iteration][payloadCount] = valuesByRanking

    if generateData:
        return

    # meansByRanking[rankingId][x] -> one mean per iteration.
    meansByRanking = defaultdict(dict)
    for iteration in collected:
        perCount = collected[iteration]
        xValues = sorted(perCount)
        iterationMeans = defaultdict(list)
        for count in xValues:
            for rankingId, values in perCount[count].items():
                iterationMeans[rankingId].append(np.mean(values))
        for rankingId in iterationMeans:
            for x, y in zip(xValues, iterationMeans[rankingId]):
                meansByRanking[rankingId].setdefault(x, []).append(y)

    # The ranking ids and x values of the last iteration drive the plot.
    for rankingId in iterationMeans:
        if rankingId not in labels:
            continue
        averaged = [np.mean(meansByRanking[rankingId][x]) for x in xValues]
        # The first 20 points are kept raw; only the remainder is smoothed.
        curve = averaged[:20] + list(smooth(averaged[20:]))
        plt.semilogx(xValues, curve[:len(xValues)], label=labels[rankingId],
                     lw=1, marker=RankingModel.marker[rankingId])
    plt.xlabel('Payloads Per Spam Group', fontsize=15, fontweight='bold')
    plt.ylabel('Spamness', fontsize=15, fontweight='bold')
    plt.legend(loc=4)
    plt.savefig('performanceAsNoOfGlobalPayloadsVary.png')
    plt.clf()
def performanceAsSpammerPayloadVaries(generateData):
    """Generate, or plot, ranking quality as 'noOfPayloadsPerSpammer' goes
    from 1 to 10.

    Ten iterations are performed; each (iteration, payload count) pair has
    its own experiment file under ``spamModelFolder``.

    generateData -- when true, each experiment file is deleted and
        regenerated through ``run``; nothing is plotted. When false, the
        files are read, the values under each record's 'spammmess' key are
        averaged per ranking id, and the curves (only for ranking ids present
        in ``labels``) are saved to 'performanceAsSpammerPayloadVaries.png'.
    """
    collected = defaultdict(dict)
    for iteration in range(10):
        for payloadCount in range(1, 11):
            experimentFileName = spamModelFolder + (
                'performanceAsSpammerPayloadVaries/%s/%0.3f'
                % (iteration, payloadCount))
            if generateData:
                settings = {
                    'model': MixedUsersModel(),
                    'numberOfTimeSteps': 10,
                    'addUsersMethod': User.addUsersUsingRatio,
                    'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                    'ratio': {'normal': 0.985, 'spammer': 0.015},
                    'noOfPayloadsPerSpammer': payloadCount,
                    'rankingMethods': [
                        RankingModel.latestMessages,
                        RankingModel.latestMessagesDuplicatesRemoved,
                        RankingModel.popularMessages],
                    'experimentFileName': experimentFileName,
                }
                # Remove stale output before regenerating it.
                GeneralMethods.runCommand('rm -rf %s' % experimentFileName)
                run(**settings)
            else:
                valuesByRanking = defaultdict(list)
                for record in FileIO.iterateJsonFromFile(experimentFileName):
                    for rankingId, values in record['spammmess'].items():
                        valuesByRanking[rankingId] += values
                collected[iteration][payloadCount] = valuesByRanking

    if generateData:
        return

    # meansByRanking[rankingId][x] -> one mean per iteration.
    meansByRanking = defaultdict(dict)
    for iteration in collected:
        perCount = collected[iteration]
        xValues = sorted(perCount)
        iterationMeans = defaultdict(list)
        for count in xValues:
            for rankingId, values in perCount[count].items():
                iterationMeans[rankingId].append(np.mean(values))
        for rankingId in iterationMeans:
            for x, y in zip(xValues, iterationMeans[rankingId]):
                meansByRanking[rankingId].setdefault(x, []).append(y)

    # The ranking ids and x values of the last iteration drive the plot.
    for rankingId in iterationMeans:
        if rankingId not in labels:
            continue
        averaged = [np.mean(meansByRanking[rankingId][x]) for x in xValues]
        plt.plot(xValues, averaged, label=labels[rankingId], lw=1,
                 marker=RankingModel.marker[rankingId])
    plt.xlabel('No. of Spam Payload', fontsize=16, fontweight='bold')
    plt.ylabel('Spamness', fontsize=16, fontweight='bold')
    plt.legend(prop=prop, loc='upper center', bbox_to_anchor=(0.5, 1.12),
               ncol=3, fancybox=True, shadow=False)
    plt.savefig('performanceAsSpammerPayloadVaries.png')
    plt.clf()
def run_model(data, meta):
    """Invoke ``models.run`` for the model named in ``CFG.model_name``.

    data -- object whose ``model_data`` attribute is forwarded as ``data``.
    meta -- accepted but not used by this function.

    ``number`` is always passed as 42. Nothing is returned.
    """
    run_kwargs = {'data': data.model_data, 'number': 42}
    models.run(CFG.model_name, **run_kwargs)
#!/usr/local/bin/python # -*- coding: utf-8 -*- import models if __name__ == '__main__': models.run()
def performanceWithSpamDetectionVaryingPercentageOfSpammers(generateData):
    """Generate, or plot, ranking quality against the spammer fraction for
    three spam-detection ratios (0.0, 0.4 and 0.9).

    The spammer fractions are 0.01 followed by 0.05 .. 1.00 in steps of 0.05.
    Ten iterations are performed; each (iteration, fraction, ratio) triple
    has its own experiment file under ``spamModelFolder``.

    generateData -- when true, each experiment file is deleted and
        regenerated through ``run``; nothing is plotted. When false, the
        files are read, the values under each record's 'spammmess' key are
        pooled per (ratio, fraction, ranking id) across iterations and
        averaged, and one figure per ranking id is written via ``savefig``.
    """
    ratios = [0.0, 0.4, 0.9]
    marker = {0.0: 's', 0.4: 'o', 0.9: 'd'}
    # Built once: the sweep does not depend on the iteration.
    spammer_percentages = [0.01] + [step * 0.05 for step in range(1, 21)]
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for spammerPercentage in spammer_percentages:
            for spamDetectionRatio in ratios:
                experimentFileName = spamModelFolder + (
                    'performanceWithSpamDetectionVaryingPercentageOfSpammers'
                    '/%s/%0.3f/%0.3f'
                    % (iteration, spammerPercentage, spamDetectionRatio))
                print(experimentFileName)
                if generateData:
                    conf = {
                        'model': MixedUsersModel(),
                        'numberOfTimeSteps': 10,
                        'addUsersMethod':
                            User.addUsersUsingRatioWithSpamDetection,
                        'analysisMethods':
                            [(Analysis.measureRankingQuality, 1)],
                        'ratio': {'normal': 1 - spammerPercentage,
                                  'spammer': spammerPercentage},
                        'rankingMethods': [
                            RankingModel.latestMessages,
                            RankingModel.latestMessagesSpamFiltered,
                            RankingModel.popularMessages,
                            RankingModel.popularMessagesSpamFiltered],
                        'spamDetectionRatio': spamDetectionRatio,
                        'experimentFileName': experimentFileName,
                    }
                    # Remove stale output before regenerating it.
                    GeneralMethods.runCommand(
                        'rm -rf %s' % experimentFileName)
                    run(**conf)
                else:
                    tempData = defaultdict(list)
                    for data in FileIO.iterateJsonFromFile(experimentFileName):
                        for ranking_id in data['spammmess']:
                            tempData[ranking_id] += data['spammmess'][ranking_id]
                    perPercentage = experimentData[spamDetectionRatio]
                    if spammerPercentage not in perPercentage:
                        perPercentage[spammerPercentage] = defaultdict(list)
                    for ranking_id in tempData:
                        perPercentage[spammerPercentage][ranking_id] += \
                            tempData[ranking_id]

    if generateData:
        return

    sdr = {}
    for spamDetectionRatio in sorted(experimentData):
        dataToPlot = defaultdict(list)
        for spammerPercentage in spammer_percentages:
            dataToPlot['x'].append(spammerPercentage)
            pooled = experimentData[spamDetectionRatio][spammerPercentage]
            for ranking_id in pooled:
                dataToPlot[ranking_id].append(np.mean(pooled[ranking_id]))
        sdr[spamDetectionRatio] = dataToPlot

    for ranking_id in [RankingModel.LATEST_MESSAGES,
                       RankingModel.POPULAR_MESSAGES]:
        for spamDetectionRatio in ratios:
            print('%s %s' % (ranking_id, spamDetectionRatio))
            series = sdr[spamDetectionRatio]
            dataX = series['x']
            dataY = series[ranking_id][:len(dataX)]
            if spamDetectionRatio == 0.0:
                label = '%s' % (labels[ranking_id])
            else:
                label = '%s after spam detection (%d' % (
                    labels[ranking_id].replace('Filtering', 'Detection'),
                    spamDetectionRatio * 100) + '%)'
            plt.plot(dataX, dataY, label=label, lw=1,
                     marker=marker[spamDetectionRatio])
        plt.legend(loc=4)
        # The x values are spammer fractions, not time steps, so the axis is
        # labelled accordingly (it previously read 'Time').
        plt.xlabel('Percentage of Spammers', fontsize=16, fontweight='bold')
        plt.ylabel('Spamness', fontsize=16, fontweight='bold')
        # NOTE(review): absolute, user-specific output directory; kept
        # unchanged so existing workflows still find the figures.
        savefig('/Users/krishnakamath/Dropbox/temp/performanceWithSpamDetectionVaryingPercentageOfSpammers_%s.png' % ranking_id)
        plt.clf()