def build_data_set(self, sourcefile, sus_example_count=100, non_sus_example_count=100):
    self.source_file = sourcefile
    self.data_source = preprocessdump.datasource(sourcefile)
    self.has_data_source = True
    self.normalize_base = self.data_source.load_base_from_file(sourcefile)
    #self.data_set = SupervisedDataSet(5, 1)
    # ClassificationDataSet comes from PyBrain (from pybrain.datasets import ClassificationDataSet)
    self.data_set = ClassificationDataSet(5, 1, nb_classes=2, class_labels=['good', 'p2p'])

    # Add up to sus_example_count suspicious flows, labelled 1 ('p2p').
    count = 0
    #for sus_example in self.data_source.get_sus_dict(sus_example_count):
    for sus_example in self.data_source.load_dump_from_file(sourcefile + "_sus"):
        if count == sus_example_count:
            break
        self.data_set.addSample(self.normalize_data(sus_example, self.normalize_base), 1)
        progress = count * 100 / sus_example_count
        if progress % 10 == 0:
            print '\r[{0}] {1}%'.format('#' * (progress / 10), progress),
        count += 1
    print "\n %d suspicious examples added" % count

    # Add up to non_sus_example_count normal flows, labelled 0 ('good').
    count = 0
    #for normal_example in self.data_source.get_non_sus_dict(non_sus_example_count):
    for normal_example in self.data_source.load_dump_from_file(sourcefile + "_non_sus"):
        if count == non_sus_example_count:
            break
        self.data_set.addSample(self.normalize_data(normal_example, self.normalize_base), 0)
        progress = count * 100 / non_sus_example_count
        if progress % 10 == 0:
            print '\r[{0}] {1}%'.format('#' * (progress / 10), progress),
        count += 1
    print "\n %d non_suspicious examples added" % count

    print "Training data ready"
    print "Number of training patterns: ", len(self.data_set)
    print "Input and output dimensions: ", self.data_set.indim, self.data_set.outdim
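# --- Hedged usage sketch (not part of the original module) ---------------------
# A minimal example of how the ClassificationDataSet built by build_data_set()
# above could be trained with a PyBrain backprop trainer. NetFlow_ANN and its
# data_set attribute come from this repo; the hidden-layer size, momentum,
# weight decay and epoch count below are illustrative assumptions only.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure import SoftmaxLayer


def train_sketch(ann, epochs=20):
    ds = ann.data_set
    ds._convertToOneOfMany()  # one output unit per class ('good', 'p2p')
    net = buildNetwork(ds.indim, 10, ds.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=ds, momentum=0.1, weightdecay=0.01)
    trainer.trainEpochs(epochs)
    return net
# --------------------------------------------------------------------------------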
import preprocessdump
import NetFlow_ANN
import sys, os, getopt
from collections import OrderedDict


def loader(argv=sys.argv):
    try:
        opts, args = getopt.getopt(argv[1:], "h", ["help"])
    except getopt.error, msg:
        print msg
        print "for help use --help"
        sys.exit(2)
    for o, a in opts:
        if o in ("-h", "--help"):
            print "usage: %s <processed_dumpfile> <limit> <outputfile>" % argv[0]
            sys.exit(0)

    # args: processed_dumpfile limit outputfile
    ANN = NetFlow_ANN.NetFlow_ANN()
    ANN.set_datasource(preprocessdump.datasource(args[0]))
    ANN.normalize_base = ANN.data_source.load_base_from_file(args[0])

    # classify both the suspicious and the non-suspicious examples
    examples = ANN.data_source.load_dump_from_file(args[0] + '_sus')
    examples.extend(ANN.data_source.load_dump_from_file(args[0] + '_non_sus'))
    result = ANN.classifier(examples, args[1])

    # order the results by score, highest first, and write "key : score" lines
    result = OrderedDict(sorted(result.items(), key=lambda t: t[1], reverse=True))
    with open(args[2], 'w') as outputfile:
        for key, value in result.items():
            outputfile.write(str(key) + ' : ' + str(value) + '\n')


if __name__ == '__main__':
    sys.exit(loader())
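# Hedged usage sketch (assumptions: this file is saved as loader.py, preprocessdump has
# already produced <dumpfile>, <dumpfile>_sus and <dumpfile>_non_sus, and 0.5 is only an
# example value for the limit argument):
#
#   python loader.py processed_dump 0.5 classification_results.txt
#
# The script scores every loaded example with NetFlow_ANN.classifier(), sorts the scores
# from highest to lowest, and writes one "key : score" line per example to the output file.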