Ejemplo n.º 1
0
    def classify(self, text, language):
        '''Estimate how likely ``text`` is to be relevant to the topic.

        Every token returned by ``Entry.get_token(size)`` for the sizes
        ``1 .. MAX_TOKEN_SIZE`` contributes one probability. The values are
        combined as

            P = p1*p2*...*pn / (p1*p2*...*pn + (1-p1)*(1-p2)*...*(1-pn))

        @param text: raw text to classify
        @param language: key into ``self.word_dict.words``; an empty table is
            created for a language that has none yet
        @result: the combined probability; ``0`` when both products are
            zero; ``-1`` when the result is exactly 0.5 (which is what a
            text consisting only of unknown tokens yields)
        '''
        input_entry = Entry(id=None, guid=None, entry=text, language=language)
        # setdefault creates the per-language table if needed and returns it,
        # so the table is looked up once instead of once per token.
        known_tokens = self.word_dict.words.setdefault(language, {})

        relevant = 1.0    # p1 * p2 * ... * pn
        irrelevant = 1.0  # (1-p1) * (1-p2) * ... * (1-pn)
        # range (not xrange) behaves the same here and also runs on Python 3.
        for size in range(1, self.MAX_TOKEN_SIZE + 1):
            for token in input_entry.get_token(size):
                if token not in known_tokens:
                    # an unseen token is neutral evidence
                    probability = 0.5
                else:
                    token_stats = known_tokens[token]
                    # float() forces true division; with integer counters
                    # Python 2 would otherwise truncate the ratio to 0 or 1.
                    probability = (float(token_stats['weight'])
                                   / token_stats['count'])
                relevant *= probability
                irrelevant *= 1 - probability

        total = relevant + irrelevant
        if total == 0:
            return 0
        result = relevant / total
        if result == 0.5:
            return -1
        return result
Ejemplo n.º 2
0
    def classify(self, text, language):
        '''Estimate how likely ``text`` is to be relevant to the topic.

        Every token returned by ``Entry.get_token(size)`` for the sizes
        ``1 .. MAX_TOKEN_SIZE`` contributes one probability. The values are
        combined as

            P = p1*p2*...*pn / (p1*p2*...*pn + (1-p1)*(1-p2)*...*(1-pn))

        @param text: raw text to classify
        @param language: key into ``self.word_dict.words``; an empty table is
            created for a language that has none yet
        @result: the combined probability; ``0`` when both products are
            zero; ``-1`` when the result is exactly 0.5 (which is what a
            text consisting only of unknown tokens yields)
        '''
        input_entry = Entry(id=None, guid=None, entry=text, language=language)
        # setdefault creates the per-language table if needed and returns it,
        # so the table is looked up once instead of once per token.
        known_tokens = self.word_dict.words.setdefault(language, {})

        relevant = 1.0    # p1 * p2 * ... * pn
        irrelevant = 1.0  # (1-p1) * (1-p2) * ... * (1-pn)
        # range (not xrange) behaves the same here and also runs on Python 3.
        for size in range(1, self.MAX_TOKEN_SIZE + 1):
            for token in input_entry.get_token(size):
                if token not in known_tokens:
                    # an unseen token is neutral evidence
                    probability = 0.5
                else:
                    token_stats = known_tokens[token]
                    # float() forces true division; with integer counters
                    # Python 2 would otherwise truncate the ratio to 0 or 1.
                    probability = (float(token_stats['weight'])
                                   / token_stats['count'])
                relevant *= probability
                irrelevant *= 1 - probability

        total = relevant + irrelevant
        if total == 0:
            return 0
        result = relevant / total
        if result == 0.5:
            return -1
        return result
Ejemplo n.º 3
0
    def train_from_human_classification(self, filename, language):
        '''Train the filter from an external, pickled human classification file.

        The file must unpickle to a mapping ``entry_id -> record``. Item 0 of
        a record is the classification handed to
        ``_add_to_human_classification``; items 1 and 2 are passed to the
        ``Entry`` constructor after the id. Only entries whose
        ``get_language()`` equals ``language`` are used. ``self.human`` and
        ``self.word_dict`` are stored once all entries are processed.

        @param filename: path of the pickle file to read
        @param language: language the entries must match
        '''
        # NOTE(review): unpickling can execute arbitrary code; only feed this
        # method files that come from a trusted source.
        # The with-block closes the handle even if unpickling raises.
        with open(filename, 'rb') as filehandler:
            content = pickle.load(filehandler)

        for entry_id in content:
            # materialise the record once instead of once per field
            record = list(content[entry_id])
            e = Entry(entry_id, record[1], record[2])
            if e.get_language() == language:
                self._add_to_human_classification(e, record[0])
        self.human.store()
        self.word_dict.store()
Ejemplo n.º 4
0
    def train_from_human_classification(self, filename, language):
        '''Train the filter from an external, pickled human classification file.

        The file must unpickle to a mapping ``entry_id -> record``. Item 0 of
        a record is the classification handed to
        ``_add_to_human_classification``; items 1 and 2 are passed to the
        ``Entry`` constructor after the id. Only entries whose
        ``get_language()`` equals ``language`` are used. ``self.human`` and
        ``self.word_dict`` are stored once all entries are processed.

        @param filename: path of the pickle file to read
        @param language: language the entries must match
        '''
        # NOTE(review): unpickling can execute arbitrary code; only feed this
        # method files that come from a trusted source.
        # The with-block closes the handle even if unpickling raises.
        with open(filename, 'rb') as filehandler:
            content = pickle.load(filehandler)

        for entry_id in content:
            # materialise the record once instead of once per field
            record = list(content[entry_id])
            e = Entry(entry_id, record[1], record[2])
            if e.get_language() == language:
                self._add_to_human_classification(e, record[0])
        self.human.store()
        self.word_dict.store()
Ejemplo n.º 5
0
 def train_from_file(self, filename, language, classification):
     '''Train the classifier from a text file, one entry per line.

     Each line (including its trailing newline) becomes an ``Entry`` in the
     given language and is added with the same ``classification``.
     ``self.human`` and ``self.word_dict`` are stored after the last line.

     @param filename: path of the text file to read
     @param language: language assigned to every entry
     @param classification: classification applied to every line
     '''
     # The with-block closes the handle even if training raises; the handle
     # is no longer called ``file``, which shadowed the Python 2 builtin.
     with open(filename, 'r') as source:
         for line in source:
             e = Entry(None, None, line, language)
             self._add_to_human_classification(e, classification)
     self.human.store()
     self.word_dict.store()
Ejemplo n.º 6
0
 def manual_train(self, text, language, classification):
     '''Train the bayesian filter by hand with a single text.

     The entry is built in every case, but it is only added when
     ``classification`` is exactly ``True`` or ``False``. The word
     dictionary is stored either way.
     '''
     entry = Entry(None, text, language)
     # identity checks: values that are merely truthy or falsy are ignored
     if classification is True or classification is False:
         self._add_classification(entry, classification)
     self.word_dict.store()
 def setUp(self):
     """Build the monitor instance and a sample entry used by the tests."""
     monitor_options = dict(
         publisher=pub,
         threshold=self.THRESHOLD,
         duration_s=self.DURATION_S,
     )
     self.instance = HighTrafficMonitoring(**monitor_options)

     # sample request: GET / from localhost, stamped with the current time
     entry_fields = dict(
         remote_host='127.0.0.1',
         timestamp=time.time(),
         method='GET',
         url='/',
         status=200,
         size=1337,
     )
     self.entry = Entry(**entry_fields)
    def test_parse_line(self):
        """parse_line must turn an access-log line into the matching Entry.

        The sample line is stamped 01/Jan/1970:00:00:00 +0000, i.e. Unix
        timestamp 0, which is what the expected entry carries.
        """
        line = '127.0.0.1 - - [01/Jan/1970:00:00:00 +0000] "GET / HTTP/1.1" 200 1337'
        entry = Entry(remote_host='127.0.0.1',
                      timestamp=0,
                      method='GET',
                      url='/',
                      status=200,
                      size=1337)

        # assertEqual performs the same == comparison as before, but reports
        # both operands on failure instead of a bare "False is not true".
        self.assertEqual(LogEntryMonitoring.parse_line(line), entry)
Ejemplo n.º 9
0
    def test_searchName__should__return_record(self):
        """Searching by the full name must return the stored record."""
        full_name = "John"
        expected = Entry(name=full_name, mobile="123 456")

        # the phonebook holds exactly this one record
        self.phonebook.records = [expected]
        matches = self.phonebook.searchName(full_name)

        self.assertIsNotNone(matches)
        self.assertIn(expected, matches)
Ejemplo n.º 10
0
 def regenerate_word_dict(self):
     '''Rebuild the word dictionary from the stored human classification.

     The current dictionary is discarded. Every record in
     ``self.human.classification`` is then turned into an ``Entry`` (id plus
     record items 1 and 2) and added again according to record item 0.
     Records whose item 0 equals neither True nor False are skipped. The
     rebuilt dictionary is stored at the end.
     '''
     # Parenthesised so the line parses on Python 3 as well; with a single
     # argument the Python 2 print statement produces the same output.
     print(self.human.classification)
     self.word_dict.words = {}
     for entry_id in self.human.classification:
         # materialise the record once instead of once per field
         record = list(self.human.classification[entry_id])
         e = Entry(entry_id, record[1], record[2])
         label = record[0]
         # Equality rather than identity is kept on purpose, so stored 1/0
         # values are still treated as True/False exactly as before.
         if label == True:  # noqa: E712
             self._add_classification(e, True)
         elif label == False:  # noqa: E712
             self._add_classification(e, False)
     self.word_dict.store()
Ejemplo n.º 11
0
    def test_searchName_with_partial_string__should__return_record(self):
        """Searching by a fragment of the name must still return the record."""
        full_name = "John"
        record = Entry(name=full_name, mobile="123 456")
        self.phonebook.records = [record]

        # trailing fragment of the name ("hn")
        by_suffix = self.phonebook.searchName(full_name[2:])
        self.assertIsNotNone(by_suffix)
        self.assertIn(record, by_suffix)

        # leading fragment of the name ("Jo")
        by_prefix = self.phonebook.searchName(full_name[:2])
        self.assertIsNotNone(by_prefix)
        self.assertIn(record, by_prefix)
Ejemplo n.º 12
0
           "normal")
    prints(
        "_   ___\ \033[1;032mHACK_HUT\033[1;00m __ \_ |__  /   _____//  |_|__| ____  |  | __",
        "normal")
    prints(" |   \  \/\_  __ \__  \ | __ \ \_____  \\   __\  |/ ___\|  |/ /",
           "normal")
    prints(" |   \_____|  | \// __ \| \_\ \/        \|  | |  \  \___|    <",
           "normal")
    prints(" \______  /|__|  (______/_____/_________/|__| |__|\_____>__|_ \\",
           "normal")
    prints("        \/  ", "normal")
    prints(
        "            \033[1;032mA tool for HTTP file inclusion exploits\033[1;00m",
        "normal")
    prints("Usage:", "normal")
    prints("python3 crabstick.py --help", "normal")
    prints("", "normal")
    prints("", "line")
    prints("", "normal")


# Script entry point: parse the command line, show the banner, run the tool.
if __name__ == "__main__":
    # docopt parses the command line against the usage text in the module
    # docstring
    arguments = docopt(__doc__)
    banner()
    start = Entry(arguments)
    try:
        start.run()
    except KeyboardInterrupt:
        # Ctrl-C: print an empty line, then the exit message
        print("")
        # NOTE(review): every prints() call visible above passes a second
        # style argument; confirm prints() has a default for it.
        prints("Exiting")
Ejemplo n.º 13
0
 def _getTestEntry(self):
     """Return a new Entry for "John" with a fixed mobile number."""
     return Entry(name="John", mobile="+39 333 123456")
Ejemplo n.º 14
0
    def parse_line(line):
        """Run ``line`` through LogEntryMonitoring.PARSER and build an Entry.

        The parser output is handed unchanged to ``Entry.factory``, whose
        result is returned.
        """
        return Entry.factory(LogEntryMonitoring.PARSER(line))
Ejemplo n.º 15
0
import traceback
from src.entry import Entry

# Script entry point: build the application Entry and run its main().
if __name__ == "__main__":
    try:
        entry = Entry()
        entry.main()
    except Exception:
        # Any Exception is reported as a formatted traceback on stdout
        # instead of propagating.
        # NOTE(review): nothing visible here re-raises or sets a non-zero
        # exit status after a failure; confirm that is intended.
        print(traceback.format_exc())