def classify(self, text, language):
    '''Given input text and language, calculate the probability that the text
    is relevant to the topic.

    @result probability that text is relevant'''
    input_entry = Entry(id=None, guid=None, entry=text, language=language)
    self.word_dict.words.setdefault(language, {})
    # For each token calculate the probability of being relevant to the topic
    # and combine the probabilities according to Bayes' theorem:
    #
    #               p1 * p2 * ... * pn                        a
    #     P = ------------------------------------------ = -------
    #         p1 * p2 * ... * pn + (1-p1)(1-p2)...(1-pn)     a + b
    #
    a = 1.0
    b = 1.0
    for i in xrange(1, self.MAX_TOKEN_SIZE + 1):
        for token in input_entry.get_token(i):
            if token not in self.word_dict.words[language]:
                # unknown token: neutral probability
                probability = 0.5
            else:
                token_stats = self.word_dict.words[language][token]
                probability = token_stats['weight'] / token_stats['count']
            a *= probability
            b *= 1 - probability
    if a + b == 0:
        return 0
    result = a / (a + b)
    if result == 0.5:
        # no informative tokens found
        return -1
    return result
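# Hedged sketch (not part of the original class): the naive-Bayes combination
# used in classify() above, pulled out as a standalone helper with a small
# worked example. The helper name and the numbers are illustrative only.
def combine_probabilities(token_probs):
    # a = product of p_i, b = product of (1 - p_i); result is a / (a + b)
    a = 1.0
    b = 1.0
    for p in token_probs:
        a *= p
        b *= 1.0 - p
    if a + b == 0:
        return 0
    return a / (a + b)

# Two tokens leaning "relevant" (0.9, 0.8) plus one unknown token (0.5):
# a = 0.9 * 0.8 * 0.5 = 0.36, b = 0.1 * 0.2 * 0.5 = 0.01,
# so P = 0.36 / 0.37, roughly 0.973.
assert abs(combine_probabilities([0.9, 0.8, 0.5]) - 0.36 / 0.37) < 1e-9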
def train_from_human_classification(self, filename, language):
    'Method for training current bayesian filter from external human classification file'
    filehandler = open(filename, 'rb')
    content = pickle.load(filehandler)
    for entry_id in content:
        record = list(content[entry_id])
        e = Entry(entry_id, record[1], record[2])
        if e.get_language() == language:
            self._add_to_human_classification(e, record[0])
    self.human.store()
    self.word_dict.store()
def train_from_file(self, filename, language, classification):
    'method trains classifier from some file'
    input_file = open(filename, 'r')
    for line in input_file:
        e = Entry(None, None, line, language)
        self._add_to_human_classification(e, classification)
    self.human.store()
    self.word_dict.store()
def manual_train(self, text, language, classification):
    'Method for manual training of bayesian filter.'
    # Entry takes (id, guid, entry, language), matching the other call sites
    e = Entry(None, None, text, language)
    if classification is True:
        self._add_classification(e, True)
    if classification is False:
        self._add_classification(e, False)
    self.word_dict.store()
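# Hedged usage sketch (not from the original source): how the training and
# classification methods above might fit together. "BayesClassifier" and the
# file names are assumptions for illustration; the method signatures follow
# the definitions above.
classifier = BayesClassifier()
classifier.train_from_file('relevant_examples.txt', 'en', True)
classifier.train_from_file('irrelevant_examples.txt', 'en', False)
score = classifier.classify('some new text to score', 'en')
print(score)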
def setUp(self):
    self.instance = HighTrafficMonitoring(publisher=pub,
                                          threshold=self.THRESHOLD,
                                          duration_s=self.DURATION_S)
    self.entry = Entry(remote_host='127.0.0.1', timestamp=time.time(),
                       method='GET', url='/', status=200, size=1337)
def test_parse_line(self):
    line = '127.0.0.1 - - [01/Jan/1970:00:00:00 +0000] "GET / HTTP/1.1" 200 1337'
    entry = Entry(remote_host='127.0.0.1', timestamp=0, method='GET',
                  url='/', status=200, size=1337)
    self.assertEqual(LogEntryMonitoring.parse_line(line), entry)
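# Hedged side note (not part of the original suite): the expected timestamp is 0
# because "01/Jan/1970:00:00:00 +0000" is the Unix epoch. A minimal conversion
# of the date part, assuming the +0000 offset is handled separately; the helper
# name is illustrative.
import calendar
import time

def clf_date_to_epoch(date_part):
    # date_part like "01/Jan/1970:00:00:00"
    return calendar.timegm(time.strptime(date_part, "%d/%b/%Y:%H:%M:%S"))

assert clf_date_to_epoch("01/Jan/1970:00:00:00") == 0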
def test_searchName__should__return_record(self):
    name = "John"
    mobile = "123 456"
    data = []
    record = Entry(name=name, mobile=mobile)
    data.append(record)
    self.phonebook.records = data
    result = self.phonebook.searchName(name)
    self.assertIsNotNone(result)
    self.assertIn(record, result)
def regenerate_word_dict(self):
    'regenerate word dictionary according to human_input.'
    print self.human.classification
    self.word_dict.words = {}
    # go through the human classification and build a new word dictionary from it
    for entry_id in self.human.classification:
        record = list(self.human.classification[entry_id])
        e = Entry(entry_id, record[1], record[2])
        if record[0] == True:
            self._add_classification(e, True)
        if record[0] == False:
            self._add_classification(e, False)
    self.word_dict.store()
def test_searchName_with_partial_string__should__return_record(self):
    name = "John"
    mobile = "123 456"
    data = []
    record = Entry(name=name, mobile=mobile)
    data.append(record)
    self.phonebook.records = data

    result = self.phonebook.searchName(name[2:])  # trailing part of the name ("hn")
    self.assertIsNotNone(result)
    self.assertIn(record, result)

    result = self.phonebook.searchName(name[:2])  # leading part of the name ("Jo")
    self.assertIsNotNone(result)
    self.assertIn(record, result)
"normal") prints( "_ ___\ \033[1;032mHACK_HUT\033[1;00m __ \_ |__ / _____// |_|__| ____ | | __", "normal") prints(" | \ \/\_ __ \__ \ | __ \ \_____ \\ __\ |/ ___\| |/ /", "normal") prints(" | \_____| | \// __ \| \_\ \/ \| | | \ \___| <", "normal") prints(" \______ /|__| (______/_____/_________/|__| |__|\_____>__|_ \\", "normal") prints(" \/ ", "normal") prints( " \033[1;032mA tool for HTTP file inclusion exploits\033[1;00m", "normal") prints("Usage:", "normal") prints("python3 crabstick.py --help", "normal") prints("", "normal") prints("", "line") prints("", "normal") if __name__ == "__main__": arguments = docopt(__doc__) banner() start = Entry(arguments) try: start.run() except KeyboardInterrupt: print("") prints("Exiting")
def _getTestEntry(self):
    name = "John"
    mobile = "+39 333 123456"
    entry = Entry(name=name, mobile=mobile)
    return entry
def parse_line(line):
    parts = LogEntryMonitoring.PARSER(line)
    return Entry.factory(parts)
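# Hedged sketch (an assumption, not the project's actual PARSER): one way a
# Common Log Format parser such as LogEntryMonitoring.PARSER could be written,
# matching lines like the one used in test_parse_line above.
import re

_CLF_RE = re.compile(
    r'(?P<remote_host>\S+) \S+ \S+ \[(?P<timestamp>[^\]]+)\] '
    r'"(?P<method>\S+) (?P<url>\S+) [^"]+" (?P<status>\d+) (?P<size>\d+)'
)

def parse_clf(line):
    # Returns a dict of named fields, or None when the line does not match.
    match = _CLF_RE.match(line)
    return match.groupdict() if match else None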
import traceback

from src.entry import Entry

if __name__ == "__main__":
    try:
        entry = Entry()
        entry.main()
    except Exception:
        print(traceback.format_exc())