def main():
    """Build the feature matrix from the training log and split it on disk.

    Reads ``../data/train_data.tsv`` line by line, turns each tab-separated
    line into a feature row via ``extract_features_from_log`` (called in
    ``'training_data'`` mode), and writes three CSV files:

    * ``../data/features.csv`` -- the full feature matrix
    * ``../data/training.csv`` -- rows selected by the random mask
    * ``../data/test.csv``     -- the remaining rows

    Each row is assigned to the training set independently with
    probability 0.8, so the split is approximately (not exactly) 80/20 and
    differs between runs unless NumPy's random state is seeded elsewhere.
    """
    # Use a context manager so the input file is closed even if feature
    # extraction raises part-way through.
    with open('../data/train_data.tsv') as f:
        data = [
            extract_features_from_log(line.split('\t'), 'training_data')
            for line in f
        ]

    columns = [
        'ip_1', 'ip_2', 'ip_3', 'ip_4',
        'category', 'os_version', 'version', 'vendor', 'name', 'os',
        'scheme', 'hostname', 'alexa_top_million',
        'len_path', 'len_query', 'len_host',
        'status',
    ]
    df = DataFrame(data, columns=columns)

    # Write feature matrix
    df.to_csv('../data/features.csv')

    # Separate data into test and training sets: one uniform draw per row,
    # rows whose draw is below 0.8 go to training, the rest to test.
    mask = np.random.rand(len(df)) < 0.8
    train = df[mask]
    test = df[~mask]

    train.to_csv('../data/training.csv')
    test.to_csv('../data/test.csv')
def process_log_line(log_line):
    """Classify one raw log line and return the model's prediction for it.

    The line is parsed with ``parse_log_line``, converted to features by
    ``extract_features_from_log`` in ``'live_data'`` mode, then passed
    through the module-level ``enc`` and ``scaler`` transformers before
    being handed to ``clf.predict``.

    NOTE(review): ``enc``, ``scaler`` and ``clf`` are defined outside this
    block; presumably they are already-fitted encoder, scaler and
    classifier objects -- confirm against the module setup.

    Returns the first element of the array returned by ``clf.predict``.
    """
    # Transform the log line into a vector that can be fed into the model.
    parsed_fields = parse_log_line(log_line)
    raw_vector = np.array(extract_features_from_log(parsed_fields, 'live_data'))
    # The encoder output is densified with .toarray() before scaling.
    dense_vector = enc.transform(raw_vector).toarray()
    model_input = scaler.transform(dense_vector)

    # Feed the vector to the model; only one sample is passed, so take the
    # first prediction.
    return clf.predict(model_input)[0]
def process_log_line(log_line):
    """Return the classifier's prediction for a single raw log line.

    Pipeline: ``parse_log_line`` -> ``extract_features_from_log`` (in
    ``'live_data'`` mode) -> ``enc.transform`` -> ``scaler.transform`` ->
    ``clf.predict``.

    NOTE(review): ``enc``, ``scaler`` and ``clf`` come from module scope and
    are not visible here; presumably they are fitted beforehand -- confirm.

    Returns the first element of the array returned by ``clf.predict``.
    """
    # Turn the log line into the feature vector the model expects.
    fields = parse_log_line(log_line)
    feature_vector = np.array(extract_features_from_log(fields, 'live_data'))
    encoded = enc.transform(feature_vector)
    scaled = scaler.transform(encoded.toarray())

    # Ask the model for its verdict on this one sample.
    predictions = clf.predict(scaled)
    return predictions[0]
def get(self):
    """Classify a request described by query arguments and answer by status.

    Reads the ``ip``, ``user_agent`` and ``referer`` request arguments,
    builds a feature vector from them with ``extract_features_from_log``
    (``'live_data'`` mode), runs it through the module-level ``enc`` and
    ``scaler`` transformers and asks ``clf`` for a prediction.

    Response:
      * prediction equal to 0 -> status 403
      * prediction equal to 1 -> status 204
      * any error while building features or predicting -> status 403
      * any other prediction value -> no status is set and ``finish`` is
        not called by this method

    An ``Access-Control-Allow-Origin: *`` header is always set.

    NOTE(review): ``enc``, ``scaler`` and ``clf`` are defined outside this
    block; presumably they are fitted before the handler runs -- confirm.
    """
    self.set_header("Access-Control-Allow-Origin", "*")

    ip = self.get_argument('ip')
    user_agent = self.get_argument('user_agent')
    referer = self.get_argument('referer')
    items = [ip, user_agent, referer]

    # Only the classification itself is guarded. The response is sent
    # afterwards so that a failure while sending it is not mistaken for a
    # classification failure (which would call finish() a second time).
    status = None
    try:
        features = np.array(extract_features_from_log(items, 'live_data'))
        encoded_features = enc.transform(features).toarray()
        scaled_features = scaler.transform(encoded_features)
        prediction = clf.predict(scaled_features)

        if prediction == 0:
            status = 403
        elif prediction == 1:
            status = 204
    except Exception:
        # Reject the request when it cannot be classified. `Exception`
        # (rather than a bare except) lets SystemExit and KeyboardInterrupt
        # propagate instead of being turned into a 403.
        status = 403

    if status is not None:
        self.set_status(status)
        self.finish()
def main():
    """Build the feature matrix from the training log and split it on disk.

    Reads ``../data/train_data.tsv`` line by line, turns each tab-separated
    line into a feature row via ``extract_features_from_log`` (called in
    ``'training_data'`` mode), and writes three CSV files:

    * ``../data/features.csv`` -- the full feature matrix
    * ``../data/training.csv`` -- rows selected by the random mask
    * ``../data/test.csv``     -- the remaining rows

    Each row is assigned to the training set independently with
    probability 0.8, so the split is approximately (not exactly) 80/20 and
    differs between runs unless NumPy's random state is seeded elsewhere.
    """
    # Use a context manager so the input file is closed even if feature
    # extraction raises part-way through.
    with open('../data/train_data.tsv') as f:
        data = [
            extract_features_from_log(line.split('\t'), 'training_data')
            for line in f
        ]

    columns = [
        'ip_1', 'ip_2', 'ip_3', 'ip_4',
        'category', 'os_version', 'version', 'vendor', 'name', 'os',
        'scheme', 'hostname', 'alexa_top_million',
        'len_path', 'len_query', 'len_host',
        'status',
    ]
    df = DataFrame(data, columns=columns)

    # Write feature matrix
    df.to_csv('../data/features.csv')

    # Separate data into test and training sets: one uniform draw per row,
    # rows whose draw is below 0.8 go to training, the rest to test.
    mask = np.random.rand(len(df)) < 0.8
    train = df[mask]
    test = df[~mask]

    train.to_csv('../data/training.csv')
    test.to_csv('../data/test.csv')
def get(self):
    """Classify a request described by query arguments and answer by status.

    Reads the ``ip``, ``user_agent`` and ``referer`` request arguments,
    builds a feature vector from them with ``extract_features_from_log``
    (``'live_data'`` mode), runs it through the module-level ``enc`` and
    ``scaler`` transformers and asks ``clf`` for a prediction.

    Response:
      * prediction equal to 0 -> status 403
      * prediction equal to 1 -> status 204
      * any error while building features or predicting -> status 403
      * any other prediction value -> no status is set and ``finish`` is
        not called by this method

    An ``Access-Control-Allow-Origin: *`` header is always set.

    NOTE(review): ``enc``, ``scaler`` and ``clf`` are defined outside this
    block; presumably they are fitted before the handler runs -- confirm.
    """
    self.set_header("Access-Control-Allow-Origin", "*")

    ip = self.get_argument('ip')
    user_agent = self.get_argument('user_agent')
    referer = self.get_argument('referer')
    items = [ip, user_agent, referer]

    # Only the classification itself is guarded. The response is sent
    # afterwards so that a failure while sending it is not mistaken for a
    # classification failure (which would call finish() a second time).
    status = None
    try:
        features = np.array(extract_features_from_log(items, 'live_data'))
        encoded_features = enc.transform(features).toarray()
        scaled_features = scaler.transform(encoded_features)
        prediction = clf.predict(scaled_features)

        if prediction == 0:
            status = 403
        elif prediction == 1:
            status = 204
    except Exception:
        # Reject the request when it cannot be classified. `Exception`
        # (rather than a bare except) lets SystemExit and KeyboardInterrupt
        # propagate instead of being turned into a 403.
        status = 403

    if status is not None:
        self.set_status(status)
        self.finish()