Code example #1
import numpy as np
from pandas import DataFrame


def main():

    data = []
    with open('../data/train_data.tsv') as f:
        for line in f:
            row = extract_features_from_log(line.split('\t'), 'training_data')
            data.append(row)

    columns = [
        'ip_1', 'ip_2', 'ip_3', 'ip_4', 'category', 'os_version', 'version',
        'vendor', 'name', 'os', 'scheme', 'hostname', 'alexa_top_million',
        'len_path', 'len_query', 'len_host', 'status'
    ]

    df = DataFrame(data, columns=columns)

    # Write feature matrix
    df.to_csv('../data/features.csv')

    # Separate data into test and training sets
    mask = np.random.rand(len(df)) < 0.8
    train = df[mask]
    test = df[~mask]
    train.to_csv('../data/training.csv')
    test.to_csv('../data/test.csv')
Code example #2
def process_log_line(log_line):

    # Transform log line into a vector that can be fed into the model
    items = parse_log_line(log_line)
    features = np.array(extract_features_from_log(items, 'live_data'))
    encoded_features = enc.transform(features).toarray()
    scaled_features = scaler.transform(encoded_features)

    # Feed the vector to the model in order to get the prediction
    prediction = clf.predict(scaled_features)

    return prediction[0]
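
process_log_line() depends on module-level enc, scaler, and clf objects that are fitted elsewhere and not shown in this excerpt. The snippet below is a minimal sketch of how they could be prepared from the training split written out in Code example #1, assuming scikit-learn's OneHotEncoder, StandardScaler, and RandomForestClassifier, and assuming the 'status' column holds the label; the actual estimators and label column may differ.

# Hypothetical setup for the module-level enc, scaler, and clf objects
# used by process_log_line(); the estimator choices and the label column
# ('status') are assumptions, not taken from the original text.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler

train = pd.read_csv('../data/training.csv', index_col=0)
X_train = train.drop('status', axis=1)    # feature columns
y_train = train['status']                 # assumed label column

enc = OneHotEncoder(handle_unknown='ignore')   # categorical values -> one-hot vectors
encoded = enc.fit_transform(X_train).toarray()

scaler = StandardScaler()                      # standardize to zero mean, unit variance
scaled = scaler.fit_transform(encoded)

clf = RandomForestClassifier()                 # any scikit-learn classifier would do here
clf.fit(scaled, y_train)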
Code example #3
    def get(self):
        self.set_header("Access-Control-Allow-Origin", "*")

        ip = self.get_argument('ip')
        user_agent = self.get_argument('user_agent')
        referer = self.get_argument('referer')
        items = [ip, user_agent, referer]

        # Vectorize the request attributes, classify them, and answer with
        # 403 for class 0 or 204 for class 1; any failure also results in 403
        try:
            features = np.array(extract_features_from_log(items, 'live_data'))
            encoded_features = enc.transform(features).toarray()
            scaled_features = scaler.transform(encoded_features)
            prediction = clf.predict(scaled_features)
            if prediction == 0:
                self.set_status(403)
                self.finish()
            elif prediction == 1:
                self.set_status(204)
                self.finish()

        except Exception:
            self.set_status(403)
            self.finish()
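
The get() method above reads like a method of a Tornado RequestHandler (self.get_argument, self.set_status, self.set_header, and self.finish are all RequestHandler APIs). Below is a minimal sketch of how such a handler might be mounted in a Tornado application; the handler class name, route, and port are illustrative and not taken from the original text.

# Hypothetical wiring for the handler above; the class name, route, and
# port are illustrative assumptions.
import tornado.ioloop
import tornado.web

class PredictHandler(tornado.web.RequestHandler):
    def get(self):
        # ... body as shown in Code example #3 above ...
        pass

def make_app():
    return tornado.web.Application([
        (r'/predict', PredictHandler),
    ])

if __name__ == '__main__':
    make_app().listen(8080)
    tornado.ioloop.IOLoop.current().start()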