def filter_name_reason_stats_processor(line_array):
    """Accumulate totals for one denied request into the filter statistics.

    Rows whose field 20 is not "TCP_DENIED" are ignored.  For a denied row
    the totals are kept in the module-level ``filter_name_reason_stats_dict``
    under the MD5 hex digest of user name + URL + filter name + filter
    reason concatenated.

    line_array - indexable sequence of string fields, read by position:
                 4 elapsed time, 7 upload bytes, 8 download bytes,
                 10 fallback for a blank user name, 11 user name, 13 URL,
                 17 filter name, 18 filter reason, 20 cache code.

    Returns None; the result is the update of the statistics dict.
    """
    if line_array[20] != "TCP_DENIED":
        return

    user = line_array[11]
    if user in ("", " ", "-"):
        # No usable user name recorded; use field 10 instead.
        user = line_array[10]
    url = line_array[13]
    filter_name = line_array[17]
    filter_reason = line_array[18]
    download_bytes = line_array[8]
    upload_bytes = line_array[7]
    elapsed_time = line_array[4]

    # Full unique key for this user / URL / filter combination.
    key_material = "".join((user, url, filter_name, filter_reason))
    stats_key = md5_hash(bytes(key_material, 'utf-8')).hexdigest()

    totals = filter_name_reason_stats_dict.get(stats_key, None)

    if not totals:
        # First time this combination is seen: start a fresh record.
        filter_name_reason_stats_dict[stats_key] = {
            "un": user,
            "f_n": filter_name,
            "f_r": filter_reason,
            "d": int(download_bytes),
            "u": int(upload_bytes),
            "rc": 1,
            "et": int(elapsed_time),
            "url": url
        }
        return

    # Known combination: bump each counter in turn (bytes down, bytes up,
    # request count, elapsed time).
    increments = (
        ("d", download_bytes),
        ("u", upload_bytes),
        ("rc", 1),
        ("et", elapsed_time),
    )
    for counter, amount in increments:
        filter_name_reason_stats_dict[stats_key][counter] = (
            totals.get(counter) + int(amount))
Example #2
0
def update_thread(data_dir, author, subject=None, key=None):
    """Update the thread index, creating a new thread if key is None.
    Returns the key (hash) of the thread.

    data_dir - Passed through to read_threads() and write_threads().
    author   - String, the ID of the author.
    subject  - String, the title of the thread.
    key      - String, the key to an existing thread to update.

    If <subject> is given, then it's assumed that we're starting a new thread
    and if <key> is given, then we should be updating an existing thread.

    A new thread is added at the top of the index with zero replies.  An
    existing thread has its reply count incremented and its last-reply date
    set to now, and is moved to the top.  If <key> matches no thread the
    index is rewritten unchanged and <key> is still returned.
    """
    now = time.strftime(DATE_FORMAT)

    if key:
        row_hash = key
    else:
        # md5 works on bytes: encode text first so hashing does not raise
        # where str is unicode.  Byte strings are hashed unchanged.
        seed = '%s%s%s' % (now, author, subject)
        if not isinstance(seed, bytes):
            seed = seed.encode('utf-8')
        row_hash = md5_hash(seed).hexdigest()

    # Read the index of threads in.
    threads = read_threads(data_dir)
    new_threads = []

    # Index format:
    # hash, date, num_replies, last_reply, author, subject
    if not key:
        # A new thread, put at the top.
        new_threads.append('\t'.join(
            (row_hash, now, '0', '-', author, subject)))

    # Match the complete hash field (hash plus its separator) so a key that
    # is only a prefix of another thread's hash cannot update that thread.
    hash_field = row_hash + '\t'

    for thread in threads:
        if thread.startswith(hash_field):
            # insert the updated thread at the beginning.
            # (_ ignore last reply - we're setting it to now)
            # maxsplit=5 keeps a subject that contains tabs in one piece
            # instead of failing the unpacking.
            _, date, num_replies, _, thread_author, thread_subject = \
                    thread.strip().split('\t', 5)
            num_replies = str(int(num_replies) + 1)
            new_threads.insert(
                0, '\t'.join(
                    (row_hash, date, num_replies, now,
                     thread_author, thread_subject)))
        else:
            new_threads.append(thread.strip())

    # Overwrite the existing index with the updated index.
    write_threads(data_dir, new_threads)

    return row_hash
Example #3
0
def update_thread(data_dir, author, subject=None, key=None):
    """Update the thread index, creating a new thread if key is None.
    Returns the key (hash) of the thread.

    data_dir - Passed through to read_threads() and write_threads().
    author   - String, the ID of the author.
    subject  - String, the title of the thread.
    key      - String, the key to an existing thread to update.

    If <subject> is given, then it's assumed that we're starting a new thread
    and if <key> is given, then we should be updating an existing thread.

    A new thread is added at the top of the index with zero replies.  An
    existing thread has its reply count incremented and its last-reply date
    set to now, and is moved to the top.  If <key> matches no thread the
    index is rewritten unchanged and <key> is still returned.
    """
    now = time.strftime(DATE_FORMAT)

    if key:
        row_hash = key
    else:
        # md5 works on bytes: encode text first so hashing does not raise
        # where str is unicode.  Byte strings are hashed unchanged.
        seed = '%s%s%s' % (now, author, subject)
        if not isinstance(seed, bytes):
            seed = seed.encode('utf-8')
        row_hash = md5_hash(seed).hexdigest()

    # Read the index of threads in.
    threads = read_threads(data_dir)
    new_threads = []

    # Index format:
    # hash, date, num_replies, last_reply, author, subject
    if not key:
        # A new thread, put at the top.
        new_threads.append('\t'.join(
                (row_hash, now, '0', '-', author, subject)))

    # Match the complete hash field (hash plus its separator) so a key that
    # is only a prefix of another thread's hash cannot update that thread.
    hash_field = row_hash + '\t'

    for thread in threads:
        if thread.startswith(hash_field):
            # insert the updated thread at the beginning.
            # (_ ignore last reply - we're setting it to now)
            # maxsplit=5 keeps a subject that contains tabs in one piece
            # instead of failing the unpacking.
            _, date, num_replies, _, thread_author, thread_subject = \
                    thread.strip().split('\t', 5)
            num_replies = str(int(num_replies) + 1)
            new_threads.insert(0, '\t'.join(
                (row_hash, date, num_replies, now,
                 thread_author, thread_subject)))
        else:
            new_threads.append(thread.strip())

    # Overwrite the existing index with the updated index.
    write_threads(data_dir, new_threads)

    return row_hash
Example #4
0
 def __hash__(self):
     """Return the MD5 digest of the UTF-8 encoded ``_id`` as an integer."""
     hex_digest = md5_hash(self._id.encode("utf-8")).hexdigest()
     # The digest is hexadecimal text; parse it base 16.
     return int(hex_digest, 16)
Example #5
0
def write_visited(data_dir, author, visited):
    """Write list of thread visits for author.

    data_dir - Directory handed to get_visited_path().
    author   - ID of the author; its MD5 hex digest names the file.
    visited  - The visit list, passed unchanged to write_common().

    Returns whatever write_common() returns.
    """
    # md5 works on bytes: encode text first so hashing does not raise where
    # str is unicode.  Byte strings are hashed unchanged, so digests that
    # could be computed before stay the same.
    author_bytes = author if isinstance(author, bytes) \
        else author.encode('utf-8')
    path = get_visited_path(data_dir, md5_hash(author_bytes).hexdigest())
    return write_common(path, visited)
Example #6
0
def read_visited(data_dir, author):
    """Load list of thread visits for author.

    data_dir - Directory handed to get_visited_path().
    author   - ID of the author; its MD5 hex digest names the file.

    Returns whatever read_common() returns for that file.
    """
    # md5 works on bytes: encode text first so hashing does not raise where
    # str is unicode.  Byte strings are hashed unchanged, so digests that
    # could be computed before stay the same.
    author_bytes = author if isinstance(author, bytes) \
        else author.encode('utf-8')
    path = get_visited_path(data_dir, md5_hash(author_bytes).hexdigest())
    return read_common(path)
Example #7
0
INDEX_NAME = 'index.txt'

# Thread helper file name
THREAD_NAME = 'thread'
SUBSCRIBER_NAME = 'subscribe'
VISITED_NAME = 'visited'

# How dates are stored (see python time module for details)
DATE_FORMAT = '%d %b %Y %H:%M:%S'

# Maximum lengths for subjects and message bodies.
# (currently we chop them off without warning)
MAX_SUBJECT_LEN = 100
MAX_BODY_LEN = 10000

# Length of the hex digest produced by md5_hash.  The sample input is a
# bytes literal because md5 hashes bytes; a text literal raises TypeError
# where str is unicode.
HASH_LENGTH = len(md5_hash(b'dummy').hexdigest())

# Error messages.
ERR_INVALID_THREAD = 'Invalid Thread Specified'
ERR_NO_SUBJECT = 'No Subject Given'
ERR_NO_BODY = 'No body text!'


def get_thread_path(data_dir, hash_string):
    """Return the path "<THREAD_NAME>-<hash_string>" inside data_dir."""
    file_name = "%s-%s" % (THREAD_NAME, hash_string)
    return os.path.join(data_dir, file_name)


def get_visited_path(data_dir, hash_string):
    """Return the path "<VISITED_NAME>-<hash_string>" inside data_dir."""
    file_name = "%s-%s" % (VISITED_NAME, hash_string)
    return os.path.join(data_dir, file_name)
Example #8
0
def write_visited(data_dir, author, visited):
    """Write list of thread visits for author.

    data_dir - Directory handed to get_visited_path().
    author   - ID of the author; its MD5 hex digest names the file.
    visited  - The visit list, passed unchanged to write_common().

    Returns whatever write_common() returns.
    """
    # md5 works on bytes: encode text first so hashing does not raise where
    # str is unicode.  Byte strings are hashed unchanged, so digests that
    # could be computed before stay the same.
    author_bytes = author if isinstance(author, bytes) \
        else author.encode('utf-8')
    path = get_visited_path(data_dir, md5_hash(author_bytes).hexdigest())
    return write_common(path, visited)
Example #9
0
def read_visited(data_dir, author):
    """Load list of thread visits for author.

    data_dir - Directory handed to get_visited_path().
    author   - ID of the author; its MD5 hex digest names the file.

    Returns whatever read_common() returns for that file.
    """
    # md5 works on bytes: encode text first so hashing does not raise where
    # str is unicode.  Byte strings are hashed unchanged, so digests that
    # could be computed before stay the same.
    author_bytes = author if isinstance(author, bytes) \
        else author.encode('utf-8')
    path = get_visited_path(data_dir, md5_hash(author_bytes).hexdigest())
    return read_common(path)
Example #10
0
INDEX_NAME = 'index.txt'

# Thread helper file name
THREAD_NAME = 'thread'
SUBSCRIBER_NAME = 'subscribe'
VISITED_NAME = 'visited'

# How dates are stored (see python time module for details)
DATE_FORMAT = '%d %b %Y %H:%M:%S'

# Maximum lengths for subjects and message bodies.
# (currently we chop them off without warning)
MAX_SUBJECT_LEN = 100
MAX_BODY_LEN = 10000

# Length of the hex digest produced by md5_hash.  The sample input is a
# bytes literal because md5 hashes bytes; a text literal raises TypeError
# where str is unicode.
HASH_LENGTH = len(md5_hash(b'dummy').hexdigest())

# Error messages.
ERR_INVALID_THREAD = 'Invalid Thread Specified'
ERR_NO_SUBJECT = 'No Subject Given'
ERR_NO_BODY = 'No body text!'


def get_thread_path(data_dir, hash_string):
    """Join data_dir with the file name "<THREAD_NAME>-<hash_string>"."""
    name_parts = (THREAD_NAME, hash_string)
    thread_file = "%s-%s" % name_parts
    return os.path.join(data_dir, thread_file)

def get_visited_path(data_dir, hash_string):
    """Join data_dir with the file name "<VISITED_NAME>-<hash_string>"."""
    name_parts = (VISITED_NAME, hash_string)
    visited_file = "%s-%s" % name_parts
    return os.path.join(data_dir, visited_file)

def get_subscriber_path(data_dir, hash_string=''):