def filter_name_reason_stats_processor(line_array):
    """Accumulate per-filter statistics for one denied request.

    Only records whose cache code (field 20) equals "TCP_DENIED" are
    counted; any other record is ignored.  Counted records are grouped in
    the module-level ``filter_name_reason_stats_dict`` under the MD5 hex
    digest of username + URL + filter name + filter reason.

    line_array - indexable sequence of string fields for one record
                 (presumably an already-split access-log line - confirm
                 against the caller).  Fields read: 4 (elapsed time),
                 7 (upload bytes), 8 (download bytes), 10 (fallback user),
                 11 (username), 13 (URL), 17 (filter name),
                 18 (filter reason), 20 (cache code).

    Returns None; the outcome is the update made to
    ``filter_name_reason_stats_dict``.
    """
    if line_array[20] != "TCP_DENIED":
        return

    user = line_array[11]
    if user in ("", " ", "-"):
        # No usable username in field 11; fall back to field 10.
        user = line_array[10]

    url = line_array[13]
    filter_name = line_array[17]
    filter_reason = line_array[18]
    raw_download = line_array[8]
    raw_upload = line_array[7]
    raw_elapsed = line_array[4]

    # Full unique key: one bucket per user/url/filter-name/filter-reason.
    identity = user + url + filter_name + filter_reason
    bucket_key = md5_hash(identity.encode('utf-8')).hexdigest()

    existing = filter_name_reason_stats_dict.get(bucket_key, None)
    if not existing:
        # First time this combination is seen: start its counters.
        filter_name_reason_stats_dict[bucket_key] = {
            "un": user,
            "f_n": filter_name,
            "f_r": filter_reason,
            "d": int(raw_download),
            "u": int(raw_upload),
            "rc": 1,
            "et": int(raw_elapsed),
            "url": url
        }
        return

    # Known combination: bump each counter in turn.  The counters are
    # updated one at a time, so a value that fails int() leaves the
    # counters before it already incremented.
    increments = (
        ("d", raw_download),
        ("u", raw_upload),
        ("rc", 1),
        ("et", raw_elapsed),
    )
    for counter, amount in increments:
        filter_name_reason_stats_dict[bucket_key][counter] = (
            existing.get(counter) + int(amount))
def update_thread(data_dir, author, subject=None, key=None):
    """Update the thread, creating a new thread if key is None.

    Returns the key (hash).

    data_dir - passed through to read_threads() and write_threads().
    author - String, the ID of the author.
    subject - String, the title of the thread.
    key - String, the key to an existing thread to update.

    If <subject> is given, then it's assumed that we're starting a new
    thread and if <key> is given, then we should be updating an existing
    thread.
    """
    now = time.strftime(DATE_FORMAT)
    if key:
        row_hash = key
    else:
        seed = '%s%s%s' % (now, author, subject)
        # MD5 works on bytes; hashing text directly raises TypeError with a
        # hashlib-style constructor on Python 3, so encode it first.
        if not isinstance(seed, bytes):
            seed = seed.encode('utf-8')
        row_hash = md5_hash(seed).hexdigest()

    # Read the index of threads in.
    threads = read_threads(data_dir)
    new_threads = []

    # Index format:
    # hash, date, num_replies, last_reply, author, subject
    if not key:
        # A new thread, put at the top.
        new_threads.append('\t'.join(
            (row_hash, now, '0', '-', author, subject)))

    for thread in threads:
        if thread.startswith(row_hash):
            # insert the updated thread at the beginning.
            # (_ ignore last reply - we're setting it to now)
            # maxsplit=5 keeps the subject (last column) in one piece even
            # if it contains a tab, instead of failing the 6-way unpack.
            # The stored author/subject get their own names so the
            # function's parameters are not overwritten.
            _, date, num_replies, _, thread_author, thread_subject = \
                thread.strip().split('\t', 5)
            num_replies = str(int(num_replies) + 1)
            new_threads.insert(0, '\t'.join(
                (row_hash, date, num_replies, now,
                 thread_author, thread_subject)))
        else:
            new_threads.append(thread.strip())

    # Overwrite the existing index with the updated index.
    write_threads(data_dir, new_threads)
    return row_hash
def update_thread(data_dir, author, subject=None, key=None):
    """Update the thread, creating a new thread if key is None.

    Returns the key (hash).

    data_dir - passed through to read_threads() and write_threads().
    author - String, the ID of the author.
    subject - String, the title of the thread.
    key - String, the key to an existing thread to update.

    If <subject> is given, then it's assumed that we're starting a new
    thread and if <key> is given, then we should be updating an existing
    thread.
    """
    now = time.strftime(DATE_FORMAT)
    if key:
        row_hash = key
    else:
        hash_input = '%s%s%s' % (now, author, subject)
        # A hashlib-style MD5 constructor rejects text on Python 3
        # (TypeError), so hand it UTF-8 bytes.
        if not isinstance(hash_input, bytes):
            hash_input = hash_input.encode('utf-8')
        row_hash = md5_hash(hash_input).hexdigest()

    # Read the index of threads in.
    threads = read_threads(data_dir)
    new_threads = []

    # Index format:
    # hash, date, num_replies, last_reply, author, subject
    if not key:
        # A new thread, put at the top.
        new_threads.append('\t'.join(
            (row_hash, now, '0', '-', author, subject)))

    for thread in threads:
        if thread.startswith(row_hash):
            # insert the updated thread at the beginning.
            # (_ ignore last reply - we're setting it to now)
            # Split at most 5 times so a tab inside the subject (the last
            # column) stays part of the subject rather than breaking the
            # unpack.  Separate local names keep the parameters intact.
            _, date, num_replies, _, row_author, row_subject = \
                thread.strip().split('\t', 5)
            num_replies = str(int(num_replies) + 1)
            new_threads.insert(0, '\t'.join(
                (row_hash, date, num_replies, now, row_author, row_subject)))
        else:
            new_threads.append(thread.strip())

    # Overwrite the existing index with the updated index.
    write_threads(data_dir, new_threads)
    return row_hash
def __hash__(self):
    """Return the MD5 digest of the UTF-8 encoded ``self._id`` as an int."""
    encoded_id = self._id.encode("utf-8")
    hex_digest = md5_hash(encoded_id).hexdigest()
    # Interpret the hexadecimal digest string as a base-16 integer.
    return int(hex_digest, 16)
def write_visited(data_dir, author, visited):
    """Write list of thread visits for author.

    data_dir - directory handed to get_visited_path().
    author - ID of the author; text is hashed as its UTF-8 encoding,
             bytes are hashed as given.
    visited - the visit list, passed unchanged to write_common().

    Returns whatever write_common() returns.
    """
    # MD5 operates on bytes; a hashlib-style constructor raises TypeError
    # for text on Python 3, so encode the author ID before hashing.
    if isinstance(author, bytes):
        author_bytes = author
    else:
        author_bytes = author.encode('utf-8')
    path = get_visited_path(data_dir, md5_hash(author_bytes).hexdigest())
    return write_common(path, visited)
def read_visited(data_dir, author):
    """Load list of thread visits for author.

    data_dir - directory handed to get_visited_path().
    author - ID of the author; text is hashed as its UTF-8 encoding,
             bytes are hashed as given.

    Returns whatever read_common() returns for the author's visited file.
    """
    # MD5 operates on bytes; a hashlib-style constructor raises TypeError
    # for text on Python 3, so encode the author ID before hashing.
    if isinstance(author, bytes):
        author_bytes = author
    else:
        author_bytes = author.encode('utf-8')
    path = get_visited_path(data_dir, md5_hash(author_bytes).hexdigest())
    return read_common(path)
INDEX_NAME = 'index.txt'

# Thread helper file name
THREAD_NAME = 'thread'
SUBSCRIBER_NAME = 'subscribe'
VISITED_NAME = 'visited'

# How dates are stored (see python time module for details)
DATE_FORMAT = '%d %b %Y %H:%M:%S'

# Maximum lengths for subjects and message bodies.
# (currently we chop them off without warning)
MAX_SUBJECT_LEN = 100
MAX_BODY_LEN = 10000

# Length of a hex digest produced by md5_hash.  The sample input is a bytes
# literal because a hashlib-style MD5 constructor rejects text on Python 3,
# which would otherwise fail while this module is being imported.
HASH_LENGTH = len(md5_hash(b'dummy').hexdigest())

ERR_INVALID_THREAD = 'Invalid Thread Specified'
ERR_NO_SUBJECT = 'No Subject Given'
ERR_NO_BODY = 'No body text!'


def get_thread_path(data_dir, hash_string):
    """Build thread path from data_dir and hash_string.

    Returns data_dir joined with "<THREAD_NAME>-<hash_string>".
    """
    return os.path.join(data_dir, "%s-%s" % (THREAD_NAME, hash_string))


def get_visited_path(data_dir, hash_string):
    """Build visited path from data_dir and hash_string.

    Returns data_dir joined with "<VISITED_NAME>-<hash_string>".
    """
    return os.path.join(data_dir, "%s-%s" % (VISITED_NAME, hash_string))
INDEX_NAME = 'index.txt' # Thread helper file name THREAD_NAME = 'thread' SUBSCRIBER_NAME = 'subscribe' VISITED_NAME = 'visited' # How dates are stored (see python time module for details) DATE_FORMAT = '%d %b %Y %H:%M:%S' # Maximum lengths for subjects and message bodies. # (currently we chop them off without warning) MAX_SUBJECT_LEN = 100 MAX_BODY_LEN = 10000 HASH_LENGTH = len(md5_hash('dummy').hexdigest()) ERR_INVALID_THREAD = 'Invalid Thread Specified' ERR_NO_SUBJECT = 'No Subject Given' ERR_NO_BODY = 'No body text!' def get_thread_path(data_dir, hash_string): """build thread path from data_dir and hash_string""" return os.path.join(data_dir, "%s-%s" % (THREAD_NAME, hash_string)) def get_visited_path(data_dir, hash_string): """build visited path from data_dir and hash_string""" return os.path.join(data_dir, "%s-%s" % (VISITED_NAME, hash_string)) def get_subscriber_path(data_dir, hash_string=''):