def get_non_eval_clip_data_d():
    # map each postId to its clip path
    non_eval_clips_row_dl = logger.readCSV(NON_EVAL_CLIPS_DATA_CSV_PATH)
    non_eval_clip_data_d = {}
    for row_d in non_eval_clips_row_dl:
        non_eval_clip_data_d[row_d['postId']] = row_d['clip_path']
    return non_eval_clip_data_d
def correct_txt_file(input_txt_file_path, output_txt_file_path,
                     corrected_chars_csv_path, font_path):
    input_lines_t = tools.read_text_file(input_txt_file_path)
    corrected_lines_l = []
    font = TTFont(font_path)
    corrected_chars_dl = logger.readCSV(corrected_chars_csv_path)
    unknown_char_equiv_d = build_unknown_char_equiv_d(corrected_chars_dl)
    for input_line in input_lines_t:
        new_corrected_line = ''
        for char in input_line:
            if not tools.char_in_font(char, font):
                try:
                    unknown_char_unicode = tools.char_2_unicode(char)
                    corrected_char = unknown_char_equiv_d[unknown_char_unicode]
                    new_corrected_line += corrected_char
                except KeyError as e:
                    raise TypeError(
                        'ERROR: The input text file contains a char that is not '
                        'recognized by char_in_font(). Most likely the correct '
                        'and/or completed unknown_chars.csv has not been loaded. '
                        'The unknown char unicode is: ' + str(e))
            else:
                new_corrected_line += char
        corrected_lines_l.append(new_corrected_line)
    tools.write_text_file(output_txt_file_path, corrected_lines_l)
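# A minimal usage sketch for correct_txt_file() (hypothetical paths; assumes
# corrected_chars.csv was already built via load_unknown_chars_csv() below):
#
#   correct_txt_file('subs/raw_subtitle.txt', 'subs/clean_subtitle.txt',
#                    'data/corrected_chars.csv', 'fonts/OpenSans-Regular.ttf')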
def _prune_non_eval_clips():
    # remove the oldest clips until the dir is under the max size
    age_sorted_non_eval_clip_path_l = file_system_utils.get_file_paths_in_dir_by_age(
        NON_EVAL_CLIPS_DIR_PATH)
    deleted_clip_path_l = []
    while (file_system_utils.get_size(NON_EVAL_CLIPS_DIR_PATH) >
           MAX_NON_EVAL_CLIPS_DIR_SIZE):
        pos = len(deleted_clip_path_l)
        os.remove(age_sorted_non_eval_clip_path_l[pos])
        deleted_clip_path_l.append(
            os.path.abspath(age_sorted_non_eval_clip_path_l[pos]))

    # remove the csv rows that point to the clips that were just deleted
    non_eval_clips_row_dl = logger.readCSV(NON_EVAL_CLIPS_DATA_CSV_PATH)
    del_row_d_l = []
    for row_d in non_eval_clips_row_dl:
        if os.path.abspath(row_d['clip_path']) in deleted_clip_path_l:
            del_row_d_l.append(row_d)
    for row_d in del_row_d_l:
        non_eval_clips_row_dl.remove(row_d)

    # go back through and rename everything so the csv shows non_eval_0, 1, 2, ...
    # needed so nothing gets overwritten next time
    rename_clips_for_order(non_eval_clips_row_dl)
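# Worked example of the pruning pass (hypothetical sizes): with
# MAX_NON_EVAL_CLIPS_DIR_SIZE at 1 GiB and the dir at 1.3 GiB, the oldest
# clips are deleted one at a time until the dir drops under 1 GiB, their csv
# rows are dropped with them, and the survivors are renumbered from 0.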
def write_to_current(header, value):
    row_dl = logger.readCSV(POOL_CLIPS_DATA_CSV_PATH)
    cur_row_num = utils.get_cur_row_num(row_dl)
    row_dl[cur_row_num][header] = value
    logger.logList(row_dl, POOL_CLIPS_DATA_CSV_PATH, False, HEADER_LIST,
                   'overwrite')
def build_log_event_l(input_csv_path):
    row_dl = logger.readCSV(input_csv_path)
    log_event_l = []
    for row_d in row_dl:
        log_event_l.append(Log_Event.Log_Event(row_d))
    return log_event_l
def load_unknown_chars_csv(unknown_chars_csv_path, corrected_chars_csv_path):
    unknown_chars_dl = logger.readCSV(unknown_chars_csv_path)

    # make sure the unknown_chars csv has had all correct_chars added
    if not all_correct_chars_entered(unknown_chars_dl):
        raise TypeError(
            'ERROR: You must enter all values for "correct_char" in unknown_chars')

    # read the original corrected chars csv if it exists
    if os.path.isfile(corrected_chars_csv_path):
        og_corrected_chars_dl = logger.readCSV(corrected_chars_csv_path)
    else:
        og_corrected_chars_dl = []

    new_corrected_chars_dl = find_new_corrected_chars(og_corrected_chars_dl,
                                                      unknown_chars_dl)
    header_order_list = ['correct_char', 'unknown_char_unicode', 'example']
    if new_corrected_chars_dl != []:
        logger.logList(new_corrected_chars_dl, corrected_chars_csv_path,
                       WANT_BACKUP, header_order_list, 'append')
def pull_clip(clip_path, dest_path):
    os.rename(clip_path, dest_path)
    non_eval_clips_row_dl = logger.readCSV(NON_EVAL_CLIPS_DATA_CSV_PATH)

    # find and remove the csv row for the clip that was just moved
    for row_d in non_eval_clips_row_dl:
        if row_d['clip_path'] == clip_path:
            non_eval_clips_row_dl.remove(row_d)
            break

    rename_clips_for_order(non_eval_clips_row_dl)
def build_row_dict_list(export_filename):
    row_dl = []
    raw_input = logger.readCSV(export_filename)

    # the whole header row is read in as a single delimited string that ends up
    # as the first key; split it back into a header list
    headers_str = list(raw_input[0].keys())[0]
    header_list = headers_str.split(export_delim)

    for raw_row_dict in raw_input:
        row_dict = make_row_dict(raw_row_dict, header_list)
        row_dl.append(row_dict)
    return row_dl
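# Illustrative input shape for build_row_dict_list() (hypothetical delimiter
# and values; assumes export_delim is ';'): the export csv arrives with its
# whole header row collapsed into one key, e.g.
#
#   {'postId;postURL;title': 'abc123;https://example.com/v;Some Title'}
#
# and make_row_dict() re-splits each raw row against the recovered header list.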
def move_current(move_amount):
    row_dl = logger.readCSV(POOL_CLIPS_DATA_CSV_PATH)

    # get the row num of the original current clip and clear its 'current' flag
    og_current_row_num = utils.get_cur_row_num(row_dl)
    row_dl[og_current_row_num]['current'] = ''

    # wrap around if the move goes past either end of the list
    new_cur_row_num = og_current_row_num + move_amount
    if new_cur_row_num not in range(len(row_dl)):
        if move_amount > 0:
            new_cur_row_num = 0
        else:
            new_cur_row_num = len(row_dl) - 1

    row_dl[new_cur_row_num]['current'] = '1'
    logger.logList(row_dl, POOL_CLIPS_DATA_CSV_PATH, False, HEADER_LIST,
                   'overwrite')
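# Worked example of move_current()'s wrap-around (hypothetical 5-row csv):
# with the current clip at index 4, move_current(1) wraps to index 0; with it
# at index 0, move_current(-1) wraps to index 4.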
def _log_small_historical_data():
    file_system_utils.make_dir_if_not_exist(HISTORICAL_DATA_DIR_PATH)

    # make the new log dir path
    now = datetime.datetime.now()
    date_time_str = now.strftime("%Y-%m-%d__%H_%M")
    new_log_dir_path = HISTORICAL_DATA_DIR_PATH + '/log__' + date_time_str

    # add the new dir, deleting the old one if it exists
    file_system_utils.delete_if_exists(new_log_dir_path)
    os.mkdir(new_log_dir_path)

    # copy data from current_data to the new dir in historical_data
    copy_path_l = [
        CURRENT_DATA_DIR_PATH + '/download_log.csv',
        CURRENT_DATA_DIR_PATH + '/pool_clips_data.csv',
        CURRENT_DATA_DIR_PATH + '/LOG_FILES'
    ]
    file_system_utils.copy_objects_to_dest(copy_path_l, new_log_dir_path)

    # get the list of evaluated postIds from the pool
    pool_evaluated_post_id_l = []
    pool_clips_data_row_dl = logger.readCSV(CURRENT_DATA_DIR_PATH +
                                            '/pool_clips_data.csv')
    for row_d in pool_clips_data_row_dl:
        if row_d['status'] != '':
            pool_evaluated_post_id_l.append(row_d['postId'])

    # add pool_evaluated_post_id_l to the existing list of evaluated post ids
    evaluated_post_id_l = get_evaluated_post_id_l()
    json_logger.write(pool_evaluated_post_id_l + evaluated_post_id_l,
                      EVALUATED_POST_IDS_JSON_PATH)
def _log_non_eval_clips():
    def __make_og_non_eval_post_id_clip_path_dl():
        new_row_dl = []
        pool_row_dl = logger.readCSV(CURRENT_DATA_DIR_PATH +
                                     '/pool_clips_data.csv')
        for pool_row_d in pool_row_dl:
            if pool_row_d['status'] == '':
                new_row_dl.append({
                    'postId': pool_row_d['postId'],
                    'clip_path': pool_row_d['clip_path']
                })
        return new_row_dl

    def __get_post_id_l(non_eval_clips_row_dl):
        post_id_l = []
        for row_d in non_eval_clips_row_dl:
            post_id_l.append(row_d['postId'])
        return post_id_l

    file_system_utils.make_dir_if_not_exist(NON_EVAL_CLIPS_DIR_PATH)
    try:
        non_eval_clips_row_dl = logger.readCSV(NON_EVAL_CLIPS_DATA_CSV_PATH)
    except FileNotFoundError:
        non_eval_clips_row_dl = []

    # make a row_dl of postIds and original clip paths
    og_non_eval_post_id_clip_path_dl = __make_og_non_eval_post_id_clip_path_dl()

    # build final_non_eval_post_id_clip_path_dl - contains each postId and the
    # new clip path that clip is about to be saved to; postIds that are already
    # logged are skipped
    final_non_eval_post_id_clip_path_dl = []
    existing_post_id_l = __get_post_id_l(non_eval_clips_row_dl)
    clips_added = 0
    for d in og_non_eval_post_id_clip_path_dl:
        if d['postId'] not in existing_post_id_l:
            new_save_name = 'non_eval_' + str(
                len(non_eval_clips_row_dl) + clips_added) + '.mp4'
            final_non_eval_post_id_clip_path_dl.append({
                'postId': d['postId'],
                'clip_path': NON_EVAL_CLIPS_DIR_PATH + '/' + new_save_name
            })
            clips_added += 1

    # copy all non-evaluated clips to their new home in non_eval_clips
    # (could just rename, but copying is nicer for testing)
    og_pos = 0
    for d in final_non_eval_post_id_clip_path_dl:
        while d['postId'] != og_non_eval_post_id_clip_path_dl[og_pos]['postId']:
            og_pos += 1
        og_clip_path = og_non_eval_post_id_clip_path_dl[og_pos]['clip_path']
        file_system_utils.copy_files_to_dest([og_clip_path],
                                             NON_EVAL_CLIPS_DIR_PATH)
        just_copied_clip_path = (NON_EVAL_CLIPS_DIR_PATH + '/' +
                                 ntpath.basename(og_clip_path))
        os.rename(just_copied_clip_path, d['clip_path'])

    # add info from final_non_eval_post_id_clip_path_dl to non_eval_clips_row_dl
    for row_d in final_non_eval_post_id_clip_path_dl:
        non_eval_clips_row_dl.append(row_d)
    logger.logList(non_eval_clips_row_dl, NON_EVAL_CLIPS_DATA_CSV_PATH, False,
                   NON_EVAL_CLIPS_DATA_CSV_HEADER_LIST, 'overwrite')
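# Naming sketch for _log_non_eval_clips() (illustrative filenames): if the
# csv at NON_EVAL_CLIPS_DATA_CSV_PATH already holds 3 rows, newly logged
# clips are saved as non_eval_3.mp4, non_eval_4.mp4, ... so numbering
# continues from the existing count and nothing is overwritten.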
def print_dl_report():
    def _num_dl_success(row_dl):
        num_dl_success = 0
        for row_d in row_dl:
            if row_d['download_success'] == 'True':
                num_dl_success += 1
        return num_dl_success

    def _youtube_reddit_other_cnt(row_dl):
        def __youtube_reddit_or_other_url(url):
            if 'youtu' in url:
                return 'youtube'
            elif 'redd' in url:
                return 'reddit'
            else:
                return 'other'

        yt_cnt = 0
        r_cnt = 0
        other_cnt = 0
        for row_d in row_dl:
            url_type = __youtube_reddit_or_other_url(row_d['postURL'])
            if url_type == 'youtube':
                yt_cnt += 1
            elif url_type == 'reddit':
                r_cnt += 1
            else:
                other_cnt += 1
        return yt_cnt, r_cnt, other_cnt

    def _print_fail_reason_occ(row_dl):
        def __fail_reason_occ_d(row_dl):
            fail_reason_occ_d = {}
            for row_d in row_dl:
                if row_d['fail_reason'] != '':
                    if row_d['fail_reason'] in fail_reason_occ_d.keys():
                        fail_reason_occ_d[row_d['fail_reason']] += 1
                    else:
                        fail_reason_occ_d[row_d['fail_reason']] = 1
            return fail_reason_occ_d

        overall_fail_reason_occ_d = __fail_reason_occ_d(row_dl)
        num_attempts = len(row_dl)
        total_fails = sum(overall_fail_reason_occ_d.values())
        print('')
        print('Total Fails: ', total_fails, ' %',
              int((total_fails / num_attempts) * 100))
        print('')
        print('% Fails Of    % Fails Of    # Fails:    Fail Reason:')
        print('Attempts:     Fails:')
        for fail_reason, num_occ in overall_fail_reason_occ_d.items():
            percent_of_attempts = int((num_occ / num_attempts) * 100)
            percent_of_fails = int((num_occ / total_fails) * 100)
            print('%', percent_of_attempts, '    %', percent_of_fails, '    ',
                  num_occ, '    ', fail_reason)

    row_dl = logger.readCSV(CLIP_DOWNLOAD_LOG_CSV_PATH)
    num_attempts = len(row_dl)
    num_dl_success = _num_dl_success(row_dl)
    dl_success_ratio = num_dl_success / num_attempts
    yt_cnt, r_cnt, other_cnt = _youtube_reddit_other_cnt(row_dl)

    print('num_attempts: ', num_attempts)
    print('num_dl_success: ', num_dl_success)
    print('dl_success_ratio: ', dl_success_ratio)
    print('num youtube videos: ', yt_cnt, ' %', (yt_cnt / num_attempts) * 100)
    print('num reddit videos: ', r_cnt, ' %', (r_cnt / num_attempts) * 100)
    print('num other videos: ', other_cnt, ' %', (other_cnt / num_attempts) * 100)
    _print_fail_reason_occ(row_dl)
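# Illustrative shape of print_dl_report()'s output (all numbers and the fail
# reason are made up):
#
#   num_attempts:  200
#   num_dl_success:  150
#   dl_success_ratio:  0.75
#   ...
#   % Fails Of    % Fails Of    # Fails:    Fail Reason:
#   Attempts:     Fails:
#   % 15          % 60          30          video unavailable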
def get_confirmed_code_dl__and_is_complete(store_name, value, quantity):
    def get_datetime_from_dt_csv_str(datetime_csv_str):
        ss = str_utils.multi_dim_split(['-', ' ', ':', "'"], datetime_csv_str)
        return datetime(int(ss[0]), int(ss[1]), int(ss[2]), int(ss[3]),
                        int(ss[4]), int(ss[5]))

    def add_to_code_d_if_exists_in_row_d(code_d, row_d, key_):
        if key_ in row_d.keys():
            code_d[key_] = row_d[key_]
        return code_d

    def build_code_d(row_d):
        code_d = {}
        header = 'main_code'
        if header in row_d.keys():
            code_d[header] = row_d[header][:-1]  # drop the trailing char
        code_d = add_to_code_d_if_exists_in_row_d(code_d, row_d, 'pin')
        code_d = add_to_code_d_if_exists_in_row_d(code_d, row_d,
                                                  'biz_id')  # eventually remove
        return code_d

    confirmed_code_dl = []
    unused_code_csv_path = get__store_unused_codes_csv_path(store_name)

    # return empty (and incomplete unless quantity is 0) if the code csv does
    # not exist
    if not fsu.is_file(unused_code_csv_path):
        return confirmed_code_dl, len(confirmed_code_dl) == quantity

    row_dl = logger.readCSV(unused_code_csv_path)
    store = STORE_D[store_name]  # will eventually be replaced with Store(store_name)
    header_l = store.csv_header_l  # will eventually come from config

    row_num = 0
    while len(confirmed_code_dl) < quantity and row_num < len(row_dl):
        row_d = row_dl[row_num]
        if float(row_d['adv_value']) == float(value):
            code_d = build_code_d(row_d)
            last_confirm_datetime = get_datetime_from_dt_csv_str(
                row_d['last_confirmed'])
            datetime_since_last_confirm = datetime.now() - last_confirm_datetime
            sec_since_last_confirm = datetime_since_last_confirm.total_seconds()

            # if it has been too long since the last check, re-check the code
            if sec_since_last_confirm > MAX_CONFIRMED_CODE_AGE_DAYS * 24 * 3600:
                # real_value = store.get_code_value(code_d)  # put back
                real_value = 50  # remove, just for testing
                print('using ', real_value,
                      ' as test #, should check code for real, PUT BACK')

                # if after checking, the real value is less than the advertised
                # value, remove the code from unused_codes, put it in
                # failed_codes, and move on to the next row
                if real_value < float(row_d['adv_value']):
                    logger.removeRowByHeaderVal('og_code_str',
                                                row_d['og_code_str'],
                                                unused_code_csv_path,
                                                errorIfHeaderNotExist=True)
                    failed_codes_csv_path = get__store_failed_codes_csv_path(
                        store_name)
                    logger.logList([row_d],
                                   failed_codes_csv_path,
                                   wantBackup=True,
                                   headerList=header_l,
                                   overwriteAction='append')
                    row_num += 1
                    continue

            # the code is either fresh or was just re-checked and confirmed
            confirmed_code_dl.append(code_d)
        row_num += 1

    return confirmed_code_dl, len(confirmed_code_dl) == quantity
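# A minimal usage sketch for get_confirmed_code_dl__and_is_complete()
# (hypothetical store name and values):
#
#   code_dl, is_complete = get_confirmed_code_dl__and_is_complete(
#       'example_store', 25.00, 3)
#   if not is_complete:
#       print('only found', len(code_dl), 'confirmed codes')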
def get_csv_row_dl():
    return logger.readCSV(POOL_CLIPS_DATA_CSV_PATH)