def run(self):
    """Build the to-process lists and persist the export list with durations.

    Steps, in order:
      1. Load the JSON files at ``self.youtube_path`` and
         ``self.exports_path`` into ``self.yt_list`` / ``self.ex_list``.
      2. Reset ``self.youtube_to_process`` and ``self.export_to_process``
         to empty lists and call ``self.prepare_in_out()`` (presumably this
         fills both lists -- confirm against its implementation).
      3. Derive the two output paths inside ``self.workdir``.
      4. Call ``self.read_durations`` on the export list and save that list
         as JSON.

    Returns:
        A ``(youtube_to_process_path, export_to_process_path)`` tuple.
    """
    # Inputs.
    self.yt_list = t.load_json(self.youtube_path)
    self.ex_list = t.load_json(self.exports_path)

    # Start from empty work lists before pairing inputs and outputs.
    self.youtube_to_process = []
    self.export_to_process = []

    print('==> prepare_in_out_list')
    self.prepare_in_out()

    # Output locations, both inside the working directory.
    workdir = self.workdir
    self.youtube_to_process_path = os.path.join(
        workdir, 'youtube_to_process.json')
    self.export_to_process_path = os.path.join(
        workdir, 'export_to_process.json')

    # NOTE(review): the YouTube durations step is currently disabled, so this
    # method does not write youtube_to_process.json even though its path is
    # returned below -- confirm the file is produced elsewhere.
    # print('==> read youtube durations')
    # self.read_durations(self.youtube_to_process)
    # t.save_json(self.youtube_to_process_path, self.youtube_to_process)

    print('==> read export durations')
    self.read_durations(self.export_to_process)
    t.save_json(self.export_to_process_path, self.export_to_process)

    return self.youtube_to_process_path, self.export_to_process_path
from api.tools import Tools as t

# Keep only the entries of the full "to process" YouTube list whose video id
# appears in the popular-id list, preserving the order of the popular list.
y_popular_id_list = t.load_json(
    '/mnt/data/palpatine/DATASETS/YT_LINK/100_popular_from_2017.json')
y_list = t.load_json(
    '/mnt/data/palpatine/DATASETS/YT_LINK/workdir/youtube_to_process.json')

# Index every entry by its id: the last path component of 'src', cut at the
# first dot. When two entries share an id, the later one wins.
all_youtube_dict_by_id = {
    item['src'].rpartition('/')[2].partition('.')[0]: item
    for item in y_list
}

# One output entry per popular id that is known, duplicates included.
new_list_to_process = [
    all_youtube_dict_by_id[y_id_pop]
    for y_id_pop in y_popular_id_list
    if y_id_pop in all_youtube_dict_by_id
]
ok = len(new_list_to_process)

# Report how many popular ids were found out of the total requested.
print('{}/{}'.format(ok, len(y_popular_id_list)))

t.save_json(
    '/mnt/data/palpatine/DATASETS/YT_LINK/100_popular_from_2017_to_process.json',
    new_list_to_process)
import random

from api.tools import Tools as t

# Source list and destination file for the 100-item subset.
yt_to_process = t.load_json(
    '/mnt/data/palpatine/DATASETS/YT_LINK/workdir/youtube_to_process.json')
yt_to_process_100 = (
    '/mnt/data/palpatine/DATASETS/YT_LINK/workdir/youtube_to_process_100.json')

# NOTE: an earlier, disabled variant of this script picked the subset from
# '100_id_list.txt' (comma-separated lines, matched against each entry's 'dst'
# via 'youtube/' + last field, adding 'year' and 'id' keys) and counted the
# misses in `unknown`. With the random selection below nothing can be missing,
# so the counter stays at zero and is only kept for the printed report.
unknown = 0

# Shuffle in place, then take the first 100 entries (fewer if the list is
# shorter).
random.shuffle(yt_to_process)
y100 = yt_to_process[:100]

print('unknown', unknown)
print('new', len(y100))

t.save_json(yt_to_process_100, y100)
if 'vimeo' in yt_link.lower(): # print(output) continue else: yt_id = yt_link.rsplit('/')[-1] yt_video_path = None for video in youtube_entire_video_list: if yt_id in video: yt_video_path= video if yt_video_path is None: continue # print(line) export = '/jabba/' + export if export[-1] == ' ': export = export[:-1] # yt = '/jabba/youtube/videos_youtube/' + yt youtube_list.append(yt_video_path) exports_list.append(export) t.save_json(yt_path, youtube_list) t.save_json(ex_path, exports_list)