예제 #1
0
    def run(self):
        """Load the input lists, prepare the work lists and save export data.

        Loads the JSON files at ``self.youtube_path`` and ``self.exports_path``,
        resets both work lists, calls ``self.prepare_in_out()``, then runs
        ``self.read_durations`` on the export work list and writes that list
        to ``export_to_process.json`` inside ``self.workdir``.

        NOTE: the matching duration/save step for the YouTube work list is
        disabled, so this method does not write ``youtube_to_process.json``
        even though its path is returned.

        Returns:
            A ``(youtube_to_process_path, export_to_process_path)`` tuple.
        """
        self.yt_list = t.load_json(self.youtube_path)
        self.ex_list = t.load_json(self.exports_path)

        # Start from empty work lists before prepare_in_out() runs.
        self.youtube_to_process = []
        self.export_to_process = []
        print('==> prepare_in_out_list')
        self.prepare_in_out()

        # Both output files live directly under the working directory.
        youtube_json = os.path.join(self.workdir, 'youtube_to_process.json')
        self.youtube_to_process_path = youtube_json
        export_json = os.path.join(self.workdir, 'export_to_process.json')
        self.export_to_process_path = export_json

        print('==> read export durations')
        self.read_durations(self.export_to_process)
        t.save_json(export_json, self.export_to_process)

        return youtube_json, export_json
예제 #2
0
from api.tools import Tools as t

# Select, from the full to-process list, the entries whose id appears in the
# popular-id list, and save that subset to its own JSON file.
y_popular_id_list = t.load_json(
    '/mnt/data/palpatine/DATASETS/YT_LINK/100_popular_from_2017.json')
y_list = t.load_json(
    '/mnt/data/palpatine/DATASETS/YT_LINK/workdir/youtube_to_process.json')

# Index every entry by id: the last path component of 'src', cut at its first
# dot. A later entry with the same id replaces an earlier one.
all_youtube_dict_by_id = {
    entry['src'].split('/')[-1].split('.')[0]: entry for entry in y_list
}

# Keep the matching entries in the order of the popular-id list.
new_list_to_process = [
    all_youtube_dict_by_id[popular_id]
    for popular_id in y_popular_id_list
    if popular_id in all_youtube_dict_by_id
]

# Every match contributes exactly one entry, so the count is the list length.
ok = len(new_list_to_process)
print('{}/{}'.format(ok, len(y_popular_id_list)))
t.save_json(
    '/mnt/data/palpatine/DATASETS/YT_LINK/100_popular_from_2017_to_process.json',
    new_list_to_process)
예제 #3
0
from api.tools import Tools as t
import random
# Save a random selection of at most 100 entries from the YouTube to-process
# list into a separate JSON file.
#
# NOTE: a commented-out variant used to pick the entries named in
# '100_id_list.txt' and count the missing ones in `unknown`. With that variant
# disabled, `unknown` is never incremented and is always reported as 0.
yt_to_process_100 = '/mnt/data/palpatine/DATASETS/YT_LINK/workdir/youtube_to_process_100.json'
yt_to_process = t.load_json('/mnt/data/palpatine/DATASETS/YT_LINK/workdir/youtube_to_process.json')

unknown = 0

# Shuffle in place, then take the head of the list as the selection.
random.shuffle(yt_to_process)
y100 = yt_to_process[0:100]

print('unknown', unknown)
print('new', len(y100))
t.save_json(yt_to_process_100, y100)
예제 #4
0
    # NOTE(review): this is the body of a loop whose header is outside this
    # excerpt; `yt_link`, `export`, `youtube_entire_video_list`, `youtube_list`
    # and `exports_list` are all defined before it.

    # Skip entries whose link mentions Vimeo (case-insensitive check).
    if 'vimeo' in yt_link.lower():
        # print(output)
        continue
    else:
        # The id is whatever follows the last '/' in the link.
        yt_id = yt_link.rsplit('/')[-1]

    # Look for a known video that contains the id (presumably the entries are
    # path strings, making this a substring match -- TODO confirm). There is
    # no `break`, so if several entries match, the last one is kept.
    yt_video_path = None
    for video in youtube_entire_video_list:
        if yt_id in video:
            yt_video_path= video


    # No known video matched this id: skip the entry.
    if yt_video_path is None:
        continue

    # print(line)



    # Prefix the export path with '/jabba/' and drop one trailing space, if any.
    export = '/jabba/' + export
    if export[-1] == ' ':
        export = export[:-1]
    # yt = '/jabba/youtube/videos_youtube/' + yt
    # Both lists are appended in the same iteration, pairing video and export.
    youtube_list.append(yt_video_path)
    exports_list.append(export)

# After the loop, write the collected lists to their JSON files.
t.save_json(yt_path, youtube_list)
t.save_json(ex_path, exports_list)