def main():
    """Download every song of each album listed in ``ALBUM_URLS``.

    For each album URL the page is fetched and scraped with
    ``scrape_config``; every scraped entry whose URL ends in ``.mp3`` is
    downloaded and handed to ``write_binary_file``.  Progress and failures
    are reported on stdout.

    A failure on one album (bad HTTP status or scrape error) or on one song
    (missing fields, unparsable title, bad HTTP status) skips only that item;
    the remaining albums and songs are still processed.
    """
    session = Session()
    session.headers.update({'User-Agent': USER_AGENT})

    for url in ALBUM_URLS:
        try:
            album_name = decode_url(url.split('/')[-2])
        except Exception:
            album_name = None
        if album_name is None:
            # os.path.join() below rejects None, so use a placeholder
            # directory name instead of crashing on the first song.
            album_name = 'unknown_album'

        print("Downloading music from album '{}'\n".format(album_name))

        resp = session.get(url, timeout=30)
        if not resp.ok:
            print('Response code: {}'.format(resp.status_code))
            # Skip just this album, consistent with the scrape-failure
            # branch below, rather than abandoning the remaining albums.
            continue

        soup = Soup(resp.text)
        try:
            music_list = soup.scrape(scrape_config)
        except Exception:
            print(
                "Can't download music from album {}. check URL and content selector.\n"
                .format(album_name))
            continue

        # The third-from-last URL segment is stripped from every title as a
        # "<segment> - " prefix; it is the same for all songs, so build it once.
        title_prefix = "{} - ".format(decode_url(url.split('/')[-3]))

        for song in music_list:
            raw_title = song.get('music_title')
            raw_url = song.get('music_url')
            if not raw_title or not raw_url:
                print("Skipping entry : missing title or URL")
                continue

            music_title = remove_whitespace(raw_title) \
                .replace(" (music.com.bd).mp3", "") \
                .replace(title_prefix, "")
            music_url = 'https:{}'.format(
                encode_to_url(raw_url)).replace(".html", "")

            if not music_url.endswith('.mp3'):
                print("Skipping '{}' : not a music file".format(music_title))
                continue

            title_match = re.search(MUSIC_TITLE_REGEX, music_title)
            if title_match is None:
                # Without a match there is no file name to save under.
                print("Skipping '{}' : unrecognised title format".format(
                    music_title))
                continue
            music_title = '{}.mp3'.format(title_match.group(1))

            create_dir(os.path.join('Downloads', album_name))

            print('>>> Downloading {}'.format(music_title))
            # Same timeout as the album request so a stalled download cannot
            # hang the whole run.
            resp_music = session.get(music_url, timeout=30)
            if not resp_music.ok:
                print('    >>> Download failed : {}'.format(
                    resp_music.status_code))
                continue

            # NOTE(review): the directory created above is under 'Downloads',
            # but this path is not; presumably write_binary_file() prepends
            # the download root - confirm against its definition.
            music_path = os.path.join(album_name, music_title)
            write_binary_file(music_path, resp_music)
            print('    >>> {} downloaded'.format(music_title))
        # end loop: music_list

        print("\n>>> Album '{}' downloaded".format(album_name))
        print('----------------------------\n\n')
    # end loop : urls

    print('- Done -')
def commit_hierarchy(hierarchy):
    """
    Persist a hierarchy to the file system so that it can be cached.

    Each node's ``video_id`` is written to the file named by the node's
    ``path``; the parent directory of that file is created first.

    :param hierarchy: iterable of mappings with ``path`` and ``video_id`` keys
    """
    for entry in hierarchy:
        target = entry['path']
        create_dir(os.path.dirname(target))
        write_text(target, entry['video_id'])
def POST(self):
    """Create a new mission and store its optional attachment.

    Reads the mission fields and the ``myfile`` upload from the submitted
    form, validates them with ``mission.mission_check`` and, when that
    returns ``"no error"``, saves the mission and (if a file was actually
    uploaded) writes it under ``uploads/<user>/`` and registers it with
    ``file.upload``.

    :return: JSON string with ``statusCode`` "200" on success, "300" with
             the validation message when the mission is rejected, or "301"
             when there is no logged-in session.
    """
    # Login control: reject the request unless a user is logged in.
    if session.login != 1 or not session.user:
        return json.dumps({"statusCode":"301", "message":"会话超时,请重新登录"})

    # Parse the form once; the ``myfile={}`` default asks web.py to hand back
    # the upload object itself instead of just its content.
    form = web.input(myfile={})
    mission_name = form.mission_name
    mission_content = form.mission_content
    mission_starttime = form.mission_starttime
    mission_plan_end_time = form.mission_plan_end_time
    mission_doers = form.mission_doers

    # Check whether the mission data is valid.
    result = mission.mission_check(mission_name, mission_content,
                                   mission_starttime, mission_plan_end_time)
    ajax_result = {"statusCode":"300", "message":result}
    web.header('Content-Type', 'application/json')

    # If the mission is valid, store it in the MISSION table.
    mission_pubtime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    if result == "no error":
        mission.mission_save(mission_name, mission_content, mission_starttime,
                             mission_plan_end_time, session.user,
                             mission_doers, mission_pubtime)
        # Look up the id of the mission that was just saved.
        mission_id = mission.mission_id_get(session.user, mission_pubtime)

        file_type = 0
        user = session.user
        filedir = 'uploads/' + user
        file.create_dir(filedir)  # create the directory if it does not exist

        # Because of the ``myfile={}`` default the key is always present, so
        # test for a real upload: with no attachment ``myfile`` is the empty
        # default (no ``filename`` attribute) or carries an empty filename.
        upload = form.myfile
        uploaded_name = getattr(upload, 'filename', None)
        if uploaded_name:
            file_url = uploaded_name.replace('\\', '/')
            print("file_url = x.myfile.filename = :")
            print(file_url)
            # Keep only the last path component (file name with extension).
            file_name = file_url.split('/')[-1]
            if file_name:
                file_url = './uploads/' + user + '/' + file_name
                # ``with`` closes the file even if reading/writing fails.
                with open(filedir + '/' + file_name, 'wb') as fout:
                    fout.write(upload.file.read())
                file_upload_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                 time.localtime())
                file.upload(mission_id, file_name, file_url, user,
                            file_upload_time, file_type)

        ajax_result = {"statusCode":"200", "message":"任务新添加成功", "callbackType":"closeCurrent"}

    # For an invalid mission this is the "300" result carrying the
    # validation message.
    return json.dumps(ajax_result)
def POST(self):
    """Create a new mission and store its optional attachment.

    Reads the mission fields and the ``myfile`` upload from the submitted
    form, validates them with ``mission.mission_check`` and, when that
    returns ``"no error"``, saves the mission and (if a file was actually
    uploaded) writes it under ``uploads/<user>/`` and registers it with
    ``file.upload``.

    :return: JSON string with ``statusCode`` "200" on success, "300" with
             the validation message when the mission is rejected, or "301"
             when there is no logged-in session.
    """
    # Login control: reject the request unless a user is logged in.
    if session.login != 1 or not session.user:
        return json.dumps({"statusCode":"301", "message":"会话超时,请重新登录"})

    # Parse the form once; the ``myfile={}`` default asks web.py to hand back
    # the upload object itself instead of just its content.
    form = web.input(myfile={})
    mission_name = form.mission_name
    mission_content = form.mission_content
    mission_starttime = form.mission_starttime
    mission_plan_end_time = form.mission_plan_end_time
    mission_doers = form.mission_doers

    # Check whether the mission data is valid.
    result = mission.mission_check(mission_name, mission_content,
                                   mission_starttime, mission_plan_end_time)
    ajax_result = {"statusCode":"300", "message":result}
    web.header('Content-Type', 'application/json')

    # If the mission is valid, store it in the MISSION table.
    mission_pubtime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    if result == "no error":
        mission.mission_save(mission_name, mission_content, mission_starttime,
                             mission_plan_end_time, session.user,
                             mission_doers, mission_pubtime)
        # Look up the id of the mission that was just saved.
        mission_id = mission.mission_id_get(session.user, mission_pubtime)

        file_type = 0
        user = session.user
        filedir = 'uploads/' + user
        file.create_dir(filedir)  # create the directory if it does not exist

        # Because of the ``myfile={}`` default the key is always present, so
        # test for a real upload: with no attachment ``myfile`` is the empty
        # default (no ``filename`` attribute) or carries an empty filename.
        upload = form.myfile
        uploaded_name = getattr(upload, 'filename', None)
        if uploaded_name:
            file_url = uploaded_name.replace('\\', '/')
            print("file_url = x.myfile.filename = :")
            print(file_url)
            # Keep only the last path component (file name with extension).
            file_name = file_url.split('/')[-1]
            if file_name:
                file_url = './uploads/' + user + '/' + file_name
                # ``with`` closes the file even if reading/writing fails.
                with open(filedir + '/' + file_name, 'wb') as fout:
                    fout.write(upload.file.read())
                file_upload_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                 time.localtime())
                file.upload(mission_id, file_name, file_url, user,
                            file_upload_time, file_type)

        ajax_result = {"statusCode":"200", "message":"任务新添加成功", "callbackType":"closeCurrent"}

    # For an invalid mission this is the "300" result carrying the
    # validation message.
    return json.dumps(ajax_result)
def POST(self, arg):
    """Store an uploaded attachment for an existing mission.

    Writes the ``myfile`` upload to ``uploads/<user>/<file name>`` and
    registers it with ``file.upload`` under the submitted ``mission_id``.

    :param arg: not used by this handler
    :return: JSON string with ``statusCode`` "200" once the file is stored,
             or "300" when the request carries no file.
    """
    # The ``myfile={}`` default asks web.py to hand back the upload object
    # itself instead of just its content.
    form = web.input(myfile={})
    mission_id = form.mission_id
    file_type = 1
    user = session.user
    filedir = 'uploads/' + user
    file.create_dir(filedir)  # create the directory if it does not exist

    # Because of the default above the key is always present, so test for a
    # real upload: with no file ``myfile`` is the empty default (which has no
    # ``filename`` attribute) or carries an empty filename.
    upload = form.myfile
    uploaded_name = getattr(upload, 'filename', None)
    # Normalise Windows separators and keep only the last path component.
    file_name = uploaded_name.replace('\\', '/').split('/')[-1] if uploaded_name else ''
    if not file_name:
        # Message text: "no file selected for upload".
        return json.dumps({"statusCode":"300", "message":"未选择上传文件"})

    file_url = './uploads/' + user + '/' + file_name
    # ``with`` closes the file even if reading/writing fails.
    with open(filedir + '/' + file_name, 'wb') as fout:
        fout.write(upload.file.read())
    file_upload_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    file.upload(mission_id, file_name, file_url, user, file_upload_time,
                file_type)
    return json.dumps({"statusCode":"200", "message":"文件上传成功"})
def POST(self, arg):
    """Store an uploaded attachment for an existing mission.

    Writes the ``myfile`` upload to ``uploads/<user>/<file name>`` and
    registers it with ``file.upload`` under the submitted ``mission_id``.

    :param arg: not used by this handler
    :return: JSON string with ``statusCode`` "200" once the file is stored,
             or "300" when the request carries no file.
    """
    # The ``myfile={}`` default asks web.py to hand back the upload object
    # itself instead of just its content.
    form = web.input(myfile={})
    mission_id = form.mission_id
    file_type = 1
    user = session.user
    filedir = 'uploads/' + user
    file.create_dir(filedir)  # create the directory if it does not exist

    # Because of the default above the key is always present, so test for a
    # real upload: with no file ``myfile`` is the empty default (which has no
    # ``filename`` attribute) or carries an empty filename.
    upload = form.myfile
    uploaded_name = getattr(upload, 'filename', None)
    # Normalise Windows separators and keep only the last path component.
    file_name = uploaded_name.replace('\\', '/').split('/')[-1] if uploaded_name else ''
    if not file_name:
        # Message text: "no file selected for upload".
        return json.dumps({"statusCode":"300", "message":"未选择上传文件"})

    file_url = './uploads/' + user + '/' + file_name
    # ``with`` closes the file even if reading/writing fails.
    with open(filedir + '/' + file_name, 'wb') as fout:
        fout.write(upload.file.read())
    file_upload_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    file.upload(mission_id, file_name, file_url, user, file_upload_time,
                file_type)
    return json.dumps({"statusCode":"200", "message":"文件上传成功"})
def save(title, modules):
    """Write *modules* as JSON to ``transcripts/tutorials/<title>.json``.

    The target directory is created via ``create_dir`` before writing.

    :param title: base name of the output file (``.json`` is appended)
    :param modules: JSON-serialisable data to store
    """
    target_dir = os.path.join('transcripts', 'tutorials')
    create_dir(target_dir)
    destination = os.path.join(target_dir, title + '.json')
    payload = json.dumps(modules)
    write_text(destination, payload)
Recursively generate all subdirectories containing transcripts :param root: root path :param path: current relative path :return: directory containing transcripts """ for sub in os.listdir(os.path.join(root, path)): subpath = os.path.join(path, sub) if is_transcript_directory(os.path.join(root, subpath)): yield subpath else: yield from generate_transcript_directories(root, subpath) if __name__ == '__main__': src = 'video_ids' dest_dir = os.path.join('transcripts', 'xml') for path in generate_transcript_directories(src): for video_title in os.listdir(os.path.join(src, path)): video_id = read_text(os.path.join(src, path, video_title)) print(path, video_title, video_id) fname = os.path.join(dest_dir, path, video_title) + '.xml' if should_skip_transcript(fname): print('\t', 'skipping') continue transcript = download_transcript(video_id) create_dir(os.path.dirname(fname)) write_text(fname, transcript)
def scatter_2d(point2d, class_indexes, n_iter, dir_name, ms=3, alpha=0.1,
               momentum=None, save=None):
    """
    Draw a 2-D scatter plot with one colour/marker per class and optionally
    save the picture and the plotted points.

    When ``save`` is truthy the figure is written to
    ``<dir_name>/pic/[momentum_]iter<n_iter>`` and the points to
    ``[momentum_]low_<n_iter>.json`` in ``dir_name``.

    :param point2d: array indexed as ``point2d[mask, 0]`` / ``point2d[mask, 1]``
        (columns 0 and 1 are the x and y coordinates that get plotted)
    :param class_indexes: class label per point; compared with ``==`` against
        each unique label to build the row mask (presumably a NumPy array of
        the same length as ``point2d`` - confirm against callers)
    :param n_iter: iteration number, used only in the output file names
    :param dir_name: directory that receives the outputs
    :param ms: marker size
    :param alpha: marker opacity
    :param momentum: when truthy, output file names get a ``momentum_`` prefix
    :param save: when truthy, write the picture and the points to disk
    :return: the matplotlib axes the points were drawn on
    """
    # Local import: path joining keeps the output location correct on every
    # platform instead of hard-coding the Windows "\\" separator.
    import os

    fig, ax = plt.subplots(figsize=(9, 6))
    classes = list(np.unique(class_indexes))
    # Alternate circle / square markers across classes.
    markers = 'os' * len(classes)
    colors = plt.cm.rainbow(np.linspace(0, 1, len(classes)))
    for i, cls in enumerate(classes):
        mask = class_indexes == cls
        ax.plot(point2d[mask, 0], point2d[mask, 1],
                marker=markers[i], linestyle='', ms=ms, label=str(cls),
                alpha=alpha, color=colors[i],
                markeredgecolor='black', markeredgewidth=0.4)
    ax.legend()
    # plt.show()

    if save:
        prefix = "momentum_" if momentum else ""

        # Save the picture for this iteration.
        pic_dir = os.path.join(dir_name, "pic")
        file.create_dir(pic_dir)
        # Save through the figure created above rather than whatever pyplot
        # currently considers the active figure.
        fig.savefig(os.path.join(pic_dir, prefix + "iter" + str(n_iter)))

        # Save the low-dimensional vectors for this iteration.
        low_file_name = prefix + "low_" + str(n_iter) + ".json"
        file.create_file(low_file_name, dir_name)
        file.write_json(file.numpy_array_to_list(point2d), low_file_name,
                        dir_name)
    return ax
:param path: current relative path :return: directory containing transcripts """ for sub in os.listdir(os.path.join(root, path)): subpath = os.path.join(path, sub) if is_transcript_directory(os.path.join(root, subpath)): yield subpath else: yield from generate_transcript_directories(root, subpath) if __name__ == '__main__': xml_dir = os.path.join('transcripts', 'xml') for path in generate_transcript_directories(xml_dir, ''): in_dir = os.path.join(xml_dir, path) for t in os.listdir(in_dir): print(t) in_fname = os.path.join(in_dir, t) xml_transcript = read_text(in_fname) # Sometimes the transcript is empty; Maybe try downloading again? if not xml_transcript: continue txt_transcript = parse_transcript(xml_transcript) out_dir = os.path.join('transcripts', 'txt', path) create_dir(out_dir) out_fname = os.path.join(out_dir, remove_extension(t) + '.txt') # `:` is illegal in windows out_fname = out_fname.replace(':', '_') write_text(out_fname, txt_transcript)