def download_file(bot, message, path):
    """Fetch the document attached to ``message`` and store it next to ``path``.

    The text after the last "." of the remote file path is appended to
    ``path`` as the extension of the stored file.

    Args:
        bot: Client object providing ``get_file`` and ``download_file``.
        message: Incoming message; ``message.document.file_id`` identifies
            the document to fetch.
        path: Destination path without extension.

    Returns:
        The destination path including the extension.
    """
    remote = bot.get_file(message.document.file_id)
    # Same result as split(".")[-1]: the part after the last dot, or the
    # whole remote path when it contains no dot at all.
    extension = str(remote.file_path).rpartition(".")[2]
    payload = bot.download_file(remote.file_path)
    target = "{}.{}".format(path, extension)
    # Presumably clears a stale copy before the new one is written --
    # confirm against FileManager.
    FileManager.if_file_exists_remove(target)
    tools.download_file(target, payload)
    return target
def download_img(img):
    """Download the image referenced by an image element into ``./img/``.

    The element's ``src`` is passed through ``formatUrl``; the element's
    ``alt`` text becomes the local file name. Elements without an ``alt``
    text are not downloaded.

    Args:
        img: Element object exposing ``get(attribute_name)``.
    """
    url = formatUrl(img.get('src'))
    alt = img.get('alt')
    if not alt:
        return

    segments = url.split('.')
    # Strip the size parameter: drop the second-to-last dot-separated segment.
    segments.pop(len(segments) - 2)
    # Re-assemble the URL string. Previously the list of segments itself was
    # handed to the downloader as ``url``.
    url = '.'.join(segments)
    tools.download_file(url=url, fileName='./img/' + str(alt))
def process_add_faculty(message):
    """Import faculties from the Excel document attached to ``message``.

    The document is only processed when the text after the last "." of its
    remote path equals ``constants.excel_file_type`` or
    ``constants.excel_file_type_a``. It is then stored as
    ``<documents_directory>/<excel_file_faculty>.<extension>`` and, if that
    file exists afterwards, every entry returned by ``tools.read_faculty``
    is passed to ``db_manager.add_faculty``.

    Args:
        message: Incoming message; ``message.document.file_id`` identifies
            the uploaded document.
    """
    base_path = os.path.join(constants.documents_directory,
                             constants.excel_file_faculty)
    remote = bot.get_file(message.document.file_id)
    extension = str(remote.file_path).rpartition(".")[2]

    if extension in (constants.excel_file_type, constants.excel_file_type_a):
        payload = bot.download_file(remote.file_path)
        path = base_path + "." + extension
        tools.download_file(path, payload)
        # Only read the sheet back if the download really produced a file.
        if os.path.isfile(path):
            for faculty in tools.read_faculty(path):
                db_manager.add_faculty(faculty)
import os
import shutil
import sys

LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
TOOLS_PATH = os.path.join(LOCAL_PATH, "..", "..", "tools")
# The shared ``tools`` module lives two directories up; it must be on
# sys.path before the import below.
sys.path.append(TOOLS_PATH)

from tools import download_file_and_uncompress, download_file

if __name__ == '__main__':
    url = "https://s3-eu-west-1.amazonaws.com/kaggle-display-advertising-challenge-dataset/dac.tar.gz"
    url2 = "https://paddlerec.bj.bcebos.com/deepfm%2Ffeat_dict_10.pkl2"

    print("download and extract starting...")
    download_file_and_uncompress(url)
    if not os.path.exists("aid_data"):
        os.makedirs("aid_data")
    download_file(url2, "./aid_data/feat_dict_10.pkl2", True)
    print("download and extract finished")

    print("preprocessing...")
    status = os.system("python preprocess.py")
    if status != 0:
        # Stop before the cleanup below: a failed preprocessing run must not
        # be reported as done, and raw_data must stay in place for a retry.
        sys.exit("preprocess.py failed with status {}".format(status))
    print("preprocess done")

    shutil.rmtree("raw_data")
    print("done")
def get_index_url(i):
    """Return the URL of the index page for letter number ``i`` (0 -> "A")."""
    return (base_url + "/archive/2010/0427/bundestag/abgeordnete/bio/"
            + chr(i + 65) + "/index.html")


def get_index_filename(i):
    """Return the local file name for index page number ``i`` (0 -> "A")."""
    return html_dir + "index" + chr(i + 65) + ".html"


# download index files (one per letter A-Z)
if do_download == True:
    for i in range(26):
        url = get_index_url(i)
        download_file(url, get_index_filename(i), url)

# parse index files for bios
urls = []
for i in range(26):
    urlss = parse_index(get_index_url(i), get_index_filename(i))
    if urlss == False:
        # Parenthesised so the statement is valid on Python 2 and Python 3;
        # with a single string argument the output is the same on both.
        print("parsing failed")
    else:
        urls += urlss

# download all bios
if do_download == True:
    for url in urls:
        fn = get_bio_filename(url)
        if fn == False:
            quit()
        # NOTE(review): the visible excerpt ends here; the loop body may
        # continue beyond it -- confirm against the full file.
if i.name == "dt": first = i.text if i.name == "dd": A.statements.append(first + " " + i.text) first = "" return A ######################################################################## do_download = False # download index file if do_download == True: if download_file( base_url + "/landtag/abgeordnete-und-fraktionen/abgeordnete/", html_dir + "index.html") == False: quit() # parse index file for bios urls = parse_index(html_dir + "index.html") if urls == False: quit() # download all bios if do_download == True: for url in urls: fn = get_filename(url) if fn == False: quit() download_file(get_complete_url(base_url, url), fn) # parse all biographies
if j.name == "h3": break; if j.name == "ul": for k in j.find_all("li"): memb.append( k.text.strip() )#.encode("UTF-8") ) #print A return A ######################################################################## do_download = False # download index file if do_download == True: if download_file(base_url + "/bundestag/abgeordnete18/alphabet", html_dir + "index.html") == False: quit() # parse index file for bios urls = parse_index(html_dir + "index.html") if urls == False: quit() # download all bios if do_download == True: for url in urls: fn = get_filename(url) if fn == False: quit() download_file(get_complete_url(base_url, url), fn) # parse all biographies errors = 0 people = []
import os
import shutil
import sys

LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
TOOLS_PATH = os.path.join(LOCAL_PATH, "..", "..", "tools")
# The shared ``tools`` module lives two directories up; it must be on
# sys.path before the import below.
sys.path.append(TOOLS_PATH)

from tools import download_file_and_uncompress, download_file

if __name__ == '__main__':
    url_train = "https://paddlerec.bj.bcebos.com/xdeepfm%2Ftr"
    url_test = "https://paddlerec.bj.bcebos.com/xdeepfm%2Fev"

    train_dir = "train_data"
    test_dir = "test_data"

    # Create the target directories (train first, then test) if missing.
    for data_dir in (train_dir, test_dir):
        if not os.path.exists(data_dir):
            os.mkdir(data_dir)

    print("download and extract starting...")
    download_file(url_train, "./train_data/tr", True)
    download_file(url_test, "./test_data/ev", True)
    print("download and extract finished")

    print("done")