Ejemplo n.º 1
0
 def download_file(bot, message, path):
     """Fetch the document attached to ``message`` and store it next to ``path``.

     The target name is ``path`` plus a dot plus whatever follows the last dot
     of the remote file path reported by ``bot.get_file``.  Before writing,
     ``FileManager.if_file_exists_remove`` is called on the target (presumably
     to clear a stale copy), then the downloaded content and the target path
     are handed to ``tools.download_file``.

     Returns the target path that was used.
     """
     remote = bot.get_file(message.document.file_id)
     # Text after the last dot; the whole remote path if it contains no dot.
     extension = str(remote.file_path).rpartition(".")[2]
     content = bot.download_file(remote.file_path)

     target = "{}.{}".format(path, extension)
     FileManager.if_file_exists_remove(target)
     tools.download_file(target, content)
     return target
Ejemplo n.º 2
0
 def download_img(img):
     """Download the image described by ``img`` into ``./img/``.

     ``img`` must offer ``get('src')`` and ``get('alt')``.  The ``src`` value
     is passed through ``formatUrl``; elements without a truthy ``alt`` are
     skipped entirely.  The file name is ``'./img/'`` followed by the alt text.
     """
     url = formatUrl(img.get('src'))
     alt = img.get('alt')
     if not alt:
         return

     segments = url.split('.')
     # Strip the size parameter: drop the second-to-last dot-separated piece.
     del segments[len(segments) - 2]
     # NOTE(review): ``segments`` is still a list here, not a re-joined URL
     # string -- confirm that tools.download_file really expects a list.
     tools.download_file(url=segments, fileName='./img/' + str(alt))
Ejemplo n.º 3
0
def process_add_faculty(message):
    """Import faculty records from the spreadsheet attached to ``message``.

    The attachment's extension (text after the last dot of the remote file
    path) is appended to the configured faculty file path.  The attachment is
    downloaded to that location only when the extension matches one of the
    two configured Excel types.  Whenever a file exists at that location --
    whether or not it was just downloaded -- every entry returned by
    ``tools.read_faculty`` is passed to ``db_manager.add_faculty``.
    """
    base = os.path.join(constants.documents_directory,
                        constants.excel_file_faculty)
    remote = bot.get_file(message.document.file_id)
    extension = str(remote.file_path).rpartition(".")[2]
    path = base + "." + extension

    if (extension == constants.excel_file_type
            or extension == constants.excel_file_type_a):
        content = bot.download_file(remote.file_path)
        tools.download_file(path, content)

    if not os.path.isfile(path):
        return

    for record in tools.read_faculty(path):
        db_manager.add_faculty(record)
Ejemplo n.º 4
0
import os
import shutil
import sys

LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
TOOLS_PATH = os.path.join(LOCAL_PATH, "..", "..", "tools")
sys.path.append(TOOLS_PATH)

from tools import download_file_and_uncompress, download_file

if __name__ == '__main__':
    # Script entry point: download the inputs, run preprocess.py, then
    # remove the raw_data directory once preprocessing has succeeded.
    url = "https://s3-eu-west-1.amazonaws.com/kaggle-display-advertising-challenge-dataset/dac.tar.gz"
    url2 = "https://paddlerec.bj.bcebos.com/deepfm%2Ffeat_dict_10.pkl2"

    print("download and extract starting...")
    download_file_and_uncompress(url)
    if not os.path.exists("aid_data"):
        os.makedirs("aid_data")
    download_file(url2, "./aid_data/feat_dict_10.pkl2", True)
    print("download and extract finished")

    print("preprocessing...")
    # os.system reports the child's exit status; anything non-zero means the
    # preprocessing step failed.  Stop here in that case so that raw_data is
    # NOT deleted below and the step can be re-run without downloading again.
    status = os.system("python preprocess.py")
    if status != 0:
        sys.exit("preprocess.py failed (status {}); raw_data was kept".format(
            status))
    print("preprocess done")

    shutil.rmtree("raw_data")
    print("done")
Ejemplo n.º 5
0

def get_index_url(i):
    """Return the URL of the i-th alphabetical index page (0 -> "A")."""
    letter = chr(ord("A") + i)
    page = "/archive/2010/0427/bundestag/abgeordnete/bio/" + letter + "/index.html"
    return base_url + page


def get_index_filename(i):
    """Return the local file name for the i-th index page (0 -> "indexA.html")."""
    letter = chr(ord("A") + i)
    name = "index" + letter + ".html"
    return html_dir + name


# download index files (one page per letter A-Z)
if do_download:
    for i in range(26):
        url = get_index_url(i)
        download_file(url, get_index_filename(i), url)

# parse index file for bios
urls = []
for i in range(26):
    urlss = parse_index(get_index_url(i), get_index_filename(i))
    # parse_index signals failure with False; a failed page is reported and
    # skipped so the remaining letters are still processed.
    if urlss is False:
        # Call form of print works on both Python 2 and Python 3.
        print("parsing failed")
    else:
        urls += urlss

# download all bios
if do_download:
    for url in urls:
        fn = get_bio_filename(url)
        if fn is False:
            # quit() is an interactive-shell helper and exits with status 0;
            # abort with a message and a non-zero status instead.
            raise SystemExit("no file name could be derived for %r" % (url,))
Ejemplo n.º 6
0
                if i.name == "dt": first = i.text
                if i.name == "dd":
                    A.statements.append(first + " " + i.text)
                    first = ""

    return A


########################################################################

# Set to True to (re-)download the HTML pages; with False only files that
# are already present under html_dir are parsed.
do_download = False

# download index file
if do_download:
    if download_file(
            base_url + "/landtag/abgeordnete-und-fraktionen/abgeordnete/",
            html_dir + "index.html") is False:
        # quit() is an interactive-shell helper and exits with status 0;
        # abort with a message and a non-zero status instead.
        raise SystemExit("downloading the index page failed")

# parse index file for bios
urls = parse_index(html_dir + "index.html")
if urls is False:
    raise SystemExit("parsing the index page failed")

# download all bios
if do_download:
    for url in urls:
        fn = get_filename(url)
        if fn is False:
            raise SystemExit("no file name could be derived for %r" % (url,))
        download_file(get_complete_url(base_url, url), fn)

# parse all biographies
Ejemplo n.º 7
0
								if j.name == "h3": break;
								if j.name == "ul":
									for k in j.find_all("li"):
										memb.append( k.text.strip() )#.encode("UTF-8") )
									
	#print A
	return A


########################################################################

# Set to True to (re-)download the HTML pages; with False only files that
# are already present under html_dir are parsed.
do_download = False

# download index file
if do_download:
	if download_file(base_url + "/bundestag/abgeordnete18/alphabet", html_dir + "index.html") is False:
		# quit() is an interactive-shell helper and exits with status 0;
		# abort with a message and a non-zero status instead.
		raise SystemExit("downloading the index page failed")

# parse index file for bios
urls = parse_index(html_dir + "index.html")
if urls is False:
	raise SystemExit("parsing the index page failed")

# download all bios
if do_download:
	for url in urls:
		fn = get_filename(url)
		if fn is False:
			raise SystemExit("no file name could be derived for %r" % (url,))
		download_file(get_complete_url(base_url, url), fn)

# parse all biographies
errors = 0
people = []
Ejemplo n.º 8
0
import os
import shutil
import sys

LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
TOOLS_PATH = os.path.join(LOCAL_PATH, "..", "..", "tools")
sys.path.append(TOOLS_PATH)

from tools import download_file_and_uncompress, download_file

if __name__ == '__main__':
    # Script entry point: make sure both data directories exist, then fetch
    # the training file and the evaluation file into them.
    url_train = "https://paddlerec.bj.bcebos.com/xdeepfm%2Ftr"
    url_test = "https://paddlerec.bj.bcebos.com/xdeepfm%2Fev"

    train_dir = "train_data"
    test_dir = "test_data"

    # Create whichever of the two directories is still missing.
    for data_dir in (train_dir, test_dir):
        if not os.path.exists(data_dir):
            os.mkdir(data_dir)

    print("download and extract starting...")
    for source, destination in ((url_train, "./train_data/tr"),
                                (url_test, "./test_data/ev")):
        download_file(source, destination, True)
    print("download and extract finished")

    print("done")