예제 #1
0
def addLinkSpider(add_link_dictionary):
    """Probe a download link with an HTTP HEAD request.

    The keys 'link', 'header', 'out', 'user_agent', 'load_cookies' and
    'referer' are added to ``add_link_dictionary`` with value ``None`` when
    they are missing (the dictionary is modified in place). Only 'link',
    'user_agent', 'load_cookies' and 'referer' are actually used here.

    Returns:
        A ``(file_name, file_size)`` tuple.  ``file_name`` is taken from the
        ``Content-Disposition`` response header and ``file_size`` is the
        ``Content-Length`` value passed through ``humanReadbleSize`` and
        converted to ``str``.  Either element is ``None`` when the matching
        header is missing, unusable, or the request failed.
    """
    # make sure every optional key exists so the lookups below cannot fail
    for key in (
            'link', 'header', 'out', 'user_agent', 'load_cookies', 'referer'
    ):
        add_link_dictionary.setdefault(key, None)

    link = add_link_dictionary['link']
    user_agent = add_link_dictionary['user_agent']
    raw_cookies = add_link_dictionary['load_cookies']
    referer = add_link_dictionary['referer']

    requests_session = requests.Session()  # defining a requests Session

    if raw_cookies:  # set cookies
        cookie = SimpleCookie()
        cookie.load(raw_cookies)

        cookies = {key: morsel.value for key, morsel in cookie.items()}
        requests_session.cookies = cookiejar_from_dict(cookies)

    if referer:
        # set referer to the session
        requests_session.headers.update({'referer': referer})

    if user_agent:
        # set user_agent to the session
        requests_session.headers.update({'user-agent': user_agent})

    # find headers
    try:
        header = requests_session.head(link).headers
    except Exception:
        # Best-effort probe: any request failure simply means "no headers".
        # Unlike a bare except, KeyboardInterrupt/SystemExit still propagate.
        header = {}
    finally:
        # release the pooled connections held by the session
        requests_session.close()

    # find file size
    file_size = None
    content_length = header.get('Content-Length')
    if content_length is not None:
        try:
            size_in_bytes = int(content_length)
        except (TypeError, ValueError):
            # malformed Content-Length: report the size as unknown
            size_in_bytes = None

        if size_in_bytes is not None:
            # converting file_size to KiB or MiB or GiB
            file_size = str(humanReadbleSize(size_in_bytes))

    # find file name
    file_name = None
    content_disposition = header.get('Content-Disposition')
    if content_disposition and 'filename=' in content_disposition:
        raw_name = content_disposition.split('filename=')[-1].strip()
        if raw_name[:1] in ('"', "'"):
            # quoted value: keep everything up to the matching closing quote
            closing = raw_name.find(raw_name[0], 1)
            raw_name = raw_name[1:closing] if closing != -1 else raw_name[1:]
        else:
            # unquoted value: it ends at the next parameter separator
            raw_name = raw_name.split(';')[0].strip()

        file_name = raw_name or None

    return file_name, file_size
예제 #2
0
def addLinkSpider(add_link_dictionary):
    """Return the size of a download link, found with an HTTP HEAD request.

    The keys 'link', 'header', 'out', 'user_agent', 'load_cookies' and
    'referer' are added to ``add_link_dictionary`` with value ``None`` when
    they are missing (the dictionary is modified in place). Only 'link',
    'user_agent', 'load_cookies' and 'referer' are actually used here.

    Returns:
        The ``Content-Length`` value passed through ``humanReadbleSize``, or
        ``None`` when the header is missing, is not an integer, or the
        request failed.
    """
    # make sure every optional key exists so the lookups below cannot fail
    for key in ('link', 'header', 'out', 'user_agent', 'load_cookies',
                'referer'):
        add_link_dictionary.setdefault(key, None)

    link = add_link_dictionary['link']
    user_agent = add_link_dictionary['user_agent']
    raw_cookies = add_link_dictionary['load_cookies']
    referer = add_link_dictionary['referer']

    requests_session = requests.Session()  # defining a requests Session

    if raw_cookies:  # set cookies
        cookie = SimpleCookie()
        cookie.load(raw_cookies)

        cookies = {key: morsel.value for key, morsel in cookie.items()}
        requests_session.cookies = cookiejar_from_dict(cookies)

    if referer:
        # set referer to the session
        requests_session.headers.update({'referer': referer})

    if user_agent:
        # set user_agent to the session
        requests_session.headers.update({'user-agent': user_agent})

    # find headers
    try:
        header = requests_session.head(link).headers
    except Exception:
        # Best-effort probe: any request failure simply means "no headers".
        # Unlike a bare except, KeyboardInterrupt/SystemExit still propagate.
        header = {}
    finally:
        # release the pooled connections held by the session
        requests_session.close()

    content_length = header.get('Content-Length')
    if content_length is None:
        return None

    try:
        size_in_bytes = int(content_length)
    except (TypeError, ValueError):
        # malformed Content-Length: report the size as unknown
        return None

    # converting file_size to KiB or MiB or GiB
    return humanReadbleSize(size_in_bytes)
예제 #3
0
def spider(add_link_dictionary):
    """Find the file name and size of a download link with a HEAD request.

    ``add_link_dictionary`` must contain the keys 'link', 'ip', 'port',
    'proxy_user', 'proxy_passwd', 'download_user', 'download_passwd', 'out',
    'user_agent', 'load_cookies' and 'referer'; a missing key raises
    ``KeyError``.  Falsy values disable the matching option.

    Returns:
        A ``(filename, file_size)`` tuple.  ``filename`` is, in order of
        priority: the 'out' value, the name found in the
        ``Content-Disposition`` header, or the last path segment of the
        link.  ``file_size`` is the ``Content-Length`` value passed through
        ``humanReadbleSize``, or ``None`` when it is not available.
    """
    # get user's download request from add_link_dictionary
    link = add_link_dictionary['link']
    ip = add_link_dictionary['ip']
    port = add_link_dictionary['port']
    proxy_user = add_link_dictionary['proxy_user']
    proxy_passwd = add_link_dictionary['proxy_passwd']
    download_user = add_link_dictionary['download_user']
    download_passwd = add_link_dictionary['download_passwd']
    out = add_link_dictionary['out']
    user_agent = add_link_dictionary['user_agent']
    raw_cookies = add_link_dictionary['load_cookies']
    referer = add_link_dictionary['referer']

    # define a requests session
    requests_session = requests.Session()

    if ip:
        proxy_address = str(ip) + ':' + str(port)
        if proxy_user:
            # Credentials go between the scheme and the host. The scheme must
            # appear only once; "http://user:pass@http://ip:port" is invalid.
            proxy_address = (str(proxy_user) + ':' + str(proxy_passwd or '')
                             + '@' + proxy_address)
        # set proxy to the session
        requests_session.proxies = {'http': 'http://' + proxy_address}

    if download_user:
        # Session.auth is an attribute holding the credentials, not a method.
        requests_session.auth = (download_user, download_passwd)

    # set cookies
    if raw_cookies:
        cookie = SimpleCookie()
        cookie.load(raw_cookies)

        cookies = {key: morsel.value for key, morsel in cookie.items()}
        requests_session.cookies = cookiejar_from_dict(cookies)

    # set referer
    if referer:
        requests_session.headers.update({'referer': referer})

    # set user_agent
    if user_agent:
        requests_session.headers.update({'user-agent': user_agent})

    # find headers
    try:
        header = requests_session.head(link).headers
    except Exception:
        # Best-effort probe: any request failure simply means "no headers".
        # Unlike a bare except, KeyboardInterrupt/SystemExit still propagate.
        header = {}
    finally:
        # release the pooled connections held by the session
        requests_session.close()

    # find file name in the Content-Disposition header
    filename = None
    content_disposition = header.get('Content-Disposition')
    if content_disposition and 'filename=' in content_disposition:
        raw_name = content_disposition.split('filename=')[-1].strip()
        if raw_name[:1] in ('"', "'"):
            # quoted value: keep everything up to the matching closing quote
            closing = raw_name.find(raw_name[0], 1)
            raw_name = raw_name[1:closing] if closing != -1 else raw_name[1:]
        else:
            # unquoted value: it ends at the next parameter separator
            raw_name = raw_name.split(';')[0].strip()

        filename = raw_name or None

    if not filename:
        # fall back to the last segment of the link
        filename = link.split('/')[-1]

    # if user set file name before in add_link_dictionary['out'],
    # then set "out" for filename
    if out:
        filename = out

    # check if file_size is available
    file_size = None
    content_length = header.get('Content-Length')
    if content_length is not None:
        try:
            size_in_bytes = int(content_length)
        except (TypeError, ValueError):
            # malformed Content-Length: report the size as unknown
            size_in_bytes = None

        if size_in_bytes is not None:
            # converting file_size to KiB or MiB or GiB
            file_size = humanReadbleSize(size_in_bytes)

    # return results
    return filename, file_size
예제 #4
0
def convertDownloadInformation(download_status):
    """Convert a raw download status dictionary into display-ready strings.

    ``download_status`` must contain 'gid'; the keys 'files', 'totalLength',
    'completedLength', 'downloadSpeed', 'connections' and 'status' are used
    when present.  NOTE(review): this looks like the result of an aria2
    ``tellStatus`` call -- confirm against the caller.

    Side effect: every falsy value in ``download_status`` is replaced by
    ``None`` in place.

    Returns:
        A dict with the keys 'gid', 'file_name', 'status', 'size',
        'downloaded_size', 'percent', 'connections', 'rate',
        'estimate_time_left' and 'link'.  A value that cannot be determined
        is ``None``, except 'rate' which is then the string "0".
    """
    # find file_name and link
    try:
        # The string form of 'files', stripped of its enclosing brackets, is
        # parsed back into a single dict that contains the path and the uris
        # of the downloaded file.
        file_status = ast.literal_eval(str(download_status['files'])[1:-1])
        path = str(file_status['path'])
        file_name = urllib.parse.unquote(os.path.basename(path)) or None
        link = file_status['uris'][0]['uri']
    except Exception:
        # Best-effort parsing: any missing key or unexpected shape means
        # that both the name and the link are unknown.
        file_name = None
        link = None

    # normalize every falsy value to None
    for key in list(download_status):
        if not download_status[key]:
            download_status[key] = None

    # find file_size
    try:
        file_size = float(download_status['totalLength'])
    except (KeyError, TypeError, ValueError):
        file_size = None

    # find downloaded size
    try:
        downloaded = float(download_status['completedLength'])
    except (KeyError, TypeError, ValueError):
        downloaded = None

    # convert file_size and downloaded size to KiB, MiB or GiB
    # and find download percent
    if downloaded is not None and file_size is not None and file_size != 0:
        size_str = humanReadbleSize(file_size)
        downloaded_str = humanReadbleSize(downloaded)
        percent_str = str(int(downloaded * 100 / file_size)) + "%"
    else:
        percent_str = None
        size_str = None
        downloaded_str = None

    # find download_speed
    try:
        download_speed = int(download_status['downloadSpeed'])
    except (KeyError, TypeError, ValueError):
        download_speed = 0

    # convert download_speed to desired units and find estimate_time_left
    download_speed_str = "0"
    estimate_time_left_str = None
    if downloaded is not None and download_speed != 0:
        download_speed_str = humanReadbleSize(download_speed, 'speed') + '/s'

        # the remaining time can only be estimated when the total size is
        # known; otherwise it stays None instead of raising TypeError
        if file_size is not None:
            estimate_time_left = int(
                (file_size - downloaded) / download_speed)

            if estimate_time_left >= 3600:
                hours, remainder = divmod(estimate_time_left, 3600)
                minutes, seconds = divmod(remainder, 60)
                estimate_time_left_str = '{}h{}m{}s'.format(
                    hours, minutes, seconds)
            elif estimate_time_left >= 60:
                minutes, seconds = divmod(estimate_time_left, 60)
                estimate_time_left_str = '{}m{}s'.format(minutes, seconds)
            else:
                estimate_time_left_str = '{}s'.format(estimate_time_left)

    # find number of connections
    try:
        connections_str = str(download_status['connections'])
    except KeyError:
        connections_str = None

    # find status of download
    try:
        status_str = str(download_status['status'])
    except KeyError:
        status_str = None

    # rename active status to downloading and removed status to stopped;
    # "None" is the string form of a status that was normalized to None
    status_aliases = {
        'active': 'downloading',
        'removed': 'stopped',
        'None': None,
    }
    status_str = status_aliases.get(status_str, status_str)

    # set 0 second for estimate_time_left_str if download is completed.
    if status_str == 'complete':
        estimate_time_left_str = '0s'

    # return information in dictionary format
    download_info = {
        'gid': download_status['gid'],
        'file_name': file_name,
        'status': status_str,
        'size': size_str,
        'downloaded_size': downloaded_str,
        'percent': percent_str,
        'connections': connections_str,
        'rate': download_speed_str,
        'estimate_time_left': estimate_time_left_str,
        'link': link,
    }

    return download_info