def addLinkSpider(add_link_dictionary):
    """Ask the server for a link's file name and size using a HEAD request.

    Args:
        add_link_dictionary: download options. The keys 'link', 'header',
            'out', 'user_agent', 'load_cookies' and 'referer' are added with
            the value None when absent (the caller's dictionary is modified
            in place). Only 'link', 'user_agent', 'load_cookies' and
            'referer' are used here.

    Returns:
        A (file_name, file_size) tuple. file_name comes from the
        Content-Disposition header; file_size is the string form of
        humanReadbleSize() applied to Content-Length. Either item is None
        when the request fails or the matching header is missing or unusable.
    """
    # make sure every expected key exists so the lookups below cannot fail
    for key in ('link', 'header', 'out', 'user_agent', 'load_cookies',
                'referer'):
        add_link_dictionary.setdefault(key, None)

    link = add_link_dictionary['link']
    user_agent = add_link_dictionary['user_agent']
    raw_cookies = add_link_dictionary['load_cookies']
    referer = add_link_dictionary['referer']

    # the session is used as a context manager so that its connections are
    # released as soon as the headers have been fetched
    with requests.Session() as requests_session:
        if raw_cookies:  # set cookies
            cookie = SimpleCookie()
            cookie.load(raw_cookies)
            cookies = {key: morsel.value for key, morsel in cookie.items()}
            requests_session.cookies = cookiejar_from_dict(cookies)

        if referer:  # set referer to the session
            requests_session.headers.update({'referer': referer})

        if user_agent:  # set user_agent to the session
            requests_session.headers.update({'user-agent': user_agent})

        # find headers; best effort, any request failure means "no headers".
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        try:
            header = requests_session.head(link).headers
        except Exception:
            header = {}

    # find file size
    file_size = None
    content_length = header.get('Content-Length')
    if content_length is not None:
        try:
            length = int(content_length)
        except (TypeError, ValueError):
            # malformed Content-Length: treat the size as unknown
            length = None
        if length is not None:
            # converting file_size to KiB or MiB or GiB
            file_size = str(humanReadbleSize(length))

    # find file name
    file_name = None
    content_disposition = header.get('Content-Disposition')
    if content_disposition and 'filename=' in content_disposition:
        raw_value = content_disposition.split('filename=')[-1].strip()
        if raw_value[:1] in ('"', "'"):
            # quoted value: keep the text between the opening quote and the
            # next matching quote, ignoring any parameters that follow
            file_name = raw_value[1:].split(raw_value[0], 1)[0]
        else:
            # unquoted value: it ends at the next parameter separator
            file_name = raw_value.split(';', 1)[0].strip()
        # an empty name is reported as "not available"
        file_name = file_name or None

    return file_name, file_size
def addLinkSpider(add_link_dictionary):
    """Ask the server for a link's size using a HEAD request.

    Args:
        add_link_dictionary: download options. The keys 'link', 'header',
            'out', 'user_agent', 'load_cookies' and 'referer' are added with
            the value None when absent (the caller's dictionary is modified
            in place). Only 'link', 'user_agent', 'load_cookies' and
            'referer' are used here.

    Returns:
        The result of humanReadbleSize() applied to the Content-Length
        header, or None when the request fails or the header is missing or
        not a valid integer.
    """
    # make sure every expected key exists so the lookups below cannot fail
    for key in ('link', 'header', 'out', 'user_agent', 'load_cookies',
                'referer'):
        add_link_dictionary.setdefault(key, None)

    link = add_link_dictionary['link']
    user_agent = add_link_dictionary['user_agent']
    raw_cookies = add_link_dictionary['load_cookies']
    referer = add_link_dictionary['referer']

    # the session is used as a context manager so that its connections are
    # released as soon as the headers have been fetched
    with requests.Session() as requests_session:
        if raw_cookies:  # set cookies
            cookie = SimpleCookie()
            cookie.load(raw_cookies)
            cookies = {key: morsel.value for key, morsel in cookie.items()}
            requests_session.cookies = cookiejar_from_dict(cookies)

        if referer:  # set referer to the session
            requests_session.headers.update({'referer': referer})

        if user_agent:  # set user_agent to the session
            requests_session.headers.update({'user-agent': user_agent})

        # find headers; best effort, any request failure means "no headers".
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        try:
            header = requests_session.head(link).headers
        except Exception:
            header = {}

    file_size = None
    content_length = header.get('Content-Length')
    if content_length is not None:
        try:
            length = int(content_length)
        except (TypeError, ValueError):
            # malformed Content-Length: treat the size as unknown
            length = None
        if length is not None:
            # converting file_size to KiB or MiB or GiB
            file_size = humanReadbleSize(length)

    return file_size
def spider(add_link_dictionary):
    """Find the file name and size of a download with a HEAD request.

    Args:
        add_link_dictionary: the user's download request. The keys 'link',
            'ip', 'port', 'proxy_user', 'proxy_passwd', 'download_user',
            'download_passwd', 'out', 'user_agent', 'load_cookies' and
            'referer' must be present; optional settings may be None.

    Returns:
        A (filename, file_size) tuple. filename is, in order of preference,
        the 'out' value, the name found in the Content-Disposition header,
        or the last path segment of the link. file_size is the result of
        humanReadbleSize() applied to Content-Length, or None when the
        request fails or the header is missing or not a valid integer.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    # get user's download request from add_link_dictionary
    link = add_link_dictionary['link']
    ip = add_link_dictionary['ip']
    port = add_link_dictionary['port']
    proxy_user = add_link_dictionary['proxy_user']
    proxy_passwd = add_link_dictionary['proxy_passwd']
    download_user = add_link_dictionary['download_user']
    download_passwd = add_link_dictionary['download_passwd']
    out = add_link_dictionary['out']
    user_agent = add_link_dictionary['user_agent']
    raw_cookies = add_link_dictionary['load_cookies']
    referer = add_link_dictionary['referer']

    # the session is used as a context manager so that its connections are
    # released as soon as the headers have been fetched
    with requests.Session() as requests_session:
        if ip:
            proxy_host = str(ip) + ':' + str(port)
            if proxy_user:
                # credentials go between the scheme and the host:
                # http://user:password@ip:port
                proxy_host = (str(proxy_user) + ':' + str(proxy_passwd or '')
                              + '@' + proxy_host)
            # set proxy to the session
            # NOTE: only the 'http' scheme is mapped, as before; https links
            # are not routed through this proxy.
            requests_session.proxies = {'http': 'http://' + proxy_host}

        if download_user:
            # set download user pass to the session; Session.auth is an
            # attribute holding a (user, password) tuple, not a method
            requests_session.auth = (download_user, download_passwd)

        # set cookies
        if raw_cookies:
            cookie = SimpleCookie()
            cookie.load(raw_cookies)
            cookies = {key: morsel.value for key, morsel in cookie.items()}
            requests_session.cookies = cookiejar_from_dict(cookies)

        # set referer
        if referer:
            requests_session.headers.update({'referer': referer})

        # set user_agent
        if user_agent:
            requests_session.headers.update({'user-agent': user_agent})

        # find headers; best effort, any request failure means "no headers".
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        try:
            header = requests_session.head(link).headers
        except Exception:
            header = {}

    # find file name in the Content-Disposition header
    filename = None
    content_disposition = header.get('Content-Disposition')
    if content_disposition and 'filename=' in content_disposition:
        raw_value = content_disposition.split('filename=')[-1].strip()
        if raw_value[:1] in ('"', "'"):
            # quoted value: keep the text between the opening quote and the
            # next matching quote, ignoring any parameters that follow
            filename = raw_value[1:].split(raw_value[0], 1)[0]
        else:
            # unquoted value: it ends at the next parameter separator
            filename = raw_value.split(';', 1)[0].strip()

    # fall back to the last segment of the link
    if not filename:
        filename = link.split('/')[-1]

    # if user set file name before in add_link_dictionary['out'],
    # then set "out" for filename
    if out:
        filename = out

    # check if file_size is available
    file_size = None
    content_length = header.get('Content-Length')
    if content_length is not None:
        try:
            length = int(content_length)
        except (TypeError, ValueError):
            # malformed Content-Length: treat the size as unknown
            length = None
        if length is not None:
            # converting file_size to KiB or MiB or GiB
            file_size = humanReadbleSize(length)

    # return results
    return filename, file_size
def convertDownloadInformation(download_status):
    """Convert a raw download status mapping into display-ready values.

    Args:
        download_status: mapping that must contain 'gid' and may contain
            'files', 'totalLength', 'completedLength', 'downloadSpeed',
            'connections' and 'status'. Every falsy value in it is replaced
            by None in place.

    Returns:
        A dictionary with the keys 'gid', 'file_name', 'status', 'size',
        'downloaded_size', 'percent', 'connections', 'rate',
        'estimate_time_left' and 'link'. Values that cannot be determined
        are None, except 'rate', which is then the string "0".

    Raises:
        KeyError: if 'gid' is missing from download_status.
    """
    # find file_name and link
    try:
        # 'files' contains name of download file and link of download file.
        # Its string form is stripped of the enclosing brackets and parsed
        # again, so only a single-entry list produces a dictionary here;
        # anything else ends up in the except branch.
        file_status = str(download_status['files'])
        file_status = ast.literal_eval(file_status[1:-1])
        path = str(file_status['path'])
        file_name = urllib.parse.unquote(os.path.basename(path)) or None
        link = file_status['uris'][0]['uri']
    except Exception:
        # best effort: missing or unparsable file information
        file_name = None
        link = None

    # normalize every falsy value to None
    for key in download_status.keys():
        if not download_status[key]:
            download_status[key] = None

    # find file_size
    try:
        file_size = float(download_status['totalLength'])
    except (KeyError, TypeError, ValueError):
        file_size = None

    # find downloaded size
    try:
        downloaded = float(download_status['completedLength'])
    except (KeyError, TypeError, ValueError):
        downloaded = None

    # convert file_size and downloaded_size to KiB and MiB and GiB
    if downloaded is not None and file_size is not None and file_size != 0:
        size_str = humanReadbleSize(file_size)
        downloaded_str = humanReadbleSize(downloaded)
        # find download percent from file_size and downloaded_size
        percent_str = str(int(downloaded * 100 / file_size)) + "%"
    else:
        percent_str = None
        size_str = None
        downloaded_str = None

    # find download_speed
    try:
        download_speed = int(download_status['downloadSpeed'])
    except (KeyError, TypeError, ValueError):
        download_speed = 0

    # convert download_speed to desired units
    # and find estimate_time_left
    if downloaded is not None and download_speed != 0:
        download_speed_str = humanReadbleSize(download_speed, 'speed') + '/s'
        if file_size is not None:
            remaining = int((file_size - downloaded) / download_speed)
            if remaining >= 3600:
                hours, remainder = divmod(remaining, 3600)
                minutes, seconds = divmod(remainder, 60)
                estimate_time_left_str = (str(hours) + "h" + str(minutes)
                                          + "m" + str(seconds) + "s")
            elif remaining >= 60:
                minutes, seconds = divmod(remaining, 60)
                estimate_time_left_str = (str(minutes) + "m" + str(seconds)
                                          + "s")
            else:
                estimate_time_left_str = str(remaining) + "s"
        else:
            # total size unknown: the remaining time cannot be estimated
            estimate_time_left_str = None
    else:
        download_speed_str = "0"
        estimate_time_left_str = None

    # find number of connections
    try:
        connections_str = str(download_status['connections'])
    except KeyError:
        connections_str = None

    # find status of download
    try:
        status_str = str(download_status['status'])
    except KeyError:
        status_str = None

    # rename active status to downloading
    if status_str == "active":
        status_str = "downloading"

    # rename removed status to stopped
    if status_str == "removed":
        status_str = "stopped"

    # a status that was normalized to None arrives here as the text "None"
    if status_str == "None":
        status_str = None

    # set 0 second for estimate_time_left_str if download is completed.
    if status_str == 'complete':
        estimate_time_left_str = '0s'

    # return information in dictionary format
    download_info = {
        'gid': download_status['gid'],
        'file_name': file_name,
        'status': status_str,
        'size': size_str,
        'downloaded_size': downloaded_str,
        'percent': percent_str,
        'connections': connections_str,
        'rate': download_speed_str,
        'estimate_time_left': estimate_time_left_str,
        'link': link,
    }

    return download_info