def _extract_hrefs(text, opener, closer='">'):
    """Return every substring of *text* found between *opener* and *closer*."""
    hrefs = []
    pos = text.find(opener)
    while pos != -1:
        # Search from the opener itself so an empty href (`href="">`) still
        # yields '' instead of swallowing the next anchor.
        end = text.find(closer, pos)
        if end == -1:
            # Truncated markup: stop rather than rescanning the same opener
            # forever.
            break
        hrefs.append(text[pos + len(opener):end])
        pos = text.find(opener, end)
    return hrefs


def gen_download_url(url, flag):
    """Collect download URLs from the "pdownload" section of a package page.

    The page is fetched with ``netfunc.get_page(url)`` and narrowed with
    ``stripstr.part_of_str`` to the text between ``<div id="pdownload">`` and
    ``</div> <!-- end pdownload -->``.

    Args:
        url: Address of the page to fetch.
        flag: ``'sourcenames'`` to list the links found in ``<td>`` cells;
            any other value is matched as a ``/<flag>/`` path component of
            the links found in ``<th>`` cells.

    Returns:
        For ``'sourcenames'``: a list (possibly empty) of the href values.
        Otherwise: a one-element list holding ``choose_url_mirror()`` of the
        first link containing ``/<flag>/``, or failing that the first link
        containing ``/all/``; the integer ``1`` when neither exists.
    """
    page = netfunc.get_page(url)
    section = stripstr.part_of_str(
        page, '<div id="pdownload">', '</div> <!-- end pdownload -->')
    if section == 1:
        # NOTE(review): part_of_str appears to signal "markers not found"
        # with the integer 1 (results() tests for it) - confirm. Treat it as
        # an empty section so each branch yields its normal "nothing found"
        # value instead of failing on 1.find(...).
        section = ''

    if flag == 'sourcenames':
        return _extract_hrefs(section, '<td><a href="')

    site = 'http://packages.ubuntu.com'
    candidates = [site + href
                  for href in _extract_hrefs(section, '<th><a href="')]
    # Prefer a link for the requested flag; fall back to the "/all/" link.
    for wanted in ('/' + flag + '/', '/all/'):
        for candidate in candidates:
            if wanted in candidate:
                return [choose_url_mirror(candidate)]
    return 1
def choose_url_mirror(url):
    """Return the first mirror link listed on a download page.

    The page is fetched with ``netfunc.get_page(url)`` and narrowed with
    ``stripstr.part_of_str`` to the text between
    ``<p>You can download the requested file from the `` and
    ``<div id="pdownloadnotes">``; the href of the first ``<li><a href="``
    anchor in that text is returned.

    Args:
        url: Address of the download page to fetch.

    Returns:
        The href value of the first listed link, or an empty string when the
        section or a complete link cannot be found.
    """
    page = netfunc.get_page(url)
    section = stripstr.part_of_str(
        page,
        '<p>You can download the requested file from the ',
        '<div id="pdownloadnotes">')
    if section == 1:
        # NOTE(review): part_of_str appears to signal "markers not found"
        # with the integer 1 (results() tests for it) - confirm.
        return ''

    opener = '<li><a href="'
    start = section.find(opener)
    if start == -1:
        return ''
    end = section.find('">', start)
    if end == -1:
        # An unchecked -1 here used to produce a meaningless slice.
        return ''
    return section[start + len(opener):end]
def results(url, sflag):
    """List the packages on a search-result page and let the user pick one.

    The page is fetched with ``netfunc.get_page(url)`` and narrowed with
    ``stripstr.part_of_str`` to the ``<div id="psearchres">`` section. Each
    entry is printed as ``<n>. <name>: [<link text>]<category> <description>``
    and the user is then asked for an entry number.

    Args:
        url: Address of the search-result page.
        sflag: ``"sourcenames"`` to read ``<h3>Source Package `` headings and
            split category/description on ``": "``; any other value reads
            ``<h3>Package `` headings and splits on newlines.

    Returns:
        ``"<link text>/<package name>"`` for the chosen entry, or the integer
        ``1`` (after printing an error) when the answer is not a valid entry
        number.
    """
    page = netfunc.get_page(url)
    part_page = stripstr.part_of_str(page, '<div id="psearchres">', "</div>")

    if sflag == "sourcenames":
        heading = "<h3>Source Package "
        separator = ": "
    else:
        heading = "<h3>Package "
        separator = "\n"

    link_open = '<a class="resultlink" href="'
    marker = '[<strong class="'
    line_break = "<br>"

    pac_list = []
    while True:
        pac_name = stripstr.part_of_str(part_page, heading, "</h3>")
        if pac_name == 1:
            # part_of_str yields 1 once no further heading is found.
            break

        link_at = part_page.find(link_open)
        if link_at == -1:
            break
        rest = part_page[link_at:]

        # The entry text ends at whichever terminator comes first. A missing
        # one (-1) must not win the comparison.
        marker_at = rest.find(marker)
        break_at = rest.find(line_break)
        if marker_at != -1 and (break_at == -1 or marker_at < break_at):
            terminator, end_at = marker, marker_at
        else:
            terminator, end_at = line_break, break_at
        if end_at == -1:
            # No terminator at all: stopping avoids a loop that cannot advance.
            break

        entry = stripstr.part_of_str(rest, '">', terminator)
        if entry == 1:
            break

        # entry looks like "<link text></a><category><sep><description>..."
        link_text, _, tail = entry.partition("</a>")
        fields = tail.split(separator)
        pac_category = fields[0]
        pac_desc = fields[1].strip() if len(fields) > 1 else ""

        info = (pac_name + ": [" + link_text + "]" + pac_category
                + " " + pac_desc)
        print(str(len(pac_list)) + ". " + info.rstrip("\n"))
        pac_list.append(link_text + "/" + pac_name)

        part_page = part_page[link_at + end_at:]

    # Python 2's input() evaluates the typed text as code. Read a plain line
    # and convert it explicitly instead.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    try:
        selected = int(read_line("Enter the package number to download = "))
    except ValueError:
        selected = -1

    if 0 <= selected < len(pac_list):
        return pac_list[selected]

    print("Invalid input!")
    print("Program will exit now...")
    return 1