def doDownload(file):
    utils.log('download url: %s' % file)

    dst = ADDON.getSetting('DOWNLOAD_FOLDER')

    import sfile
    sfile.makedirs(dst)

    if not sfile.isdir(dst):
        utils.DialogOK(GETTEXT(30256), GETTEXT(30257))
        utils.openSettings(ADDONID, 2.24)

        xbmc.sleep(500)
        while xbmc.getCondVisibility('Window.IsActive(addonsettings)') == 1:
            xbmc.sleep(100)

        dst = ADDON.getSetting('DOWNLOAD_FOLDER')

        if not sfile.isdir(dst):
            utils.DialogOK(GETTEXT(30256))
            return

    import os  # needed for os.path.join below
    dst = os.path.join(ADDON.getSetting('DOWNLOAD_FOLDER'), getDownloadTitle(file))

    if utils.DialogYesNo(GETTEXT(30243), GETTEXT(30244)):
        xbmc.Player().stop()
        import download
        download.download(file, dst, 'Super Favourites')
def get_articles(homepage_url, link_extracter, site_name):
    index_page = download(homepage_url)
    links = link_extracter(index_page)
    counter = 0
    article_list = []
    for section in links:
        temp = 1
        for link in links[section]:
            print "processing link " + str(temp) + " of " + str(len(links[section])) + " in section " + section
            temp += 1
            page = download(link)
            title, text = extract_content(page)
            date = parse_date_str(page)
            article = {}
            article["articleText"] = text
            article["title"] = title
            article["byline"] = "N/A"
            article["articleDate"] = date
            article["section"] = section
            article["articlePosition"] = counter
            article["cleanFormat"] = 1
            article["newspaperName"] = site_name
            article["source"] = "web scraped"
            article["accessDate"] = str(datetime.date.today())
            article["comments"] = "N/A"
            counter += 1
            article_list.append(article)
    return article_list
def DownloadIfExists(url, dst):
    import download

    if not download.getResponse(url, 0, ''):
        return False

    download.download(url, dst)
    return True
def main():
    # get blog url from user
    try:
        blog_url = sys.argv[1]
        if 'escapistmagazine.com/videos/view/' not in blog_url:
            reason = 'wrong url supplied'
            _error(reason)
            raise ValueError(reason)
    except:
        _print('usage: escapist-dl.py url_to_video_post')
        exit(1)

    # get & parse blog site for video config
    _info('loading `%s`' % blog_url)
    config_url = get_config_url(blog_url)
    if config_url:
        _info('found config at `%s`' % config_url)

        # get & parse video config for video info
        video_url, video_name = get_video_url_name(config_url)
        if video_url and video_name:
            _info('found clip at `%s`' % video_url)

            # get that video!
            file_name = video_name + '.mp4'
            _print('downloading `%s` ..' % file_name, newline=False)
            download(video_url, file_name)
            _print('done!')
def doDownload(file):
    utils.log('download url: %s' % file)

    dst = ADDON.getSetting('DOWNLOAD_FOLDER')

    import sfile
    sfile.makedirs(dst)

    if not sfile.exists(dst):
        utils.DialogOK(GETTEXT(30256), GETTEXT(30257))
        utils.openSettings(ADDONID, 2.24)

        xbmc.sleep(500)
        while xbmc.getCondVisibility('Window.IsActive(addonsettings)') == 1:
            xbmc.sleep(100)

        dst = ADDON.getSetting('DOWNLOAD_FOLDER')

        if not sfile.exists(dst):
            utils.DialogOK(GETTEXT(30256))
            return

    import os
    dst = os.path.join(ADDON.getSetting('DOWNLOAD_FOLDER'), getDownloadTitle(file))

    if utils.DialogYesNo(GETTEXT(30243), GETTEXT(30244)):
        xbmc.executebuiltin('Action(Stop)')
        import download
        download.download(file, dst, utils.TITLE)
def main():
    # hub_url = 'http://www.escapistmagazine.com/videos/view/zero-punctuation'
    get_video_infos = re.compile(
        '''<div class='filmstrip_video'><a href='(?P<blog_url>http://www\.escapistmagazine\.com/videos/view/zero-punctuation/.+?)'><img src='(?P<thumbnail_url>http://.+?)'></a><div class='title'>(?P<video_title>.+?)</div><div class='date'>Date: (?P<month>\d{2})/(?P<day>\d{2})/(?P<year>\d{4})</div>'''
    ).findall
    overview_url_template = 'http://www.escapistmagazine.com/videos/view/zero-punctuation?page=%i'

    i = 1
    while 1:
        # get overview site
        print 'loading page #%i ..' % i,
        overview_url = overview_url_template % i
        i += 1
        overview = download(overview_url)
        print 'done.'

        # parse overview site
        if 'gallery_pagination_footer' not in overview:
            print 'all done! :)'
            break

        print 'loading video information ..'
        for info in get_video_infos(overview):
            blog_url, thumb_url, video_title, month, day, year = info
            print '\tdownloading `%s` ..' % video_title,
            video_url = get_video_url(blog_url)
            if not video_url:
                print 'failed!'
            else:
                file_name = '%s (%s-%s-%s).mp4' % (fix_filename(video_title), year, month, day)
                try:
                    download(video_url, file_name)
                except IOError, e:
                    print 'failed! (%s)' % e.message
                else:
                    print 'done!'
def DownloadLogos():
    url = dixie.GetExtraUrl() + 'resources/logos.zip'

    try:
        os.makedirs(logofolder)
    except:
        pass

    download.download(url, logodest)

    if os.path.exists(logos):
        now = datetime.datetime.now()
        date = now.strftime('%B-%d-%Y %H-%M')

        import shutil
        cur = dixie.GetSetting('dixie.logo.folder')
        src = os.path.join(logos, cur)
        dst = os.path.join(logos, cur + '-%s' % date)
        try:
            shutil.copytree(src, dst)
            shutil.rmtree(src)
        except:
            pass

    extract.all(logodest, extras)
    dixie.SetSetting('LOGOVERSION', LOGOVERSION)
    dixie.SetSetting('dixie.logo.folder', LOGOPACK)

    try:
        os.remove(logodest)
    except:
        pass
def download(path, zipfile):
    # the function name shadows the download module, so it is imported locally
    import download
    import extract

    download.download(url, zipfile)
    extract.all(zipfile, path)
    sfile.remove(zipfile)
def installMacOS():
    import download
    import extract
    import stat

    url = 'http://www.on-tapp.tv/wp-content/vpn/openvpn-macos-2.3.4.zip'
    bindir = xbmc.translatePath('special://profile/addon_data/plugin.program.vpnicity/macos/sbin/')
    dest = os.path.join(bindir, 'openvpn-macos.zip')
    macbin = os.path.join(bindir, 'openvpn')

    try:
        os.makedirs(bindir)
    except:
        pass

    download.download(url, dest)
    extract.all(dest, bindir)

    st = os.stat(macbin)
    os.chmod(macbin, st.st_mode | stat.S_IEXEC)

    try:
        os.remove(dest)
    except:
        pass

    success = path.getPath(utils.ADDON.getSetting('OS'), silent=True)

    if success:
        utils.dialogOK('VPN application successfully installed')
    else:
        utils.dialogOK('VPN application installation failed', 'Please try again later')
def get_finance_numeric():
    # Fetch capital-flow data for concept sectors:
    # main capital inflow as a share of unit trading volume.
    url = 'http://nufm.dfcfw.com/EM_Finance2014NumericApplication/JS.aspx?cmd=C._BKGN&type=ct&st=(BalFlowMain)&sr=-1&p=1&ps=50&js=[(x)]&token=894050c76af8597a853f5b408b759f5d&sty=DCFFITABK'
    html = download().get(url)
    bag_bk = {}
    for inum, i in enumerate(re.compile(r'"([^"]+)"').findall(html)):
        m = i.split(',')
        bkid = m[1] if len(m) > 1 else ''
        if bkid not in bag_bk:
            bag_bk[bkid] = {}
        money_in = float(m[4]) * 10000 if len(m) > 4 else ''
        surl = 'http://nufm.dfcfw.com/EM_Finance2014NumericApplication/JS.aspx?type=CT&cmd=C.%s1&sty=FCOIATA&sortType=C&sortRule=-1&page=1&pageSize=100&js=[(x)]&token=7bc05d0d4c3c22ef9fca8c2a912d779c&jsName=quote_123' % bkid if bkid else ''
        if surl:
            shtml = download().get(surl)
            if not shtml:
                continue
            infos = re.compile(r'"([^"]+)"').findall(shtml)
            amt_all = sum([float(j.split(',')[8]) for j in infos if j.split(',')[8] != '-'])
            # share of the sector's trading volume taken by large orders
            bag_bk[bkid]['bkid'] = bkid
            bag_bk[bkid]['bkname'] = m[2]
            bag_bk[bkid]['avg_in'] = 100 * money_in / amt_all
            bag_bk[bkid]['stocks_all'] = ';'.join([j.split(',')[1] for j in infos])
            bag_bk[bkid]['high_price'] = m[11]
            bag_bk[bkid]['now_price'] = m[3]
        if inum > 20:
            break
    return bag_bk
def RetrieveURL(url, type, isServer):
    if len(url) == 0:
        return

    if type == VIDEO_ADDON:
        AddDir(1, url, 0)

    root = utils.getDownloadLocation()

    if type == SERVER_FILE:
        dst = urllib.quote_plus(url)
        dst = os.path.join(root, dst)
        if not sfile.exists(dst):
            try:
                sfile.copy(url, dst)
            except:
                pass
        AddDir(1, dst, 0)

    if type == AMAZON_FILE:
        src = url.lower().endswith('.txt') or url.lower().endswith('.%s' % SRC)

        url = urllib.quote_plus(url)
        dst = os.path.join(root, url)

        import download
        url = s3.getURL(url)
        url = s3.convertToCloud(url)
        utils.Log('Amazon URL : %s' % url)

        if src:
            url = utils.GetHTML(url, maxAge=7*86400)
            url = urllib.quote_plus(url)
            dst = os.path.join(root, url)
            url = s3.getURL(url)
            url = s3.convertToCloud(url)
            utils.Log('Real Amazon URL : %s' % url)

        downloading = sfile.exists(dst + '.part')

        if downloading:
            if False:  # isServer:
                AddDir(1, dst, 0)
            else:
                AddDir(1, url, 0)
            return

        if sfile.exists(dst):
            AddDir(1, dst, 0)
            return

        download.download(url, dst)

        #if isServer:
        #    while sfile.size(dst) == 0:
        #        xbmc.sleep(100)
        #    AddDir(1, dst, 0)
        #    return

        AddDir(1, url, 0)
def downloadSkins(url, path, zipfile):
    import download
    import extract

    utils.DialogOK('Una nueva version actualizada está disponible.', 'Se puede descargar e instalar en su sistema GVAX.')

    download.download(url, zipfile)
    extract.all(zipfile, path, dp='Installing skin update')
    sfile.remove(zipfile)
def downloadLogos(url, path, zipfile):
    import download
    import extract

    DialogOK('Some new logos are available.', 'They will be downloaded and added to your logopack.')

    download.download(url, zipfile)
    extract.all(zipfile, path, dp='Installing logo update')
    sfile.remove(zipfile)
def downloadLogos(url, path, zipfile):
    import download
    import extract

    utils.DialogOK('Algunos de los nuevos logotipos están disponibles.', 'Pueden ser descargados y añadidos a su logopack.')

    download.download(url, zipfile)
    extract.all(zipfile, path, dp='Installing logo update')
    sfile.remove(zipfile)
def DOWNLOAD(url, name):
    if len(url) > 0:
        downloads = addon.getSetting("downloads")
        if "" == downloads:
            xbmcgui.Dialog().ok("FilmyCZ", "Nastavte složku pro stahování")
            return
        localfile = "%s%s" % (downloads, name)
        localfile = localfile.replace(":", "")
        download.download(addon, localfile, url)
def Start(xpath, offset, isurl, xnumber):
    stop = 0
    dixie.log("### XPATH: " + xpath)
    try:
        os.remove(os.path.join(ADDON_DATA, AddonID, 'settings.cfg'))
    except:
        dixie.log("### No settings.cfg file to remove")

    # Check database isn't locked and continue if possible
    if os.path.exists(dbpath):
        try:
            os.rename(dbpath, dbpath + '1')
            os.rename(dbpath + '1', dbpath)
            dixie.log("Database not in use, we can continue")
        except:
            dixie.log("### Database in use, Kodi needs a restart, if that doesn't work you'll need to restart your system.")
            dialog.ok(ADDON.getLocalizedString(30813), ADDON.getLocalizedString(30814))
            stop = 1

    if stop == 0:
        xbmc.executebuiltin("XBMC.Notification(" + ADDON.getLocalizedString(30807) + "," + ADDON.getLocalizedString(30811) + ",10000," + updateicon + ")")
        xbmc.executebuiltin("ActivateWindow(busydialog)")

        # Download the online xml file
        if isurl:
            dixie.log('File is URL, downloading to temp.xml')
            download.download(xpath, tempxml)
            xpath = tempxml

        # Read contents of xml
        readfile = open(xpath, 'r')
        content = readfile.read().decode('utf-8', 'ignore')
        readfile.close()

        xmlsource = re.compile('source-info-name="(.+?)"').findall(content)
        try:
            xmlsource = xmlsource[0]
        except:
            xmlsource = 'unknown'
        dixie.log("XML TV SOURCE: " + xmlsource)

        channels = re.compile('<channel id="[\s\S]*?<\/channel').findall(content)
        programmes = re.compile('<programme[\s\S]*?<\/programme').findall(content)

        # Get total amount of channels
        channelcount = len(channels)

        # Get total amount of programmes
        listingcount = len(programmes)

        xbmc.executebuiltin('Dialog.Close(busydialog)')

        try:
            cur.close()
            con.close()
        except:
            dixie.log("### Database not open, we can continue")

        Create_CSV(channels, channelcount, listingcount, programmes, xmlsource, offset, xnumber)
def downloadSkins(url, path, zipfile):
    import download
    import extract

    DialogOK('A new skin update is available.', 'It will be downloaded and installed', 'into your On-Tapp.TV system.')

    download.download(url, zipfile)
    extract.all(zipfile, path, dp='Installing skin update')
    sfile.remove(zipfile)
def metalliq_play(args):
    run = 0
    windowopen = 0
    dixie.log('### metalliq play args: %s' % args)
    channel, repository, plugin, playertype, channel_orig, itemname = args.split('|')
    dixie.log('### %s' % channel)
    dixie.log('### %s' % repository)
    dixie.log('### %s' % plugin)
    dixie.log('### %s' % playertype)

    # Check if add-on is installed
    try:
        addonid = xbmcaddon.Addon(id=plugin)
        addonname = addonid.getAddonInfo('name')
        updateicon = os.path.join(ADDONS, plugin, 'icon.png')
        xbmc.executebuiltin("XBMC.Notification(Please Wait...,Searching for [COLOR=dodgerblue]" + channel + "[/COLOR] ,5000," + updateicon + ")")
        xbmc.executebuiltin('RunPlugin(plugin://plugin.video.metalliq/live/%s/None/en/%s)' % (channel, playertype))
        Check_Playback(channel, repository, plugin, playertype, channel_orig, itemname)

    # If not check if the relevant repo is installed
    except:
        try:
            repoid = xbmcaddon.Addon(id=repository)
            reponame = repoid.getAddonInfo('name')
            run = 1

        # If not then install the relevant repo
        except:
            if dialog.yesno('Repository Install', 'To install the add-on required you need the following repo:', '[COLOR=dodgerblue]%s[/COLOR]' % repository, 'Would you like to install?'):
                dp.create('Downloading', '[COLOR=dodgerblue]%s[/COLOR]' % repository, 'Please Wait')
                DOWNLOAD_ZIP = os.path.join(PACKAGES, repository + '.zip')
                download.download('https://github.com/noobsandnerds/noobsandnerds/blob/master/zips/%s/%s-0.0.0.1.zip?raw=true' % (repository, repository), DOWNLOAD_ZIP, dp)
                extract.all(DOWNLOAD_ZIP, ADDONS, dp)
                dp.close()
                xbmc.executebuiltin('UpdateLocalAddons')
                xbmc.executebuiltin('UpdateAddonRepos')
                xbmc.sleep(4000)
                run = 1

    # If add-on wasn't installed we install it
    if run == 1:
        xbmc.executebuiltin("ActivateWindow(10025,plugin://%s,return)" % plugin)
        xbmc.sleep(1500)
        activewindow = True
        while activewindow:
            activewindow = xbmc.getCondVisibility('Window.IsActive(yesnodialog)')
            xbmc.sleep(500)
        xbmc.sleep(1000)
        activewindow = True
        while activewindow:
            activewindow = xbmc.getCondVisibility('Window.IsActive(progressdialog)')
            xbmc.sleep(500)

        # Update enabled metalliq players
        xbmc.executebuiltin('RunPlugin(plugin://plugin.video.metalliq/settings/players/tvportal)')
def _download_trailers(equivalent_mpaa, mpaa, genre, movie):
    updated_trailers = []
    utils.log("Downloading Trailers: %s Trailers" % trailer_settings["trailer_count"], xbmc.LOGNOTICE)
    temp_destination = os.path.join(BASE_CURRENT_SOURCE_PATH, "temp_trailers").replace("\\\\", "\\")
    if not xbmcvfs.exists(temp_destination):
        xbmcvfs.mkdir(temp_destination)
    trailers = _get_trailers(items=trailer_settings["trailer_count"], equivalent_mpaa=equivalent_mpaa, mpaa=mpaa, genre=genre, movie=movie, mode="download")
    for trailer in trailers:
        updated_trailer = {}
        success = False
        destination = ""
        thumb = ""
        utils.log("Attempting To Download Trailer: %s" % trailer[1], xbmc.LOGNOTICE)
        filename, ext = os.path.splitext(os.path.basename((trailer[2].split("|")[0]).replace("?", "")))
        filename = filename + "-trailer" + ext
        file_path = os.path.join(trailer_settings["trailer_download_folder"], filename).replace("\\\\", "\\")
        # check to see if trailer is already downloaded
        if xbmcvfs.exists(file_path):
            success = True
            destination = file_path
            thumb = os.path.splitext(file_path)[0] + ".tbn"
        else:
            success, destination = download(trailer[2], temp_destination, file_tag="-trailer")
            tsuccess, thumb = download(trailer[3], temp_destination, file_tag="-trailer", new_name=filename, extension=".tbn")
        if success:
            utils.log("Successfully Download Trailer: %s" % trailer[1], xbmc.LOGNOTICE)
            updated_trailer[0] = trailer[0]
            updated_trailer[1] = trailer[1]
            updated_trailer[2] = destination
            updated_trailer[3] = thumb
            updated_trailer[4] = trailer[4]
            updated_trailer[5] = trailer[5]
            updated_trailer[6] = trailer[6]
            updated_trailer[7] = trailer[7]
            updated_trailer[8] = trailer[8]
            updated_trailer[9] = trailer[9]
            updated_trailer[10] = trailer[10]
            updated_trailer[11] = trailer[11]
            _create_nfo_file(updated_trailer, os.path.join(temp_destination, filename).replace("\\\\", "\\"))
        else:
            # note: the original logged an undefined `logmessage` here, which
            # would have raised a formatting error; log only the trailer name
            utils.log("Failed to Download Trailer: %s" % trailer[1], xbmc.LOGNOTICE)
            updated_trailer = []
        xbmcvfs.copy(os.path.join(temp_destination, filename).replace("\\\\", "\\"),
                     os.path.join(trailer_settings["trailer_download_folder"], filename).replace("\\\\", "\\"))
        xbmcvfs.copy(os.path.join(temp_destination, os.path.splitext(filename)[0] + ".tbn").replace("\\\\", "\\"),
                     os.path.join(trailer_settings["trailer_download_folder"], os.path.splitext(filename)[0] + ".tbn").replace("\\\\", "\\"))
        xbmcvfs.copy(os.path.join(temp_destination, os.path.splitext(filename)[0] + ".nfo").replace("\\\\", "\\"),
                     os.path.join(trailer_settings["trailer_download_folder"], os.path.splitext(filename)[0] + ".nfo").replace("\\\\", "\\"))
        xbmcvfs.delete(os.path.join(temp_destination, filename).replace("\\\\", "\\"))
        xbmcvfs.delete(os.path.join(temp_destination, os.path.splitext(filename)[0] + ".tbn").replace("\\\\", "\\"))
        xbmcvfs.delete(os.path.join(temp_destination, os.path.splitext(filename)[0] + ".nfo").replace("\\\\", "\\"))
        updated_trailers += [updated_trailer]
    return updated_trailers
def doOTTUpdate(url, path, zipfile, ottupdate):
    import download
    import extract

    utils.DialogOK('A GVAX "Live Update" está disponible.', 'Actualización %s será descargado e instalado en su sistema.' % (ottupdate), 'Gracias.')

    download.download(url, zipfile)
    extract.all(zipfile, path, dp='Installing python update')
    sfile.remove(zipfile)

    utils.Log('OTT Update %s installed' % str(ottupdate))
    xbmc.executebuiltin('UpdateLocalAddons')
def doOTTUpdate(url, path, zipfile, ottupdate):
    import download
    import extract

    DialogOK('An On-Tapp.TV "Live Update" is available.', 'OTTV Update %s will be downloaded and installed on your system.' % (ottupdate), 'Thank you.')

    download.download(url, zipfile)
    extract.all(zipfile, path, dp='Installing python update')
    sfile.remove(zipfile)

    Log('OTT Update %s installed' % str(ottupdate))
    xbmc.executebuiltin('UpdateLocalAddons')
def doDSFSkinUpdate(url, path, zipfile, kodiskin):
    import download
    import extract

    utils.DialogOK('Un GVAX es "Live Update" disponible.', 'Actualización %s será descargado e instalado en su sistema.' % (kodiskin), 'Gracias.')

    download.download(url, zipfile)
    extract.all(zipfile, path, dp='Installing skin update')
    sfile.remove(zipfile)

    utils.Log('Skin Update %s installed' % str(kodiskin))
    xbmc.executebuiltin('UpdateLocalAddons')
def Download(title, url):
    file = DownloadPath(title, url)
    if not file:
        return

    try:
        import download
        download.download(url, file)
    except Exception, e:
        print TITLE + ' Error during downloading of ' + url
        print str(e)
def add_to_queue(request):
    if request.method == "GET":
        playlist = Playlist.objects.get(name=request.GET["playlist"])
        song = Song.objects.get_or_create(playlist=playlist, name=request.GET["title"])[0]
        song.url = request.GET["videoId"]
        song.save()
        download(request.GET["videoId"], request.GET["playlist"], request.GET["title"])
        songs = Song.objects.all()
        if len(songs) > 6:
            songs[0].delete()
    return render(request, "YTD/download_queue.html", {"songs": Song.objects.all()})
def command_line():
    alreadyLoggedIn = False
    tasks = []
    outputs = []
    while True:
        userInput = str(raw_input(">>>"))
        if userInput == 'autodownloadrun':
            download.download(pathToModels)
            login(driver, username, password)
            alreadyLoggedIn = True
            tasks = uploadAll(driver, alreadyLoggedIn, username, password, nodescores)
            makeAllTasks(driver, tasks, alreadyLoggedIn, username, password)
            outputs = runAllTasks(driver, tasks, alreadyLoggedIn, pathToOutputs, username, password)
            print "outputs: ", outputs
        elif userInput == 'autorun':
            login(driver, username, password)
            alreadyLoggedIn = True
            tasks = uploadAll(driver, alreadyLoggedIn, username, password, nodescores)
            print "tasks: ", tasks
            makeAllTasks(driver, tasks, alreadyLoggedIn, username, password)
            outputs = runAllTasks(driver, tasks, alreadyLoggedIn, pathToOutputs, username, password)
            print "Outputs: ", outputs
        elif userInput == "download":
            download.download(pathToModels)
        elif userInput == "login":
            login(driver, username, password)
            alreadyLoggedIn = True
        elif userInput == "uploadandmaketasks":
            tasks = uploadAll(driver, alreadyLoggedIn, username, password, nodescores)
            print "tasks: ", tasks
            makeAllTasks(driver, tasks, alreadyLoggedIn, username, password)
        elif userInput == "runall":
            # argument order matched to the runAllTasks calls above
            outputs = runAllTasks(driver, tasks, alreadyLoggedIn, pathToOutputs, username, password)
            print "outputs: ", outputs
        elif userInput == "getoutputs":
            if not alreadyLoggedIn:
                count = 0
                for f in listdir(pathToModels):
                    count += 1
                print count
                outputs = storeOutputs(driver, count, pathToOutputs, alreadyLoggedIn, username, password)
                print outputs
                alreadyLoggedIn = True
            getoutputs(driver, outputs, alreadyLoggedIn, username, password)
        elif userInput == "exit":
            return
def submit_query_terms(self, term_list, max_url_count=15, parallel_cb=None, cached=False):
    """Perform queries to Search Engine APIs.

    This function only operates when there is no information associated
    with the terms, usually before running extract_terms().

    Args:
        term_list: list of search terms that are submitted by the user
    Returns:
        urls: list of urls that are returned by the Search Engine
    """
    chdir(self.memex_home + '/seed_crawler/seeds_generator')

    query = ' '.join(term_list)
    with open('conf/queries.txt', 'w') as f:
        f.write(query)

    if not cached:
        comm = "java -cp .:class:libs/commons-codec-1.9.jar BingSearch -t " + str(max_url_count)
        p = Popen(comm, shell=True, stdout=PIPE)
        output, errors = p.communicate()
        print output
        print errors

        call(["rm", "-rf", "html"])
        call(["mkdir", "-p", "html"])
        call(["rm", "-rf", "thumbnails"])
        call(["mkdir", "-p", "thumbnails"])

        #if sys.platform in ['darwin', 'linux2']:
        if sys.platform in ['darwin']:
            download("results.txt")
        else:
            download("results.txt", True, parallel_cb)

        if exists(self.memex_home + "/seed_crawler/ranking/exclude.txt"):
            call(["rm", self.memex_home + "/seed_crawler/ranking/exclude.txt"])

        with open("results.txt", 'r') as f:
            urls = [self.validate_url(line.strip()) for line in f.readlines()]

        # chdir(self.memex_home + '/seed_crawler/lda_pipeline')
        # call(["mkdir", "-p", "data"])
        # p = Popen("java -cp .:class/:lib/boilerpipe-1.2.0.jar:lib/nekohtml-1.9.13.jar:lib/xerces-2.9.1.jar Extract ../seeds_generator/html/ | python concat_nltk.py data/lda_input.csv", shell=True, stdout=PIPE)
        # output, errors = p.communicate()
        # print output
        # print errors
    else:
        urls = term_search('query', term_list)

    for url in urls:
        self.urls_set.add(url)

    return self.urls_set  # results from Search Engine
def main():
    parser = build_arg_parser()
    args = parser.parse_args()

    if args.sub_command == 'push':
        if len(args.files) == 0:
            logger.die('Must include at least one file')
        else:
            for f in args.files:
                upload.upload(f, args.MediaFire_Path)
    elif args.sub_command == 'pull':
        if len(args.files) == 0:
            logger.die('Must include at least one file')
        else:
            for f in args.files:
                download.download(f)
    elif args.sub_command == 'del':
        if len(args.files) == 0:
            logger.die('Must include at least one file')
        else:
            for f in args.files:
                delete.delete(f)
    elif args.sub_command == 'init':
        if user.is_user_signed_in():
            logger.end('User is already initialized')
        else:
            user.get_auth()
    elif args.sub_command == 'list':
        if len(args.files) == 0:
            lister.list_files('')
        else:
            for f in args.files:
                lister.list_files(f)
    elif args.sub_command == 'diff':
        if len(args.files) == 0:
            logger.die('Must include at least one file')
        else:
            for f in args.files:
                diff.diff(f, args.MediaFire_Path)
    elif args.sub_command == 'out':
        user.log_out()
    elif args.sub_command == 'change':
        user.change_user()
    elif args.sub_command == 'share':
        if len(args.files) == 0:
            logger.die('Must include at least one file')
        else:
            for f in args.files:
                share.share(f)
def Download(title, _url):
    file = DownloadPath(_url)
    if not file:
        return

    url = GetCartoonURL(_url)
    url = url.rsplit('|', 1)[0]

    try:
        import download
        download.download(url, file, TITLE, URL)
    except Exception, e:
        print '%s - %s Error during downloading of %s' % (TITLE, VERSION, _url)
        print str(e)
def Download(title, url):
    file = DownloadPath(title, url)
    if not file:
        return

    if not url.startswith(URL):
        url = URL + url

    try:
        import download
        download.download(url, file, TITLE, URL)
    except Exception, e:
        print TITLE + ' Error during downloading of ' + url
        print str(e)
def fetch_and_get_project_links(url: str) -> Set[str]:
    logging.debug("page %s", url)
    return get_project_links(download(url))
def download_climate(replace=False):
    prefix = "ftp://ftp.cdc.noaa.gov/Datasets/ncep.reanalysis.derived/"
    for fname in FILES:
        target = pjoin(CELER_PATH, 'climate/surface', fname)
        download.download(prefix + "surface/" + fname, target, replace=replace)
if check != 'last' and check != 'new':
    print('Argument --check/-c should be last or new')
    print('Exiting.....')
    exit()

if check == 'new':
    reset('1')
    wb = openpyxl.Workbook()
    sht = wb.active
    file = 'try1.xlsx'
    wb.save(file)

wb = openpyxl.load_workbook('try1.xlsx')
sht = wb.active

while 1:
    flag = download(file, seed, sht, wb)
    if flag is None:
        print("All sitemaps downloaded")
        print("Scraping successful")
        print("Exiting")
        break

import openpyxl
import json
import ast

wb = openpyxl.load_workbook('try1.xlsx')
sht = wb.active
i = 2
js = {"data": []}
'''
1. The image pages to fetch are Project and Unitia.
   Unitia image URLs start with https://ba.hitomi.la/galleries/1294943/
   followed by the image name;
   Project image URLs start with https://ba.hitomi.la/galleries/1286145/
2. Image names are kept in a list.
'''
from extract_data import extract_data
from download import download

unitia = "https://hitomi.la/reader/1294943.html"
project = "https://hitomi.la/reader/1286145.html"

unitia_img = extract_data(unitia)
unitia_id = unitia.split('reader' + '/')[1].split('.html')[0]
print(unitia_id)
download(unitia_img, unitia_id)
hpxdir = config['hpxdir']
zpfile = config['zpfile']
blfile = config['blfile']
section = config['db']
tags = config['tags']
ebv = config.get('ebv', None)

if os.path.exists(zpfile) and not args.force:
    print "Found %s; skipping download..." % zpfile
else:
    query = download.zeropoint_query(tags)
    print query
    sqlfile = os.path.splitext(zpfile)[0] + '.sql'
    download.download(zpfile, query, sqlfile=sqlfile, section=section, force=args.force)

for band in config['bands']:
    dirname = os.path.join(hpxdir, band)
    filenames = sorted(glob.glob(dirname + '/*.fits'))
    logdir = mkdir(os.path.join(dirname, 'log'))
    for filename in filenames:
        basename = os.path.basename(filename)
        if not args.force:
            fits = fitsio.FITS(filename)
            colname = 'MAG_ZERO'
            if colname in fits[1].get_colnames():
                print "Found column '%s'; skipping %s..." % (colname, basename)
def test_successful_download(self, mock_urlopen):
    self._mock_urlopen(mock_urlopen)
    url = 'https://somevalidurl.com/image.jpg'
    download(url, download_dir=self.test_image_dir)
import os
import json

import responder

from download import download
from parsing import Parser

env = os.environ
DEBUG = env['DEBUG'] in ['1', 'True', 'true']
LIBRARY = env.get('LIBRARY')
LANG = env.get('LANG')

api = responder.API(debug=DEBUG)

download(library=LIBRARY, lang=LANG)
parser = Parser(library=LIBRARY, lang=LANG)


@api.route("/")
async def parse(req, resp):
    body = await req.text
    texts = json.loads(body)
    docs = [parser.parse(text) for text in texts]
    resp.media = dict(data=docs)


if __name__ == "__main__":
    api.run()
def downloadInfo(id):
    outfile = open(str(id) + '_Info' + '.json', 'w')
    shotUrl = 'https://api.dribbble.com/v1/shots/{}?access_token=9a7123861f629b903d04c7fdd6d6ed00c124c6d44af05b7c8e9ae9f079064d95'
    shotHtml = download.download(shotUrl.format(id))
    json_data = json.loads(shotHtml)
    json.dump(json_data, outfile)
from skimage.io import imsave
from download import download
import numpy as np
import os

###### download train data
url = "https://www.dropbox.com/s/uzwczfzg80tfy8u/images.npy?dl=0"
path = download(url, './images.npy', replace=True)
data = np.load(path)

if not os.path.exists(os.path.join("data", "images")):
    os.makedirs(os.path.join("data", "images"))

for i in range(len(data)):
    imsave(os.path.join("data", "images", str(i) + ".png"), data[i])
    os.rename(os.path.join("data", "images", str(i) + ".png"),
              os.path.join("data", "images", str(i)))

os.remove('./images.npy')

###### download test data
url = "https://www.dropbox.com/s/jfk0e6xp8miwbei/images_test.npy?dl=0"
path = download(url, './images_test.npy', replace=True)
init = len(data)
data = np.load(path)

for i in range(len(data)):
    # mirrors the training loop above, continuing the numbering after
    # the training images via the `init` offset
    imsave(os.path.join("data", "images", str(init + i) + ".png"), data[i])
    os.rename(os.path.join("data", "images", str(init + i) + ".png"),
              os.path.join("data", "images", str(init + i)))
############################## variable definitions ###########################
# first and last year of the data to fetch
start_y = "1980"
end_y = "2018"

# URLs used
boj_url1 = "https://www.stat-search.boj.or.jp/info/nme_Mdframe.html"
boj_url2 = "https://www.stat-search.boj.or.jp/ssi/cgi-bin/famecgi2?cgi=$nme_s050"
dl_path = "https://www.stat-search.boj.or.jp/ssi/html"

############################## run the program ################################
if __name__ == '__main__':
    if not os.path.exists("boj.dict"):
        with open("boj.dict", "wb") as f1:
            pickle.dump(get_dict(boj_url1), f1)
    with open("boj.dict", "rb") as f2:
        boj_dict = pickle.load(f2)  # renamed from `dict` to avoid shadowing the builtin
    param_list = mk_display(boj_dict)
    if len(param_list) == 0:
        print("データ系列が選択されていません")  # no data series selected
        sys.exit()
    download(param_list, boj_url2, start_y, end_y, dl_path)
import os
import glob
import shutil
import zipfile

from download import download

os.makedirs('contents/1stCheckTest', exist_ok=True)

url = "http://hanyujiaocai.jinkan.kyoto-u.ac.jp/2017beijing/2017CALL/download/"
for i in range(11, 16):
    zip_name = "L" + str(i) + ".zip"
    zip_url = url + zip_name
    download(zip_url, './contents/1stCheckTest/' + zip_name)
    with zipfile.ZipFile('contents/1stCheckTest/' + zip_name) as existing_zip:
        existing_zip.extractall('contents/1stCheckTest/')

Q1_list = glob.glob('contents/1stCheckTest/L11/honbun/01.mp3')
Q2_list = glob.glob('contents/1stCheckTest/L11/bunpou/4/06.mp3')
Q3_list = glob.glob('contents/1stCheckTest/L12/bunpou/2/05.mp3')
Q4_list = glob.glob('contents/1stCheckTest/L12/bunpou/4/03.mp3')
Q5_list = glob.glob('contents/1stCheckTest/L13/honbun/07.mp3')
Q6_list = glob.glob('contents/1stCheckTest/L13/bunpou/4/04.mp3')
Q7_list = glob.glob('contents/1stCheckTest/L14/bunpou/3/04.mp3')
Q8_list = glob.glob('contents/1stCheckTest/L15/bunpou/2/01.mp3')
Q9_list = glob.glob('contents/1stCheckTest/L15/bunpou/3/02.mp3')
Q10_list = glob.glob('contents/1stCheckTest/L15/bunpou/4/03.mp3')

Q3_ans_list = glob.glob('contents/1stCheckTest/L12/bunpou/2/06.mp3')
Q6_ans_list = glob.glob('contents/1stCheckTest/L13/bunpou/4/05.mp3')
    'https://data.montpellier3m.fr/sites/default/files/ressources/MMM_EcoCompt_X2H20042632_archive.json',  # Lattes 1
    'https://data.montpellier3m.fr/sites/default/files/ressources/MMM_EcoCompt_X2H20042635_archive.json',  # Vieille poste
    'https://data.montpellier3m.fr/sites/default/files/ressources/MMM_EcoCompt_X2H20063161_archive.json',  # Gerhardt
    'https://data.montpellier3m.fr/sites/default/files/ressources/MMM_EcoCompt_X2H20063162_archive.json',  # Tanneurs
    'https://data.montpellier3m.fr/sites/default/files/ressources/MMM_EcoCompt_XTH19101158_archive.json',  # Delmas 1
    'https://data.montpellier3m.fr/sites/default/files/ressources/MMM_EcoCompt_X2H20063163_archive.json',  # Delmas 2
    'https://data.montpellier3m.fr/sites/default/files/ressources/MMM_EcoCompt_X2H20063164_archive.json',
]

download(url[0], "./data_visulization/Celleneuve.json", replace=False)
download(url[1], "./data_visulization/Lattes2.json", replace=False)
download(url[2], "./data_visulization/Berracasa.json", replace=False)
download(url[3], "./data_visulization/Lavérune.json", replace=False)
download(url[4], "./data_visulization/Lattes1.json", replace=False)
download(url[5], "./data_visulization/Vieille_poste.json", replace=False)
download(url[6], "./data_visulization/Gerhardt.json", replace=False)
download(url[7], "./data_visulization/Tanneurs.json", replace=False)
download(url[8], "./data_visulization/Delmas1.json", replace=False)
download(url[9], "./data_visulization/Delmas2.json", replace=False)

#%%
bike_traffic_df1 = pd.read_json('./data_visulization/Celleneuve.json', lines=True)
bike_traffic_df2 = pd.read_json('./data_visulization/Lattes2.json', lines=True)  # case fixed to match the file saved above
bike_traffic_df3 = pd.read_json('./data_visulization/Berracasa.json', lines=True)
bike_traffic_df4 = pd.read_json('./data_visulization/Lavérune.json', lines=True)
if __name__ == '__main__':
    if deleteDB():
        d = xbmcgui.Dialog()
        d.ok('PLD TV Guide', ' Extras eliminado correctamente')
    else:
        d = xbmcgui.Dialog()
        d.ok('PLD TV Guide', ' DESEA DESCARGAR LOS SKINS ')

    ADDON = xbmcaddon.Addon(id='script.tvguidemicro')
    datapath = xbmc.translatePath(ADDON.getAddonInfo('profile'))
    extras = os.path.join(datapath, 'extras')
    skinfolder = os.path.join(datapath, extras, 'skins')
    dest = os.path.join(extras, 'skins.zip')
    url = base64.b64decode('aHR0cDovL3BsZHR2Z3VpZGUucHJveWVjdG9sdXpkaWdpdGFsLmNvbS9wbGQtdHYtZ2lhL3NraW5zLXBhY2svc2tpbnMuemlw')

    try:
        os.makedirs(skinfolder)  # was os.makedirs(skins); `skins` is undefined here
    except:
        pass

    download.download(url, dest)
    extract.all(dest, extras)

    try:
        os.remove(dest)
    except:
        pass
# Packages we will need
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact
from download import download
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the data
url = "http://josephsalmon.eu/enseignement/datasets/Mesure_journaliere_Region_Occitanie_Polluants_Principaux.csv"
path_target = "./Mesure_journaliere_Region_Occitanie_Polluants_Principaux.csv"
download(url, path_target, replace=False)

# Name the data frame and show its dimensions
df_poccitanie = pd.read_csv("Mesure_journaliere_Region_Occitanie_Polluants_Principaux.csv")
print(df_poccitanie.shape)

# Inspect the table:
df_poccitanie.head()                            # first rows of the table
df_poccitanie.columns                           # column names
df_poccitanie['valeur_originale'].unique()      # original measurement values
polluant = df_poccitanie['polluant'].unique()   # pollutants
villes = df_poccitanie['nom_com'].unique()      # cities

# Add a "day" time column to the table
df_poccitanie['day'] = pd.to_datetime(df_poccitanie['date_debut'])
df_poccitanie.columns
start_date="2015-01-01" end_date = "2016-01-01" freq = "d" yfURL = constructYFURL(ticker,start_date,end_date,freq) print yfURL # Let's try out our download function localFilePath="/Users/swethakolalapudi/pytest/gspc.csv" download(localFilePath,yfURL) nseURL= constructNSEurl("CM",2,"MAY",2016) print nseURL nseFilePath="/Users/swethakolalapudi/pytest/nsebhav.csv.zip" download(nseFilePath,nseURL) # the download function gave an error. The NSE doesn't allow any program # to download it's files directly, but we can get around this. We just need to # make the NSE feel that it's a human and not a machine trying to do the download. # Now let's try to download thatfile again
from download import download
from flask import Flask, render_template, request
from apscheduler.schedulers.background import BackgroundScheduler

scheduler = BackgroundScheduler()
scheduler.start()

app = Flask(__name__)
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0

download()


def get_page():
    """
    Retrieves data graphs.
    Calls download to retrieve the graphs from the S3 bucket.
    """
    download()


scheduler.add_job(get_page, 'cron', hour='5')


@app.route('/')
def home():
    """
    Calls functions and the model to assess the data received and make a
    prediction, renders home.html and counts the amount of each risk
    received.
    """
# coding:utf-8
import json
import requests
from download import download

query = '永野芽郁'
'''
download all images of Mei Nagano from douban.com
'''

'''
for loop to request all urls
'''
for i in range(0, 22471, 20):
    url = 'https://www.douban.com/j/search_photo?q=' + \
        query + '&limit=20&start=' + str(i)
    html = requests.get(url).text
    response = json.loads(html, encoding='utf-8')
    for image in response['images']:
        print('img src is:', image['src'])
        download(image['src'], image['id'], 'scrapted_imgs')
def downloadArtwork(id, page):
    artistUrl = 'https://api.dribbble.com/v1/shots/{}/comments?page={}&access_token=9a7123861f629b903d04c7fdd6d6ed00c124c6d44af05b7c8e9ae9f079064d95'
    artistHtml = download.download(artistUrl.format(id, page))
    if artistHtml is not None:
        json_data = json.loads(artistHtml)
        return json_data
def downloadPage():
    path = pathN
    fname = str(request.form.get('fname'))
    return download.download(path, fname)
    '--seed', type=int, help='Number of Seeds', nargs='?', default=20)
parser.add_argument('-f', '--file', type=str, required=True, help='Output file name')

args = parser.parse_args()
file = args.file
seed = args.seed
check = args.check

if check != 'last' and check != 'new':
    print('Argument --check/-c should be last or new')
    print('Exiting.....')
    exit()

if check == 'new':
    reset()

while 1:
    flag = download(file, seed)
    if flag is None:
        print("All sitemaps downloaded")
        print("Scraping successful")
        print("Exiting")
        break
    doStandard(useScript=True)

if choice == _PLAYLIST:
    activateWindow('videoplaylist')

if choice == _DOWNLOAD:
    utils.log('download url: %s' % file)
    dst = os.path.join(ADDON.getSetting('DOWNLOAD_FOLDER'), getDownloadTitle(file))
    if utils.DialogYesNo(GETTEXT(30243), GETTEXT(30244)):
        xbmc.Player().stop()
        import download
        download.download(file, dst, 'Super Favourites')

if choice == _SF_SETTINGS:
    utils.ADDON.openSettings()

if choice == _SETTINGS:
    xbmcaddon.Addon(localAddon).openSettings()

if choice == _ADDTOFAVES:
    import favourite
    if path.lower().startswith('addons://user/'):
        path = path.replace('addons://user/', 'plugin://')
def download_libsvm(dataset, destination, replace=False):
    """Download a dataset from the LIBSVM website."""
    url = ("https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/"
           + NAMES[dataset])
    path = download(url, destination, replace=replace)
    return path
tasks = [0, 1, 2, 3, 4, 5, 6]
n_tasks = len(tasks)
mtgl_only = False
positive = False

###############################################################################
# Download data. The images 'X' are grouped and sorted. Generate true
# labels 'Y' accordingly

if not os.path.exists('./data'):
    os.mkdir('./data')
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/"
url += "mfeat/mfeat-pix"
# path fixed from ".data/digits.txt" so the file lands in the ./data
# directory created above
if not os.path.exists("./data/digits.txt"):
    path = download(url, "./data/digits.txt", replace=True)
Xraw = np.loadtxt("./data/digits.txt")
Xraw = Xraw.reshape(10, 200, 240)
yraw = np.zeros((10, 2000))
for k in range(10):
    yraw[k, 200 * k:200 * (k + 1)] = 1.
yraw = yraw.reshape(10, 10, 200)

###############################################################################
# Each digit corresponds to a task. Reshape data to fit a multi-task
# learner and split it into a cv and validation set.
# Here the design matrix X is the same for all tasks.

samples = np.arange(200)
samples = rnd.permutation(samples)[:n_samples]
mask_valid = np.ones(200).astype(bool)
req_headers = {"User-Agent": req_useragent} req = requests.get(req_url, headers=req_headers, timeout=15) # 查看回應 if sys_debug: print("HOST => HTTP: " + req.url) print("HOST <= HTTP: " + str(req.status_code) + "\n") if req.status_code != requests.codes.ok: print("Um... Bad request :/\n") continue # 解析 req.encoding = "utf-8" req = soup(req.text, "html.parser") data_title = req.title.string data_url = (req.select("div#video-player-bg script")[3]).string pattern = re.compile(r"html5player\.setVideoUrlHigh\(\'.*") data_url = re.findall(pattern, data_url)[0] if sys_debug: print("Got: " + data_url + "\n") pattern = re.compile(r"http.*\'") data_url = re.findall(pattern, data_url)[0] data_url = data_url.replace(" ", "") data_url = data_url.replace("\'", "") if sys_debug: print("Analyzed: " + data_url + "\n") # 下載 import download download.download(req_headers=req_headers, data_title=data_title, data_url=data_url, sys_debug=sys_debug)
def update(email, password):
    """Login to FutureLearn with the supplied credentials, get a list of
    courses and their metadata, then attempt to download the associated
    CSV files.

    :param:
        email (str): The facilitator's email address
        password (str): The facilitator's FutureLearn password
    """
    # logging in to the FutureLearn website with the credentials taken from config.json
    now = time.strftime("%c")
    print("Current time %s" % now)
    loginInfo, rep = login(email, password, 'https://www.futurelearn.com/sign-in')
    if rep.status_code == 200:
        print "Login OK..."
        # check if user has admin privileges!
        cos = FLCourses(loginInfo)

        # files which contains the list of csv paths on disk
        files = {}
        print "Retrieving courses..."
        enrolmentData = []

        # cos.getCourses().items() will return each course and its runs alongside
        # various info such as start_date, end_date etc.; look into ./course_run.py
        # for the full list. This info is scraped from the website.
        # By the end of this for loop there will be a csv file named
        # "Courses Data/Data/Courses-Data.csv" which contains the overall info about
        # each run of each course. This file will later be inserted into mysql,
        # alongside other csvs which are downloaded directly from the website
        # (not individually scraped).
        for course_name, runs in cos.getCourses().items():
            for run, info in runs.items():
                start_date = info['start_date'].strftime('%Y-%m-%d')
                end_date = info['end_date'].strftime('%Y-%m-%d')
                dir_path = "../data/" + cos.getUniName() + "/" + course_name + "/" + run + " - " + start_date + " - " + end_date
                run_enrol_data = info['enrolmentData']
                run_enrol_data['no_of_weeks'] = info['duration_weeks']
                enrolmentData.append(run_enrol_data)
                print len(info['datasets'])
                if not len(info['datasets']) == 0:
                    # download all csvs for each run within courses
                    download(loginInfo, cos.getUniName(), course_name, run, info)
                    for url, filename in info['datasets'].items():
                        files[dir_path + "/" + filename] = run

        courses_path = "../data/" + cos.getUniName() + "/Courses Data/Data"
        courses_filename = "/Courses-Data.csv"
        if not os.path.exists(courses_path):
            os.makedirs(courses_path)

        # multiple IFs below because some runs have nulls.
        # In this step, the scraped info is written to the csv file "Courses-Data.csv"
        with open(courses_path + courses_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow(
                "run_id,start_date,no_of_weeks,joiners,leavers,leavers_percent,learners,learners_percent,active_learners,active_learners_percent,returning_learners,returning_learners_percent,social_learners,social_learners_percent,fully_participating_learners,fully_participating_learners_percent,statements_sold,certificates_sold,upgrades_sold,upgrades_sold_percent,learners_with_at_least_50_percent_step_completion,learners_with_at_least_50_percent_step_completion_percent,learners_with_at_least_90_percent_step_completion,learners_with_at_least_90_percent_step_completion_percent,run_retention_index,run_retention_index_percent,gross_revenue_in_gbp,course,course_run,run"
                .split(','))
            for row in enrolmentData:
                print(row)
                if 'upgrades_sold' in row:
                    line = '{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},N/A,N/A,{10},{11},N/A,N/A,0,0,{12},{13},{14},{15},{16},{17},{18},{19},{20},{21},{22},{23}'.format(
                        row['run_id'], row['start_date'], row['no_of_weeks'],
                        row['joiners'],
                        row['leavers'].split(" - ")[0], row['leavers'].split(" - ")[1],
                        row['learners'].split(" - ")[0], row['learners'].split(" - ")[1],
                        row['active_learners'].split(" - ")[0], row['active_learners'].split(" - ")[1],
                        row['social_learners'].split(" - ")[0], row['social_learners'].split(" - ")[1],
                        row['upgrades_sold'].split(" - ")[0], row['upgrades_sold'].split(" - ")[1],
                        row['learners_with_at_least_50_percent_step_completion'].split(" - ")[0],
                        row['learners_with_at_least_50_percent_step_completion'].split(" - ")[1],
                        row['learners_with_at_least_90_percent_step_completion'].split(" - ")[0],
                        row['learners_with_at_least_90_percent_step_completion'].split(" - ")[1],
                        row['run_retention_index'].split(" - ")[0], row['run_retention_index'].split(" - ")[1],
                        row['gross_revenue_in_gbp'].split(" - ")[0],
                        row['course'], row['course_run'], row['course_run'])
                elif 'joiners' not in row:
                    line = '{0},{1},{2},N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,{3},{4},{5}'.format(
                        row['run_id'], row['start_date'], row['no_of_weeks'],
                        row['course'], row['course_run'], row['course_run'])
                elif 'learners' not in row and 'fully_participating_learners' in row:
                    line = '{0},{1},{2},{3},{4},{5},N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,{6},{7},{8},{9},N/A,N/A,{10},{11},{12},{13},{14},{15},N/A,{16},{17},{18}'.format(
                        row['run_id'], row['start_date'], row['no_of_weeks'],
                        row['joiners'],
                        row['leavers'].split(" - ")[0], row['leavers'].split(" - ")[1],
                        row['fully_participating_learners'].split(" - ")[0],
                        row['fully_participating_learners'].split(" - ")[1],
                        row['statements_sold'], row['certificates_sold'],
                        row['learners_with_at_least_50_percent_step_completion'].split(" - ")[0],
                        row['learners_with_at_least_50_percent_step_completion'].split(" - ")[1],
                        row['learners_with_at_least_90_percent_step_completion'].split(" - ")[0],
                        row['learners_with_at_least_90_percent_step_completion'].split(" - ")[1],
                        row['run_retention_index'].split(" - ")[0], row['run_retention_index'].split(" - ")[1],
                        row['course'], row['course_run'], row['course_run'])
                elif 'learners' not in row and 'fully_participating_learners' not in row:
                    line = '{0},{1},{2},{3},{4},{5},N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,{6},N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,{7},{8},{9}'.format(
                        row['run_id'], row['start_date'], row['no_of_weeks'],
                        row['joiners'],
                        row['leavers'].split(" - ")[0], row['leavers'].split(" - ")[1],
                        row['statements_sold'],
                        row['course'], row['course_run'], row['course_run'])
                # elif 'certificates_sold' in row:
                #     line = '{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12}'.format(row['run_id'], row['start_date'], row['no_of_weeks'], row['joiners'], row['leavers'], row['learners'], row['active_learners'], row['returning_learners'], row['social_learners'], row['fully_participating_learners'], row['certificates_sold'], row['course'], row['course_run'])
                elif 'returning_learners' not in row:
                    line = '{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},N/A,N/A,{10},{11},N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,{12},{13},{14}'.format(
                        row['run_id'], row['start_date'], row['no_of_weeks'],
                        row['joiners'],
                        row['leavers'].split(" - ")[0], row['leavers'].split(" - ")[1],
                        row['learners'].split(" - ")[0], row['learners'].split(" - ")[1],
                        row['active_learners'].split(" - ")[0], row['active_learners'].split(" - ")[1],
                        row['social_learners'].split(" - ")[0], row['social_learners'].split(" - ")[1],
                        row['course'], row['course_run'], row['course_run'])
                else:
                    line = '{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12},{13},{14},{15},{16},{17},N/A,N/A,{18},{19},{20},{21},{22},{23},N/A,{24},{25},{26}'.format(
                        row['run_id'], row['start_date'], row['no_of_weeks'],
                        row['joiners'],
                        row['leavers'].split(" - ")[0], row['leavers'].split(" - ")[1],
                        row['learners'].split(" - ")[0], row['learners'].split(" - ")[1],
                        row['active_learners'].split(" - ")[0], row['active_learners'].split(" - ")[1],
                        row['returning_learners'].split(" - ")[0], row['returning_learners'].split(" - ")[1],
                        row['social_learners'].split(" - ")[0], row['social_learners'].split(" - ")[1],
                        row['fully_participating_learners'].split(" - ")[0],
                        row['fully_participating_learners'].split(" - ")[1],
                        row['statements_sold'], row['certificates_sold'],
                        row['learners_with_at_least_50_percent_step_completion'].split(" - ")[0],
                        row['learners_with_at_least_50_percent_step_completion'].split(" - ")[1],
                        row['learners_with_at_least_90_percent_step_completion'].split(" - ")[0],
                        row['learners_with_at_least_90_percent_step_completion'].split(" - ")[1],
                        row['run_retention_index'].split(" - ")[0], row['run_retention_index'].split(" - ")[1],
                        row['course'], row['course_run'], row['course_run'])
                writer.writerow(line.split(','))

        files[courses_path + courses_filename] = 1
        print "Number of csv files to be inserted into database: " + str(len(files))

        # JSR Disable import as unused
        print "Changing the csv hex to unix style"
        output = subprocess.call(['../data/removeCarrigeReturn.sh', '../data'])

        # Now that all csvs are in place and stored in "files", insert them into mysql
        importData(files, cos.getUniName())

        f_update_time = open("../data/" + cos.getUniName() + "/updated.txt", 'w')
        f_update_time.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
        print("Finished Time %s" % datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
        f_update_time.close()
    else:
        print "Fail"
        f = open('fail', 'a')
        f.write('update fail ' + datetime.datetime.now().strftime('%Y-%m-%d') + '\n')
        f.close()
            if keykey != 'page':
                f.write('%s: %s\n' % (keykey, str(value)))
            f.write('\n')
            # f.write(str(links))
        else:
            return None
    except IOError as ioe:
        print u'失败保存:%s %s' % (local_file, traceback.format_exc())  # save failed
        return None
    else:
        print u'成功保存:%s' % local_file  # saved successfully
    return local_file


if __name__ == '__main__':
    # test get_links()
    page = download.download('https://blog.csdn.net/le_17_4_6/', save=True)
    base_url = getbyre(page, 'var\sbaseUrl\s=\s["\'](\S+)["\']')[0]    # base url for the paging links
    pagesize = getbyre(page, 'var\spageSize\s=\s(\d+)')[0]             # number of articles per page
    pagesnum = getbyre(page, 'var\slistTotal\s=\s(\d+)')[0]            # total number of articles
    print base_url, pagesize, pagesnum
    listnum = int(pagesnum) // int(pagesize) + 2                       # number of pages
    links = []
    for lp in range(1, listnum):
        print lp
        list_url = base_url + '/' + str(lp)
        page_i = download.download(list_url)
        links.extend(getbyre(page_i, re_str='<a[^>]+href=["\'](https:\/\/blog\.csdn\.net\/le_17_4_6\/article\/details\/\d+)["\']'))
    hasload = []
    for link in links:
        if link not in hasload:
            hasload.append(link)
    m = str(len(training_data))

    # for line in training_data:
    #     print line

    print 'n = ', n
    print 'o = ', o
    print 'm = ', m

    # read all param and send to subprocess' stdin
    input_data = prepare_input_data(name, training_data, n, o, m)
    if input_data is not None:
        p = subprocess.Popen('./test/ModelTest', stdin=subprocess.PIPE)
        p.communicate(input=input_data)


if __name__ == '__main__':
    download.download()
    data_path = download.extract()
    if len(sys.argv) == 1:
        exit(0)
    for data in data_path['c']:
        name = data.split('/')[-1][:-4]
        if name != sys.argv[1]:
            continue
        process(data, name)
        break
def startdownload():
    with open('mydownloads.txt', 'r') as f:
        downloadinglist = f.read()
    download(downloadinglist)