def main():
    """Log in, read the song list from ``songsList.txt`` and download it."""
    login()

    # Alternative source (disabled): getSongsFromInternet()
    songs = getSongsFromLocalarea("songsList.txt")
    # partition(songs) and print(songs) were debugging steps and stay disabled.

    Downloader(songs).download(3, threadNum=10)
    print("done.")
def test_download_to_dir(self):
    """A Downloader given a directory places the downloaded file in it."""
    target_dir = tempfile.mkdtemp()
    try:
        downloader = Downloader(target_dir)
        source_url = 'file://' + __file__
        with util.CaptureStdout(), downloader.download(source_url) as filename:
            pass
    finally:
        # Always remove the scratch directory, even if the download failed.
        shutil.rmtree(target_dir)
    self.assertEqual(target_dir, os.path.dirname(filename))
def test_file_removed_on_exception(self):
    """The downloaded file is gone after the ``with`` body raises."""
    try:
        downloader = Downloader()
        source_url = 'file://' + __file__
        with util.CaptureStdout(), downloader.download(source_url) as filename:
            raise TestException()
    except TestException:
        # Expected: only the clean-up side effect is of interest.
        pass
    self.assertFalse(os.path.exists(filename))
def main(reparse=False):
    """Main entry point for this ETL process.

    Downloads, updates db, stores the nightly data.

    This is the binary to run from a cron job.

    Args:
        reparse: Passed through unchanged to ``Nightlies.etl_from_dir``.
    """
    os.chdir(os.path.dirname(__file__))
    logger = log.logger()
    logger.info('Starting ETL of FBO Nightly data.')

    # Figure out where we put data
    datadir = get_datadir()
    dbdir = get_dbdir()
    sqlite_dir = os.path.join(dbdir, "sqlite3")
    if not os.path.exists(sqlite_dir):
        os.makedirs(sqlite_dir)

    # Get a database connection, create db if needed
    db = model.FBO(
        "development",
        db_conf_file=os.path.join(dbdir, "dbconf.yml"))
    try:
        # Make sure the db schema is up to date, create tables, etc.
        db.migrate()

        assert os.path.exists(datadir)

        # Download raw data files.
        # NOTE(review): fname_urls is not defined in this function; it is
        # expected to exist at module scope -- confirm.
        dloader = Downloader(datadir, db, 'nightly')
        dloader.download(fname_urls, True)

        # Do our ETL
        nights = Nightlies(db)
        nights.etl_from_dir(reparse=reparse)
    finally:
        # Close the db connection even when a step above fails.
        db.close()

    # Use the same logger as the start message (was a bare ``info(...)``).
    logger.info('Finished ETL of FBO data.')
def download(url='', title='', artist='', gender='', album=''):
    """Download the track described by the submitted form and send it back.

    The keyword arguments are placeholders only: every value is read from
    ``request.form``. An ``IOError`` raised while downloading is returned as
    its message text instead of a file.
    """
    cleanMp3s()

    form = request.form
    url = form['url']
    title = form['title']
    artist = form['artist']
    gender = form['gender']
    album = form['album']

    downloader = Downloader(url, title, artist, gender, album)
    try:
        path = downloader.download()
    except IOError as error:
        return str(error)

    base_dir = os.path.abspath('.')
    return send_from_directory(base_dir, path, as_attachment=True)
def main():
    """Run the pipeline stages named on the command line.

    Recognised words (case-insensitive, any order): ``help``, ``download``,
    ``tag``, ``train`` and ``test``. Help is also printed when no argument
    besides the program name is given. Printing help does not stop the other
    requested stages from running.
    """
    args = [i.lower() for i in sys.argv]

    # Compare by value: ``is`` on an int literal relies on interpreter
    # caching and triggers a SyntaxWarning on modern Python.
    if 'help' in args or len(args) == 1:
        print_help()

    if 'download' in args:
        down = Downloader()
        down.download()
        down.preprocess()
        down.write_out(train="train.dat", test="test.dat")

    if 'tag' in args:
        t = Tagger()
        t.tag("test.dat")
        t.write_out("test_tagged.dat")

    if 'train' in args:
        m = Model()
        m.train("train.dat")
        m.write_out()

    if 'test' in args:
        m = Model("model.mdl")
        m.test("test_tagged.dat")
def downloadlink(url='', title='', artist='', gender='', album=''):
    """Download the requested track, move it under ``files/`` and link to it.

    The keyword arguments are placeholders only: every value is read from
    ``request.form``. Returns an HTML anchor pointing at the moved file.
    """
    cleanMp3s()

    form = request.form
    url = form['url']
    title = form['title']
    artist = form['artist']
    gender = form['gender']
    album = form['album']

    path = Downloader(url, title, artist, gender, album).download()

    target_dir = 'files/'
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    newpath = target_dir + path
    os.rename(path, newpath)
    return '<a href="/{0}">{0}</a>'.format(newpath)
# Example of a full scene id: LC80130312013273LGN00
prefix = 'LC8013031'
#postfix = 'LGN01'

# Scene suffixes to process; each is appended to ``prefix``.
images = [
    '2013273LGN00',  # Sept 29th, 2013
]

print("Processing {0} image archives ...".format(len(images)))

for image in images:
    imagename = prefix + image
    print("Working on '{0}' ...".format(imagename))

    # Fetch, unpack, compose the RGB image and render the preview.
    dler.download(imagename)
    uncomp.uncompress(imagename)
    rgb.makergb(imagename, RED=5, GREEN=6, BLUE=4)
    prev.makepreview(imagename)

    # Clean-up: drop the per-band TIFFs. The *_PROJECTED* and *_RGB*
    # files are deliberately kept (their removal is disabled).
    os.system("rm ./{0}/*_B*.TIF".format(imagename))
    #os.system("rm ./{0}/*_PROJECTED*".format(imagename))
    #os.system('rm ./{0}/*_RGB*')

    print("Done with {0}".format(imagename))

print("Done creating JPEG preview files for all {0} image archives.".format(len(images)))
class Projects:
    """Ordered list of the projects to download, build and test.

    Projects are built in list order. Each project contributes a downloader
    that is merged into one shared ``Downloader``.
    """

    def __init__(self, target):
        """Create the project and test lists and merge the downloaders.

        Args:
            target: Forwarded to ``libgit2Project``.
        """
        self.downloader = Downloader()
        self.projects = [
            Project(
                "pugixml",
                "PUGIXML",
                None,
                False),
            libgit2Project(target),
            Project(
                "Ishiko/Errors",
                "ISHIKO",
                "Makefiles/$(compiler_short_name)/IshikoErrors.sln",
                False),
            Project(
                "Ishiko/Collections",
                "ISHIKO",
                "Makefiles/$(compiler_short_name)/IshikoCollections.sln",
                False),
            Project(
                "Ishiko/Process",
                "ISHIKO",
                "Makefiles/$(compiler_short_name)/IshikoProcess.sln",
                False),
            Project(
                "DiplodocusDB/Core",
                "DIPLODOCUSDB",
                "Makefiles/$(compiler_short_name)/DiplodocusDBCore.sln",
                False),
            Project(
                "DiplodocusDB/TreeDB/Core",
                "DIPLODOCUSDB",
                "Makefiles/$(compiler_short_name)/DiplodocusTreeDBCore.sln",
                False),
            Project(
                "DiplodocusDB/TreeDB/XMLTreeDB",
                "DIPLODOCUSDB",
                "Makefiles/$(compiler_short_name)/DiplodocusXMLTreeDB.sln",
                False),
            Project(
                "CodeSmithyIDE/CodeSmithy/Core",
                "CODESMITHY",
                "Makefiles/$(compiler_short_name)/CodeSmithyCore.sln",
                False),
            Project(
                "CodeSmithyIDE/CodeSmithy/Make",
                "CODESMITHY",
                "Makefiles/$(compiler_short_name)/CodeSmithyMake.sln",
                False),
            Project(
                "Ishiko/TestFramework/Core",
                "ISHIKO",
                "Makefiles/$(compiler_short_name)/IshikoTestFrameworkCore.sln",
                True),
            Project(
                "Ishiko/WindowsRegistry",
                "ISHIKO",
                "Makefiles/$(compiler_short_name)/IshikoWindowsRegistry.sln",
                True),
            Project(
                "Ishiko/FileTypes",
                "ISHIKO",
                "Makefiles/$(compiler_short_name)/IshikoFileTypes.sln",
                True),
            Project(
                "CodeSmithyIDE/CodeSmithy/UICore",
                "CODESMITHY",
                "Makefiles/$(compiler_short_name)/CodeSmithyUICore.sln",
                True),
            wxWidgetsProject(),
            Project(
                "CodeSmithyIDE/CodeSmithy/UIElements",
                "CODESMITHY",
                "Makefiles/$(compiler_short_name)/CodeSmithyUIElements.sln",
                True),
            Project(
                "CodeSmithyIDE/CodeSmithy/UIImplementation",
                "CODESMITHY",
                "Makefiles/$(compiler_short_name)/CodeSmithyUIImplementation.sln",
                True),
            Project(
                "CodeSmithyIDE/CodeSmithy/UI",
                "CODESMITHY",
                "Makefiles/$(compiler_short_name)/CodeSmithy.sln",
                True),
            Project(
                "CodeSmithyIDE/CodeSmithy/Tests/Core",
                "CODESMITHY",
                "Makefiles/$(compiler_short_name)/CodeSmithyCoreTests.sln",
                True),
            Project(
                "CodeSmithyIDE/CodeSmithy/Tests/Make",
                "CODESMITHY",
                "Makefiles/$(compiler_short_name)/CodeSmithyMakeTests.sln",
                True),
            Project(
                "CodeSmithyIDE/CodeSmithy/Tests/UICore",
                "CODESMITHY",
                "Makefiles/$(compiler_short_name)/CodeSmithyUICoreTests.sln",
                True),
        ]
        self.tests = [
            Test("CodeSmithyIDE/CodeSmithy/Tests/Core",
                 "CodeSmithyCoreTests.exe"),
        ]
        self._init_downloader()

    def get(self, name):
        """Return the project whose ``name`` equals *name*, or ``None``."""
        for project in self.projects:
            if project.name == name:
                return project
        return None

    def set_environment_variables(self, output):
        """Export one environment variable per distinct ``env_var``.

        The value is ``<cwd>/Build/<first component of the project name>``.

        Args:
            output: Object providing ``print_step_title`` and ``next_step``.

        Raises:
            RuntimeError: If two projects sharing an ``env_var`` would give
                it different values.
        """
        print("")
        output.print_step_title("Setting environment variables")
        env = {}
        for project in self.projects:
            value = os.getcwd() + "/Build/" + project.name.split("/")[0]
            if project.env_var in env:
                old_value = env[project.env_var]
                if old_value != value:
                    exception_text = "Conflicting values for " + \
                        "environment variable " + project.env_var + " (" + \
                        value + " vs " + old_value + ")"
                    raise RuntimeError(exception_text)
            else:
                env[project.env_var] = value
        # Only touch os.environ once every project has been validated.
        for var_name in env:
            print(" " + var_name + ": " + env[var_name])
            os.environ[var_name] = env[var_name]
        output.next_step()

    def download(self):
        """Run the merged downloader."""
        self.downloader.download()

    def build(self, build_tools, build_configuration,
              input, state, output):
        """Unzip and build every project that is not already built.

        Args:
            build_tools: Forwarded to ``project.build``.
            build_configuration: Forwarded to ``project.build``.
            input: Forwarded to ``project.build``.
            state: Persistent build state; read for ``build_complete`` and
                ``built_projects`` and updated as projects are built.
            output: Object providing ``print_step_title`` and ``next_step``.
        """
        # For now only bypass pugixml, libgit2 and wxWidgets because they
        # are independent from the rest. More complex logic is required to
        # handle the other projects.
        # Unless we have built all projects successfully.
        for project in self.projects:
            if state.build_complete:
                project.built = True
            elif project.name in ["libgit2", "pugixml", "wxWidgets"]:
                if project.name in state.built_projects:
                    project.built = True

        for project in self.projects:
            print("")
            output.print_step_title("Building " + project.name)
            if project.built:
                print(" Using previous execution")
            else:
                project.unzip(self.downloader)
                project.build(build_tools, build_configuration,
                              input, output)
                state.set_built_project(project.name)
            output.next_step()
        state.set_build_complete()

    def test(self, compiler, architecture_dir_name, input):
        """Run every registered test executable.

        Args:
            compiler: Forwarded to ``project.launch`` when the IDE is opened.
            architecture_dir_name: Forwarded to ``project.launch``.
            input: Object providing ``query`` to ask the user a question.

        Raises:
            RuntimeError: If a test executable exits with a non-zero status.
        """
        for test in self.tests:
            # TODO: the compiler/architecture/configuration part of the path
            # is hard-coded.
            executable_path = "Build/" + test.project_name + \
                "/Makefiles/VC15/x64/Debug/" + test.executable
            try:
                subprocess.check_call([executable_path])
            except subprocess.CalledProcessError:
                launch_ide = input.query(" Tests failed. Do you want to"
                                         " launch the IDE?", ["y", "n"], "n")
                if launch_ide == "y":
                    self.get(test.project_name).launch(compiler,
                                                       architecture_dir_name)
                raise RuntimeError(test.project_name + " tests failed.")

    def _init_downloader(self):
        """Merge each project's own downloader into the shared one."""
        for project in self.projects:
            project_downloader = project.create_downloader()
            self.downloader.merge(project_downloader)
class VkBot:
    """VK group bot that replies to a YouTube link with an uploaded audio.

    Incoming messages are read through a group long-poll. A link found in
    the text or in a video attachment is handed to ``Downloader``; the
    resulting file is uploaded as audio and sent back to the sender.
    """

    def __init__(self):
        """Install the SIGTERM handler and open all API sessions."""
        self.timeout = TIMEOUT_SECONDS
        signal.signal(signal.SIGTERM, self.catch_signal)
        self.init_connection()

    def init_connection(self):
        """(Re)create every session object, retrying on connection errors.

        Each failed attempt sleeps ``self.timeout`` seconds and then
        increases the timeout by one. Retries run in a loop rather than by
        recursion, so a long outage cannot exhaust the call stack.
        """
        while True:
            try:
                self._connect()
                return
            except requests.exceptions.ConnectionError as e:
                print("Reinitializing session data")
                print(e)
                print("Timeout:", self.timeout)
                time.sleep(self.timeout)
                self.timeout += 1

    def _connect(self):
        """Authenticate and build the session, API, upload and loader objects.

        Credentials come from the LOGIN, PASSW, KEY and GROUP_ID environment
        variables. An authentication error terminates the process; a captcha
        is answered interactively from standard input.
        """
        self.vk_session = vk_api.VkApi(login=os.getenv("LOGIN"),
                                       password=os.getenv("PASSW"))
        try:
            self.vk_session.auth(token_only=True)
        except vk_api.AuthError as e:
            print(e)
            sys.exit(0)
        except vk_api.exceptions.Captcha as e:
            print("CAPTCHA")
            print(e.get_url())
            code = input()
            e.try_again(key=code)
        print("ID:", os.getpid())
        print("Got VK API Session")
        self.group_session = vk_api.VkApi(token=os.getenv("KEY"))
        print("Got Group Session")
        self.longpoll = VkBotLongPoll(self.group_session,
                                      os.getenv("GROUP_ID"))
        print("Got Longpoll Object")
        self.api = self.vk_session.get_api()
        print("Got API Object")
        self.group_api = self.group_session.get_api()
        print("Got Group API Object")
        self.upload = vk_api.VkUpload(self.vk_session)
        print("Got Upload Object")
        self.loader = Downloader()
        print("Got Downloader Object")

    def catch_signal(self, signal, frame):
        """Signal handler: announce shutdown and exit with status 0."""
        print("Stopping...")
        sys.exit(0)

    def send_message(self, user_id, message, attachment=None):
        """Send *message* (and an optional attachment) to *user_id*."""
        self.group_api.messages.send(user_id=user_id,
                                     random_id=get_random_id(),
                                     message=message,
                                     attachment=attachment)

    @staticmethod
    def _split_title_artist(path):
        """Return ``(title, artist)`` parsed from a downloaded file path.

        The path is expected to look like ``storage/<title>---<artist>.<ext>``.
        Only the literal ``storage/`` prefix and the file extension are
        removed. ``str.lstrip``/``str.rstrip`` must not be used here: they
        strip *sets of characters* and would eat leading/trailing letters of
        the title and artist.

        Raises:
            ValueError: If the name does not contain exactly one ``---``.
        """
        prefix = "storage/"
        name = path[len(prefix):] if path.startswith(prefix) else path
        name = os.path.splitext(name)[0]
        title, artist = name.split("---")
        return title, artist

    def response(self, event):
        """Handle one incoming message: download, upload and reply.

        The sender always gets an answer after "Wait a bit": either the
        audio, or the error picture when no link was found or the download
        produced nothing.
        """
        self.send_message(user_id=event.obj.message["from_id"],
                          message="Wait a bit")
        link = self.find_yt(event.obj)
        result = self.loader.download(link) if link else None
        if not result:
            self.send_error(event)
            print()
            return

        if result.endswith(".mp3"):
            path = result
        else:
            # Swap everything from the first dot onwards for ".mp3".
            # NOTE(review): presumably the downloader leaves a converted
            # .mp3 under that name -- confirm against Downloader.
            index = result.find(".")
            path = result.replace(result[index:], ".mp3")

        title, artist = self._split_title_artist(result)
        self.upload_yt(event, path, title, artist)
        os.remove(path)
        print()

    def _handle_event(self, event):
        """Log one long-poll event and answer it if it is a new message."""
        if event.type == VkBotEventType.MESSAGE_NEW:
            # pformat returns the text; pprint.pprint() returns None.
            print("Event:\n", pprint.pformat(event.obj))
            print("From:", event.obj.message["from_id"])
            print('Message:', event.obj.message["text"])
            self.response(event)
        elif event.type == VkBotEventType.MESSAGE_REPLY:
            print("From(Bot):", event.obj.peer_id)
            print('Message(Bot):', event.obj.text)
            print()
        else:
            print(event.type)
            print()

    def start(self):
        """Listen for events forever, reconnecting after a read timeout."""
        print("Start Longpoll listening")
        while True:
            try:
                for event in self.longpoll.listen():
                    self._handle_event(event)
            except requests.exceptions.ReadTimeout as e:
                print("Got exception")
                print(type(e))
                print(e)
                time.sleep(self.timeout)
                self.init_connection()
                self.timeout = TIMEOUT_SECONDS
                # Keep looping instead of calling start() recursively.
                print("Start Longpoll listening")

    def upload_yt(self, event, path, title, artist):
        """Upload the file at *path* as audio and send it to the sender.

        On a VK API error the error message text is sent instead.
        """
        try:
            audio = self.upload.audio(audio=path, title=title, artist=artist)
        except vk_api.exceptions.ApiError as e:
            self.send_message(user_id=event.obj.message["from_id"],
                              message=f"{e.error['error_msg']}")
        else:
            self.send_message(
                user_id=event.obj.message["from_id"],
                message="Your audio:",
                attachment=f"audio{audio['owner_id']}_{audio['id']}")

    def send_error(self, event):
        """Reply to the sender with the fixed error picture."""
        self.send_message(user_id=event.obj.message["from_id"],
                          message='',
                          attachment='photo-185940778_457239022')

    def find_yt(self, event):
        """Return a YouTube link for the message, or ``None``.

        The message text is searched first. When it contains no link, the
        first attachment is used if it is a video whose platform is
        ``'YouTube'``; its player URL is looked up through the video API.

        Args:
            event: Object whose ``message`` mapping has ``text`` and
                ``attachments`` entries.
        """
        text = event.message["text"]
        if text != '':
            pattern = r'(http(s)?:\/\/)?((w){3}.)?youtu(be|.be)?(\.com)?\/.\S*'
            result = re.search(pattern, text)
            if result:
                return result.group()

        attachments = event.message['attachments']
        if not attachments:
            return None
        attachment = attachments[0]
        if attachment['type'] != 'video' or attachment['video'].get(
                'platform') != 'YouTube':
            return None
        videos = self.api.video.get(
            videos=
            f"{attachment['video']['owner_id']}_{attachment['video']['id']}"
        )
        if not videos['items']:
            return None
        return videos['items'][0]['player']
class Handler(object):
    """Consume profile URLs from a queue and crawl each user once.

    For every URL the profile page is downloaded and parsed, then the
    ``get_wb``, ``get_url`` and ``get_info`` tasks are run as greenlets.
    """

    def __init__(self, url_q, wb_q, info_q):
        """Store the three queues and create the download/validation helpers."""
        self.url_q = url_q
        self.wb_q = wb_q
        self.info_q = info_q
        self.url_count = 1
        self.handle_urls = set()
        self.url_suf = 'http://weibo.com'
        self.downloader = Downloader()
        self.validater = Validater()

    def oninit(self, url):
        """Download the profile at *url* and extract the user's basic data.

        Returns a dict with the keys ``uid``, ``page_id``, ``nick``,
        ``domain``, ``location``, ``follow_num``, ``fans_num`` and
        ``wb_num``, or ``None`` when the download or the count parsing
        yields nothing.
        """
        res = self.downloader.download(self.url_suf + url)
        if not res:
            return
        cont = res.content

        def config_value(pattern):
            # First capture group of *pattern* in the page source.
            return re.search(pattern, cont).group(1)

        uid = config_value(r"CONFIG\['oid'\]='(.*?)'")
        nick = config_value(r"CONFIG\['onick'\]='(.*?)'").decode('utf-8')
        # page_id is needed for the ajax requests that fetch weibo posts
        page_id = config_value(r"CONFIG\['page_id'\]='(.*?)'")
        domain = config_value(r"CONFIG\['domain'\]='(.*?)'")
        location = config_value(r"CONFIG\['location'\]='(.*?)'")

        nums = Parser.parse_index(cont)
        if not nums:
            return
        follow_num, fans_num, wb_num = nums

        # Check whether the user's follow / fans / weibo counts changed.
        # If they did, the stored info is updated; if the weibo count
        # changed, the new weibos have to be crawled.
        # The call returns the number of weibos that need crawling.
        crawl_info, wb_num = self.validater.validate_nums(page_id, nums)
        self.crawl_info = crawl_info

        return {
            'uid': uid,
            'page_id': page_id,
            'nick': nick,
            'domain': domain,
            'location': location,
            'follow_num': int(follow_num),
            'fans_num': int(fans_num),
            'wb_num': int(wb_num)
        }

    def handle_url(self):
        """Process URLs from ``url_q`` until the ``'end'`` marker arrives.

        The marker is forwarded to ``wb_q`` and ``info_q`` before the loop
        stops. URLs already handled are skipped. Any other exception is
        logged at debug level and the loop continues.
        """
        while True:
            try:
                url = self.url_q.get()
                if url == 'end':
                    self.wb_q.put_nowait('end')
                    self.info_q.put_nowait('end')
                    print('crawl over')
                    break
                if url in self.handle_urls:
                    continue

                args = self.oninit(url)
                self.args = args
                if not args:
                    continue

                # The three tasks run as greenlets (an earlier variant used
                # threads) and are all awaited before the URL counts as done.
                greenlets = [
                    gevent.spawn(self.get_wb),
                    gevent.spawn(self.get_url),
                    gevent.spawn(self.get_info),
                ]
                gevent.joinall(greenlets)
                self.handle_urls.add(url)
            except Empty:
                sleep(1)
            except Exception as e:
                logger.debug('error in handle_url:' + str(e))
class Player:
    """Play the songs of a playlist in an endless loop, downloading on demand."""

    def __init__(self):
        """Check the playlist directory and prepare the downloads directory.

        Raises:
            SystemExit: With status 1 when ``PLAYLIST_DIR`` does not exist.
        """
        if not os.path.isdir(PLAYLIST_DIR):
            print(f'No config found at {PLAYLIST_DIR}')
            # Raise directly: the ``exit`` helper is injected by ``site``
            # and is not guaranteed to exist.
            raise SystemExit(1)
        if not os.path.isdir(DOWNLOADS_DIR):
            os.mkdir(DOWNLOADS_DIR)
        self.downloader = Downloader(DOWNLOADS_DIR)
        self.audio = None
        # Set by play(); defined here so the attribute always exists.
        self.current_playlist = None

    def list_playlists(self):
        """Return the sorted names of all ``*.playlist`` files.

        Only the trailing ``.playlist`` suffix is removed, so a file such as
        ``a.playlist.playlist`` is listed as ``a.playlist``.
        """
        suffix = '.playlist'
        return sorted(
            f[:-len(suffix)]
            for f in os.listdir(PLAYLIST_DIR)
            if f.endswith(suffix)
        )

    def list_songs(self, playlist_name):
        """Return the songs of the playlist called *playlist_name*."""
        playlist = Playlist(PLAYLIST_DIR, playlist_name)
        return playlist.songs

    def play(self, playlist_name, callback, offset=0):
        """Play *playlist_name* on repeat, starting at song index *offset*.

        Missing songs are downloaded before they are played. The loop stops
        once ``self.current_playlist`` no longer refers to the playlist this
        call started -- presumably because another ``play`` call replaced it
        (verify against the caller).

        Args:
            playlist_name: Name of the playlist to load.
            callback: Object providing ``on_downloading_song(song)`` and
                ``on_song_started(song)``.
            offset: Number of songs to skip before playback starts.
        """
        simpleaudio.stop_all()
        playlist = Playlist(PLAYLIST_DIR, playlist_name)
        self.current_playlist = playlist
        songs = cycle(playlist.songs)
        for song in islice(songs, offset, None):
            if self.current_playlist is not playlist:
                break
            if not self._download_exists(song):
                print(f'Downloading {song}')
                callback.on_downloading_song(song)
                self.downloader.download(song)
            print(f'Playing {song}')
            callback.on_song_started(song)
            self._play_audio(song)

    def _download_exists(self, song):
        """Return whether *song* is already a file in the downloads directory."""
        path = os.path.join(DOWNLOADS_DIR, song)
        return os.path.isfile(path)

    def _play_audio(self, file_name):
        """Play *file_name* from the downloads directory and wait until done."""
        path = os.path.join(DOWNLOADS_DIR, file_name)
        sound = AudioSegment.from_file(path)
        self.audio = simpleaudio.play_buffer(
            sound.raw_data,
            num_channels=sound.channels,
            bytes_per_sample=sound.sample_width,
            sample_rate=sound.frame_rate)
        self.audio.wait_done()
'2013170LGN00', '2013154LGN00', '2013138LGN01', '2013122LGN01', '2013106LGN01', ] print "Processing {0} image archives ...".format(len(images)) for image in images: imagename = "{0}{1}".format(prefix,image) print "Working on '{0}' ...".format(imagename) dler.download(imagename) uncomp.uncompress(imagename) rgb.makergb(imagename) prev.makepreview(imagename) #cleanup os.system("rm ./{0}/*_B*.TIF".format(imagename)) #os.system("rm ./{0}/*_PROJECTED*".format(imagename)) #os.system('rm ./{0}/*_RGB*') print "Done with {0}".format(imagename) print "Done creating JPEG preview files for all {0} image archives.".format(len(images))
def test_download_to_temp_dir(self):
    """Without a directory argument the file lands in the system temp dir."""
    downloader = Downloader()
    source_url = 'file://' + __file__
    with util.CaptureStdout(), downloader.download(source_url) as filename:
        pass
    expected_dir = tempfile.gettempdir()
    self.assertEqual(expected_dir, os.path.dirname(filename))
def test_download(self):
    """The downloaded copy has the same content as the source file."""
    downloader = Downloader()
    source_url = 'file://' + __file__
    with util.CaptureStdout(), downloader.download(source_url) as downloaded:
        # Compare inside the context, while the downloaded file is open
        # for use.
        expected = readfile(__file__)
        actual = readfile(downloaded)
        self.assertEqual(expected, actual)