import argparse

import youtube_dl
import eyed3 as metadata  # assumption: `metadata` is eyed3, whose load()/tag API matches the calls below


def main():
    parser = argparse.ArgumentParser(
        description="Help with the process of putting a song in Apple Music.")
    parser.add_argument(
        "youtube",
        help="The YouTube link to the song's video; must be surrounded in quotes")
    parser.add_argument("--title", nargs="+", default=None,
                        help="The title of the music")
    parser.add_argument("--artist", nargs="+", default=None,
                        help="The artist of the music")
    parser.add_argument("--thumbnail", action="store_true",
                        help="Whether the music should include art")
    parser.add_argument("--geckodriver", default=None,
                        help="Path to geckodriver; must be surrounded in quotes")
    arguments = parser.parse_args()

    # nargs="+" yields lists; join them back into strings, guarding against
    # the None default (the original joined unconditionally and would crash
    # when --title or --artist was omitted).
    if arguments.title:
        arguments.title = " ".join(arguments.title)
    if arguments.artist:
        arguments.artist = " ".join(arguments.artist)
    name = f"{arguments.artist} - {arguments.title}"

    # Download the best available audio and re-encode it as a 192 kbps MP3.
    options = {
        "format": "bestaudio/best",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": "192",
        }],
        "outtmpl": f"{name}.%(ext)s",
    }
    with youtube_dl.YoutubeDL(options) as youtube:
        youtube.download([arguments.youtube])

    # Tag the downloaded file with the supplied metadata.
    music = metadata.load(f"{name}.mp3")
    if arguments.title:
        music.tag.title = arguments.title
    if arguments.artist:
        music.tag.artist = arguments.artist
    if arguments.thumbnail and arguments.geckodriver:
        from scraper import Scraper
        scraper = Scraper(arguments.geckodriver, music)
        scraper.begin(name=name)
    music.tag.save()
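# A minimal usage sketch; the invocation below is illustrative and assumes
# this file is saved as download.py with youtube_dl and eyed3 installed:
#
#   python download.py "https://www.youtube.com/watch?v=..." \
#       --title Some Title --artist Some Artist \
#       --thumbnail --geckodriver "/path/to/geckodriver"

if __name__ == "__main__":
    main()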
# Assumed project-local imports; the module names are a guess based on the
# class names used below.
from graph import Graph
from scraper import Scraper

graph = Graph()
while True:  # fixed: `running` was never defined; only command 0 exits
    print "Commands:"
    print "0: quit"
    print "1 <Actor/Movie to parse>"
    print "2 <graph JSON path to load>"
    print "3 <command to execute>"
    command = raw_input("Command: ")  # renamed from `input`, which shadows the builtin
    print command
    if not command:
        continue
    if command[0] == '0':
        break
    if command[0] == '1':
        # Scrape starting from the given Wikipedia page, following up to
        # 50 links at one request per second.
        test = Scraper('https://en.wikipedia.org/wiki/' + command[2:], 50)
        test.set_speed(1)
        graph = test.begin()
    if command[0] == '2':
        graph.open_json(command[2:])
    if command[0] == '3':
        # Evaluate the rest of the line and print the result.
        exec("print " + command[2:])

'''
test_two = Scraper('https://en.wikipedia.org/wiki/Ryan_Reynolds', 30)
test_two.set_speed(1)
print str(Scraper.get_oldest_actors(graph, 5))
print str(Scraper.get_movies(graph, 2009))
print str(Scraper.get_actors(graph, 2009))
'''
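# Example session, for reference (command digit, a space, then the argument;
# the page and file names are hypothetical):
#
#   Command: 1 Ryan_Reynolds               -> scrape en.wikipedia.org/wiki/Ryan_Reynolds
#   Command: 2 saved_graph.json            -> load a previously saved graph
#   Command: 3 Scraper.get_actors(graph, 2009)  -> evaluate and print the expression
#   Command: 0                             -> quit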
import threading
import uuid
from time import strftime

import pika
import simplejson

from scraper import Scraper  # assumed project-local import


class ScraperWrapper(threading.Thread):

    def __init__(self, address='localhost', exchange='barkingowl', DEBUG=False):
        threading.Thread.__init__(self)
        self.uid = str(uuid.uuid4())
        self.address = address
        self.exchange = exchange
        self.DEBUG = DEBUG
        self.interval = 1

        # create scraper instance
        self.scraper = Scraper(uid=self.uid, DEBUG=DEBUG)
        self.scraping = False

        # setup message bus: one connection for publishing responses ...
        self.respcon = pika.BlockingConnection(pika.ConnectionParameters(
            host=self.address))
        self.respchan = self.respcon.channel()
        self.respchan.exchange_declare(exchange=self.exchange, type='fanout')

        # ... and one for consuming requests from an exclusive queue
        self.reqcon = pika.BlockingConnection(pika.ConnectionParameters(host=address))
        self.reqchan = self.reqcon.channel()
        self.reqchan.exchange_declare(exchange=exchange, type='fanout')
        result = self.reqchan.queue_declare(exclusive=True)
        queue_name = result.method.queue
        self.reqchan.queue_bind(exchange=exchange, queue=queue_name)
        self.reqchan.basic_consume(self.reqcallback, queue=queue_name, no_ack=True)

        if self.DEBUG:
            print "Scraper Wrapper INIT complete."

    def run(self):
        # set up callbacks
        self.scraper.setFinishedCallback(self.scraperFinishedCallback)
        self.scraper.setStartedCallback(self.scraperStartedCallback)
        self.scraper.setBroadcastDocCallback(self.scraperBroadcastDocCallback)

        # broadcast availability, then block consuming requests
        self.broadcastavailable()
        self.reqchan.start_consuming()

    def stop(self):
        self.scraper.stop()
        self.reqchan.stop_consuming()

    def broadcastavailable(self):
        if self.scraper.status['busy'] == True:
            # we are currently scraping, so we are not available - don't broadcast
            return
        isodatetime = strftime("%Y-%m-%d %H:%M:%S")
        packet = {
            'availabledatetime': str(isodatetime)
        }
        payload = {
            'command': 'scraper_available',
            'sourceid': self.uid,
            'destinationid': 'broadcast',
            'message': packet
        }
        jbody = simplejson.dumps(payload)
        self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=jbody)

        #
        # TODO: move this over to its own timer; no need to do it here.
        #
        if self.scraper.stopped():
            raise Exception("Scraper Wrapper Exiting")
        else:
            # re-arm the availability broadcast
            threading.Timer(self.interval, self.broadcastavailable).start()

    def broadcaststatus(self):
        isodatetime = strftime("%Y-%m-%d %H:%M:%S")
        packet = {
            'status': self.scraper.status,
            'urldata': self.scraper.status['urldata'],  # fixed: was self.status['urldata']
            'statusdatetime': str(isodatetime)
        }
        payload = {
            'command': 'scraper_status',
            'sourceid': self.uid,
            'destinationid': 'broadcast',
            'message': packet
        }
        jbody = simplejson.dumps(payload)
        self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=jbody)

    def broadcastsimplestatus(self):
        isodatetime = strftime("%Y-%m-%d %H:%M:%S")

        if self.scraper.status['urldata'] == {}:
            targeturl = 'null'
        else:
            targeturl = self.scraper.status['urldata']['targeturl']

        packet = {
            'busy': self.scraper.status['busy'],
            'linkcount': self.scraper.status['linkcount'],
            'processedlinkcount': len(self.scraper.status['processed']),
            'badlinkcount': len(self.scraper.status['badlinks']),
            'targeturl': targeturl,
            'statusdatetime': str(isodatetime)
        }
        payload = {
            'command': 'scraper_status_simple',
            'sourceid': self.uid,
            'destinationid': 'broadcast',
            'message': packet
        }
        jbody = simplejson.dumps(payload)
        self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=jbody)

    def scraperFinishedCallback(self, payload):
        jbody = simplejson.dumps(payload)
        self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=jbody)

    def scraperStartedCallback(self, payload):
        jbody = simplejson.dumps(payload)
        self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=jbody)

    def scraperBroadcastDocCallback(self, payload):
        jbody = simplejson.dumps(payload)
        self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=jbody)

    # message handler
    def reqcallback(self, ch, method, properties, body):
        try:
            response = simplejson.loads(body)
            if self.DEBUG:
                print "Processing Message:\n\t{0}".format(response['command'])

            if response['command'] == 'url_dispatch':
                if response['destinationid'] == self.uid:
                    if self.scraping == False:
                        self.scraper.seturldata(response['message'])
                        # launch the scraper thread once, then begin each dispatch
                        if self.scraper.started == False:
                            self.scraper.start()
                        self.scraper.begin()
                        self.scraping = True
            elif response['command'] == 'scraper_finished':
                if response['sourceid'] == self.scraper.uid:
                    self.scraping = False
            elif response['command'] == 'get_status':
                self.broadcaststatus()
            elif response['command'] == 'get_status_simple':
                self.broadcastsimplestatus()
            elif response['command'] == 'shutdown':
                if response['destinationid'] == self.uid:
                    print "[{0}] Shutdown Received".format(self.uid)
                    self.stop()
            elif response['command'] == 'global_shutdown':
                print "Global Shutdown Received"
                self.stop()
        except Exception:
            if self.DEBUG:
                print "Message Error"
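# A minimal dispatcher sketch showing the message format that reqcallback()
# above expects. Assumptions: RabbitMQ on localhost, the same 'barkingowl'
# fanout exchange, and a wrapper uid learned from a prior 'scraper_available'
# broadcast; dispatch_url() and the 'dispatcher' source id are hypothetical
# names used only for illustration.

def dispatch_url(wrapper_uid, targeturl, address='localhost', exchange='barkingowl'):
    connection = pika.BlockingConnection(pika.ConnectionParameters(host=address))
    channel = connection.channel()
    channel.exchange_declare(exchange=exchange, type='fanout')
    payload = {
        'command': 'url_dispatch',
        'sourceid': 'dispatcher',      # hypothetical sender id
        'destinationid': wrapper_uid,  # uid of the target ScraperWrapper
        'message': {
            'targeturl': targeturl,    # consumed by Scraper.seturldata()
        },
    }
    channel.basic_publish(exchange=exchange, routing_key='',
                          body=simplejson.dumps(payload))
    connection.close()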