def get_schedule_page_url():
    """returns the url of the schedule page"""
    school_data = br.open(get_base_url() + 'portal/portalOutlineWrapper.xsl?x=portal.PortalOutline&contentType=text/xml&lang=en')
    dom = minidom.parse(school_data)
    node = dom.getElementsByTagName('Student')[0]
    person_id = node.getAttribute('personID')
    first_name = node.getAttribute('firstName')
    last_name = node.getAttribute('lastName')
    node = dom.getElementsByTagName('Calendar')[0]
    school_id = node.getAttribute('schoolID')
    node = dom.getElementsByTagName('ScheduleStructure')[0]
    calendar_id = node.getAttribute('calendarID')
    structure_id = node.getAttribute('structureID')
    calendar_name = node.getAttribute('calendarName')
    return utils.url_fix(get_base_url() + u"portal/portal.xsl?x=portal.PortalOutline&lang=en&personID={}&studentFirstName={}&lastName={}&firstName={}&schoolID={}&calendarID={}&structureID={}&calendarName={}&mode=schedule&x=portal.PortalSchedule&x=resource.PortalOptions".format(
        person_id, first_name, last_name, first_name, school_id,
        calendar_id, structure_id, calendar_name))
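# A hedged usage sketch; br is the module-level browser session the function
# above relies on:
#
# schedule_page = br.open(get_schedule_page_url())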
def musicbrainz_recording_search(reid):
    """
    Searches musicbrainz for the (english) recordings of the release given
    by reid. Returns a list of track titles.
    """
    log.debug("Searching for album %s in the tracks list on musicbrainz..." % reid)
    url = 'http://www.musicbrainz.org/ws/2/recording/?query=reid:"%s"' % reid
    log.debug("Fetching %s..." % url)
    url = utils.url_fix(url)
    obj = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = obj.read()
    obj.close()

    tracks = []

    def sort_key(x):
        "Sort key: the recording's track number on its first listed release"
        num = x.find("release-list").find("medium-list").medium.find("track-list").track.number.text
        non_decimal = re.compile(r"[^\d.]+")
        num = non_decimal.sub("", num)
        num = int(num) if num else 0
        return num

    soup = BeautifulSoup(response, "xml")
    for track in sorted(soup.find_all("recording"), key=sort_key):
        name = track.title.text.replace(u"\u2019", "'")  # normalize curly apostrophes
        tracks.append(name)
    return tracks
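# The sort_key above strips vinyl/cassette-style position prefixes before the
# numeric compare. The same stripping step in isolation (a doctest-style
# illustration, not part of the original module):
#
# >>> import re
# >>> non_decimal = re.compile(r"[^\d.]+")
# >>> non_decimal.sub("", "A2")
# '2'
# >>> int(non_decimal.sub("", "B12") or 0)
# 12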
def musicbrainz_artist_search(s):
    '''
    Searches musicbrainz for an (english) artist by name.
    Returns a list of MetadataArtist.
    '''
    log.debug("Searching for %s in the artists list on musicbrainz..." % s)
    url = 'http://www.musicbrainz.org/ws/2/artist?query=artist:"%s"&limit=5' % utils.url_fix(s)
    log.debug('Fetching %s...' % url)
    obj = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = obj.read()
    obj.close()

    artists = []
    soup = BeautifulSoup(response, 'xml')
    for artist in soup.find_all('artist'):
        # keep only near-exact matches
        if int(artist.attrs['ext:score']) > 94:
            id_ = artist['id']
            type_ = artist['type'] if artist.has_attr('type') else ""
            name = artist.find('name').text
            score = artist.attrs['ext:score']
            disambiguation = artist.find('disambiguation').text if artist.find('disambiguation') else ""
            obj = utils.cls.MetadataArtist(id_, name, 'musicbrainz', type_, score,
                                           disambiguation=disambiguation)
            artists.append(obj)
    return artists
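# A hedged usage sketch for the search above; the artist name is only an
# example, and the attribute names on MetadataArtist are assumed to mirror
# the constructor arguments passed to utils.cls:
#
# for artist in musicbrainz_artist_search(u"Radiohead"):
#     log.debug("%s [%s] score=%s %s" % (artist.name, artist.id,
#                                        artist.score, artist.disambiguation))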
def musicbrainz_release_search(arid):
    """
    Searches musicbrainz for an artist's official (english) releases by arid.
    Returns three lists of MetadataRelease objects: albums, singles, others.
    """
    log.debug("Searching for artist %s in the releases list on musicbrainz..." % arid)
    url = 'http://www.musicbrainz.org/ws/2/release/?query=arid:"%s" AND status:"official"' % arid
    log.debug("Fetching %s..." % url)
    url = utils.url_fix(url)
    obj = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = obj.read()
    obj.close()

    release_groups = {}
    final_releases = []
    soup = BeautifulSoup(response, "xml")
    for release in soup.find_all("release"):
        group = release.find("release-group")
        reid = release["id"]
        rgid = group["id"]
        if rgid not in release_groups:
            release_groups[rgid] = {}
            release_groups[rgid]["releases"] = {}
            release_groups[rgid]["type"] = group["type"] if group.has_attr("type") else ""
        d = {}
        d["date"] = release.date.text if release.date else ""
        d["title"] = release.title.text
        d["count"] = release.find("medium-list").find("track-count").text
        d["artistname"] = release.artist.name
        release_groups[rgid]["releases"][reid] = d

    # keep the edition with the fewest tracks out of each release group
    for rgid, d in release_groups.items():
        list_of_rids = d["releases"].items()
        if list_of_rids:
            # count is a string in the XML, so compare numerically
            list_of_rids = sorted(list_of_rids, key=lambda x: int(x[1]["count"]))
            reid, d2 = list_of_rids[0]
            obj = utils.cls.MetadataRelease(reid, d2["title"], "musicbrainz", d["type"],
                                            d2["date"], d2["count"], arid, d2["artistname"])
            final_releases.append(obj)

    final_releases = sorted(final_releases, key=lambda x: x.date)
    albums = [x for x in final_releases if x.type == "Album"]
    singles = [x for x in final_releases if x.type == "Single"]
    others = [x for x in final_releases if x.type not in ["Album", "Single"]]
    # for album in albums:
    #     print "Album %s (%s tracks, out at %s)" % (album.title, album.count, album.date)
    # for single in singles:
    #     print "Single %s (%s tracks)" % (single.title, single.count)
    # for other in others:
    #     print "Other %s (%s tracks)" % (other.title, other.count)
    return albums, singles, others
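# The three musicbrainz_* helpers compose into a simple discovery pipeline.
# A hedged sketch (the Metadata* attribute names are assumed from the
# constructor calls above):
#
# artists = musicbrainz_artist_search(u"Radiohead")
# if artists:
#     albums, singles, others = musicbrainz_release_search(artists[0].id)
#     for album in albums:
#         tracks = musicbrainz_recording_search(album.id)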
def get_url(self):
    params = {"title": self.title, "action": "render"}
    if not self.project.subdomain_lang:
        params["uselang"] = self.lang
    url = "http://%s/w/index.php" % self.get_domain()
    url += "?" + "&".join(["%s=%s" % (key, val) for key, val in params.items()])
    url = url_fix(url)
    return url
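# The manual "&".join above leaves all escaping to url_fix. A hedged
# alternative sketch using only the standard library (not the project's
# actual code) would encode the query up front:
#
# import urllib
# query = urllib.urlencode(params)  # e.g. title=Foo+Bar&action=render
# url = "http://%s/w/index.php?%s" % (self.get_domain(), query)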
def parse_musicBrainz(title, artist):
    "Uses the musicBrainz API to grab release data for a recording"
    url = 'http://www.musicbrainz.org/ws/2/recording?query="%s" AND artist:"%s"' % (
        urllib2.quote(title.encode("utf8")), urllib2.quote(artist.encode("utf8")))
    url = utils.url_fix(url)
    log.debug('Fetching %s...' % url)
    obj = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = obj.read()
    obj.close()

    dom = xml.dom.minidom.parseString(response)
    try:
        tag = dom.getElementsByTagName('metadata')[0].getElementsByTagName('recording-list')[0] \
            .getElementsByTagName('recording')[0].getElementsByTagName('tag-list')[0] \
            .getElementsByTagName('tag')[0].getElementsByTagName('name')[0].childNodes[0].data
    except IndexError:
        tag = ""
    try:
        artist = dom.getElementsByTagName('metadata')[0].getElementsByTagName('recording-list')[0] \
            .getElementsByTagName('recording')[0].getElementsByTagName('artist-credit')[0] \
            .getElementsByTagName('name-credit')[0].getElementsByTagName('artist')[0] \
            .getElementsByTagName('name')[0].childNodes[0].data
    except IndexError:
        artist = ""
    try:
        release_list_dom = dom.getElementsByTagName('metadata')[0].getElementsByTagName('recording-list')[0] \
            .getElementsByTagName('recording')[0].getElementsByTagName('release-list')[0] \
            .getElementsByTagName('release')
    except IndexError:
        return None

    release_dict = {}
    for release_dom in release_list_dom:  # do not shadow the outer dom
        d = {}
        d['title'] = release_dom.getElementsByTagName('title')[0].childNodes[0].data \
            if release_dom.getElementsByTagName('title') else ""
        d['date'] = release_dom.getElementsByTagName('date')[0].childNodes[0].data \
            if release_dom.getElementsByTagName('date') else ""
        d['country'] = release_dom.getElementsByTagName('country')[0].childNodes[0].data \
            if release_dom.getElementsByTagName('country') else ""
        d['artist'] = artist
        d['tag'] = tag
        # key the result by the release's MBID
        release_dict[release_dom.getAttribute('id')] = d
    return release_dict
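# A hedged usage sketch for parse_musicBrainz; title and artist are examples,
# and the return value maps each release's MBID to its metadata dict:
#
# releases = parse_musicBrainz(u"Karma Police", u"Radiohead")
# if releases:
#     for reid, info in releases.items():
#         log.debug("%s (%s, %s)" % (info['title'], info['date'], info['country']))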
def __init__(self, urls, dest=None, max_threads=5, show_output=True, logger=None):
    self.mirrors = [urls] if isinstance(urls, basestring) else urls
    for i, url in enumerate(self.mirrors):
        if " " in url:
            self.mirrors[i] = utils.url_fix(url)
    self.url = self.mirrors.pop(0)

    self.dest = dest or r"%s\%s" % (config.temp_dir, urlparse(self.url).path.split('/')[-1])
    self.show_output = show_output
    self.logger = logger or logging.getLogger('dummy')
    self.max_threads = max_threads
    self.headers = config.generic_http_headers
    self.timeout = 4
    self.current_attemp = 1
    self.attemps_limit = 4
    self.minChunkFile = 1024**2  # 1MB
    self.filesize = 0
    self.shared_var = multiprocessing.Value(c_int, 0)  # a ctypes var that counts the bytes already downloaded
    self.status = "ready"
    self.verify_hash = False
    self._killed = False
    self._failed = False

    self.post_threadpool_thread = None
    self.control_thread = None

    if not is_ServerSupportHTTPRange(self.url):
        self.logger.warning("Server does not support HTTPRange. max_threads is set to 1.")
        self.max_threads = 1
    if os.path.exists(self.dest):
        self.logger.warning("Destination '%s' already exists. Existing file will be removed." % self.dest)
    if not os.path.exists(os.path.dirname(self.dest)):
        self.logger.warning("Directory '%s' does not exist. Creating it..." % os.path.dirname(self.dest))
        os.makedirs(os.path.dirname(self.dest))

    self.pool = ThreadPool(max_threads=self.max_threads, catch_returns=True, logger=self.logger)
def get_page_url(gradesPage, dom, curr_user):
    """returns the url of the schedule or grades page"""
    nodes = dom.getElementsByTagName('Student')
    node = False
    for student in nodes:
        if not student.hasAttribute('studentNumber'):
            continue
        curr_student_id = student.getAttribute('studentNumber')
        if curr_student_id == curr_user.student_id:
            node = student
            break
    if not node:
        print("Account does not have this student ID")
        return False
    person_id = node.getAttribute('personID')
    first_name = node.getAttribute('firstName')
    last_name = node.getAttribute('lastName')
    nodes = node.getElementsByTagName('Calendar')
    if len(nodes) < 1:
        return False
    node = nodes[0]
    school_id = node.getAttribute('schoolID')
    nodes = node.getElementsByTagName('ScheduleStructure')
    if len(nodes) < 1:
        return False
    node = nodes[0]
    calendar_id = node.getAttribute('calendarID')
    structure_id = node.getAttribute('structureID')
    calendar_name = node.getAttribute('calendarName')
    if gradesPage:
        mode = 'grades'
        x = 'portal.PortalGrades'
    else:
        mode = 'schedule'
        x = 'portal.PortalSchedule'
    return utils.url_fix(get_base_url() + u"portal/portal.xsl?x=portal.PortalOutline&lang=en&personID={}&studentFirstName={}&lastName={}&firstName={}&schoolID={}&calendarID={}&structureID={}&calendarName={}&mode={}&x={}&x=resource.PortalOptions".format(
        person_id, first_name, last_name, first_name, school_id,
        calendar_id, structure_id, calendar_name, mode, x))
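# A hedged usage sketch; dom is the parsed portal outline, and curr_user is
# assumed to expose the student_id attribute checked above:
#
# schedule_url = get_page_url(False, dom, curr_user)
# grades_url = get_page_url(True, dom, curr_user)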
def sanitize_triple_item(item):
    if isinstance(item, term.URIRef):
        return term.URIRef(url_fix(str(item)))
    return item
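# Every snippet in this listing leans on url_fix to make user-assembled URLs
# safe for urllib2. Its implementation is not shown here; a minimal Python 2
# sketch in the spirit of Werkzeug's url_fix (an assumption about the real
# utils code, not a copy of it) looks like this:

import urllib
import urlparse

def url_fix_sketch(s, charset='utf-8'):
    """Percent-encode unsafe characters (most commonly spaces) so the
    URL can be passed to urllib2.urlopen without raising."""
    if isinstance(s, unicode):
        s = s.encode(charset, 'ignore')    # work on bytes, as urllib expects
    scheme, netloc, path, qs, anchor = urlparse.urlsplit(s)
    path = urllib.quote(path, '/%')        # keep separators and existing escapes
    qs = urllib.quote_plus(qs, ':&=')      # keep the query structure intact
    return urlparse.urlunsplit((scheme, netloc, path, qs, anchor))

# url_fix_sketch('http://example.com/a b?q=x y') ->
# 'http://example.com/a%20b?q=x+y'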
def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, threads=5,
             logger=None, connect_default_logger=False, proxy=None):
    global DEFAULT_LOGGER_CREATED
    self.mirrors = [urls] if isinstance(urls, basestring) else urls
    if fix_urls:
        self.mirrors = [utils.url_fix(x) for x in self.mirrors]
    self.url = self.mirrors.pop(0)
    if proxy is not None:
        proxy = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)

    fn = urllib2.unquote(os.path.basename(urlparse(self.url).path))
    if sys.version_info < (3, 0):
        fn = fn.decode('utf-8')  # required only on python 2
    self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
    if self.dest[-1] == os.sep:
        if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
            os.unlink(self.dest[:-1])
        self.dest += fn
    if os.path.isdir(self.dest):
        self.dest = os.path.join(self.dest, fn)

    self.progress_bar = progress_bar

    if logger:
        self.logger = logger
    elif connect_default_logger:
        if not DEFAULT_LOGGER_CREATED:
            self.logger = utils.create_debugging_logger()
            DEFAULT_LOGGER_CREATED = True
        else:
            self.logger = logging.getLogger('pySmartDL')
    else:
        self.logger = utils.DummyLogger()

    self.headers = {'User-Agent': utils.get_random_useragent()}
    self.threads_count = threads
    self.timeout = 4
    self.current_attemp = 1
    self.attemps_limit = 4
    self.minChunkFile = 1024**2 * 2  # 2MB
    self.filesize = 0
    self.shared_var = multiprocessing.Value(c_int, 0)  # a ctypes var that counts the bytes already downloaded
    self.thread_shared_cmds = {}

    self.status = "ready"
    self.verify_hash = False
    self._killed = False
    self._failed = False
    self._start_func_blocking = True
    self.errors = []

    self.post_threadpool_thread = None
    self.control_thread = None

    if not os.path.exists(os.path.dirname(self.dest)):
        self.logger.info('Folder "%s" does not exist. Creating...' % os.path.dirname(self.dest))
        os.makedirs(os.path.dirname(self.dest))
    if not utils.is_HTTPRange_supported(self.url):
        self.logger.warning("Server does not support HTTPRange. threads_count is set to 1.")
        self.threads_count = 1
    if os.path.exists(self.dest):
        self.logger.warning('Destination "%s" already exists. Existing file will be removed.' % self.dest)
    if not os.path.exists(os.path.dirname(self.dest)):
        self.logger.warning('Directory "%s" does not exist. Creating it...' % os.path.dirname(self.dest))
        os.makedirs(os.path.dirname(self.dest))

    self.logger.info("Creating a ThreadPool of %d thread(s).", self.threads_count)
    self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)
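# A hedged usage sketch, assuming this __init__ belongs to a SmartDL-style
# downloader class whose public API includes start() and get_dest() (true of
# pySmartDL, from which this constructor appears to come):
#
# dl = SmartDL("http://example.com/file.zip", progress_bar=False)
# dl.start()           # blocking by default (_start_func_blocking above)
# print dl.get_dest()  # final path under tempdir/pySmartDL/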