def get_schedule_page_url():
    """Return the portal URL of the logged-in student's schedule page."""
    outline_url = (get_base_url() +
                   'portal/portalOutlineWrapper.xsl?x=portal.PortalOutline&contentType=text/xml&lang=en')
    dom = minidom.parse(br.open(outline_url))

    student = dom.getElementsByTagName('Student')[0]
    calendar = dom.getElementsByTagName('Calendar')[0]
    structure = dom.getElementsByTagName('ScheduleStructure')[0]

    first_name = student.getAttribute('firstName')
    # The portal expects the first name twice (studentFirstName and firstName).
    query = (u"portal/portal.xsl?x=portal.PortalOutline&lang=en"
             u"&personID={}&studentFirstName={}&lastName={}&firstName={}"
             u"&schoolID={}&calendarID={}&structureID={}&calendarName={}"
             u"&mode=schedule&x=portal.PortalSchedule&x=resource.PortalOptions").format(
        student.getAttribute('personID'),
        first_name,
        student.getAttribute('lastName'),
        first_name,
        calendar.getAttribute('schoolID'),
        structure.getAttribute('calendarID'),
        structure.getAttribute('structureID'),
        structure.getAttribute('calendarName'))
    return utils.url_fix(get_base_url() + query)
def musicbrainz_recording_search(reid):
    """
    Searches musicbrainz for the recordings that belong to release id *reid*.

    Returns a list of track titles, ordered by track number.
    """
    log.debug("Searching for album %s in the tracks list on musicbrainz..." % reid)

    url = 'http://www.musicbrainz.org/ws/2/recording/?query=reid:"%s"' % reid
    log.debug("Fetching %s..." % url)
    conn = urllib2.urlopen(utils.url_fix(url), timeout=config.metadata_timeout)
    payload = conn.read()
    conn.close()

    def track_number(recording):
        "Numeric track number of the recording on its first listed medium."
        raw = recording.find("release-list").find("medium-list").medium.find("track-list").track.number.text
        digits = re.sub(r"[^\d.]+", "", raw)
        return int(digits) if digits else 0

    ordered = sorted(BeautifulSoup(payload, "xml").find_all("recording"),
                     key=track_number)
    # Normalize typographic apostrophes to plain ones in the titles.
    return [rec.title.text.replace(u"\u2019", "'") for rec in ordered]
Exemple #3
0
def musicbrainz_artist_search(s):
    """
    Searches musicbrainz for the english artist name *s*.

    Returns a list of utils.cls.MetadataArtist for matches scoring above 94.
    """
    log.debug("Searching for %s in the artists list on musicbrainz..." % s)
    url = 'http://www.musicbrainz.org/ws/2/artist?query=artist:"%s"&limit=5' % utils.url_fix(s)
    log.debug('Fetching %s...' % url)

    conn = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = conn.read()
    conn.close()

    matches = []
    for node in BeautifulSoup(response, 'xml').find_all('artist'):
        score = node.attrs['ext:score']
        if int(score) <= 94:
            continue  # keep only strong matches
        disambig_node = node.find('disambiguation')
        matches.append(utils.cls.MetadataArtist(
            node['id'],
            node.find('name').text,
            'musicbrainz',
            node['type'] if node.has_key('type') else "",
            score,
            disambiguation=disambig_node.text if disambig_node else ""))
    return matches
def get_schedule_page_url():
    """Return the portal URL of the logged-in student's schedule page."""
    xml_stream = br.open(get_base_url() + 'portal/portalOutlineWrapper.xsl?x=portal.PortalOutline&contentType=text/xml&lang=en')
    outline = minidom.parse(xml_stream)

    def first(tag_name):
        "First element with the given tag in the outline document."
        return outline.getElementsByTagName(tag_name)[0]

    student = first('Student')
    structure = first('ScheduleStructure')
    # The portal wants the first name both as studentFirstName and firstName.
    fields = (
        student.getAttribute('personID'),
        student.getAttribute('firstName'),
        student.getAttribute('lastName'),
        student.getAttribute('firstName'),
        first('Calendar').getAttribute('schoolID'),
        structure.getAttribute('calendarID'),
        structure.getAttribute('structureID'),
        structure.getAttribute('calendarName'),
    )
    return utils.url_fix(
        get_base_url() +
        u"portal/portal.xsl?x=portal.PortalOutline&lang=en&personID={}&studentFirstName={}&lastName={}&firstName={}&schoolID={}&calendarID={}&structureID={}&calendarName={}&mode=schedule&x=portal.PortalSchedule&x=resource.PortalOptions".format(*fields))
def musicbrainz_artist_search(s):
    """
    Searches musicbrainz for the english artist name *s*.

    Returns a list of MetadataArtist objects whose match score exceeds 94.
    """
    log.debug("Searching for %s in the artists list on musicbrainz..." % s)
    url = 'http://www.musicbrainz.org/ws/2/artist?query=artist:"%s"&limit=5' % utils.url_fix(s)
    log.debug("Fetching %s..." % url)

    handle = urllib2.urlopen(url, timeout=config.metadata_timeout)
    xml_body = handle.read()
    handle.close()

    soup = BeautifulSoup(xml_body, "xml")
    found = []
    for entry in soup.find_all("artist"):
        raw_score = entry.attrs["ext:score"]
        if int(raw_score) > 94:  # keep only strong matches
            kind = entry["type"] if entry.has_key("type") else ""
            note = entry.find("disambiguation")
            found.append(utils.cls.MetadataArtist(
                entry["id"], entry.find("name").text, "musicbrainz",
                kind, raw_score,
                disambiguation=note.text if note else ""))
    return found
Exemple #6
0
def musicbrainz_recording_search(reid):
    """
    Searches musicbrainz for the recordings of release id *reid*.

    Returns the track titles as a list, sorted by track number.
    """
    log.debug("Searching for album %s in the tracks list on musicbrainz..." %
              reid)

    url = 'http://www.musicbrainz.org/ws/2/recording/?query=reid:"%s"' % reid
    log.debug('Fetching %s...' % url)
    handle = urllib2.urlopen(utils.url_fix(url), timeout=config.metadata_timeout)
    body = handle.read()
    handle.close()

    digits_only = re.compile(r'[^\d.]+')

    def numeric_position(rec):
        "Track number of the recording on its first listed medium."
        raw = rec.find('release-list').find('medium-list').medium.find('track-list').track.number.text
        cleaned = digits_only.sub('', raw)
        return int(cleaned) if cleaned else 0

    recordings = BeautifulSoup(body, 'xml').find_all('recording')
    recordings.sort(key=numeric_position)
    # Normalize typographic apostrophes to plain ones in the titles.
    return [rec.title.text.replace(u'\u2019', "'") for rec in recordings]
Exemple #7
0
def musicbrainz_artist_search(s):
    """
    Searches musicbrainz for the english artist name *s*.

    Returns a list of MetadataArtist (strong matches only, score > 94).
    """
    log.debug("Searching for %s in the artists list on musicbrainz..." % s)
    url = 'http://www.musicbrainz.org/ws/2/artist?query=artist:"%s"&limit=5' % utils.url_fix(s)
    log.debug('Fetching %s...' % url)

    resp = urllib2.urlopen(url, timeout=config.metadata_timeout)
    payload = resp.read()
    resp.close()

    def to_metadata(tag):
        "Convert an <artist> tag into a MetadataArtist."
        extra = tag.find('disambiguation')
        return utils.cls.MetadataArtist(
            tag['id'],
            tag.find('name').text,
            'musicbrainz',
            tag['type'] if tag.has_key('type') else "",
            tag.attrs['ext:score'],
            disambiguation=extra.text if extra else "")

    return [to_metadata(a)
            for a in BeautifulSoup(payload, 'xml').find_all('artist')
            if int(a.attrs['ext:score']) > 94]
def musicbrainz_release_search(arid):
    """
    Searches musicbrainz for the official releases of artist id *arid*.

    Returns three lists of MetadataRelease objects: albums, singles, others
    (sorted by date; one release per release group).
    """
    log.debug("Searching for artist %s in the releases list on musicbrainz..." % arid)
    url = 'http://www.musicbrainz.org/ws/2/release/?query=arid:"%s" AND status :"official"' % arid
    log.debug("Fetching %s..." % url)
    url = utils.url_fix(url)
    obj = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = obj.read()
    obj.close()

    release_groups = {}
    final_releases = []
    soup = BeautifulSoup(response, "xml")

    # Bucket every release under its release-group id.
    for release in soup.find_all("release"):
        group = release.find("release-group")
        reid = release["id"]
        rgid = group["id"]

        if rgid not in release_groups:
            release_groups[rgid] = {}
            release_groups[rgid]["releases"] = {}
            release_groups[rgid]["type"] = group["type"] if group.has_key("type") else ""

        d = {}
        d["date"] = release.date.text if release.date else ""
        d["title"] = release.title.text
        d["count"] = release.find("medium-list").find("track-count").text
        # NOTE(review): BeautifulSoup Tag.name is the tag's own name
        # ("artist"), not the <name> child's text -- confirm this is intended.
        d["artistname"] = release.artist.name
        release_groups[rgid]["releases"][reid] = d

    # Keep one release per group: the one with the lowest track count.
    for rgid, d in release_groups.items():
        list_of_rids = list(d["releases"].items())
        if list_of_rids:
            # BUG FIX: sorted() returns a new list; the original discarded the
            # result, so an arbitrary release was picked from each group.
            # (Counts are strings, so the ordering is lexicographic -- the
            # same key the original intended.)
            list_of_rids = sorted(list_of_rids, key=lambda x: x[1]["count"])
            reid, d2 = list_of_rids[0]

            obj = utils.cls.MetadataRelease(
                reid, d2["title"], "musicbrainz", d["type"], d2["date"], d2["count"], arid, d2["artistname"]
            )
            final_releases.append(obj)

    final_releases = sorted(final_releases, key=lambda x: x.date)
    albums = [x for x in final_releases if x.type == "Album"]
    singles = [x for x in final_releases if x.type == "Single"]
    others = [x for x in final_releases if x.type not in ["Album", "Single"]]

    return albums, singles, others
Exemple #9
0
def musicbrainz_release_search(arid):
    '''
    Searches musicbrainz for the official releases of artist id *arid*.

    Returns three lists of MetadataRelease objects: albums, singles, others
    (sorted by date; one release per release group).
    '''
    log.debug("Searching for artist %s in the releases list on musicbrainz..." % arid)
    url = 'http://www.musicbrainz.org/ws/2/release/?query=arid:"%s" AND status :"official"' % arid
    log.debug('Fetching %s...' % url)
    url = utils.url_fix(url)
    obj = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = obj.read()
    obj.close()

    release_groups = {}
    final_releases = []
    soup = BeautifulSoup(response, 'xml')

    # Bucket every release under its release-group id.
    for release in soup.find_all('release'):
        group = release.find('release-group')
        reid = release['id']
        rgid = group['id']

        if rgid not in release_groups:
            release_groups[rgid] = {}
            release_groups[rgid]['releases'] = {}
            release_groups[rgid]['type'] = group['type'] if group.has_key('type') else ""

        d = {}
        d['date'] = release.date.text if release.date else ""
        d['title'] = release.title.text
        d['count'] = release.find('medium-list').find('track-count').text
        # NOTE(review): BeautifulSoup Tag.name is the tag's own name
        # ('artist'), not the <name> child's text -- confirm this is intended.
        d['artistname'] = release.artist.name
        release_groups[rgid]['releases'][reid] = d

    # Keep one release per group: the one with the lowest track count.
    for rgid, d in release_groups.items():
        list_of_rids = list(d['releases'].items())
        if list_of_rids:
            # BUG FIX: sorted() returns a new list; the original discarded the
            # result, so an arbitrary release was picked from each group.
            # (Counts are strings, so the ordering is lexicographic -- the
            # same key the original intended.)
            list_of_rids = sorted(list_of_rids, key=lambda x: x[1]['count'])
            reid, d2 = list_of_rids[0]

            obj = utils.cls.MetadataRelease(reid, d2['title'], 'musicbrainz', d['type'], d2['date'], d2['count'], arid, d2['artistname'])
            final_releases.append(obj)

    final_releases = sorted(final_releases, key=lambda x: x.date)
    albums = [x for x in final_releases if x.type == 'Album']
    singles = [x for x in final_releases if x.type == 'Single']
    others = [x for x in final_releases if x.type not in ['Album', 'Single']]

    return albums, singles, others
Exemple #10
0
    def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, logger=None, connect_default_logger=False):
        """Set up the download task.

        :param urls: a URL string or a list of mirror URLs; the first is the
            active URL, the rest are kept as fallbacks.
        :param dest: destination path (file or directory); defaults to a
            per-download file under the system temp dir.
        :param progress_bar: whether to display a progress bar.
        :param fix_urls: quote/normalize every mirror via utils.url_fix().
        :param logger: explicit logger to use.
        :param connect_default_logger: when no logger is given, create the
            package's debugging logger instead of a silent dummy one.
        """
        self.mirrors = [urls] if isinstance(urls, basestring) else urls
        if fix_urls:
            self.mirrors = [utils.url_fix(x) for x in self.mirrors]
        self.url = self.mirrors.pop(0)

        fn = os.path.basename(urlparse(self.url).path)
        self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
        # A trailing separator means dest names a directory: drop a file
        # squatting on that name, then append the URL-derived filename.
        if self.dest[-1] == os.sep:
            if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
                os.unlink(self.dest[:-1])
            self.dest += fn
        if os.path.isdir(self.dest):
            self.dest = os.path.join(self.dest, fn)

        self.progress_bar = progress_bar

        if logger:
            self.logger = logger
        elif connect_default_logger:
            self.logger = utils.create_debugging_logger()
        else:
            self.logger = utils.DummyLogger()

        self.headers = {'User-Agent': utils.get_random_useragent()}
        self.threads_count = 3
        self.timeout = 4
        self.current_attemp = 1
        self.attemps_limit = 4
        self.minChunkFile = 1024**2*2 # 2MB
        self.filesize = 0
        self.shared_var = multiprocessing.Value(c_int, 0) # a ctypes var that counts the bytes already downloaded
        self.thread_shared_cmds = {}
        self.status = "ready"
        self.verify_hash = False
        self._killed = False
        self._failed = False
        self._start_func_blocking = True
        self.errors = []

        self.post_threadpool_thread = None
        self.control_thread = None

        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.debug('Folder "%s" does not exist. Creating...' % os.path.dirname(self.dest))
            os.makedirs(os.path.dirname(self.dest))
        if not utils.is_HTTPRange_supported(self.url):
            self.logger.warning("Server does not support HTTPRange. threads_count is set to 1.")
            self.threads_count = 1
        if os.path.exists(self.dest):
            self.logger.warning('Destination "%s" already exists. Existing file will be removed.' % self.dest)
        # BUG FIX: removed a second, byte-identical makedirs check that was
        # dead code -- the directory is guaranteed to exist after the first
        # check above created it.

        self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)
Exemple #11
0
 def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, logger=None, connect_default_logger=False):
     """Set up the download task (python 2 variant).

     urls may be a single URL string or a list of mirrors; the first
     mirror becomes self.url and the rest stay in self.mirrors. dest may
     be a file path, a directory path (trailing separator), or None for
     a default under the system temp dir.
     """
     self.mirrors = [urls] if isinstance(urls, basestring) else urls
     if fix_urls:
         self.mirrors = [utils.url_fix(x) for x in self.mirrors]
     self.url = self.mirrors.pop(0)

     # Filename derived from the unquoted URL path (decode is py2-only).
     fn = urllib2.unquote(os.path.basename(urlparse(self.url).path)).decode('utf-8')
     self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
     # A trailing separator means dest names a directory: remove a file
     # squatting on that name, then append the URL-derived filename.
     if self.dest[-1] == os.sep:
         if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
             os.unlink(self.dest[:-1])
         self.dest += fn
     if os.path.isdir(self.dest):
         self.dest = os.path.join(self.dest, fn)

     self.progress_bar = progress_bar

     if logger:
         self.logger = logger
     elif connect_default_logger:
         self.logger = utils.create_debugging_logger()
     else:
         self.logger = utils.DummyLogger()

     self.headers = {'User-Agent': utils.get_random_useragent()}
     self.threads_count = 5
     self.timeout = 4
     self.current_attemp = 1
     self.attemps_limit = 4
     self.minChunkFile = 1024**2*2 # 2MB
     self.shared_var = multiprocessing.Value(c_int, 0) # a ctypes var that counts the bytes already downloaded
     self.filesize = 0
     self.thread_shared_cmds = {}
     self.status = "ready"
     self.verify_hash = False
     self._killed = False
     self._failed = False
     self._start_func_blocking = True
     self.errors = []

     self.post_threadpool_thread = None
     self.control_thread = None

     if not os.path.exists(os.path.dirname(self.dest)):
         self.logger.debug('Folder "%s" does not exist. Creating...' % os.path.dirname(self.dest))
         os.makedirs(os.path.dirname(self.dest))
     if not utils.is_HTTPRange_supported(self.url):
         self.logger.warning("Server does not support HTTPRange. threads_count is set to 1.")
         self.threads_count = 1
     if os.path.exists(self.dest):
         self.logger.warning('Destination "%s" already exists. Existing file will be removed.' % self.dest)
     # NOTE(review): this repeats the makedirs check above; after the first
     # check created the folder it can never trigger -- looks like dead code.
     if not os.path.exists(os.path.dirname(self.dest)):
         self.logger.warning('Directory "%s" does not exist. Creating it...' % os.path.dirname(self.dest))
         os.makedirs(os.path.dirname(self.dest))

     self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)
Exemple #12
0
    def get_url(self):
        """Build the render URL for this page on its wiki project."""
        params = {"title": self.title, "action": "render"}

        # Projects without a language subdomain need the language in the query.
        if not self.project.subdomain_lang:
            params["uselang"] = self.lang

        query = "&".join("%s=%s" % item for item in params.items())
        return url_fix("http://%s/w/index.php?%s" % (self.get_domain(), query))
Exemple #13
0
def parse_musicBrainz(title, artist):
    """Query the musicBrainz recording API for (title, artist) release data.

    Returns a dict mapping release id -> {title, date, country, artist, tag},
    or None when the response carries no release list.
    """
    url = 'http://www.musicbrainz.org/ws/2/recording?query="%s" AND artist:"%s"' % (
        urllib2.quote(title.encode("utf8")), urllib2.quote(artist.encode("utf8")))
    url = utils.url_fix(url)

    log.debug('Fetching %s...' % url)
    conn = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = conn.read()
    conn.close()

    document = xml.dom.minidom.parseString(response)

    def _first_recording():
        "First <recording> under <metadata><recording-list>; IndexError if absent."
        return document.getElementsByTagName('metadata')[0] \
            .getElementsByTagName('recording-list')[0] \
            .getElementsByTagName('recording')[0]

    try:
        tag = _first_recording().getElementsByTagName('tag-list')[0] \
            .getElementsByTagName('tag')[0] \
            .getElementsByTagName('name')[0].childNodes[0].data
    except IndexError:
        tag = ""

    try:
        artist = _first_recording().getElementsByTagName('artist-credit')[0] \
            .getElementsByTagName('name-credit')[0] \
            .getElementsByTagName('artist')[0] \
            .getElementsByTagName('name')[0].childNodes[0].data
    except IndexError:
        artist = ""

    try:
        release_nodes = _first_recording().getElementsByTagName('release-list')[0] \
            .getElementsByTagName('release')
    except IndexError:
        return None

    release_dict = {}
    # Loop variable renamed (original shadowed the document's 'dom' name).
    for release_node in release_nodes:
        entry = {'artist': artist, 'tag': tag}
        for field in ('title', 'date', 'country'):
            nodes = release_node.getElementsByTagName(field)
            entry[field] = nodes[0].childNodes[0].data if nodes else ""
        release_dict[release_node.attributes.values()[0].value] = entry

    return release_dict
Exemple #14
0
    def __init__(self,
                 urls,
                 dest=None,
                 max_threads=5,
                 show_output=True,
                 logger=None):
        """Set up a download task.

        :param urls: a single URL string or a list of mirror URLs; the first
            becomes the active URL, the rest remain as fallbacks.
        :param dest: destination file path; defaults to config.temp_dir plus
            the filename taken from the URL path.
        :param max_threads: number of download threads (forced to 1 when the
            server lacks HTTP Range support).
        :param show_output: whether to print progress output.
        :param logger: logger to use; defaults to the no-op 'dummy' logger.
        """
        self.mirrors = [urls] if isinstance(urls, basestring) else urls
        # Only URLs containing spaces are normalized here (unlike the
        # fix_urls variants, which normalize every mirror).
        for i, url in enumerate(self.mirrors):
            if " " in url:
                self.mirrors[i] = utils.url_fix(url)
        self.url = self.mirrors.pop(0)

        # NOTE(review): raw backslash join assumes a Windows-style
        # config.temp_dir -- confirm.
        self.dest = dest or r"%s\%s" % (config.temp_dir, urlparse(
            self.url).path.split('/')[-1])
        self.show_output = show_output
        self.logger = logger or logging.getLogger('dummy')
        self.max_threads = max_threads

        self.headers = config.generic_http_headers
        self.timeout = 4
        self.current_attemp = 1
        self.attemps_limit = 4
        self.minChunkFile = 1024**2  # 1MB
        self.filesize = 0
        self.shared_var = multiprocessing.Value(
            c_int, 0)  # a ctypes var that counts the bytes already downloaded
        self.status = "ready"
        self.verify_hash = False
        self._killed = False
        self._failed = False

        self.post_threadpool_thread = None
        self.control_thread = None

        # Fall back to a single thread when the server cannot serve ranges.
        if not is_ServerSupportHTTPRange(self.url):
            self.logger.warning(
                "Server does not support HTTPRange. max_threads is set to 1.")
            self.max_threads = 1
        if os.path.exists(self.dest):
            self.logger.warning(
                "Destination '%s' already exists. Existing file will be removed."
                % self.dest)
        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.warning(
                "Directory '%s' does not exist. Creating it..." %
                os.path.dirname(self.dest))
            os.makedirs(os.path.dirname(self.dest))

        self.pool = ThreadPool(max_threads=self.max_threads,
                               catch_returns=True,
                               logger=self.logger)
Exemple #15
0
def get_page_url(gradesPage, dom, curr_user):
    """Return the portal URL of curr_user's grades or schedule page.

    Returns False when the student, calendar, or schedule structure cannot
    be found in the portal outline DOM.
    """
    student = None
    for candidate in dom.getElementsByTagName('Student'):
        if (candidate.hasAttribute('studentNumber') and
                candidate.getAttribute('studentNumber') == curr_user.student_id):
            student = candidate
            break
    if not student:
        print("Account does not have this student ID")
        return False

    person_id = student.getAttribute('personID')
    first_name = student.getAttribute('firstName')
    last_name = student.getAttribute('lastName')

    calendars = student.getElementsByTagName('Calendar')
    if len(calendars) < 1:
        return False
    school_id = calendars[0].getAttribute('schoolID')

    structures = calendars[0].getElementsByTagName('ScheduleStructure')
    if len(structures) < 1:
        return False
    structure = structures[0]
    calendar_id = structure.getAttribute('calendarID')
    structure_id = structure.getAttribute('structureID')
    calendar_name = structure.getAttribute('calendarName')

    if gradesPage:
        mode, x = 'grades', 'portal.PortalGrades'
    else:
        mode, x = 'schedule', 'portal.PortalSchedule'

    # The portal expects the first name twice (studentFirstName and firstName).
    return utils.url_fix(get_base_url() + u"portal/portal.xsl?x=portal.PortalOutline&lang=en&personID={}&studentFirstName={}&lastName={}&firstName={}&schoolID={}&calendarID={}&structureID={}&calendarName={}&mode={}&x={}&x=resource.PortalOptions".format(
        person_id, first_name, last_name, first_name, school_id,
        calendar_id, structure_id, calendar_name, mode, x))
Exemple #16
0
def parse_musicBrainz(title, artist):
    """Query the musicBrainz recording API and collect release data.

    Returns a dict mapping release id -> {title, date, country, artist, tag},
    or None when no release list is present in the response.
    """
    quoted_title = urllib2.quote(title.encode("utf8"))
    quoted_artist = urllib2.quote(artist.encode("utf8"))
    url = utils.url_fix('http://www.musicbrainz.org/ws/2/recording?query="%s" AND artist:"%s"' % (quoted_title, quoted_artist))

    log.debug('Fetching %s...' % url)
    conn = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = conn.read()
    conn.close()

    doc = xml.dom.minidom.parseString(response)

    try:
        tag = doc.getElementsByTagName('metadata')[0].getElementsByTagName('recording-list')[0] \
            .getElementsByTagName('recording')[0].getElementsByTagName('tag-list')[0] \
            .getElementsByTagName('tag')[0].getElementsByTagName('name')[0].childNodes[0].data
    except IndexError:
        tag = ""

    try:
        artist = doc.getElementsByTagName('metadata')[0].getElementsByTagName('recording-list')[0] \
            .getElementsByTagName('recording')[0].getElementsByTagName('artist-credit')[0] \
            .getElementsByTagName('name-credit')[0].getElementsByTagName('artist')[0] \
            .getElementsByTagName('name')[0].childNodes[0].data
    except IndexError:
        artist = ""

    try:
        releases = doc.getElementsByTagName('metadata')[0].getElementsByTagName('recording-list')[0] \
            .getElementsByTagName('recording')[0].getElementsByTagName('release-list')[0] \
            .getElementsByTagName('release')
    except IndexError:
        return None

    release_dict = {}
    # Loop variable renamed (original shadowed the document's 'dom' name).
    for release_node in releases:
        entry = {'artist': artist, 'tag': tag}
        for field in ('title', 'date', 'country'):
            nodes = release_node.getElementsByTagName(field)
            entry[field] = nodes[0].childNodes[0].data if nodes else ""
        release_dict[release_node.attributes.values()[0].value] = entry

    return release_dict
Exemple #17
0
	def __init__(self, urls, dest=None, max_threads=5, show_output=True, logger=None):
		"""Set up a download task.

		urls may be a single URL string or a list of mirrors; the first
		mirror becomes self.url. dest defaults to config.temp_dir plus the
		filename taken from the URL path (raw backslash join -- assumes
		Windows-style paths; TODO confirm).
		"""
		self.mirrors = [urls] if isinstance(urls, basestring) else urls
		# Only URLs containing spaces get normalized in this variant.
		for i, url in enumerate(self.mirrors):
			if " " in url:
				self.mirrors[i] = utils.url_fix(url)
		self.url = self.mirrors.pop(0)

		self.dest = dest or r"%s\%s" % (config.temp_dir, urlparse(self.url).path.split('/')[-1])
		self.show_output = show_output
		self.logger = logger or logging.getLogger('dummy')
		self.max_threads = max_threads

		self.headers = config.generic_http_headers
		self.timeout = 4
		self.current_attemp = 1
		self.attemps_limit = 4
		self.minChunkFile = 1024**2 # 1MB
		self.filesize = 0
		self.shared_var = multiprocessing.Value(c_int, 0) # a ctypes var that counts the bytes already downloaded
		self.status = "ready"
		self.verify_hash = False
		self._killed = False
		self._failed = False

		self.post_threadpool_thread = None
		self.control_thread = None

		# Fall back to a single thread when the server cannot serve ranges.
		if not is_ServerSupportHTTPRange(self.url):
			self.logger.warning("Server does not support HTTPRange. max_threads is set to 1.")
			self.max_threads = 1
		if os.path.exists(self.dest):
			self.logger.warning("Destination '%s' already exists. Existing file will be removed." % self.dest)

		if not os.path.exists(os.path.dirname(self.dest)):
			self.logger.warning("Directory '%s' does not exist. Creating it..." % os.path.dirname(self.dest))
			os.makedirs(os.path.dirname(self.dest))

		self.pool = ThreadPool(max_threads=self.max_threads, catch_returns=True, logger=self.logger)
Exemple #18
0
 def sanitize_triple_item(item):
     """URL-fix URIRef items; leave every other RDF term untouched."""
     if not isinstance(item, term.URIRef):
         return item
     return term.URIRef(url_fix(str(item)))
Exemple #19
0
    def __init__(self,
                 urls,
                 dest=None,
                 progress_bar=True,
                 fix_urls=True,
                 threads=5,
                 logger=None,
                 connect_default_logger=False,
                 proxy=None):
        """Set up the download task.

        :param urls: a URL string or a list of mirror URLs; the first is the
            active URL, the rest are fallbacks.
        :param dest: destination path (file or directory); defaults to a
            per-download file under the system temp dir.
        :param progress_bar: whether to display a progress bar.
        :param fix_urls: normalize every mirror with utils.url_fix().
        :param threads: number of download threads (forced to 1 when the
            server lacks HTTP Range support).
        :param logger: explicit logger to use.
        :param connect_default_logger: when no logger is given, create (once
            per process) the package debugging logger instead of a dummy one.
        :param proxy: optional proxy address, installed globally via urllib2.
        """
        global DEFAULT_LOGGER_CREATED

        self.mirrors = [urls] if isinstance(urls, basestring) else urls
        if fix_urls:
            self.mirrors = [utils.url_fix(x) for x in self.mirrors]
        self.url = self.mirrors.pop(0)

        # NOTE(review): install_opener() changes urllib2 behavior
        # process-wide, not just for this download instance.
        if proxy is not None:
            proxy = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

        fn = urllib2.unquote(os.path.basename(urlparse(self.url).path))
        if sys.version_info < (3, 0):
            fn = fn.decode('utf-8')  # required only on python 2
        self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL',
                                         fn)
        # A trailing separator means dest names a directory: remove a file
        # squatting on that name, then append the URL-derived filename.
        if self.dest[-1] == os.sep:
            if os.path.exists(self.dest[:-1]) and os.path.isfile(
                    self.dest[:-1]):
                os.unlink(self.dest[:-1])
            self.dest += fn
        if os.path.isdir(self.dest):
            self.dest = os.path.join(self.dest, fn)

        self.progress_bar = progress_bar

        if logger:
            self.logger = logger
        elif connect_default_logger:
            # Create the package debugging logger only once per process;
            # later instances reuse it by name.
            if not DEFAULT_LOGGER_CREATED:
                self.logger = utils.create_debugging_logger()
                DEFAULT_LOGGER_CREATED = True
            else:
                self.logger = logging.getLogger('pySmartDL')
        else:
            self.logger = utils.DummyLogger()

        self.headers = {'User-Agent': utils.get_random_useragent()}
        self.threads_count = threads
        self.timeout = 4
        self.current_attemp = 1
        self.attemps_limit = 4
        self.minChunkFile = 1024**2 * 2  # 2MB
        self.filesize = 0
        self.shared_var = multiprocessing.Value(
            c_int, 0)  # a ctypes var that counts the bytes already downloaded
        self.thread_shared_cmds = {}
        self.status = "ready"
        self.verify_hash = False
        self._killed = False
        self._failed = False
        self._start_func_blocking = True
        self.errors = []

        self.post_threadpool_thread = None
        self.control_thread = None

        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.info('Folder "%s" does not exist. Creating...' %
                             os.path.dirname(self.dest))
            os.makedirs(os.path.dirname(self.dest))
        if not utils.is_HTTPRange_supported(self.url):
            self.logger.warning(
                "Server does not support HTTPRange. threads_count is set to 1."
            )
            self.threads_count = 1
        if os.path.exists(self.dest):
            self.logger.warning(
                'Destination "%s" already exists. Existing file will be removed.'
                % self.dest)
        # NOTE(review): this repeats the makedirs check above; after the
        # first check created the folder it can never trigger -- dead code.
        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.warning(
                'Directory "%s" does not exist. Creating it...' %
                os.path.dirname(self.dest))
            os.makedirs(os.path.dirname(self.dest))

        self.logger.info("Creating a ThreadPool of %d thread(s).",
                         self.threads_count)
        self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)
def parse_musicBrainz(title, artist):
    """Grab release data for (title, artist) from the musicBrainz API.

    Returns {release_id: {title, date, country, artist, tag}} or None when
    the response carries no release list.
    """
    url = 'http://www.musicbrainz.org/ws/2/recording?query="%s" AND artist:"%s"' % (
        urllib2.quote(title.encode("utf8")),
        urllib2.quote(artist.encode("utf8")),
    )
    url = utils.url_fix(url)

    log.debug("Fetching %s..." % url)
    conn = urllib2.urlopen(url, timeout=config.metadata_timeout)
    raw = conn.read()
    conn.close()

    document = xml.dom.minidom.parseString(raw)

    def descend(root, *names):
        "Follow a chain of tag names, first element each step; IndexError on a miss."
        node = root
        for name in names:
            node = node.getElementsByTagName(name)[0]
        return node

    try:
        tag = descend(document, "metadata", "recording-list", "recording",
                      "tag-list", "tag", "name").childNodes[0].data
    except IndexError:
        tag = ""

    try:
        artist = descend(document, "metadata", "recording-list", "recording",
                         "artist-credit", "name-credit", "artist",
                         "name").childNodes[0].data
    except IndexError:
        artist = ""

    try:
        releases = descend(document, "metadata", "recording-list",
                           "recording",
                           "release-list").getElementsByTagName("release")
    except IndexError:
        return None

    release_dict = {}
    # Loop variable renamed (original shadowed the document's 'dom' name).
    for rel in releases:

        def first_text(name):
            "Text of the first child element with this tag, or ''."
            found = rel.getElementsByTagName(name)
            return found[0].childNodes[0].data if found else ""

        release_dict[rel.attributes.values()[0].value] = {
            "title": first_text("title"),
            "date": first_text("date"),
            "country": first_text("country"),
            "artist": artist,
            "tag": tag,
        }

    return release_dict
Exemple #21
0
def musicbrainz_release_search(arid):
    '''
    Searches musicbrainz for the official releases of artist id *arid*.

    Returns three lists of MetadataRelease objects: albums, singles, others
    (sorted by date; one release per release group).
    '''
    log.debug(
        "Searching for artist %s in the releases list on musicbrainz..." %
        arid)
    url = 'http://www.musicbrainz.org/ws/2/release/?query=arid:"%s" AND status :"official"' % arid
    log.debug('Fetching %s...' % url)
    url = utils.url_fix(url)
    obj = urllib2.urlopen(url, timeout=config.metadata_timeout)
    response = obj.read()
    obj.close()

    release_groups = {}
    final_releases = []
    soup = BeautifulSoup(response, 'xml')

    # Bucket every release under its release-group id.
    for release in soup.find_all('release'):
        group = release.find('release-group')
        reid = release['id']
        rgid = group['id']

        if rgid not in release_groups:
            release_groups[rgid] = {}
            release_groups[rgid]['releases'] = {}
            release_groups[rgid]['type'] = group['type'] if group.has_key(
                'type') else ""

        d = {}
        d['date'] = release.date.text if release.date else ""
        d['title'] = release.title.text
        d['count'] = release.find('medium-list').find('track-count').text
        # NOTE(review): BeautifulSoup Tag.name is the tag's own name
        # ('artist'), not the <name> child's text -- confirm this is intended.
        d['artistname'] = release.artist.name
        release_groups[rgid]['releases'][reid] = d

    # Keep one release per group: the one with the lowest track count.
    for rgid, d in release_groups.items():
        list_of_rids = list(d['releases'].items())
        if list_of_rids:
            # BUG FIX: sorted() returns a new list; the original discarded the
            # result, so an arbitrary release was picked from each group.
            # (Counts are strings, so the ordering is lexicographic -- the
            # same key the original intended.)
            list_of_rids = sorted(list_of_rids, key=lambda x: x[1]['count'])
            reid, d2 = list_of_rids[0]

            obj = utils.cls.MetadataRelease(reid, d2['title'], 'musicbrainz',
                                            d['type'], d2['date'], d2['count'],
                                            arid, d2['artistname'])
            final_releases.append(obj)

    final_releases = sorted(final_releases, key=lambda x: x.date)
    albums = [x for x in final_releases if x.type == 'Album']
    singles = [x for x in final_releases if x.type == 'Single']
    others = [x for x in final_releases if x.type not in ['Album', 'Single']]

    return albums, singles, others