def search( self ):
    """Identify the issue in self.comic_archive by cover-matching on Comic Vine.

    Builds candidate series from the archive's metadata keys, scores each
    candidate issue's cover against hashes of the archive's cover (and, on
    weak scores, extra interior pages plus remote alternate covers), then
    prunes to the best matches.

    Returns:
        self.match_list -- list of match dicts (possibly empty).
        Also sets self.search_result to one of the Result* codes.
    """
    ca = self.comic_archive
    self.match_list = []
    self.cancel = False
    self.search_result = self.ResultNoMatches

    # PIL is required for the image hashing below; bail out early without it.
    if not pil_available:
        self.log_msg( "Python Imaging Library (PIL) is not available and is needed for issue identification." )
        return self.match_list

    if not ca.seemsToBeAComicArchive():
        self.log_msg( "Sorry, but "+ opts.filename + " is not a comic archive!")
        return self.match_list

    cover_image_data = ca.getPage( self.cover_page_index )
    cover_hash = self.calculateHash( cover_image_data )

    # check the aspect ratio:
    # if it's wider than it is high, it's probably a two page spread;
    # if so, crop it and calculate a second hash from the right-hand half
    narrow_cover_hash = None
    aspect_ratio = self.getAspectRatio( cover_image_data )
    if aspect_ratio < 1.0:
        right_side_image_data = self.cropCover( cover_image_data )
        if right_side_image_data is not None:
            narrow_cover_hash = self.calculateHash( right_side_image_data )

    keys = self.getSearchKeys()
    # normalize the issue number
    keys['issue_number'] = IssueString(keys['issue_number']).asString()

    # we need, at minimum, a series and issue number
    if keys['series'] is None or keys['issue_number'] is None:
        self.log_msg("Not enough info for a search!")
        return []

    self.log_msg( "Going to search for:" )
    self.log_msg( "\tSeries: " + keys['series'] )
    self.log_msg( "\tIssue : " + keys['issue_number'] )
    if keys['issue_count'] is not None:
        self.log_msg( "\tCount : " + str(keys['issue_count']) )
    if keys['year'] is not None:
        self.log_msg( "\tYear : " + str(keys['year']) )
    if keys['month'] is not None:
        self.log_msg( "\tMonth : " + str(keys['month']) )

    comicVine = ComicVineTalker( )
    comicVine.setLogFunc( self.output_function )

    self.log_msg( u"Searching for {0} #{1} ...".format( keys['series'], keys['issue_number']) )
    try:
        cv_search_results = comicVine.searchForSeries( keys['series'] )
    except ComicVineTalkerException:
        self.log_msg( "Network issue while searching for series. Aborting...")
        return []

    # FIX: idiomatic truthiness test instead of "== True"
    if self.cancel:
        return []

    # FIX: searchForSeries() returns None when Comic Vine reports an API
    # error -- iterating None below would raise TypeError.
    if cv_search_results is None:
        return []

    # First-pass filter: drop series whose names are too long, whose
    # publisher is blacklisted, or which started after the issue's year.
    series_second_round_list = []
    for item in cv_search_results:
        length_approved = False
        publisher_approved = True
        date_approved = True

        # remove any series that starts after the issue year
        if keys['year'] is not None and str(keys['year']).isdigit() and item['start_year'] is not None and str(item['start_year']).isdigit():
            if int(keys['year']) < int(item['start_year']):
                date_approved = False

        # assume that our search name is close to the actual name, say
        # within, e.g., 5 chars (self.length_delta_thresh)
        shortened_key = utils.removearticles(keys['series'])
        shortened_item_name = utils.removearticles(item['name'])
        if len( shortened_item_name ) < ( len( shortened_key ) + self.length_delta_thresh):
            length_approved = True

        # remove any series from publishers on the blacklist
        if item['publisher'] is not None:
            publisher = item['publisher']['name']
            if publisher is not None and publisher.lower() in self.publisher_blacklist:
                publisher_approved = False

        if length_approved and publisher_approved and date_approved:
            series_second_round_list.append(item)

    self.log_msg( "Searching in " + str(len(series_second_round_list)) +" series" )

    if self.callback is not None:
        self.callback( 0, len(series_second_round_list))

    # now sort the list by name length
    series_second_round_list.sort(key=lambda x: len(x['name']), reverse=False)

    # build a list of volume IDs for the batched issue query
    volume_id_list = list()
    for series in series_second_round_list:
        volume_id_list.append( series['id'])

    try:
        issue_list = comicVine.fetchIssuesByVolumeIssueNumAndYear( volume_id_list, keys['issue_number'], keys['year'])
    except ComicVineTalkerException:
        self.log_msg( "Network issue while searching for series details. Aborting...")
        return []

    # FIX: guard against a None result (API error), same as above.
    if issue_list is None:
        return []

    # now re-associate the issues and volumes
    shortlist = list()
    for issue in issue_list:
        for series in series_second_round_list:
            if series['id'] == issue['volume']['id']:
                shortlist.append( (series, issue) )
                break

    if keys['year'] is None:
        self.log_msg( u"Found {0} series that have an issue #{1}".format(len(shortlist), keys['issue_number']) )
    else:
        self.log_msg( u"Found {0} series that have an issue #{1} from {2}".format(len(shortlist), keys['issue_number'], keys['year'] ))

    # now we have a shortlist of volumes with the desired issue number
    # Do first round of cover matching
    counter = len(shortlist)
    for series, issue in shortlist:
        if self.callback is not None:
            self.callback( counter, len(shortlist)*3)
            counter += 1
        self.log_msg( u"Examining covers for ID: {0} {1} ({2}) ...".format(
                        series['id'],
                        series['name'],
                        series['start_year']), newline=False )

        # parse out the cover date
        day, month, year = comicVine.parseDateStr( issue['cover_date'] )

        # Now check the cover match against the primary image
        hash_list = [ cover_hash ]
        if narrow_cover_hash is not None:
            hash_list.append(narrow_cover_hash)

        try:
            image_url = issue['image']['super_url']
            thumb_url = issue['image']['thumb_url']
            page_url = issue['site_detail_url']

            score_item = self.getIssueCoverMatchScore( comicVine, issue['id'], image_url, thumb_url, page_url, hash_list, useRemoteAlternates = False )
        except:
            # NOTE(review): intentionally broad -- getIssueCoverMatchScore
            # presumably signals cancellation/network trouble by raising;
            # TODO confirm the exception types and narrow this clause.
            self.match_list = []
            return self.match_list

        match = dict()
        match['series'] = u"{0} ({1})".format(series['name'], series['start_year'])
        match['distance'] = score_item['score']
        match['issue_number'] = keys['issue_number']
        match['cv_issue_count'] = series['count_of_issues']
        match['url_image_hash'] = score_item['hash']
        match['issue_title'] = issue['name']
        match['issue_id'] = issue['id']
        match['volume_id'] = series['id']
        match['month'] = month
        match['year'] = year
        match['publisher'] = None
        if series['publisher'] is not None:
            match['publisher'] = series['publisher']['name']
        match['image_url'] = image_url
        match['thumb_url'] = thumb_url
        match['page_url'] = page_url
        match['description'] = issue['description']

        self.match_list.append(match)

        self.log_msg( " --> {0}".format(match['distance']), newline=False )

        self.log_msg( "" )

    if len(self.match_list) == 0:
        self.log_msg( ":-( no matches!" )
        self.search_result = self.ResultNoMatches
        return self.match_list

    # sort list by image match scores (lower distance == better match)
    self.match_list.sort(key=lambda k: k['distance'])

    l = []
    for i in self.match_list:
        l.append( i['distance'] )
    self.log_msg( "Compared to covers in {0} issue(s):".format(len(self.match_list)), newline=False)
    self.log_msg( str(l))

    def print_match(item):
        # local helper: one-line summary of a match dict
        self.log_msg( u"-----> {0} #{1} {2} ({3}/{4}) -- score: {5}".format(
                            item['series'],
                            item['issue_number'],
                            item['issue_title'],
                            item['month'],
                            item['year'],
                            item['distance']) )

    best_score = self.match_list[0]['distance']

    if best_score >= self.min_score_thresh:
        # we have 1 or more low-confidence matches (all bad cover scores);
        # look at a few more pages in the archive, and also alternate covers online
        self.log_msg( "Very weak scores for the cover. Analyzing alternate pages and covers..." )
        hash_list = [ cover_hash ]
        if narrow_cover_hash is not None:
            hash_list.append(narrow_cover_hash)
        for i in range( 1, min(3, ca.getNumberOfPages())):
            image_data = ca.getPage(i)
            page_hash = self.calculateHash( image_data )
            hash_list.append( page_hash )

        second_match_list = []
        counter = 2*len(self.match_list)
        for m in self.match_list:
            if self.callback is not None:
                self.callback( counter, len(self.match_list)*3)
                counter += 1
            self.log_msg( u"Examining alternate covers for ID: {0} {1} ...".format(
                            m['volume_id'],
                            m['series']), newline=False )
            try:
                score_item = self.getIssueCoverMatchScore( comicVine, m['issue_id'], m['image_url'], m['thumb_url'], m['page_url'], hash_list, useRemoteAlternates = True )
            except:
                # NOTE(review): same broad clause as the first-round loop above.
                self.match_list = []
                return self.match_list
            self.log_msg("--->{0}".format(score_item['score']))
            self.log_msg( "" )

            if score_item['score'] < self.min_alternate_score_thresh:
                second_match_list.append(m)
                m['distance'] = score_item['score']

        if len( second_match_list ) == 0:
            if len( self.match_list) == 1:
                self.log_msg( "No matching pages in the issue." )
                self.log_msg( u"--------------------------------------------------")
                print_match(self.match_list[0])
                self.log_msg( u"--------------------------------------------------")
                self.search_result = self.ResultFoundMatchButBadCoverScore
            else:
                self.log_msg( u"--------------------------------------------------")
                self.log_msg( u"Multiple bad cover matches! Need to use other info..." )
                self.log_msg( u"--------------------------------------------------")
                self.search_result = self.ResultMultipleMatchesWithBadImageScores
            return self.match_list
        else:
            # We did good, found something!
            self.log_msg( "Success in secondary/alternate cover matching!" )

            self.match_list = second_match_list
            # sort new list by image match scores
            self.match_list.sort(key=lambda k: k['distance'])
            best_score = self.match_list[0]['distance']
            self.log_msg("[Second round cover matching: best score = {0}]".format(best_score))
            # now drop down into the rest of the processing

    if self.callback is not None:
        self.callback( 99, 100)

    # now pare down list, remove any item more than specified distant from the top scores
    for item in reversed(self.match_list):
        if item['distance'] > best_score + self.min_score_distance:
            self.match_list.remove(item)

    # One more test for the case choosing limited series first issue vs a trade with the same cover:
    # if we have a given issue count > 1 and the volume from CV has count==1, remove it from match list
    if len(self.match_list) >= 2 and keys['issue_count'] is not None and keys['issue_count'] != 1:
        new_list = list()
        for match in self.match_list:
            if match['cv_issue_count'] != 1:
                new_list.append(match)
            else:
                self.log_msg("Removing volume {0} [{1}] from consideration (only 1 issue)".format(match['series'], match['volume_id']))

        if len(new_list) > 0:
            self.match_list = new_list

    if len(self.match_list) == 1:
        self.log_msg( u"--------------------------------------------------")
        print_match(self.match_list[0])
        self.log_msg( u"--------------------------------------------------")
        self.search_result = self.ResultOneGoodMatch

    elif len(self.match_list) == 0:
        self.log_msg( u"--------------------------------------------------")
        self.log_msg( "No matches found :(" )
        self.log_msg( u"--------------------------------------------------")
        self.search_result = self.ResultNoMatches
    else:
        # we've got multiple good matches:
        # FIX: corrected user-facing typo ("likley candiate")
        self.log_msg( "More than one likely candidate." )
        self.search_result = self.ResultMultipleGoodMatches
        self.log_msg( u"--------------------------------------------------")
        for item in self.match_list:
            print_match(item)
        self.log_msg( u"--------------------------------------------------")

    return self.match_list
def searchForSeries(self, series_name, callback=None, refresh_cache=False):
    """Search Comic Vine for volumes matching *series_name*.

    The cleaned-up name is first looked up in the local ComicVineCacher
    cache (skipped when refresh_cache is True).  Otherwise the name is
    turned into an AND-ed query and all result pages are fetched.  When
    callback is None, progress is written to the log; otherwise
    callback(current, total) is invoked as pages arrive.

    Returns a list of volume result dicts; fresh results are written back
    to the cache under the cleaned-up (pre-quoting) name.
    """
    # remove cruft from the search string
    series_name = utils.removearticles(series_name).lower().strip()

    # before we search online, look in our cache, since we might have
    # done this same search recently
    cvc = ComicVineCacher()
    if not refresh_cache:
        cached_search_results = cvc.get_search_results(series_name)

        if len(cached_search_results) > 0:
            return cached_search_results

    # remember the cleaned name: the cache is keyed on it, not on the
    # URL-quoted query string built below
    original_series_name = series_name

    # We need to make the series name into an "AND"ed query list
    query_word_list = series_name.split()
    and_list = ['AND'] * (len(query_word_list) - 1)
    and_list.append('')
    # zipper up the two lists
    query_list = zip(query_word_list, and_list)
    # flatten the list
    query_list = [item for sublist in query_list for item in sublist]
    # convert back to a string
    query_string = " ".join(query_list).strip()

    query_string = urllib.quote_plus(query_string.encode("utf-8"))

    search_url = self.api_base_url + "/search/?api_key=" + self.api_key + "&format=json&resources=volume&query=" + \
        query_string + \
        "&field_list=name,id,start_year,publisher,image,description,count_of_issues"
    cv_response = self.getCVContent(search_url + "&page=1")

    search_results = list()

    # pagination bookkeeping;
    # see http://api.comicvine.com/documentation/#handling_responses
    # (FIX: dropped the unused 'limit' local that was read here)
    current_result_count = cv_response['number_of_page_results']
    total_result_count = cv_response['number_of_total_results']
    if callback is None:
        self.writeLog(
            "Found {0} of {1} results\n".format(
                cv_response['number_of_page_results'],
                cv_response['number_of_total_results']))
    search_results.extend(cv_response['results'])
    page = 1

    if callback is not None:
        callback(current_result_count, total_result_count)

    # see if we need to keep asking for more pages...
    while (current_result_count < total_result_count):
        if callback is None:
            self.writeLog(
                "getting another page of results {0} of {1}...\n".format(
                    current_result_count,
                    total_result_count))
        page += 1

        cv_response = self.getCVContent(search_url + "&page=" + str(page))

        search_results.extend(cv_response['results'])
        current_result_count += cv_response['number_of_page_results']
        if callback is not None:
            callback(current_result_count, total_result_count)

    # cache these search results
    cvc.add_search_results(original_series_name, search_results)

    return search_results
def search(self):
    """Identify the issue in self.comic_archive by cover-matching on Comic Vine.

    Builds candidate series from the archive's metadata keys, scores each
    candidate issue's cover against hashes of the archive's cover (and, on
    weak scores, extra interior pages plus remote alternate covers), then
    prunes to the best matches.

    Returns:
        self.match_list -- list of match dicts (possibly empty).
        Also sets self.search_result to one of the Result* codes.
    """
    ca = self.comic_archive
    self.match_list = []
    self.cancel = False
    self.search_result = self.ResultNoMatches

    # PIL is required for the image hashing below; bail out early without it.
    if not pil_available:
        self.log_msg(
            "Python Imaging Library (PIL) is not available and is needed for issue identification.")
        return self.match_list

    if not ca.seemsToBeAComicArchive():
        self.log_msg(
            "Sorry, but " + opts.filename + " is not a comic archive!")
        return self.match_list

    cover_image_data = ca.getPage(self.cover_page_index)
    cover_hash = self.calculateHash(cover_image_data)

    # check the aspect ratio
    # if it's wider than it is high, it's probably a two page spread
    # if so, crop it and calculate a second hash
    narrow_cover_hash = None
    aspect_ratio = self.getAspectRatio(cover_image_data)
    if aspect_ratio < 1.0:
        right_side_image_data = self.cropCover(cover_image_data)
        if right_side_image_data is not None:
            narrow_cover_hash = self.calculateHash(right_side_image_data)

    #self.log_msg("Cover hash = {0:016x}".format(cover_hash))

    keys = self.getSearchKeys()
    # normalize the issue number
    keys['issue_number'] = IssueString(keys['issue_number']).asString()

    # we need, at minimum, a series and issue number
    if keys['series'] is None or keys['issue_number'] is None:
        self.log_msg("Not enough info for a search!")
        return []

    self.log_msg("Going to search for:")
    self.log_msg("\tSeries: " + keys['series'])
    self.log_msg("\tIssue: " + keys['issue_number'])
    if keys['issue_count'] is not None:
        self.log_msg("\tCount: " + str(keys['issue_count']))
    if keys['year'] is not None:
        self.log_msg("\tYear: " + str(keys['year']))
    if keys['month'] is not None:
        self.log_msg("\tMonth: " + str(keys['month']))

    #self.log_msg("Publisher Blacklist: " + str(self.publisher_blacklist))
    comicVine = ComicVineTalker()
    # honor the instance's rate-limit policy on the talker
    comicVine.wait_for_rate_limit = self.waitAndRetryOnRateLimit

    comicVine.setLogFunc(self.output_function)

    # self.log_msg(("Searching for " + keys['series'] + "...")
    self.log_msg(u"Searching for {0} #{1} ...".format(
        keys['series'], keys['issue_number']))
    try:
        cv_search_results = comicVine.searchForSeries(keys['series'])
    except ComicVineTalkerException:
        self.log_msg(
            "Network issue while searching for series. Aborting...")
        return []

    #self.log_msg("Found " + str(len(cv_search_results)) + " initial results")
    if self.cancel:
        return []

    # searchForSeries() can return None on a Comic Vine API error
    if cv_search_results is None:
        return []

    series_second_round_list = []

    # First-pass filter: drop series with too-long names, blacklisted
    # publishers, or start dates after the issue's year.
    #self.log_msg("Removing results with too long names, banned publishers, or future start dates")
    for item in cv_search_results:
        length_approved = False
        publisher_approved = True
        date_approved = True

        # remove any series that starts after the issue year
        if keys['year'] is not None and str(
                keys['year']).isdigit() and item['start_year'] is not None and str(
                item['start_year']).isdigit():
            if int(keys['year']) < int(item['start_year']):
                date_approved = False

        # assume that our search name is close to the actual name, say
        # within ,e.g. 5 chars
        shortened_key = utils.removearticles(keys['series'])
        shortened_item_name = utils.removearticles(item['name'])
        if len(shortened_item_name) < (
                len(shortened_key) + self.length_delta_thresh):
            length_approved = True

        # remove any series from publishers on the blacklist
        if item['publisher'] is not None:
            publisher = item['publisher']['name']
            if publisher is not None and publisher.lower(
            ) in self.publisher_blacklist:
                publisher_approved = False

        if length_approved and publisher_approved and date_approved:
            series_second_round_list.append(item)

    self.log_msg(
        "Searching in " + str(len(series_second_round_list)) + " series")

    if self.callback is not None:
        self.callback(0, len(series_second_round_list))

    # now sort the list by name length
    series_second_round_list.sort(
        key=lambda x: len(x['name']), reverse=False)

    # build a list of volume IDs
    volume_id_list = list()
    for series in series_second_round_list:
        volume_id_list.append(series['id'])

    try:
        issue_list = comicVine.fetchIssuesByVolumeIssueNumAndYear(
            volume_id_list, keys['issue_number'], keys['year'])
    except ComicVineTalkerException:
        self.log_msg(
            "Network issue while searching for series details. Aborting...")
        return []

    # again, None signals an API-level failure
    if issue_list is None:
        return []

    shortlist = list()
    # now re-associate the issues and volumes
    for issue in issue_list:
        for series in series_second_round_list:
            if series['id'] == issue['volume']['id']:
                shortlist.append((series, issue))
                break

    if keys['year'] is None:
        self.log_msg(u"Found {0} series that have an issue #{1}".format(
            len(shortlist), keys['issue_number']))
    else:
        self.log_msg(
            u"Found {0} series that have an issue #{1} from {2}".format(
                len(shortlist), keys['issue_number'], keys['year']))

    # now we have a shortlist of volumes with the desired issue number
    # Do first round of cover matching
    counter = len(shortlist)
    for series, issue in shortlist:
        if self.callback is not None:
            self.callback(counter, len(shortlist) * 3)
            counter += 1
        self.log_msg(u"Examining covers for ID: {0} {1} ({2}) ...".format(
            series['id'],
            series['name'],
            series['start_year']), newline=False)

        # parse out the cover date
        day, month, year = comicVine.parseDateStr(issue['cover_date'])

        # Now check the cover match against the primary image
        hash_list = [cover_hash]
        if narrow_cover_hash is not None:
            hash_list.append(narrow_cover_hash)

        try:
            image_url = issue['image']['super_url']
            thumb_url = issue['image']['thumb_url']
            page_url = issue['site_detail_url']

            score_item = self.getIssueCoverMatchScore(
                comicVine,
                issue['id'],
                image_url,
                thumb_url,
                page_url,
                hash_list,
                useRemoteAlternates=False)
        except:
            # NOTE(review): intentionally broad -- presumably catches the
            # identifier's cancel/network exceptions; TODO confirm and narrow
            self.match_list = []
            return self.match_list

        match = dict()
        match['series'] = u"{0} ({1})".format(
            series['name'], series['start_year'])
        match['distance'] = score_item['score']
        match['issue_number'] = keys['issue_number']
        match['cv_issue_count'] = series['count_of_issues']
        match['url_image_hash'] = score_item['hash']
        match['issue_title'] = issue['name']
        match['issue_id'] = issue['id']
        match['volume_id'] = series['id']
        match['month'] = month
        match['year'] = year
        match['publisher'] = None
        if series['publisher'] is not None:
            match['publisher'] = series['publisher']['name']
        match['image_url'] = image_url
        match['thumb_url'] = thumb_url
        match['page_url'] = page_url
        match['description'] = issue['description']

        self.match_list.append(match)

        self.log_msg(" --> {0}".format(match['distance']), newline=False)

        self.log_msg("")

    if len(self.match_list) == 0:
        self.log_msg(":-(no matches!")
        self.search_result = self.ResultNoMatches
        return self.match_list

    # sort list by image match scores (lower distance == better match)
    self.match_list.sort(key=lambda k: k['distance'])

    l = []
    for i in self.match_list:
        l.append(i['distance'])
    self.log_msg("Compared to covers in {0} issue(s):".format(
        len(self.match_list)), newline=False)
    self.log_msg(str(l))

    def print_match(item):
        # local helper: one-line summary of a match dict
        self.log_msg(u"-----> {0} #{1} {2} ({3}/{4}) -- score: {5}".format(
            item['series'],
            item['issue_number'],
            item['issue_title'],
            item['month'],
            item['year'],
            item['distance']))

    best_score = self.match_list[0]['distance']

    if best_score >= self.min_score_thresh:
        # we have 1 or more low-confidence matches (all bad cover scores)
        # look at a few more pages in the archive, and also alternate
        # covers online
        self.log_msg(
            "Very weak scores for the cover. Analyzing alternate pages and covers...")
        hash_list = [cover_hash]
        if narrow_cover_hash is not None:
            hash_list.append(narrow_cover_hash)
        for i in range(1, min(3, ca.getNumberOfPages())):
            image_data = ca.getPage(i)
            page_hash = self.calculateHash(image_data)
            hash_list.append(page_hash)

        second_match_list = []
        counter = 2 * len(self.match_list)
        for m in self.match_list:
            if self.callback is not None:
                self.callback(counter, len(self.match_list) * 3)
                counter += 1
            self.log_msg(
                u"Examining alternate covers for ID: {0} {1} ...".format(
                    m['volume_id'],
                    m['series']), newline=False)
            try:
                score_item = self.getIssueCoverMatchScore(
                    comicVine,
                    m['issue_id'],
                    m['image_url'],
                    m['thumb_url'],
                    m['page_url'],
                    hash_list,
                    useRemoteAlternates=True)
            except:
                # NOTE(review): same broad clause as the first-round loop
                self.match_list = []
                return self.match_list
            self.log_msg("--->{0}".format(score_item['score']))
            self.log_msg("")

            if score_item['score'] < self.min_alternate_score_thresh:
                second_match_list.append(m)
                m['distance'] = score_item['score']

        if len(second_match_list) == 0:
            if len(self.match_list) == 1:
                self.log_msg("No matching pages in the issue.")
                self.log_msg(
                    u"--------------------------------------------------------------------------")
                print_match(self.match_list[0])
                self.log_msg(
                    u"--------------------------------------------------------------------------")
                self.search_result = self.ResultFoundMatchButBadCoverScore
            else:
                self.log_msg(
                    u"--------------------------------------------------------------------------")
                self.log_msg(
                    u"Multiple bad cover matches! Need to use other info...")
                self.log_msg(
                    u"--------------------------------------------------------------------------")
                self.search_result = self.ResultMultipleMatchesWithBadImageScores
            return self.match_list
        else:
            # We did good, found something!
            self.log_msg("Success in secondary/alternate cover matching!")

            self.match_list = second_match_list
            # sort new list by image match scores
            self.match_list.sort(key=lambda k: k['distance'])
            best_score = self.match_list[0]['distance']
            self.log_msg(
                "[Second round cover matching: best score = {0}]".format(best_score))

            # now drop down into the rest of the processing

    if self.callback is not None:
        self.callback(99, 100)

    # now pare down list, remove any item more than specified distant from
    # the top scores
    for item in reversed(self.match_list):
        if item['distance'] > best_score + self.min_score_distance:
            self.match_list.remove(item)

    # One more test for the case choosing limited series first issue vs a trade with the same cover:
    # if we have a given issue count > 1 and the volume from CV has
    # count==1, remove it from match list
    if len(self.match_list) >= 2 and keys[
            'issue_count'] is not None and keys['issue_count'] != 1:
        new_list = list()
        for match in self.match_list:
            if match['cv_issue_count'] != 1:
                new_list.append(match)
            else:
                self.log_msg(
                    "Removing volume {0} [{1}] from consideration (only 1 issue)".format(
                        match['series'], match['volume_id']))

        if len(new_list) > 0:
            self.match_list = new_list

    if len(self.match_list) == 1:
        self.log_msg(
            u"--------------------------------------------------------------------------")
        print_match(self.match_list[0])
        self.log_msg(
            u"--------------------------------------------------------------------------")
        self.search_result = self.ResultOneGoodMatch

    elif len(self.match_list) == 0:
        self.log_msg(
            u"--------------------------------------------------------------------------")
        self.log_msg("No matches found :(")
        self.log_msg(
            u"--------------------------------------------------------------------------")
        self.search_result = self.ResultNoMatches
    else:
        # we've got multiple good matches:
        self.log_msg("More than one likely candidate.")
        self.search_result = self.ResultMultipleGoodMatches
        self.log_msg(
            u"--------------------------------------------------------------------------")
        for item in self.match_list:
            print_match(item)
        self.log_msg(
            u"--------------------------------------------------------------------------")

    return self.match_list
def searchForSeries( self, series_name , callback=None, refresh_cache=False ):
    """Search Comic Vine for volumes matching *series_name*.

    The cleaned-up name is first looked up in the local ComicVineCacher
    cache (skipped when refresh_cache is True); otherwise every result
    page is fetched from the API.  When callback is None, progress is
    written to the log; otherwise callback(current, total) is invoked as
    pages arrive.

    Returns a list of volume result dicts, or None when Comic Vine
    reports a query error; fresh results are cached under the cleaned-up
    (pre-quoting) name.
    """
    def fetch_response(url):
        # Fetch and parse one page of results.  A status_code other than 1
        # means Comic Vine rejected the query: log it and return None.
        # (FIX: this logic was duplicated verbatim for page 1 and for the
        # pagination loop; extracted here.)
        response = json.loads(self.getUrlContent(url))
        if response['status_code'] != 1:
            self.writeLog( "Comic Vine query failed with error: [{0}]. \n".format( response['error'] ))
            return None
        return response

    # remove cruft from the search string
    series_name = utils.removearticles( series_name ).lower().strip()

    # before we search online, look in our cache, since we might have
    # done this same search recently
    cvc = ComicVineCacher( )
    if not refresh_cache:
        cached_search_results = cvc.get_search_results( series_name )

        if len(cached_search_results) > 0:
            return cached_search_results

    # remember the cleaned name: the cache is keyed on it, not on the
    # URL-quoted form built next
    original_series_name = series_name

    series_name = urllib.quote_plus(series_name.encode("utf-8"))
    search_url = self.api_base_url + "/search/?api_key=" + self.api_key + "&format=json&resources=volume&query=" + series_name + "&field_list=name,id,start_year,publisher,image,description,count_of_issues"

    cv_response = fetch_response(search_url + "&page=1")
    if cv_response is None:
        return None

    search_results = list()

    # pagination bookkeeping;
    # see http://api.comicvine.com/documentation/#handling_responses
    # (FIX: dropped the unused 'limit' local that was read here)
    current_result_count = cv_response['number_of_page_results']
    total_result_count = cv_response['number_of_total_results']
    if callback is None:
        self.writeLog( "Found {0} of {1} results\n".format( cv_response['number_of_page_results'], cv_response['number_of_total_results']))
    search_results.extend( cv_response['results'])
    page = 1

    if callback is not None:
        callback( current_result_count, total_result_count )

    # see if we need to keep asking for more pages...
    while ( current_result_count < total_result_count ):
        if callback is None:
            self.writeLog("getting another page of results {0} of {1}...\n".format( current_result_count, total_result_count))
        page += 1

        cv_response = fetch_response(search_url + "&page=" + str(page))
        if cv_response is None:
            return None

        search_results.extend( cv_response['results'])
        current_result_count += cv_response['number_of_page_results']
        if callback is not None:
            callback( current_result_count, total_result_count )

    # cache these search results
    cvc.add_search_results( original_series_name, search_results )

    return search_results