def parseMetadataFromString(self, mdstr):
    """The metadata string is a comma separated list of name-value pairs.

    The names match the attributes of the internal metadata struct (for now).
    The caret is the special "escape character", since it's not common in
    natural language text.

    example = "series=Kickers^, Inc. ,issue=1, year=1986"
    """

    escaped_comma = "^,"
    escaped_equals = "^="
    replacement_token = "<_~_>"

    md = GenericMetadata()

    # First, replace escaped commas with a unique token (to be changed
    # back later)
    mdstr = mdstr.replace(escaped_comma, replacement_token)
    tmp_list = mdstr.split(",")

    md_list = []
    for item in tmp_list:
        item = item.replace(replacement_token, ",")
        md_list.append(item)

    # Now build a nice dict from the list
    md_dict = dict()
    for item in md_list:
        # Make sure to fix any escaped equal signs
        i = item.replace(escaped_equals, replacement_token)
        key, value = i.split("=")
        value = value.replace(replacement_token, "=").strip()
        key = key.strip()
        if key.lower() == "credit":
            cred_attribs = value.split(":")
            role = cred_attribs[0]
            person = cred_attribs[1] if len(cred_attribs) > 1 else ""
            primary = cred_attribs[2] if len(cred_attribs) > 2 else None
            md.addCredit(person.strip(), role.strip(), primary is not None)
        else:
            md_dict[key] = value

    # Map the dict to the metadata object
    for key in md_dict:
        if not hasattr(md, key):
            print("Warning: '{0}' is not a valid tag name".format(key))
        else:
            md.isEmpty = False
            setattr(md, key, md_dict[key])

    # print(md)
    return md
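# Hypothetical usage sketch (not in the original source; "opts" stands in for
# whatever object exposes parseMetadataFromString above):
#
#   md = opts.parseMetadataFromString("series=Kickers^, Inc. ,issue=1, year=1986")
#   # expected: md.series == "Kickers, Inc.", md.issue == "1", md.year == "1986"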
def __init__(self, comic_archive, settings):
    self.comic_archive = comic_archive
    self.image_hasher = 1

    self.onlyUseAdditionalMetaData = False

    # a decent hamming score, good enough to call it a match
    self.min_score_thresh = 16

    # for alternate covers, be more stringent, since we're a bit more
    # scattershot in comparisons
    self.min_alternate_score_thresh = 12

    # the min distance a hamming score must be to separate itself from
    # the closest neighbor
    self.min_score_distance = 4

    # a very strong hamming score, almost certainly the same image
    self.strong_score_thresh = 8

    # used to eliminate series names that are too long based on our search string
    self.length_delta_thresh = settings.id_length_delta_thresh

    # used to eliminate unlikely publishers
    self.publisher_blacklist = [
        s.strip().lower() for s in settings.id_publisher_blacklist.split(',')]

    self.additional_metadata = GenericMetadata()
    self.output_function = IssueIdentifier.defaultWriteOutput
    self.callback = None
    self.coverUrlCallback = None
    self.search_result = self.ResultNoMatches
    self.cover_page_index = 0
    self.cancel = False

    self.waitAndRetryOnRateLimit = False
def readMetadata(self, style):
    if style == MetaDataStyle.CIX:
        return self.readCIX()
    elif style == MetaDataStyle.CBI:
        return self.readCBI()
    elif style == MetaDataStyle.COMET:
        return self.readCoMet()
    else:
        return GenericMetadata()
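# Hypothetical usage sketch (not in the original source; "ca" stands in for a
# comic-archive object exposing readMetadata above). An unrecognized style
# falls through to an empty GenericMetadata():
#
#   md = ca.readMetadata(MetaDataStyle.CIX)
#   if md.isEmpty:
#       print("no ComicRack-style tags in this archive")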
def readCBI(self):
    if self.cbi_md is None:
        raw_cbi = self.readRawCBI()
        if raw_cbi is None:
            self.cbi_md = GenericMetadata()
        else:
            self.cbi_md = ComicBookInfo().metadataFromString(raw_cbi)

        self.cbi_md.setDefaultPageList(self.getNumberOfPages())

    return self.cbi_md
def autoSelect(self):
    if self.comic_archive is None:
        QtGui.QMessageBox.information(
            self, "Auto-Select", "You need to load a comic first!")
        return

    if self.issue_number is None or self.issue_number == "":
        QtGui.QMessageBox.information(
            self, "Auto-Select", "Can't auto-select without an issue number (yet!)")
        return

    self.iddialog = IDProgressWindow(self)
    self.iddialog.setModal(True)
    self.iddialog.rejected.connect(self.identifyCancel)
    self.iddialog.show()

    self.ii = IssueIdentifier(self.comic_archive, self.settings)

    md = GenericMetadata()
    md.series = self.series_name
    md.issue = self.issue_number
    md.year = self.year
    md.issueCount = self.issue_count
    self.ii.setAdditionalMetadata(md)
    self.ii.onlyUseAdditionalMetaData = True
    self.ii.cover_page_index = int(self.cover_index_list[0])

    self.id_thread = IdentifyThread(self.ii)
    self.id_thread.identifyComplete.connect(self.identifyComplete)
    self.id_thread.identifyLogMsg.connect(self.logIDOutput)
    self.id_thread.identifyProgress.connect(self.identifyProgress)
    self.id_thread.start()

    self.iddialog.exec_()
def create_local_metadata(opts, ca, has_desired_tags):
    md = GenericMetadata()
    md.setDefaultPageList(ca.getNumberOfPages())

    if has_desired_tags:
        md = ca.readMetadata(opts.data_style)

    # now, overlay the parsed filename info
    if opts.parse_filename:
        md.overlay(ca.metadataFromFilename())

    # finally, use explicit stuff
    if opts.metadata is not None:
        md.overlay(opts.metadata)

    return md
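# Hypothetical usage sketch (not in the original source; assumes the archive
# object also offers a hasMetadata(style) check). The overlay order above
# means later sources win: tags read from the archive, then fields parsed
# from the filename, then any metadata given explicitly in opts:
#
#   md = create_local_metadata(opts, ca, ca.hasMetadata(opts.data_style))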
def metadataFromFilename(self, parse_scan_info=True):
    metadata = GenericMetadata()

    fnp = FileNameParser()
    fnp.parseFilename(self.path)

    if fnp.issue != "":
        metadata.issue = fnp.issue
    if fnp.series != "":
        metadata.series = fnp.series
    if fnp.volume != "":
        metadata.volume = fnp.volume
    if fnp.year != "":
        metadata.year = fnp.year
    if fnp.issue_count != "":
        metadata.issueCount = fnp.issue_count

    if parse_scan_info:
        if fnp.remainder != "":
            metadata.scanInfo = fnp.remainder

    metadata.isEmpty = False

    return metadata
def readCIX(self):
    if self.cix_md is None:
        raw_cix = self.readRawCIX()
        if raw_cix is None or raw_cix == "":
            self.cix_md = GenericMetadata()
        else:
            self.cix_md = ComicInfoXml().metadataFromString(raw_cix)

        # validate the existing page list (make sure count is correct)
        if len(self.cix_md.pages) != 0:
            if len(self.cix_md.pages) != self.getNumberOfPages():
                # pages array doesn't match the actual number of images we're
                # seeing in the archive, so discard the data
                self.cix_md.pages = []

        if len(self.cix_md.pages) == 0:
            self.cix_md.setDefaultPageList(self.getNumberOfPages())

    return self.cix_md
def metadataFromFilename(self):
    metadata = GenericMetadata()

    fnp = FileNameParser()
    fnp.parseFilename(self.path)

    if fnp.issue != "":
        metadata.issue = fnp.issue
    if fnp.series != "":
        metadata.series = fnp.series
    if fnp.volume != "":
        metadata.volume = fnp.volume
    if fnp.year != "":
        metadata.year = fnp.year
    if fnp.issue_count != "":
        metadata.issueCount = fnp.issue_count

    if self.settings.parse_scan_info:
        if fnp.remainder != "":
            metadata.scanInfo = fnp.remainder

    metadata.isEmpty = False

    return metadata
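# Hypothetical usage sketch (not in the original source; the filename is made
# up and constructor arguments are omitted). FileNameParser pulls series,
# issue, volume, year and issue count out of the file name, and only the
# non-empty fields are copied into the GenericMetadata:
#
#   ca = ComicArchive("Kickers Inc 001 (1986).cbz")  # other ctor args omitted
#   md = ca.metadataFromFilename()
#   # e.g. md.series == "Kickers Inc", md.year == "1986", md.issue taken from "001"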
def readCoMet(self):
    if self.comet_md is None:
        raw_comet = self.readRawCoMet()
        if raw_comet is None or raw_comet == "":
            self.comet_md = GenericMetadata()
        else:
            self.comet_md = CoMet().metadataFromString(raw_comet)

        self.comet_md.setDefaultPageList(self.getNumberOfPages())

        # use the coverImage value from the comet_data to mark the cover
        # in this struct:
        # walk through the list of images in the file, find the one matching
        # md.coverImage, and remove the existing default cover marking
        if self.comet_md.coverImage is not None:
            cover_idx = 0
            for idx, f in enumerate(self.getPageNameList()):
                if self.comet_md.coverImage == f:
                    cover_idx = idx
                    break
            if cover_idx != 0:
                del self.comet_md.pages[0]['Type']
                self.comet_md.pages[cover_idx]['Type'] = PageType.FrontCover

    return self.comet_md
def convertXMLToMetadata(self, tree):
    root = tree.getroot()

    if root.tag != "comet":
        # not a CoMet document
        raise ValueError("Root tag is not 'comet'")

    metadata = GenericMetadata()
    md = metadata

    # Helper function
    def xlate(tag):
        node = root.find(tag)
        if node is not None:
            return node.text
        else:
            return None

    md.series = xlate("series")
    md.title = xlate("title")
    md.issue = xlate("issue")
    md.volume = xlate("volume")
    md.comments = xlate("description")
    md.publisher = xlate("publisher")
    md.language = xlate("language")
    md.format = xlate("format")
    md.pageCount = xlate("pages")
    md.maturityRating = xlate("rating")
    md.price = xlate("price")
    md.isVersionOf = xlate("isVersionOf")
    md.rights = xlate("rights")
    md.identifier = xlate("identifier")
    md.lastMark = xlate("lastMark")
    md.genre = xlate("genre")  # TODO - repeatable field

    date = xlate("date")
    if date is not None:
        parts = date.split("-")
        if len(parts) > 0:
            md.year = parts[0]
        if len(parts) > 1:
            md.month = parts[1]

    md.coverImage = xlate("coverImage")

    readingDirection = xlate("readingDirection")
    if readingDirection is not None and readingDirection == "rtl":
        md.manga = "YesAndRightToLeft"

    # loop for character tags
    char_list = []
    for n in root:
        if n.tag == "character":
            char_list.append(n.text.strip())
    md.characters = utils.listToString(char_list)

    # Now extract the credit info
    for n in root:
        if n.tag in ("writer", "penciller", "inker",
                     "colorist", "letterer", "editor"):
            metadata.addCredit(n.text.strip(), n.tag.title())

        if n.tag == "coverDesigner":
            metadata.addCredit(n.text.strip(), "Cover")

    metadata.isEmpty = False

    return metadata
def mapCVDataToMetadata(self, volume_results, issue_results, settings):

    # Now, map the Comic Vine data to generic metadata
    metadata = GenericMetadata()

    metadata.series = issue_results['volume']['name']

    num_s = IssueString(issue_results['issue_number']).asString()
    metadata.issue = num_s
    metadata.title = issue_results['name']

    metadata.publisher = volume_results['publisher']['name']
    metadata.day, metadata.month, metadata.year = self.parseDateStr(
        issue_results['cover_date'])

    # metadata.issueCount = volume_results['count_of_issues']
    metadata.comments = self.cleanup_html(
        issue_results['description'], settings.remove_html_tables)
    if settings.use_series_start_as_volume:
        metadata.volume = volume_results['start_year']

    metadata.notes = "Tagged with the {0} fork of ComicTagger {1} using info from Comic Vine on {2}. [Issue ID {3}]".format(
        ctversion.fork,
        ctversion.version,
        datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        issue_results['id'])
    # metadata.notes += issue_results['site_detail_url']

    metadata.webLink = issue_results['site_detail_url']

    person_credits = issue_results['person_credits']
    for person in person_credits:
        if 'role' in person:
            roles = person['role'].split(',')
            for role in roles:
                # can we determine 'primary' from CV??
                metadata.addCredit(person['name'], role.title().strip(), False)

    character_credits = issue_results['character_credits']
    character_list = list()
    for character in character_credits:
        character_list.append(character['name'])
    metadata.characters = utils.listToString(character_list)

    team_credits = issue_results['team_credits']
    team_list = list()
    for team in team_credits:
        team_list.append(team['name'])
    metadata.teams = utils.listToString(team_list)

    location_credits = issue_results['location_credits']
    location_list = list()
    for location in location_credits:
        location_list.append(location['name'])
    metadata.locations = utils.listToString(location_list)

    story_arc_credits = issue_results['story_arc_credits']
    arc_list = []
    for arc in story_arc_credits:
        arc_list.append(arc['name'])
    if len(arc_list) > 0:
        metadata.storyArc = utils.listToString(arc_list)

    return metadata
def metadataFromString(self, string):

    cbi_container = json.loads(unicode(string, 'utf-8'))

    metadata = GenericMetadata()

    cbi = cbi_container['ComicBookInfo/1.0']

    # helper func: if the item is not in CBI, return None
    def xlate(cbi_entry):
        if cbi_entry in cbi:
            return cbi[cbi_entry]
        else:
            return None

    metadata.series = xlate('series')
    metadata.title = xlate('title')
    metadata.issue = xlate('issue')
    metadata.publisher = xlate('publisher')
    metadata.month = xlate('publicationMonth')
    metadata.year = xlate('publicationYear')
    metadata.issueCount = xlate('numberOfIssues')
    metadata.comments = xlate('comments')
    metadata.credits = xlate('credits')
    metadata.genre = xlate('genre')
    metadata.volume = xlate('volume')
    metadata.volumeCount = xlate('numberOfVolumes')
    metadata.language = xlate('language')
    metadata.country = xlate('country')
    metadata.criticalRating = xlate('rating')
    metadata.tags = xlate('tags')

    # make sure credits and tags are at least empty lists and not None
    if metadata.credits is None:
        metadata.credits = []
    if metadata.tags is None:
        metadata.tags = []

    # need to massage the language string to be ISO
    if metadata.language is not None:
        # reverse look-up
        pattern = metadata.language
        metadata.language = None
        for key in utils.getLanguageDict():
            if utils.getLanguageDict()[key] == pattern.encode('utf-8'):
                metadata.language = key
                break

    metadata.isEmpty = False

    return metadata
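# Hypothetical usage sketch (not in the original source; the JSON is a
# minimal, made-up ComicBookInfo/1.0 blob of the shape this parser expects):
#
#   raw = '{"ComicBookInfo/1.0": {"series": "Kickers, Inc.", "issue": "1", "publicationYear": 1986}}'
#   md = ComicBookInfo().metadataFromString(raw)
#   # expected: md.series == "Kickers, Inc.", md.issue == "1", md.year == 1986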
def convertXMLToMetadata(self, tree, check=True):

    root = tree.getroot()

    if check and root.tag != 'ComicInfo':
        # not a ComicInfo document
        raise ValueError("Root tag is not 'ComicInfo'")

    metadata = GenericMetadata()
    md = metadata

    # Helper function
    def xlate(tag):
        node = root.find(tag)
        if node is not None:
            return node.text
        else:
            return None

    md.series = xlate('Series')
    md.title = xlate('Title')
    md.issue = xlate('Number')
    md.issueCount = xlate('Count')
    md.volume = xlate('Volume')
    md.alternateSeries = xlate('AlternateSeries')
    md.alternateNumber = xlate('AlternateNumber')
    md.alternateCount = xlate('AlternateCount')
    md.comments = xlate('Summary')
    md.notes = xlate('Notes')
    md.year = xlate('Year')
    md.month = xlate('Month')
    md.day = xlate('Day')
    md.publisher = xlate('Publisher')
    md.imprint = xlate('Imprint')
    md.genre = xlate('Genre')
    md.webLink = xlate('Web')
    md.language = xlate('LanguageISO')
    md.format = xlate('Format')
    md.manga = xlate('Manga')
    md.characters = xlate('Characters')
    md.teams = xlate('Teams')
    md.locations = xlate('Locations')
    md.pageCount = xlate('PageCount')
    md.scanInfo = xlate('ScanInformation')
    md.storyArc = xlate('StoryArc')
    md.seriesGroup = xlate('SeriesGroup')
    md.maturityRating = xlate('AgeRating')

    tmp = xlate('BlackAndWhite')
    md.blackAndWhite = False
    if tmp is not None and tmp.lower() in ["yes", "true", "1"]:
        md.blackAndWhite = True

    # Now extract the credit info
    for n in root:
        if n.tag in ('Writer', 'Penciller', 'Inker',
                     'Colorist', 'Letterer', 'Editor'):
            if n.text is not None:
                for name in n.text.split(','):
                    metadata.addCredit(name.strip(), n.tag)

        if n.tag == 'CoverArtist':
            if n.text is not None:
                for name in n.text.split(','):
                    metadata.addCredit(name.strip(), "Cover")

    # parse page data now
    pages_node = root.find("Pages")
    if pages_node is not None:
        for page in pages_node:
            metadata.pages.append(page.attrib)
            # print page.attrib

    metadata.isEmpty = False

    return metadata