def __init__(self, verbose = True, json_fname = os.path.join(datadir, 'AllSets.json')):
    """Load card names from a JSON card file and build name matchers.

    Parameters:
      verbose    -- when true, print progress messages; also forwarded to
                    jdecode.mtg_open_json.
      json_fname -- path of the JSON file handed to jdecode.mtg_open_json.

    After construction:
      self.names    -- maps the name reported by cardlib.Card to the 'name'
                       field of the JSON entry it was built from.
      self.matchers -- one difflib.SequenceMatcher per key of self.names,
                       with that key preset as sequence ``b``.

    Progress messages are printed only when ``verbose`` is true.  The
    duplicate-name warning is always printed, since it reports a problem
    with the input rather than progress.
    """
    self.verbose = verbose
    self.names = {}

    if self.verbose:
        print('Setting up namediff...')
        print(' Reading names from: ' + json_fname)
    # NOTE(review): presumably a mapping of card name -> list of JSON
    # entries; confirm against jdecode.mtg_open_json.
    json_srcs = jdecode.mtg_open_json(json_fname, verbose)

    namecount = 0
    for json_cardname in sorted(json_srcs):
        jcards = json_srcs[json_cardname]
        if not jcards:
            continue

        # just use the first one
        first = jcards[0]
        card = cardlib.Card(first)
        name = card.name
        jname = first['name']

        if name in self.names:
            print(' Duplicate name ' + name + ', ignoring.')
        else:
            self.names[name] = jname
            namecount += 1

    if self.verbose:
        print(' Read ' + str(namecount) + ' unique cardnames')
        print(' Building SequenceMatcher objects.')

    # autojunk is disabled so difflib's popularity heuristic never discards
    # characters of a name.
    self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False) for n in self.names]

    if self.verbose:
        print('... Done.')
def __init__(self, verbose = True, json_fname = os.path.join(datadir, 'AllSets.json')):
    """Load card names, image codes and encodings from a JSON card file.

    Parameters:
      verbose    -- when true, print progress messages; also forwarded to
                    jdecode.mtg_open_json.
      json_fname -- path of the JSON file handed to jdecode.mtg_open_json.

    After construction (all dictionaries are keyed by the name reported
    by cardlib.Card):
      self.names         -- the 'name' field of the source JSON entry.
      self.codes         -- '<code>/<number>.jpg' when both the code field
                            (utils.json_field_info_code) and 'number' are
                            non-empty, otherwise ''.
      self.cardstrings   -- the result of card.encode().
      self.matchers      -- one difflib.SequenceMatcher per name, with the
                            name preset as sequence ``b``.
      self.card_matchers -- one difflib.SequenceMatcher per encoded card,
                            with the encoding preset as sequence ``b``.

    Progress messages are printed only when ``verbose`` is true.  The
    duplicate-name warning is always printed, since it reports a problem
    with the input rather than progress.
    """
    self.verbose = verbose
    self.names = {}
    self.codes = {}
    self.cardstrings = {}

    if self.verbose:
        print('Setting up namediff...')
        print(' Reading names from: ' + json_fname)
    # NOTE(review): presumably a mapping of card name -> list of JSON
    # entries; confirm against jdecode.mtg_open_json.
    json_srcs = jdecode.mtg_open_json(json_fname, verbose)

    namecount = 0
    for json_cardname in sorted(json_srcs):
        jcards = json_srcs[json_cardname]
        if not jcards:
            continue

        # just use the first one
        first = jcards[0]
        card = cardlib.Card(first)
        name = card.name
        jname = first['name']
        jcode = first[utils.json_field_info_code]
        jnum = first['number'] if 'number' in first else ''

        if name in self.names:
            print(' Duplicate name ' + name + ', ignoring.')
            continue

        self.names[name] = jname
        self.cardstrings[name] = card.encode()
        if jcode and jnum:
            self.codes[name] = jcode + '/' + jnum + '.jpg'
        else:
            self.codes[name] = ''
        namecount += 1

    if self.verbose:
        print(' Read ' + str(namecount) + ' unique cardnames')
        print(' Building SequenceMatcher objects.')

    # autojunk is disabled so difflib's popularity heuristic never discards
    # characters of a name or encoding.
    self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False) for n in self.names]
    self.card_matchers = [difflib.SequenceMatcher(b=self.cardstrings[n], autojunk=False)
                          for n in self.cardstrings]

    if self.verbose:
        print('... Done.')
def __init__(self, verbose = True, json_fname = os.path.join(datadir, 'AllSets.json')):
    """Build the name / code / encoding tables and their sequence matchers.

    Parameters:
      verbose    -- when true, print progress messages; also forwarded to
                    jdecode.mtg_open_json.
      json_fname -- path of the JSON file handed to jdecode.mtg_open_json.

    Attributes set (dictionaries are keyed by cardlib.Card(...).name):
      self.names         -- 'name' field of the JSON entry used.
      self.codes         -- '<code>/<number>.jpg' if the entry has a
                            non-empty utils.json_field_info_code value and
                            a non-empty 'number', else ''.
      self.cardstrings   -- card.encode() for the entry used.
      self.matchers      -- difflib.SequenceMatcher objects, one per name.
      self.card_matchers -- difflib.SequenceMatcher objects, one per
                            encoded card string.

    Only the duplicate-name warning is printed regardless of ``verbose``;
    every progress message is printed only when ``verbose`` is true.
    """
    self.verbose = verbose
    self.names = {}
    self.codes = {}
    self.cardstrings = {}

    if self.verbose:
        print('Setting up namediff...')
        print(' Reading names from: ' + json_fname)
    # NOTE(review): presumably a mapping of card name -> list of JSON
    # entries; confirm against jdecode.mtg_open_json.
    json_srcs = jdecode.mtg_open_json(json_fname, verbose)

    namecount = 0
    for json_cardname in sorted(json_srcs):
        jcards = json_srcs[json_cardname]
        if not jcards:
            continue

        # just use the first one
        entry = jcards[0]
        card = cardlib.Card(entry)
        name = card.name
        jname = entry['name']
        jcode = entry[utils.json_field_info_code]
        if 'number' in entry:
            jnum = entry['number']
        else:
            jnum = ''

        if name in self.names:
            print(' Duplicate name ' + name + ', ignoring.')
        else:
            self.names[name] = jname
            self.cardstrings[name] = card.encode()
            # An image code needs both halves; otherwise record "no image".
            self.codes[name] = (jcode + '/' + jnum + '.jpg') if (jcode and jnum) else ''
            namecount += 1

    if self.verbose:
        print(' Read ' + str(namecount) + ' unique cardnames')
        print(' Building SequenceMatcher objects.')

    # autojunk is disabled so difflib's popularity heuristic never discards
    # characters of a name or encoding.
    self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False) for n in self.names]
    self.card_matchers = [difflib.SequenceMatcher(b=self.cardstrings[n], autojunk=False)
                          for n in self.cardstrings]

    if self.verbose:
        print('... Done.')
def main(fname, verbose=True, outliers=False, dump_all=False):
    """Summarize the cards found in ``fname`` with a Datamine.

    A file name ending in ".json" is loaded through jdecode.mtg_open_json,
    and the first entry stored under each (sorted) key is used as a card
    source.  Any other file is read as text and split on utils.cardsep.

    Parameters:
      fname    -- path of the input file.
      verbose  -- print which kind of file is being opened; also forwarded
                  to jdecode.mtg_open_json.
      outliers -- also call Datamine.outliers().
      dump_all -- also call Datamine.outliers(), with dump_invalid set.
    """
    if fname.endswith(".json"):
        if verbose:
            print("This looks like a json file: " + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        # Keys with no entries contribute nothing.
        card_srcs = [
            json_srcs[cardname][0]
            for cardname in sorted(json_srcs)
            if len(json_srcs[cardname]) > 0
        ]
    else:
        if verbose:
            print("Opening encoded card file: " + fname)
        with open(fname, "rt") as f:
            card_srcs = f.read().split(utils.cardsep)

    mine = Datamine(card_srcs)
    mine.summarize()
    if outliers or dump_all:
        mine.outliers(dump_invalid=dump_all)
def main(fname, verbose = True, outliers = False, dump_all = False):
    """Feed the cards from ``fname`` to a Datamine and print its summary.

    Input handling:
      * names ending in '.json' go through jdecode.mtg_open_json; for each
        key (in sorted order) with at least one entry, the first entry is
        taken as the card source;
      * every other file is read as text and split on utils.cardsep.

    Parameters:
      fname    -- path of the input file.
      verbose  -- announce the detected file type; forwarded to
                  jdecode.mtg_open_json as well.
      outliers -- request the outlier report.
      dump_all -- request the outlier report with dump_invalid enabled.
    """
    looks_like_json = (fname[-5:] == '.json')

    if looks_like_json:
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        card_srcs = []
        for key in sorted(json_srcs):
            versions = json_srcs[key]
            if len(versions) == 0:
                continue
            card_srcs.append(versions[0])
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as infile:
            contents = infile.read()
        card_srcs = contents.split(utils.cardsep)

    mine = Datamine(card_srcs)
    mine.summarize()

    want_outliers = outliers or dump_all
    if want_outliers:
        mine.outliers(dump_invalid = dump_all)
def main(fname, oname = None, verbose = True, encoding = 'std',
         gatherer = False, for_forum = False, for_mse = False,
         creativity = False, vdump = False):
    """Decode the cards in ``fname`` and write them out in readable form.

    Parameters:
      fname      -- input path.  A name ending in '.json' is loaded through
                    jdecode.mtg_open_json; anything else is read as text
                    and split on utils.cardsep.
      oname      -- output path, or None to write to sys.stdout.
      verbose    -- print progress messages.
      encoding   -- one of 'std', 'named', 'noname', 'rfields', 'old',
                    'norarity', 'vec', 'custom'; selects the field order
                    passed to cardlib.Card as ``fmt_ordered``.
      gatherer, for_forum, vdump -- forwarded to Card.format().
      for_mse    -- write Magic Set Editor output; when ``oname`` is given
                    the result is also zipped into ``oname + '.mse-set'``.
      creativity -- after each card, list the nearest cards (CBOW) and the
                    nearest names (Namediff).

    Raises:
      ValueError -- if ``encoding`` is not one of the names listed above.

    The "Made an MSE set file" message is printed only after the archive
    was written successfully; the temporary 'set' file is removed whether
    or not zipping succeeded.
    """
    fmt_ordered = cardlib.fmt_ordered_default

    if encoding in ['std']:
        pass
    elif encoding in ['named']:
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding in ['noname']:
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding in ['rfields']:
        pass
    elif encoding in ['old']:
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding in ['norarity']:
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding in ['vec']:
        pass
    elif encoding in ['custom']:
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        raise ValueError('encode.py: unknown encoding: ' + encoding)

    cards = []
    valid = 0
    invalid = 0
    unparsed = 0

    if fname[-5:] == '.json':
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        for json_cardname in sorted(json_srcs):
            jcards = json_srcs[json_cardname]
            if not jcards:
                continue

            # look for a normal rarity version, in a set we can use
            idx = 0
            card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
            while (idx < len(jcards)
                   and (card.rarity == utils.rarity_special_marker
                        or exclude_sets(jcards[idx][utils.json_field_set_name]))):
                idx += 1
                if idx < len(jcards):
                    card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
            # if there isn't one, settle with index 0
            if idx >= len(jcards):
                idx = 0
                card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
            # we could go back and look for a card satisfying one of the criteria,
            # but eh

            if card.valid:
                valid += 1
            elif card.parsed:
                invalid += 1
            else:
                unparsed += 1
            cards.append(card)

    # fall back to opening a normal encoded file
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as f:
            text = f.read()
        for card_src in text.split(utils.cardsep):
            if card_src:
                card = cardlib.Card(card_src, fmt_ordered = fmt_ordered)
                if card.valid:
                    valid += 1
                elif card.parsed:
                    invalid += 1
                else:
                    unparsed += 1
                cards.append(card)

    if verbose:
        print(str(valid) + ' valid, ' + str(invalid) + ' invalid, '
              + str(unparsed) + ' failed to parse.')

    # Sample the first cards: if most of them look wrong, the input was
    # probably written with a different field layout.
    good_count = 0
    bad_count = 0
    for card in cards:
        if not card.parsed and not card.text.text:
            bad_count += 1
        elif len(card.name) > 50 or len(card.rarity) > 3:
            bad_count += 1
        else:
            good_count += 1
        if good_count + bad_count > 15:
            break
    # random heuristic
    if bad_count > 10:
        print('WARNING: Saw a bunch of unparsed cards:')
        print(' If this is a legacy format, try rerunning with "-e old" or "-e norarity"')

    if creativity:
        cbow = CBOW()
        namediff = Namediff()

    def writecards(writer):
        """Write every card (and optional creativity info) to ``writer``."""
        if for_mse:
            # have to prepend a massive chunk of formatting info
            writer.write(utils.mse_prepend)
        for card in cards:
            if for_mse:
                writer.write(card.to_mse().encode('utf-8'))
                fstring = ''
                if card.json:
                    fstring += 'JSON:\n' + card.json + '\n'
                if card.raw:
                    fstring += 'raw:\n' + card.raw + '\n'
                fstring += '\n'
                fstring += card.format(gatherer = gatherer, for_forum = for_forum,
                                       vdump = vdump)
                # Angle brackets are swapped for parentheses and every line
                # is indented by two tabs before being written.
                fstring = fstring.replace('<', '(').replace('>', ')')
                writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t'))
            else:
                writer.write(card.format(gatherer = gatherer, for_forum = for_forum,
                                         vdump = vdump).encode('utf-8'))

            if creativity:
                cstring = '~~ closest cards ~~\n'
                nearest = cbow.nearest(card)
                for dist, cardname in nearest:
                    cardname = namediff.names[cardname]
                    if for_forum:
                        cardname = '[card]' + cardname + '[/card]'
                    cstring += cardname + ': ' + str(dist) + '\n'
                cstring += '~~ closest names ~~\n'
                nearest = namediff.nearest(card.name)
                for dist, cardname in nearest:
                    cardname = namediff.names[cardname]
                    if for_forum:
                        cardname = '[card]' + cardname + '[/card]'
                    cstring += cardname + ': ' + str(dist) + '\n'
                if for_mse:
                    cstring = cstring.replace('<', '(').replace('>', ')')
                    cstring = ('\n\n' + cstring[:-1]).replace('\n', '\n\t\t')
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

        if for_mse:
            # more formatting info
            writer.write('version control:\n\ttype: none\napprentice code: ')

    if oname:
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
        if for_mse:
            # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
            if os.path.isfile('set'):
                print('ERROR: tried to overwrite existing file "set" - aborting.')
                return
            shutil.copyfile(oname, 'set')
            try:
                # Use the freaky mse extension instead of zip.
                with zipfile.ZipFile(oname + '.mse-set', mode='w') as zf:
                    # Zip up the set file into oname.mse-set.
                    zf.write('set')
            finally:
                # The set file is useless outside the .mse-set, delete it.
                os.remove('set')
            # Reached only when zipping succeeded.
            if verbose:
                print('Made an MSE set file called ' + oname + '.mse-set.')
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stable = False):
    """Encode the cards in ``fname`` and write them to ``oname`` or stdout.

    Parameters:
      fname    -- input path.  A name ending in '.json' is loaded through
                  jdecode.mtg_open_json; anything else is read as text and
                  split on utils.cardsep.
      oname    -- output path, or None to write to sys.stdout.
      verbose  -- print progress messages.
      dupes    -- how many copies of each valid card to emit; values of
                  zero or less are treated as 1.
      encoding -- one of 'vec', 'std', 'rmana', 'rmana_dual', 'rfields'.
      stable   -- when true, keep the cards in load order instead of
                  shuffling them with a fixed seed.

    Raises:
      ValueError -- if ``encoding`` is not one of the names listed above.
    """
    # Defaults shared by every encoding; individual encodings override them.
    fmt_ordered = cardlib.fmt_ordered_default
    fmt_labeled = None
    fieldsep = utils.fieldsep
    randomize_fields = False
    randomize_mana = False
    initial_sep = True
    final_sep = True

    # set the properties of the encoding
    if encoding == 'vec' or encoding == 'std':
        pass
    elif encoding == 'rmana':
        randomize_mana = True
    elif encoding == 'rmana_dual':
        fmt_ordered = fmt_ordered + [cardlib.field_cost]
        randomize_mana = True
    elif encoding == 'rfields':
        fmt_labeled = cardlib.fmt_labeled_default
        randomize_fields = True
        #randomize_mana = True
        final_sep = False
    else:
        raise ValueError('encode.py: unknown encoding: ' + encoding)

    # A single clamp covers both "unset" (0) and negative requests.
    if dupes <= 0:
        dupes = 1

    if verbose:
        print('Preparing to encode:')
        print(' Using encoding ' + repr(encoding))
        if dupes > 1:
            print(' Duplicating each card ' + str(dupes) + ' times.')
        if stable:
            print(' NOT randomizing order of cards.')

    cards = []
    valid = 0
    skipped = 0
    invalid = 0
    unparsed = 0

    if fname[-5:] == '.json':
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        # don't worry we randomize later
        for json_cardname in sorted(json_srcs):
            jcards = json_srcs[json_cardname]
            total = len(jcards)
            if not total > 0:
                continue

            # look for a normal rarity version, in a set we can use
            idx = 0
            card = cardlib.Card(jcards[idx])
            while idx < total:
                unusable = (card.rarity == utils.rarity_special_marker
                            or exclude_sets(jcards[idx][utils.json_field_set_name]))
                if not unusable:
                    break
                idx += 1
                if idx < total:
                    card = cardlib.Card(jcards[idx])
            # if there isn't one, settle with index 0
            if idx >= total:
                idx = 0
                card = cardlib.Card(jcards[idx])
            # we could go back and look for a card satisfying one of the criteria,
            # but eh

            chosen = jcards[idx]
            skip = bool(exclude_sets(chosen[utils.json_field_set_name])
                        or exclude_layouts(chosen['layout']))
            for cardtype in card.types:
                if exclude_types(cardtype):
                    skip = True
            if skip:
                skipped += 1
                continue

            if card.valid:
                valid += 1
                cards.extend([card] * dupes)
            elif card.parsed:
                invalid += 1
            else:
                unparsed += 1

    # fall back to opening a normal encoded file
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as infile:
            text = infile.read()
        for card_src in text.split(utils.cardsep):
            if not card_src:
                continue
            card = cardlib.Card(card_src)
            if card.valid:
                valid += 1
                cards.extend([card] * dupes)
            elif card.parsed:
                invalid += 1
            else:
                unparsed += 1

    if verbose:
        summary = (str(valid) + ' valid, ' + str(skipped) + ' skipped, '
                   + str(invalid) + ' invalid, ' + str(unparsed) + ' failed to parse.')
        print(summary)

    # This should give a random but consistent ordering, to make comparing changes
    # between the output of different versions easier.
    if not stable:
        random.seed(1371367)
        random.shuffle(cards)

    def writecards(writer):
        """Write every card to ``writer`` in the selected encoding."""
        as_vector = (encoding == 'vec')
        encode_options = dict(fmt_ordered = fmt_ordered,
                              fmt_labeled = fmt_labeled,
                              fieldsep = fieldsep,
                              randomize_fields = randomize_fields,
                              randomize_mana = randomize_mana,
                              initial_sep = initial_sep,
                              final_sep = final_sep)
        for card in cards:
            if as_vector:
                writer.write(card.vectorize() + '\n\n')
            else:
                writer.write(card.encode(**encode_options) + utils.cardsep)

    if not oname:
        writecards(sys.stdout)
        sys.stdout.flush()
        return

    if verbose:
        print('Writing output to: ' + oname)
    with open(oname, 'w') as ofile:
        writecards(ofile)
def main(fname, oname = None, verbose = True,
         gatherer = False, for_forum = False, creativity = False,
         norarity = False, for_mse = False):
    """Decode the cards in ``fname`` and write them out in readable form.

    Parameters:
      fname      -- input path.  A name ending in '.json' is loaded through
                    jdecode.mtg_open_json; anything else is read as text
                    and split on utils.cardsep.
      oname      -- output path, or None to write to sys.stdout.
      verbose    -- print progress messages.
      gatherer, for_forum -- forwarded to Card.format().
      creativity -- after each card, list the nearest cards (CBOW) and the
                    nearest names (Namediff); ignored when ``for_mse``.
      norarity   -- parse cards with a field order that has no rarity
                    field.
      for_mse    -- write Magic Set Editor output; when ``oname`` is given
                    the result is also zipped into ``oname + '.mse-set'``.

    When building the .mse-set archive, a pre-existing file called 'set'
    in the working directory is never overwritten (the function reports
    an error and returns instead).  The success message is printed only
    after the archive was written, and the temporary 'set' file is removed
    whether or not zipping succeeded.
    """
    cards = []
    valid = 0
    invalid = 0
    unparsed = 0

    if norarity:
        decode_fields = [
            cardlib.field_name,
            cardlib.field_supertypes,
            cardlib.field_types,
            cardlib.field_loyalty,
            cardlib.field_subtypes,
            #cardlib.field_rarity,
            cardlib.field_pt,
            cardlib.field_cost,
            cardlib.field_text,
        ]
    else:
        decode_fields = cardlib.fmt_ordered_default

    if fname[-5:] == '.json':
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        for json_cardname in sorted(json_srcs):
            jcards = json_srcs[json_cardname]
            if not jcards:
                continue

            # look for a normal rarity version, in a set we can use
            idx = 0
            card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
            while (idx < len(jcards)
                   and (card.rarity == utils.rarity_special_marker
                        or exclude_sets(jcards[idx][utils.json_field_set_name]))):
                idx += 1
                if idx < len(jcards):
                    card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
            # if there isn't one, settle with index 0
            if idx >= len(jcards):
                idx = 0
                card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
            # we could go back and look for a card satisfying one of the criteria,
            # but eh

            if card.valid:
                valid += 1
            elif card.parsed:
                invalid += 1
            else:
                unparsed += 1
            cards.append(card)

    # fall back to opening a normal encoded file
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as f:
            text = f.read()
        for card_src in text.split(utils.cardsep):
            if card_src:
                card = cardlib.Card(card_src, fmt_ordered = decode_fields)
                if card.valid:
                    valid += 1
                elif card.parsed:
                    invalid += 1
                else:
                    unparsed += 1
                cards.append(card)

    if verbose:
        print(str(valid) + ' valid, ' + str(invalid) + ' invalid, '
              + str(unparsed) + ' failed to parse.')

    # Sample the first cards: many unparsed, text-less cards suggest the
    # input was written with a different field layout.
    good_count = 0
    bad_count = 0
    for card in cards:
        if not card.parsed and not card.text.text:
            bad_count += 1
        else:
            good_count += 1
        if good_count + bad_count > 15:
            break
    # random heuristic
    if bad_count > 10:
        print('Saw a bunch of unparsed cards with no text:')
        print('If this is a legacy format, try rerunning with --norarity')

    if creativity:
        cbow = CBOW()
        namediff = Namediff()

    def writecards(writer):
        """Write every card (and optional creativity info) to ``writer``."""
        if for_mse:
            # have to prepend a massive chunk.
            writer.write(utils.mse_prepend)
        for card in cards:
            writer.write((card.format(gatherer = gatherer, for_forum = for_forum,
                                      for_mse = for_mse)))
            if creativity and not for_mse:
                # this won't end well if mse mode is enabled.
                writer.write('~~ closest cards ~~\n'.encode('utf-8'))
                nearest = cbow.nearest(card)
                for dist, cardname in nearest:
                    cardname = namediff.names[cardname]
                    if for_forum:
                        cardname = '[card]' + cardname + '[/card]'
                    writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8'))
                writer.write('~~ closest names ~~\n'.encode('utf-8'))
                nearest = namediff.nearest(card.name)
                for dist, cardname in nearest:
                    cardname = namediff.names[cardname]
                    if for_forum:
                        cardname = '[card]' + cardname + '[/card]'
                    writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8'))
            writer.write('\n'.encode('utf-8'))
        if for_mse:
            # have to append some junk at the end of file.
            writer.write('version control:\n\ttype: none\napprentice code: ')

    if oname:
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
        if for_mse:
            # The scratch copy below is deleted afterwards, so refuse to
            # touch a file of that name that was already there.
            if os.path.isfile('set'):
                print('ERROR: tried to overwrite existing file "set" - aborting.')
                return
            # copy whatever output file is produced, name the copy 'set' (yes, no extension).
            shutil.copyfile(oname, 'set')
            try:
                # use the freaky mse extension instead of zip.
                with zipfile.ZipFile(oname + '.mse-set', mode='w') as zf:
                    # zip up the set file into oname.mse-set.
                    zf.write('set')
            finally:
                # the set file is useless outside the .mse-set, delete it.
                os.remove('set')
            # Reached only when zipping succeeded.
            print('Made an MSE set file called ' + oname + '.mse-set.')
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
def main(
    fname,
    oname=None,
    verbose=True,
    gatherer=False,
    for_forum=False,
    for_mse=False,
    creativity=False,
    norarity=False,
    vdump=False,
    for_html=False,
):
    """Decode the cards in ``fname`` and write them out in readable form.

    Parameters:
      fname      -- input path.  A name ending in ".json" is loaded through
                    jdecode.mtg_open_json; anything else is read as text
                    and split on utils.cardsep.
      oname      -- output path, or None to write to sys.stdout.
      verbose    -- print progress messages.
      gatherer, for_forum, vdump -- forwarded to Card.format().
      for_mse    -- write Magic Set Editor output; when ``oname`` is given
                    the result is also zipped into ``oname + ".mse-set"``.
      creativity -- after each card, list the nearest cards (CBOW) and the
                    nearest names (Namediff).
      norarity   -- parse cards with a field order that has no rarity
                    field.
      for_html   -- wrap the output in utils.html_preapend /
                    utils.html_postapend and forward the flag to
                    Card.format().

    The "Made an MSE set file" message is printed only after the archive
    was written successfully; the temporary "set" file is removed whether
    or not zipping succeeded.
    """
    cards = []
    valid = 0
    invalid = 0
    unparsed = 0

    if norarity:
        decode_fields = [
            cardlib.field_name,
            cardlib.field_supertypes,
            cardlib.field_types,
            cardlib.field_loyalty,
            cardlib.field_subtypes,
            # cardlib.field_rarity,
            cardlib.field_pt,
            cardlib.field_cost,
            cardlib.field_text,
        ]
    else:
        decode_fields = cardlib.fmt_ordered_default

    if fname[-5:] == ".json":
        if verbose:
            print("This looks like a json file: " + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        for json_cardname in sorted(json_srcs):
            jcards = json_srcs[json_cardname]
            if not jcards:
                continue

            # look for a normal rarity version, in a set we can use
            idx = 0
            card = cardlib.Card(jcards[idx], fmt_ordered=decode_fields)
            while idx < len(jcards) and (
                card.rarity == utils.rarity_special_marker
                or exclude_sets(jcards[idx][utils.json_field_set_name])
            ):
                idx += 1
                if idx < len(jcards):
                    card = cardlib.Card(jcards[idx], fmt_ordered=decode_fields)
            # if there isn't one, settle with index 0
            if idx >= len(jcards):
                idx = 0
                card = cardlib.Card(jcards[idx], fmt_ordered=decode_fields)
            # we could go back and look for a card satisfying one of the criteria,
            # but eh

            if card.valid:
                valid += 1
            elif card.parsed:
                invalid += 1
            else:
                unparsed += 1
            cards.append(card)

    # fall back to opening a normal encoded file
    else:
        if verbose:
            print("Opening encoded card file: " + fname)
        with open(fname, "rt") as f:
            text = f.read()
        for card_src in text.split(utils.cardsep):
            if card_src:
                card = cardlib.Card(card_src, fmt_ordered=decode_fields)
                if card.valid:
                    valid += 1
                elif card.parsed:
                    invalid += 1
                else:
                    unparsed += 1
                cards.append(card)

    if verbose:
        print(str(valid) + " valid, " + str(invalid) + " invalid, " + str(unparsed) + " failed to parse.")

    # Sample the first cards: many unparsed, text-less cards suggest the
    # input was written with a different field layout.
    good_count = 0
    bad_count = 0
    for card in cards:
        if not card.parsed and not card.text.text:
            bad_count += 1
        else:
            good_count += 1
        if good_count + bad_count > 15:
            break
    # random heuristic
    if bad_count > 10:
        print("Saw a bunch of unparsed cards with no text:")
        print("If this is a legacy format, try rerunning with --norarity")

    if creativity:
        cbow = CBOW()
        namediff = Namediff()

    def writecards(writer):
        """Write every card (and optional creativity info) to ``writer``."""
        if for_mse:
            # have to prepend a massive chunk of formatting info
            writer.write(utils.mse_prepend)

        if for_html:
            # have to prepend html info
            writer.write(utils.html_preapend)

        for card in cards:
            if for_mse:
                writer.write(card.to_mse().encode("utf-8"))
                fstring = ""
                if card.json:
                    fstring += "JSON:\n" + card.json + "\n"
                if card.raw:
                    fstring += "raw:\n" + card.raw + "\n"
                fstring += "\n"
                fstring += card.format(gatherer=gatherer, for_forum=for_forum, vdump=vdump, for_html=for_html)
                # Angle brackets are swapped for parentheses and every line
                # is indented by two tabs before being written.
                fstring = fstring.replace("<", "(").replace(">", ")")
                writer.write(("\n" + fstring[:-1]).replace("\n", "\n\t\t"))
            else:
                writer.write(
                    card.format(gatherer=gatherer, for_forum=for_forum, vdump=vdump, for_html=for_html).encode("utf-8")
                )

            if creativity:
                cstring = "~~ closest cards ~~\n"
                nearest = cbow.nearest(card)
                for dist, cardname in nearest:
                    cardname = namediff.names[cardname]
                    if for_forum:
                        cardname = "[card]" + cardname + "[/card]"
                    cstring += cardname + ": " + str(dist) + "\n"
                cstring += "~~ closest names ~~\n"
                nearest = namediff.nearest(card.name)
                for dist, cardname in nearest:
                    cardname = namediff.names[cardname]
                    if for_forum:
                        cardname = "[card]" + cardname + "[/card]"
                    cstring += cardname + ": " + str(dist) + "\n"
                if for_mse:
                    cstring = cstring.replace("<", "(").replace(">", ")")
                    cstring = ("\n\n" + cstring[:-1]).replace("\n", "\n\t\t")
                writer.write(cstring.encode("utf-8"))

            writer.write("\n".encode("utf-8"))

        if for_mse:
            # more formatting info
            writer.write("version control:\n\ttype: none\napprentice code: ")
        if for_html:
            # closing the html file
            writer.write(utils.html_postapend)

    if oname:
        if verbose:
            print("Writing output to: " + oname)
        with open(oname, "w") as ofile:
            writecards(ofile)
        if for_mse:
            # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
            if os.path.isfile("set"):
                print('ERROR: tried to overwrite existing file "set" - aborting.')
                return
            shutil.copyfile(oname, "set")
            try:
                # Use the freaky mse extension instead of zip.
                with zipfile.ZipFile(oname + ".mse-set", mode="w") as zf:
                    # Zip up the set file into oname.mse-set.
                    zf.write("set")
            finally:
                # The set file is useless outside the .mse-set, delete it.
                os.remove("set")
            # Reached only when zipping succeeded.
            if verbose:
                print("Made an MSE set file called " + oname + ".mse-set.")
        # if for_html:
        ## not sure what to put here
    else:
        writecards(sys.stdout)
        sys.stdout.flush()