def __init__(self, verbose = True,
             vector_fname = os.path.join(datadir, 'cbow.bin'),
             card_fname = os.path.join(datadir, 'output.txt')):
    """Load a vector model and compute one vector per encoded card.

    verbose: when true, progress and summary lines are printed.
    vector_fname: binary vector file handed to read_vector_file(), whose
        (vocab, vecs) result is stored on the instance.
    card_fname: text file of encoded cards separated by utils.cardsep.
        Per the progress message below, the cards are expected to be in
        the same order as the file the vector model was built from.

    Fills self.cardvecs with (card name, vector) tuples, one for each
    non-empty card source found in card_fname.
    """
    self.verbose = verbose
    self.cardvecs = []

    if self.verbose:
        print('Building a cbow model...')
        print(' Reading binary vector data from: ' + vector_fname)
    self.vocab, self.vecs = read_vector_file(vector_fname)

    if self.verbose:
        print(' Reading encoded cards from: ' + card_fname)
        print(" They'd better be in the same order as the file used to build the vector model!")

    with open(card_fname, 'rt') as cardfile:
        encoded = cardfile.read()

    for chunk in encoded.split(utils.cardsep):
        if not chunk:
            # skip empty pieces produced by the split
            continue
        card = cardlib.Card(chunk)
        entry = (card.name,
                 makevector(self.vocab, self.vecs, card.vectorize()))
        self.cardvecs.append(entry)

    if self.verbose:
        print('... Done.')
        print(' vocab size: ' + str(len(self.vocab)))
        print(' raw vecs: ' + str(len(self.vecs)))
        print(' card vecs: ' + str(len(self.cardvecs)))
def __init__(self, verbose = True,
             json_fname = os.path.join(datadir, 'AllSets.json')):
    """Index card names from a JSON card database for fuzzy matching.

    verbose: when true, progress lines are printed. The duplicate-name
        warning is printed regardless, since it reports dropped data.
    json_fname: path passed to jdecode.mtg_open_json(); the result is
        iterated as a mapping from a card-name key to a list of JSON
        card records.

    After construction:
      self.names        maps card.name -> the record's 'name' field
      self.cardstrings  maps card.name -> card.encode()
      self.codes        maps card.name -> '<code>/<number>.jpg', or ''
                        when either the code or the number is missing
      self.matchers / self.card_matchers hold one difflib.SequenceMatcher
                        per name / per encoded card string
    """
    self.verbose = verbose
    self.names = {}
    self.codes = {}
    self.cardstrings = {}

    if self.verbose:
        print('Setting up namediff...')
        print(' Reading names from: ' + json_fname)
    json_srcs = jdecode.mtg_open_json(json_fname, verbose)

    namecount = 0
    for json_cardname in sorted(json_srcs):
        jcards = json_srcs[json_cardname]
        if len(jcards) == 0:
            continue

        # just use the first record for this name
        jcard = jcards[0]
        card = cardlib.Card(jcard)
        name = card.name
        jname = jcard['name']
        jcode = jcard[utils.json_field_info_code]
        jnum = jcard['number'] if 'number' in jcard else ''

        if name in self.names:
            print(' Duplicate name ' + name + ', ignoring.')
            continue

        self.names[name] = jname
        self.cardstrings[name] = card.encode()
        if jcode and jnum:
            self.codes[name] = jcode + '/' + jnum + '.jpg'
        else:
            self.codes[name] = ''
        namecount += 1

    # Progress output now honours the verbose flag, like the messages at
    # the top of this method; previously these printed unconditionally.
    if self.verbose:
        print(' Read ' + str(namecount) + ' unique cardnames')
        print(' Building SequenceMatcher objects.')

    self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False)
                     for n in self.names]
    self.card_matchers = [difflib.SequenceMatcher(b=self.cardstrings[n],
                                                  autojunk=False)
                          for n in self.cardstrings]

    if self.verbose:
        print('... Done.')
def mtg_open_file(fname, verbose=False, linetrans=True,
                  fmt_ordered=cardlib.fmt_ordered_default,
                  exclude_sets=default_exclude_sets,
                  exclude_types=default_exclude_types,
                  exclude_layouts=default_exclude_layouts):
    """Read cards from a JSON database or an encoded card file.

    fname: input path. A name ending in '.json' is loaded through
        mtg_open_json(); anything else is read as text and split on
        utils.cardsep.
    verbose: when true, print progress, invalid-card names and a summary.
    linetrans: forwarded to cardlib.Card for JSON records.
    fmt_ordered: forwarded to cardlib.Card for encoded card sources.
    exclude_sets / exclude_types / exclude_layouts: predicates applied to
        a JSON record's set name, each of the card's types, and its
        'layout' field; a truthy result skips the card. They are only
        consulted in the JSON branch.

    Returns the list of cardlib.Card objects. From JSON only valid cards
    are returned; from an encoded file every non-empty card source is
    returned, valid or not.
    """
    cards = []
    valid = 0
    skipped = 0
    invalid = 0
    unparsed = 0

    if fname.endswith('.json'):
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = mtg_open_json(fname, verbose)
        # sorted for stability
        for json_cardname in sorted(json_srcs):
            jcards = json_srcs[json_cardname]
            if len(jcards) == 0:
                continue

            # look for a normal rarity version, in a set we can use
            idx = 0
            card = cardlib.Card(jcards[idx], linetrans=linetrans)
            while (idx < len(jcards)
                   and (card.rarity == utils.rarity_special_marker
                        or exclude_sets(jcards[idx][utils.json_field_set_name]))):
                idx += 1
                if idx < len(jcards):
                    card = cardlib.Card(jcards[idx], linetrans=linetrans)
            # if there isn't one, settle with index 0
            if idx >= len(jcards):
                idx = 0
                card = cardlib.Card(jcards[idx], linetrans=linetrans)
            # we could go back and look for a card satisfying one of the
            # criteria, but eh

            jcard = jcards[idx]
            if (exclude_sets(jcard[utils.json_field_set_name])
                    or exclude_layouts(jcard['layout'])
                    or any(exclude_types(cardtype)
                           for cardtype in card.types)):
                skipped += 1
                continue

            if card.valid:
                valid += 1
                cards.append(card)
            elif card.parsed:
                invalid += 1
                if verbose:
                    print('Invalid card: ' + json_cardname)
            else:
                unparsed += 1

    # fall back to opening a normal encoded file
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as f:
            text = f.read()
        for card_src in text.split(utils.cardsep):
            if not card_src:
                continue
            card = cardlib.Card(card_src, fmt_ordered=fmt_ordered)
            # unlike opening from json, we still want to return invalid cards
            cards.append(card)
            if card.valid:
                valid += 1
            elif card.parsed:
                invalid += 1
                if verbose:
                    # json_cardname is not bound in this branch (it used
                    # to raise NameError here); report the card's own name.
                    print('Invalid card: ' + card.name)
            else:
                unparsed += 1

    if verbose:
        print(str(valid) + ' valid, ' + str(skipped) + ' skipped, '
              + str(invalid) + ' invalid, ' + str(unparsed)
              + ' failed to parse.')

    # Sample the first cards to guess whether the input was read with the
    # wrong field order.
    good_count = 0
    bad_count = 0
    for card in cards:
        if not card.parsed and not card.text.text:
            bad_count += 1
        elif len(card.name) > 50 or len(card.rarity) > 3:
            bad_count += 1
        else:
            good_count += 1
        if good_count + bad_count > 15:
            break
    # random heuristic
    if bad_count > 10:
        print('WARNING: Saw a bunch of unparsed cards:')
        print(' Is this a legacy format, you may need to specify the field order.')

    return cards