def __init__(self, verbose=True,
             vector_fname=os.path.join(datadir, 'cbow.bin'),
             card_fname=os.path.join(datadir, 'output.txt')):
        """Load a word-vector model and compute one vector per encoded card.

        Parameters:
            verbose: when true, print progress and summary counts to stdout.
            vector_fname: path handed to read_vector_file(); it must yield a
                (vocab, vecs) pair.
            card_fname: path to a text file of encoded cards separated by
                utils.cardsep.

        After construction:
            self.vocab, self.vecs: the pair returned by read_vector_file().
            self.cardvecs: list of (card name, vector) tuples, one per
                non-empty entry in card_fname, in file order.

        Note: as the verbose output warns, the card file is expected to be
        in the same order as the file used to build the vector model.
        """
        self.verbose = verbose
        self.cardvecs = []

        # Single-argument print(...) behaves identically under the Python 2
        # print statement and the Python 3 print function.
        if self.verbose:
            print('Building a cbow model...')
            print('  Reading binary vector data from: ' + vector_fname)
        (vocab, vecs) = read_vector_file(vector_fname)
        self.vocab = vocab
        self.vecs = vecs

        if self.verbose:
            print('  Reading encoded cards from: ' + card_fname)
            print('  They\'d better be in the same order as the file used to build the vector model!')
        with open(card_fname, 'rt') as f:
            text = f.read()
        for card_src in text.split(utils.cardsep):
            # Splitting on the separator leaves empty strings (e.g. after a
            # trailing separator); those are not cards.
            if not card_src:
                continue
            card = cardlib.Card(card_src)
            self.cardvecs.append((card.name,
                                  makevector(self.vocab,
                                             self.vecs,
                                             card.vectorize())))

        if self.verbose:
            print('... Done.')
            print('  vocab size: ' + str(len(self.vocab)))
            print('  raw vecs:   ' + str(len(self.vecs)))
            print('  card vecs:  ' + str(len(self.cardvecs)))
Example #2
0
    def __init__(self, verbose=True,
                 json_fname=os.path.join(datadir, 'AllSets.json')):
        """Index card names from a JSON card dump and build fuzzy matchers.

        Parameters:
            verbose: when true, print progress messages to stdout; it is
                also forwarded to jdecode.mtg_open_json().
            json_fname: path of the JSON file passed to
                jdecode.mtg_open_json().

        After construction:
            self.names: maps each cardlib.Card name to the 'name' field of
                the first JSON entry for that card.
            self.cardstrings: maps the same names to card.encode().
            self.codes: maps the same names to '<code>/<number>.jpg', or ''
                when either the code or the number is missing/empty.
            self.matchers / self.card_matchers: difflib.SequenceMatcher
                objects (autojunk disabled) whose second sequence is preset
                to each name / each encoded card string respectively.

        Duplicate names are always reported on stdout and the later entry is
        ignored; all other output is suppressed when verbose is false.
        """
        self.verbose = verbose
        self.names = {}
        self.codes = {}
        self.cardstrings = {}

        # Single-argument print(...) behaves identically under the Python 2
        # print statement and the Python 3 print function.
        if self.verbose:
            print('Setting up namediff...')
            print('  Reading names from: ' + json_fname)
        json_srcs = jdecode.mtg_open_json(json_fname, verbose)
        for json_cardname in sorted(json_srcs):
            jcards = json_srcs[json_cardname]
            if len(jcards) == 0:
                continue

            # just use the first one
            jcard = jcards[0]
            card = cardlib.Card(jcard)
            name = card.name
            jname = jcard['name']
            jcode = jcard[utils.json_field_info_code]
            jnum = jcard['number'] if 'number' in jcard else ''

            if name in self.names:
                # This is a data warning rather than progress chatter, so it
                # is reported regardless of the verbose flag.
                print('  Duplicate name ' + name + ', ignoring.')
                continue

            self.names[name] = jname
            self.cardstrings[name] = card.encode()
            if jcode and jnum:
                self.codes[name] = jcode + '/' + jnum + '.jpg'
            else:
                self.codes[name] = ''

        if self.verbose:
            # Every accepted card added exactly one key to self.names.
            print('  Read ' + str(len(self.names)) + ' unique cardnames')
            print('  Building SequenceMatcher objects.')

        self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False) for n in self.names]
        self.card_matchers = [difflib.SequenceMatcher(b=self.cardstrings[n], autojunk=False) for n in self.cardstrings]

        if self.verbose:
            print('... Done.')
def mtg_open_file(fname,
                  verbose=False,
                  linetrans=True,
                  fmt_ordered=cardlib.fmt_ordered_default,
                  exclude_sets=default_exclude_sets,
                  exclude_types=default_exclude_types,
                  exclude_layouts=default_exclude_layouts):
    """Load cards from a JSON dump or from an encoded card file.

    A name ending in '.json' is read through mtg_open_json(); anything else
    is read as text and split on utils.cardsep.

    Parameters:
        fname: path of the file to read.
        verbose: when true, print progress, invalid-card notices and a
            summary of the counts to stdout.
        linetrans: forwarded to cardlib.Card (JSON input only).
        fmt_ordered: forwarded to cardlib.Card (encoded input only).
        exclude_sets: predicate called with a JSON entry's set name; a true
            result excludes that printing (JSON input only).
        exclude_types: predicate called with each of a card's types; a true
            result for any type excludes the card (JSON input only).
        exclude_layouts: predicate called with a JSON entry's 'layout'; a
            true result excludes the card (JSON input only).

    Returns:
        A list of cardlib.Card objects. For JSON input only cards whose
        .valid flag is set are returned; for encoded input every non-empty
        entry is returned, valid or not.

    A warning is printed (regardless of verbose) when most of the first few
    returned cards look malformed, which suggests the wrong field order.
    """
    cards = []
    valid = 0
    skipped = 0
    invalid = 0
    unparsed = 0

    # Single-argument print(...) behaves identically under the Python 2
    # print statement and the Python 3 print function.
    if fname.endswith('.json'):
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = mtg_open_json(fname, verbose)
        # sorted for stability
        for json_cardname in sorted(json_srcs):
            jcards = json_srcs[json_cardname]
            if len(jcards) == 0:
                continue

            # look for a normal rarity version, in a set we can use
            for jcard in jcards:
                card = cardlib.Card(jcard, linetrans=linetrans)
                if not (card.rarity == utils.rarity_special_marker or
                        exclude_sets(jcard[utils.json_field_set_name])):
                    break
            else:
                # if there isn't one, settle with index 0
                # we could go back and look for a card satisfying one of the
                # criteria, but eh
                jcard = jcards[0]
                card = cardlib.Card(jcard, linetrans=linetrans)

            if (exclude_sets(jcard[utils.json_field_set_name])
                    or exclude_layouts(jcard['layout'])
                    or any(exclude_types(cardtype)
                           for cardtype in card.types)):
                skipped += 1
                continue

            if card.valid:
                valid += 1
                cards.append(card)
            elif card.parsed:
                invalid += 1
                if verbose:
                    print('Invalid card: ' + json_cardname)
            else:
                unparsed += 1

    # fall back to opening a normal encoded file
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as f:
            text = f.read()
        for card_src in text.split(utils.cardsep):
            if not card_src:
                continue
            card = cardlib.Card(card_src, fmt_ordered=fmt_ordered)
            # unlike opening from json, we still want to return invalid cards
            cards.append(card)
            if card.valid:
                valid += 1
            elif card.parsed:
                invalid += 1
                if verbose:
                    # There is no JSON key in this branch; identify the card
                    # by its own parsed name instead.
                    print('Invalid card: ' + card.name)
            else:
                unparsed += 1

    if verbose:
        print(
            str(valid) + ' valid, ' + str(skipped) + ' skipped, ' +
            str(invalid) + ' invalid, ' + str(unparsed) + ' failed to parse.')

    # random heuristic: inspect at most the first 16 cards and complain if
    # more than 10 of them look malformed.
    bad_count = 0
    for card in cards[:16]:
        if not card.parsed and not card.text.text:
            bad_count += 1
        elif len(card.name) > 50 or len(card.rarity) > 3:
            bad_count += 1
    if bad_count > 10:
        print('WARNING: Saw a bunch of unparsed cards:')
        print('         Is this a legacy format, you may need to specify the field order.')

    return cards