Exemple #1
0
def main(args):
    """Feed every descriptor in ``args.fds`` an endless stream of encoded cards.

    The cards are loaded once from ``args.fname``. Each stream builds its own
    ``random.Random`` from ``args.seed`` (a seed of 0 is replaced by ``None``),
    jumps it ahead by the stream's first argument, and reshuffles a private
    copy of the card list before every pass over it. The descriptors and the
    two callbacks are then handed to ``streaming_noreturn``.

    NOTE: ``Random.jumpahead`` only exists in Python 2.
    """
    fds = args.fds
    fname = args.fname
    block_size = args.block_size  # read from args but not used below
    main_seed = None if args.seed == 0 else args.seed

    # simple default encoding for now, will add more options with the curriculum
    # learning feature

    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    def write_stream(i, fd):
        # Same base seed for every stream, moved ahead by i.
        rng = random.Random(main_seed)
        rng.jumpahead(i)
        deck = list(cards)
        fd_path = '/proc/self/fd/' + str(fd)
        with open(fd_path, 'wt') as out:
            # Never terminates on its own: reshuffle and emit forever.
            while True:
                rng.shuffle(deck)
                for card in deck:
                    out.write(card.encode(randomize_mana=True, randomize_lines=True))
                    out.write(utils.cardsep)

    def mkargs(i, fd):
        return i, fd

    streaming_noreturn(fds, write_stream, mkargs)
Exemple #2
0
def main(fname, oname, gmin = 2, gmax = 8, verbose = True):
    """Write one n-gram listing per gram size from ``gmin`` to ``gmax``.

    For every size the cards' ``text_lines_words`` are passed to
    ``update_ngrams`` together with a fresh dictionary, and the dictionary
    is written to ``oname + '.<size>g'`` as ``ngram: value`` lines ordered
    by descending value. Exits with status 1 when the size range is invalid.

    NOTE(review): the lines are encoded before being written to a text-mode
    file, which presumably relies on Python 2 file semantics - confirm.
    """
    gmin = int(gmin)
    gmax = int(gmax)
    bins = [1, 2, 3, 10, 30, 100, 300, 1000]
    if not (gmin >= 2 and gmax >= gmin):
        print('invalid gram sizes: ' + str(gmin) + '-' + str(gmax))
        exit(1)

    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    for grams in range(gmin, gmax+1):
        size = str(grams)
        if verbose:
            print('generating ' + size + '-grams...')
        gramdict = {}
        for card in cards:
            update_ngrams(card.text_lines_words, gramdict, grams)

        oname_full = oname + '.' + size + 'g'
        if verbose:
            print('  writing ' + str(len(gramdict)) + ' unique ' + size + '-grams to ' + oname_full)
            describe_bins(gramdict, bins)

        with open(oname_full, 'wt') as f:
            # Largest values first; ties keep the dictionary's iteration order.
            ranked = sorted(gramdict, key=lambda ngram: gramdict[ngram], reverse=True)
            for ngram in ranked:
                line = ngram + ': ' + str(gramdict[ngram]) + '\n'
                f.write(line.encode('utf-8'))
Exemple #3
0
def main(args):
    """Start the never-ending card streams described by ``args``.

    ``args`` supplies ``fds``, ``fname``, ``block_size`` and ``seed``. A seed
    of 0 is passed to ``random.Random`` as ``None``. Every stream writes
    randomized encodings of all cards, followed by ``utils.cardsep``, to
    ``/proc/self/fd/<fd>`` and reshuffles its own card list between passes.

    NOTE: ``Random.jumpahead`` only exists in Python 2.
    """
    fds = args.fds
    fname = args.fname
    block_size = args.block_size  # not used in this function
    seed = args.seed
    main_seed = seed if seed != 0 else None

    # simple default encoding for now, will add more options with the curriculum
    # learning feature

    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    def mkargs(i, fd):
        return i, fd

    def write_stream(i, fd):
        stream_rng = random.Random(main_seed)
        stream_rng.jumpahead(i)
        shuffled = list(cards)
        with open('/proc/self/fd/' + str(fd), 'wt') as stream:
            while True:
                # New order on every pass over the cards.
                stream_rng.shuffle(shuffled)
                for card in shuffled:
                    encoded = card.encode(randomize_mana=True,
                                          randomize_lines=True)
                    stream.write(encoded)
                    stream.write(utils.cardsep)

    streaming_noreturn(fds, write_stream, mkargs)
Exemple #4
0
def check_vocab(fname):
    """Print the vocabulary of a card file and examples of its rarest words.

    First prints every word of the vectorized card texts (b-side included)
    with its number of occurrences, most frequent first. Then, for each card
    containing a word that occurs at most ``n`` (3) times, prints that word,
    its count and the card's encoding.

    Args:
        fname: file passed to ``jdecode.mtg_open_file``.
    """
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    def card_words(card):
        # Words of the vectorized front text, plus the b-side text if present.
        words = card.text.vectorize().split()
        if card.bside:
            words += card.bside.text.vectorize().split()
        return words

    vocab = {}
    for card in cards:
        for word in card_words(card):
            vocab[word] = vocab.get(word, 0) + 1

    # A key function works on Python 2 and 3; the positional cmp-style
    # comparator used previously is rejected by Python 3's sorted().
    for word in sorted(vocab, key=vocab.get, reverse=True):
        print('{:8d} : {:s}'.format(vocab[word], word))

    n = 3

    for card in cards:
        for word in card_words(card):
            if vocab[word] <= n:
                print('\n{:8d} : {:s}'.format(vocab[word], word))
                print(card.encode())
                # One report per card is enough.
                break
def main(infile, verbose=False):
    """Print statistics for ``infile`` using a 3-gram model of ``output.txt``.

    The model is built with ``ngrams.build_ngram_model`` from the cards in
    ``datadir/output.txt`` and passed to ``get_statistics``; the result goes
    to ``print_statistics``.
    """
    corpus_path = str(os.path.join(datadir, 'output.txt'))
    corpus_cards = jdecode.mtg_open_file(corpus_path)
    lm = ngrams.build_ngram_model(corpus_cards, 3,
                                  separate_lines=True, verbose=True)
    print_statistics(get_statistics(infile, lm=lm, sep=True, verbose=verbose))
Exemple #6
0
def main(fname, oname = None, verbose = False, dump = False):
    """Print a validation summary for the cards in ``fname``.

    With the hard-coded ``do_grams`` switch off (the default), the totals
    returned by ``process_props`` are printed, overall and per entry of
    ``props``. With the switch on, a histogram of ``rare_grams`` scores and
    its 90/95/99 percent marks are printed instead. ``oname`` is accepted
    but not used here.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    do_grams = False

    if not do_grams:
        totals, values = process_props(cards, dump=dump)
        total_all, total_good, total_bad, total_uncovered = totals

        # summary
        print('-- overall --')
        print('  total     : ' + str(total_all))
        print('  good      : ' + str(total_good) + ' ' + pct(total_good, total_all))
        print('  bad       : ' + str(total_bad) + ' ' + pct(total_bad, total_all))
        print('  uncocoverd: ' + str(total_uncovered) + ' ' + pct(total_uncovered, total_all))
        print('----')

        # breakdown
        for prop in props:
            total, good, bad = values[prop]
            print(prop + ':')
            print('  total: ' + str(total) + ' ' + pct(total, total_all))
            print('  good : ' + str(good) + ' ' + pct(good, total_all))
            print('  bad  : ' + str(bad) + ' ' + pct(bad, total_all))
        return

    # Histogram: score -> number of cards with that score.
    rg = {}
    for card in cards:
        g = rare_grams(card, thresh=2, grams=2)
        nwords = len(card.text_words)
        if nwords > 0:
            # Rescale to a percentage of the card's word count (plus one).
            g = int(1.0 + (float(g) * 100.0 / float(nwords)))
        rg[g] = rg.get(g, 0) + 1
        if g >= 60:
            print(g)
            print(card.format())

    tot = 0
    vmax = sum(rg.values())
    pct90 = pct95 = pct99 = None
    for i in sorted(rg):
        print(str(i) + ' rare ngrams: ' + str(rg[i]))
        tot += rg[i]
        # Remember the first score at which each cumulative share is reached.
        if pct90 is None and tot >= vmax * 0.90:
            pct90 = i
        if pct95 is None and tot >= vmax * 0.95:
            pct95 = i
        if pct99 is None and tot >= vmax * 0.99:
            pct99 = i

    print('90% - ' + str(pct90))
    print('95% - ' + str(pct95))
    print('99% - ' + str(pct99))
Exemple #7
0
def main(fname, oname, verbose = True, parallel = True):
    """Compute nearest-name and nearest-card distances and store them in ``oname``.

    For every card the closest name (``Namediff``) and the closest card
    (``CBOW``) are looked up with ``n=1``. The results are attached to the
    card as ``nearest_names`` / ``nearest_cards`` and one line per card is
    written as ``index|name|<names[0][0]>|<cards[0][0]>``.

    Args:
        fname: file passed to ``jdecode.mtg_open_file``.
        oname: path of the UTF-8 text file to write.
        verbose: print progress messages.
        parallel: use the ``nearest_par`` batch calls instead of one
            ``nearest`` call per card.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    # this could reasonably be some separate function
    # might make sense to merge cbow and namediff and have this be the main interface
    namediff = Namediff()
    cbow = CBOW()

    if verbose:
        print('Computing nearest names...')
    if parallel:
        nearest_names = namediff.nearest_par([c.name for c in cards], n=1)
    else:
        nearest_names = [namediff.nearest(c.name, n=1) for c in cards]

    if verbose:
        print('Computing nearest cards...')
    if parallel:
        nearest_cards = cbow.nearest_par(cards, n=1)
    else:
        nearest_cards = [cbow.nearest(c, n=1) for c in cards]

    for i, card in enumerate(cards):
        card.nearest_names = nearest_names[i]
        card.nearest_cards = nearest_cards[i]

    # Nearest encodings by text edit distance are deliberately not computed:
    # unfortunately that takes ~30 hours on 8 cores for a 10MB dump.

    if verbose:
        print('...Done.')

    # write to a file to store the data, this is a terribly long computation
    # we could also just store this same info in the cards themselves as more fields...
    sep = '|'
    # Let the file object do the UTF-8 encoding: writing encoded bytes to a
    # text-mode file raises TypeError on Python 3.
    with open(oname, 'w', encoding='utf-8') as ofile:
        for i, card in enumerate(cards):
            ndist, _ = card.nearest_names[0]
            cdist, _ = card.nearest_cards[0]
            ofile.write(str(i) + sep + card.name + sep
                        + str(ndist) + sep + str(cdist) + '\n')
def main(fname, oname, verbose = True, parallel = True):
    """Store each card's nearest-name and nearest-card distance in ``oname``.

    ``Namediff`` and ``CBOW`` are queried with ``n=1``, either through their
    ``nearest_par`` batch calls (``parallel``) or one card at a time. The
    results are kept on the cards as ``nearest_names`` / ``nearest_cards``
    and written as ``index|name|<names[0][0]>|<cards[0][0]>`` lines.

    NOTE(review): the line is encoded before being written to a text-mode
    file, which presumably relies on Python 2 file semantics - confirm.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    # this could reasonably be some separate function
    # might make sense to merge cbow and namediff and have this be the main interface
    namediff = Namediff()
    cbow = CBOW()

    if verbose:
        print('Computing nearest names...')
    if not parallel:
        nearest_names = [namediff.nearest(card.name, n=1) for card in cards]
    else:
        nearest_names = namediff.nearest_par([card.name for card in cards], n=1)

    if verbose:
        print('Computing nearest cards...')
    if not parallel:
        nearest_cards = [cbow.nearest(card, n=1) for card in cards]
    else:
        nearest_cards = cbow.nearest_par(cards, n=1)

    for idx, card in enumerate(cards):
        card.nearest_names = nearest_names[idx]
        card.nearest_cards = nearest_cards[idx]

    # Nearest encodings by text edit distance are not computed here;
    # unfortunately that takes ~30 hours on 8 cores for a 10MB dump.

    if verbose:
        print('...Done.')

    # write to a file to store the data, this is a terribly long computation
    # we could also just store this same info in the cards themselves as more fields...
    sep = '|'
    with open(oname, 'w') as ofile:
        for idx, card in enumerate(cards):
            ostr = str(idx) + sep + card.name + sep
            ndist, _ = card.nearest_names[0]
            ostr += str(ndist) + sep
            cdist, _ = card.nearest_cards[0]
            ostr += str(cdist) + '\n'
            ofile.write(ostr.encode('utf-8'))
Exemple #9
0
def main(fname, oname, gmin=2, gmax=8, nltk=False, sep=False, verbose=False):
    """Build either a pickled n-gram model or plain n-gram listings.

    With ``nltk`` set, a model of order ``gmin`` is built with
    ``build_ngram_model`` and pickled to ``oname``. Otherwise one listing
    per gram size from ``gmin`` to ``gmax`` is written to
    ``oname + '.<size>g'`` as ``ngram: value`` lines ordered by descending
    value; an invalid size range exits with status 1.

    NOTE(review): the listing lines are encoded before being written to a
    text-mode file, which presumably relies on Python 2 - confirm.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    gmin = int(gmin)
    gmax = int(gmax)

    if nltk:
        n = gmin
        lm = build_ngram_model(cards, n, separate_lines=sep, verbose=verbose)
        if verbose:
            teststr = 'when @ enters the battlefield'
            print('litmus test: perplexity of ' + repr(teststr))
            print('  ' + str(lm.perplexity(teststr.split())))
            print('pickling module to ' + oname)
        with open(oname, 'wb') as f:
            pickle.dump(lm, f)
        return

    bins = [1, 2, 3, 10, 30, 100, 300, 1000]
    if gmin < 2 or gmax < gmin:
        print('invalid gram sizes: ' + str(gmin) + '-' + str(gmax))
        exit(1)

    for grams in range(gmin, gmax + 1):
        label = str(grams)
        if verbose:
            print('generating ' + label + '-grams...')
        gramdict = {}
        for card in cards:
            update_ngrams(card.text_lines_words, gramdict, grams)

        oname_full = oname + '.' + label + 'g'
        if verbose:
            print('  writing ' + str(len(gramdict)) + ' unique ' +
                  label + '-grams to ' + oname_full)
            describe_bins(gramdict, bins)

        with open(oname_full, 'wt') as f:
            # Largest values first; ties keep the dictionary's iteration order.
            by_value = sorted(gramdict, key=lambda g: gramdict[g], reverse=True)
            for ngram in by_value:
                entry = ngram + ': ' + str(gramdict[ngram]) + '\n'
                f.write(entry.encode('utf-8'))
Exemple #10
0
def main(fname, oname, gmin = 2, gmax = 8, nltk = False, sep = False, verbose = False):
    """Build either a pickled n-gram model or plain n-gram listings.

    Args:
        fname: file passed to ``jdecode.mtg_open_file``.
        oname: pickle path (``nltk``) or prefix of the listing files.
        gmin: smallest gram size; also the model order when ``nltk`` is set.
        gmax: largest gram size for the listings.
        nltk: build and pickle a model with ``build_ngram_model`` instead of
            writing listings.
        sep: forwarded to ``build_ngram_model`` as ``separate_lines``.
        verbose: print progress messages.

    In listing mode one UTF-8 file ``oname + '.<size>g'`` is written per gram
    size, holding ``ngram: value`` lines ordered by descending value. An
    invalid size range prints a message and exits with status 1.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    gmin = int(gmin)
    gmax = int(gmax)

    if nltk:
        n = gmin
        lm = build_ngram_model(cards, n, separate_lines=sep, verbose=verbose)
        if verbose:
            teststr = 'when @ enters the battlefield'
            print('litmus test: perplexity of ' + repr(teststr))
            print('  ' + str(lm.perplexity(teststr.split())))
            print('pickling module to ' + oname)
        with open(oname, 'wb') as f:
            pickle.dump(lm, f)
        return

    bins = [1, 2, 3, 10, 30, 100, 300, 1000]
    if gmin < 2 or gmax < gmin:
        print('invalid gram sizes: ' + str(gmin) + '-' + str(gmax))
        exit(1)

    for grams in range(gmin, gmax+1):
        if verbose:
            print('generating ' + str(grams) + '-grams...')
        gramdict = {}
        for card in cards:
            update_ngrams(card.text_lines_words, gramdict, grams)

        oname_full = oname + '.' + str(grams) + 'g'
        if verbose:
            print('  writing ' + str(len(gramdict)) + ' unique ' + str(grams)
                  + '-grams to ' + oname_full)
            describe_bins(gramdict, bins)

        # Python 3 fixes: sorted() takes a key function (not a cmp-style
        # comparator), and a text-mode file takes str (not encoded bytes),
        # so the encoding is declared on the file instead.
        with open(oname_full, 'wt', encoding='utf-8') as f:
            for ngram in sorted(gramdict, key=gramdict.get, reverse=True):
                f.write(ngram + ': ' + str(gramdict[ngram]) + '\n')
Exemple #11
0
def check_characters(fname, vname):
    """Print the character vocabulary of a card file and optionally save it.

    The vocabulary is every character of ``utils.cardsep`` plus every
    character of every card's encoding. Characters are numbered from 1 in
    sorted order. When ``vname`` is truthy, both mappings are dumped to it
    as JSON under the keys ``token_to_idx`` and ``idx_to_token``.
    """
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    tokens = set(utils.cardsep)
    for card in cards:
        tokens.update(card.encode())

    ordered = sorted(tokens)
    token_to_idx = {}
    idx_to_token = {}
    for idx, tok in enumerate(ordered, 1):
        token_to_idx[tok] = idx
        idx_to_token[idx] = tok

    print('Vocabulary: ({:d} symbols)'.format(len(token_to_idx)))
    for tok in ordered:
        print('{:8s} : {:4d}'.format(repr(tok), token_to_idx[tok]))

    # compliant with torch-rnn
    if not vname:
        return
    json_data = {'token_to_idx': token_to_idx, 'idx_to_token': idx_to_token}
    print('writing vocabulary to {:s}'.format(vname))
    with open(vname, 'w') as f:
        json.dump(json_data, f)
Exemple #12
0
def main(fname,
         oname=None,
         verbose=True,
         encoding='std',
         nolinetrans=False,
         randomize=False,
         nolabel=False,
         stable=False):
    """Encode the cards in ``fname`` and write them to ``oname`` or stdout.

    ``encoding`` selects the field ordering and separator options passed to
    ``card.encode``; ``'vec'`` writes ``card.vectorize()`` instead. Unless
    ``stable`` is set, the cards are shuffled with a fixed seed first.

    Raises:
        ValueError: if ``encoding`` is not one of the known names.
    """
    fmt_ordered = cardlib.fmt_ordered_default
    fmt_labeled = cardlib.fmt_labeled_default if not nolabel else None
    fieldsep = utils.fieldsep
    line_transformations = not nolinetrans
    randomize_fields = False
    randomize_mana = randomize
    initial_sep = True
    final_sep = True

    # set the properties of the encoding

    if encoding == 'std' or encoding == 'vec':
        pass
    elif encoding == 'named':
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding == 'noname':
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding == 'rfields':
        randomize_fields = True
        final_sep = False
    elif encoding == 'old':
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding == 'norarity':
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding == 'custom':
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        raise ValueError('encode.py: unknown encoding: ' + encoding)

    if verbose:
        print('Preparing to encode:')
        print('  Using encoding ' + repr(encoding))
        if stable:
            print('  NOT randomizing order of cards.')
        if randomize_mana:
            print('  Randomizing order of symobls in manacosts.')
        if not fmt_labeled:
            print('  NOT labeling fields for this run (may be harder to decode).')
        if not line_transformations:
            print('  NOT using line reordering transformations')

    cards = jdecode.mtg_open_file(fname,
                                  verbose=verbose,
                                  linetrans=line_transformations)

    # This should give a random but consistent ordering, to make comparing changes
    # between the output of different versions easier.
    if not stable:
        random.seed(1371367)
        random.shuffle(cards)

    as_vectors = encoding == 'vec'

    def writecards(writer):
        for card in cards:
            if as_vectors:
                text = card.vectorize() + '\n\n'
            else:
                text = card.encode(fmt_ordered=fmt_ordered,
                                   fmt_labeled=fmt_labeled,
                                   fieldsep=fieldsep,
                                   randomize_fields=randomize_fields,
                                   randomize_mana=randomize_mana,
                                   initial_sep=initial_sep,
                                   final_sep=final_sep) + utils.cardsep
            writer.write(text)

    if not oname:
        writecards(sys.stdout)
        sys.stdout.flush()
        return

    if verbose:
        print('Writing output to: ' + oname)
    with open(oname, 'w') as ofile:
        writecards(ofile)
Exemple #13
0
def main(fname, oname = None, verbose = True, encoding = 'std',
         gatherer = False, for_forum = False, for_mse = False,
         creativity = False, vdump = False, for_html = False):
    """Decode an encoded card file and write it out in a readable format.

    Args:
        fname: file passed to ``jdecode.mtg_open_file``.
        oname: output path; when falsy the cards are written to ``sys.stdout``.
        verbose: print progress messages.
        encoding: name of the field ordering the input was encoded with.
        gatherer, for_forum, vdump: forwarded to ``card.format``.
        for_mse: write Magic Set Editor output and, when ``oname`` is given,
            also pack it into ``oname + '.mse-set'``.
        creativity: append the nearest cards and names found by ``CBOW`` and
            ``Namediff`` to every card.
        for_html: write HTML output; cannot be combined with ``for_mse``.

    Raises:
        ValueError: if ``encoding`` is not one of the known names.

    NOTE(review): some writes pass encoded text and others plain strings to
    the same writer, which presumably relies on Python 2 - confirm.
    """
    # there is a sane thing to do here (namely, produce both at the same time)
    # but we don't support it yet.
    if for_mse and for_html:
        print('ERROR - decode.py - incompatible formats "mse" and "html"')
        return

    fmt_ordered = cardlib.fmt_ordered_default

    if encoding in ['std']:
        pass
    elif encoding in ['named']:
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding in ['noname']:
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding in ['rfields']:
        pass
    elif encoding in ['old']:
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding in ['norarity']:
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding in ['vec']:
        pass
    elif encoding in ['custom']:
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        # The message used to name encode.py although this is the decoder.
        raise ValueError('decode.py: unknown encoding: ' + encoding)

    cards = jdecode.mtg_open_file(fname, verbose=verbose, fmt_ordered=fmt_ordered)

    if creativity:
        namediff = Namediff()
        cbow = CBOW()
        if verbose:
            print('Computing nearest names...')
        nearest_names = namediff.nearest_par([c.name for c in cards], n=3)
        if verbose:
            print('Computing nearest cards...')
        nearest_cards = cbow.nearest_par(cards)
        for i in range(0, len(cards)):
            cards[i].nearest_names = nearest_names[i]
            cards[i].nearest_cards = nearest_cards[i]
        if verbose:
            print('...Done.')

    def hoverimg(cardname, dist, nd):
        # Render one "name: distance" entry in the selected output format.
        truename = nd.names[cardname]
        code = nd.codes[cardname]
        namestr = ''
        if for_html:
            if code:
                namestr = ('<div class="hover_img"><a href="#">' + truename
                           + '<span><img src="http://magiccards.info/scans/en/' + code
                           + '" alt="image"/></span></a>' + ': ' + str(dist) + '</div>')
            else:
                namestr = '<div>' + truename + ': ' + str(dist) + '</div>'
        elif for_forum:
            namestr = '[card]' + truename + '[/card]' + ': ' + str(dist) + '\n'
        else:
            namestr = truename + ': ' + str(dist) + '\n'
        return namestr

    def writecards(writer):
        if for_mse:
            # have to prepend a massive chunk of formatting info
            writer.write(utils.mse_prepend)

        if for_html:
            # have to prepend html info
            writer.write(utils.html_prepend)

        for card in cards:
            if for_mse:
                writer.write(card.to_mse().encode('utf-8'))
                fstring = ''
                if card.json:
                    fstring += 'JSON:\n' + card.json + '\n'
                if card.raw:
                    fstring += 'raw:\n' + card.raw + '\n'
                fstring += '\n'
                fstring += card.format(gatherer = gatherer, for_forum = for_forum,
                                       vdump = vdump) + '\n'
                fstring = fstring.replace('<', '(').replace('>', ')')
                writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t'))
            else:
                fstring = card.format(gatherer = gatherer, for_forum = for_forum,
                                      vdump = vdump, for_html = for_html)
                if creativity and for_html:
                    fstring = fstring[:-6] # chop off the closing </div> to stick stuff in
                writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                cstring = '~~ closest cards ~~\n'
                nearest = card.nearest_cards
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                cstring += '~~ closest names ~~\n'
                nearest = card.nearest_names
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                if for_html:
                    cstring = '<hr><div>' + cstring.replace('\n', '<br>\n') + '</div>\n</div>'
                elif for_mse:
                    cstring = ('\n\n' + cstring[:-1]).replace('\n', '\n\t\t')

                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

        if for_mse:
            # more formatting info
            writer.write('version control:\n\ttype: none\napprentice code: ')
        if for_html:
            # closing the html file
            writer.write(utils.html_append)

    if oname:
        if for_html:
            print(oname)
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
        if for_mse:
            # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
            if os.path.isfile('set'):
                print('ERROR: tried to overwrite existing file "set" - aborting.')
                return
            shutil.copyfile(oname, 'set')
            try:
                # Use the freaky mse extension instead of zip.
                with zipfile.ZipFile(oname+'.mse-set', mode='w') as zf:
                    # Zip up the set file into oname.mse-set.
                    zf.write('set')
            finally:
                # The set file is useless outside the .mse-set, delete it
                # even when the archive could not be created.
                os.remove('set')
            # Only reached when zipping succeeded.
            if verbose:
                print('Made an MSE set file called ' + oname + '.mse-set.')
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
Exemple #14
0
def main(fname, oname, n=20, verbose=False):
    """Pair generated cards with similar real cards and report them.

    Cards from ``fname`` that pass ``select_card`` are matched against the
    real cards in ``datadir/output.txt``: for each one, the first of its
    ``CBOW`` neighbours accepted by ``compare_to_real`` is kept. Every pair
    is printed together with its distance and n-gram perplexity statistics.
    When ``oname`` is not ``None`` the pairs are also written to ``oname``
    in MSE format and packed into ``oname + '.mse-set'``.

    Args:
        fname: file with the cards to examine.
        oname: output path, or ``None`` to skip writing files.
        n: accepted but not used in this function.
        verbose: print progress messages.

    Reads the module-level names ``datadir`` and ``separate_lines``.
    """
    cbow = CBOW()
    realcards = jdecode.mtg_open_file(str(os.path.join(datadir, 'output.txt')), verbose=verbose)
    real_by_name = {c.name: c for c in realcards}
    lm = ngrams.build_ngram_model(realcards, 3, separate_lines=separate_lines, verbose=verbose)
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats = analysis.get_statistics(fname, lm=lm, sep=separate_lines, verbose=verbose)

    selected = [(i, cards[i]) for i in range(len(cards))
                if select_card(cards, stats, i)]

    limit = 3000  # only used by the disabled truncation below

    random.shuffle(selected)
    #selected = selected[:limit]

    if verbose:
        print('computing nearest cards for ' + str(len(selected)) + ' candindates...')
    cbow_nearest = cbow.nearest_par([card for _, card in selected])
    selected = [(j, card, cbow_nearest[k])
                for k, (j, card) in enumerate(selected)]
    if verbose:
        print('...done')

    final = []
    for (i, card, nearest) in selected:
        for dist, rname in nearest:
            realcard = real_by_name[rname]
            if compare_to_real(card, realcard):
                final.append((i, card, realcard, dist))
                # Keep only the first accepted neighbour.
                break

    for (i, card, realcard, dist) in final:
        print('-- real --')
        print(realcard.format())
        print('-- fake --')
        print(card.format())
        print('-- stats --')
        perp_per = stats['ngram']['perp_per'][i]
        perp_max = stats['ngram']['perp_max'][i]
        print(dist)
        print(perp_per)
        print(perp_max)
        print('----')

    if oname is None:
        return

    with open(oname, 'wt') as ofile:
        ofile.write(utils.mse_prepend)
        for (i, card, realcard, dist) in final:
            name = realcard.name
            writecard(realcard, name, ofile)
            writecard(card, name, ofile)
        ofile.write('version control:\n\ttype: none\napprentice code: ')

    # The output file must be closed (and therefore flushed) before it is
    # copied; copying inside the with-block could pick up a truncated file.
    # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
    if os.path.isfile('set'):
        print('ERROR: tried to overwrite existing file "set" - aborting.')
        return
    shutil.copyfile(oname, 'set')
    try:
        # Use the freaky mse extension instead of zip.
        with zipfile.ZipFile(oname+'.mse-set', mode='w') as zf:
            # Zip up the set file into oname.mse-set.
            zf.write('set')
    finally:
        # The set file is useless outside the .mse-set, delete it even
        # when the archive could not be created.
        os.remove('set')
    # Only reached when zipping succeeded.
    if verbose:
        print('Made an MSE set file called ' + oname + '.mse-set.')
def main(fname, oname, n=20, verbose=False):
    """Pair generated cards with similar real cards and report them.

    Cards from ``fname`` that pass ``select_card`` are compared with the
    real cards loaded from ``datadir/output.txt``. For each selected card
    the first ``CBOW`` neighbour accepted by ``compare_to_real`` is kept,
    and the pair is printed with its distance and n-gram perplexity values.
    When ``oname`` is not ``None`` the pairs are also written to ``oname``
    in MSE format and packed into ``oname + '.mse-set'``.

    Args:
        fname: file with the cards to examine.
        oname: output path, or ``None`` to skip writing files.
        n: accepted but not used in this function.
        verbose: print progress messages.

    Reads the module-level names ``datadir`` and ``separate_lines``.
    """
    cbow = CBOW()
    realcards = jdecode.mtg_open_file(str(os.path.join(datadir, 'output.txt')), verbose=verbose)
    real_by_name = {c.name: c for c in realcards}
    lm = ngrams.build_ngram_model(realcards, 3, separate_lines=separate_lines, verbose=verbose)
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats = analysis.get_statistics(fname, lm=lm, sep=separate_lines, verbose=verbose)

    selected = []
    for i in range(0, len(cards)):
        if select_card(cards, stats, i):
            selected.append((i, cards[i]))

    limit = 3000  # only used by the disabled truncation below

    random.shuffle(selected)
    #selected = selected[:limit]

    if verbose:
        print('computing nearest cards for ' + str(len(selected)) + ' candindates...')
    # A comprehension replaces the tuple-parameter lambda, which is a
    # syntax error on Python 3; the result is the same list.
    cbow_nearest = cbow.nearest_par([pair[1] for pair in selected])
    for i in range(0, len(selected)):
        (j, card) = selected[i]
        selected[i] = (j, card, cbow_nearest[i])
    if verbose:
        print('...done')

    final = []
    for (i, card, nearest) in selected:
        for dist, rname in nearest:
            realcard = real_by_name[rname]
            if compare_to_real(card, realcard):
                final.append((i, card, realcard, dist))
                # Keep only the first accepted neighbour.
                break

    for (i, card, realcard, dist) in final:
        print('-- real --')
        print(realcard.format())
        print('-- fake --')
        print(card.format())
        print('-- stats --')
        perp_per = stats['ngram']['perp_per'][i]
        perp_max = stats['ngram']['perp_max'][i]
        print(dist)
        print(perp_per)
        print(perp_max)
        print('----')

    if oname is not None:
        with open(oname, 'wt') as ofile:
            ofile.write(utils.mse_prepend)
            for (i, card, realcard, dist) in final:
                name = realcard.name
                writecard(realcard, name, ofile)
                writecard(card, name, ofile)
            ofile.write('version control:\n\ttype: none\napprentice code: ')

        # Copy only after the with-block has closed (and flushed) the file;
        # copying while it was still open could produce a truncated 'set'.
        # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
        if os.path.isfile('set'):
            print('ERROR: tried to overwrite existing file "set" - aborting.')
            return
        shutil.copyfile(oname, 'set')
        try:
            # Use the freaky mse extension instead of zip.
            with zipfile.ZipFile(oname+'.mse-set', mode='w') as zf:
                # Zip up the set file into oname.mse-set.
                zf.write('set')
        finally:
            # The set file is useless outside the .mse-set, delete it even
            # when the archive could not be created.
            os.remove('set')
        # Only reached when zipping succeeded.
        if verbose:
            print('Made an MSE set file called ' + oname + '.mse-set.')
def main(fname, oname=None, verbose=False, dump=False):
    """Report how many cards in ``fname`` pass the property checks.

    The hard-coded ``do_grams`` switch selects the report. When it is off
    (the default) the totals from ``process_props`` are printed, overall and
    for each entry of ``props``. When it is on, a histogram of
    ``rare_grams`` scores with its 90/95/99 percent marks is printed.
    ``oname`` is accepted but not used here.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    do_grams = False

    if do_grams:
        # score -> number of cards with that score
        rg = {}
        for card in cards:
            score = rare_grams(card, thresh=2, grams=2)
            word_count = len(card.text_words)
            if word_count > 0:
                # Rescale to a percentage of the card's word count (plus one).
                score = int(1.0 + (float(score) * 100.0 / float(word_count)))
            rg[score] = rg.get(score, 0) + 1
            if score >= 60:
                print(score)
                print(card.format())

        running = 0
        vmax = sum(rg.values())
        pct90 = pct95 = pct99 = None
        for score in sorted(rg):
            print(str(score) + ' rare ngrams: ' + str(rg[score]))
            running += rg[score]
            # Record the first score at which each cumulative share is reached.
            if pct90 is None and running >= vmax * 0.90:
                pct90 = score
            if pct95 is None and running >= vmax * 0.95:
                pct95 = score
            if pct99 is None and running >= vmax * 0.99:
                pct99 = score

        print('90% - ' + str(pct90))
        print('95% - ' + str(pct95))
        print('99% - ' + str(pct99))

    else:
        overall, values = process_props(cards, dump=dump)
        total_all, total_good, total_bad, total_uncovered = overall

        def share(count):
            # Count followed by its share of all cards as rendered by pct().
            return str(count) + ' ' + pct(count, total_all)

        # summary
        print('-- overall --')
        print('  total     : ' + str(total_all))
        print('  good      : ' + share(total_good))
        print('  bad       : ' + share(total_bad))
        print('  uncocoverd: ' + share(total_uncovered))
        print('----')

        # breakdown
        for prop in props:
            total, good, bad = values[prop]
            print(prop + ':')
            print('  total: ' + share(total))
            print('  good : ' + share(good))
            print('  bad  : ' + share(bad))
Exemple #17
0
def get_statistics(fname, lm = None, sep = False, verbose=False):
    """Collect summary statistics about the cards stored in ``fname``.

    Parameters:
        fname: path of the card file; opened with ``jdecode.mtg_open_file``.
        lm: optional language model. Only its ``perplexity(tokens)`` method
            is used; when ``None`` the n-gram section is skipped.
        sep: when true, perplexity is computed per text line and combined
            per card; otherwise once over the whole card text.
        verbose: forwarded to ``jdecode.mtg_open_file``.

    Returns:
        An ``OrderedDict`` with these entries:
          'cards' - the list returned by ``jdecode.mtg_open_file``
          'cp'    - checkpoint info parsed from the file name (only when the
                    name matches the expected pattern)
          'props' - annotated validation results plus an 'overall' summary
          'dists' - name/cbow distances read from ``fname + '.dist'`` (only
                    when that file exists)
          'ngram' - per-card perplexities and their means (only when ``lm``
                    is given)
    """
    stats = OrderedDict()
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats['cards'] = cards

    # unpack the name of the checkpoint - terrible and hacky
    # Expected shape: <name>_epoch<epoch>_<vloss>.<ident...>.<temp>XXXX where
    # vloss and temp each span two dot-separated fragments and the last four
    # characters (presumably a file extension - TODO confirm) are discarded.
    try:
        final_name = os.path.basename(fname)
        halves = final_name.split('_epoch')
        cp_name = halves[0]
        cp_info = halves[1][:-4]
        info_halves = cp_info.split('_')
        cp_epoch = float(info_halves[0])
        fragments = info_halves[1].split('.')
        cp_vloss = float('.'.join(fragments[:2]))
        cp_temp = float('.'.join(fragments[-2:]))
        cp_ident = '.'.join(fragments[2:-2])
        stats['cp'] = OrderedDict([('name', cp_name),
                                   ('epoch', cp_epoch),
                                   ('vloss', cp_vloss),
                                   ('temp', cp_temp),
                                   ('ident', cp_ident)])
    except (IndexError, ValueError):
        # Best effort: the file name is not a checkpoint name, so there is
        # simply no 'cp' entry.
        pass

    # validate
    ((total_all, total_good, total_bad, total_uncovered),
     values) = mtg_validate.process_props(cards)

    stats['props'] = annotate_values(values)
    stats['props']['overall'] = OrderedDict([('total', total_all),
                                             ('good', total_good),
                                             ('bad', total_bad),
                                             ('uncovered', total_uncovered)])

    # distances
    distfname = fname + '.dist'
    if os.path.isfile(distfname):
        with open(distfname, 'rt') as f:
            distlines = f.read().split('\n')
        dists = OrderedDict([('name', []), ('cbow', [])])
        for line in distlines:
            # Each usable line is '|'-separated with the name distance in the
            # third field and the cbow distance in the fourth.
            fields = line.split('|')
            if len(fields) < 4:
                continue
            dists['name'].append(float(fields[2]))
            dists['cbow'].append(float(fields[3]))

        dists['name_mean'] = mean_nonan(dists['name'])
        dists['cbow_mean'] = mean_nonan(dists['cbow'])
        dists['name_geomean'] = gmean_nonzero(dists['name'])
        dists['cbow_geomean'] = gmean_nonzero(dists['cbow'])
        stats['dists'] = dists

    # n-grams
    if lm is not None:
        ngram = OrderedDict([('perp', []), ('perp_per', []),
                             ('perp_max', []), ('perp_per_max', [])])
        for card in cards:
            # Cards without any usable text score 0.0 everywhere. Assigning
            # all four values up front guarantees none of them is left
            # undefined or carried over from the previous card.
            perp = perp_per = perp_max = perp_per_max = 0.0

            if len(card.text.text) > 0:
                if sep:
                    vtexts = [line.vectorize().split()
                              for line in card.text_lines]
                    vtexts = [vtext for vtext in vtexts if len(vtext) > 0]
                    if vtexts:
                        perps = [lm.perplexity(vtext) for vtext in vtexts]
                        perps_per = [p / float(len(vtext))
                                     for p, vtext in zip(perps, vtexts)]
                        perp = gmean_nonzero(perps)
                        perp_per = gmean_nonzero(perps_per)
                        perp_max = max(perps)
                        perp_per_max = max(perps_per)
                else:
                    vtext = card.text.vectorize().split()
                    if vtext:
                        perp = lm.perplexity(vtext)
                        perp_per = perp / float(len(vtext))
                        # With a single text the maxima are the values
                        # themselves.
                        perp_max = perp
                        perp_per_max = perp_per

            ngram['perp'].append(perp)
            ngram['perp_per'].append(perp_per)
            ngram['perp_max'].append(perp_max)
            ngram['perp_per_max'].append(perp_per_max)

        ngram['perp_mean'] = mean_nonan(ngram['perp'])
        ngram['perp_per_mean'] = mean_nonan(ngram['perp_per'])
        ngram['perp_geomean'] = gmean_nonzero(ngram['perp'])
        ngram['perp_per_geomean'] = gmean_nonzero(ngram['perp_per'])
        stats['ngram'] = ngram

    return stats
Exemple #18
0
def main(fname, oname = None, verbose = True, encoding = 'std',
         gatherer = False, for_forum = False, for_mse = False,
         creativity = False, vdump = False, for_html = False):
    """Decode the cards in ``fname`` and write them in a readable format.

    Parameters:
        fname: path of the encoded card file (read via
            ``jdecode.mtg_open_file``).
        oname: output path; when falsy the cards are written to stdout.
        verbose: print progress messages.
        encoding: name of the encoding the input uses; selects the field
            ordering handed to the reader.
        gatherer, for_forum, vdump: forwarded to ``card.format``.
        for_mse: write a Magic Set Editor set and, when ``oname`` is given,
            zip it into ``oname + '.mse-set'``.
        creativity: append the nearest existing names/cards to every card.
        for_html: write an HTML page with the cards grouped by color.

    Raises:
        ValueError: if ``encoding`` is not a known encoding name.

    ``for_mse`` and ``for_html`` cannot be combined; in that case an error
    is printed and nothing is written.
    """

    # there is a sane thing to do here (namely, produce both at the same time)
    # but we don't support it yet.
    if for_mse and for_html:
        print('ERROR - decode.py - incompatible formats "mse" and "html"')
        return

    fmt_ordered = cardlib.fmt_ordered_default

    # Only some encodings need a non-default field ordering; 'std', 'rfields'
    # and 'vec' decode with the default one.
    if encoding == 'named':
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding == 'noname':
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding == 'old':
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding == 'norarity':
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding == 'custom':
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    elif encoding not in ('std', 'rfields', 'vec'):
        raise ValueError('decode.py: unknown encoding: ' + encoding)

    cards = jdecode.mtg_open_file(fname, verbose=verbose, fmt_ordered=fmt_ordered)

    if creativity:
        namediff = Namediff()
        cbow = CBOW()
        if verbose:
            print('Computing nearest names...')
        nearest_names = namediff.nearest_par([card.name for card in cards], n=3)
        if verbose:
            print('Computing nearest cards...')
        nearest_cards = cbow.nearest_par(cards)
        for i, card in enumerate(cards):
            card.nearest_names = nearest_names[i]
            card.nearest_cards = nearest_cards[i]
        if verbose:
            print('...Done.')

    def hoverimg(cardname, dist, nd):
        # Format one "name: distance" entry for the active output format.
        truename = nd.names[cardname]
        code = nd.codes[cardname]
        if for_html:
            if code:
                return ('<div class="hover_img"><a href="#">' + truename
                        + '<span><img style="background: url(http://magiccards.info/scans/en/' + code
                        + ');" alt=""/></span></a>' + ': ' + str(dist) + '\n</div>\n')
            return '<div>' + truename + ': ' + str(dist) + '</div>'
        if for_forum:
            return '[card]' + truename + '[/card]' + ': ' + str(dist) + '\n'
        return truename + ': ' + str(dist) + '\n'

    def closest_listing(card, br = ''):
        # Build the "closest cards / closest names" text for one card; `br`
        # is the line-break markup inserted around the headings (HTML only).
        parts = ['~~ closest cards ~~\n' + br]
        parts += [hoverimg(cardname, dist, namediff)
                  for dist, cardname in card.nearest_cards]
        parts.append(br + '~~ closest names ~~\n' + br)
        parts += [hoverimg(cardname, dist, namediff)
                  for dist, cardname in card.nearest_names]
        return ''.join(parts)

    def writecards(writer):
        if for_mse:
            # have to prepend a massive chunk of formatting info
            writer.write(utils.mse_prepend)

        if for_html:
            # have to prepend html info
            writer.write(utils.html_prepend)
            # the cards are written one color segment at a time
            for i, segment in enumerate(sort_colors(cards)):
                # order the cards of this color by card type
                ordered = sort_type(segment)
                # one <div> per segment allows the card boxes to be colored
                # per color; coloring each box separately would need
                # non-trivial changes to cardlib.Card.format()
                writer.write('<div id="' + utils.segment_ids[i] + '">')
                writehtml(writer, ordered)
                writer.write("</div><hr>")
            # closing the html file
            writer.write(utils.html_append)
            return # everything is written; avoid writing the cards twice

        for card in cards:
            if for_mse:
                writer.write(card.to_mse().encode('utf-8'))
                fstring = ''
                if card.json:
                    fstring += 'JSON:\n' + card.json + '\n'
                if card.raw:
                    fstring += 'raw:\n' + card.raw + '\n'
                fstring += '\n'
                fstring += card.format(gatherer = gatherer, for_forum = for_forum,
                                       vdump = vdump) + '\n'
                fstring = fstring.replace('<', '(').replace('>', ')')
                # drop the final newline, then indent every line by two tabs
                writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t'))
            else:
                fstring = card.format(gatherer = gatherer, for_forum = for_forum,
                                      vdump = vdump, for_html = for_html)
                writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                cstring = closest_listing(card)
                if for_mse:
                    cstring = ('\n\n' + cstring[:-1]).replace('\n', '\n\t\t')
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

        if for_mse:
            # more formatting info
            writer.write('version control:\n\ttype: none\napprentice code: ')

    def writehtml(writer, card_set):
        for card in card_set:
            fstring = card.format(gatherer = gatherer, for_forum = True,
                                  vdump = vdump, for_html = for_html)
            if creativity:
                fstring = fstring[:-6] # chop off the closing </div> to stick stuff in
            writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                # the trailing </div> re-closes the card box opened above
                cstring = '<hr><div>' + closest_listing(card, br = '<br>\n') + '</div>\n</div>'
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

    # Sorting by colors
    def sort_colors(card_set):
        # Returns eight lists in the order: white, blue, black, red, green,
        # multicolor, colorless, lands.
        mono = {'W': [], 'U': [], 'B': [], 'R': [], 'G': []}
        multi_cards = []
        colorless_cards = []
        lands = []
        for card in card_set:
            colors = card.get_colors()
            if len(colors) > 1:
                multi_cards.append(card)
                continue
            for symbol in 'RUBGW':
                if symbol in colors:
                    mono[symbol].append(card)
                    break
            else:
                if "land" in card.get_types():
                    lands.append(card)
                else:
                    colorless_cards.append(card)
        return [mono['W'], mono['U'], mono['B'], mono['R'], mono['G'],
                multi_cards, colorless_cards, lands]

    def sort_type(card_set):
        # Group cards by the first matching type below; cards matching none
        # go last. sorted() is stable, so the input order is kept per group.
        sorting = ["creature", "enchantment", "instant", "sorcery", "artifact", "planeswalker"]

        def rank(card):
            types = card.get_types()
            for position, type_name in enumerate(sorting):
                if type_name in types:
                    return position
            return len(sorting)

        return sorted(card_set, key = rank)

    def sort_cmc(card_set):
        # Order cards by ascending CMC, keeping the input order within one
        # CMC value. Currently not called by any of the writers.
        return sorted(card_set, key = lambda card: card.get_cmc())

    if oname:
        if for_html:
            # NOTE: printed even when not verbose; no '.html' extension is
            # enforced on the output name.
            print(oname)
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
        if for_mse:
            # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
            if os.path.isfile('set'):
                print('ERROR: tried to overwrite existing file "set" - aborting.')
                return
            shutil.copyfile(oname, 'set')
            try:
                # Use the freaky mse extension instead of zip and zip up the
                # set file into oname.mse-set.
                with zipfile.ZipFile(oname+'.mse-set', mode='w') as zf:
                    zf.write('set')
                # Only report success once the archive was actually written.
                if verbose:
                    print('Made an MSE set file called ' + oname + '.mse-set.')
            finally:
                # The set file is useless outside the .mse-set, delete it.
                os.remove('set')
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
def get_statistics(fname, lm=None, sep=False, verbose=False):
    """Collect summary statistics about the cards stored in ``fname``.

    Parameters:
        fname: path of the card file; opened with ``jdecode.mtg_open_file``.
        lm: optional language model. Only its ``perplexity(tokens)`` method
            is used; when ``None`` the n-gram section is skipped.
        sep: when true, perplexity is computed per text line and combined
            per card; otherwise once over the whole card text.
        verbose: forwarded to ``jdecode.mtg_open_file``.

    Returns:
        An ``OrderedDict`` with these entries:
          'cards' - the list returned by ``jdecode.mtg_open_file``
          'cp'    - checkpoint info parsed from the file name (only when the
                    name matches the expected pattern)
          'props' - annotated validation results plus an 'overall' summary
          'dists' - name/cbow distances read from ``fname + '.dist'`` (only
                    when that file exists)
          'ngram' - per-card perplexities and their means (only when ``lm``
                    is given)
    """
    stats = OrderedDict()
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats['cards'] = cards

    # unpack the name of the checkpoint - terrible and hacky
    # Expected shape: <name>_epoch<epoch>_<vloss>.<ident...>.<temp>XXXX where
    # vloss and temp each span two dot-separated fragments and the last four
    # characters (presumably a file extension - TODO confirm) are discarded.
    try:
        final_name = os.path.basename(fname)
        halves = final_name.split('_epoch')
        cp_name = halves[0]
        cp_info = halves[1][:-4]
        info_halves = cp_info.split('_')
        cp_epoch = float(info_halves[0])
        fragments = info_halves[1].split('.')
        cp_vloss = float('.'.join(fragments[:2]))
        cp_temp = float('.'.join(fragments[-2:]))
        cp_ident = '.'.join(fragments[2:-2])
        stats['cp'] = OrderedDict([('name', cp_name), ('epoch', cp_epoch),
                                   ('vloss', cp_vloss), ('temp', cp_temp),
                                   ('ident', cp_ident)])
    except (IndexError, ValueError):
        # Best effort: the file name is not a checkpoint name, so there is
        # simply no 'cp' entry.
        pass

    # validate
    ((total_all, total_good, total_bad, total_uncovered),
     values) = mtg_validate.process_props(cards)

    stats['props'] = annotate_values(values)
    stats['props']['overall'] = OrderedDict([('total', total_all),
                                             ('good', total_good),
                                             ('bad', total_bad),
                                             ('uncovered', total_uncovered)])

    # distances
    distfname = fname + '.dist'
    if os.path.isfile(distfname):
        with open(distfname, 'rt') as f:
            distlines = f.read().split('\n')
        dists = OrderedDict([('name', []), ('cbow', [])])
        for line in distlines:
            # Each usable line is '|'-separated with the name distance in the
            # third field and the cbow distance in the fourth.
            fields = line.split('|')
            if len(fields) < 4:
                continue
            dists['name'].append(float(fields[2]))
            dists['cbow'].append(float(fields[3]))

        dists['name_mean'] = mean_nonan(dists['name'])
        dists['cbow_mean'] = mean_nonan(dists['cbow'])
        dists['name_geomean'] = gmean_nonzero(dists['name'])
        dists['cbow_geomean'] = gmean_nonzero(dists['cbow'])
        stats['dists'] = dists

    # n-grams
    if lm is not None:
        ngram = OrderedDict([('perp', []), ('perp_per', []), ('perp_max', []),
                             ('perp_per_max', [])])
        for card in cards:
            # Cards without any usable text score 0.0 everywhere. Assigning
            # all four values up front guarantees none of them is left
            # undefined or carried over from the previous card.
            perp = perp_per = perp_max = perp_per_max = 0.0

            if len(card.text.text) > 0:
                if sep:
                    vtexts = [
                        line.vectorize().split() for line in card.text_lines
                    ]
                    vtexts = [vtext for vtext in vtexts if len(vtext) > 0]
                    if vtexts:
                        perps = [lm.perplexity(vtext) for vtext in vtexts]
                        perps_per = [
                            p / float(len(vtext))
                            for p, vtext in zip(perps, vtexts)
                        ]
                        perp = gmean_nonzero(perps)
                        perp_per = gmean_nonzero(perps_per)
                        perp_max = max(perps)
                        perp_per_max = max(perps_per)
                else:
                    vtext = card.text.vectorize().split()
                    if vtext:
                        perp = lm.perplexity(vtext)
                        perp_per = perp / float(len(vtext))
                        # With a single text the maxima are the values
                        # themselves.
                        perp_max = perp
                        perp_per_max = perp_per

            ngram['perp'].append(perp)
            ngram['perp_per'].append(perp_per)
            ngram['perp_max'].append(perp_max)
            ngram['perp_per_max'].append(perp_per_max)

        ngram['perp_mean'] = mean_nonan(ngram['perp'])
        ngram['perp_per_mean'] = mean_nonan(ngram['perp_per'])
        ngram['perp_geomean'] = gmean_nonzero(ngram['perp'])
        ngram['perp_per_geomean'] = gmean_nonzero(ngram['perp_per'])
        stats['ngram'] = ngram

    return stats
Exemple #20
0
def main(fname,
         oname=None,
         verbose=True,
         encoding='std',
         gatherer=False,
         for_forum=False,
         for_mse=False,
         creativity=False,
         vdump=False,
         for_html=False):
    """Decode the cards in ``fname`` and write them in a readable format.

    Parameters:
        fname: path of the encoded card file (read via
            ``jdecode.mtg_open_file``).
        oname: output path; when falsy the cards are written to stdout.
        verbose: print progress messages.
        encoding: name of the encoding the input uses; selects the field
            ordering handed to the reader.
        gatherer, for_forum, vdump: forwarded to ``card.format``.
        for_mse: write a Magic Set Editor set and, when ``oname`` is given,
            zip it into ``oname + '.mse-set'``.
        creativity: append the nearest existing names/cards to every card.
        for_html: write an HTML page with the cards grouped by color.

    Raises:
        ValueError: if ``encoding`` is not a known encoding name.

    ``for_mse`` and ``for_html`` cannot be combined; in that case an error
    is printed and nothing is written.
    """

    # there is a sane thing to do here (namely, produce both at the same time)
    # but we don't support it yet.
    if for_mse and for_html:
        print('ERROR - decode.py - incompatible formats "mse" and "html"')
        return

    fmt_ordered = cardlib.fmt_ordered_default

    # Only some encodings need a non-default field ordering; 'std', 'rfields'
    # and 'vec' decode with the default one.
    if encoding == 'named':
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding == 'noname':
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding == 'old':
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding == 'norarity':
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding == 'custom':
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    elif encoding not in ('std', 'rfields', 'vec'):
        raise ValueError('decode.py: unknown encoding: ' + encoding)

    cards = jdecode.mtg_open_file(fname,
                                  verbose=verbose,
                                  fmt_ordered=fmt_ordered)

    if creativity:
        namediff = Namediff()
        cbow = CBOW()
        if verbose:
            print('Computing nearest names...')
        nearest_names = namediff.nearest_par([card.name for card in cards],
                                             n=3)
        if verbose:
            print('Computing nearest cards...')
        nearest_cards = cbow.nearest_par(cards)
        for i, card in enumerate(cards):
            card.nearest_names = nearest_names[i]
            card.nearest_cards = nearest_cards[i]
        if verbose:
            print('...Done.')

    def hoverimg(cardname, dist, nd):
        # Format one "name: distance" entry for the active output format.
        truename = nd.names[cardname]
        code = nd.codes[cardname]
        if for_html:
            if code:
                return (
                    '<div class="hover_img"><a href="#">' + truename +
                    '<span><img style="background: url(http://magiccards.info/scans/en/'
                    + code + ');" alt=""/></span></a>' + ': ' + str(dist) +
                    '\n</div>\n')
            return '<div>' + truename + ': ' + str(dist) + '</div>'
        if for_forum:
            return '[card]' + truename + '[/card]' + ': ' + str(dist) + '\n'
        return truename + ': ' + str(dist) + '\n'

    def closest_listing(card, br=''):
        # Build the "closest cards / closest names" text for one card; `br`
        # is the line-break markup inserted around the headings (HTML only).
        parts = ['~~ closest cards ~~\n' + br]
        parts += [
            hoverimg(cardname, dist, namediff)
            for dist, cardname in card.nearest_cards
        ]
        parts.append(br + '~~ closest names ~~\n' + br)
        parts += [
            hoverimg(cardname, dist, namediff)
            for dist, cardname in card.nearest_names
        ]
        return ''.join(parts)

    def writecards(writer):
        if for_mse:
            # have to prepend a massive chunk of formatting info
            writer.write(utils.mse_prepend)

        if for_html:
            # have to prepend html info
            writer.write(utils.html_prepend)
            # the cards are written one color segment at a time
            for i, segment in enumerate(sort_colors(cards)):
                # order the cards of this color by card type
                ordered = sort_type(segment)
                # one <div> per segment allows the card boxes to be colored
                # per color; coloring each box separately would need
                # non-trivial changes to cardlib.Card.format()
                writer.write('<div id="' + utils.segment_ids[i] + '">')
                writehtml(writer, ordered)
                writer.write("</div><hr>")
            # closing the html file
            writer.write(utils.html_append)
            return  # everything is written; avoid writing the cards twice

        for card in cards:
            if for_mse:
                writer.write(card.to_mse().encode('utf-8'))
                fstring = ''
                if card.json:
                    fstring += 'JSON:\n' + card.json + '\n'
                if card.raw:
                    fstring += 'raw:\n' + card.raw + '\n'
                fstring += '\n'
                fstring += card.format(
                    gatherer=gatherer, for_forum=for_forum, vdump=vdump) + '\n'
                fstring = fstring.replace('<', '(').replace('>', ')')
                # drop the final newline, then indent every line by two tabs
                writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t'))
            else:
                fstring = card.format(gatherer=gatherer,
                                      for_forum=for_forum,
                                      vdump=vdump,
                                      for_html=for_html)
                writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                cstring = closest_listing(card)
                if for_mse:
                    cstring = ('\n\n' + cstring[:-1]).replace('\n', '\n\t\t')
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

        if for_mse:
            # more formatting info
            writer.write('version control:\n\ttype: none\napprentice code: ')

    def writehtml(writer, card_set):
        for card in card_set:
            fstring = card.format(gatherer=gatherer,
                                  for_forum=True,
                                  vdump=vdump,
                                  for_html=for_html)
            if creativity:
                # chop off the closing </div> to stick stuff in
                fstring = fstring[:-6]
            writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                # the trailing </div> re-closes the card box opened above
                cstring = ('<hr><div>' + closest_listing(card, br='<br>\n') +
                           '</div>\n</div>')
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

    # Sorting by colors
    def sort_colors(card_set):
        # Returns eight lists in the order: white, blue, black, red, green,
        # multicolor, colorless, lands.
        mono = {'W': [], 'U': [], 'B': [], 'R': [], 'G': []}
        multi_cards = []
        colorless_cards = []
        lands = []
        for card in card_set:
            colors = card.get_colors()
            if len(colors) > 1:
                multi_cards.append(card)
                continue
            for symbol in 'RUBGW':
                if symbol in colors:
                    mono[symbol].append(card)
                    break
            else:
                if "land" in card.get_types():
                    lands.append(card)
                else:
                    colorless_cards.append(card)
        return [
            mono['W'], mono['U'], mono['B'], mono['R'], mono['G'],
            multi_cards, colorless_cards, lands
        ]

    def sort_type(card_set):
        # Group cards by the first matching type below; cards matching none
        # go last. sorted() is stable, so the input order is kept per group.
        sorting = [
            "creature", "enchantment", "instant", "sorcery", "artifact",
            "planeswalker"
        ]

        def rank(card):
            types = card.get_types()
            for position, type_name in enumerate(sorting):
                if type_name in types:
                    return position
            return len(sorting)

        return sorted(card_set, key=rank)

    def sort_cmc(card_set):
        # Order cards by ascending CMC, keeping the input order within one
        # CMC value. Currently not called by any of the writers.
        return sorted(card_set, key=lambda card: card.get_cmc())

    if oname:
        if for_html:
            # NOTE: printed even when not verbose; no '.html' extension is
            # enforced on the output name.
            print(oname)
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
        if for_mse:
            # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
            if os.path.isfile('set'):
                print('ERROR: tried to overwrite existing file "set" - aborting.')
                return
            shutil.copyfile(oname, 'set')
            try:
                # Use the freaky mse extension instead of zip and zip up the
                # set file into oname.mse-set.
                with zipfile.ZipFile(oname + '.mse-set', mode='w') as zf:
                    zf.write('set')
                # Only report success once the archive was actually written.
                if verbose:
                    print('Made an MSE set file called ' + oname + '.mse-set.')
            finally:
                # The set file is useless outside the .mse-set, delete it.
                os.remove('set')
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
Exemple #21
0
def main(fname, oname = None, verbose = True, encoding = 'std',
         nolinetrans = False, randomize = False, nolabel = False, stable = False, addspaces = False,filtersets = None):
    """Encode the cards in ``fname`` and write them to ``oname`` or stdout.

    Parameters:
        fname: path of the card file (read via ``jdecode.mtg_open_file``).
        oname: output path; when falsy the cards are written to stdout.
        verbose: print a summary of the chosen options.
        encoding: name of the output encoding; 'vec' writes
            ``card.vectorize()``, every other name goes through
            ``card.encode`` with the matching field ordering.
        nolinetrans: disable line reordering transformations.
        randomize: randomize the order of symbols in mana costs.
        nolabel: do not label fields.
        stable: keep the input order instead of shuffling with a fixed seed.
        addspaces: pad the field labels and the field separator with a
            space on each side; also forwarded to the reader and encoder.
        filtersets: optional comma-separated string, split and passed to the
            reader as ``include_sets``.

    Raises:
        ValueError: if ``encoding`` is not a known encoding name.
    """
    fmt_ordered = cardlib.fmt_ordered_default
    fmt_labeled = None if nolabel else cardlib.fmt_labeled_default

    if fmt_labeled is not None and addspaces:
        # Pad a copy: the default label table is shared module state, and
        # padding it in place would add more spaces on every call.
        fmt_labeled = {label: ' ' + text + ' '
                       for label, text in fmt_labeled.items()}

    fieldsep = utils.fieldsep

    if addspaces:
        fieldsep = ' ' + fieldsep + ' '

    line_transformations = not nolinetrans
    randomize_fields = False
    randomize_mana = randomize
    initial_sep = True
    final_sep = True

    if filtersets is not None:
        filtersets = filtersets.split(',')

    # set the properties of the encoding

    if encoding == 'named':
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding == 'noname':
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding == 'rfields':
        randomize_fields = True
        final_sep = False
    elif encoding == 'old':
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding == 'norarity':
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding == 'custom':
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    elif encoding not in ('std', 'vec'):
        raise ValueError('encode.py: unknown encoding: ' + encoding)

    if verbose:
        print('Preparing to encode:')
        print('\t Using encoding ' + repr(encoding))
        if stable:
            print('\t NOT randomizing order of cards.')
        if randomize_mana:
            print('\t Randomizing order of symobls in manacosts.')
        if not fmt_labeled:
            print('\t NOT labeling fields for this run (may be harder to decode).')
        if not line_transformations:
            print('\t NOT using line reordering transformations')

    cards = jdecode.mtg_open_file(fname, verbose=verbose,
                                  linetrans=line_transformations,
                                  addspaces=addspaces,
                                  include_sets=filtersets)

    # This should give a random but consistent ordering, to make comparing changes
    # between the output of different versions easier.
    if not stable:
        random.seed(1371367)
        random.shuffle(cards)

    def writecards(writer):
        for card in cards:
            if encoding == 'vec':
                writer.write(card.vectorize() + '\n\n')
            else:
                writer.write(card.encode(fmt_ordered = fmt_ordered,
                                         fmt_labeled = fmt_labeled,
                                         fieldsep = fieldsep,
                                         randomize_fields = randomize_fields,
                                         randomize_mana = randomize_mana,
                                         initial_sep = initial_sep,
                                         final_sep = final_sep,
                                         addspaces = addspaces)
                             + utils.cardsep)

    if oname:
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
Exemple #22
0
def main(
    fname, oname=None, verbose=True, encoding="std", nolinetrans=False, randomize=False, nolabel=False, stable=False
):
    """Encode the cards loaded from ``fname`` and write them out.

    Args:
        fname: Path handed to ``jdecode.mtg_open_file``.
        oname: Output path; when falsy the cards are written to ``sys.stdout``.
        verbose: Print a summary of the selected options (also forwarded to
            ``jdecode.mtg_open_file``).
        encoding: One of "std", "named", "noname", "rfields", "old",
            "norarity", "vec" or "custom".
        nolinetrans: Turn off line reordering transformations when loading.
        randomize: Randomize the order of symbols in mana costs.
        nolabel: Do not label fields (``fmt_labeled`` is passed as ``None``).
        stable: Keep the cards in their loaded order instead of shuffling.

    Raises:
        ValueError: If ``encoding`` is not one of the recognized names.
    """
    fmt_ordered = cardlib.fmt_ordered_default
    fmt_labeled = None if nolabel else cardlib.fmt_labeled_default
    fieldsep = utils.fieldsep
    line_transformations = not nolinetrans
    randomize_fields = False
    randomize_mana = randomize
    initial_sep = True
    final_sep = True

    # set the properties of the encoding

    if encoding in ["std"]:
        pass
    elif encoding in ["named"]:
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding in ["noname"]:
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding in ["rfields"]:
        randomize_fields = True
        final_sep = False
    elif encoding in ["old"]:
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding in ["norarity"]:
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding in ["vec"]:
        # "vec" bypasses card.encode() entirely; see writecards() below.
        pass
    elif encoding in ["custom"]:
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        raise ValueError("encode.py: unknown encoding: " + encoding)

    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, unlike the print statement which only parses on Python 2.
    if verbose:
        print("Preparing to encode:")
        print("  Using encoding " + repr(encoding))
        if stable:
            print("  NOT randomizing order of cards.")
        if randomize_mana:
            print("  Randomizing order of symobls in manacosts.")
        if not fmt_labeled:
            print("  NOT labeling fields for this run (may be harder to decode).")
        if not line_transformations:
            print("  NOT using line reordering transformations")

    cards = jdecode.mtg_open_file(fname, verbose=verbose, linetrans=line_transformations)

    # This should give a random but consistent ordering, to make comparing changes
    # between the output of different versions easier.
    if not stable:
        random.seed(1371367)
        random.shuffle(cards)

    # The encoding never changes while writing, so decide once up front.
    use_vectorize = encoding in ["vec"]

    def writecards(writer):
        """Write every card to ``writer`` in the selected representation."""
        for card in cards:
            if use_vectorize:
                writer.write(card.vectorize() + "\n\n")
            else:
                writer.write(
                    card.encode(
                        fmt_ordered=fmt_ordered,
                        fmt_labeled=fmt_labeled,
                        fieldsep=fieldsep,
                        randomize_fields=randomize_fields,
                        randomize_mana=randomize_mana,
                        initial_sep=initial_sep,
                        final_sep=final_sep,
                    )
                    + utils.cardsep
                )

    if oname:
        if verbose:
            print("Writing output to: " + oname)
        with open(oname, "w") as ofile:
            writecards(ofile)
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
Exemple #23
0
def check_lines(fname):
    """Print the distinct text lines of the cards in ``fname``, by category.

    Each card's encoded text (plus its b-side text, when present) is split by
    ``transforms.separate_lines`` into five groups. The unique lines of every
    group are collected across all cards and printed in sorted order. Any card
    that yields a blank line in a group is reported as it is encountered.
    """
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    known = ['enchant ', 'equip', 'countertype', 'multikicker', 'kicker',
             'suspend', 'echo', 'awaken', 'bestow', 'buyback',
             'cumulative', 'dash', 'entwine', 'evoke', 'fortify',
             'flashback', 'madness', 'morph', 'megamorph', 'miracle', 'ninjutsu',
             'overload', 'prowl', 'recover', 'reinforce', 'replicate', 'scavenge',
             'splice', 'surge', 'unearth', 'transfigure', 'transmute',
    ]
    # The prefix list above is immediately overridden, so no line is ever
    # collapsed to 'known' at present.
    known = []

    prelines, keylines, mainlines, costlines, postlines = (
        set(), set(), set(), set(), set()
    )

    def gather(lines, bucket, card, collapse_known):
        """Add ``lines`` to ``bucket``, reporting blank ones for ``card``."""
        for entry in lines:
            if entry.strip() == '':
                print(card.name, card.text.text)
            if collapse_known and any(entry.startswith(prefix) for prefix in known):
                entry = 'known'
            bucket.add(entry)

    for card in cards:
        front_text = card.text.encode(randomize=False)
        prel, keyl, mainl, costl, postl = transforms.separate_lines(front_text)
        if card.bside:
            back_text = card.bside.text.encode(randomize=False)
            b_pre, b_key, b_main, b_cost, b_post = transforms.separate_lines(back_text)
            prel += b_pre
            keyl += b_key
            mainl += b_main
            costl += b_cost
            postl += b_post

        # Same processing order as the reporting of blank lines relies on:
        # pre, post, key, main, cost. Only the first three apply the
        # 'known' prefix collapsing.
        plan = (
            (prel, prelines, True),
            (postl, postlines, True),
            (keyl, keylines, True),
            (mainl, mainlines, False),
            (costl, costlines, False),
        )
        for lines, bucket, collapse_known in plan:
            gather(lines, bucket, card, collapse_known)

    summary = 'prel: {:d}, keyl: {:d}, mainl: {:d}, postl {:d}'
    print(summary.format(len(prelines), len(keylines), len(mainlines), len(postlines)))

    sections = (
        ('\nprelines', prelines),
        ('\npostlines', postlines),
        ('\ncostlines', costlines),
        ('\nkeylines', keylines),
        ('\nmainlines', mainlines),
    )
    for heading, bucket in sections:
        print(heading)
        for entry in sorted(bucket):
            print(entry)
Exemple #24
0
def main(infile, verbose = False):
    """Print statistics for ``infile`` using a 3-gram model of ``output.txt``.

    The model is built from the cards in ``output.txt`` under ``datadir`` and
    handed to ``get_statistics`` together with ``infile``; the result is
    passed to ``print_statistics``.
    """
    corpus_path = str(os.path.join(datadir, 'output.txt'))
    corpus_cards = jdecode.mtg_open_file(corpus_path)
    lm = ngrams.build_ngram_model(corpus_cards, 3, separate_lines=True, verbose=True)
    print_statistics(get_statistics(infile, lm=lm, sep=True, verbose=verbose))