def main(args):
    """Stream endlessly reshuffled card encodings to every descriptor in args.fds.

    Reads ``fds``, ``fname``, ``block_size`` and ``seed`` from ``args``.  A
    seed of 0 is replaced by None before it is handed to ``random.Random``.
    The per-stream writer loops forever, so this call is not expected to
    return normally.
    """
    fds = args.fds
    fname = args.fname
    block_size = args.block_size  # read from args but not used below
    main_seed = None if args.seed == 0 else args.seed

    # simple default encoding for now, will add more options with the
    # curriculum learning feature
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    def write_stream(i, fd):
        # Each stream seeds its own generator with the shared seed and then
        # advances it by the stream index.
        rng = random.Random(main_seed)
        rng.jumpahead(i)
        deck = list(cards)
        with open('/proc/self/fd/' + str(fd), 'wt') as out:
            while True:
                rng.shuffle(deck)
                for card in deck:
                    out.write(card.encode(randomize_mana=True,
                                          randomize_lines=True))
                    out.write(utils.cardsep)

    def mkargs(i, fd):
        return i, fd

    streaming_noreturn(fds, write_stream, mkargs)
def main(fname, oname, gmin = 2, gmax = 8, verbose = True):
    """Count n-grams of every size in [gmin, gmax] and dump one file per size.

    For each size ``g`` the counts are written to ``oname + '.' + str(g) + 'g'``
    as ``ngram: count`` lines, highest count first.  Invalid size bounds
    print a message and exit with status 1.
    """
    gmin = int(gmin)
    gmax = int(gmax)
    bins = [1, 2, 3, 10, 30, 100, 300, 1000]
    if gmin < 2 or gmax < gmin:
        print('invalid gram sizes: ' + str(gmin) + '-' + str(gmax))
        exit(1)

    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    for size in range(gmin, gmax + 1):
        if verbose:
            print('generating ' + str(size) + '-grams...')
        counts = {}
        for card in cards:
            update_ngrams(card.text_lines_words, counts, size)

        target = oname + '.' + str(size) + 'g'
        if verbose:
            print(' writing ' + str(len(counts)) + ' unique ' + str(size) + '-grams to ' + target)
            describe_bins(counts, bins)

        with open(target, 'wt') as out:
            # Most frequent n-grams first.
            for gram in sorted(counts, key=counts.get, reverse=True):
                record = gram + ': ' + str(counts[gram]) + '\n'
                out.write(record.encode('utf-8'))
def main(args):
    """Write an endless, repeatedly shuffled stream of encoded cards per fd.

    ``args`` supplies ``fds``, ``fname``, ``block_size`` and ``seed``; a seed
    of 0 becomes None before seeding.  The stream writers loop forever.
    """
    fds = args.fds
    fname = args.fname
    block_size = args.block_size  # fetched for parity with args; unused here
    if args.seed != 0:
        main_seed = args.seed
    else:
        main_seed = None

    # simple default encoding for now, will add more options with the
    # curriculum learning feature
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    def write_stream(i, fd):
        stream_rng = random.Random(main_seed)
        # Advance the generator by the stream index after seeding.
        stream_rng.jumpahead(i)
        shuffled = list(cards)
        fd_path = '/proc/self/fd/' + str(fd)
        with open(fd_path, 'wt') as sink:
            while True:
                stream_rng.shuffle(shuffled)
                for card in shuffled:
                    encoded = card.encode(randomize_mana=True,
                                          randomize_lines=True)
                    sink.write(encoded)
                    sink.write(utils.cardsep)

    def mkargs(i, fd):
        return i, fd

    streaming_noreturn(fds, write_stream, mkargs)
def _card_words(card):
    """Return the vectorized words of a card, including its b-side if any."""
    words = card.text.vectorize().split()
    if card.bside:
        words += card.bside.text.vectorize().split()
    return words


def check_vocab(fname):
    """Print word frequencies for fname, then show cards that use rare words.

    First prints every vocabulary word with its count, most frequent first.
    Then, for each card, prints the first word whose count is at most 3
    together with the card's encoding.
    """
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    vocab = {}
    for card in cards:
        for word in _card_words(card):
            vocab[word] = vocab.get(word, 0) + 1

    # key= replaces the positional cmp function, which Python 3 rejects.
    for word in sorted(vocab, key=vocab.get, reverse=True):
        print('{:8d} : {:s}'.format(vocab[word], word))

    n = 3

    for card in cards:
        for word in _card_words(card):
            if vocab[word] <= n:
                print('\n{:8d} : {:s}'.format(vocab[word], word))
                print(card.encode())
                # report each card once, on its first rare word
                break
def main(infile, verbose=False):
    """Print statistics for infile using a 3-gram model of datadir/output.txt."""
    corpus_path = str(os.path.join(datadir, 'output.txt'))
    corpus = jdecode.mtg_open_file(corpus_path)
    lm = ngrams.build_ngram_model(corpus, 3, separate_lines=True,
                                  verbose=True)
    stats = get_statistics(infile, lm=lm, sep=True, verbose=verbose)
    print_statistics(stats)
def main(fname, oname = None, verbose = False, dump = False):
    """Validate the cards in fname and print a summary of the property checks.

    ``oname`` is accepted but not used.  The rare-n-gram histogram branch is
    disabled by the hard-coded ``do_grams`` flag.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    do_grams = False

    if not do_grams:
        (totals, values) = process_props(cards, dump=dump)
        (total_all, total_good, total_bad, total_uncovered) = totals

        # summary
        print('-- overall --')
        print(' total : ' + str(total_all))
        print(' good : ' + str(total_good) + ' ' + pct(total_good, total_all))
        print(' bad : ' + str(total_bad) + ' ' + pct(total_bad, total_all))
        print(' uncocoverd: ' + str(total_uncovered) + ' ' + pct(total_uncovered, total_all))
        print('----')

        # breakdown
        for prop in props:
            (total, good, bad) = values[prop]
            print(prop + ':')
            print(' total: ' + str(total) + ' ' + pct(total, total_all))
            print(' good : ' + str(good) + ' ' + pct(good, total_all))
            print(' bad : ' + str(bad) + ' ' + pct(bad, total_all))
        return

    # Histogram of the rare-gram score per card.
    rg = {}
    for card in cards:
        g = rare_grams(card, thresh=2, grams=2)
        nwords = len(card.text_words)
        if nwords > 0:
            g = int(1.0 + (float(g) * 100.0 / float(nwords)))
        rg[g] = rg.get(g, 0) + 1
        if g >= 60:
            print(g)
            print(card.format())

    tot = 0
    vmax = sum(rg.values())
    pct90 = None
    pct95 = None
    pct99 = None
    for score in sorted(rg):
        print(str(score) + ' rare ngrams: ' + str(rg[score]))
        tot += rg[score]
        # Record the first score at which each cumulative threshold is met.
        if pct90 is None and tot >= vmax * 0.90:
            pct90 = score
        if pct95 is None and tot >= vmax * 0.95:
            pct95 = score
        if pct99 is None and tot >= vmax * 0.99:
            pct99 = score

    print('90% - ' + str(pct90))
    print('95% - ' + str(pct95))
    print('99% - ' + str(pct99))
def main(fname, oname, verbose = True, parallel = True):
    """Compute nearest-name and nearest-card distances and write them to oname.

    Each output line is ``index|name|name_distance|card_distance``, encoded
    as UTF-8.  With ``parallel`` set, the ``nearest_par`` variants of
    Namediff and CBOW are used; otherwise cards are processed one by one.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    # this could reasonably be some separate function
    # might make sense to merge cbow and namediff and have this be the main interface
    namediff = Namediff()
    cbow = CBOW()

    if verbose:
        print('Computing nearest names...')
    if parallel:
        nearest_names = namediff.nearest_par([c.name for c in cards], n=1)
    else:
        nearest_names = [namediff.nearest(c.name, n=1) for c in cards]

    if verbose:
        print('Computing nearest cards...')
    if parallel:
        nearest_cards = cbow.nearest_par(cards, n=1)
    else:
        nearest_cards = [cbow.nearest(c, n=1) for c in cards]

    for i, card in enumerate(cards):
        card.nearest_names = nearest_names[i]
        card.nearest_cards = nearest_cards[i]

    # # unfortunately this takes ~30 hours on 8 cores for a 10MB dump
    # if verbose:
    #     print 'Computing nearest encodings by text edit distance...'
    # if parallel:
    #     nearest_cards_text = namediff.nearest_card_par(cards, n=1)
    # else:
    #     nearest_cards_text = [namediff.nearest_card(c, n=1) for c in cards]

    if verbose:
        print('...Done.')

    # write to a file to store the data, this is a terribly long computation
    # we could also just store this same info in the cards themselves as more fields...
    sep = '|'
    # The records are encoded to bytes before writing, so the file must be
    # opened in binary mode; a text-mode file rejects bytes on Python 3.
    with open(oname, 'wb') as ofile:
        for i, card in enumerate(cards):
            ndist, _ = card.nearest_names[0]
            cdist, _ = card.nearest_cards[0]
            ostr = str(i) + sep + card.name + sep + str(ndist) + sep + str(cdist) + '\n'
            ofile.write(ostr.encode('utf-8'))
def main(fname, oname, verbose = True, parallel = True):
    """Write per-card nearest-name and nearest-card distances to oname.

    One ``index|name|name_distance|card_distance`` line is written per card.
    ``parallel`` selects the ``nearest_par`` batch calls over the per-card
    ``nearest`` calls.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    # this could reasonably be some separate function
    # might make sense to merge cbow and namediff and have this be the main interface
    namediff = Namediff()
    cbow = CBOW()

    if verbose:
        print('Computing nearest names...')
    if parallel:
        nearest_names = namediff.nearest_par([c.name for c in cards], n=1)
    else:
        nearest_names = [namediff.nearest(c.name, n=1) for c in cards]

    if verbose:
        print('Computing nearest cards...')
    if parallel:
        nearest_cards = cbow.nearest_par(cards, n=1)
    else:
        nearest_cards = [cbow.nearest(c, n=1) for c in cards]

    for idx, card in enumerate(cards):
        card.nearest_names = nearest_names[idx]
        card.nearest_cards = nearest_cards[idx]

    # # unfortunately this takes ~30 hours on 8 cores for a 10MB dump
    # if verbose:
    #     print 'Computing nearest encodings by text edit distance...'
    # if parallel:
    #     nearest_cards_text = namediff.nearest_card_par(cards, n=1)
    # else:
    #     nearest_cards_text = [namediff.nearest_card(c, n=1) for c in cards]

    if verbose:
        print('...Done.')

    # write to a file to store the data, this is a terribly long computation
    # we could also just store this same info in the cards themselves as more fields...
    sep = '|'
    with open(oname, 'w') as ofile:
        for idx, card in enumerate(cards):
            ndist, _ = card.nearest_names[0]
            cdist, _ = card.nearest_cards[0]
            record = str(idx) + sep + card.name + sep
            record += str(ndist) + sep
            record += str(cdist) + '\n'
            ofile.write(record.encode('utf-8'))
def main(fname, oname, gmin=2, gmax=8, nltk=False, sep=False, verbose=False):
    """Build either a pickled n-gram model or plain n-gram count files.

    With ``nltk`` set, a model of order ``gmin`` is built with
    ``build_ngram_model`` and pickled to ``oname``.  Otherwise, for every
    size ``g`` in [gmin, gmax], the counts are written to
    ``oname + '.' + str(g) + 'g'`` as UTF-8 ``ngram: count`` lines, highest
    count first.  Invalid size bounds print a message and exit with status 1.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    gmin = int(gmin)
    gmax = int(gmax)

    if nltk:
        n = gmin
        lm = build_ngram_model(cards, n, separate_lines=sep, verbose=verbose)
        if verbose:
            teststr = 'when @ enters the battlefield'
            print('litmus test: perplexity of ' + repr(teststr))
            print(' ' + str(lm.perplexity(teststr.split())))
            print('pickling module to ' + oname)
        with open(oname, 'wb') as f:
            pickle.dump(lm, f)
        return

    bins = [1, 2, 3, 10, 30, 100, 300, 1000]
    if gmin < 2 or gmax < gmin:
        print('invalid gram sizes: ' + str(gmin) + '-' + str(gmax))
        exit(1)

    for grams in range(gmin, gmax + 1):
        if verbose:
            print('generating ' + str(grams) + '-grams...')
        gramdict = {}
        for card in cards:
            update_ngrams(card.text_lines_words, gramdict, grams)

        oname_full = oname + '.' + str(grams) + 'g'
        if verbose:
            print(' writing ' + str(len(gramdict)) + ' unique ' + str(grams)
                  + '-grams to ' + oname_full)
            describe_bins(gramdict, bins)

        # Binary mode: the lines are encoded to bytes before writing, which a
        # text-mode file rejects on Python 3.
        with open(oname_full, 'wb') as f:
            # key= replaces the cmp-function sort that Python 3 does not accept.
            for ngram in sorted(gramdict, key=gramdict.get, reverse=True):
                line = ngram + ': ' + str(gramdict[ngram]) + '\n'
                f.write(line.encode('utf-8'))
def main(fname, oname, gmin = 2, gmax = 8, nltk = False, sep = False, verbose = False):
    """Build either a pickled n-gram model or plain n-gram count files.

    With ``nltk`` set, a model of order ``gmin`` is built with
    ``build_ngram_model`` and pickled to ``oname``.  Otherwise, for every
    size ``g`` in [gmin, gmax], the counts are written to
    ``oname + '.' + str(g) + 'g'`` as UTF-8 ``ngram: count`` lines, highest
    count first.  Invalid size bounds print a message and exit with status 1.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    gmin = int(gmin)
    gmax = int(gmax)

    if nltk:
        n = gmin
        lm = build_ngram_model(cards, n, separate_lines=sep, verbose=verbose)
        if verbose:
            teststr = 'when @ enters the battlefield'
            print('litmus test: perplexity of ' + repr(teststr))
            print(' ' + str(lm.perplexity(teststr.split())))
            print('pickling module to ' + oname)
        with open(oname, 'wb') as f:
            pickle.dump(lm, f)
        return

    bins = [1, 2, 3, 10, 30, 100, 300, 1000]
    if gmin < 2 or gmax < gmin:
        print('invalid gram sizes: ' + str(gmin) + '-' + str(gmax))
        exit(1)

    for grams in range(gmin, gmax + 1):
        if verbose:
            print('generating ' + str(grams) + '-grams...')
        gramdict = {}
        for card in cards:
            update_ngrams(card.text_lines_words, gramdict, grams)

        oname_full = oname + '.' + str(grams) + 'g'
        if verbose:
            print(' writing ' + str(len(gramdict)) + ' unique ' + str(grams)
                  + '-grams to ' + oname_full)
            describe_bins(gramdict, bins)

        # The lines are encoded to bytes, so the file is opened in binary
        # mode; writing bytes to a 'wt' file raises TypeError on Python 3.
        with open(oname_full, 'wb') as f:
            # sorted() takes no positional cmp function on Python 3 and cmp
            # itself is gone; sorting by count via key= gives the same order.
            for ngram in sorted(gramdict, key=gramdict.get, reverse=True):
                line = ngram + ': ' + str(gramdict[ngram]) + '\n'
                f.write(line.encode('utf-8'))
def check_characters(fname, vname):
    """Print the character vocabulary of fname and optionally save it as JSON.

    The vocabulary is every character of ``utils.cardsep`` plus every
    character of every card encoding, numbered from 1 in sorted order.  When
    ``vname`` is truthy both mappings are dumped there with ``json.dump``.
    """
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    symbols = set(utils.cardsep)
    for card in cards:
        symbols.update(card.encode())

    ordered = sorted(symbols)
    token_to_idx = {}
    idx_to_token = {}
    for number, tok in enumerate(ordered, 1):
        token_to_idx[tok] = number
        idx_to_token[number] = tok

    print('Vocabulary: ({:d} symbols)'.format(len(token_to_idx)))
    for tok in ordered:
        print('{:8s} : {:4d}'.format(repr(tok), token_to_idx[tok]))

    # compliant with torch-rnn
    if vname:
        json_data = {'token_to_idx': token_to_idx,
                     'idx_to_token': idx_to_token}
        print('writing vocabulary to {:s}'.format(vname))
        with open(vname, 'w') as out:
            json.dump(json_data, out)
def main(fname, oname=None, verbose=True, encoding='std', nolinetrans=False, randomize=False, nolabel=False, stable=False):
    """Encode the cards in fname and write them to oname, or stdout if unset.

    ``encoding`` picks the field ordering ('std', 'named', 'noname',
    'rfields', 'old', 'norarity', 'vec', 'custom'); any other value raises
    ValueError.  Unless ``stable`` is set, the cards are shuffled with a
    fixed seed so that the output order is random but repeatable.
    """
    fmt_ordered = cardlib.fmt_ordered_default
    fmt_labeled = None if nolabel else cardlib.fmt_labeled_default
    fieldsep = utils.fieldsep
    line_transformations = not nolinetrans
    randomize_fields = False
    randomize_mana = randomize
    initial_sep = True
    final_sep = True

    # set the properties of the encoding
    if encoding == 'std':
        pass
    elif encoding == 'named':
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding == 'noname':
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding == 'rfields':
        randomize_fields = True
        final_sep = False
    elif encoding == 'old':
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding == 'norarity':
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding == 'vec':
        pass
    elif encoding == 'custom':
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        raise ValueError('encode.py: unknown encoding: ' + encoding)

    if verbose:
        print('Preparing to encode:')
        print(' Using encoding ' + repr(encoding))
        if stable:
            print(' NOT randomizing order of cards.')
        if randomize_mana:
            print(' Randomizing order of symobls in manacosts.')
        if not fmt_labeled:
            print(' NOT labeling fields for this run (may be harder to decode).')
        if not line_transformations:
            print(' NOT using line reordering transformations')

    cards = jdecode.mtg_open_file(fname, verbose=verbose,
                                  linetrans=line_transformations)

    # This should give a random but consistent ordering, to make comparing changes
    # between the output of different versions easier.
    if not stable:
        random.seed(1371367)
        random.shuffle(cards)

    as_vectors = encoding == 'vec'
    encode_options = dict(fmt_ordered=fmt_ordered,
                          fmt_labeled=fmt_labeled,
                          fieldsep=fieldsep,
                          randomize_fields=randomize_fields,
                          randomize_mana=randomize_mana,
                          initial_sep=initial_sep,
                          final_sep=final_sep)

    def writecards(writer):
        for card in cards:
            if as_vectors:
                writer.write(card.vectorize() + '\n\n')
            else:
                writer.write(card.encode(**encode_options) + utils.cardsep)

    if not oname:
        writecards(sys.stdout)
        sys.stdout.flush()
        return

    if verbose:
        print('Writing output to: ' + oname)
    with open(oname, 'w') as ofile:
        writecards(ofile)
def main(fname, oname = None, verbose = True, encoding = 'std',
         gatherer = False, for_forum = False, for_mse = False,
         creativity = False, vdump = False, for_html = False):
    """Decode the cards in fname and write them out in the requested format.

    Output goes to ``oname`` when given, otherwise to stdout.  ``for_mse``
    and ``for_html`` are mutually exclusive; requesting both prints an error
    and returns.  With ``creativity`` set, each card is followed by its
    nearest names and nearest cards.  With ``for_mse`` and an ``oname``, the
    output file is additionally zipped into ``oname + '.mse-set'`` under the
    archive member name ``set``.

    Raises:
        ValueError: if ``encoding`` is not one of the known encodings.
    """
    # there is a sane thing to do here (namely, produce both at the same time)
    # but we don't support it yet.
    if for_mse and for_html:
        print('ERROR - decode.py - incompatible formats "mse" and "html"')
        return

    fmt_ordered = cardlib.fmt_ordered_default

    if encoding in ['std']:
        pass
    elif encoding in ['named']:
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding in ['noname']:
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding in ['rfields']:
        pass
    elif encoding in ['old']:
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding in ['norarity']:
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding in ['vec']:
        pass
    elif encoding in ['custom']:
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        # The message previously named encode.py although this is decode.py.
        raise ValueError('decode.py: unknown encoding: ' + encoding)

    cards = jdecode.mtg_open_file(fname, verbose=verbose, fmt_ordered=fmt_ordered)

    if creativity:
        namediff = Namediff()
        cbow = CBOW()
        if verbose:
            print('Computing nearest names...')
        nearest_names = namediff.nearest_par([c.name for c in cards], n=3)
        if verbose:
            print('Computing nearest cards...')
        nearest_cards = cbow.nearest_par(cards)
        for i in range(0, len(cards)):
            cards[i].nearest_names = nearest_names[i]
            cards[i].nearest_cards = nearest_cards[i]
        if verbose:
            print('...Done.')

    def hoverimg(cardname, dist, nd):
        # Format one "name: distance" entry; the markup depends on the
        # requested output format.
        truename = nd.names[cardname]
        code = nd.codes[cardname]
        namestr = ''
        if for_html:
            if code:
                namestr = ('<div class="hover_img"><a href="#">' + truename
                           + '<span><img src="http://magiccards.info/scans/en/' + code
                           + '" alt="image"/></span></a>' + ': ' + str(dist) + '</div>')
            else:
                namestr = '<div>' + truename + ': ' + str(dist) + '</div>'
        elif for_forum:
            namestr = '[card]' + truename + '[/card]' + ': ' + str(dist) + '\n'
        else:
            namestr = truename + ': ' + str(dist) + '\n'
        return namestr

    def writecards(writer):
        if for_mse:
            # have to prepend a massive chunk of formatting info
            writer.write(utils.mse_prepend)

        if for_html:
            # have to prepend html info
            writer.write(utils.html_prepend)

        for card in cards:
            if for_mse:
                writer.write(card.to_mse().encode('utf-8'))
                fstring = ''
                if card.json:
                    fstring += 'JSON:\n' + card.json + '\n'
                if card.raw:
                    fstring += 'raw:\n' + card.raw + '\n'
                fstring += '\n'
                fstring += card.format(gatherer = gatherer, for_forum = for_forum,
                                       vdump = vdump) + '\n'
                fstring = fstring.replace('<', '(').replace('>', ')')
                writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t'))
            else:
                fstring = card.format(gatherer = gatherer, for_forum = for_forum,
                                      vdump = vdump, for_html = for_html)
                if creativity and for_html:
                    fstring = fstring[:-6] # chop off the closing </div> to stick stuff in
                writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                cstring = '~~ closest cards ~~\n'
                nearest = card.nearest_cards
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                cstring += '~~ closest names ~~\n'
                nearest = card.nearest_names
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                if for_html:
                    cstring = '<hr><div>' + cstring.replace('\n', '<br>\n') + '</div>\n</div>'
                elif for_mse:
                    cstring = ('\n\n' + cstring[:-1]).replace('\n', '\n\t\t')
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

        if for_mse:
            # more formatting info
            writer.write('version control:\n\ttype: none\napprentice code: ')
        if for_html:
            # closing the html file
            writer.write(utils.html_append)

    if oname:
        if for_html:
            print(oname)
            # if ('.html' != oname[-])
            #     oname += '.html'
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
        if for_mse:
            # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
            if os.path.isfile('set'):
                print('ERROR: tried to overwrite existing file "set" - aborting.')
                return
            shutil.copyfile(oname, 'set')
            # Use the freaky mse extension instead of zip.
            with zipfile.ZipFile(oname+'.mse-set', mode='w') as zf:
                try:
                    # Zip up the set file into oname.mse-set.
                    zf.write('set')
                finally:
                    if verbose:
                        print('Made an MSE set file called ' + oname + '.mse-set.')
                    # The set file is useless outside the .mse-set, delete it.
                    os.remove('set')
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
def main(fname, oname, n=20, verbose=False):
    """Pair selected generated cards with similar real cards and report them.

    Cards from ``fname`` that pass ``select_card`` are matched against the
    real cards in ``datadir/output.txt`` through their CBOW neighbours; the
    first neighbour accepted by ``compare_to_real`` forms a pair.  Every
    pair is printed, and when ``oname`` is not None the pairs are also
    written as an MSE set and zipped to ``oname + '.mse-set'``.  ``n`` is
    accepted but not used.
    """
    cbow = CBOW()
    real_path = str(os.path.join(datadir, 'output.txt'))
    realcards = jdecode.mtg_open_file(real_path, verbose=verbose)
    real_by_name = {c.name: c for c in realcards}
    lm = ngrams.build_ngram_model(realcards, 3, separate_lines=separate_lines,
                                  verbose=verbose)
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats = analysis.get_statistics(fname, lm=lm, sep=separate_lines,
                                    verbose=verbose)

    selected = [(idx, cards[idx]) for idx in range(len(cards))
                if select_card(cards, stats, idx)]

    limit = 3000

    random.shuffle(selected)
    #selected = selected[:limit]

    if verbose:
        print('computing nearest cards for ' + str(len(selected)) + ' candindates...')
    cbow_nearest = cbow.nearest_par([pair[1] for pair in selected])
    selected = [(idx, card, cbow_nearest[pos])
                for pos, (idx, card) in enumerate(selected)]
    if verbose:
        print('...done')

    final = []
    for (idx, card, nearest) in selected:
        for dist, rname in nearest:
            realcard = real_by_name[rname]
            if compare_to_real(card, realcard):
                # keep only the first acceptable real card per candidate
                final.append((idx, card, realcard, dist))
                break

    for (idx, card, realcard, dist) in final:
        print('-- real --')
        print(realcard.format())
        print('-- fake --')
        print(card.format())
        print('-- stats --')
        perp_per = stats['ngram']['perp_per'][idx]
        perp_max = stats['ngram']['perp_max'][idx]
        print(dist)
        print(perp_per)
        print(perp_max)
        print('----')

    if oname is None:
        return

    with open(oname, 'wt') as ofile:
        ofile.write(utils.mse_prepend)
        for (idx, card, realcard, dist) in final:
            name = realcard.name
            writecard(realcard, name, ofile)
            writecard(card, name, ofile)
        ofile.write('version control:\n\ttype: none\napprentice code: ')

    # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
    # The copy is made once the output file has been closed.
    if os.path.isfile('set'):
        print('ERROR: tried to overwrite existing file "set" - aborting.')
        return
    shutil.copyfile(oname, 'set')
    # Use the freaky mse extension instead of zip.
    with zipfile.ZipFile(oname + '.mse-set', mode='w') as zf:
        try:
            # Zip up the set file into oname.mse-set.
            zf.write('set')
        finally:
            if verbose:
                print('Made an MSE set file called ' + oname + '.mse-set.')
            # The set file is useless outside the .mse-set, delete it.
            os.remove('set')
def main(fname, oname, n=20, verbose=False):
    """Pair selected generated cards with similar real cards and report them.

    Candidates from ``fname`` are chosen with ``select_card`` and matched,
    via their CBOW neighbours, to the first real card from
    ``datadir/output.txt`` accepted by ``compare_to_real``.  Pairs are
    printed and, when ``oname`` is not None, written as an MSE set that is
    zipped to ``oname + '.mse-set'``.  ``n`` is accepted but not used.
    """
    cbow = CBOW()
    real_path = str(os.path.join(datadir, 'output.txt'))
    realcards = jdecode.mtg_open_file(real_path, verbose=verbose)
    real_by_name = {c.name: c for c in realcards}
    lm = ngrams.build_ngram_model(realcards, 3, separate_lines=separate_lines,
                                  verbose=verbose)
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats = analysis.get_statistics(fname, lm=lm, sep=separate_lines,
                                    verbose=verbose)

    selected = []
    for idx in range(len(cards)):
        if select_card(cards, stats, idx):
            selected.append((idx, cards[idx]))

    limit = 3000

    random.shuffle(selected)
    #selected = selected[:limit]

    if verbose:
        print('computing nearest cards for ' + str(len(selected)) + ' candindates...')
    cbow_nearest = cbow.nearest_par([card for (_, card) in selected])
    for pos in range(len(selected)):
        (idx, card) = selected[pos]
        selected[pos] = (idx, card, cbow_nearest[pos])
    if verbose:
        print('...done')

    final = []
    for (idx, card, nearest) in selected:
        for dist, rname in nearest:
            realcard = real_by_name[rname]
            if compare_to_real(card, realcard):
                # stop at the first acceptable real card for this candidate
                final.append((idx, card, realcard, dist))
                break

    for (idx, card, realcard, dist) in final:
        print('-- real --')
        print(realcard.format())
        print('-- fake --')
        print(card.format())
        print('-- stats --')
        perp_per = stats['ngram']['perp_per'][idx]
        perp_max = stats['ngram']['perp_max'][idx]
        print(dist)
        print(perp_per)
        print(perp_max)
        print('----')

    if oname is None:
        return

    with open(oname, 'wt') as ofile:
        ofile.write(utils.mse_prepend)
        for (idx, card, realcard, dist) in final:
            name = realcard.name
            writecard(realcard, name, ofile)
            writecard(card, name, ofile)
        ofile.write('version control:\n\ttype: none\napprentice code: ')

    # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
    # The copy is made once the output file has been closed.
    if os.path.isfile('set'):
        print('ERROR: tried to overwrite existing file "set" - aborting.')
        return
    shutil.copyfile(oname, 'set')
    # Use the freaky mse extension instead of zip.
    with zipfile.ZipFile(oname + '.mse-set', mode='w') as zf:
        try:
            # Zip up the set file into oname.mse-set.
            zf.write('set')
        finally:
            if verbose:
                print('Made an MSE set file called ' + oname + '.mse-set.')
            # The set file is useless outside the .mse-set, delete it.
            os.remove('set')
def main(fname, oname=None, verbose=False, dump=False):
    """Validate the cards in fname and print the property-check summary.

    ``oname`` is accepted but not used.  The rare-n-gram histogram branch
    only runs if the hard-coded ``do_grams`` flag is switched on.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    do_grams = False

    if not do_grams:
        (totals, values) = process_props(cards, dump=dump)
        (total_all, total_good, total_bad, total_uncovered) = totals

        # summary
        print('-- overall --')
        print(' total : ' + str(total_all))
        print(' good : ' + str(total_good) + ' ' + pct(total_good, total_all))
        print(' bad : ' + str(total_bad) + ' ' + pct(total_bad, total_all))
        print(' uncocoverd: ' + str(total_uncovered) + ' ' + pct(total_uncovered, total_all))
        print('----')

        # breakdown
        for prop in props:
            (total, good, bad) = values[prop]
            print(prop + ':')
            print(' total: ' + str(total) + ' ' + pct(total, total_all))
            print(' good : ' + str(good) + ' ' + pct(good, total_all))
            print(' bad : ' + str(bad) + ' ' + pct(bad, total_all))
        return

    # Histogram of the rare-gram score per card.
    rg = {}
    for card in cards:
        g = rare_grams(card, thresh=2, grams=2)
        word_count = len(card.text_words)
        if word_count > 0:
            g = int(1.0 + (float(g) * 100.0 / float(word_count)))
        rg[g] = rg.get(g, 0) + 1
        if g >= 60:
            print(g)
            print(card.format())

    tot = 0
    vmax = sum(rg.values())
    pct90 = None
    pct95 = None
    pct99 = None
    for score in sorted(rg):
        print(str(score) + ' rare ngrams: ' + str(rg[score]))
        tot += rg[score]
        # Remember the first score at which each cumulative share is reached.
        if pct90 is None and tot >= vmax * 0.90:
            pct90 = score
        if pct95 is None and tot >= vmax * 0.95:
            pct95 = score
        if pct99 is None and tot >= vmax * 0.99:
            pct99 = score

    print('90% - ' + str(pct90))
    print('95% - ' + str(pct95))
    print('99% - ' + str(pct99))
def get_statistics(fname, lm = None, sep = False, verbose=False):
    """Collect validation, distance and n-gram statistics for the cards in fname.

    Args:
        fname: path of the card file; ``fname + '.dist'`` is read too when it
            exists.
        lm: optional language model exposing ``perplexity(words)``; when None
            the ``'ngram'`` section is omitted.
        sep: when true, perplexity is computed per text line and aggregated
            per card; otherwise it is computed over the whole card text.
        verbose: forwarded to ``jdecode.mtg_open_file``.

    Returns:
        An OrderedDict with ``'cards'`` and ``'props'``, plus ``'cp'`` (when
        the file name parses as a checkpoint name), ``'dists'`` (when the
        .dist file exists) and ``'ngram'`` (when ``lm`` is given).
    """
    stats = OrderedDict()
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats['cards'] = cards

    # unpack the name of the checkpoint - terrible and hacky
    try:
        final_name = os.path.basename(fname)
        halves = final_name.split('_epoch')
        cp_name = halves[0]
        cp_info = halves[1][:-4]
        info_halves = cp_info.split('_')
        cp_epoch = float(info_halves[0])
        fragments = info_halves[1].split('.')
        cp_vloss = float('.'.join(fragments[:2]))
        cp_temp = float('.'.join(fragments[-2:]))
        cp_ident = '.'.join(fragments[2:-2])
        stats['cp'] = OrderedDict([('name', cp_name),
                                   ('epoch', cp_epoch),
                                   ('vloss', cp_vloss),
                                   ('temp', cp_temp),
                                   ('ident', cp_ident)])
    except Exception:
        # Deliberately best-effort: a file name that does not follow the
        # checkpoint pattern simply yields no 'cp' entry.
        pass

    # validate
    ((total_all, total_good, total_bad, total_uncovered),
     values) = mtg_validate.process_props(cards)
    stats['props'] = annotate_values(values)
    stats['props']['overall'] = OrderedDict([('total', total_all),
                                             ('good', total_good),
                                             ('bad', total_bad),
                                             ('uncovered', total_uncovered)])

    # distances
    distfname = fname + '.dist'
    if os.path.isfile(distfname):
        with open(distfname, 'rt') as f:
            distlines = f.read().split('\n')
        dists = OrderedDict([('name', []), ('cbow', [])])
        for line in distlines:
            fields = line.split('|')
            if len(fields) < 4:
                continue
            # fields are index|name|name_distance|cbow_distance; only the
            # two distances are used here
            dists['name'].append(float(fields[2]))
            dists['cbow'].append(float(fields[3]))
        dists['name_mean'] = mean_nonan(dists['name'])
        dists['cbow_mean'] = mean_nonan(dists['cbow'])
        dists['name_geomean'] = gmean_nonzero(dists['name'])
        dists['cbow_geomean'] = gmean_nonzero(dists['cbow'])
        stats['dists'] = dists

    # n-grams
    if lm is not None:
        ngram = OrderedDict([('perp', []), ('perp_per', []),
                             ('perp_max', []), ('perp_per_max', [])])
        for card in cards:
            # Every branch below must set all four values; cards with
            # nothing to score contribute zeros.
            perp = perp_per = perp_max = perp_per_max = 0.0
            if len(card.text.text) == 0:
                pass
            elif sep:
                vtexts = [words for words in
                          (line.vectorize().split() for line in card.text_lines)
                          if len(words) > 0]
                if vtexts:
                    perps = [lm.perplexity(vtext) for vtext in vtexts]
                    perps_per = [p / float(len(vtext))
                                 for p, vtext in zip(perps, vtexts)]
                    perp = gmean_nonzero(perps)
                    perp_per = gmean_nonzero(perps_per)
                    perp_max = max(perps)
                    perp_per_max = max(perps_per)
            else:
                vtext = card.text.vectorize().split()
                if vtext:
                    perp = lm.perplexity(vtext)
                    perp_per = perp / float(len(vtext))
                    perp_max = perp
                    # was `perps_per`, a list that only exists in the
                    # per-line branch
                    perp_per_max = perp_per

            ngram['perp'].append(perp)
            ngram['perp_per'].append(perp_per)
            ngram['perp_max'].append(perp_max)
            ngram['perp_per_max'].append(perp_per_max)

        ngram['perp_mean'] = mean_nonan(ngram['perp'])
        ngram['perp_per_mean'] = mean_nonan(ngram['perp_per'])
        ngram['perp_geomean'] = gmean_nonzero(ngram['perp'])
        ngram['perp_per_geomean'] = gmean_nonzero(ngram['perp_per'])
        stats['ngram'] = ngram

    return stats
def main(fname, oname = None, verbose = True, encoding = 'std',
         gatherer = False, for_forum = False, for_mse = False,
         creativity = False, vdump = False, for_html = False):
    """Decode the cards in fname and write them out in the requested format.

    Output goes to ``oname`` when given, otherwise to stdout.  ``for_mse``
    and ``for_html`` are mutually exclusive; requesting both prints an error
    and returns.  In HTML mode the cards are grouped by colour and, within
    each group, by card type before being written.  With ``creativity``
    set, each card is followed by its nearest names and nearest cards.
    With ``for_mse`` and an ``oname``, the output file is additionally
    zipped into ``oname + '.mse-set'`` under the archive member name ``set``.

    Raises:
        ValueError: if ``encoding`` is not one of the known encodings.
    """
    # there is a sane thing to do here (namely, produce both at the same time)
    # but we don't support it yet.
    if for_mse and for_html:
        print('ERROR - decode.py - incompatible formats "mse" and "html"')
        return

    fmt_ordered = cardlib.fmt_ordered_default

    if encoding in ['std']:
        pass
    elif encoding in ['named']:
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding in ['noname']:
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding in ['rfields']:
        pass
    elif encoding in ['old']:
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding in ['norarity']:
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding in ['vec']:
        pass
    elif encoding in ['custom']:
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        # The message previously named encode.py although this is decode.py.
        raise ValueError('decode.py: unknown encoding: ' + encoding)

    cards = jdecode.mtg_open_file(fname, verbose=verbose, fmt_ordered=fmt_ordered)

    if creativity:
        namediff = Namediff()
        cbow = CBOW()
        if verbose:
            print('Computing nearest names...')
        nearest_names = namediff.nearest_par([c.name for c in cards], n=3)
        if verbose:
            print('Computing nearest cards...')
        nearest_cards = cbow.nearest_par(cards)
        for i in range(0, len(cards)):
            cards[i].nearest_names = nearest_names[i]
            cards[i].nearest_cards = nearest_cards[i]
        if verbose:
            print('...Done.')

    def hoverimg(cardname, dist, nd):
        # Format one "name: distance" entry; the markup depends on the
        # requested output format.
        truename = nd.names[cardname]
        code = nd.codes[cardname]
        namestr = ''
        if for_html:
            if code:
                namestr = ('<div class="hover_img"><a href="#">' + truename
                           + '<span><img style="background: url(http://magiccards.info/scans/en/' + code
                           + ');" alt=""/></span></a>' + ': ' + str(dist) + '\n</div>\n')
            else:
                namestr = '<div>' + truename + ': ' + str(dist) + '</div>'
        elif for_forum:
            namestr = '[card]' + truename + '[/card]' + ': ' + str(dist) + '\n'
        else:
            namestr = truename + ': ' + str(dist) + '\n'
        return namestr

    def writecards(writer):
        if for_mse:
            # have to prepend a massive chunk of formatting info
            writer.write(utils.mse_prepend)

        if for_html:
            # have to prepend html info
            writer.write(utils.html_prepend)
            # separate the write function to allow for writing smaller chunks of cards at a time
            segments = sort_colors(cards)
            for i in range(len(segments)):
                # order the cards of this colour by card type
                segments[i] = sort_type(segments[i])
                # this allows card boxes to be colored for each color
                # for coloring of each box separately cardlib.Card.format() must change non-minimally
                writer.write('<div id="' + utils.segment_ids[i] + '">')
                writehtml(writer, segments[i])
                writer.write("</div><hr>")
            # closing the html file
            writer.write(utils.html_append)
            return # break out of the write cards function to avoid writing cards twice

        for card in cards:
            if for_mse:
                writer.write(card.to_mse().encode('utf-8'))
                fstring = ''
                if card.json:
                    fstring += 'JSON:\n' + card.json + '\n'
                if card.raw:
                    fstring += 'raw:\n' + card.raw + '\n'
                fstring += '\n'
                fstring += card.format(gatherer = gatherer, for_forum = for_forum,
                                       vdump = vdump) + '\n'
                fstring = fstring.replace('<', '(').replace('>', ')')
                writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t'))
            else:
                fstring = card.format(gatherer = gatherer, for_forum = for_forum,
                                      vdump = vdump, for_html = for_html)
                writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                cstring = '~~ closest cards ~~\n'
                nearest = card.nearest_cards
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                cstring += '~~ closest names ~~\n'
                nearest = card.nearest_names
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                if for_mse:
                    cstring = ('\n\n' + cstring[:-1]).replace('\n', '\n\t\t')
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

        if for_mse:
            # more formatting info
            writer.write('version control:\n\ttype: none\napprentice code: ')

    def writehtml(writer, card_set):
        # Write one group of cards as HTML, optionally with creativity info.
        for card in card_set:
            fstring = card.format(gatherer = gatherer, for_forum = True,
                                  vdump = vdump, for_html = for_html)
            if creativity:
                fstring = fstring[:-6] # chop off the closing </div> to stick stuff in
            writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                cstring = '~~ closest cards ~~\n<br>\n'
                nearest = card.nearest_cards
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                cstring += "<br>\n"
                cstring += '~~ closest names ~~\n<br>\n'
                nearest = card.nearest_names
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                cstring = '<hr><div>' + cstring + '</div>\n</div>'
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

    # Sorting by colors
    def sort_colors(card_set):
        # Initialize sections
        red_cards = []
        blue_cards = []
        green_cards = []
        black_cards = []
        white_cards = []
        multi_cards = []
        colorless_cards = []
        lands = []
        for card in card_set:
            if len(card.get_colors())>1:
                multi_cards += [card]
                continue
            if 'R' in card.get_colors():
                red_cards += [card]
                continue
            elif 'U' in card.get_colors():
                blue_cards += [card]
                continue
            elif 'B' in card.get_colors():
                black_cards += [card]
                continue
            elif 'G' in card.get_colors():
                green_cards += [card]
                continue
            elif 'W' in card.get_colors():
                white_cards += [card]
                continue
            else:
                if "land" in card.get_types():
                    lands += [card]
                    continue
                colorless_cards += [card]
        return[white_cards, blue_cards, black_cards, red_cards, green_cards,
               multi_cards, colorless_cards, lands]

    def sort_type(card_set):
        sorting = ["creature", "enchantment", "instant", "sorcery",
                   "artifact", "planeswalker"]
        sorted_cards = [[],[],[],[],[],[],[]]
        sorted_set = []
        for card in card_set:
            types = card.get_types()
            for i in range(len(sorting)):
                if sorting[i] in types:
                    sorted_cards[i] += [card]
                    break
            else:
                # no listed type matched: the card goes into the last bucket
                sorted_cards[6] += [card]
        for value in sorted_cards:
            for card in value:
                sorted_set += [card]
        return sorted_set

    def sort_cmc(card_set):
        # NOTE(review): not called anywhere in this function.
        sorted_cards = []
        sorted_set = []
        for card in card_set:
            # make sure there is an empty set for each CMC
            while len(sorted_cards)-1 < card.get_cmc():
                sorted_cards += [[]]
            # add card to correct set of CMC values
            sorted_cards[card.get_cmc()] += [card]
        # combine each set of CMC valued cards together
        for value in sorted_cards:
            for card in value:
                sorted_set += [card]
        return sorted_set

    if oname:
        if for_html:
            print(oname)
            # if ('.html' != oname[-])
            #     oname += '.html'
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
        if for_mse:
            # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
            if os.path.isfile('set'):
                print('ERROR: tried to overwrite existing file "set" - aborting.')
                return
            shutil.copyfile(oname, 'set')
            # Use the freaky mse extension instead of zip.
            with zipfile.ZipFile(oname+'.mse-set', mode='w') as zf:
                try:
                    # Zip up the set file into oname.mse-set.
                    zf.write('set')
                finally:
                    if verbose:
                        print('Made an MSE set file called ' + oname + '.mse-set.')
                    # The set file is useless outside the .mse-set, delete it.
                    os.remove('set')
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
def get_statistics(fname, lm=None, sep=False, verbose=False):
    """Collect summary statistics for the cards stored in ``fname``.

    Args:
        fname: path of the card file; passed to ``jdecode.mtg_open_file``.
            If a sibling file ``fname + '.dist'`` exists, distance
            statistics are read from it.
        lm: optional language model exposing ``perplexity(tokens)``. When
            ``None`` the n-gram section is skipped entirely.
        sep: when true, perplexity is computed per text line and combined;
            otherwise it is computed over the whole card text at once.
        verbose: forwarded to ``jdecode.mtg_open_file``.

    Returns:
        An ``OrderedDict`` with the key ``'cards'`` and ``'props'``, plus
        ``'cp'`` (when the file name parses as a checkpoint name),
        ``'dists'`` (when the ``.dist`` file exists) and ``'ngram'`` (when
        ``lm`` is given).
    """
    stats = OrderedDict()

    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats['cards'] = cards

    # unpack the name of the checkpoint - terrible and hacky
    try:
        final_name = os.path.basename(fname)
        halves = final_name.split('_epoch')
        cp_name = halves[0]
        cp_info = halves[1][:-4]
        info_halves = cp_info.split('_')
        cp_epoch = float(info_halves[0])
        fragments = info_halves[1].split('.')
        cp_vloss = float('.'.join(fragments[:2]))
        cp_temp = float('.'.join(fragments[-2:]))
        cp_ident = '.'.join(fragments[2:-2])
        stats['cp'] = OrderedDict([('name', cp_name),
                                   ('epoch', cp_epoch),
                                   ('vloss', cp_vloss),
                                   ('temp', cp_temp),
                                   ('ident', cp_ident)])
    except (IndexError, ValueError):
        # Best effort only: a file name that does not follow the checkpoint
        # naming pattern simply yields no 'cp' entry.
        pass

    # validate
    ((total_all, total_good, total_bad, total_uncovered),
     values) = mtg_validate.process_props(cards)

    stats['props'] = annotate_values(values)
    stats['props']['overall'] = OrderedDict([('total', total_all),
                                             ('good', total_good),
                                             ('bad', total_bad),
                                             ('uncovered', total_uncovered)])

    # distances
    distfname = fname + '.dist'
    if os.path.isfile(distfname):
        # NOTE(review): the duplicate counters are computed below but are not
        # stored in the returned statistics - confirm whether they should be.
        name_dupes = 0
        card_dupes = 0
        with open(distfname, 'rt') as f:
            distlines = f.read().split('\n')
        dists = OrderedDict([('name', []), ('cbow', [])])
        for line in distlines:
            fields = line.split('|')
            if len(fields) < 4:
                continue
            # index and name are parsed but currently unused
            idx = int(fields[0])
            name = str(fields[1])
            ndist = float(fields[2])
            cdist = float(fields[3])
            dists['name'] += [ndist]
            dists['cbow'] += [cdist]
            if ndist == 1.0:
                name_dupes += 1
            if cdist == 1.0:
                card_dupes += 1

        dists['name_mean'] = mean_nonan(dists['name'])
        dists['cbow_mean'] = mean_nonan(dists['cbow'])
        dists['name_geomean'] = gmean_nonzero(dists['name'])
        dists['cbow_geomean'] = gmean_nonzero(dists['cbow'])
        stats['dists'] = dists

    # n-grams
    if lm is not None:
        ngram = OrderedDict([('perp', []), ('perp_per', []),
                             ('perp_max', []), ('perp_per_max', [])])
        for card in cards:
            # Every card starts from zero so that a card without text never
            # inherits the maxima computed for the previous card.
            perp = 0.0
            perp_per = 0.0
            perp_max = 0.0
            perp_per_max = 0.0

            if len(card.text.text) > 0:
                if sep:
                    vtexts = [line.vectorize().split()
                              for line in card.text_lines]
                    vtexts = [vtext for vtext in vtexts if len(vtext) > 0]
                    # A card whose lines all vectorize to nothing keeps the
                    # zero defaults instead of failing on max([]).
                    if vtexts:
                        perps = [lm.perplexity(vtext) for vtext in vtexts]
                        perps_per = [p / float(len(vtext))
                                     for p, vtext in zip(perps, vtexts)]
                        perp = gmean_nonzero(perps)
                        perp_per = gmean_nonzero(perps_per)
                        perp_max = max(perps)
                        perp_per_max = max(perps_per)
                else:
                    vtext = card.text.vectorize().split()
                    perp = lm.perplexity(vtext)
                    perp_per = perp / float(len(vtext))
                    # With a single text there is only one value, so the
                    # maxima are the values themselves.
                    perp_max = perp
                    perp_per_max = perp_per

            ngram['perp'] += [perp]
            ngram['perp_per'] += [perp_per]
            ngram['perp_max'] += [perp_max]
            ngram['perp_per_max'] += [perp_per_max]

        ngram['perp_mean'] = mean_nonan(ngram['perp'])
        ngram['perp_per_mean'] = mean_nonan(ngram['perp_per'])
        ngram['perp_geomean'] = gmean_nonzero(ngram['perp'])
        ngram['perp_per_geomean'] = gmean_nonzero(ngram['perp_per'])
        stats['ngram'] = ngram

    return stats
def main(fname, oname=None, verbose=True, encoding='std',
         gatherer=False, for_forum=False, for_mse=False,
         creativity=False, vdump=False, for_html=False):
    """Decode the cards in ``fname`` and write them in a readable format.

    Args:
        fname: input file, opened with ``jdecode.mtg_open_file``.
        oname: output path; when falsy the cards are written to stdout.
        verbose: print progress messages.
        encoding: name of the encoding the input uses; selects the field
            ordering passed to the reader. Unknown names raise ValueError.
        gatherer, for_forum, vdump: forwarded to ``card.format``.
        for_mse: write Magic Set Editor output and, when ``oname`` is given,
            pack it into ``oname + '.mse-set'``. Incompatible with
            ``for_html``.
        creativity: annotate every card with its nearest names and cards.
        for_html: write HTML output grouped into color segments.

    Raises:
        ValueError: if ``encoding`` is not a known encoding name.
    """
    # there is a sane thing to do here (namely, produce both at the same time)
    # but we don't support it yet.
    if for_mse and for_html:
        print('ERROR - decode.py - incompatible formats "mse" and "html"')
        return

    fmt_ordered = cardlib.fmt_ordered_default

    if encoding in ['std']:
        pass
    elif encoding in ['named']:
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding in ['noname']:
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding in ['rfields']:
        pass
    elif encoding in ['old']:
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding in ['norarity']:
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding in ['vec']:
        pass
    elif encoding in ['custom']:
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        raise ValueError('decode.py: unknown encoding: ' + encoding)

    cards = jdecode.mtg_open_file(fname, verbose=verbose,
                                  fmt_ordered=fmt_ordered)

    if creativity:
        namediff = Namediff()
        cbow = CBOW()
        if verbose:
            print('Computing nearest names...')
        nearest_names = namediff.nearest_par([c.name for c in cards], n=3)
        if verbose:
            print('Computing nearest cards...')
        nearest_cards = cbow.nearest_par(cards)
        for i, card in enumerate(cards):
            card.nearest_names = nearest_names[i]
            card.nearest_cards = nearest_cards[i]
        if verbose:
            print('...Done.')

    def hoverimg(cardname, dist, nd):
        # Render one "name: distance" entry in the active output format.
        truename = nd.names[cardname]
        code = nd.codes[cardname]
        namestr = ''
        if for_html:
            if code:
                namestr = ('<div class="hover_img"><a href="#">' + truename
                           + '<span><img style="background: url(http://magiccards.info/scans/en/' + code
                           + ');" alt=""/></span></a>' + ': ' + str(dist) + '\n</div>\n')
            else:
                namestr = '<div>' + truename + ': ' + str(dist) + '</div>'
        elif for_forum:
            namestr = '[card]' + truename + '[/card]' + ': ' + str(dist) + '\n'
        else:
            namestr = truename + ': ' + str(dist) + '\n'
        return namestr

    def writecards(writer):
        # Write every card to ``writer`` in the selected output format.
        if for_mse:
            # have to prepend a massive chunk of formatting info
            writer.write(utils.mse_prepend)

        if for_html:
            # have to prepend html info
            writer.write(utils.html_prepend)
            # separate the write function to allow for writing smaller
            # chunks of cards at a time
            segments = sort_colors(cards)
            for i in range(len(segments)):
                # order the cards of each color segment by card type
                segments[i] = sort_type(segments[i])
                # this allows card boxes to be colored for each color;
                # for coloring of each box separately cardlib.Card.format()
                # must change non-minimally
                writer.write('<div id="' + utils.segment_ids[i] + '">')
                writehtml(writer, segments[i])
                writer.write("</div><hr>")
            # closing the html file
            writer.write(utils.html_append)
            # break out of writecards to avoid writing the cards twice
            return

        for card in cards:
            if for_mse:
                writer.write(card.to_mse().encode('utf-8'))
                fstring = ''
                if card.json:
                    fstring += 'JSON:\n' + card.json + '\n'
                if card.raw:
                    fstring += 'raw:\n' + card.raw + '\n'
                fstring += '\n'
                fstring += card.format(gatherer=gatherer, for_forum=for_forum,
                                       vdump=vdump) + '\n'
                fstring = fstring.replace('<', '(').replace('>', ')')
                writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t'))
            else:
                fstring = card.format(gatherer=gatherer, for_forum=for_forum,
                                      vdump=vdump, for_html=for_html)
                writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                cstring = '~~ closest cards ~~\n'
                nearest = card.nearest_cards
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                cstring += '~~ closest names ~~\n'
                nearest = card.nearest_names
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                if for_mse:
                    cstring = ('\n\n' + cstring[:-1]).replace('\n', '\n\t\t')
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

        if for_mse:
            # more formatting info
            writer.write('version control:\n\ttype: none\napprentice code: ')

    def writehtml(writer, card_set):
        # Write one segment of cards as HTML.
        for card in card_set:
            fstring = card.format(gatherer=gatherer, for_forum=True,
                                  vdump=vdump, for_html=for_html)
            if creativity:
                # chop off the closing </div> to stick stuff in
                fstring = fstring[:-6]
            writer.write((fstring + '\n').encode('utf-8'))

            if creativity:
                cstring = '~~ closest cards ~~\n<br>\n'
                nearest = card.nearest_cards
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                cstring += "<br>\n"
                cstring += '~~ closest names ~~\n<br>\n'
                nearest = card.nearest_names
                for dist, cardname in nearest:
                    cstring += hoverimg(cardname, dist, namediff)
                cstring = '<hr><div>' + cstring + '</div>\n</div>'
                writer.write(cstring.encode('utf-8'))

            writer.write('\n'.encode('utf-8'))

    # Sorting by colors
    def sort_colors(card_set):
        # Split the cards into one list per color section.
        red_cards = []
        blue_cards = []
        green_cards = []
        black_cards = []
        white_cards = []
        multi_cards = []
        colorless_cards = []
        lands = []
        for card in card_set:
            colors = card.get_colors()
            if len(colors) > 1:
                multi_cards.append(card)
            elif 'R' in colors:
                red_cards.append(card)
            elif 'U' in colors:
                blue_cards.append(card)
            elif 'B' in colors:
                black_cards.append(card)
            elif 'G' in colors:
                green_cards.append(card)
            elif 'W' in colors:
                white_cards.append(card)
            elif "land" in card.get_types():
                lands.append(card)
            else:
                colorless_cards.append(card)
        return [white_cards, blue_cards, black_cards, red_cards,
                green_cards, multi_cards, colorless_cards, lands]

    def sort_type(card_set):
        # Group cards by the first matching type; the last bucket collects
        # cards that match none of the listed types.
        sorting = ["creature", "enchantment", "instant", "sorcery",
                   "artifact", "planeswalker"]
        sorted_cards = [[], [], [], [], [], [], []]
        sorted_set = []
        for card in card_set:
            types = card.get_types()
            for i, type_name in enumerate(sorting):
                if type_name in types:
                    sorted_cards[i].append(card)
                    break
            else:
                sorted_cards[6].append(card)
        for value in sorted_cards:
            sorted_set.extend(value)
        return sorted_set

    def sort_cmc(card_set):
        # Order cards by CMC (not called by the code in this function).
        sorted_cards = []
        sorted_set = []
        for card in card_set:
            # make sure there is an empty set for each CMC
            while len(sorted_cards) - 1 < card.get_cmc():
                sorted_cards += [[]]
            # add card to correct set of CMC values
            sorted_cards[card.get_cmc()] += [card]
        # combine each set of CMC valued cards together
        for value in sorted_cards:
            for card in value:
                sorted_set += [card]
        return sorted_set

    if oname:
        if for_html:
            print(oname)
            # if ('.html' != oname[-])
            #     oname += '.html'
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
        if for_mse:
            # Copy whatever output file is produced, name the copy 'set'
            # (yes, no extension).
            if os.path.isfile('set'):
                print('ERROR: tried to overwrite existing file "set" - aborting.')
                return
            shutil.copyfile(oname, 'set')
            try:
                # Use the freaky mse extension instead of zip, and zip up
                # the set file into oname.mse-set.
                with zipfile.ZipFile(oname + '.mse-set', mode='w') as zf:
                    zf.write('set')
            finally:
                # The set file is useless outside the .mse-set, delete it
                # whether or not zipping worked.
                os.remove('set')
            # Only report success once the archive was actually written.
            if verbose:
                print('Made an MSE set file called ' + oname + '.mse-set.')
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
def main(fname, oname=None, verbose=True, encoding='std',
         nolinetrans=False, randomize=False, nolabel=False, stable=False,
         addspaces=False, filtersets=None):
    """Encode the cards in ``fname`` and write them out.

    Args:
        fname: input file, opened with ``jdecode.mtg_open_file``.
        oname: output path; when falsy the cards are written to stdout.
        verbose: print a summary of the chosen options.
        encoding: encoding name; selects field ordering and separators.
            ``'vec'`` writes ``card.vectorize()`` instead of
            ``card.encode(...)``. Unknown names raise ValueError.
        nolinetrans: disable line transformations when reading the cards.
        randomize: randomize the order of symbols in mana costs.
        nolabel: do not label fields.
        stable: keep the input card order instead of shuffling with a
            fixed seed.
        addspaces: surround field labels and the field separator with
            spaces; also forwarded to the reader and to ``card.encode``.
        filtersets: optional comma-separated string; it is split and passed
            to the reader as ``include_sets``.

    Raises:
        ValueError: if ``encoding`` is not a known encoding name.
    """
    fmt_ordered = cardlib.fmt_ordered_default
    fmt_labeled = None if nolabel else cardlib.fmt_labeled_default
    if fmt_labeled is not None and addspaces:
        # Pad a copy: writing into cardlib.fmt_labeled_default itself would
        # change the shared mapping, so a second call would pad the labels
        # again and other users of the default would see padded labels.
        fmt_labeled = fmt_labeled.copy()
        for label in fmt_labeled:
            fmt_labeled[label] = ' ' + fmt_labeled[label] + ' '
    fieldsep = utils.fieldsep
    if addspaces:
        fieldsep = ' ' + fieldsep + ' '
    line_transformations = not nolinetrans
    randomize_fields = False
    randomize_mana = randomize
    initial_sep = True
    final_sep = True
    if filtersets is not None:
        filtersets = filtersets.split(',')

    # set the properties of the encoding
    if encoding in ['std']:
        pass
    elif encoding in ['named']:
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding in ['noname']:
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding in ['rfields']:
        randomize_fields = True
        final_sep = False
    elif encoding in ['old']:
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding in ['norarity']:
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding in ['vec']:
        pass
    elif encoding in ['custom']:
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        raise ValueError('encode.py: unknown encoding: ' + encoding)

    if verbose:
        print('Preparing to encode:')
        print('  Using encoding ' + repr(encoding))
        if stable:
            print('  NOT randomizing order of cards.')
        if randomize_mana:
            print('  Randomizing order of symobls in manacosts.')
        if not fmt_labeled:
            print('  NOT labeling fields for this run (may be harder to decode).')
        if not line_transformations:
            print('  NOT using line reordering transformations')

    cards = jdecode.mtg_open_file(fname, verbose=verbose,
                                  linetrans=line_transformations,
                                  addspaces=addspaces,
                                  include_sets=filtersets)

    # This should give a random but consistent ordering, to make comparing
    # changes between the output of different versions easier.
    if not stable:
        random.seed(1371367)
        random.shuffle(cards)

    def writecards(writer):
        # Write every card followed by its separator.
        for card in cards:
            if encoding in ['vec']:
                writer.write(card.vectorize() + '\n\n')
            else:
                writer.write(card.encode(fmt_ordered=fmt_ordered,
                                         fmt_labeled=fmt_labeled,
                                         fieldsep=fieldsep,
                                         randomize_fields=randomize_fields,
                                         randomize_mana=randomize_mana,
                                         initial_sep=initial_sep,
                                         final_sep=final_sep,
                                         addspaces=addspaces)
                             + utils.cardsep)

    if oname:
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            writecards(ofile)
    else:
        writecards(sys.stdout)
        sys.stdout.flush()
def main(
    fname, oname=None, verbose=True, encoding="std", nolinetrans=False, randomize=False, nolabel=False, stable=False
):
    """Encode the cards found in ``fname`` and emit them.

    The encoded cards go to ``oname`` when it is given and to stdout
    otherwise. ``encoding`` picks the field ordering and separators
    (``"vec"`` writes the vectorized form instead); an unrecognized name
    raises ValueError. ``nolinetrans`` turns off line transformations in the
    reader, ``randomize`` shuffles mana symbols, ``nolabel`` drops field
    labels and ``stable`` keeps the input order instead of applying the
    fixed-seed shuffle. ``verbose`` prints a summary of these choices.
    """
    fmt_ordered = cardlib.fmt_ordered_default
    if nolabel:
        fmt_labeled = None
    else:
        fmt_labeled = cardlib.fmt_labeled_default
    line_transformations = not nolinetrans

    # Options handed to card.encode(); the encoding may adjust them below.
    encode_opts = {
        "fieldsep": utils.fieldsep,
        "randomize_fields": False,
        "randomize_mana": randomize,
        "initial_sep": True,
        "final_sep": True,
    }

    # set the properties of the encoding
    if encoding in ("std", "vec"):
        pass
    elif encoding == "named":
        fmt_ordered = cardlib.fmt_ordered_named
    elif encoding == "noname":
        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding == "rfields":
        encode_opts["randomize_fields"] = True
        encode_opts["final_sep"] = False
    elif encoding == "old":
        fmt_ordered = cardlib.fmt_ordered_old
    elif encoding == "norarity":
        fmt_ordered = cardlib.fmt_ordered_norarity
    elif encoding == "custom":
        ## put custom format decisions here ##########################

        ## end of custom format ######################################
        pass
    else:
        raise ValueError("encode.py: unknown encoding: " + encoding)

    encode_opts["fmt_ordered"] = fmt_ordered
    encode_opts["fmt_labeled"] = fmt_labeled

    if verbose:
        summary = ["Preparing to encode:", "  Using encoding " + repr(encoding)]
        if stable:
            summary.append("  NOT randomizing order of cards.")
        if encode_opts["randomize_mana"]:
            summary.append("  Randomizing order of symobls in manacosts.")
        if not fmt_labeled:
            summary.append("  NOT labeling fields for this run (may be harder to decode).")
        if not line_transformations:
            summary.append("  NOT using line reordering transformations")
        for message in summary:
            print(message)

    cards = jdecode.mtg_open_file(fname, verbose=verbose, linetrans=line_transformations)

    # A fixed seed gives a shuffled yet reproducible order, which keeps the
    # output of different versions easy to compare.
    if not stable:
        random.seed(1371367)
        random.shuffle(cards)

    as_vectors = encoding in ["vec"]

    def writecards(writer):
        # Emit each card in the chosen representation.
        for card in cards:
            if as_vectors:
                text = card.vectorize() + "\n\n"
            else:
                text = card.encode(**encode_opts) + utils.cardsep
            writer.write(text)

    if not oname:
        writecards(sys.stdout)
        sys.stdout.flush()
        return

    if verbose:
        print("Writing output to: " + oname)
    with open(oname, "w") as ofile:
        writecards(ofile)
def check_lines(fname):
    """Print the distinct text lines of every line category in ``fname``.

    Each card's encoded text (and that of its b-side, when present) is split
    by ``transforms.separate_lines`` into pre, key, main, cost and post
    lines. The distinct lines of each category are collected, a count
    summary is printed, and then every category is listed in sorted order.
    Cards that contain a blank line are reported as they are encountered.
    """
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    known = ['enchant ', 'equip', 'countertype', 'multikicker', 'kicker',
             'suspend', 'echo', 'awaken', 'bestow', 'buyback',
             'cumulative', 'dash', 'entwine', 'evoke', 'fortify',
             'flashback', 'madness', 'morph', 'megamorph', 'miracle', 'ninjutsu',
             'overload', 'prowl', 'recover', 'reinforce', 'replicate', 'scavenge',
             'splice', 'surge', 'unearth', 'transfigure', 'transmute',
             ]
    # The list above is replaced straight away, so no line is ever
    # collapsed to 'known' in the current code.
    known = []

    prelines, keylines, mainlines, costlines, postlines = (
        set() for _ in range(5))

    def collect(card, lines, bucket, collapse_known):
        # Add ``lines`` to ``bucket``, reporting cards with blank lines.
        for text in lines:
            if text.strip() == '':
                print(card.name, card.text.text)
            if collapse_known and any(text.startswith(s) for s in known):
                text = 'known'
            bucket.add(text)

    for card in cards:
        front = card.text.encode(randomize=False)
        prel, keyl, mainl, costl, postl = transforms.separate_lines(front)
        if card.bside:
            back = card.bside.text.encode(randomize=False)
            b_pre, b_key, b_main, b_cost, b_post = transforms.separate_lines(back)
            prel += b_pre
            keyl += b_key
            mainl += b_main
            costl += b_cost
            postl += b_post

        # Main and cost lines are never collapsed to 'known'.
        collect(card, prel, prelines, True)
        collect(card, postl, postlines, True)
        collect(card, keyl, keylines, True)
        collect(card, mainl, mainlines, False)
        collect(card, costl, costlines, False)

    summary = 'prel: {:d}, keyl: {:d}, mainl: {:d}, postl {:d}'
    print(summary.format(len(prelines), len(keylines),
                         len(mainlines), len(postlines)))

    listings = [
        ('\nprelines', prelines),
        ('\npostlines', postlines),
        ('\ncostlines', costlines),
        ('\nkeylines', keylines),
        ('\nmainlines', mainlines),
    ]
    for title, bucket in listings:
        print(title)
        for text in sorted(bucket):
            print(text)
def main(infile, verbose=False):
    """Print statistics for ``infile`` using a 3-gram language model.

    The model is built from ``output.txt`` inside ``datadir`` with lines
    treated separately; ``verbose`` only affects the statistics pass, the
    model is always built verbosely.
    """
    corpus_path = str(os.path.join(datadir, 'output.txt'))
    corpus = jdecode.mtg_open_file(corpus_path)
    lm = ngrams.build_ngram_model(corpus, 3, separate_lines=True, verbose=True)
    print_statistics(get_statistics(infile, lm=lm, sep=True, verbose=verbose))