def __init__(self):
    """
    Load the pyplot element counts and prepare the element normalizer.

    Reads the 'elem_pyplot_counts_0404' backup and keeps both the loaded
    counts and the set of their keys on the instance.
    """
    backup_dir = relative_path('experimental/code_suggest/output/backup')
    counts = BackupHandler(backup_dir).load('elem_pyplot_counts_0404')
    self.all_elem_counts = counts
    self.all_elems = set(counts.keys())
    self.enormer = ElementNormalizer()
def __init__(self):
    """
    Load the data CodeSuggest keeps in memory.

    Populates:
      - self.plot_commands: the result of get_plot_commands().
      - self.nonplot_commands: keys of get_pyplot_fu() that are not plot
        commands.
      - self.code_example_lookup: [func_id] = list of {'code', 'svg'} dicts
        read from the `example` table of demo/data/code.sqlite3, ordered by
        get_effective_code_len of the code (ascending).
      - self.elem_val_counts: [elem] = list of (val, count) pairs, highest
        count first.
      - self.func_position_finder: a FuncPositionFinder instance.
    """
    plot_commands = get_plot_commands()
    pyplot_fu = get_pyplot_fu()
    self.plot_commands = plot_commands
    # Build the membership set once instead of scanning plot_commands for
    # every pyplot function.
    plot_command_set = set(plot_commands)
    self.nonplot_commands = [
        f for f in pyplot_fu.keys() if f not in plot_command_set]
    print('CodeSuggest: extracted %d plot commands' % len(plot_commands))

    # Load all code examples of plotting commands from db into memory
    # These are generated by index_examples.py
    print('CodeSuggest: Loading code examples and pregenerated SVGs...')
    db = sqlite3.connect(relative_path('demo/data/code.sqlite3'))
    try:
        cursor = db.cursor()
        cursor.execute("SELECT func_id, code, svg FROM example")
        rows = cursor.fetchall()
    finally:
        # Close the connection even when the query or fetch fails.
        db.close()

    code_example_lookup = {}  # [func_id] = [{'code': code, 'svg': svg}]
    for func_id, code, svg in rows:
        code_example_lookup.setdefault(func_id, []).append(
            {'code': code, 'svg': svg})

    # Sort it again
    for func_id in code_example_lookup:
        code_example_lookup[func_id] = sorted(
            code_example_lookup[func_id],
            key=lambda x: get_effective_code_len(x['code']))
    self.code_example_lookup = code_example_lookup
    print('CodeSuggest: Loaded %d code examples (with svgs)...' % len(rows))

    # Load element_index generated by experimental/code_suggest/mine_argvs.py
    # bh = BackupHandler(relative_path('demo/data'))
    # self.element_index = bh.load('element_index')
    # print 'Loaded element_index with %d keys'%len(self.element_index)

    # Load element value counts
    bh2 = BackupHandler(
        relative_path('experimental/code_suggest/output/backup'))
    self.elem_val_counts = bh2.load(
        'elem_pyplot_value_counts_0404')  # [elem][val] = count
    # Replace each per-element dict by its (val, count) items, most
    # frequent value first. Only values are reassigned, so iterating the
    # dict while doing so is safe.
    for elem_id in self.elem_val_counts:
        self.elem_val_counts[elem_id] = sorted(
            self.elem_val_counts[elem_id].items(), key=lambda x: -x[1])

    self.func_position_finder = FuncPositionFinder()
def __init__(self):
    """
    Load the data CodeSuggest keeps in memory.

    Populates:
      - self.plot_commands: the result of get_plot_commands().
      - self.nonplot_commands: keys of get_pyplot_fu() that are not plot
        commands.
      - self.code_example_lookup: [func_id] = list of {'code', 'svg'} dicts
        read from the `example` table of demo/data/code.sqlite3, ordered by
        get_effective_code_len of the code (ascending).
      - self.elem_val_counts: [elem] = list of (val, count) pairs, highest
        count first.
      - self.func_position_finder: a FuncPositionFinder instance.
    """
    plot_commands = get_plot_commands()
    pyplot_fu = get_pyplot_fu()
    self.plot_commands = plot_commands
    # Build the membership set once instead of scanning plot_commands for
    # every pyplot function.
    plot_command_set = set(plot_commands)
    self.nonplot_commands = [
        f for f in pyplot_fu.keys() if f not in plot_command_set]
    print('CodeSuggest: extracted %d plot commands'%len(plot_commands))

    # Load all code examples of plotting commands from db into memory
    # These are generated by index_examples.py
    print('CodeSuggest: Loading code examples and pregenerated SVGs...')
    db = sqlite3.connect(relative_path('demo/data/code.sqlite3'))
    try:
        cursor = db.cursor()
        cursor.execute("SELECT func_id, code, svg FROM example")
        rows = cursor.fetchall()
    finally:
        # Close the connection even when the query or fetch fails.
        db.close()

    code_example_lookup = {}  # [func_id] = [{'code': code, 'svg': svg}]
    for func_id, code, svg in rows:
        code_example_lookup.setdefault(func_id, []).append(
            {'code': code, 'svg':svg})

    # Sort it again
    for func_id in code_example_lookup:
        code_example_lookup[func_id] = sorted(
            code_example_lookup[func_id],
            key=lambda x: get_effective_code_len(x['code']))
    self.code_example_lookup = code_example_lookup
    print('CodeSuggest: Loaded %d code examples (with svgs)...'%len(rows))

    # Load element_index generated by experimental/code_suggest/mine_argvs.py
    # bh = BackupHandler(relative_path('demo/data'))
    # self.element_index = bh.load('element_index')
    # print 'Loaded element_index with %d keys'%len(self.element_index)

    # Load element value counts
    bh2 = BackupHandler(
        relative_path('experimental/code_suggest/output/backup'))
    self.elem_val_counts = bh2.load(
        'elem_pyplot_value_counts_0404')  # [elem][val] = count
    # Replace each per-element dict by its (val, count) items, most
    # frequent value first. Only values are reassigned, so iterating the
    # dict while doing so is safe.
    for elem_id in self.elem_val_counts:
        self.elem_val_counts[elem_id] = sorted(
            self.elem_val_counts[elem_id].items(), key=lambda x: -x[1])

    self.func_position_finder = FuncPositionFinder()
def code_examples():
    """
    Yield code examples, one at a time, from three backups in turn.

    Each backup is loaded the first time it is needed and kept in a
    module-level global (all_codes1 / all_codes2 / all_codes3), so later
    calls iterate the already-loaded collections.
    """
    global all_codes1, all_codes2, all_codes3

    # Source 1: 15770 code examples mined from SO answers in threads that
    # are tagged "matplotlib".
    if not all_codes1:
        print('Loading SO code examples...')
        so_backup = BackupHandler(relative_path('experimental/code_suggest'))
        all_codes1 = so_backup.load('all_codes')
        print('%d examples from SO'%len(all_codes1))
    for snippet in all_codes1:
        yield snippet

    # Source 2: 8732 code examples (including 395 IPython Notebook files)
    # mined from GitHub repositories that contain "matplotlib".
    if not all_codes2:
        print('Loading GitHub code examples...')
        github_backup = BackupHandler(
            relative_path('experimental/code_suggest/output/backup'))
        all_codes2 = github_backup.load('all_codes_github_1k_repo_0322')
        print('%d examples from GitHub'%len(all_codes2))
    for snippet in all_codes2:
        yield snippet

    # Source 3: 21993 code examples extracted by Shiyan from the Web
    if not all_codes3:
        print('Loading Web code examples')
        web_backup = BackupHandler(relative_path('experimental/mining/output'))
        all_codes3 = web_backup.load('codes_shiyan_0331_web')
        print('%d examples from Web Shiyan'%len(all_codes3))
    for snippet in all_codes3:
        yield snippet
def __init__(self):
    """
    Load the pyplot element counts and prepare the element normalizer.

    Reads the 'elem_pyplot_counts_0404' backup and keeps both the loaded
    counts and the set of their keys on the instance.
    """
    backup_dir = relative_path('experimental/code_suggest/output/backup')
    counts = BackupHandler(backup_dir).load('elem_pyplot_counts_0404')
    self.all_elem_counts = counts
    self.all_elems = set(counts.keys())
    self.enormer = ElementNormalizer()
from codemend.models.extract_so_code import load_threads, Thread, Answer
from codemend import BackupHandler, relative_path

if __name__ == '__main__':
    # Reuse the backed-up threads when available; otherwise query them and
    # store the result for the next run.
    backup = BackupHandler(relative_path('models/output/backup'))
    try:
        threads = backup.load('mpl_threads')
    except AssertionError:
        # NOTE(review): presumably load() signals a missing backup with an
        # AssertionError -- confirm against BackupHandler.
        question_filter = "Tags LIKE '%<matplotlib>%' AND AnswerCount > 0 AND Score >= 0"
        answer_filter = "Score >= 0 ORDER BY Score DESC LIMIT 3"
        threads = list(
            load_threads(qfilter=question_filter, afilter=answer_filter))
        backup.save('mpl_threads', threads)

    # One "<qid>\t<title>" line per thread, title encoded as UTF-8.
    titles_path = relative_path('models/output/mpl_so_titles.txt')
    with open(titles_path, 'w') as writer:
        for thread in threads:
            title = thread.qtitle.encode('utf-8')
            writer.write('%d\t%s\n'%(thread.qid, title))
# Step 1: # Copied from annotate_code_with_api.py with open('../../models/output/mpl_code_blocks.txt') as reader: content = reader.read() content = content.decode('utf-8') content = content.replace("<", "<") content = content.replace(">", ">") content = content.replace("&", "&") sompl_blocks = content.split('\n\n\n') # stackoverflow matplotlib code blocks print 'There are %d code examples from mpl stackoverflow'%len(sompl_blocks) # Step 2: bh = BackupHandler('.') cookbook_segs = bh.load('cookbook_segs') cookbook_blocks = [] for tag, p in cookbook_segs: if tag == 'CODE': cookbook_blocks.append(p) print 'There are %d code examples from matplotlib cookbook'%len(cookbook_blocks) all_codes = sompl_blocks + cookbook_blocks print 'There are %d code blocks in total'%(len(all_codes)) # Step 3: counters = {} counter_names = ['syntax_errors', 'unsafes', 'timeouts', 'exec_errors',
elem_counts[f, a, v] += 1 return is_useful if __name__ == '__main__': counters = defaultdict(int) md5s = set() all_codes = [] fu, _ = get_fu_fau() elem_counts = defaultdict(int) # [elem] = count bh = BackupHandler( relative_path('experimental/code_suggest/output/backup')) for root, dirs, files in os.walk( relative_path('mining/output/github-matplotlib-repos')): if '.git' in root: continue for file_name in files: counters['count_file'] += 1 if counters['count_file'] % 1000 == 0: print 'Processed %d files - Useful files: %d' % ( counters['count_file'], counters['count_useful_files']) file_path = os.path.join(root, file_name)
from codemend.models.extract_so_code import load_threads, Thread, Answer
from codemend import BackupHandler, relative_path

if __name__ == '__main__':
    # Reuse the backed-up threads when available; otherwise query them and
    # store the result for the next run.
    backup = BackupHandler(relative_path('models/output/backup'))
    try:
        threads = backup.load('mpl_threads')
    except AssertionError:
        # NOTE(review): presumably load() signals a missing backup with an
        # AssertionError -- confirm against BackupHandler.
        question_filter = "Tags LIKE '%<matplotlib>%' AND AnswerCount > 0 AND Score >= 0"
        answer_filter = "Score >= 0 ORDER BY Score DESC LIMIT 3"
        threads = list(
            load_threads(qfilter=question_filter, afilter=answer_filter))
        backup.save('mpl_threads', threads)

    # One "<qid>\t<title>" line per thread, title encoded as UTF-8.
    titles_path = relative_path('models/output/mpl_so_titles.txt')
    with open(titles_path, 'w') as writer:
        for thread in threads:
            title = thread.qtitle.encode('utf-8')
            writer.write('%d\t%s\n' % (thread.qid, title))
example also has its corresponding generated SVGs. The table is like this: (func_id, code, svg) There are at most 20 (shortest) examples per func_id. """ import sqlite3 from codemend import BackupHandler, relative_path if __name__ == '__main__': print 'Reading SVGs and code examples. Takes 7.3 seconds...' bh = BackupHandler('.') svgs = bh.load('svgs') all_codes = bh.load('all_codes') plotcommands_examples = bh.load( 'plotcommands_examples') # [plot_command] = [example_idx] db = sqlite3.connect(relative_path('demo/data/code.sqlite3')) cursor = db.cursor() cursor.executescript(""" DROP TABLE IF EXISTS example; CREATE TABLE example ( func_id TEXT NOT NULL, code TEXT NOT NULL, svg TEXT
called. - #3: Not recommending elements that occur too infrequently. - #4: When a function is not used before, and its argv is recommended, we strip the "@", and recommend the function first, followed by the argv. e.g. [pie@0, pie] => [pie, pie@0]. """ from codemend import BackupHandler, relative_path from codemend.demo.code_suggest import get_plot_commands from codemend.models.baseline2 import SuggestItem plot_commands = get_plot_commands() plot_commands_set = set(plot_commands) bh = BackupHandler(relative_path('experimental/code_suggest/output/backup')) elem_counts = bh.load('elem_pyplot_counts_0404') def prune(used_elems, suggest_elems): for elem in used_elems: assert isinstance(elem, basestring) for elem in suggest_elems: assert isinstance(elem, SuggestItem), type(elem) used_elems_set = set(used_elems) used_funcs = map(get_func_name, used_elems) used_funcs_set = set(used_funcs) has_used_plot_commands = any( map(lambda x: x in plot_commands_set, used_funcs))
def __init__(self):
    """
    Load the 'pos_ave' backup from demo/data into self.pos_ave and report
    how many entries were loaded.
    """
    handler = BackupHandler(relative_path('demo/data'))
    self.pos_ave = handler.load('pos_ave')
    loaded = len(self.pos_ave)
    print('FuncPositionFinder: loaded %d average positions for functions'%loaded)
def __init__(self, w2v_model, all_elem_counts, maxngram=1, name=None,
             use_lemma=True, heuristic=False, use_coke=False):
    """
    Set up the model and the element indexes.

    w2v_model is either the path of a binary vectors file or an already
    loaded gensim Word2Vec instance. all_elem_counts must be a dict; its
    keys are the elements that get indexed.

    The element indexes (idfs, elems, elem_lookup, vecmat) are read from a
    backup named after the model's filename when one exists; otherwise they
    are built here and saved under that name.
    """
    self.maxngram = maxngram
    self.name = name
    self.use_lemma = use_lemma
    assert isinstance(all_elem_counts, dict)
    self.all_elem_counts = all_elem_counts
    self.heuristic = heuristic
    self.use_coke = use_coke

    if not isinstance(w2v_model, basestring):
        # Already-loaded model: borrow its filename as the name if it has one.
        assert isinstance(w2v_model, Word2Vec)
        self.model = w2v_model
        if not self.name and hasattr(self.model, 'filename'):
            self.name = self.model.filename
    else:
        # Path to a binary vectors file: load it and remember its basename.
        self.model = load_gensim_from_binary_file(w2v_model)
        self.model.filename = w2v_model.split('/')[-1]
        if not self.name:
            self.name = self.model.filename

    self.model.init_sims()  # normalize the vectors

    self.enormer = ElementNormalizer()

    if self.use_coke:
        coke_backup = BackupHandler(relative_path('models/output/backup'))
        coke_file = 'coke_0329'
        if not coke_backup.exists(coke_file):
            raise ValueError('Coke file does not exist: %s'%coke_file)
        self.coke = coke_backup.load(coke_file)

    print('Trying to load element indexes from cache ...')
    index_backup = BackupHandler(relative_path('models/output/backup'))
    index_name = self.model.filename + '_elem_index'
    if index_backup.exists(index_name):
        cached = index_backup.load(index_name)
        self.idfs, self.elems, self.elem_lookup, self.vecmat = cached
    else:
        print('Word2vecBaseline building element indexes...')
        fu, fau = get_fu_fau()
        self.idfs = self.get_idf(fu.values() + fau.values())
        self.elems = sorted(self.all_elem_counts.keys())
        # elem -> row index into vecmat
        self.elem_lookup = dict(
            (elem, row) for (row, elem) in enumerate(self.elems))
        # One bag-of-words vector per element, in the order of self.elems.
        self.vecmat = np.array([
            self.get_bow_representation(doc_serve.get_training_doc(elem, True))
            for elem in self.elems])
        assert self.vecmat.shape == (len(self.elems), self.model.vector_size)
        index_payload = (self.idfs, self.elems, self.elem_lookup, self.vecmat)
        index_backup.save(index_name, index_payload)
        print('Finished building indexes.')
train_pairs.append((merged_utter, astunparse.unparse(call_node))) unique_train_pairs = list(set(train_pairs)) print 'total_block', total_block print 'total_grammatical', total_grammatical print 'total_call_nodes', total_call_nodes print 'total_matched_funcs', total_matched_funcs, '(total train pairs)' print 'total_matched_args', total_matched_args print 'total_unique_train_pairs', len(unique_train_pairs) return unique_train_pairs if __name__ == '__main__': bh = BackupHandler(relative_path('models/output/backup')) # Step 1 fu, fau = get_fu_fau() # Step 2 with open(relative_path('models/output/mpl_code_blocks.txt')) as reader: content = reader.read() content = content.decode('utf-8') content = content.replace("<", "<") content = content.replace(">", ">") content = content.replace("&", "&") blocks = content.split('\n\n\n')
def get_effective_code_len(code): """ Number of characters in a code example. Not counting lines with "import" """ lines = code.split('\n') lines = filter(lambda x: 'import' not in x.split(), lines) return len('\n'.join(lines)) if __name__ == '__main__': print 'Reading SVGs and code examples. Takes 7.3 seconds...' bh = BackupHandler('.') svgs = bh.load('svgs') all_codes = bh.load('all_codes') print 'Loading functions that are plotting commands' # Copied from code_suggest.py import csv import pattern.en # Load csv file of pyplot summary pyplot_fu = {} # [func] = utter print 'CodeSuggest: Loading pyplot fu...' with open('../../docstring_parse/pyplot_fu.csv', 'rb') as csvfile: reader = csv.reader(csvfile)
def __init__(self,
             w2v_model,
             all_elem_counts,
             maxngram=1,
             name=None,
             use_lemma=True,
             heuristic=False,
             use_coke=False):
    """
    Set up the model and the element indexes.

    w2v_model is either the path of a binary vectors file or an already
    loaded gensim Word2Vec instance. all_elem_counts must be a dict; its
    keys are the elements that get indexed.

    The element indexes (idfs, elems, elem_lookup, vecmat) are read from a
    backup named after the model's filename when one exists; otherwise they
    are built here and saved under that name.
    """
    self.maxngram = maxngram
    self.name = name
    self.use_lemma = use_lemma
    assert isinstance(all_elem_counts, dict)
    self.all_elem_counts = all_elem_counts
    self.heuristic = heuristic
    self.use_coke = use_coke

    if not isinstance(w2v_model, basestring):
        # Already-loaded model: borrow its filename as the name if it has one.
        assert isinstance(w2v_model, Word2Vec)
        self.model = w2v_model
        if not self.name and hasattr(self.model, 'filename'):
            self.name = self.model.filename
    else:
        # Path to a binary vectors file: load it and remember its basename.
        self.model = load_gensim_from_binary_file(w2v_model)
        self.model.filename = w2v_model.split('/')[-1]
        if not self.name:
            self.name = self.model.filename

    self.model.init_sims()  # normalize the vectors

    self.enormer = ElementNormalizer()

    if self.use_coke:
        coke_backup = BackupHandler(relative_path('models/output/backup'))
        coke_file = 'coke_0329'
        if not coke_backup.exists(coke_file):
            raise ValueError('Coke file does not exist: %s' % coke_file)
        self.coke = coke_backup.load(coke_file)

    print('Trying to load element indexes from cache ...')
    index_backup = BackupHandler(relative_path('models/output/backup'))
    index_name = self.model.filename + '_elem_index'
    if index_backup.exists(index_name):
        cached = index_backup.load(index_name)
        self.idfs, self.elems, self.elem_lookup, self.vecmat = cached
    else:
        print('Word2vecBaseline building element indexes...')
        fu, fau = get_fu_fau()
        self.idfs = self.get_idf(fu.values() + fau.values())
        self.elems = sorted(self.all_elem_counts.keys())
        # elem -> row index into vecmat
        self.elem_lookup = dict(
            (elem, row) for (row, elem) in enumerate(self.elems))
        # One bag-of-words vector per element, in the order of self.elems.
        self.vecmat = np.array([
            self.get_bow_representation(doc_serve.get_training_doc(elem, True))
            for elem in self.elems
        ])
        assert self.vecmat.shape == (len(self.elems),
                                     self.model.vector_size)
        index_payload = (self.idfs, self.elems, self.elem_lookup, self.vecmat)
        index_backup.save(index_name, index_payload)
        print('Finished building indexes.')
# Copied from annotate_code_with_api.py with open('../../models/output/mpl_code_blocks.txt') as reader: content = reader.read() content = content.decode('utf-8') content = content.replace("<", "<") content = content.replace(">", ">") content = content.replace("&", "&") sompl_blocks = content.split( '\n\n\n') # stackoverflow matplotlib code blocks print 'There are %d code examples from mpl stackoverflow' % len( sompl_blocks) # Step 2: bh = BackupHandler('.') cookbook_segs = bh.load('cookbook_segs') cookbook_blocks = [] for tag, p in cookbook_segs: if tag == 'CODE': cookbook_blocks.append(p) print 'There are %d code examples from matplotlib cookbook' % len( cookbook_blocks) all_codes = sompl_blocks + cookbook_blocks print 'There are %d code blocks in total' % (len(all_codes)) # Step 3: counters = {}
elif isinstance(v, ast.Name): kvs.append((k, v.id)) elif isinstance(v, ast.Tuple) \ or isinstance(v, ast.Dict) \ or isinstance(v, ast.List): kvs.append((k, astunparse.unparse(v).strip())) return func_name, kvs if __name__ == '__main__': # Step 1. fu, fau = get_fu_fau() # Step 2. bh = BackupHandler(relative_path('experimental/code_suggest')) all_codes = bh.load('all_codes') print 'There are %d code examples in total'%len(all_codes) # Step 3. f_counts = defaultdict(int) # [f] = count fa_counts = defaultdict(int) # [f,a] = count fav_counts = defaultdict(int) # [f,a,v] = count for code in all_codes: try: node = ast.parse(code) except SyntaxError: continue calls = findCallNodes(node) for call in calls:
def transform_and_filter(elem):
    """
    Simplify an element and keep it only if it is a plt.* element.

    The element is passed through enormer.simplify to reduce sparsity; per
    the original notes this covers:
      - pylab.xxx --> plt.xxx (if the function exists in pyplot)
      - various add_subplot.xxx --> plt.gca.xxx (see stype.tsv)

    Returns:
      the simplified elem when it starts with 'plt.', otherwise None
    """
    simplified = enormer.simplify(elem)
    return simplified if simplified.startswith('plt.') else None


if __name__ == '__main__':
    # Count every (x, y) pair reported by get_cokes over all code examples,
    # then store the counts as the 'coke_0329' backup.
    coke_counts = defaultdict(int)
    count = 0
    for code in code_examples():
        count += 1
        if count % 1000 == 0:
            progress = (count, len(coke_counts))
            print('%d ... unique_cokes=%d'%progress)
        for pair in get_cokes(code):
            x, y = pair
            coke_counts[x, y] += 1

    backup = BackupHandler(relative_path('models/output/backup'))
    backup.save('coke_0329', coke_counts)
def __init__(self):
    """
    Load the 'pos_ave' backup from demo/data into self.pos_ave and report
    how many entries were loaded.
    """
    handler = BackupHandler(relative_path('demo/data'))
    self.pos_ave = handler.load('pos_ave')
    loaded = len(self.pos_ave)
    print('FuncPositionFinder: loaded %d average positions for functions' % loaded)
elem_counts[f,a] += 1 elem_counts[f,a,v] += 1 return is_useful if __name__ == '__main__': counters = defaultdict(int) md5s = set() all_codes = [] fu, _ = get_fu_fau() elem_counts = defaultdict(int) # [elem] = count bh = BackupHandler(relative_path('experimental/code_suggest/output/backup')) for root, dirs, files in os.walk( relative_path('mining/output/github-matplotlib-repos')): if '.git' in root: continue for file_name in files: counters['count_file'] += 1 if counters['count_file'] % 1000 == 0: print 'Processed %d files - Useful files: %d'%( counters['count_file'], counters['count_useful_files']) file_path = os.path.join(root, file_name)
elif isinstance(v, ast.Name): kvs.append((k, v.id)) elif isinstance(v, ast.Tuple) \ or isinstance(v, ast.Dict) \ or isinstance(v, ast.List): kvs.append((k, astunparse.unparse(v).strip())) return func_name, kvs if __name__ == '__main__': # Step 1. fu, fau = get_fu_fau() # Step 2. bh = BackupHandler(relative_path('experimental/code_suggest')) all_codes = bh.load('all_codes') print 'There are %d code examples in total' % len(all_codes) # Step 3. f_counts = defaultdict(int) # [f] = count fa_counts = defaultdict(int) # [f,a] = count fav_counts = defaultdict(int) # [f,a,v] = count for code in all_codes: try: node = ast.parse(code) except SyntaxError: continue calls = findCallNodes(node) for call in calls:
this code 3. take average per function Output: - a dictionary: [function] = average_position average position: between 0 (beginning of code) and 1 (end of code). """ import ast from collections import defaultdict from codemend import BackupHandler, relative_path from codemend.models.annotate_code_with_api import get_fu_fau, findCallNodes, extractCallComponents fu, fau = get_fu_fau() bh = BackupHandler(relative_path('experimental/code_suggest')) all_codes = bh.load('all_codes') print 'There are %d code examples in total'%len(all_codes) pos_sum = defaultdict(float) # [f] = sum pos_cnt = defaultdict(int) # [f] = count for code in all_codes: try: node = ast.parse(code) except SyntaxError: continue calls = findCallNodes(node) called_funcs = [extractCallComponents(x)[0] for x in calls] called_funcs = filter(lambda x: x in fu, called_funcs) if len(calls) < 3: continue
if elem_id.startswith('plt.'): element_pyplot_counts[elem_id] += 1 val = get_countable_value(e.val_node, varmap, enormer) if val: element_pyplot_value_counts[elem_id][val] += 1 for elem_id in element_pyplot_value_counts: element_pyplot_value_counts[elem_id] = dict(element_pyplot_value_counts[elem_id]) element_pyplot_value_counts = dict(element_pyplot_value_counts) print 'Processed %d code examples'%count print 'There are %d unique elements'%len(element_counts) print 'There are %d unique pyplot elements'%len(element_pyplot_counts) for k in counters: print '%s: %d'%(k, counters[k]) bh = BackupHandler(relative_path('experimental/code_suggest/output/backup')) # Change logs: # - 0322: using raw format # - 0327: using Element, tracking return type and variable assignments and # import aliases. # - 0404: fixed issue with dict as positional argument; # added element_value_counts; # added Shiyan's example. bh.save('elem_counts_0404', element_counts) bh.save('elem_pyplot_counts_0404', element_pyplot_counts) bh.save('elem_pyplot_value_counts_0404', element_pyplot_value_counts) """ Log: # 0327
def get_effective_code_len(code): """ Number of characters in a code example. Not counting lines with "import" """ lines = code.split('\n') lines = filter(lambda x: 'import' not in x.split(), lines) return len('\n'.join(lines)) if __name__ == '__main__': print 'Reading SVGs and code examples. Takes 7.3 seconds...' bh = BackupHandler('.') svgs = bh.load('svgs') all_codes = bh.load('all_codes') print 'Loading functions that are plotting commands' # Copied from code_suggest.py import csv import pattern.en # Load csv file of pyplot summary pyplot_fu = {} # [func] = utter print 'CodeSuggest: Loading pyplot fu...' with open('../../docstring_parse/pyplot_fu.csv', 'rb') as csvfile: reader = csv.reader(csvfile) next(reader, None) # skip the header for f, u in reader: if not u:
def transform_and_filter(elem):
    """
    Simplify an element and keep it only if it is a plt.* element.

    The element is passed through enormer.simplify to reduce sparsity; per
    the original notes this covers:
      - pylab.xxx --> plt.xxx (if the function exists in pyplot)
      - various add_subplot.xxx --> plt.gca.xxx (see stype.tsv)

    Returns:
      the simplified elem when it starts with 'plt.', otherwise None
    """
    simplified = enormer.simplify(elem)
    return simplified if simplified.startswith('plt.') else None


if __name__ == '__main__':
    # Count every (x, y) pair reported by get_cokes over all code examples,
    # then store the counts as the 'coke_0329' backup.
    coke_counts = defaultdict(int)
    count = 0
    for code in code_examples():
        count += 1
        if count % 1000 == 0:
            progress = (count, len(coke_counts))
            print('%d ... unique_cokes=%d' % progress)
        for pair in get_cokes(code):
            x, y = pair
            coke_counts[x, y] += 1

    backup = BackupHandler(relative_path('models/output/backup'))
    backup.save('coke_0329', coke_counts)
from codemend import BackupHandler, relative_path
from codemend.models.element import ElementNormalizer
from codemend.models.word2vec_util import load_gensim_from_binary_file
from codemend.models.bimodal2 import BiModal
from codemend.experimental.code_suggest.mine_element import code_examples

if __name__ == '__main__':
    # Train a BiModal model on the pyplot elements and save it.
    backup_dir = relative_path('experimental/code_suggest/output/backup')
    elem_counts = BackupHandler(backup_dir).load('elem_pyplot_counts_0404')
    sorted_elems = sorted(elem_counts.keys())
    enormer = ElementNormalizer()

    # <-- note the change here!! (the vectors file below)
    vectors_path = relative_path(
        'models/output/vectors-so-text-python-lemma-win5.bin')
    w2v_model = load_gensim_from_binary_file(vectors_path)

    training_options = dict(
        threads=None,
        alpha=0.05,
        window=5,
        negative=20,
        additive=0,
        multiply=0,
        concat=1,
        epoch=1,
        rand_parent_doc=True,
        hint_pvecs_init=True,
        hint_rvecs_init=False,
        neg_sample_used_elem=False,
    )
    model = BiModal(sorted_elems, elem_counts, w2v_model, code_examples,
                    enormer, **training_options)
    model.save(relative_path('models/output/bi2-0410-t.model'))

    # Changes:
    # bi2-test -- latest gold version for user study
    # bi2-0410-a -- epoch=10, fixed stopwords (e.g., excluding bar from stopwords) -- this is vanilla
    # bi2-0410-b -- epoch=1, quick check if setting is all right.
    # bi2-0410-c -- epoch=10, replicating bi2-0410-a
    # bi2-0410-d -- epoch=1, randomly with-parent doc
    # bi2-0410-e -- epoch=5, randomly with-parent doc
example also has its corresponding generated SVGs. The table is like this: (func_id, code, svg) There are at most 20 (shortest) examples per func_id. """ import sqlite3 from codemend import BackupHandler, relative_path if __name__ == '__main__': print 'Reading SVGs and code examples. Takes 7.3 seconds...' bh = BackupHandler('.') svgs = bh.load('svgs') all_codes = bh.load('all_codes') plotcommands_examples = bh.load('plotcommands_examples') # [plot_command] = [example_idx] db = sqlite3.connect(relative_path('demo/data/code.sqlite3')) cursor = db.cursor() cursor.executescript(""" DROP TABLE IF EXISTS example; CREATE TABLE example ( func_id TEXT NOT NULL, code TEXT NOT NULL, svg TEXT );