class Compilation_error_db:
    def __init__(self, db_path=''):
        self.db_path = db_path
        # Loading the db from disk freezes the process, so keep the store
        # in memory; db_path is retained for backward compatibility.
        self.store = UnQLite()
        self.hits = 0
        self.misses = 0
        self.uncommited_recs = 0

    # keeping prog_id for backward compatibility
    def get_errors(self, prog_id, prog):
        if prog in self.store:
            err_msg = self.store[prog]
            errs = get_error_list(err_msg)
            self.hits += 1
        else:
            errs, err_msg = compilation_errors(prog)
            self.store[prog] = err_msg
            self.misses += 1
            self.uncommited_recs += 1
            # Commit every 250 uncommitted records.
            if self.uncommited_recs % 250 == 0:
                self.commit()
        return errs

    def close(self):
        self.store.close()

    def commit(self):
        self.uncommited_recs = 0
        self.store.commit()

    def __len__(self):
        return len(self.store)
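# Usage sketch (illustrative, not part of the original snippet): it assumes
# Compilation_error_db and its compilation_errors/get_error_list helpers are
# in scope, and that a miss compiles the program while a hit reuses the
# stored compiler message. The program text is a hypothetical example.
cache = Compilation_error_db()
snippet = 'int main() { return 0 }'            # missing semicolon
errs = cache.get_errors('prog-1', snippet)     # miss: compiles and stores
errs = cache.get_errors('prog-1', snippet)     # hit: served from the cache
print(cache.hits, cache.misses, len(cache))    # 1 1 1
cache.commit()
cache.close()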
class UniqLiteFactory:
    def __init__(self, filename):
        # The original connect() referenced an undefined `filename`;
        # store it on the instance instead.
        self.filename = filename
        self.conn = None

    def connect(self):
        self.conn = UnQLite(self.filename)
        return self.conn

    def disconnect(self):
        self.conn.close()

    def create_collection(self, name):
        db = self.connect()
        collection = db.collection(name)
        collection.create()
        print("collection " + name + " created successfully")
        self.disconnect()

    def drop_collection(self, name):
        db = self.connect()
        collection = db.collection(name)
        collection.drop()
        print("collection " + name + " dropped successfully")
        self.disconnect()

    def insert(self, collection_name, data=None):
        db = self.connect()
        collection = db.collection(collection_name)
        collection.store(data)
        self.disconnect()
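# A hedged driving example for the factory above; the 'people.db' filename
# and the record contents are assumptions for illustration only.
factory = UniqLiteFactory('people.db')
factory.create_collection('users')
factory.insert('users', {'name': 'ada', 'role': 'admin'})
factory.drop_collection('users')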
def get_bug_report(data_prefix, vectorized_data, bug_report_id):
    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db")
    bug_report = pickle.loads(bug_report_index_collection[bug_report_id])
    bug_report_index_collection.close()
    index = bug_report['report']
    return vectorized_data[index, :]
class UnQDb:
    """NoSQL key-value storage backed by UnQLite."""

    def __init__(self, dbpath='data.db'):
        self.db = UnQLite(dbpath)
        self.dbpath = dbpath

    def __del__(self):
        self.db.close()

    def add(self, key, value):
        """Store a value, e.g. key='2eas', value={}."""
        self.db[key] = value

    def reload(self):
        self.db = UnQLite(self.dbpath)

    def get(self, key):
        """Fetch a value and convert it back into a dict."""
        try:
            value = str(self.db[key], "utf-8")
            return ast.literal_eval(value)
        except (KeyError, ValueError, SyntaxError):
            return None

    def get_all(self):
        with self.db.cursor() as cursor:
            for key, value in cursor:
                yield key, self.get(key)

    def delete(self, key):
        """Delete a record."""
        self.db.delete(key)

    def col(self, key):
        # Use a local, not `self.col`, which would shadow this method.
        collection = self.db.collection(key)
        collection.create()  # Create the collection if it does not exist.
        return collection
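# Round-trip sketch for UnQDb (illustrative; 'data.db' and the record are
# assumptions): values go in via their repr and come back out through
# ast.literal_eval in get().
store = UnQDb('data.db')
store.add('user:1', {'name': 'neet', 'uuid': 'D7B810FD'})
print(store.get('user:1'))     # {'name': 'neet', 'uuid': 'D7B810FD'}
for key, value in store.get_all():
    print(key, value)
store.delete('user:1')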
def extract_enriched_api(data_prefix, bug_report_full_sha):
    data = sparse.load_npz(data_prefix + '_raw_count_data.npz')
    # 0x00000100 | 0x00000001 == UNQLITE_OPEN_MMAP | UNQLITE_OPEN_READONLY
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db",
        flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()
    bug_report_id = bug_report_full_sha[0:7]
    shas = current_files['shas']
    class_name_lookup = current_files['class_name_to_sha']
    bug_report_data = []
    bug_report_lookup = {}
    n_rows = 0
    for ast_sha in shas:
        ast_data, lookup, current_ast_sha = add_types_source_to_bug_report_data(
            data, data_prefix, class_name_lookup, ast_sha)
        current_index = n_rows
        bug_report_data.append(ast_data)
        for k in lookup:
            lookup[k] += current_index
        bug_report_lookup[current_ast_sha] = lookup
        n_rows += ast_data.shape[0]
    bug_report_row = get_bug_report(data_prefix, data, bug_report_id)
    bug_report_data.append(bug_report_row)
    bug_report_data_matrix = sparse.vstack(bug_report_data)
    sparse.save_npz(
        data_prefix + '_' + bug_report_id + '_partial_enriched_api',
        bug_report_data_matrix)
    with open(
            data_prefix + '_' + bug_report_id +
            '_partial_enriched_api_index_lookup', 'w') as outfile:
        json.dump(bug_report_lookup, outfile)
    transformer = TfidfTransformer()
    tf_idf_data = transformer.fit_transform(bug_report_data_matrix)
    sparse.save_npz(data_prefix + '_' + bug_report_id + '_tfidf_enriched_api',
                    tf_idf_data)
    return bug_report_id
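# For readability, the raw open flags repeated across these snippets can be
# given names; the numeric values below are taken from unqlite.h (stated as
# an assumption here rather than relying on the bindings exporting them).
UNQLITE_OPEN_READONLY = 0x00000001
UNQLITE_OPEN_MMAP = 0x00000100
READ_ONLY_MMAP = UNQLITE_OPEN_MMAP | UNQLITE_OPEN_READONLY  # == 0x101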
def index_by(index_dir: str, index_extension: str, data_iter: Iterable,
             key_fn: Callable, value_fn: Callable, checkpoint: int,
             object_name: str):
    """
    Generate UnQLite data indices for each entity.

    :param index_dir: index directory
    :param index_extension: index file extension
    :param data_iter: iterable over the data
    :param key_fn: function applied to a datum to obtain the index key
    :param value_fn: function applied to a datum to obtain the index value
    :param checkpoint: commit the indices every `checkpoint` records
    :param object_name: name used in progress logs
    :return: dict of index paths by entity name
    """
    i = 0
    index_path_by_entity = {}
    index_by_entity = {}
    for data in data_iter:
        entity = data['@type']
        if entity not in index_path_by_entity:
            index_path = get_file_path([index_dir, entity],
                                       ext=index_extension)
            index_path_by_entity[entity] = index_path
            index = UnQLite(index_path_by_entity[entity])
            index.begin()
            index_by_entity[entity] = index
        index = index_by_entity[entity]
        # Index
        index[str(key_fn(data))] = value_fn(data)
        i += 1
        # Log
        if i % 50000 == 0:
            print(f'checkpoint: {i} {object_name}')
        # Checkpoint: flush all indices and start a fresh transaction
        if i % checkpoint == 0:
            for index in index_by_entity.values():
                index.commit()
                index.begin()
            print(f'checkpoint: {i} {object_name}')
    # Close indices
    for index in index_by_entity.values():
        index.commit()
        index.close()
    # Output all indices
    return index_path_by_entity
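# Toy invocation of index_by under stated assumptions: get_file_path() is the
# repo's own path helper, each record carries an '@type' discriminator, and
# the key/value functions simply project fields.
records = [
    {'@type': 'user', 'id': 1, 'name': 'ada'},
    {'@type': 'repo', 'id': 9, 'name': 'unqlite'},
]
paths = index_by(index_dir='indices', index_extension='.unqlite',
                 data_iter=iter(records),
                 key_fn=lambda d: d['id'],
                 value_fn=lambda d: d['name'],
                 checkpoint=1000, object_name='toy')
print(paths)  # one index path per entity type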
def main():
    print("Start", datetime.datetime.now().isoformat())
    before = default_timer()
    bug_report_file_path = sys.argv[1]
    print("bug report file path", bug_report_file_path)
    data_prefix = sys.argv[2]
    print("data prefix", data_prefix)
    bug_reports = load_bug_reports(bug_report_file_path)
    ast_cache_db = UnQLite(data_prefix + "_ast_cache_collection_db")
    vectorize(ast_cache_db, bug_reports, data_prefix)
    after = default_timer()
    total = after - before
    print("End", datetime.datetime.now().isoformat())
    print("total time", total)
    ast_cache_db.close()
class TestTransaction(BaseTestCase):
    """
    We must use a file-based database to test the transaction functions.
    See http://unqlite.org/forum/trouble-with-transactions+1 for details.
    """

    def setUp(self):
        self._filename = "test.db"
        self.db = UnQLite(self._filename)

    def tearDown(self):
        try:
            self.db.close()
        except Exception:
            pass
        if os.path.exists(self._filename):
            os.unlink(self._filename)

    def test_transaction(self):
        @self.db.commit_on_success
        def _test_success(key, value):
            self.db[key] = value

        @self.db.commit_on_success
        def _test_failure(key, value):
            self.db[key] = value
            raise Exception("intentional exception raised")

        _test_success("k1", "v1")
        self.assertEqual(self.db["k1"], "v1")
        self.assertRaises(Exception, lambda: _test_failure("k2", "v2"))
        self.assertRaises(KeyError, lambda: self.db["k2"])

    def test_explicit_transaction(self):
        self.db.close()
        self.db.open()
        self.db.begin()
        self.db["k1"] = "v1"
        self.db.rollback()
        self.assertRaises(KeyError, lambda: self.db["k1"])
class BaseTestCase(unittest.TestCase):
    def setUp(self):
        super(BaseTestCase, self).setUp()
        self.db = UnQLite(':mem:')
        self._filename = 'test.db'
        self.file_db = UnQLite(self._filename)

    def tearDown(self):
        try:
            self.file_db.close()
        except Exception:
            pass
        if os.path.exists(self._filename):
            os.unlink(self._filename)

    def store_range(self, n, db=None):
        if db is None:
            db = self.db
        for i in range(n):
            db['k%s' % i] = str(i)
class Settings(metaclass=Singleton):
    def __init__(self, filename=None):
        assert filename is not None
        if not os.path.exists(filename):
            directory = Path(filename[:filename.rfind('/')])
            directory.mkdir(parents=True, exist_ok=True)
        self.__db = UnQLite(filename)
        if self.__db_get('master_password_hash'):
            current_app.config['INIT_STATE'] = 2

    def write(self):
        self.__db.commit()

    @property
    def master_password_hash(self) -> str:
        return self.__db_get('master_password_hash')

    @master_password_hash.setter
    def master_password_hash(self, v: str):
        self.__db['master_password_hash'] = v

    @property
    def master_password_hash_salt(self) -> str:
        return self.__db_get('master_password_hash_salt')

    @master_password_hash_salt.setter
    def master_password_hash_salt(self, v: str):
        self.__db['master_password_hash_salt'] = v

    @property
    def ethereum_address(self) -> str:
        return self.__db_get('ethereum_address')

    @ethereum_address.setter
    def ethereum_address(self, v: str):
        self.__db['ethereum_address'] = v

    @property
    def blockchain_length(self) -> int:
        return int(self.__db_get('blockchain_length', 0))

    @blockchain_length.setter
    def blockchain_length(self, v: int):
        self.__db['blockchain_length'] = str(v)

    @property
    def blockchain(self) -> list:
        return json.loads(self.__db_get('blockchain', '[]'))

    @blockchain.setter
    def blockchain(self, v: list):
        self.__db['blockchain'] = json.dumps(v)

    def __del__(self):
        self.__db.close()

    def __db_get(self, key, default=None):
        if key in self.__db:
            return self.__db[key]
        return default
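# Minimal sketch of the Settings singleton in use (the path is an assumption;
# in the real app this runs inside a Flask application context, since
# __init__ may touch current_app).
s = Settings(filename='/tmp/example/settings.db')
s.blockchain_length = 3
s.blockchain = [{'index': 0}, {'index': 1}, {'index': 2}]
s.write()  # persist via an explicit commit
print(s.blockchain_length, len(s.blockchain))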
def calculate_graph_features(data_prefix, bug_report_id, bug_report_full_sha,
                             repository_path, sha_to_note):
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db",
        flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()
    shas = current_files['shas']
    sha_to_file_name = current_files['sha_to_file_name']
    graph_features_data_list = []
    graph_features_lookup = {}
    sha_to_imports = {}
    sha_to_class_name = {}
    for sha in shas:
        note_sha = sha_to_note[sha]
        note_content = cat_file_blob(repository_path, note_sha)
        imports = json.loads(note_content)
        sha_to_imports[sha] = imports
        if 'className' in imports and imports['className'] is not None \
                and imports['className'] != "":
            class_name = imports['className'].replace(".", "")
            sha_to_class_name[sha] = class_name
    graph_data = process_graph_results(sha_to_imports)
    current_index = 0
    for sha in shas:
        try:
            current_node_name = sha_to_class_name[sha]
            values = graph_data.loc[current_node_name]
            feature_15 = values['in']
            feature_16 = values['out']
            feature_17 = values['pr']
            feature_18 = values['a']
            feature_19 = values['h']
        except KeyError:
            feature_15 = 0.0
            feature_16 = 0.0
            feature_17 = 0.0
            feature_18 = 0.0
            feature_19 = 0.0
        current_features = sparse.coo_matrix(
            [feature_15, feature_16, feature_17, feature_18, feature_19])
        graph_features_data_list.append(current_features)
        graph_features_lookup[sha] = current_index
        current_index += 1
    graph_features_data = sparse.vstack(graph_features_data_list)
    sparse.save_npz(
        data_prefix + '_' + bug_report_id[0:7] + '_graph_features_data',
        graph_features_data)
    with open(
            data_prefix + '_' + bug_report_id[0:7] +
            '_graph_features_index_lookup', 'w') as outfile:
        json.dump(graph_features_lookup, outfile)
    return bug_report_id
agregados = 0
for i, cajero_json in enumerate(cajeros_json):
    # UnQLite() already opens the database; the extra open() call was redundant.
    db = UnQLite('cajeros.db')
    cajero = {}
    cajero['id'] = cajero_json['id']
    cajero['clave_institucion'] = cajero_json['cb']
    cajero['lat'] = cajero_json['l']['lat']
    cajero['lon'] = cajero_json['l']['lng']
    cajero['nombre_institucion'] = NOMBRES[cajero['clave_institucion']]
    try:
        print('ATM ' + str(i) + ' of ' + str(total_cajeros) + ', ' +
              str(cajero['id']) + ' exists? ' + str(db.exists(cajero['id'])))
        if not db.exists(cajero['id']):
            url_cajero = (CAJERO_URL + '?id=' + str(cajero['id']) +
                          '&banco=' + str(cajero['clave_institucion']))
            cajero_json = requests.get(url_cajero).json()['contenido']
            cajero['cp'] = str(cajero_json['cp'])
            cajero['horario'] = cajero_json['hs']
            cajero['direccion'] = cajero_json['d']
            cajero['actualizacion'] = str(datetime.datetime.now())
            db[cajero['id']] = cajero
            print('Added: ' + str(cajero))
            agregados += 1
    except UnicodeEncodeError:
        print('UnicodeEncodeError')
        print(cajero)
    finally:
        db.close()
print('ATMs added: ' + str(agregados))
def convert_tf_idf(data_prefix, bug_report_full_sha):
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db",
        flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()
    bug_report_id = bug_report_full_sha[0:7]
    shas = current_files['shas']
    class_name_lookup = current_files['class_name_to_sha']
    ast_index_collection = UnQLite(
        data_prefix + "_ast_index_collection_index_db",
        flags=0x00000100 | 0x00000001)
    data = sparse.load_npz(data_prefix + '_raw_count_data.npz')
    data_to_tf_idf = []
    lookups = {}
    n_rows = 0
    for sha in shas:
        current_indexes = pickle.loads(ast_index_collection[sha])
        (matrix, lookup) = extract_ast(data, current_indexes)
        current_index = n_rows
        data_to_tf_idf.append(matrix)
        for k in lookup:
            lookup[k] += current_index
        lookups[sha] = lookup
        n_rows += matrix.shape[0]
    ast_index_collection.close()
    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db",
        flags=0x00000100 | 0x00000001)
    current_bug_report_indexes = pickle.loads(
        bug_report_index_collection[bug_report_id])
    bug_report_index_collection.close()
    bug_report_matrix, bug_report_lookup = extract_bug_report(
        data, current_bug_report_indexes)
    current_index = n_rows
    data_to_tf_idf.append(bug_report_matrix)
    for k in bug_report_lookup:
        bug_report_lookup[k] += current_index
    lookups[bug_report_id] = bug_report_lookup
    n_rows += bug_report_matrix.shape[0]
    data_matrix = sparse.vstack(data_to_tf_idf)
    transformer = TfidfTransformer()
    tf_idf_data = transformer.fit_transform(data_matrix)
    sparse.save_npz(data_prefix + '_' + bug_report_id + '_tf_idf_data',
                    tf_idf_data)
    with open(data_prefix + '_' + bug_report_id + '_tf_idf_index_lookup',
              'w') as outfile:
        json.dump(lookups, outfile)
'''
('usr', b'a\x1e\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02L\\\x0b\xb5')
('usr_0', '\x01\x08\x00\x00\x00\x04name\x05\x08\x00\x00\x00\x04neet\x06\x08\x00\x00\x00\x04uuid\x05\x08\x00\x00\x00\x08D7B810FD\x06\x08\x00\x00\x00\x04__id\x05\n\x00\x00\x00\x00\x00\x00\x00\x00\x06\x02')
('usr_1', '\x01\x08\x00\x00\x00\x04name\x05\x08\x00\x00\x00\x05limbo\x06\x08\x00\x00\x00\x04uuid\x05\x08\x00\x00\x00\x08D7B810FC\x06\x08\x00\x00\x00\x04__id\x05\n\x00\x00\x00\x00\x00\x00\x00\x01\x06\x02')
Note that the 'collection' metadata record ('usr') is serialized as well.
'''
usr.store(u1)
# filter() is a linear scan; UnQLite has no built-in secondary index,
# but we can maintain one ourselves (see muti_store/search below).
n = usr.filter(lambda o: o['name'] == 'neet')
print(n)


def muti_store(db, coll, uuid, dic):
    # Map the uuid to the record id the collection will assign next.
    next_id = coll.last_record_id() + 1
    db[uuid] = next_id
    coll.store(dic)
    return True


def search(db, coll, uuid):
    return coll.fetch(int(db[uuid]))


u2 = {'name': 'limbo', 'uuid': 'D7B810FC'}
muti_store(udb, usr, 'D7B810FC', u2)
n2 = search(udb, usr, 'D7B810FC')
print(n2)
udb.close()
def process_bug_report(data_prefix, bug_report_full_sha, bug_report_file_path,
                       max_frequency):
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db",
        flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()
    shas = current_files['shas']
    sha_to_file_name = current_files['sha_to_file_name']
    bug_report_id = bug_report_full_sha[0:7]
    vectorized_data = sparse.load_npz(data_prefix + '_' + bug_report_id +
                                      '_tf_idf_data.npz')
    with open(data_prefix + '_' + bug_report_id + '_tf_idf_index_lookup',
              'r') as index_lookup_file:
        lookups = json.load(index_lookup_file)
    enriched_api_data, enriched_api_indexes = load_enriched_api(
        data_prefix, bug_report_id)
    enriched_report = enriched_api_data[-1, :]
    (vectorized_report, vectorized_summary,
     vectorized_description) = load_bug_report(vectorized_data, lookups,
                                               bug_report_id)
    ast_cache_collection = UnQLite(data_prefix + "_ast_cache_collection_db",
                                   flags=0x00000100 | 0x00000001)
    bug_reports = load_bug_reports(bug_report_file_path)
    if bug_report_id in bug_reports:
        current_bug_report_summary = \
            bug_reports[bug_report_id]['bug_report']['summary']
    else:
        current_bug_report_summary = retrieve_summary(
            bug_reports, bug_report_full_sha)['bug_report']['summary']
    feature_3_data = sparse.load_npz(data_prefix + '_' + bug_report_id +
                                     '_feature_3_data.npz')
    with open(data_prefix + '_' + bug_report_id + '_feature_3_index_lookup',
              'r') as feature_3_file:
        feature_3_file_lookup = json.load(feature_3_file)
    graph_data = sparse.load_npz(data_prefix + '_' + bug_report_id +
                                 '_graph_features_data.npz').tocsr()
    with open(
            data_prefix + '_' + bug_report_id +
            '_graph_features_index_lookup', 'r') as graph_lookup_file:
        graph_lookup = json.load(graph_lookup_file)
    features_5_6_data = sparse.load_npz(
        data_prefix + '_' + bug_report_id[0:7] +
        '_features_5_6_data.npz').tocsr()
    with open(
            data_prefix + '_' + bug_report_id[0:7] +
            '_features_5_6_index_lookup', 'r') as features_5_6_lookup_file:
        features_5_6_lookup = json.load(features_5_6_lookup_file)
    if bug_report_id in bug_reports:
        fixed_filenames = \
            bug_reports[bug_report_id[0:7]]['commit']['diff'].keys()
    else:
        fixed_filenames = retrieve_summary(
            bug_reports, bug_report_full_sha)['commit']['diff'].keys()
    features = []
    features_files = []
    for file_index, current_file_sha in enumerate(shas):
        current_lookup = lookups[current_file_sha]
        source_index = current_lookup['source']
        method_source_start_index = current_lookup['methodsStart']
        method_source_end_index = current_lookup['methodsEnd']
        class_start_index = current_lookup['classNamesStart']
        class_end_index = current_lookup['classNamesEnd']
        method_names_start_index = current_lookup['methodNamesStart']
        method_names_end_index = current_lookup['methodNamesEnd']
        variable_start_index = current_lookup['variableNamesStart']
        variable_end_index = current_lookup['variableNamesEnd']
        comment_start_index = current_lookup['commentsStart']
        comment_end_index = current_lookup['commentsEnd']
        current_graph_lookup = graph_lookup[current_file_sha]
        current_features_5_6 = features_5_6_lookup[current_file_sha]
        current_file_name = sha_to_file_name[current_file_sha]
        f1 = feature_1(vectorized_report, vectorized_data, source_index,
                       method_source_start_index, method_source_end_index)
        f2 = feature_2(enriched_report, enriched_api_data,
                       enriched_api_indexes, current_file_sha)
        f3 = feature_3(feature_3_data, feature_3_file_lookup,
                       current_file_sha)
        f4 = feature_4(current_bug_report_summary, ast_cache_collection,
                       current_file_sha)
        f5 = features_5_6_data[current_features_5_6, 0]
        f6 = features_5_6_data[current_features_5_6, 1] / max_frequency
        f7 = feature_sim(vectorized_summary, vectorized_data,
                         class_start_index, class_end_index)
        f8 = feature_sim(vectorized_summary, vectorized_data,
                         method_names_start_index, method_names_end_index)
        f9 = feature_sim(vectorized_summary, vectorized_data,
                         variable_start_index, variable_end_index)
        f10 = feature_sim(vectorized_summary, vectorized_data,
                          comment_start_index, comment_end_index)
        f11 = feature_sim(vectorized_description, vectorized_data,
                          class_start_index, class_end_index)
        f12 = feature_sim(vectorized_description, vectorized_data,
                          method_names_start_index, method_names_end_index)
        f13 = feature_sim(vectorized_description, vectorized_data,
                          variable_start_index, variable_end_index)
        f14 = feature_sim(vectorized_description, vectorized_data,
                          comment_start_index, comment_end_index)
        f15 = graph_data[current_graph_lookup, 0]
        f16 = graph_data[current_graph_lookup, 1]
        f17 = graph_data[current_graph_lookup, 2]
        f18 = graph_data[current_graph_lookup, 3]
        f19 = graph_data[current_graph_lookup, 4]
        used_in_fix = 1.0 if current_file_name in fixed_filenames else 0.0
        features.append([
            f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
            f16, f17, f18, f19, used_in_fix
        ])
        features_files.append(current_file_sha)
    ast_cache_collection.close()
    sparse_features = sparse.csr_matrix(features)
    sparse.save_npz(data_prefix + '_' + bug_report_id + '_features',
                    sparse_features)
    with open(data_prefix + '_' + bug_report_id + '_files', 'w') as outfile:
        json.dump(features_files, outfile)
def vectorize(ast_cache, bug_reports, data_prefix):
    data = []
    current_index = 0
    ast_index_lookup = {}
    ast_types_lookup = {}
    with ast_cache.cursor() as cursor:
        for k, v in cursor:
            ast_sha = k
            current_ast = pickle.loads(v)
            data, current_index, current_lookup = add_ast_to_vectorization_data(
                data, current_index, current_ast)
            ast_index_lookup[ast_sha] = current_lookup
            ast_types_lookup[ast_sha] = extract_types(current_ast)
    stemmer = PorterStemmer()
    print("data length", len(data))
    print("current index", current_index)
    bug_report_index_lookup = {}
    for bug_report_id in tqdm(bug_reports):
        current_bug_report = bug_reports[bug_report_id]['bug_report']
        data, current_index, current_lookup = add_bug_report_to_vectorization_data(
            data, current_index, current_bug_report, stemmer)
        bug_report_index_lookup[bug_report_id[0:7]] = current_lookup
    print("data length", len(data))
    print("current index", current_index)
    before_v = default_timer()
    vectorizer = DictVectorizer()
    vectorized_data = vectorizer.fit_transform(data)
    after_v = default_timer()
    total_v = after_v - before_v
    print("total count vectorization time", total_v)
    print("vectorized_data type", type(vectorized_data))
    print("vectorized_data shape", vectorized_data.shape)
    feature_names = vectorizer.get_feature_names()
    feature_names_lengths_dict = {}
    for i, feature_name in enumerate(feature_names):
        feature_names_lengths_dict[i] = len(feature_name)
    with open(data_prefix + '_feature_names_dict', 'w') as outfile:
        json.dump(feature_names_lengths_dict, outfile)
    sparse.save_npz(data_prefix + '_raw_count_data', vectorized_data)
    ast_index_collection = UnQLite(data_prefix +
                                   "_ast_index_collection_index_db")
    for k, v in ast_index_lookup.items():
        ast_index_collection[k] = pickle.dumps(v, -1)
    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db")
    for k, v in bug_report_index_lookup.items():
        bug_report_index_collection[k] = pickle.dumps(v, -1)
    ast_types_collection = UnQLite(data_prefix +
                                   "_ast_types_collection_index_db")
    for k, v in ast_types_lookup.items():
        ast_types_collection[k] = pickle.dumps(v, -1)
    ast_index_collection.close()
    bug_report_index_collection.close()
    ast_types_collection.close()
class UnQLiteTest(unittest.TestCase):
    def setUp(self):
        self.app = bottle.Bottle(catchall=False)
        _, filename = tempfile.mkstemp(suffix='.unqlite')
        self.plugin = self.app.install(unqlite.Plugin(filename=filename))
        self.conn = UnQLite(filename)
        self.conn.collection('todo').create()
        self.conn.close()

    def tearDown(self):
        pass  # os.unlink(self.plugin.filename)

    def test_with_keyword(self):
        @self.app.get('/')
        def test(db):
            self.assertEqual(type(db), type(UnQLite(':mem:')))

        self._request('/')

    def test_without_keyword(self):
        @self.app.get('/')
        def test_1():
            pass

        self._request('/')

        @self.app.get('/2')
        def test_2(**kw):
            self.assertFalse('db' in kw)

        self._request('/2')

    def test_install_conflicts(self):
        self.app.install(unqlite.Plugin(keyword='db2'))

        @self.app.get('/')
        def test(db, db2):
            # Two plugins can coexist under different keywords.
            pass

        self._request('/')

    def test_commit_on_redirect(self):
        @self.app.get('/')
        def test(db):
            self._insert_into(db)
            bottle.redirect('/')

        self._request('/')
        self.assert_records(1)

    def test_commit_on_abort(self):
        @self.app.get('/')
        def test(db):
            self._insert_into(db)
            bottle.abort()

        self._request('/')
        self.assert_records(0)

    def _request(self, path, method='GET'):
        return self.app({
            'PATH_INFO': path,
            'REQUEST_METHOD': method
        }, lambda x, y: None)

    def _insert_into(self, db):
        db.collection('todo').store({'task': 'PASS'})

    def assert_records(self, count):
        self.conn.open()
        actual_count = len(self.conn.collection('todo').all())
        self.conn.close()
        self.assertEqual(count, actual_count)
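# Hedged sketch of the pattern the test above exercises: after installing the
# bottle plugin (imported here as `unqlite`, mirroring the test), any route
# whose handler declares a `db` argument receives an open UnQLite handle, and
# the plugin commits on normal completion. The filename is an assumption.
app = bottle.Bottle()
app.install(unqlite.Plugin(filename='todo.unqlite'))

@app.get('/todos')
def list_todos(db):
    todos = db.collection('todo')
    todos.create()  # create if missing (cf. the setUp above)
    return {'todos': todos.all()}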
class Donurista:
    def __init__(self, brpath, dbpath):
        self.br = Engine(brpath)  # backing engine, driven over UCI
        self.br.write('setoption name Threads value 3')
        self.br.write('setoption name Hash value 4096')
        self.db = UnQLite(dbpath)
        self.board = Board()
        self.func_book = {
            'uci': self.uci,
            'isready': self.isready,
            'go': self.go,
            'quit': self.quit,
            'position': self.position
        }
        self.hello()
        try:
            self.start()
        except KeyboardInterrupt:
            self.quit(None)

    def simple(self, inp):
        self.br.write(inp)

    def uci(self, inp):
        # TODO: normal options
        print('id name Donurista')
        print('id author Gornak40')
        print('option name Debug Log File type string default')
        print('option name Contempt type spin default 24 min -100 max 100')
        print('option name Analysis Contempt type combo default Both var Off var White var Black var Both')
        print('option name Threads type spin default 1 min 1 max 512')
        print('option name Hash type spin default 16 min 1 max 33554432')
        print('option name Clear Hash type button')
        print('option name Ponder type check default false')
        print('option name MultiPV type spin default 1 min 1 max 500')
        print('option name Skill Level type spin default 20 min 0 max 20')
        print('option name Move Overhead type spin default 10 min 0 max 5000')
        print('option name Slow Mover type spin default 100 min 10 max 1000')
        print('option name nodestime type spin default 0 min 0 max 10000')
        print('option name UCI_Chess960 type check default false')
        print('option name UCI_AnalyseMode type check default false')
        print('option name UCI_LimitStrength type check default false')
        print('option name UCI_Elo type spin default 1350 min 1350 max 2850')
        print('option name UCI_ShowWDL type check default false')
        print('option name SyzygyPath type string default <empty>')
        print('option name SyzygyProbeDepth type spin default 1 min 1 max 100')
        print('option name Syzygy50MoveRule type check default true')
        print('option name SyzygyProbeLimit type spin default 7 min 0 max 7')
        print('option name Use NNUE type check default true')
        print('option name EvalFile type string default nn-82215d0fd0df.nnue')
        print('uciok')

    def isready(self, inp):
        print('readyok')

    def is_cached(self):
        # TODO: ARGS MODIFICATION
        return False

    def make_new_fen(self):
        # Drop the halfmove clock and fullmove number so transpositions
        # reached at different move counts share one key.
        return ' '.join(self.board.fen().split()[:-2])

    def write_db(self, info, bestmove):
        data = f'{info.depth};{bestmove.bestmove}'
        with self.db.transaction():
            self.db[self.make_new_fen()] = data

    def go(self, inp):
        fen = self.make_new_fen()
        db_depth = 0
        T = time()
        if fen in self.db:
            logging.info('[+] update db')
            # testing feature
            db_depth, db_move = self.db[fen].decode('utf-8').split(';')
            db_depth = int(db_depth)
            if self.is_cached():
                print('info smart cache moves')
                print(f'bestmove {db_move}')
                return
        T = time() - T
        logging.info(f'[+] db timing {T}')
        self.br.write(inp)
        br_pred = self.br.readfor(lambda x: 'bestmove' in x)
        info = Info(br_pred[-2])
        bestmove = Info(br_pred[-1])
        if info.depth > db_depth:
            self.write_db(info, bestmove)
        logging.info(f'[+] db size {len(self.db)}')
        print(info)
        print(bestmove)

    def position(self, inp):
        self.br.write(inp)
        self.board = Board()
        for token in inp.split():
            if token in {'startpos', 'position', 'moves'}:
                continue
            self.board.push_san(token)

    def start(self):
        while True:
            inp = input()
            if not inp:
                continue
            logging.info(inp)
            com = inp.split()[0]
            func = self.func_book.get(com, self.simple)
            self.br.isready()
            func(inp)

    def hello(self):
        F = Figlet()
        text = F.renderText('Donurista')
        print(text)

    def quit(self, inp):
        logging.info('[+] ENGINE TERMINATED')
        del self.br
        self.db.close()
        exit(0)
def add_types_source_to_bug_report_data(data, data_prefix, class_name_lookup,
                                        ast_sha):
    asts = UnQLite(data_prefix + "_ast_index_collection_index_db",
                   flags=0x00000100 | 0x00000001)
    types = UnQLite(data_prefix + "_ast_types_collection_index_db",
                    flags=0x00000100 | 0x00000001)
    current_type = pickle.loads(types[ast_sha])
    types_per_method = current_type['methodVariableTypes']
    cl = data.shape[1]
    current_index = 0
    start = current_index
    enriched_apis = []
    for method_types in types_per_method:
        method_type_shas = []
        for method_type in method_types:
            if method_type in class_name_lookup:
                method_type_shas.append(class_name_lookup[method_type])
        supertypes_shas_per_type = [
            set(find_types_shas(types, class_name_lookup, s))
            for s in method_type_shas
        ]
        indexes = []
        for supertypes in supertypes_shas_per_type:
            indexes.extend(get_indexes(asts, supertypes))
        if indexes == []:
            method_enriched_api = sparse.coo_matrix(
                np.zeros(cl).reshape(1, cl))
        else:
            method_enriched_api = sparse.coo_matrix(
                np.sum(data[indexes, :], axis=0))
        enriched_apis.append(method_enriched_api)
    if enriched_apis == []:
        class_enriched_api = sparse.coo_matrix(np.zeros(cl).reshape(1, cl))
    else:
        class_enriched_api = sparse.coo_matrix(np.sum(enriched_apis, axis=0))
    enriched_apis.append(class_enriched_api)
    current_index += len(enriched_apis)
    asts.close()
    types.close()
    lookup = {}
    lookup['enrichedApiStart'] = start
    lookup['enrichedApiEnd'] = current_index - 1
    enriched_apis_matrix = sparse.vstack(enriched_apis)
    return (enriched_apis_matrix, lookup, ast_sha)
#!/usr/bin/python3
from unqlite import UnQLite
from prettytable import PrettyTable
from argparse import ArgumentParser


def print_all():
    P = PrettyTable(['#', 'Fen', 'Depth', 'Move'])
    for i, (fen, info) in enumerate(db):
        info = info.decode('utf-8')
        depth, move = info.split(';')
        P.add_row([i + 1, fen, depth, move])
    print(P)


parser = ArgumentParser(description='Donurista quick DB viewer')
parser.add_argument('--all', '-a', action='store_const',
                    help='print all positions (slow for large databases)',
                    const=True, default=False)
db = UnQLite('new.db')
args = parser.parse_args()
if args.all:
    print_all()
else:
    print(len(db))
db.close()
def calculate_feature_3(data_prefix, bug_report_id, bug_report_full_sha,
                        bug_reports):
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db",
        flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()
    shas = current_files['shas']
    sha_to_file_name = current_files['sha_to_file_name']
    data = sparse.load_npz(data_prefix + '_raw_count_data.npz')
    row_length = data.shape[1]
    current_bug_report = bug_reports[bug_report_id]
    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db",
        flags=0x00000100 | 0x00000001)
    current_bug_report_summary_index = pickle.loads(
        bug_report_index_collection[bug_report_id[0:7]])['summary']
    feature_3_data_list = []
    feature_3_lookup = {}
    if 'views' in current_bug_report \
            and 'bug_fixing' in current_bug_report['views']:
        bug_fixing_view = current_bug_report['views']['bug_fixing']
        current_index = 0
        for sha in shas:
            current_file_name = sha_to_file_name[sha]
            if current_file_name in bug_fixing_view:
                related_bug_reports = bug_fixing_view[current_file_name]['br']
                bug_report_history = combine(related_bug_reports, data,
                                             bug_report_index_collection)
            else:
                bug_report_history = np.zeros((1, row_length))
            feature_3_data_list.append(bug_report_history)
            feature_3_lookup[sha] = current_index
            current_index += 1
    else:
        current_index = 0
        for sha in shas:
            bug_report_history = np.zeros((1, row_length))
            feature_3_data_list.append(bug_report_history)
            feature_3_lookup[sha] = current_index
            current_index += 1
    bug_report_index_collection.close()
    feature_3_data_list.append(data[current_bug_report_summary_index, :])
    feature_3_data = sparse.vstack(feature_3_data_list)
    transformer = TfidfTransformer()
    feature_3_tf_idf_data = transformer.fit_transform(feature_3_data)
    sparse.save_npz(
        data_prefix + '_' + bug_report_id[0:7] + '_feature_3_data',
        feature_3_tf_idf_data)
    with open(
            data_prefix + '_' + bug_report_id[0:7] +
            '_feature_3_index_lookup', 'w') as outfile:
        json.dump(feature_3_lookup, outfile)
    return bug_report_id
def retrieve_features_5_6(data_prefix, bug_report_id, bug_report_full_sha,
                          bug_reports):
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db",
        flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()
    shas = current_files['shas']
    sha_to_file_name = current_files['sha_to_file_name']
    current_bug_report = bug_reports[bug_report_id]
    features_5_6_data_list = []
    features_5_6_lookup = {}
    max_recency = 0.0
    max_frequency = 0.0
    if 'views' in current_bug_report \
            and 'bug_fixing' in current_bug_report['views']:
        bug_fixing_view = current_bug_report['views']['bug_fixing']
        current_index = 0
        for sha in shas:
            current_file_name = sha_to_file_name[sha]
            if current_file_name in bug_fixing_view:
                recency = bug_fixing_view[current_file_name][
                    'recency[30-day months]']
                frequency = bug_fixing_view[current_file_name]['frequency']
                features_5_6_data_list.append(
                    sparse.coo_matrix([recency, frequency], shape=(1, 2)))
                features_5_6_lookup[sha] = current_index
                current_index += 1
                if recency > max_recency:
                    max_recency = recency
                if frequency > max_frequency:
                    max_frequency = frequency
            else:
                recency = 0.0
                frequency = 0.0
                features_5_6_data_list.append(
                    sparse.coo_matrix([recency, frequency], shape=(1, 2)))
                features_5_6_lookup[sha] = current_index
                current_index += 1
    else:
        current_index = 0
        for sha in shas:
            recency = 0.0
            frequency = 0.0
            features_5_6_data_list.append(
                sparse.coo_matrix([recency, frequency], shape=(1, 2)))
            features_5_6_lookup[sha] = current_index
            current_index += 1
    features_5_6_data = sparse.vstack(features_5_6_data_list)
    sparse.save_npz(
        data_prefix + '_' + bug_report_id[0:7] + '_features_5_6_data',
        features_5_6_data)
    with open(
            data_prefix + '_' + bug_report_id[0:7] +
            '_features_5_6_index_lookup', 'w') as outfile:
        json.dump(features_5_6_lookup, outfile)
    return bug_report_id, max_recency, max_frequency
class PyCachEngine:
    def __init__(self, path, db_path, options=None):
        # Avoid the mutable default argument; fall back to an empty dict.
        self.board = Board()
        self.db = UnQLite(db_path)
        self.engine = Popen(path, universal_newlines=True, stdin=PIPE,
                            stdout=PIPE)
        self._put('uci')
        self._ready()
        for option, val in (options or {}).items():
            self._set_option(option, val)
        self.num_games = 1
        # The constructor deliberately never returns: it loops, playing
        # self-play games and caching the results.
        while True:
            self.board.reset()
            self.learn(200)

    def __del__(self):
        self.db.close()
        self.engine.kill()

    def _put(self, line):
        if not self.engine.stdin:
            raise BrokenPipeError()
        self.engine.stdin.write(line + '\n')
        self.engine.stdin.flush()

    def _read(self):
        if not self.engine.stdout:
            raise BrokenPipeError()
        return self.engine.stdout.readline().strip()

    def _ready(self):
        self._put('isready')
        while self._read() != 'readyok':
            continue

    def _bestmove(self):
        while True:
            line = self._read()
            if 'depth' in line:
                depth = int(line.split()[2])
            if 'bestmove' in line:
                move = line.split()[1]
                return (move, depth)

    def _set_option(self, option, value):
        # UCI syntax is "setoption name <id> value <x>",
        # not "setoption option ...".
        self._put(f'setoption name {option} value {value}')

    def _store(self, new_fen, move, depth):
        # transaction() commits on a successful exit, so the explicit
        # commit() the original called here was redundant.
        with self.db.transaction():
            if new_fen in self.db:
                _move, _depth = eval(self.db[new_fen].decode('utf-8'))
                print(_move, _depth)
                if int(_depth) >= depth:
                    return
            self.db[new_fen] = (move, depth)

    def learn(self, movetime):
        fen = self.board.fen()
        new_fen = ' '.join(fen.split()[:-2])
        self._put(f'position fen {fen}')
        self._put(f'go movetime {movetime}')
        move, depth = self._bestmove()
        self.board.push_uci(move)
        self._store(new_fen, move, depth)
        system('clear')
        print(self.board)
        print()
        print('new_fen:', new_fen)
        print('depth:', depth)
        print('move:', move)
        print('db_size:', len(self.db))
        print('num_games:', self.num_games)
        if not self.board.is_game_over():
            self.learn(movetime)
        else:
            result = self.board.outcome().result()
            self.num_games += 1
            print(result)
#!/usr/bin/python3
from unqlite import UnQLite

dbr = UnQLite('donurista.db')
dbw = UnQLite('new.db')
for fen, info in dbr:
    info = info.decode('utf-8')
    # Normalize keys: drop the halfmove clock and fullmove number
    # from the FEN so transpositions share one record.
    fen = ' '.join(fen.split()[:-2])
    dbw[fen] = info
    print(fen)
dbr.close()
dbw.close()