class Compilation_error_db:
    def __init__(self, db_path=''):
        self.db_path = db_path
        self.store = UnQLite()  # in-memory store: opening the on-disk database here reportedly freezes the process
        self.hits = 0
        self.misses = 0
        self.uncommited_recs = 0

    # keeping prog_id for backward compatibility
    def get_errors(self, prog_id, prog):
        #print("get_errors here")
        if prog in self.store:
            err_msg = self.store[prog]
            errs = get_error_list(err_msg)
            self.hits += 1
        else:
            errs, err_msg = compilation_errors(prog)
            self.store[prog] = err_msg
            self.misses += 1
            self.uncommited_recs += 1

            if self.uncommited_recs > 0 and self.uncommited_recs % 250 == 0:
                self.commit()
        return errs

    def close(self):
        self.store.close()

    def commit(self):
        self.uncommited_recs = 0
        self.store.commit()

    def __len__(self):
        return len(self.store)
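
A minimal usage sketch for the cache above, assuming the project's own compilation_errors() and get_error_list() helpers are importable; the first call compiles the program and caches the error message, the second is answered from the in-memory store:

cache = Compilation_error_db()
prog = 'int main() { return 0; }'
errs = cache.get_errors('prog-1', prog)  # miss: runs the compiler and caches err_msg
errs = cache.get_errors('prog-1', prog)  # hit: served from the store
print(cache.hits, cache.misses)          # 1 1
cache.commit()
cache.close()
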
class UniqLiteFactory:
    def __init__(self, filename):
        # path of the UnQLite database file used for every connection
        self.filename = filename
        self.conn = None

    def connect(self):
        self.conn = UnQLite(self.filename)
        return self.conn

    def disconnect(self):
        self.conn.close()

    def create_collection(self, name):
        db = self.connect()
        collection = db.collection(name)
        collection.create()
        print("collection " + name + " created successfully")
        self.disconnect()

    def drop_collection(self, name):
        db = self.connect()
        collection = db.collection(name)
        collection.drop()
        print("collection " + name + " dropped successfully")
        self.disconnect()

    def insert(self, collection_name, data=None):
        db = self.connect()
        collection = db.collection(collection_name)
        collection.store(data)
        self.disconnect()
def get_bug_report(data_prefix, vectorized_data, bug_report_id):
    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db")
    bug_report = pickle.loads(bug_report_index_collection[bug_report_id])
    bug_report_index_collection.close()
    index = bug_report['report']
    return vectorized_data[index, :]
Example 4 (File: db.py, Project: napoler/tkitDb)
class UnQDb:
    """
    基于UnQLite的nosql数据存贮
    """
    def __init__(self, dbpath='data.db'):
        # UnQLite.__init__(self,dbpath)
        self.db = UnQLite(dbpath)
        self.dbpath = dbpath

    def __del__(self):
        self.db.close()

    def add(self, key, value):
        """
        Add a record, e.g.:
        key = '2eas'
        value = {}
        """
        self.db[key] = value

    def reload(self):
        self.db = UnQLite(self.dbpath)

    def get(self, key):
        """
        Fetch a record and convert it back to a Python object.
        UnQLite returns bytes, so the value is decoded and parsed with
        ast.literal_eval; None is returned if the key is missing or the
        value cannot be parsed.
        """
        try:
            value = str(self.db[key], "utf-8")
            value = ast.literal_eval(value)
        except (KeyError, ValueError, SyntaxError):
            return None
        return value

    def get_all(self):
        with self.db.cursor() as cursor:
            for key, _ in cursor:
                yield key, self.get(key)

    def delete(self, key):
        """
        Delete a record.
        """
        self.db.delete(key)

    def col(self, key):
        # store the collection on a separate attribute so this method is not
        # shadowed by an instance attribute of the same name
        self._col = self.db.collection(key)
        self._col.create()  # Create the collection if it does not exist.
        return self._col
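
A short usage sketch for the UnQDb wrapper above; it relies on the round-trip the class itself assumes (values stored via their str() representation and parsed back with ast.literal_eval), and the file name is only illustrative:

store = UnQDb('example.db')
store.add('user:1', {'name': 'neet', 'uuid': 'D7B810FD'})
print(store.get('user:1'))           # {'name': 'neet', 'uuid': 'D7B810FD'}
for key, value in store.get_all():   # iterate over every stored record
    print(key, value)
store.delete('user:1')
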
def extract_enriched_api(data_prefix, bug_report_full_sha):
    data = sparse.load_npz(data_prefix + '_raw_count_data.npz')
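    # flags 0x00000100 | 0x00000001 presumably correspond to UnQLite's
    # UNQLITE_OPEN_MMAP | UNQLITE_OPEN_READONLY constants (read-only, memory-mapped open)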
    bug_report_files_collection_db = UnQLite(data_prefix +
                                             "_bug_report_files_collection_db",
                                             flags=0x00000100 | 0x00000001)

    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()

    bug_report_id = bug_report_full_sha[0:7]

    shas = current_files['shas']
    class_name_lookup = current_files['class_name_to_sha']

    bug_report_data = []
    bug_report_lookup = {}

    n_rows = 0

    for ast_sha in shas:
        ast_data, lookup, current_ast_sha = add_types_source_to_bug_report_data(
            data, data_prefix, class_name_lookup, ast_sha)
        current_index = n_rows
        bug_report_data.append(ast_data)
        for k in lookup:
            lookup[k] += current_index
        bug_report_lookup[current_ast_sha] = lookup
        n_rows += ast_data.shape[0]

    bug_report_row = get_bug_report(data_prefix, data, bug_report_id)
    bug_report_data.append(bug_report_row)

    bug_report_data_matrix = sparse.vstack(bug_report_data)

    sparse.save_npz(
        data_prefix + '_' + bug_report_id + '_partial_enriched_api',
        bug_report_data_matrix)
    with open(
            data_prefix + '_' + bug_report_id +
            '_partial_enriched_api_index_lookup', 'w') as outfile:
        json.dump(bug_report_lookup, outfile)

    transformer = TfidfTransformer()
    tf_idf_data = transformer.fit_transform(bug_report_data_matrix)
    sparse.save_npz(data_prefix + '_' + bug_report_id + '_tfidf_enriched_api',
                    tf_idf_data)

    #    print "bug_report_id", bug_report_id

    return bug_report_id
def index_by(index_dir: str, index_extension: str, data_iter: Iterable,
             key_fn: Callable, value_fn: Callable, checkpoint: int,
             object_name: str):
    """
    Generate UnQLite data indices for each entity
    :param index_dir: index directory
    :param index_extension: index file extension
    :param data_iter: iterable over the data
    :param key_fn: function applied to a data item to produce the index key
    :param value_fn: function applied to a data item to produce the index value
    :param checkpoint: commit the indices every `checkpoint` items
    :param object_name: label used in progress messages
    :return: dict of index paths by entity name
    """
    i = 0
    index_path_by_entity = {}
    index_by_entity = {}
    for data in data_iter:
        entity = data['@type']
        if entity not in index_path_by_entity:
            index_path = get_file_path([index_dir, entity],
                                       ext=index_extension)
            index_path_by_entity[entity] = index_path

            index = UnQLite(index_path_by_entity[entity])
            index.begin()
            index_by_entity[entity] = index
        index = index_by_entity[entity]

        # Index
        index[str(key_fn(data))] = value_fn(data)

        i += 1
        # Log
        if i % 50000 == 0:
            print(f'checkpoint: {i} {object_name}')
        # Checkpoint
        if i % checkpoint == 0:
            # Flush indices
            for index in index_by_entity.values():
                index.commit()
                index.begin()
    print(f'checkpoint: {i} {object_name}')

    # Close indices
    for index in index_by_entity.values():
        index.commit()
        index.close()

    # Output all indices
    return index_path_by_entity
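
A hedged usage sketch of index_by: get_file_path is the project's own helper (assumed here to join the directory, entity name, and extension, with the index directory already existing), so only the call shape and the lambdas are illustrated:

records = [
    {'@type': 'Person', 'id': 1, 'name': 'Ada'},
    {'@type': 'Person', 'id': 2, 'name': 'Linus'},
    {'@type': 'Organization', 'id': 10, 'name': 'ACME'},
]
index_paths = index_by(index_dir='indices',
                       index_extension='.unqlite',
                       data_iter=records,
                       key_fn=lambda d: d['id'],
                       value_fn=lambda d: d['name'],
                       checkpoint=100000,
                       object_name='records')
# index_paths maps each '@type' ('Person', 'Organization') to its index file path
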
Example 7
def main():
    print("Start", datetime.datetime.now().isoformat())
    before = default_timer()
    bug_report_file_path = sys.argv[1]
    print("bug report file path", bug_report_file_path)
    data_prefix = sys.argv[2]
    print("data prefix", data_prefix)

    bug_reports = load_bug_reports(bug_report_file_path)

    ast_cache_db = UnQLite(data_prefix + "_ast_cache_collection_db")

    vectorize(ast_cache_db, bug_reports, data_prefix)
    after = default_timer()
    total = after - before
    print("End", datetime.datetime.now().isoformat())
    print("total time ", total)
    ast_cache_db.close()
Example 8
class TestTransaction(BaseTestCase):
    """
    We must use a file-based database to test the transaction functions. See
    http://unqlite.org/forum/trouble-with-transactions+1 for details.
    """

    def setUp(self):
        self._filename = "test.db"
        self.db = UnQLite(self._filename)

    def tearDown(self):
        try:
            self.db.close()
        except:
            pass
        if os.path.exists(self._filename):
            os.unlink(self._filename)

    def test_transaction(self):
        @self.db.commit_on_success
        def _test_success(key, value):
            self.db[key] = value

        @self.db.commit_on_success
        def _test_failure(key, value):
            self.db[key] = value
            raise Exception("intentional exception raised")

        _test_success("k1", "v1")
        self.assertEqual(self.db["k1"], "v1")

        self.assertRaises(Exception, lambda: _test_failure("k2", "v2"))
        self.assertRaises(KeyError, lambda: self.db["k2"])

    def test_explicit_transaction(self):
        self.db.close()
        self.db.open()
        self.db.begin()
        self.db["k1"] = "v1"
        self.db.rollback()

        self.assertRaises(KeyError, lambda: self.db["k1"])
Example 9
class BaseTestCase(unittest.TestCase):
    def setUp(self):
        super(BaseTestCase, self).setUp()
        self.db = UnQLite(':mem:')
        self._filename = 'test.db'
        self.file_db = UnQLite(self._filename)

    def tearDown(self):
        try:
            self.file_db.close()
        except:
            pass
        if os.path.exists(self._filename):
            os.unlink(self._filename)

    def store_range(self, n, db=None):
        if db is None:
            db = self.db
        for i in range(n):
            db['k%s' % i] = str(i)
Example 11
class Settings(metaclass=Singleton):
    def __init__(self, filename=None):
        assert filename is not None
        if not os.path.exists(filename):
            # create the parent directory; Path.parent also handles names without a '/'
            Path(filename).parent.mkdir(parents=True, exist_ok=True)

        self.__db = UnQLite(filename)

        if self.__db_get('master_password_hash'):
            current_app.config['INIT_STATE'] = 2

    def write(self):
        self.__db.commit()

    @property
    def master_password_hash(self) -> str:
        return self.__db_get('master_password_hash')

    @master_password_hash.setter
    def master_password_hash(self, v: str):
        self.__db['master_password_hash'] = v

    @property
    def master_password_hash_salt(self) -> str:
        return self.__db_get('master_password_hash_salt')

    @master_password_hash_salt.setter
    def master_password_hash_salt(self, v: str):
        self.__db['master_password_hash_salt'] = v

    @property
    def ethereum_address(self) -> str:
        return self.__db_get('ethereum_address')

    @ethereum_address.setter
    def ethereum_address(self, v: str):
        self.__db['ethereum_address'] = v

    @property
    def blockchain_length(self) -> int:
        return int(self.__db_get('blockchain_length', 0))

    @blockchain_length.setter
    def blockchain_length(self, v: int):
        self.__db['blockchain_length'] = str(v)

    @property
    def blockchain(self) -> list:
        return json.loads(self.__db_get('blockchain', '[]'))

    @blockchain.setter
    def blockchain(self, v: list):
        self.__db['blockchain'] = json.dumps(v)

    def __del__(self):
        self.__db.close()

    def __db_get(self, key, default=None):
        if key in self.__db:
            return self.__db[key]
        return default
def calculate_graph_features(data_prefix, bug_report_id, bug_report_full_sha,
                             repository_path, sha_to_note):
    bug_report_files_collection_db = UnQLite(data_prefix +
                                             "_bug_report_files_collection_db",
                                             flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()

    shas = current_files['shas']
    sha_to_file_name = current_files['sha_to_file_name']

    graph_features_data_list = []
    graph_features_lookup = {}

    sha_to_imports = {}
    sha_to_class_name = {}
    for sha in shas:
        note_sha = sha_to_note[sha]
        note_content = cat_file_blob(repository_path, note_sha)
        imports = json.loads(note_content)
        sha_to_imports[sha] = imports
        if 'className' in imports and imports[
                'className'] is not None and imports['className'] != "":
            class_name = imports['className']
            class_name = class_name.replace(".", "")
            sha_to_class_name[sha] = class_name

    graph_data = process_graph_results(sha_to_imports)

    current_index = 0
    for sha in shas:
        current_file_name = sha_to_file_name[sha]
        # print(sha)
        # print(current_file_name)
        try:
            current_node_name = sha_to_class_name[sha]
            # print(current_node_name)
            # print(graph_data.loc[current_node_name])
            # exit(0)
            values = graph_data.loc[current_node_name]
            feature_15 = values['in']
            feature_16 = values['out']
            feature_17 = values['pr']
            feature_18 = values['a']
            feature_19 = values['h']
        except KeyError:
            feature_15 = 0.0
            feature_16 = 0.0
            feature_17 = 0.0
            feature_18 = 0.0
            feature_19 = 0.0

        current_features = sparse.coo_matrix(
            [feature_15, feature_16, feature_17, feature_18, feature_19])
        #        print(current_features.shape)
        #        print(current_features)
        #        exit(0)
        graph_features_data_list.append(current_features)
        graph_features_lookup[sha] = current_index
        current_index += 1

    graph_features_data = sparse.vstack(graph_features_data_list)

    sparse.save_npz(
        data_prefix + '_' + bug_report_id[0:7] + '_graph_features_data',
        graph_features_data)
    with open(
            data_prefix + '_' + bug_report_id[0:7] +
            '_graph_features_index_lookup', 'w') as outfile:
        json.dump(graph_features_lookup, outfile)

    return bug_report_id
Example 13
agregados = 0
for i, cajero_json in enumerate(cajeros_json):
    db = UnQLite('cajeros.db')
    db.open()
    cajero = {}
    cajero['id'] = cajero_json['id']
    cajero['clave_institucion'] = cajero_json['cb']
    cajero['lat'] = cajero_json['l']['lat']
    cajero['lon'] = cajero_json['l']['lng']
    cajero['nombre_institucion'] = NOMBRES[cajero['clave_institucion']]
    try:
        print('Cajero ' + str(i) + ' de ' + str(total_cajeros) + ', ' + str(cajero['id']) + ' existe? ' + str(db.exists(cajero['id'])))
        if not db.exists(cajero['id']):
            url_cajero = CAJERO_URL + '?id=' + str(cajero['id']) + '&banco=' + str(cajero['clave_institucion'])
            cajero_json = requests.get(url_cajero).json()['contenido']
            cajero['cp'] = str(cajero_json['cp'])
            cajero['horario'] = cajero_json['hs']
            cajero['direccion'] = cajero_json['d']
            cajero['actualizacion'] = str(datetime.datetime.now())
            db[cajero['id']] = cajero
            print('Agregado: ' + str(cajero))
            agregados += 1
    except UnicodeEncodeError:
        print('UnicodeEncodeError')
        print(cajero)
    finally:
        db.close()
print('Cajeros agregados: ' + str(agregados))
def convert_tf_idf(data_prefix, bug_report_full_sha):
    bug_report_files_collection_db = UnQLite(data_prefix +
                                             "_bug_report_files_collection_db",
                                             flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()

    bug_report_id = bug_report_full_sha[0:7]

    shas = current_files['shas']
    class_name_lookup = current_files['class_name_to_sha']

    ast_index_collection = UnQLite(data_prefix +
                                   "_ast_index_collection_index_db",
                                   flags=0x00000100 | 0x00000001)
    data = sparse.load_npz(data_prefix + '_raw_count_data.npz')

    data_to_tf_idf = []
    lookups = {}
    n_rows = 0
    for sha in shas:
        current_indexes = pickle.loads(ast_index_collection[sha])
        #        print(sha)
        #        print(current_indexes)
        (matrix, lookup) = extract_ast(data, current_indexes)
        #        print(lookup)
        #        print(matrix.shape)
        current_index = n_rows
        data_to_tf_idf.append(matrix)
        for k in lookup:
            lookup[k] += current_index
        lookups[sha] = lookup
        n_rows += matrix.shape[0]

    ast_index_collection.close()

    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db",
        flags=0x00000100 | 0x00000001)
    current_bug_report_indexes = pickle.loads(
        bug_report_index_collection[bug_report_id])
    bug_report_index_collection.close()

    bug_report_matrix, bug_report_lookup = extract_bug_report(
        data, current_bug_report_indexes)
    current_index = n_rows
    data_to_tf_idf.append(bug_report_matrix)
    for k in bug_report_lookup:
        bug_report_lookup[k] += current_index
    lookups[bug_report_id] = bug_report_lookup
    n_rows += bug_report_matrix.shape[0]

    data_matrix = sparse.vstack(data_to_tf_idf)

    transformer = TfidfTransformer()
    tf_idf_data = transformer.fit_transform(data_matrix)
    #    print("tf_idf_data shape",tf_idf_data.shape)
    sparse.save_npz(data_prefix + '_' + bug_report_id + '_tf_idf_data',
                    tf_idf_data)
    with open(data_prefix + '_' + bug_report_id + '_tf_idf_index_lookup',
              'w') as outfile:
        json.dump(lookups, outfile)
Example 15
'''
('usr', b'a\x1e\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02L\\\x0b\xb5')
('usr_0', '\x01\x08\x00\x00\x00\x04name\x05\x08\x00\x00\x00\x04neet\x06\x08\x00\x00\x00\x04uuid\x05\x08\x00\x00\x00\x08D7B810FD\x06\x08\x00\x00\x00\x04__id\x05\n\x00\x00\x00\x00\x00\x00\x00\x00\x06\x02')
('usr_1', '\x01\x08\x00\x00\x00\x04name\x05\x08\x00\x00\x00\x05limbo\x06\x08\x00\x00\x00\x04uuid\x05\x08\x00\x00\x00\x08D7B810FC\x06\x08\x00\x00\x00\x04__id\x05\n\x00\x00\x00\x00\x00\x00\x00\x01\x06\x02')
Well, 'collection' is also serialized
'''
usr.store(u1)
n = usr.filter(lambda o: o['name'] == 'neet')
# filter() scans every record; the collection has no key-based search,
# so the helpers below add one via a uuid -> record-id mapping in the kv store
print(n)


def muti_store(db, coll, uuid, dic):
    next_id = coll.last_record_id() + 1
    db[uuid] = next_id
    coll.store(dic)
    return True


def search(db, coll, uuid):
    return coll.fetch(int(db[uuid]))


u2 = {'name': 'limbo', 'uuid': 'D7B810FC'}
muti_store(udb, usr, 'D7B810FC', u2)
n2 = search(udb, usr, 'D7B810FC')
print(n2)

udb.close()
Example 16
def process_bug_report(data_prefix, bug_report_full_sha, bug_report_file_path,
                       max_frequency):
    bug_report_files_collection_db = UnQLite(data_prefix +
                                             "_bug_report_files_collection_db",
                                             flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()

    shas = current_files['shas']
    sha_to_file_name = current_files['sha_to_file_name']

    bug_report_id = bug_report_full_sha[0:7]
    vectorized_data = sparse.load_npz(data_prefix + '_' + bug_report_id +
                                      '_tf_idf_data.npz')
    with open(data_prefix + '_' + bug_report_id + '_tf_idf_index_lookup',
              'r') as index_lookup_file:
        lookups = json.load(index_lookup_file)

    enriched_api_data, enriched_api_indexes = load_enriched_api(
        data_prefix, bug_report_id)
    enriched_report = enriched_api_data[-1, :]

    (vectorized_report, vectorized_summary,
     vectorized_description) = load_bug_report(vectorized_data, lookups,
                                               bug_report_id)

    ast_cache_collection = UnQLite(data_prefix + "_ast_cache_collection_db",
                                   flags=0x00000100 | 0x00000001)
    bug_reports = load_bug_reports(bug_report_file_path)
    if bug_report_id in bug_reports:
        current_bug_report_summary = bug_reports[bug_report_id]['bug_report'][
            'summary']
    else:
        current_bug_report_summary = retrieve_summary(
            bug_reports, bug_report_full_sha)['bug_report']['summary']

    feature_3_data = sparse.load_npz(data_prefix + '_' + bug_report_id +
                                     '_feature_3_data.npz')
    with open(data_prefix + '_' + bug_report_id + '_feature_3_index_lookup',
              'r') as feature_3_file:
        feature_3_file_lookup = json.load(feature_3_file)

    graph_data = sparse.load_npz(data_prefix + '_' + bug_report_id +
                                 '_graph_features_data.npz').tocsr()
    with open(
            data_prefix + '_' + bug_report_id + '_graph_features_index_lookup',
            'r') as graph_lookup_file:
        graph_lookup = json.load(graph_lookup_file)

    features_5_6_data = sparse.load_npz(data_prefix + '_' +
                                        bug_report_id[0:7] +
                                        '_features_5_6_data.npz').tocsr()
    with open(
            data_prefix + '_' + bug_report_id[0:7] +
            '_features_5_6_index_lookup', 'r') as features_5_6_lookup_file:
        features_5_6_lookup = json.load(features_5_6_lookup_file)

    if bug_report_id in bug_reports:
        fixed_filenames = bug_reports[
            bug_report_id[0:7]]['commit']['diff'].keys()
    else:
        fixed_filenames = retrieve_summary(
            bug_reports, bug_report_full_sha)['commit']['diff'].keys()

    features = []
    features_files = []

    for file_index, current_file_sha in enumerate(shas):
        current_lookup = lookups[current_file_sha]
        source_index = current_lookup['source']

        method_source_start_index = current_lookup['methodsStart']
        method_source_end_index = current_lookup['methodsEnd']

        class_start_index = current_lookup['classNamesStart']
        class_end_index = current_lookup['classNamesEnd']

        method_names_start_index = current_lookup['methodNamesStart']
        method_names_end_index = current_lookup['methodNamesEnd']

        variable_start_index = current_lookup['variableNamesStart']
        variable_end_index = current_lookup['variableNamesEnd']

        comment_start_index = current_lookup['commentsStart']
        comment_end_index = current_lookup['commentsEnd']

        current_graph_lookup = graph_lookup[current_file_sha]

        current_features_5_6 = features_5_6_lookup[current_file_sha]

        current_file_name = sha_to_file_name[current_file_sha]

        f1 = feature_1(vectorized_report, vectorized_data, source_index,
                       method_source_start_index, method_source_end_index)
        f2 = feature_2(enriched_report, enriched_api_data,
                       enriched_api_indexes, current_file_sha)
        f3 = feature_3(feature_3_data, feature_3_file_lookup, current_file_sha)
        f4 = feature_4(current_bug_report_summary, ast_cache_collection,
                       current_file_sha)

        f5 = (features_5_6_data[current_features_5_6, 0])
        f6 = (features_5_6_data[current_features_5_6, 1]) / max_frequency

        f7 = feature_sim(vectorized_summary, vectorized_data,
                         class_start_index, class_end_index)
        f8 = feature_sim(vectorized_summary, vectorized_data,
                         method_names_start_index, method_names_end_index)
        f9 = feature_sim(vectorized_summary, vectorized_data,
                         variable_start_index, variable_end_index)
        f10 = feature_sim(vectorized_summary, vectorized_data,
                          comment_start_index, comment_end_index)

        f11 = feature_sim(vectorized_description, vectorized_data,
                          class_start_index, class_end_index)
        f12 = feature_sim(vectorized_description, vectorized_data,
                          method_names_start_index, method_names_end_index)
        f13 = feature_sim(vectorized_description, vectorized_data,
                          variable_start_index, variable_end_index)
        f14 = feature_sim(vectorized_description, vectorized_data,
                          comment_start_index, comment_end_index)

        f15 = graph_data[current_graph_lookup, 0]
        f16 = graph_data[current_graph_lookup, 1]
        f17 = graph_data[current_graph_lookup, 2]
        f18 = graph_data[current_graph_lookup, 3]
        f19 = graph_data[current_graph_lookup, 4]

        if current_file_name in fixed_filenames:
            used_in_fix = 1.0
        else:
            used_in_fix = 0.0

        features.append([
            f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
            f16, f17, f18, f19, used_in_fix
        ])
        features_files.append(current_file_sha)

    ast_cache_collection.close()

    sparse_features = sparse.csr_matrix(features)
    sparse.save_npz(data_prefix + '_' + bug_report_id + '_features',
                    sparse_features)
    with open(data_prefix + '_' + bug_report_id + '_files', 'w') as outfile:
        json.dump(features_files, outfile)
Example 17
def vectorize(ast_cache, bug_reports, data_prefix):
    data = []
    current_index = 0

    ast_index_lookup = {}
    ast_types_lookup = {}
    with ast_cache.cursor() as cursor:
        for k, v in cursor:
            ast_sha = k
            current_ast = pickle.loads(v)
            data, current_index, current_lookup = add_ast_to_vectorization_data(
                data, current_index, current_ast)
            ast_index_lookup[ast_sha] = current_lookup
            ast_types_lookup[ast_sha] = extract_types(current_ast)

    stemmer = PorterStemmer()

    print("data length", len(data))
    print("current index", current_index)

    bug_report_index_lookup = {}
    for bug_report_id in tqdm(bug_reports):
        current_bug_report = bug_reports[bug_report_id]['bug_report']
        data, current_index, current_lookup = add_bug_report_to_vectorization_data(
            data, current_index, current_bug_report, stemmer)
        bug_report_index_lookup[bug_report_id[0:7]] = current_lookup

    print("data length", len(data))
    print("current index", current_index)

    before_v = default_timer()
    vectorizer = DictVectorizer()
    vectorized_data = vectorizer.fit_transform(data)
    after_v = default_timer()
    total_v = after_v - before_v
    print("total count vectorization time ", total_v)
    print("vectorized_data type ", type(vectorized_data))
    print("vectorized_data shape", vectorized_data.shape)

    feature_names = vectorizer.get_feature_names()
    feature_names_lengths_dict = {}
    for i, feature_name in enumerate(feature_names):
        feature_names_lengths_dict[i] = len(feature_name)
    with open(data_prefix + '_feature_names_dict', 'w') as outfile:
        json.dump(feature_names_lenghts_dict, outfile)

    sparse.save_npz(data_prefix + '_raw_count_data', vectorized_data)

    ast_index_collection = UnQLite(data_prefix +
                                   "_ast_index_collection_index_db")
    for k, v in ast_index_lookup.items():
        ast_index_collection[k] = pickle.dumps(v, -1)

    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db")
    for k, v in bug_report_index_lookup.items():
        bug_report_index_collection[k] = pickle.dumps(v, -1)

    ast_types_collection = UnQLite(data_prefix +
                                   "_ast_types_collection_index_db")
    for k, v in ast_types_lookup.items():
        ast_types_collection[k] = pickle.dumps(v, -1)

    ast_index_collection.close()
    bug_report_index_collection.close()
    ast_types_collection.close()
Example 18
class UnQLiteTest(unittest.TestCase):
    def setUp(self):
        self.app = bottle.Bottle(catchall=False)
        _, filename = tempfile.mkstemp(suffix='.unqlite')
        self.plugin = self.app.install(unqlite.Plugin(filename=filename))

        self.conn = UnQLite(filename)
        self.conn.collection('todo').create()
        self.conn.close()

    def tearDown(self):
        pass
        # os.unlink(self.plugin.filename)

    def test_with_keyword(self):
        @self.app.get('/')
        def test(db):
            self.assertEqual(type(db), type(UnQLite(':mem:')))

        self._request('/')

    def test_without_keyword(self):
        @self.app.get('/')
        def test_1():
            pass

        self._request('/')

        @self.app.get('/2')
        def test_2(**kw):
            self.assertFalse('db' in kw)

        self._request('/2')

    def test_install_conflicts(self):
        self.app.install(unqlite.Plugin(keyword='db2'))

        @self.app.get('/')
        def test(db, db2):
            pass

        # I have two plugins working with different names
        self._request('/')

    def test_commit_on_redirect(self):
        @self.app.get('/')
        def test(db):
            self._insert_into(db)
            bottle.redirect('/')

        self._request('/')
        self.assert_records(1)

    def test_commit_on_abort(self):
        @self.app.get('/')
        def test(db):
            self._insert_into(db)
            bottle.abort()

        self._request('/')
        self.assert_records(0)

    def _request(self, path, method='GET'):
        return self.app({
            'PATH_INFO': path,
            'REQUEST_METHOD': method
        }, lambda x, y: None)

    def _insert_into(self, db):
        db.collection('todo').store({'task': 'PASS'})

    def assert_records(self, count):
        self.conn.open()
        actual_count = len(self.conn.collection('todo').all())
        self.conn.close()
        self.assertEqual(count, actual_count)
Example 19
class Donurista:
	def __init__(self, brpath, dbpath):
		self.br = Engine(brpath)
		# preset backend engine options (cute chess)
		self.br.write('setoption name Threads value 3')
		self.br.write('setoption name Hash value 4096')
		self.db = UnQLite(dbpath)
		self.board = Board()
		self.func_book = {
			'uci': self.uci,
			'isready': self.isready,
			'go': self.go,
			'quit': self.quit,
			'position': self.position
		}
		self.hello()
		try:
			self.start()
		except KeyboardInterrupt:
			self.quit(None)

	def simple(self, inp):
		self.br.write(inp)

	def uci(self, inp): # TODO: normal options
		print('id name Donurista')
		print('id author Gornak40')
		print('option name Debug Log File type string default') 
		print('option name Contempt type spin default 24 min -100 max 100')
		print('option name Analysis Contempt type combo default Both var Off var White var Black var Both')
		print('option name Threads type spin default 1 min 1 max 512')
		print('option name Hash type spin default 16 min 1 max 33554432')
		print('option name Clear Hash type button')
		print('option name Ponder type check default false')
		print('option name MultiPV type spin default 1 min 1 max 500')
		print('option name Skill Level type spin default 20 min 0 max 20')
		print('option name Move Overhead type spin default 10 min 0 max 5000')
		print('option name Slow Mover type spin default 100 min 10 max 1000')
		print('option name nodestime type spin default 0 min 0 max 10000')
		print('option name UCI_Chess960 type check default false')
		print('option name UCI_AnalyseMode type check default false')
		print('option name UCI_LimitStrength type check default false')
		print('option name UCI_Elo type spin default 1350 min 1350 max 2850')
		print('option name UCI_ShowWDL type check default false')
		print('option name SyzygyPath type string default <empty>')
		print('option name SyzygyProbeDepth type spin default 1 min 1 max 100')
		print('option name Syzygy50MoveRule type check default true')
		print('option name SyzygyProbeLimit type spin default 7 min 0 max 7')
		print('option name Use NNUE type check default true')
		print('option name EvalFile type string default nn-82215d0fd0df.nnue')
		print('uciok')

	def isready(self, inp):
		print('readyok')

	def is_cached(self): # TODO: ARGS MODIFICATION
		return False

	def make_new_fen(self):
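		# strip the halfmove clock and fullmove number so transpositions that
		# differ only in move counters share a single cache key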
		return ' '.join(self.board.fen().split()[:-2])

	def write_db(self, info, bestmove):
		data = f'{info.depth};{bestmove.bestmove}'
		with self.db.transaction():
			self.db[self.make_new_fen()] = data

	def go(self, inp):
		fen = self.make_new_fen()
		db_depth = 0
		T = time()
		if fen in self.db:
			logging.info('[+] update db') # testing feature
			db_depth, db_move = self.db[fen].decode('utf-8').split(';')
			db_depth = int(db_depth)
			if self.is_cached():
				print('info smart cache moves')
				print(f'bestmove {db_move}')
				return
		T = time() - T
		logging.info(f'[+] db timing {T}')
		self.br.write(inp)
		br_pred = self.br.readfor(lambda x: 'bestmove' in x)
		info = Info(br_pred[-2])
		bestmove = Info(br_pred[-1])
		if info.depth > db_depth:
			self.write_db(info, bestmove)
		logging.info(f'[+] db size {len(self.db)}')
		print(info)
		print(bestmove)

	def position(self, inp):
		self.br.write(inp)
		self.board = Board()
		for token in inp.split():
			if token in {'startpos', 'position', 'moves'}:
				continue
			self.board.push_san(token)

	def start(self):
		while True:
			inp = input()
			if not inp:
				continue
			logging.info(inp)
			com = inp.split()[0]
			func = self.func_book.get(com, self.simple)
			self.br.isready()
			func(inp)

	def hello(self):
		F = Figlet()
		text = F.renderText('Donurista')
		print(text)

	def quit(self, inp):
		logging.info('[+] ENGINE TERMINATED')
		del self.br
		self.db.close()
		exit(0)
def add_types_source_to_bug_report_data(data, data_prefix, class_name_lookup,
                                        ast_sha):
    asts = UnQLite(data_prefix + "_ast_index_collection_index_db",
                   flags=0x00000100 | 0x00000001)
    types = UnQLite(data_prefix + "_ast_types_collection_index_db",
                    flags=0x00000100 | 0x00000001)

    #    current_type = types[ast_sha]
    #    print "searching", ast_sha
    current_type = pickle.loads(types[ast_sha])
    #    print "found", ast_sha
    #    print current_type['methodVariableTypes']
    #    exit(0)
    types_per_method = current_type['methodVariableTypes']

    cl = data.shape[1]

    current_index = 0

    start = current_index
    enriched_apis = []
    for method_types in types_per_method:
        method_type_shas = []

        for method_type in method_types:
            if method_type in class_name_lookup:
                method_type_shas.append(class_name_lookup[method_type])

        supertypes_shas_per_type = [
            set(find_types_shas(types, class_name_lookup, s))
            for s in method_type_shas
        ]

        indexes = []
        for supertypes in supertypes_shas_per_type:
            indexes.extend(get_indexes(asts, supertypes))

        if indexes == []:
            method_enriched_api = sparse.coo_matrix(
                np.zeros(cl).reshape(1, cl))
        else:
            method_enriched_api = sparse.coo_matrix(
                np.sum((data[indexes, :]), axis=0))

        enriched_apis.append(method_enriched_api)

    if enriched_apis == []:
        class_enriched_api = sparse.coo_matrix(np.zeros(cl).reshape(1, cl))
    else:
        class_enriched_api = sparse.coo_matrix(np.sum(enriched_apis, axis=0))

    enriched_apis.append(class_enriched_api)

    current_index += len(enriched_apis)

    asts.close()
    types.close()

    lookup = {}
    lookup['enrichedApiStart'] = start
    lookup['enrichedApiEnd'] = current_index - 1

    enriched_apis_matrix = sparse.vstack(enriched_apis)

    return (enriched_apis_matrix, lookup, ast_sha)
Example 21
#!/usr/bin/python3
from unqlite import UnQLite
from prettytable import PrettyTable
from argparse import ArgumentParser


def print_all():
    P = PrettyTable(['#', 'Fen', 'Depth', 'Move'])
    for i, (fen, info) in enumerate(db):
        info = info.decode('utf-8')
        depth, move = info.split(';')
        P.add_row([i + 1, fen, depth, move])
    print(P)


parser = ArgumentParser(description='Donurista quick DB viewer')
parser.add_argument('--all',
                    '-a',
                    action='store_const',
                    help='print all positions (long time)',
                    const=True,
                    default=False)
db = UnQLite('new.db')
args = parser.parse_args()
if args.all:
    print_all()
else:
    print(len(db))
db.close()
Example 22
def calculate_feature_3(data_prefix, bug_report_id, bug_report_full_sha,
                        bug_reports):
    bug_report_files_collection_db = UnQLite(data_prefix +
                                             "_bug_report_files_collection_db",
                                             flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()

    shas = current_files['shas']
    sha_to_file_name = current_files['sha_to_file_name']

    data = sparse.load_npz(data_prefix + '_raw_count_data.npz')
    row_length = data.shape[1]

    current_bug_report = bug_reports[bug_report_id]

    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db",
        flags=0x00000100 | 0x00000001)
    current_bug_report_summary_index = pickle.loads(
        bug_report_index_collection[bug_report_id[0:7]])['summary']

    feature_3_data_list = []
    feature_3_lookup = {}

    if 'views' in current_bug_report and 'bug_fixing' in current_bug_report[
            'views']:
        bug_fixing_view = current_bug_report['views']['bug_fixing']
        current_index = 0
        for sha in shas:
            current_file_name = sha_to_file_name[sha]
            if current_file_name in bug_fixing_view:
                related_bug_reports = bug_fixing_view[current_file_name]['br']
                # print("Present",sha)
                # print(related_bug_reports)
                bug_report_history = combine(related_bug_reports, data,
                                             bug_report_index_collection)
            else:
                bug_report_history = np.zeros((1, row_length))
                # print("Not present",sha)
            feature_3_data_list.append(bug_report_history)
            feature_3_lookup[sha] = current_index
            current_index += 1
    else:
        current_index = 0
        for sha in shas:
            bug_report_history = np.zeros((1, row_length))
            feature_3_data_list.append(bug_report_history)
            feature_3_lookup[sha] = current_index
            current_index += 1

    bug_report_index_collection.close()

    feature_3_data_list.append(data[current_bug_report_summary_index, :])

    feature_3_data = sparse.vstack(feature_3_data_list)

    transformer = TfidfTransformer()
    feature_3_tf_idf_data = transformer.fit_transform(feature_3_data)

    sparse.save_npz(data_prefix + '_' + bug_report_id[0:7] + '_feature_3_data',
                    feature_3_tf_idf_data)
    with open(
            data_prefix + '_' + bug_report_id[0:7] + '_feature_3_index_lookup',
            'w') as outfile:
        json.dump(feature_3_lookup, outfile)

    return bug_report_id
def retrieve_features_5_6(data_prefix, bug_report_id, bug_report_full_sha,
                          bug_reports):
    bug_report_files_collection_db = UnQLite(data_prefix +
                                             "_bug_report_files_collection_db",
                                             flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()

    shas = current_files['shas']
    sha_to_file_name = current_files['sha_to_file_name']

    current_bug_report = bug_reports[bug_report_id]

    features_5_6_data_list = []
    features_5_6_lookup = {}

    max_recency = 0.0
    max_frequency = 0.0

    if 'views' in current_bug_report and 'bug_fixing' in current_bug_report[
            'views']:
        bug_fixing_view = current_bug_report['views']['bug_fixing']
        current_index = 0
        for sha in shas:
            current_file_name = sha_to_file_name[sha]
            if current_file_name in bug_fixing_view:
                recency = bug_fixing_view[current_file_name][
                    'recency[30-day months]']
                frequency = bug_fixing_view[current_file_name]['frequency']
                features_5_6_data_list.append(
                    sparse.coo_matrix([recency, frequency], shape=(1, 2)))
                features_5_6_lookup[sha] = current_index
                current_index += 1
                if recency > max_recency:
                    max_recency = recency
                if frequency > max_frequency:
                    max_frequency = frequency
            else:
                recency = 0.0
                frequency = 0.0
                features_5_6_data_list.append(
                    sparse.coo_matrix([recency, frequency], shape=(1, 2)))
                features_5_6_lookup[sha] = current_index
                current_index += 1
    else:
        current_index = 0
        for sha in shas:
            recency = 0.0
            frequency = 0.0
            features_5_6_data_list.append(
                sparse.coo_matrix([recency, frequency], shape=(1, 2)))
            features_5_6_lookup[sha] = current_index
            current_index += 1

    features_5_6_data = sparse.vstack(features_5_6_data_list)

    sparse.save_npz(
        data_prefix + '_' + bug_report_id[0:7] + '_features_5_6_data',
        features_5_6_data)
    with open(
            data_prefix + '_' + bug_report_id[0:7] +
            '_features_5_6_index_lookup', 'w') as outfile:
        json.dump(features_5_6_lookup, outfile)

    return bug_report_id, max_recency, max_frequency
Example 24
class PyCachEngine:
    def __init__(self, path, db_path, options=dict()):
        self.board = Board()
        self.db = UnQLite(db_path)
        self.engine = Popen(path,
                            universal_newlines=True,
                            stdin=PIPE,
                            stdout=PIPE)
        self._put('uci')
        self._ready()
        for option, val in options.items():
            self._set_option(option, val)
        self.num_games = 1
        while True:
            self.board.reset()
            self.learn(200)

    def __del__(self):
        self.db.close()
        self.engine.kill()

    def _put(self, line):
        if not self.engine.stdin:
            raise BrokenPipeError()
        self.engine.stdin.write(line + '\n')
        self.engine.stdin.flush()

    def _read(self):
        if not self.engine.stdout:
            raise BrokenPipeError()
        return self.engine.stdout.readline().strip()

    def _ready(self):
        self._put('isready')
        while self._read() != 'readyok':
            continue

    def _bestmove(self):
        depth = 0  # fallback in case the engine prints bestmove before any depth line
        while True:
            line = self._read()
            if 'depth' in line:
                depth = int(line.split()[2])
            if 'bestmove' in line:
                move = line.split()[1]
                return (move, depth)

    def _set_option(self, option, value):
        self._put(f'setoption name {option} value {value}')  # UCI syntax: setoption name <id> value <x>

    def _store(self, new_fen, move, depth):
        with self.db.transaction():
            if new_fen in self.db:
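                # stored values come back as bytes of the tuple's repr, hence decode + eval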
                _move, _depth = eval(self.db[new_fen].decode('utf-8'))
                print(_move, _depth)
                if int(_depth) >= depth:
                    return
            self.db[new_fen] = (move, depth)
        self.db.commit()

    def learn(self, movetime):
        fen = self.board.fen()
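        # the cache key omits the last two FEN fields (halfmove clock and fullmove number)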
        new_fen = ' '.join(fen.split()[:-2])
        self._put(f'position fen {fen}')
        self._put(f'go movetime {movetime}')
        move, depth = self._bestmove()
        self.board.push_uci(move)
        self._store(new_fen, move, depth)
        system('clear')
        #		print(fen)
        print(self.board)
        print()
        print('new_fen:', new_fen)
        print('depth:', depth)
        print('move:', move)
        print('db_size:', len(self.db))
        print('num_games:', self.num_games)
        if not self.board.is_game_over():
            self.learn(movetime)
        else:
            result = self.board.outcome().result()
            self.num_games += 1
            print(result)
Example 25
#!/usr/bin/python3
from unqlite import UnQLite

dbr = UnQLite('donurista.db')
dbw = UnQLite('new.db')

for fen, info in dbr:
    info = info.decode('utf-8')
    fen = ' '.join(fen.split()[:-2])
    dbw[fen] = info
    print(fen)

dbr.close()
dbw.close()