Example #1
def build_database(events, dbname="mancala.db"):
    moves_count = 0
    wins = [0, 0]
    database = SqliteDict(dbname)
    sows = []
    for e in events:
        if e[0] == "sow":
            sows.append(e)
            moves_count += 1
        elif e[0] == "end":
            winner = e[2]
            if winner is not None:
                wins[winner] += 1
            diff = e[1][0] - e[1][1]
            for _, stones, player, hole_idx in sows:
                stones = tuple(stones)
                key = repr((player, stones))
                if key not in database:
                    database[key] = {}
                if hole_idx not in database[key]:
                    entry = database[key]
                    entry[hole_idx] = (0, 0)
                    database[key] = entry
                entry = database[key]
                total, count = entry[hole_idx]
                entry[hole_idx] = (total + diff, count + 1)
                database[key] = entry
            sows.clear()
    database.commit()
    return database, wins, moves_count
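
The entries written above map repr((player, board)) to a dict of hole_idx -> (summed score difference, game count). A minimal read-side sketch under that assumption (the best_hole helper is not part of the original code):

from sqlitedict import SqliteDict

def best_hole(player, board, dbname="mancala.db"):
    """Return the hole index with the highest average score difference
    for the given (player, board state), or None if the state is unseen."""
    with SqliteDict(dbname, flag="r") as database:
        key = repr((player, tuple(board)))
        if key not in database:
            return None
        entry = database[key]  # hole_idx -> (total score diff, times played)
        return max(entry, key=lambda hole: entry[hole][0] / entry[hole][1])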
Example #2
    def test_snopt_hotstart_starting_from_grad(self):
        self.optName = "SNOPT"
        self.setup_optProb()
        histName = f"{self.id()}.hst"

        # Optimize without hot start and store the history
        self.optimize(storeHistory=histName, hotStart=False)

        # Load the history dictionary
        hist = SqliteDict(histName)

        # Delete the last two keys in the dictionary
        lastKey = hist["last"]
        for i in range(2):
            del hist[str(int(lastKey) - i)]
        hist.commit()

        # Optimize starting from the modified history file
        # The first call will be a gradient evaluation
        self.optimize(storeHistory=False, hotStart=histName)

        # Check that we had two function evaluations
        # The first is from a recursive call and the second is the 'last' call we deleted
        self.assertEqual(self.nf, 2)

        # Also check that we had two gradient evaluations
        # The first is from a call we deleted and the second is the call after 'last'
        self.assertEqual(self.ng, 2)
Example #3
class EmbeddingIndexer(object):
    def __init__(self, embedding_file, index_file):
        self.embedding_file = embedding_file
        self.index_file = index_file
        logger.info("Input path: " + self.embedding_file)
        logger.info("Index path: " + self.index_file)
        self.embedding = SqliteDict(os.path.join(self.index_file, EMBEDDING),
                                    autocommit=True)

    def iterator(self):
        with open(self.embedding_file) as f:
            for line in f:
                tokens = line.strip().split(" ")
                if len(tokens) == 2:
                    # skip the header line (e.g. "<vocab_size> <dim>") of a word2vec-style embedding file
                    continue
                uri = tokens[0]
                embedding = np.array(tokens[1:], dtype=np.float32)
                yield (uri, embedding)

    def run(self):
        count = 0
        for key, value in self.iterator():
            self.embedding[key] = value
            count += 1
            if count % 20000 == 0:
                self.embedding.commit()
                logger.info("[{}] {} index added.".format(
                    datetime.datetime.now(), count))
        self.embedding.close()
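
A read-side sketch for the index built above (the concrete path is whatever os.path.join(self.index_file, EMBEDDING) resolved to; the helper below is illustrative only):

from sqlitedict import SqliteDict

def lookup_embedding(index_path, uri):
    """Fetch the numpy vector stored by EmbeddingIndexer.run() for a URI."""
    with SqliteDict(index_path, flag="r") as embeddings:
        return embeddings.get(uri)  # numpy array, or None if the URI is unknown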
Example #4
def clear_db(db_path_shadow: str) -> None:
    doc_vecs_db = SqliteDict(db_path_shadow)
    print("Clearing db {}".format(db_path_shadow))
    for key in tqdm(doc_vecs_db.keys()):
        del doc_vecs_db[key]
    doc_vecs_db.commit()
    doc_vecs_db.close()
Example #5
 def __test_irregular_tablenames(tablename):
     # nested helper inside a test case: `self` below comes from the enclosing method's closure
     filename = ':memory:'
     db = SqliteDict(filename, tablename=tablename)
     db['key'] = 'value'
     db.commit()
     self.assertEqual(db['key'], 'value')
     db.close()
Example #6
    def _remote():
        """ Code to execute on forked process. """
        service = _create_srpo_service(obj, name, registry_path=registry_path)
        # set new process group

        protocol = dict(allow_all_attrs=True)

        kwargs = dict(
            hostname="localhost",
            nbThreads=server_threads,
            protocol_config=protocol,
            port=port,
        )

        server = ThreadPoolServer(service(), **kwargs)
        sql_kwargs = dict(
            filename=server_registry.filename,
            tablename=server_registry.tablename,
            flag="c",
        )
        # register new server
        registery = SqliteDict(**sql_kwargs)
        registery[name] = (server.host, server.port, os.getpid())
        registery.commit()
        # get a new new view of registry, make sure name is there
        assert name in SqliteDict(**sql_kwargs)
        service._server = server
        server.start()
Example #7
def _persist_v0(file_path, zg):
    print('Creating db...')
    persisted = SqliteDict(file_path, autocommit=False)
    print('Updating data...')
    persisted.update(zg.country_postal_codes)
    print('Committing data...')
    persisted.commit()
Example #8
def main(_):
    if FLAGS.in_memory:
        db = dict()
    else:
        db = SqliteDict(FLAGS.db, autocommit=True, journal_mode='OFF')

    for data in generate_from_wikidump(FLAGS.input):

        id = data['id']

        try:
            wikilink = data['sitelinks']['enwiki']['title']
        except KeyError:
            logger.debug('No enwiki title found for entity "%s"', id)
            continue
        else:
            wikilink = format_wikilink(wikilink)

        logger.debug('id: "%s" - enwiki title: "%s"', id, wikilink)
        if wikilink in db:
            logger.warning('Collision for enwiki title: "%s"', wikilink)

        db[wikilink] = id

    if FLAGS.in_memory:
        logger.info('Dumping')
        with open(FLAGS.db, 'wb') as f:
            pickle.dump(db, f)
    else:
        db.commit()

    logger.info('Done')
Example #9
    def test_readonly(self):
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = SqliteDict(filename=fname)
        orig_db['key'] = 'value'
        orig_db['key_two'] = 2
        orig_db.commit()
        orig_db.close()

        readonly_db = SqliteDict(filename=fname, flag='r')
        self.assertTrue(readonly_db['key'] == 'value')
        self.assertTrue(readonly_db['key_two'] == 2)

        def attempt_write():
            readonly_db['key'] = ['new_value']

        def attempt_update():
            readonly_db.update(key='value2', key_two=2.1)

        def attempt_delete():
            del readonly_db['key']

        def attempt_clear():
            readonly_db.clear()

        def attempt_terminate():
            readonly_db.terminate()

        attempt_funcs = [
            attempt_write, attempt_update, attempt_delete, attempt_clear,
            attempt_terminate
        ]

        for func in attempt_funcs:
            with self.assertRaises(RuntimeError):
                func()
Example #10
def test_sqlitedict_write(text):
    d = SqliteDict(f'debug_{datasize}.sqlite')
    for j in range(3):
        for i, line in enumerate(text):
            d[str(i + j * len(text))] = line
    d.commit()
    d.close()
Example #11
class Storage:

    def __init__(self, mode=StorageModes.MEMORY):
        self.mode = mode
        if self.mode == StorageModes.PERSISTENT:
            self.cache = SqliteDict('../my_db.sqlite', autocommit=True)
        elif self.mode == StorageModes.MEMORY:
            self.cache = dict()

    def set(self, k, v):

        self.cache[k] = v
        if self.mode == StorageModes.PERSISTENT:
            # need to commit manually, as autocommit only commits with commit(blocking=False) and might not persist data
            self.cache.commit()

    def dump(self, k):
        self.cache.pop(k)
        if self.mode == StorageModes.PERSISTENT:
            # need to commit manually, as autocommit only commits with commit(blocking=False) and might not persist data
            self.cache.commit()

    def get(self, k):
        return self.cache.get(k)

    def append(self, k, v):
        current_data = self.cache.get(k)
        if not current_data:
            self.set(k, [v])
        else:
            if not isinstance(current_data, list):
                current_data = [current_data]
            current_data.append(v)
            self.set(k, current_data)
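
The comments in set/dump above point at a subtlety worth isolating: autocommit=True only issues non-blocking commits, so an explicit commit() is still the reliable way to make sure data reaches disk. A minimal sketch (hypothetical file name):

from sqlitedict import SqliteDict

cache = SqliteDict("storage_example.sqlite", autocommit=True)
cache["answers"] = [42]
cache.commit()   # blocking commit: guarantees the write is flushed to disk
cache.close()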
Example #12
class PrefOrderedSet:
    """
    store/retrieve an ordered set of strings (like a list, but no duplicates) to/from a sqlite database
    """
    def __init__(self, name: str, author: str, table: str):
        # DB stores values directly (not encoded as a pickle)
        self.sqlite_dict = SqliteDict(get_sqlite_path(name, author),
                                      table,
                                      encode=lambda x: x,
                                      decode=lambda x: x)

    def set(self, strings: list):
        """
        set the list of strings
        :param strings: list of strings
        """
        self.sqlite_dict.clear()  # delete entire table
        # ordering is done by making the value in the key/value pair the index and our desired list "value" is the key
        for index, string in enumerate(strings):
            self.sqlite_dict[string] = index
        self.sqlite_dict.commit()  # not using autocommit since we're updating (setting) multiple values in the above for loop

    def get(self) -> List[str]:
        """
        returns the list of strings
        :return: list of strings
        """
        return list(sorted(self.sqlite_dict, key=self.sqlite_dict.get))
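
A small usage sketch of the ordering trick used by PrefOrderedSet (file and table names made up): each string becomes a key, its list position becomes the value, and reading back sorts by that value. Unlike the class above, this sketch keeps the default pickle encoding; the ordering trick itself is unchanged.

from sqlitedict import SqliteDict

prefs = SqliteDict("prefs_example.sqlite", tablename="recent_files")
prefs.clear()
for index, string in enumerate(["alpha.txt", "beta.txt", "gamma.txt"]):
    prefs[string] = index
prefs.commit()
print(sorted(prefs, key=prefs.get))  # ['alpha.txt', 'beta.txt', 'gamma.txt']
prefs.close()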
Example #13
def hashDemultiplexedReads(reads,
                           has_umi,
                           umi_start,
                           umi_end,
                           low_memory):
    """
    This function extracts the read name and the x,y coordinates
    from the reads given as input and returns a hash
    with the clean read name as key and (x,y,umi) as
    values (umi is optional). X and Y correspond
    to the array coordinates of the barcode of the read.
    :param reads: path to a file with the fastq reads after demultiplexing
    :param has_umi: True if the read sequence contains UMI
    :param umi_start: the start position of the UMI
    :param umi_end: the end position of the UMI
    :param low_memory: True to use a key-value db instead of dict
    :type reads: str
    :type has_umi: boolean
    :type umi_start: integer
    :type umi_end: integer
    :type low_memory: boolean
    :return: a dictionary of read_name -> (x,y,umi) tags where umi is optional
    """
    logger = logging.getLogger("STPipeline")
    
    if not os.path.isfile(reads):
        error = "Error, input file not present {}\n".format(reads)
        logger.error(error)
        raise RuntimeError(error)
    
    assert(umi_start >= 0 and umi_start < umi_end)
    if low_memory:
        hash_reads = SqliteDict(autocommit=False, flag='c', journal_mode='OFF')
    else:
        hash_reads = dict()
    
    fastq_file = safeOpenFile(reads, "rU")
    for name, sequence, _ in readfq(fastq_file):
        # Assumes the header ends like this B0:Z:GTCCCACTGGAACGACTGTCCCGCATC B1:Z:678 B2:Z:678
        header_tokens = name.split()
        # TODO add an error check here
        x = header_tokens[-2]
        y = header_tokens[-1]
        # Assumes STAR will only output the first token of the read name
        # We keep the same naming for the extra attributes
        # We add the UMI as a tag if present
        tags = [x,y]
        if has_umi:
            # Add the UMI as an extra tag
            umi = sequence[umi_start:umi_end]
            tags.append("B3:Z:%s" % umi)
        # The probability of a collision is very very low
        key = hash(header_tokens[0])
        hash_reads[key] = tags
        
    if low_memory: hash_reads.commit()
    fastq_file.close()    
    return hash_reads
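
The low_memory branch above builds a SqliteDict with no filename, which places the backing file in a temporary location (compare the buffer comment in the SimServer example further down). The same pattern in isolation, as a small sketch with made-up values:

from sqlitedict import SqliteDict

hash_reads = SqliteDict(autocommit=False, flag='c', journal_mode='OFF')  # temp-file backed
hash_reads["read_1"] = ["100", "200", "B3:Z:ACGTACGT"]  # (x, y, umi)-style tags
hash_reads.commit()
print(hash_reads["read_1"])
hash_reads.terminate()  # remove the temporary database file when done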
Example #14
class CachingLM:
    def __init__(self, lm, cache_db):
        """LM wrapper that returns cached results if they exist, and uses the underlying LM if not.

        :param lm: LM
            Underlying LM
        :param cache_db: str
            Path to cache db
        """
        self.lm = lm
        self.cache_db = cache_db
        if os.path.dirname(cache_db):
            os.makedirs(os.path.dirname(cache_db), exist_ok=True)
        self.dbdict = SqliteDict(cache_db, autocommit=True)

        # add hook to lm
        lm.set_cache_hook(self.get_cache_hook())

    def __getattr__(self, attr):
        def fn(requests):
            res = []
            remaining_reqs = []

            # figure out which ones are cached and which ones are new
            for req in requests:
                hsh = hash_args(attr, req)
                if hsh in self.dbdict:
                    ob = self.dbdict[hsh]

                    assert ob is not None

                    res.append(ob)
                else:
                    res.append(None)
                    remaining_reqs.append(req)

            # actually run the LM on the requests that do not have cached results
            rem_res = getattr(self.lm, attr)(remaining_reqs)

            # stick the new ones back into the list and also cache any of the new ones
            resptr = 0
            for req, r in zip(remaining_reqs, rem_res):
                while res[resptr] is not None:
                    resptr += 1

                res[resptr] = r

                # caching
                hsh = hash_args(attr, req)
                self.dbdict[hsh] = r
            self.dbdict.commit()

            return res

        return fn

    def get_cache_hook(self):
        return CacheHook(self)
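
CachingLM leans on a hash_args helper that is not shown in this snippet. Purely as an illustration of what such a key function might look like (this is not the project's actual implementation):

import hashlib
import json

def hash_args(attr, req):
    """Stable cache key from the method name and request arguments
    (assumes the request is JSON-serialisable, falling back to str)."""
    payload = json.dumps([attr, req], sort_keys=True, default=str)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()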
Example #15
def set_query_results(query, k=20):
    query_map = SqliteDict(query_map_path)
    query_vec = query_map[query]
    query_map.close()
    query_db = SqliteDict(query_db_path)
    results = search(docs_tfidf, query_vec, hw2.cosine_sim)
    query_db[query] = results[:k]
    query_db.commit()
    query_db.close()
Example #16
def main(data_dir):
    print('Loading data...')
    zg = Zipgun(data_dir, force_text=True)
    print('Creating db...')
    persisted = SqliteDict(os.path.join(data_dir, DATA_FILE), autocommit=False)
    print('Updating data...')
    persisted.update(zg.country_postal_codes)
    print('Committing data...')
    persisted.commit()
Example #17
 def test_reopen_conn(self):
     """Verify using a contextmanager that a connection can be reopened."""
     fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
     db = SqliteDict(filename=fname)
     with db:
         db['key'] = 'value'
         db.commit()
     with db:
         db['key'] = 'value'
         db.commit()
Example #19
class Cache(object):
    """ Cache -- Key-Value Store for Twizzle to reduce unnecessary recomputations
    """
    def __init__(self,
                 bPersistent=False,
                 sPathToPersistenceDB="twizzle_cache.db"):
        """Constructor of the Twizzle Cache

        Note:
            You can decide whether the cache should be persistent between multiple executions or
            just a runtime cache for one execution of a set of tests
        Args:
            bPersistent (bool): Flag whether the cache should be persistent or not (Note: a persistent cache
            is much slower because it has to write the data to the hard disk)

            sPathToPersistenceDB (str): Path to the Cache DB where the Cache should write its data to
        """
        self._cache = {}
        self._lock = Lock()
        self._persistent = bPersistent
        self._first_get = True

        if bPersistent:
            if not sPathToPersistenceDB:
                raise Exception(
                    "On persistent mode a path to the persistence database has to be defined"
                )
            self._db = SqliteDict(sPathToPersistenceDB)

    def set(self, sKey, oValue):
        """set cache element by key"""
        # debug
        print("ADDING CACHELINE: %s" % (sKey))
        self._lock.acquire()
        self._cache[sKey] = oValue
        if self._persistent:
            self._db[CACHE_KEY] = self._cache
            self._db.commit()
        self._lock.release()

    def get(self, sKey):
        """get cache element by key"""

        if self._persistent:
            self._lock.acquire()
            if self._first_get:
                self._first_get = False
                self._cache = self._db.get(CACHE_KEY, {})
            self._lock.release()
        return self._cache.get(sKey, None)

    def calc_unique_key(self, *params):
        """create a unique key based on parameters given by converting them
        to string and concatenating them"""
        return "".join([str(elem) for elem in params])
Example #20
def train():
    loader = DataLoader(TrainEvalDataset(LesionSegMask(split='train')),
                        batch_size=1,
                        shuffle=False,
                        num_workers=num_processor)
    test_loader = DataLoader(TrainEvalDataset(LesionSegMask(split='test')),
                             batch_size=1,
                             shuffle=False,
                             num_workers=num_processor)

    net = MaskRCNN(num_class=5)
    net = net.to(device)
    optimizer = SGD(net.parameters(), 0.001, 0.9, weight_decay=0.00001)
    # exp_lr_scheduler = lr_scheduler.ExponentialLR(optimizer, 0.95)

    storage_dict = SqliteDict(f'log/maskrcnn/dcl_snap.db')
    start_epoach = 0
    # if len(storage_dict) > 0:
    #     kk = list(storage_dict.keys())
    #     # net.load_state_dict(
    #     #     torch.load(BytesIO(storage_dict[38])))
    #     net.load_state_dict(
    #         torch.load(BytesIO(storage_dict[kk[-1]])))
    #     start_epoach = int(kk[-1]) + 1
    #     logger.info(f'loading from epoach{start_epoach}')
    global_step = 0
    for epoach in (range(start_epoach, 120)):
        net.train()
        for batch_cnt, batch in tqdm(enumerate(loader), total=len(loader)):
            image, label = batch
            for k, v in label.items():
                label[k] = v.squeeze()
            image = image.to(device)
            for k, v in label.items():
                if isinstance(v, torch.Tensor):
                    label[k] = label[k].to(device)
            # print(label['boxes'].shape)
            optimizer.zero_grad()
            net_out = net(image, [label])
            loss = 0
            for i in net_out.values():
                loss += i
            net_out['loss_sum'] = loss
            loss.backward()
            wtire_summary(net_out, 'train', global_step)
            optimizer.step()
            global_step += 1
        # exp_lr_scheduler.step(epoach)
        logger.debug(f'saving epoach {epoach}')
        buffer = BytesIO()
        torch.save(net.state_dict(), buffer)
        buffer.seek(0)
        storage_dict[epoach] = buffer.read()
        storage_dict.commit()
Example #21
    def test_overwrite_using_flag_n(self):
        """Re-opening of a database with flag='n' destroys it all."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = SqliteDict(filename=fname, tablename='sometable')
        orig_db['key'] = 'value'
        orig_db.commit()
        orig_db.close()

        # verify,
        next_db = SqliteDict(filename=fname, tablename='sometable', flag='n')
        self.assertNotIn('key', next_db.keys())
Example #22
    def test_default_reuse_existing_flag_c(self):
        """Re-opening of a database does not destroy it."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = SqliteDict(filename=fname)
        orig_db['key'] = 'value'
        orig_db.commit()
        orig_db.close()

        next_db = SqliteDict(filename=fname)
        self.assertIn('key', next_db.keys())
        self.assertEqual(next_db['key'], 'value')
Example #23
def update_query(query,
                 relevant=None,
                 irrelevant=None,
                 alpha=0.9,
                 beta=0.5,
                 gamma=0.1):
    """
    Update query in our db using rocchio algorithm. Note, the query string is not updated but
    the results in the results db are updated.
    :param query: query string
    :param relevant: list of doc_ids
    :param irrelevant: list of doc_ids
    :param alpha: weight of original query
    :param beta: weight of relevant docs
    :param gamma: weight of irrelevant docs
    :return: True if successful, False if unsuccessful
    """
    if relevant is None:
        relevant = []
    if irrelevant is None:
        irrelevant = []
    assert (query != "")
    query_map = SqliteDict(query_map_path)
    try:
        q0 = query_map[query]
    except KeyError:
        # Can't update queries we've never seen
        query_map.close()
        return False
    if not isinstance(q0, DictVector):
        q0 = DictVector(q0)
    doc_vec_db = SqliteDict(doc_vecs_db_path)
    Nr = len(relevant)
    for doc_id in relevant:
        try:
            doc_vec = doc_vec_db[doc_id]
            if not isinstance(doc_vec, DictVector):
                doc_vec = DictVector(doc_vec)
        except KeyError:
            continue
        q0 = q0 + (beta / Nr) * doc_vec
    Ni = len(irrelevant)
    for doc_id in irrelevant:
        try:
            doc_vec = doc_vec_db[doc_id]
            if not isinstance(doc_vec, DictVector):
                doc_vec = DictVector(doc_vec)
        except KeyError:
            continue
        q0 = q0 - (gamma / Ni) * doc_vec
    query_map[query] = q0
    query_map.commit()
    set_query_results(query)
    return True
Example #24
    def test_overwrite_using_flag_n(self):
        """Re-opening of a database with flag='c' destroys it all."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = SqliteDict(filename=fname, tablename='sometable')
        orig_db['key'] = 'value'
        orig_db.commit()
        orig_db.close()

        # verify,
        next_db = SqliteDict(filename=fname, tablename='sometable', flag='n')
        self.assertNotIn('key', next_db.keys())
Example #25
def hashDemultiplexedReads(reads, umi_start, umi_end, low_memory):
    """
    This function extracts the read name, the UMI and the x,y coordinates
    from the reads given as input (output of TaggD) and returns a dictionary
    with the clean read name as key and (x,y,umi) as value. X and Y correspond
    to the array coordinates of the barcode of the read and UMI is extracted from the read
    sequence.
    :param reads: path to a file with the fastq reads after demultiplexing (TaggD)
    :param umi_start: the start position of the UMI
    :param umi_end: the end position of the UMI
    :param low_memory: True to use a key-value db instead of dict
    :type reads: str
    :type umi_start: integer
    :type umi_end: integer
    :type low_memory: boolean
    :return: a dictionary of read_name -> (x,y,umi) tags where umi is optional
    """
    logger = logging.getLogger("STPipeline")

    if not os.path.isfile(reads):
        error = "Error parsing TaggD output, input file not present {}\n".format(
            reads)
        logger.error(error)
        raise RuntimeError(error)

    if low_memory:
        hash_reads = SqliteDict(autocommit=False, flag='c', journal_mode='OFF')
    else:
        hash_reads = dict()

    fastq_file = safeOpenFile(reads, "rU")
    for name, sequence, _ in readfq(fastq_file):
        # Assumes the header is like this
        # @NS500688:111:HNYW7BGXX:1:11101:13291:1099 1:N:0:TGCCCA B0:Z:GTCCCACTGGAACGACTGTCCCGCATC B1:Z:678 B2:Z:678
        header_tokens = name.split()
        assert (len(header_tokens) > 3)
        assert (len(sequence) >= umi_end)
        # Get the X and Y tags from the header of the read
        x = header_tokens[-2]
        y = header_tokens[-1]
        # The UMI is retrieved from the sequence
        umi = sequence[umi_start:umi_end]
        # We keep the same naming convention for the UMI attribute
        tags = (x, y, "B3:Z:{}".format(umi))
        # In order to save memory we truncate the read
        # name to only keep the unique part (lane, tile, x_pos, y_pos)
        # TODO this procedure is specific to only Illumina technology
        key = "".join(header_tokens[0].split(":")[-4:])
        hash_reads[key] = tags

    if low_memory: hash_reads.commit()
    fastq_file.close()
    return hash_reads
Example #26
def save2SqliteDict(_dict: collections.OrderedDict, _dir):
    # sqliteDict = SqliteDict(_dir, autocommit=True)
    sqliteDict = SqliteDict(_dir)
    print("Saving database to" + _dir)
    i_max = len(_dict) - 1
    i = 0
    for key, value in _dict.items():
        print(_dir, str(i), str(i_max))
        i += 1
        sqliteDict[key] = value
    sqliteDict.commit()
    sqliteDict.close()
Example #27
    def test_default_reuse_existing_flag_c(self):
        """Re-opening of a database does not destroy it."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = SqliteDict(filename=fname)
        orig_db['key'] = 'value'
        orig_db.commit()
        orig_db.close()

        next_db = SqliteDict(filename=fname)
        self.assertIn('key', next_db.keys())
        self.assertEqual(next_db['key'], 'value')
Example #28
def undo_update(query,
                relevant=None,
                irrelevant=None,
                alpha=0.9,
                beta=0.5,
                gamma=0.1):
    """
    Method for undoing an update if a user decides a post that was relevant isn't actually relevant.
    :param query: query string
    :param relevant: list of doc_ids
    :param irrelevant: list of doc_ids
    :param alpha: weight of original query
    :param beta: weight of relevant docs
    :param gamma: weight of irrelevant docs
    :return: True if successful, False if unsuccessful
    """
    if relevant is None:
        relevant = []
    if irrelevant is None:
        irrelevant = []
    assert (query != "")
    query_map = SqliteDict(query_map_path)
    try:
        q0 = query_map[query]
    except KeyError:
        # Can't update queries we've never seen
        query_map.close()
        return False
    if not isinstance(q0, DictVector):
        q0 = DictVector(q0)
    doc_vec_db = SqliteDict(doc_vecs_db_path)
    Nr = len(relevant)
    for doc_id in relevant:
        try:
            doc_vec = doc_vec_db[doc_id]
            if not isinstance(doc_vec, DictVector):
                doc_vec = DictVector(doc_vec)
        except KeyError:
            continue
        q0 = q0 - (beta / Nr) * doc_vec
    Ni = len(irrelevant)
    for doc_id in irrelevant:
        try:
            doc_vec = doc_vec_db[doc_id]
            if not isinstance(doc_vec, DictVector):
                doc_vec = DictVector(doc_vec)
        except KeyError:
            continue
        q0 = q0 + (gamma / Ni) * doc_vec
    query_map[query] = q0
    query_map.commit()
    set_query_results(query)
    return True
Example #29
def patterns(api, dbname):
    Route.get(api, 88)
    patterns = SqliteDict(dbname, tablename="patterns")

    for rt in Route.all_routes.values():
        color = rt.color
        rtpatterns = rt.patterns['ptr']
        if type(rtpatterns) != list:
            rtpatterns = [rtpatterns]
            # Outputs first element, not list of length one for some reason
        for pt in rtpatterns:
            print(pt['pid'])
            patterns[pt['pid']] = utils.patterntogeojson(pt, color)
    patterns.commit()
Example #30
class ImageCache:
    def __init__(self, directory):
        db_dirname = os.path.join(directory, ".mikula")
        if not os.path.isdir(db_dirname):
            os.mkdir(db_dirname)
        db_filename = os.path.join(db_dirname, "images.cache")
        self.cache = SqliteDict(db_filename)
        self.recent_lookup_ = None

    def reset(self):
        self.cache.clear()
        self.recent_lookup_ = None

    def config_changed(self, config):
        if "config" not in self.cache.keys():
            return True
        stored = self.cache["config"]
        return stored != config

    def update_config(self, config):
        self.cache["config"] = config
        self.cache.commit()

    def require_update(self, filename):
        if filename not in self.cache.keys():
            self.recent_lookup_ = None
            return True

        timestamp, scaled, scaled_time, thumbnail, thumbnail_time = self.cache[
            filename]
        if os.path.exists(scaled) and os.path.exists(thumbnail):
            if os.path.getmtime(filename) == timestamp and \
               os.path.getmtime(scaled) == scaled_time and \
               os.path.getmtime(thumbnail) == thumbnail_time:
                self.recent_lookup_ = (scaled, thumbnail)
                return False
        self.recent_lookup_ = None
        return True

    def get_filenames(self):
        return self.recent_lookup_

    def update(self, filename, scaled, thumbnail):
        timestamp = os.path.getmtime(filename)
        scaled_time = os.path.getmtime(scaled)
        thumbnail_time = os.path.getmtime(thumbnail)
        self.cache[filename] = (timestamp, scaled, scaled_time, thumbnail,
                                thumbnail_time)
        self.cache.commit()
Example #31
    def test_irregular_tablenames(self):
        """Irregular table names need to be quoted"""
        db = SqliteDict(':memory:', tablename='9nine')
        db['key'] = 'value'
        db.commit()
        self.assertEqual(db['key'], 'value')
        db.close()

        db = SqliteDict(':memory:', tablename='outer space')
        db['key'] = 'value'
        db.commit()
        self.assertEqual(db['key'], 'value')
        db.close()

        with self.assertRaisesRegexp(ValueError, r'^Invalid tablename '):
            SqliteDict(':memory:', '"')
Example #32
def upload_query(query, results):
    query_db = SqliteDict(query_db_path)
    query_map = SqliteDict(query_map_path)
    flag = False
    try:
        query_db[query] = results
        query_vec = query2vec(query)
        query_map[query] = query_vec
        query_db.commit()
        query_map.commit()
        flag = True
    except:
        pass
    query_db.close()
    query_map.close()
    return flag
Example #33
    def test_irregular_tablenames(self):
        """Irregular table names need to be quoted"""
        db = SqliteDict(':memory:', tablename='9nine')
        db['key'] = 'value'
        db.commit()
        self.assertEqual(db['key'], 'value')
        db.close()

        db = SqliteDict(':memory:', tablename='outer space')
        db['key'] = 'value'
        db.commit()
        self.assertEqual(db['key'], 'value')
        db.close()

        with self.assertRaisesRegexp(ValueError, r'^Invalid tablename '):
            SqliteDict(':memory:', '"')
Example #34
def _persist_v1(file_path, zg):
    print('Creating meta db...')
    zipgun_info = SqliteDict(
        file_path, tablename='zipgun_info', autocommit=False)
    zipgun_info['version'] = 1
    # list() so the value pickles cleanly under Python 3 (dict_keys is not picklable)
    zipgun_info['country_codes'] = list(zg.country_postal_codes.keys())
    zipgun_info.commit()

    for country_code in zg.country_postal_codes:
        print('Creating {} db...'.format(country_code))
        country_data = SqliteDict(
            file_path, tablename='zg_{}'.format(country_code),
            autocommit=False)
        country_data.update(zg.country_postal_codes[country_code])
        country_data.commit()
        time.sleep(1.0)                   # Pretty bullshit
        country_data.close()
    zipgun_info.close()
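
Since _persist_v1 writes a zipgun_info table plus one zg_<country_code> table per country, a read-side helper could look like the sketch below (read-only flags assumed; not part of the original project):

from sqlitedict import SqliteDict

def load_country(file_path, country_code):
    """Open the per-country table written by _persist_v1, read-only."""
    with SqliteDict(file_path, tablename='zipgun_info', flag='r') as info:
        if country_code not in info['country_codes']:
            raise KeyError(country_code)
    return SqliteDict(file_path, tablename='zg_{}'.format(country_code), flag='r')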
Example #35
class CachingLM:
    def __init__(self, lm, cache_db):
        self.lm = lm
        self.cache_db = cache_db
        os.makedirs(os.path.dirname(cache_db), exist_ok=True)
        self.dbdict = SqliteDict(cache_db, autocommit=True)

    def __getattr__(self, attr):
        def fn(requests):
            res = []
            remaining_reqs = []

            # figure out which ones are cached and which ones are new
            for req in requests:
                hsh = hash_args(attr, req)
                if hsh in self.dbdict:
                    ob = self.dbdict[hsh]

                    assert ob is not None

                    res.append(ob)
                else:
                    res.append(None)
                    remaining_reqs.append(req)

            # actually run the LM
            rem_res = getattr(self.lm, attr)(remaining_reqs)

            # stick the new ones back into the list and also cache any of the new ones
            resptr = 0
            for req, r in zip(remaining_reqs, rem_res):
                while res[resptr] is not None:
                    resptr += 1

                res[resptr] = r

                # caching
                hsh = hash_args(attr, req)
                self.dbdict[hsh] = r
            self.dbdict.commit()

            return res

        return fn
Example #36
class SqliteDataResource(DataResource, abc.ABC):
    """
    Class representing a data resource in SQLiteDict format (.db file, key-value store).
    """
    def __init__(self, context, path):
        super().__init__(context)
        self.path = path
        self.data = None

    @property
    def keys(self):
        return list(self.data.keys())

    def load(self, *args, **kwargs):
        """
        Use 'with x:' syntax instead to prevent resource being left open.
        """
        pass

    def update(self, *args, **kwargs):
        """
        Clear the resource, ready for new data.
        """
        if self.data is None:
            raise Exception('Resource is not open.')
        self.data.clear()

    def get(self, key, default=None):
        if self.data is None:
            raise Exception('Resource is not open.')
        return self.data.get(key, default)

    def add(self, key, value):
        if self.data is None:
            raise Exception('Resource is not open.')
        self.data[key] = value

    def __enter__(self):
        self.data = SqliteDict(str(self.path))

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.data.commit()
        self.data.close()
        self.data = None
Example #37
class ModelCacheStoreSqlite(ModelCacheStore):

    """ BTree lookup implementation """

    def __init__(self, name):
        from sqlitedict import SqliteDict
        self.datadict = SqliteDict(name)

    def sync(self):
        return self.datadict.commit()  # instead of #sync
Example #38
    def test_readonly_table(self):
        """
        Read-only access on a non-existent tablename should raise RuntimeError,
        and not create a new (empty) table.
        """
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        dummy_tablename = 'table404'
        orig_db = SqliteDict(filename=fname)
        orig_db['key'] = 'value'
        orig_db['key_two'] = 2
        orig_db.commit()
        orig_db.close()

        self.assertFalse(dummy_tablename in SqliteDict.get_tablenames(fname))

        with self.assertRaises(RuntimeError):
            SqliteDict(filename=fname, tablename=dummy_tablename, flag='r')

        self.assertFalse(dummy_tablename in SqliteDict.get_tablenames(fname))
Example #39
    def test_overwrite_using_flag_w(self):
        """Re-opening of a database with flag='w' destroys only the target table."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db_1 = SqliteDict(filename=fname, tablename='one')
        orig_db_1['key'] = 'value'
        orig_db_1.commit()
        orig_db_1.close()

        orig_db_2 = SqliteDict(filename=fname, tablename='two')
        orig_db_2['key'] = 'value'
        orig_db_2.commit()
        orig_db_2.close()

        # verify, when re-opening table space 'one' with flag='w', we destroy
        # its contents.  However, when re-opening table space 'two' with the
        # default flag='c', its contents remain.
        next_db_1 = SqliteDict(filename=fname, tablename='one', flag='w')
        self.assertNotIn('key', next_db_1.keys())

        next_db_2 = SqliteDict(filename=fname, tablename='two')
        self.assertIn('key', next_db_2.keys())
Example #40
    def test_readonly(self):
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = SqliteDict(filename=fname)
        orig_db['key'] = 'value'
        orig_db['key_two'] = 2
        orig_db.commit()
        orig_db.close()

        readonly_db = SqliteDict(filename=fname, flag = 'r')
        self.assertTrue(readonly_db['key'] == 'value')
        self.assertTrue(readonly_db['key_two'] == 2)

        def attempt_write():
            readonly_db['key'] = ['new_value']

        def attempt_update():
            readonly_db.update(key = 'value2', key_two = 2.1)

        def attempt_delete():
            del readonly_db['key']

        def attempt_clear():
            readonly_db.clear()

        def attempt_terminate():
            readonly_db.terminate()

        attempt_funcs = [attempt_write, 
                         attempt_update, 
                         attempt_delete,
                         attempt_clear,
                         attempt_terminate]

        for func in attempt_funcs:
            with self.assertRaises(RuntimeError):
                func()
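
Taken together, the tests above cover the four open flags. A compact summary sketch (hypothetical file name; the comments restate what the tests demonstrate):

from sqlitedict import SqliteDict

path = "flags_demo.sqlite"
db_c = SqliteDict(path)            # flag='c' (default): create if missing, reuse existing data
db_r = SqliteDict(path, flag='r')  # read-only: writes, update, clear, terminate raise RuntimeError
db_w = SqliteDict(path, flag='w')  # drop only this table's contents; other tables survive
db_n = SqliteDict(path, flag='n')  # start from a fresh database, discarding existing contents
for db in (db_c, db_r, db_w, db_n):
    db.close()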
Example #41
class SimServer(object):
    """
    Top-level functionality for similarity services. A similarity server takes
    care of::

    1. creating semantic models
    2. indexing documents using these models
    3. finding the most similar documents in an index.

    An object of this class can be shared across network via Pyro, to answer remote
    client requests. It is thread safe. Using a server concurrently from multiple
    processes is safe for reading (answering similarity queries). Modifying
    (training/indexing) is serialized internally via locking.
    """
    def __init__(self, basename, use_locks=False):
        """
        All data will be stored under directory `basename`. If there is a server
        there already, it will be loaded (resumed).

        The server object is stateless in RAM -- its state is defined entirely by its location.
        There is therefore no need to store the server object.
        """
        if not os.path.isdir(basename):
            raise ValueError("%r must be a writable directory" % basename)
        self.basename = basename
        self.use_locks = use_locks
        self.lock_update = threading.RLock() if use_locks else gensim.utils.nocm
        try:
            self.fresh_index = SimIndex.load(self.location('index_fresh'))
        except:
            logger.debug("starting a new fresh index")
            self.fresh_index = None
        try:
            self.opt_index = SimIndex.load(self.location('index_opt'))
        except:
            logger.debug("starting a new optimized index")
            self.opt_index = None
        try:
            self.model = SimModel.load(self.location('model'))
        except:
            self.model = None
        self.payload = SqliteDict(self.location('payload'), autocommit=True, journal_mode=JOURNAL_MODE)
        self.flush(save_index=False, save_model=False, clear_buffer=True)
        logger.info("loaded %s" % self)


    def location(self, name):
        return os.path.join(self.basename, name)


    @gensim.utils.synchronous('lock_update')
    def flush(self, save_index=False, save_model=False, clear_buffer=False):
        """Commit all changes, clear all caches."""
        if save_index:
            if self.fresh_index is not None:
                self.fresh_index.save(self.location('index_fresh'))
            if self.opt_index is not None:
                self.opt_index.save(self.location('index_opt'))
        if save_model:
            if self.model is not None:
                self.model.save(self.location('model'))
        self.payload.commit()
        if clear_buffer:
            if hasattr(self, 'fresh_docs'):
                try:
                    self.fresh_docs.terminate() # erase all buffered documents + file on disk
                except:
                    pass
            self.fresh_docs = SqliteDict(journal_mode=JOURNAL_MODE) # buffer defaults to a random location in temp
        self.fresh_docs.sync()


    def close(self):
        """Explicitly close open file handles, databases etc."""
        try:
            self.payload.close()
        except:
            pass
        try:
            self.model.close()
        except:
            pass
        try:
            self.fresh_index.close()
        except:
            pass
        try:
            self.opt_index.close()
        except:
            pass
        try:
            self.fresh_docs.terminate()
        except:
            pass

    def __del__(self):
        """When the server went out of scope, make an effort to close its DBs."""
        self.close()

    @gensim.utils.synchronous('lock_update')
    def buffer(self, documents):
        """
        Add a sequence of documents to be processed (indexed or trained on).

        Here, the documents are simply collected; real processing is done later,
        during the `self.index` or `self.train` calls.

        `buffer` can be called repeatedly; the result is the same as if it was
        called once, with a concatenation of all the partial document batches.
        The point is to save memory when sending large corpora over network: the
        entire `documents` must be serialized into RAM. See `utils.upload_chunked()`.

        A call to `flush()` clears this documents-to-be-processed buffer (`flush`
        is also implicitly called when you call `index()` and `train()`).
        """
        logger.info("adding documents to temporary buffer of %s" % (self))
        for doc in documents:
            docid = doc['id']
#            logger.debug("buffering document %r" % docid)
            if docid in self.fresh_docs:
                logger.warning("asked to re-add id %r; rewriting old value" % docid)
            self.fresh_docs[docid] = doc
        self.fresh_docs.sync()


    @gensim.utils.synchronous('lock_update')
    def train(self, corpus=None, method='auto', clear_buffer=True, params=None):
        """
        Create an indexing model. Will overwrite the model if it already exists.
        All indexes become invalid, because documents in them use a now-obsolete
        representation.

        The model is trained on documents previously entered via `buffer`,
        or directly on `corpus`, if specified.
        """
        if corpus is not None:
            # use the supplied corpus only (erase existing buffer, if any)
            self.flush(clear_buffer=True)
            self.buffer(corpus)
        if not self.fresh_docs:
            msg = "train called but no training corpus specified for %s" % self
            logger.error(msg)
            raise ValueError(msg)
        if method == 'auto':
            numdocs = len(self.fresh_docs)
            if numdocs < 1000:
                logging.warning("too few training documents; using simple log-entropy model instead of latent semantic indexing")
                method = 'logentropy'
            else:
                method = 'lsi'
        if params is None:
            params = {}
        self.model = SimModel(self.fresh_docs, method=method, params=params)
        self.flush(save_model=True, clear_buffer=clear_buffer)


    @gensim.utils.synchronous('lock_update')
    def index(self, corpus=None, clear_buffer=True):
        """
        Permanently index all documents previously added via `buffer`, or
        directly index documents from `corpus`, if specified.

        The indexing model must already exist (see `train`) before this function
        is called.
        """
        if not self.model:
            msg = 'must initialize model for %s before indexing documents' % self.basename
            logger.error(msg)
            raise AttributeError(msg)

        if corpus is not None:
            # use the supplied corpus only (erase existing buffer, if any)
            self.flush(clear_buffer=True)
            self.buffer(corpus)

        if not self.fresh_docs:
            msg = "index called but no indexing corpus specified for %s" % self
            logger.error(msg)
            raise ValueError(msg)

        if not self.fresh_index:
            logger.info("starting a new fresh index for %s" % self)
            self.fresh_index = SimIndex(self.location('index_fresh'), self.model.num_features)
        self.fresh_index.index_documents(self.fresh_docs, self.model)
        if self.opt_index is not None:
            self.opt_index.delete(self.fresh_docs.keys())
        logger.info("storing document payloads")
        for docid in self.fresh_docs:
            payload = self.fresh_docs[docid].get('payload', None)
            if payload is None:
                # HACK: exit on first doc without a payload (=assume all docs have payload, or none does)
                break
            self.payload[docid] = payload
        self.flush(save_index=True, clear_buffer=clear_buffer)


    @gensim.utils.synchronous('lock_update')
    def optimize(self):
        """
        Precompute top similarities for all indexed documents. This speeds up
        `find_similar` queries by id (but not queries by fulltext).

        Internally, documents are moved from a fresh index (=no precomputed similarities)
        to an optimized index (precomputed similarities). Similarity queries always
        query both indexes, so this split is transparent to clients.

        If you add documents later via `index`, they go to the fresh index again.
        To precompute top similarities for these new documents too, simply call
        `optimize` again.

        """
        if self.fresh_index is None:
            logger.warning("optimize called but there are no new documents")
            return # nothing to do!

        if self.opt_index is None:
            logger.info("starting a new optimized index for %s" % self)
            self.opt_index = SimIndex(self.location('index_opt'), self.model.num_features)

        self.opt_index.merge(self.fresh_index)
        self.fresh_index.terminate() # delete old files
        self.fresh_index = None
        self.flush(save_index=True)


    @gensim.utils.synchronous('lock_update')
    def drop_index(self, keep_model=True):
        """Drop all indexed documents. If `keep_model` is False, also dropped the model."""
        modelstr = "" if keep_model else "and model "
        logger.info("deleting similarity index " + modelstr + "from %s" % self.basename)

        # delete indexes
        for index in [self.fresh_index, self.opt_index]:
            if index is not None:
                index.terminate()
        self.fresh_index, self.opt_index = None, None

        # delete payload
        if self.payload is not None:
            self.payload.close()

            fname = self.location('payload')
            try:
                if os.path.exists(fname):
                    os.remove(fname)
                    logger.info("deleted %s" % fname)
            except Exception:
                logger.warning("failed to delete %s" % fname)
        self.payload = SqliteDict(self.location('payload'), autocommit=True, journal_mode=JOURNAL_MODE)

        # optionally, delete the model as well
        if not keep_model and self.model is not None:
            self.model.close()
            fname = self.location('model')
            try:
                if os.path.exists(fname):
                    os.remove(fname)
                    logger.info("deleted %s" % fname)
            except Exception:
                logger.warning("failed to delete %s" % fname)
            self.model = None
Example #42
def main(result_file, site_file, constant_modification_list=None, variable_modification_list=None,
         enzyme_info=None, n_processes=4, output_file=None):
    if output_file is None:
        # output_file = os.path.splitext(result_file)[0] + '.theoretical_ions'
        output_file = os.path.splitext(result_file)[0] + ".db"
    else:
        output_file += ".db"
    modification_table = RestrictedModificationTable.bootstrap(constant_modification_list, variable_modification_list)
    if constant_modification_list is None and variable_modification_list is None:
        modification_table = ModificationTable.bootstrap()

    if isinstance(site_file, basestring):
        site_list = [line.strip() for line in open(site_file, "r")]
        site_list = list(map(int, site_list))
    else:
        site_list = site_file

    compo_dict = csv.DictReader(open(result_file, "r"), delimiter=",")
    colnames = compo_dict.fieldnames
    glycan_identity = get_glycan_identities(colnames)
    enzyme_info = map(get_enzyme, enzyme_info)
    tag = datetime.datetime.strftime(datetime.datetime.now(), "%Y%m%d-%H%M%S")
    metadata = {
        "glycan_identities": glycan_identity,
        "constant_modifications": constant_modification_list,
        "variable_modifications": variable_modification_list,
        "site_list": site_list,
        "ms1_output_file": result_file,
        "enzyme": enzyme_info,
        "tag": tag,
        "enable_partial_hexnac_match": constants.PARTIAL_HEXNAC_LOSS
    }

    metadata_store = SqliteDict(output_file, tablename="metadata", flag='n')
    metadata_store.update(metadata)
    metadata_store.commit()

    theoretical_search_space_store = SqliteDict(output_file, tablename="theoretical_search_space")
    pool = multiprocessing.Pool(n_processes)

    task_fn = functools.partial(process_predicted_ms1_ion, modification_table=modification_table,
                                site_list=site_list, glycan_identity=glycan_identity)

    cntr = 0
    if n_processes > 1:
        logger.debug("Building theoretical sequences concurrently")
        for res in (itertools.chain.from_iterable(pool.imap(task_fn, compo_dict, chunksize=500))):
            theoretical_search_space_store[cntr] = res
            cntr += 1
    else:
        logger.debug("Building theoretical sequences sequentially")
        for row in compo_dict:
            res = task_fn(row)
            for item in res:
                theoretical_search_space_store[cntr] = item
                cntr += 1
                if (cntr % 10000) == 0:
                    theoretical_search_space_store.commit()
                    logger.info("Committing, %d records made", cntr)
    theoretical_search_space_store.commit()
    theoretical_search_space_store.close()

    pool.close()
    pool.join()
    pool.terminate()

    logger.info("Hypothesis building complete")

    return output_file
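
A read-side sketch for the hypothesis database produced above (path hypothetical): SqliteDict.get_tablenames lists the tables, and the metadata table can be opened read-only.

from sqlitedict import SqliteDict

output_file = "predicted_ms1.db"
print(SqliteDict.get_tablenames(output_file))  # expected: ['metadata', 'theoretical_search_space']
with SqliteDict(output_file, tablename="metadata", flag="r") as metadata_store:
    print(metadata_store["tag"], len(metadata_store["site_list"]))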
Example #43
class IMAPMailbox(ExtendedMaildir):
    implements(imap4.IMailbox, imap4.ICloseableMailbox)
    
    AppendFactory = SerpentAppendMessageTask

    def __init__(self, path):
        maildir.initializeMaildir(path)
        self.listeners = []
        self.path = path
        self.open_flags()
        self.lastadded = None
        self.__check_flags_()
    
    def open_flags(self):
        self.msg_info = SqliteDict(os.path.join(self.path, conf.imap_msg_info))
        self.mbox_info = SqliteDict(os.path.join(self.path, conf.imap_mbox_info))

    def _start_monitor(self):
        self.notifier = inotify.INotify()
        self.notifier.startReading()
        self.notifier.watch(filepath.FilePath(os.path.join(self.path, 'new')),
                   callbacks=[self._new_files])
        self.notifier.watch(filepath.FilePath(os.path.join(self.path,'cur')),
                   callbacks=[self._new_files])

    def _stop_monitor(self):
        self.notifier.stopReading()
        self.notifier.loseConnection()

    def _new_files(self, wo, path, code):
        if code == inotify.IN_MOVED_TO or code == inotify.IN_DELETE:
            for l in self.listeners:
                l.newMessages(self.getMessageCount(), self.getRecentCount())

    def __check_flags_(self):
        if 'subscribed' not in self.mbox_info.keys(): self.mbox_info['subscribed'] = False
        if 'flags' not in self.mbox_info.keys(): self.mbox_info['flags'] = []
        if 'special' not in self.mbox_info.keys(): self.mbox_info['special'] = ''
        if 'uidvalidity' not in self.mbox_info.keys(): self.mbox_info['uidvalidity'] = random.randint(0, 2**32)
        if 'uidnext' not in self.mbox_info.keys(): self.mbox_info['uidnext'] = 1
        #self.mbox_info.commit(blocking=False)    # XXX
        l = [l for l in self.__msg_list_()]
        for i in l:
            fn = i.split('/')[-1]
            if fn not in self.msg_info.keys():
                val1 = {'uid': self.getUIDNext()}
                if i.split('/')[-2] == 'new':
                    val1['flags'] = []
                else:
                    val1['flags'] = [misc.IMAP_FLAGS['SEEN']]
                self.msg_info[fn] = val1
        #self.msg_info.commit(blocking=False)    # XXX

    def subscribe(self):
        self.mbox_info['subscribed'] = True
        #self.mbox_info.commit(blocking=False)    # XXX

    def unsubscribe(self):
        self.mbox_info['subscribed'] = False
        #self.mbox_info.commit(blocking=False)    # XXX
    
    def is_subscribed(self):
        return self.mbox_info['subscribed']

    def __count_flagged_msgs_(self, flag):
        val1 = [0 for fn in self.msg_info.keys() if flag in self.msg_info[fn]['flags']]
        return len(val1)
    
    def getHierarchicalDelimiter(self):
        return misc.IMAP_HDELIM

    def setSpecial(self, special):
        self.mbox_info['special'] = special
        #self.mbox_info.commit(blocking=False)    # XXX

    def getFlags(self):
        return sorted(misc.IMAP_FLAGS.values())
    
    def getMboxFlags(self):
        f = list(self.mbox_info['flags'])
        if self.mbox_info['special'] != '': f.append(self.mbox_info['special'])
        return f
    
    def addFlag(self, flag):
        self.mbox_info['flags'] = list(set(self.mbox_info['flags']).union([flag]))
        #self.mbox_info.commit(blocking=False)    # XXX
    
    def removeFlag(self, flag):
        self.mbox_info['flags'] = list(set(self.mbox_info['flags']).difference([flag]))
        #self.mbox_info.commit(blocking=False)    # XXX
    
    def hasChildren(self):
        flags = self.getFlags()
        if misc.MBOX_FLAGS['HASCHILDREN'] not in flags:
            self.addFlag(misc.MBOX_FLAGS['HASCHILDREN'])
        if misc.MBOX_FLAGS['HASNOCHILDREN'] in flags:
            self.removeFlag(misc.MBOX_FLAGS['HASNOCHILDREN'])
    def hasNoChildren(self):
        flags = self.getFlags()
        if misc.MBOX_FLAGS['HASNOCHILDREN'] not in flags:
            self.addFlag(misc.MBOX_FLAGS['HASNOCHILDREN'])
        if misc.MBOX_FLAGS['HASCHILDREN'] in flags:
            self.removeFlag(misc.MBOX_FLAGS['HASCHILDREN'])

    def getMessageCount(self):
        val1 = [0 for fn in self.msg_info.keys() if misc.IMAP_FLAGS['DELETED'] not in self.msg_info[fn]['flags']]
        return len(val1)

    def getRecentCount(self):
        c = 0
        for fn in self.msg_info.keys():
            if misc.IMAP_FLAGS['RECENT'] in self.msg_info[fn]['flags']:
                c += 1
                info = self.msg_info[fn]
                info['flags'] = set(info['flags']).difference(set([misc.IMAP_FLAGS['RECENT']]))
                self.msg_info[fn] = info
        #self.msg_info.commit(blocking=False)    # XXX
        return c
    
    def getUnseenCount(self):
        return self.getMessageCount() - self.__count_flagged_msgs_(misc.IMAP_FLAGS['SEEN'])

    def isWriteable(self):
        return True

    def getUIDValidity(self):
        return self.mbox_info['uidvalidity']
    
    def getUIDNext(self):
        un = self.mbox_info['uidnext']
        self.mbox_info['uidnext'] += 1
        #self.mbox_info.commit(blocking=False)    # XXX
        return un
    
    def getUID(self, num):
        return num

    def addMessage(self, message, flags = (), date = None):
        return self.appendMessage(message).addCallback(self._cbAddMessage, flags)
    
    def _cbAddMessage(self, obj, flags):
        path = self.lastadded
        self.lastadded = None
        fn = path.split('/')[-1]
        self.msg_info[fn] = {'uid': self.getUIDNext(), 'flags': flags}
        #self.msg_info.commit(blocking=False)    # XXX
        if misc.IMAP_FLAGS['SEEN'] in flags and path.split('/')[-2] != 'cur':
            new_path = os.path.join(self.path, 'cur', fn)
            os.rename(path, new_path)

    def __msg_list_(self):
        a = []
        for m in os.listdir(os.path.join(self.path, 'new')):
            a.append(os.path.join(self.path, 'new', m))
        for m in os.listdir(os.path.join(self.path, 'cur')):
            a.append(os.path.join(self.path, 'cur', m))
        return a

    def _seqMessageSetToSeqDict(self, messageSet):
        if not messageSet.last:
            messageSet.last = self.getMessageCount()

        seqMap = {}
        msgs = self.__msg_list_()
        for messageNum in messageSet:
            if messageNum > 0 and messageNum <= self.getMessageCount():
                seqMap[messageNum] = msgs[messageNum - 1]
        return seqMap

    def fetch(self, messages, uid):
        return [[seq, MaildirMessage(seq,
                                     file(filename, 'rb').read(),
                                     self.msg_info[filename.split('/')[-1]]['flags'],
                                     rfc822date())]
                for seq, filename in self.__fetch_(messages, uid).iteritems()]
    def __fetch_(self, messages, uid):
        if uid:
            messagesToFetch = {}
            if not messages.last:
                messages.last = self.mbox_info['uidnext']
            fn_uid = dict((fn, self.msg_info[fn]['uid']) for fn in self.msg_info.keys())
            for uid in messages:
                if uid in fn_uid.values():
                    for name, _id in fn_uid.iteritems():
                        if uid == _id:
                            if os.path.exists(os.path.join(self.path,'new', name)):
                                messagesToFetch[uid] = os.path.join(self.path,'new', name)
                            elif os.path.exists(os.path.join(self.path,'cur', name)):
                                messagesToFetch[uid] = os.path.join(self.path,'cur', name)
        else:
            messagesToFetch = self._seqMessageSetToSeqDict(messages)
        return messagesToFetch
    def store(self, messages, flags, mode, uid):
        d = {}
        for _id, path in self.__fetch_(messages, uid).iteritems():
            filename = path.split('/')[-1]
            if mode < 0:
                old_f = self.msg_info[filename]
                old_f['flags'] = list(set(old_f['flags']).difference(set(flags)))
                self.msg_info[filename] = old_f
                if misc.IMAP_FLAGS['SEEN'] in flags and path.split('/')[-2] != 'new':
                    new_path = os.path.join(self.path, 'new', filename)
                    os.rename(path, new_path)
            elif mode == 0:
                old_f = self.msg_info[filename]
                old_f['flags'] = flags
                self.msg_info[filename] = old_f
            elif mode > 0:
                old_f = self.msg_info[filename]
                old_f['flags'] = list(set(old_f['flags']).union(set(flags)))
                self.msg_info[filename] = old_f
                if misc.IMAP_FLAGS['SEEN'] in flags and path.split('/')[-2] != 'cur':
                    new_path = os.path.join(self.path, 'cur', filename)
                    os.rename(path, new_path)
            d[_id] = self.msg_info[filename]['flags']
        #self.msg_info.commit(blocking=False)    # XXX
        return d
    
    def expunge(self):
        uids = []
        for path in self.__msg_list_():
            fn = path.split('/')[-1]
            if fn not in self.msg_info.keys():
                continue
            uid = self.msg_info[fn]['uid']
            if misc.IMAP_FLAGS['DELETED'] in self.msg_info[fn]['flags']:
                os.remove(path)
                del self.msg_info[fn]
                uids.append(uid)
        #self.msg_info.commit(blocking=False)    # XXX
        return uids
    
    def addListener(self, listener):
        self.listeners.append(listener)
        return True

    def removeListener(self, listener):
        self.listeners.remove(listener)
        return True
    
    def requestStatus(self, names):
        return imap4.statusRequestHelper(self, names)
    
    def destroy(self):
        pass

    def close(self):
        print('!!! %s - %d !!!' % (self.path, len(self.listeners)))
        if len(self.listeners) == 0:
            self._stop_monitor() 
            if conf.imap_expunge_on_close:
                self.expunge()
            self.msg_info.commit(blocking=False)
            self.mbox_info.commit(blocking = False)
            self.msg_info.close()
            self.mbox_info.close()