Example #1
class TestBigInt(unittest.TestCase):

    @classmethod
    def setup_class(cls):
        sys = SystemManager()
        sys.create_column_family(TEST_KS, 'StdInteger', comparator_type=IntegerType())

    @classmethod
    def teardown_class(cls):
        sys = SystemManager()
        sys.drop_column_family(TEST_KS, 'StdInteger')

    def setUp(self):
        self.key = 'TestBigInt'
        self.cf = ColumnFamily(pool, 'StdInteger')

    def tearDown(self):
        self.cf.remove(self.key)

    def test_negative_integers(self):
        self.cf.insert(self.key, {-1: '-1'})
        self.cf.insert(self.key, {-12342390: '-12342390'})
        self.cf.insert(self.key, {-255: '-255'})
        self.cf.insert(self.key, {-256: '-256'})
        self.cf.insert(self.key, {-257: '-257'})
        for key, cols in self.cf.get_range():
            self.assertEquals(str(cols.keys()[0]), cols.values()[0])
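A quick sketch of what the comparator buys here (assuming the same pool and TEST_KS fixtures as above): with an IntegerType comparator, pycassa packs int column names on insert and unpacks them on read, so a row's columns come back numerically sorted rather than byte-sorted.

cf = ColumnFamily(pool, 'StdInteger')
cf.insert('row1', {-256: '-256', 1: '1', -1: '-1'})
print cf.get('row1').keys()   # [-256, -1, 1], numeric order
cf.remove('row1')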
Example #2
    def execute(self):
        ## first validate data
        data_ok, fault = self._validate_data()

        if not data_ok:
            return (False, fault)

        ## if data is OK, perform the delete
        if self.op_type == CassandraQuery.OP_DELETE:
            try:
                domain = self.data.domain
                row_key = self.data.get_pk()

                client = db_connection.get_client()
                cf = ColumnFamily(client, domain)

                ## if cascading is enabled, first delete every DBObject and collection contained in this DBObject
                if self.cascade:
                    pass

                ## lastly remove data for current element
                cf.remove(row_key)

                return (True, None)
            except Exception, ex:
                return (False, ex)
Example #4
def remove_column(columnFamily, uid, columns):
	"Remove the given columns from a row key."
	try:
		column = ColumnFamily(pool, columnFamily)
		column.remove(uid, columns)
	except Exception:
		return {'status': 0}
	return {'status': 1}  # success (mirrors the error dict above)
Example #5
	def column_family_remove(self, machine_id, keyspace_name, column_family_name, key):
		"""Remove a key from a column family in the given keyspace."""
		if not self.keyspace_contains(keyspace_name, column_family_name):
			print "Error: keyspace:column family could not be found."
			return False
		pool = ConnectionPool(keyspace=keyspace_name, server_list=keyspace.server_ips, prefill=False)
		col_fam = ColumnFamily(pool, column_family_name)
		col_fam.remove(key)
		return True
Example #6
 def column_family_remove(self, machine_id, keyspace_name,
                          column_family_name, key):
     """Remove a key from a column family in the given keyspace."""
     if not self.keyspace_contains(keyspace_name, column_family_name):
         print "Error: keyspace:column family could not be found."
         return False
     pool = ConnectionPool(keyspace=keyspace_name,
                           server_list=keyspace.server_ips,
                           prefill=False)
     col_fam = ColumnFamily(pool, column_family_name)
     col_fam.remove(key)
     return True
Example #7
    def truncate_build_metadata(self):
        """Truncates all derived build metadata.

        This bulk removes all build metadata and should not be performed
        unless you want to reload all derived data!
        """
        for cf in ['slaves', 'masters', 'builders', 'builds']:
            cf = ColumnFamily(self.pool, cf)
            cf.truncate()

        cf = ColumnFamily(self.pool, 'indices')
        for key in BUILD_METADATA_INDICES:
            cf.remove(key)

        cf = ColumnFamily(self.pool, 'simple_indices')
        for key in BUILD_METADATA_SIMPLE_INDICES:
            cf.remove(key)

        cf = ColumnFamily(self.pool, 'counters')
        for key in BUILD_METADATA_COUNTERS:
            cf.remove(key)

        cf = ColumnFamily(self.pool, 'super_counters')
        for key in BUILD_METADATA_SUPER_COUNTERS:
            cf.remove(key)
Example #8
class TestTypeErrors(unittest.TestCase):
    def test_packing_enabled(self):
        self.cf = ColumnFamily(pool, "Standard1")
        self.cf.insert("key", {"col": "val"})
        assert_raises(TypeError, self.cf.insert, "key", {123: "val"})
        assert_raises(TypeError, self.cf.insert, "key", {"col": 123})
        assert_raises(TypeError, self.cf.insert, "key", {123: 123})
        self.cf.remove("key")

    def test_packing_disabled(self):
        self.cf = ColumnFamily(pool, "Standard1", autopack_names=False, autopack_values=False)
        self.cf.insert("key", {"col": "val"})
        assert_raises(TypeError, self.cf.insert, "key", {123: "val"})
        assert_raises(TypeError, self.cf.insert, "key", {"col": 123})
        assert_raises(TypeError, self.cf.insert, "key", {123: 123})
        self.cf.remove("key")
Example #9
    def test_validated_columns(self):
        sys = SystemManager()
        sys.create_column_family(
            TEST_KS,
            'Validators',
        )
        sys.alter_column(TEST_KS, 'Validators', 'long', LongType())
        sys.alter_column(TEST_KS, 'Validators', 'int', IntegerType())
        sys.alter_column(TEST_KS, 'Validators', 'time', TimeUUIDType())
        sys.alter_column(TEST_KS, 'Validators', 'lex', LexicalUUIDType())
        sys.alter_column(TEST_KS, 'Validators', 'ascii', AsciiType())
        sys.alter_column(TEST_KS, 'Validators', 'utf8', UTF8Type())
        sys.alter_column(TEST_KS, 'Validators', 'bytes', BytesType())
        sys.close()

        cf = ColumnFamily(pool, 'Validators')
        key = 'key1'

        col = {'long': 1L}
        cf.insert(key, col)
        assert_equal(cf.get(key)['long'], 1L)

        col = {'int': 1}
        cf.insert(key, col)
        assert_equal(cf.get(key)['int'], 1)

        col = {'time': TIME1}
        cf.insert(key, col)
        assert_equal(cf.get(key)['time'], TIME1)

        col = {'lex': uuid.UUID(bytes='aaa aaa aaa aaaa')}
        cf.insert(key, col)
        assert_equal(cf.get(key)['lex'], uuid.UUID(bytes='aaa aaa aaa aaaa'))

        col = {'ascii': 'aaa'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['ascii'], 'aaa')

        col = {'utf8': u'a\u0020'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['utf8'], u'a\u0020')

        col = {'bytes': 'aaa'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['bytes'], 'aaa')

        cf.remove(key)
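A small companion sketch for the validators above (same pool and 'Validators' column family): with value autopacking enabled, a column validator also rejects values of the wrong Python type at pack time.

cf = ColumnFamily(pool, 'Validators')
cf.insert('key1', {'int': 1})          # packed by IntegerType
try:
    cf.insert('key1', {'int': 'one'})  # wrong Python type for the validator
except TypeError, ex:
    print ex
cf.remove('key1')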
Example #10
    def test_validated_columns(self):
        sys = SystemManager()
        sys.create_column_family(
            TEST_KS,
            'Validators',
        )
        sys.alter_column(TEST_KS, 'Validators', 'long', LONG_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'int', INT_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'time', TIME_UUID_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'lex', LEXICAL_UUID_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'ascii', ASCII_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'utf8', UTF8_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'bytes', BYTES_TYPE)
        sys.close()

        cf = ColumnFamily(pool, 'Validators')
        key = 'key1'

        col = {'long': 1L}
        cf.insert(key, col)
        assert_equal(cf.get(key)['long'], 1L)

        col = {'int': 1}
        cf.insert(key, col)
        assert_equal(cf.get(key)['int'], 1)

        col = {'time': TIME1}
        cf.insert(key, col)
        assert_equal(cf.get(key)['time'], TIME1)

        col = {'lex': uuid.UUID(bytes='aaa aaa aaa aaaa')}
        cf.insert(key, col)
        assert_equal(cf.get(key)['lex'], uuid.UUID(bytes='aaa aaa aaa aaaa'))

        col = {'ascii': 'aaa'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['ascii'], 'aaa')

        col = {'utf8': u'a\u0020'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['utf8'], u'a\u0020')

        col = {'bytes': 'aaa'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['bytes'], 'aaa')

        cf.remove(key)
Example #11
class TestTypeErrors(unittest.TestCase):

    def test_packing_enabled(self):
        self.cf = ColumnFamily(pool, 'Standard1')
        self.cf.insert('key', {'col': 'val'})
        assert_raises(TypeError, self.cf.insert, 'key', {123: 'val'})
        assert_raises(TypeError, self.cf.insert, 'key', {'col': 123})
        assert_raises(TypeError, self.cf.insert, 'key', {123: 123})
        self.cf.remove('key')

    def test_packing_disabled(self):
        self.cf = ColumnFamily(pool, 'Standard1', autopack_names=False, autopack_values=False)
        self.cf.insert('key', {'col': 'val'})
        assert_raises(TypeError, self.cf.insert, 'key', {123: 'val'})
        assert_raises(TypeError, self.cf.insert, 'key', {'col': 123})
        assert_raises(TypeError, self.cf.insert, 'key', {123: 123})
        self.cf.remove('key')
Example #12
    def remove(self, instance, columns=None, write_consistency_level=None):
        """
        Removes a stored instance.

        The `columns` parameter is a list of columns that should be removed.
        If this is left as the default value of ``None``, the entire stored
        instance will be removed.

        """
        if self.super:
            return ColumnFamily.remove(self, instance.key,
                                       super_column=instance.super_column,
                                       columns=columns,
                                       write_consistency_level=write_consistency_level)
        else:
            return ColumnFamily.remove(self, instance.key, columns,
                                       write_consistency_level=write_consistency_level)
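A minimal usage sketch for this wrapper (hypothetical names: mapper is an instance of the mapped column family defining this method, obj a stored instance with its key set, and 'email' a stand-in column name):

mapper.remove(obj)                     # removes the whole stored instance
mapper.remove(obj, columns=['email'])  # removes only the named columns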
Example #15
    def create_new_thread(self, thread_name):
        threads = ColumnFamily(self.conn, 'threads')

        ret = list(threads.get_range())
        if len(ret) > 99:
            oldest_thread = self._get_oldest_thread()
            oldest_thread_id = str(oldest_thread['thread_id'])
            threads.remove(oldest_thread_id)
            self._drop_cf(oldest_thread_id)

        thread_id = '%s' % random.randint(1, sys.maxint)

        dt = datetime.datetime.today()
        str_dt = dt.strftime('%Y-%m-%d %H:%M:%S')
        threads.insert(thread_id, {'thread_name': thread_name, 'post_count': '1',
                                   'create_time': str_dt, 'update_time': str_dt})

        self._create_cf(thread_id)
        return thread_id
Example #16
    def test_validated_columns(self):
        sys = SystemManager()
        sys.create_column_family(TEST_KS, "Validators")
        sys.alter_column(TEST_KS, "Validators", "long", LongType())
        sys.alter_column(TEST_KS, "Validators", "int", IntegerType())
        sys.alter_column(TEST_KS, "Validators", "time", TimeUUIDType())
        sys.alter_column(TEST_KS, "Validators", "lex", LexicalUUIDType())
        sys.alter_column(TEST_KS, "Validators", "ascii", AsciiType())
        sys.alter_column(TEST_KS, "Validators", "utf8", UTF8Type())
        sys.alter_column(TEST_KS, "Validators", "bytes", BytesType())
        sys.close()

        cf = ColumnFamily(pool, "Validators")
        key = "key1"

        col = {"long": 1L}
        cf.insert(key, col)
        assert_equal(cf.get(key)["long"], 1L)

        col = {"int": 1}
        cf.insert(key, col)
        assert_equal(cf.get(key)["int"], 1)

        col = {"time": TIME1}
        cf.insert(key, col)
        assert_equal(cf.get(key)["time"], TIME1)

        col = {"lex": uuid.UUID(bytes="aaa aaa aaa aaaa")}
        cf.insert(key, col)
        assert_equal(cf.get(key)["lex"], uuid.UUID(bytes="aaa aaa aaa aaaa"))

        col = {"ascii": "aaa"}
        cf.insert(key, col)
        assert_equal(cf.get(key)["ascii"], "aaa")

        col = {"utf8": u"a\u0020"}
        cf.insert(key, col)
        assert_equal(cf.get(key)["utf8"], u"a\u0020")

        col = {"bytes": "aaa"}
        cf.insert(key, col)
        assert_equal(cf.get(key)["bytes"], "aaa")

        cf.remove(key)
Example #17
    def test_validated_columns(self):
        sys = SystemManager()
        sys.create_column_family(TEST_KS, 'Validators',)
        sys.alter_column(TEST_KS, 'Validators', 'long', LongType())
        sys.alter_column(TEST_KS, 'Validators', 'int', IntegerType())
        sys.alter_column(TEST_KS, 'Validators', 'time', TimeUUIDType())
        sys.alter_column(TEST_KS, 'Validators', 'lex', LexicalUUIDType())
        sys.alter_column(TEST_KS, 'Validators', 'ascii', AsciiType())
        sys.alter_column(TEST_KS, 'Validators', 'utf8', UTF8Type())
        sys.alter_column(TEST_KS, 'Validators', 'bytes', BytesType())
        sys.close()

        cf = ColumnFamily(pool, 'Validators')
        key = 'key1'

        col = {'long':1L}
        cf.insert(key, col)
        assert_equal(cf.get(key)['long'], 1L)

        col = {'int':1}
        cf.insert(key, col)
        assert_equal(cf.get(key)['int'], 1)

        col = {'time':TIME1}
        cf.insert(key, col)
        assert_equal(cf.get(key)['time'], TIME1)

        col = {'lex':uuid.UUID(bytes='aaa aaa aaa aaaa')}
        cf.insert(key, col)
        assert_equal(cf.get(key)['lex'], uuid.UUID(bytes='aaa aaa aaa aaaa'))

        col = {'ascii':'aaa'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['ascii'], 'aaa')

        col = {'utf8':u'a\u0020'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['utf8'], u'a\u0020')

        col = {'bytes':'aaa'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['bytes'], 'aaa')

        cf.remove(key)
Example #18
    def test_validated_columns(self):
        sys = SystemManager()
        sys.create_column_family(TEST_KS, 'Validators',)
        sys.alter_column(TEST_KS, 'Validators', 'long', LONG_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'int', INT_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'time', TIME_UUID_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'lex', LEXICAL_UUID_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'ascii', ASCII_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'utf8', UTF8_TYPE)
        sys.alter_column(TEST_KS, 'Validators', 'bytes', BYTES_TYPE)
        sys.close()

        cf = ColumnFamily(pool, 'Validators')
        key = 'key1'

        col = {'long':1L}
        cf.insert(key, col)
        assert_equal(cf.get(key)['long'], 1L)

        col = {'int':1}
        cf.insert(key, col)
        assert_equal(cf.get(key)['int'], 1)

        col = {'time':TIME1}
        cf.insert(key, col)
        assert_equal(cf.get(key)['time'], TIME1)

        col = {'lex':uuid.UUID(bytes='aaa aaa aaa aaaa')}
        cf.insert(key, col)
        assert_equal(cf.get(key)['lex'], uuid.UUID(bytes='aaa aaa aaa aaaa'))

        col = {'ascii':'aaa'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['ascii'], 'aaa')

        col = {'utf8':u'a\u0020'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['utf8'], u'a\u0020')

        col = {'bytes':'aaa'}
        cf.insert(key, col)
        assert_equal(cf.get(key)['bytes'], 'aaa')

        cf.remove(key)
Example #19
    def truncate_log_metadata(self):
        for cf in ['build_timelines']:
            cf = ColumnFamily(self.pool, cf)
            cf.truncate()

        cf = ColumnFamily(self.pool, 'indices')
        for key in LOG_METADATA_INDICES:
            cf.remove(key)

        cf = ColumnFamily(self.pool, 'counters')
        for key in LOG_METADATA_COUNTERS:
            cf.remove(key)

        cf = ColumnFamily(self.pool, 'super_counters')
        for key in LOG_METADATA_SUPER_COUNTERS:
            cf.remove(key)

        cf = ColumnFamily(self.pool, 'builds')
        batch = cf.batch()
        # Remove log parsing state from builds.
        for key, cols in cf.get_range(columns=['log_parsing_version']):
            if 'log_parsing_version' not in cols:
                continue

            batch.remove(key, ['log_parsing_version'])

        batch.send()
Example #20
 def column_family_remove(self, keyspace_name, column_family_name, key):
     """Remove a key from a column family in the given keyspace."""
     keyspace.error = "Unknown error occurred; please check your inputs."
     if not self.keyspace_contains(keyspace.local_system, keyspace_name, column_family_name):
         keyspace.error = "Desired keyspace, column family pair could not be found."
         return False
     try:
         pool = ConnectionPool(keyspace=keyspace_name, server_list=keyspace.server_ips, prefill=False)
     except Exception as e:
         print e
         return False
     try:
         col_fam = ColumnFamily(pool, column_family_name)
     except Exception as e:
         print e
         return False
     try:
         col_fam.remove(key)
     except Exception as e:
         print e
         return False
     return True
Example #21
class TestDateTypes(unittest.TestCase):
    def _compare_dates(self, d1, d2):
        self.assertEquals(d1.timetuple(), d2.timetuple())
        self.assertEquals(int(d1.microsecond / 1e3), int(d2.microsecond / 1e3))

    def test_compatibility(self):
        self.cf = ColumnFamily(pool, "Standard1")
        self.cf.column_validators["date"] = OldPycassaDateType()

        d = datetime.utcnow()
        self.cf.insert("key1", {"date": d})
        self._compare_dates(self.cf.get("key1")["date"], d)

        self.cf.column_validators["date"] = IntermediateDateType()
        self._compare_dates(self.cf.get("key1")["date"], d)
        self.cf.insert("key1", {"date": d})
        self._compare_dates(self.cf.get("key1")["date"], d)

        self.cf.column_validators["date"] = DateType()
        self._compare_dates(self.cf.get("key1")["date"], d)
        self.cf.insert("key1", {"date": d})
        self._compare_dates(self.cf.get("key1")["date"], d)
        self.cf.remove("key1")
Example #22
    def undo(self):
        if self.op_type == InsertCommand.INS_BASIC:  # op_type attribute assumed, mirroring Example #2; comparing the builtin `type` was a bug
            ## I know that data for a basic insert is of this tuple type
            domain, row_key, basic_type_dict = self.data

            client = db_connection.get_client()
            cf = ColumnFamily(client, domain)
            cf.remove(row_key)

        elif self.op_type == InsertCommand.INS_OBJECT:
            ## call the save operation for the object
            if self.data:
                self.data.delete(cascade=False)

        elif self.op_type == InsertCommand.INS_BATCH:
            domain, basic_type_item_dict = self.data
            client = db_connection.get_client()
            cf = ColumnFamily(client, domain)

            b = cf.batch()
            for row_key in basic_type_item_dict.keys():
                b.remove(row_key)
            b.send()
Example #23
import pycassa
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

pool = ConnectionPool('ApplicationData',
                      ['localhost:9160'])
col_fam = ColumnFamily(pool, 'UserInfo')
col_fam.insert('Diego', {'email': '*****@*****.**'})

readData = col_fam.get('Diego', columns=['email'])

col_fam.remove('Diego', columns=['email'])

#batch

b = col_fam.batch(queue_size=10)

b.insert('John',
         {'email': '*****@*****.**',
          'state': 'IL',
          'gender': 'M'})

b.insert('Jane',
         {'email': '*****@*****.**',
          'state': 'CA'})

b.remove('John', ['gender'])
b.remove('Jane')
b.send()
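One detail worth noting in the batch above: queue_size=10 makes the mutator flush itself automatically whenever ten mutations are queued, so send() only has to flush the remainder. A hedged sketch:

b = col_fam.batch(queue_size=10)
for i in range(25):
    b.insert('user%d' % i, {'email': 'user%d@example.com' % i})
b.send()   # flushes the 5 mutations still queued after two automatic sends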
Example #24
class DailyTemporalBloomFilter(DailyTemporalBase):
    """Long-range temporal Bloom filter using a daily resolution.

    For really high values of expiration (like 60 days) with low requirements on
    precision. The actual error of this BF will be the native error of the BF plus
    the error related to the coarse expiration, since we no longer expire
    information precisely. Also, as opposed to a classic Bloom filter, this one
    will have false positives (reporting membership for a non-member) AND false
    negatives (reporting non-membership for a member).

    The upper bound of the temporal error can theoretically be quite high. However,
    if the items of the set are uniformly distributed over time, the average error
    will be something like 1.0 / expiration.
    """

    def __new__(cls, capacity, error_rate, expiration, name, cassandra_session, snapshot_path='./'):
        return super(DailyTemporalBloomFilter, cls).__new__(cls, capacity=capacity, error_rate=error_rate)

    def __init__(self, capacity, error_rate, expiration, name, cassandra_session, snapshot_path='./'):
        filename = ""
        super(DailyTemporalBloomFilter, self).__init__(capacity=capacity, error_rate=error_rate)
        self.bf_name = name
        self.expiration = expiration
        self.initialize_period()
        self.cassandra_session = cassandra_session
        self.cassandra_columns_family = "temporal_bf"
        self.keyspace = 'parsely'
        self.uncommited_keys = []
        self.commit_batch = 1000
        self.columnfamily = None
        self.ensure_cassandra_cf()
        self.snapshot_path = snapshot_path

    def ensure_cassandra_cf(self):
        s = SystemManager()
        if self.keyspace not in s.list_keyspaces():
            s.create_keyspace(self.keyspace, SIMPLE_STRATEGY, {'replication_factor': '1'})
        if self.cassandra_columns_family not in s.get_keyspace_column_families(self.keyspace):
            s.create_column_family(self.keyspace, self.cassandra_columns_family)
        self.columnfamily = ColumnFamily(self.cassandra_session, self.cassandra_columns_family)

    def archive_bf_key(self, bf_key):
        self.uncommited_keys.append(bf_key)
        if len(self.uncommited_keys) >= self.commit_batch:
            current_period_hour = dt.datetime.now().strftime('%Y-%m-%d:%H')
            self.columnfamily.insert('%s_%s' % (self.bf_name, current_period_hour), {k:'' for k in self.uncommited_keys})
            self.uncommited_keys = []

    def _hour_range(self, start, end, reverse=False, inclusive=True):
        """Generator that gives us all the hours between a start and end datetime
        (inclusive)."""

        def total_seconds(td):
            return (td.microseconds + (td.seconds + td.days * 24.0 * 3600.0) * 10.0**6) / 10.0**6

        hours = int(math.ceil(total_seconds(end - start) / (60.0 * 60.0)))
        if inclusive:
            hours += 1
        for i in xrange(hours):
            if reverse:
                yield end - dt.timedelta(hours=i)
            else:
                yield start + dt.timedelta(hours=i)

    def _day_range(self, start, end, reverse=False, inclusive=True):
        """Generator that gives us all the days between a start and end datetime
        (inclusive)."""
        days = (end - start).days
        if inclusive:
            days += 1
        for i in xrange(days):
            if reverse:
                yield end - dt.timedelta(days=i)
            else:
                yield start + dt.timedelta(days=i)

    def _drop_archive(self):
        last_period = self.current_period - dt.timedelta(days=self.expiration-1)
        hours = self._hour_range(last_period, dt.datetime.now())
        for hour in hours:
            try:
                row = "%s_%s" % (self.bf_name, hour.strftime('%Y-%m-%d:%H'))
                nbr_keys = self.columnfamily.get_count(row)
                keys = self.columnfamily.remove(row)
            except:
                pass

    def rebuild_from_archive(self, rebuild_snapshot=True):
        """Rebuild the BF using the archived items"""
        self.initialize_bitarray()

        #if rebuild_snapshot:
        #    self.delete_snapshots()

        def multi_rows_itr(rows):
            for row in rows.values():
                for k in row.keys():
                    yield k

        last_period = self.current_period - dt.timedelta(days=self.expiration-1)
        hours = self._hour_range(last_period, dt.datetime.now())
        days = self._day_range(last_period, dt.datetime.now())
        rows = []
        for i,day in enumerate(days):
            rows = ["%s_%s:%s" % (self.bf_name, day.strftime('%Y-%m-%d'), hour_str) for hour_str in ["%02d" % i for i in range(24)]]
            rows_content = self.columnfamily.multiget(rows, column_count=1E6)
            update_current = day == self.current_period

            for k in multi_rows_itr(rows_content):
                self.add_rebuild(k, update_current)

            if rebuild_snapshot:
                self.save_snaphot(override_period=day)

            if not update_current:
                self.initialize_current_day_bitarray()

    def restore_from_disk(self, clean_old_snapshot=False):
        """Restore the state of the BF using previous snapshots.

        :clean_old_snapshot: Delete the old snapshot on the disk (period < current - expiration)
        """
        base_filename = "%s/%s_%s_*.dat" % (self.snapshot_path, self.bf_name, self.expiration)
        availables_snapshots = glob.glob(base_filename)
        last_period = self.current_period - dt.timedelta(days=self.expiration-1)
        for filename in availables_snapshots:
            snapshot_period = dt.datetime.strptime(filename.split('_')[-1].strip('.dat'), "%Y-%m-%d")
            if snapshot_period < last_period and not clean_old_snapshot:
                continue
            else:
                self._union_bf_from_file(filename)
                if snapshot_period == self.current_period:
                    self._union_bf_from_file(filename, current=True)

            if snapshot_period < last_period and clean_old_snapshot:
                os.remove(filename)
        self.ready = True

    def add_rebuild(self, key, update_current=True):
        super(DailyTemporalBloomFilter, self).add(key, update_current)

    def add(self, key_string):
        if isinstance(key_string, unicode):
            key = key_string.encode('utf8')
        else:
            key = key_string

        self.archive_bf_key(key)
        result = super(DailyTemporalBloomFilter, self).add(key)

        return result

    def resize(self, new_capacity=None, new_error_rate=None):
        self._set_capacity(new_capacity or self.capacity)
        self._set_error_rate(new_error_rate or self.error_rate)
        self._initialize_parameters()
        self.initialize_bitarray()
        self.rebuild_from_archive(rebuild_snapshot=True)

    def initialize_period(self, period=None):
        """Initialize the period of BF.

        :period: datetime.datetime for setting the period explicitly.
        """
        if not period:
            self.current_period = dt.datetime.now()
        else:
            self.current_period = period
        self.current_period = dt.datetime(self.current_period.year, self.current_period.month, self.current_period.day)
        self.date = self.current_period.strftime("%Y-%m-%d")

    def save_snaphot(self, override_period=None):
        """Save the current state of the current day bitarray on disk.

        Save the internal representation (bitarray) into a binary file using this format:
            filename : name_expiration_2013-01-01.dat
        """
        period = override_period or self.current_period
        filename = "%s/%s_%s_%s.dat" % (self.snapshot_path, self.bf_name, self.expiration, period.strftime("%Y-%m-%d"))
        self._save_snapshot(filename)
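A hedged construction sketch for the class above (it assumes the 'parsely' keyspace already exists so the pool can connect, and that membership testing via `in` is inherited from DailyTemporalBase):

pool = ConnectionPool('parsely', ['localhost:9160'])
bf = DailyTemporalBloomFilter(capacity=100000, error_rate=0.01, expiration=60,
                              name='events', cassandra_session=pool)
bf.add('some-key')         # also archives the key to Cassandra in batches of 1000
print 'some-key' in bf     # membership query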
Example #25
class Buyer(Llama):
    def __init__(self, client, qname, trend=5):
        super(Buyer, self).__init__(client, uuid.uuid4().hex)
        self.holdings = {}
        self.cash = 100000.0
        self.history = {}
        self.trend = trend
        self.pool = ConnectionPool('example_consumer_Buyer')
        self.stored_holdings = ColumnFamily(self.pool, 'Holdings')
        self.quote_history = ColumnFamily(self.pool, 'Quotes')
        self.stored_cash = ColumnFamily(self.pool, 'Cash')

        try:
            cash = self.stored_cash.get('current')
            self.cash = cash['amount']
        except ttypes.NotFoundException:
            self.stored_cash.insert('current', {'amount': self.cash})

        for symbol, columns in self.stored_holdings.get_range():
            self.holdings[symbol] = (columns['number_of_shares'],
                                     columns['price'], columns['cost'])

        date_expression = create_index_expression('timestamp',
                                                  datetime.date.today(), GT)
        date_clause = create_index_clause([date_expression], count=1000)

        for key, columns in self.quote_history.get_range():
            symbol = columns['symbol']
            price = columns['price']
            self.add_quote(symbol, price)

    def add_quote(self, symbol, price):
        if symbol not in self.history:
            self.history[symbol] = [price]
        else:
            self.history[symbol].append(price)

        if len(self.history[symbol]) >= self.trend:
            price_low = min(self.history[symbol][-self.trend:])
            price_max = max(self.history[symbol][-self.trend:])
            price_avg = sum(self.history[symbol][-self.trend:]) / self.trend
            #print "Recent history of %s is %s" % (symbol, self.history[symbol][-self.trend:])
        else:
            price_low, price_max, price_avg = (-1, -1, -1)
            print "%s quotes until we start deciding whether to buy or sell %s" % (
                self.trend - len(self.history[symbol]), symbol)
            #print "Recent history of %s is %s" % (symbol, self.history[symbol])

        return (price_low, price_max, price_avg)

    def do_message(self, quote):
        symbol, price, date, counter = quote
        #print "Thinking about whether to buy or sell %s at %s" % (symbol, price)

        price_low, price_max, price_avg = self.add_quote(symbol, price)

        self.save_quote(symbol, price)

        if price_low == -1: return

        #print "Trending minimum/avg/max of %s is %s-%s-%s" % (symbol, price_low, price_avg, price_max)
        #for symbol in self.holdings.keys():
        #    print "self.history[symbol][-1] = %s" % self.history[symbol][-1]
        #    print "self.holdings[symbol][0] = %s" % self.holdings[symbol][0]
        #    print "Value of %s is %s" % (symbol, float(self.holdings[symbol][0])*self.history[symbol][-1])
        value = sum([
            self.holdings[symbol][0] * self.history[symbol][-1]
            for symbol in self.holdings.keys()
        ])
        print "Net worth is %s + %s = %s" % (self.cash, value,
                                             self.cash + value)

        if symbol not in self.holdings:
            if price < 1.01 * price_low:
                shares_to_buy = random.choice([10, 15, 20, 25, 30])
                print "I don't own any %s yet, and the price is below the trending minimum of %s so I'm buying %s shares." % (
                    symbol, price_low, shares_to_buy)
                cost = shares_to_buy * price
                print "Cost is %s, cash is %s" % (cost, self.cash)
                if cost < self.cash:
                    self.buy_holdings(symbol, shares_to_buy, price, cost)
                    self.update_cash(-cost)
                    print "Cash is now %s" % self.cash
                else:
                    print "Unfortunately, I don't have enough cash at this time."
        else:
            if price > self.holdings[symbol][1] and price > 0.99 * price_max:
                print "+++++++ Price of %s is higher than my holdings, so I'm going to sell!" % symbol
                sale_value = self.holdings[symbol][0] * price
                print "Sale value is %s" % sale_value
                print "Holdings value is %s" % self.holdings[symbol][2]
                print "Total net is %s" % (sale_value -
                                           self.holdings[symbol][2])
                self.update_cash(sale_value)
                print "Cash is now %s" % self.cash
                self.sell_holdings(symbol)

    def update_cash(self, change):
        self.cash += change
        cash = self.stored_cash.get('current')
        cash['amount'] = self.cash
        self.stored_cash.insert('current', cash)

    def buy_holdings(self, symbol, shares_to_buy, price, cost):
        self.holdings[symbol] = (shares_to_buy, price, cost)
        stored_holding = {
            'number_of_shares': shares_to_buy,
            'price': price,
            'cost': cost
        }
        self.stored_holdings.insert(symbol, stored_holding)

    def sell_holdings(self, symbol):
        del self.holdings[symbol]
        self.stored_holdings.remove(symbol)

    def save_quote(self, symbol, price):
        key = str(uuid.uuid4())
        self.quote_history.insert(key, {'symbol': symbol, 'price': price})
Example #26
import pycassa
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

pool = ConnectionPool('superKeySpace', ['localhost:9160'])

col_fam = ColumnFamily(pool, 'superGroup')

row = col_fam.get('Will')
print('Got and printing row = will')
print(row)

superCol = row.get('name')
print('printing the super column "name" for Will\'s record')
print(superCol)

#add first name will
col_fam.insert('Will', {'name': {'first': 'Will'}})
print('added Will\'s first name and printing Will\'s record')
print(col_fam.get('Will'))

#change first from will to bill
col_fam.insert('Will', {'name': {'first': 'Bill'}})
print('changed Will\'s first name to Bill and printing Will\'s record')
print(col_fam.get('Will'))

#resetting Will's first name to empty
print('removing Will\'s first name')
col_fam.remove('Will', super_column='name', columns=['first'])
print(col_fam.get('Will'))
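For a super column family like this one, remove() can target three granularities; a small sketch against the same 'superGroup' column family:

col_fam.remove('Will', super_column='name', columns=['first'])  # one sub-column
col_fam.remove('Will', columns=['name'])                        # the whole 'name' super column
col_fam.remove('Will')                                          # the entire row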
Example #27
def remove_row(columnFamily, uid):
	"Remove a row from a column family."
	column = ColumnFamily(pool, columnFamily)
	column.remove(uid)
Example #28
class Buyer(Llama):
    def __init__(self, client, qname, trend=5):
        super(Buyer, self).__init__(client, uuid.uuid4().hex)
        self.holdings = {}
        self.cash = 100000.0
        self.history = {}
        self.trend = trend
        self.pool = ConnectionPool('example_consumer_Buyer')
        self.stored_holdings = ColumnFamily(self.pool, 'Holdings')
        self.quote_history = ColumnFamily(self.pool, 'Quotes')
        self.stored_cash = ColumnFamily(self.pool, 'Cash')

        try:
          cash = self.stored_cash.get('current')
          self.cash = cash['amount']
        except ttypes.NotFoundException:
          self.stored_cash.insert('current', { 'amount': self.cash })

        for symbol, columns in self.stored_holdings.get_range():
          self.holdings[symbol] = (columns['number_of_shares'], columns['price'], columns['cost'])

        date_expression = create_index_expression('timestamp', datetime.date.today(), GT)
        date_clause = create_index_clause([date_expression], count=1000)

        for key, columns in self.quote_history.get_range():
          symbol = columns['symbol']
          price = columns['price']
          self.add_quote(symbol, price)

    def add_quote(self, symbol, price):
        if symbol not in self.history: 
            self.history[symbol] = [price]
        else:
            self.history[symbol].append(price)

        if len(self.history[symbol]) >= self.trend:
            price_low = min(self.history[symbol][-self.trend:])
            price_max = max(self.history[symbol][-self.trend:])
            price_avg = sum(self.history[symbol][-self.trend:])/self.trend
            #print "Recent history of %s is %s" % (symbol, self.history[symbol][-self.trend:])
        else:
            price_low, price_max, price_avg = (-1, -1, -1)
            print "%s quotes until we start deciding whether to buy or sell %s" % (self.trend - len(self.history[symbol]), symbol)
            #print "Recent history of %s is %s" % (symbol, self.history[symbol])

        return (price_low, price_max, price_avg)


    def do_message(self, quote):
        symbol, price, date, counter = quote
        #print "Thinking about whether to buy or sell %s at %s" % (symbol, price)

        price_low, price_max, price_avg = self.add_quote(symbol, price)

        self.save_quote(symbol, price)

        if price_low == -1: return

        #print "Trending minimum/avg/max of %s is %s-%s-%s" % (symbol, price_low, price_avg, price_max)
        #for symbol in self.holdings.keys():
        #    print "self.history[symbol][-1] = %s" % self.history[symbol][-1]
        #    print "self.holdings[symbol][0] = %s" % self.holdings[symbol][0]
        #    print "Value of %s is %s" % (symbol, float(self.holdings[symbol][0])*self.history[symbol][-1])
        value = sum([self.holdings[symbol][0]*self.history[symbol][-1] for symbol in self.holdings.keys()])
        print "Net worth is %s + %s = %s" % (self.cash, value, self.cash + value)

        if symbol not in self.holdings:
            if price < 1.01*price_low:
                shares_to_buy = random.choice([10, 15, 20, 25, 30])
                print "I don't own any %s yet, and the price is below the trending minimum of %s so I'm buying %s shares." % (symbol, price_low, shares_to_buy)
                cost = shares_to_buy * price
                print "Cost is %s, cash is %s" % (cost, self.cash)
                if cost < self.cash:
                    self.buy_holdings(symbol, shares_to_buy, price, cost)
                    self.update_cash(-cost)
                    print "Cash is now %s" % self.cash
                else:
                    print "Unfortunately, I don't have enough cash at this time."
        else:
            if price > self.holdings[symbol][1] and price > 0.99*price_max:
                print "+++++++ Price of %s is higher than my holdings, so I'm going to sell!" % symbol
                sale_value = self.holdings[symbol][0] * price
                print "Sale value is %s" % sale_value
                print "Holdings value is %s" % self.holdings[symbol][2]
                print "Total net is %s" % (sale_value - self.holdings[symbol][2])
                self.update_cash(sale_value)
                print "Cash is now %s" % self.cash
                self.sell_holdings(symbol)

    def update_cash(self, change):
      self.cash += change
      cash = self.stored_cash.get('current')
      cash['amount'] = self.cash
      self.stored_cash.insert('current', cash)

    def buy_holdings(self, symbol, shares_to_buy, price, cost):
      self.holdings[symbol] = (shares_to_buy, price, cost)
      stored_holding = {'number_of_shares': shares_to_buy, 'price': price, 'cost': cost}
      self.stored_holdings.insert(symbol, stored_holding)

    def sell_holdings(self, symbol):
      del self.holdings[symbol]
      self.stored_holdings.remove(symbol)

    def save_quote(self, symbol, price):
      key = str(uuid.uuid4())
      self.quote_history.insert(key, { 'symbol': symbol, 'price': price })
Example #29
class TestTimeUUIDs(unittest.TestCase):
    def setUp(self):
        self.cf_time = ColumnFamily(pool, 'StdTimeUUID')

    def tearDown(self):
        self.cf_time.remove('key1')

    def test_datetime_to_uuid(self):
        key = 'key1'
        timeline = []

        timeline.append(datetime.now())
        time1 = uuid1()
        col1 = {time1: '0'}
        self.cf_time.insert(key, col1)
        time.sleep(1)

        timeline.append(datetime.now())
        time2 = uuid1()
        col2 = {time2: '1'}
        self.cf_time.insert(key, col2)
        time.sleep(1)

        timeline.append(datetime.now())

        cols = {time1: '0', time2: '1'}

        assert_equal(self.cf_time.get(key, column_start=timeline[0]), cols)
        assert_equal(self.cf_time.get(key, column_finish=timeline[2]), cols)
        assert_equal(
            self.cf_time.get(key,
                             column_start=timeline[0],
                             column_finish=timeline[2]), cols)
        assert_equal(
            self.cf_time.get(key,
                             column_start=timeline[0],
                             column_finish=timeline[1]), col1)
        assert_equal(
            self.cf_time.get(key,
                             column_start=timeline[1],
                             column_finish=timeline[2]), col2)

    def test_time_to_uuid(self):
        key = 'key1'
        timeline = []

        timeline.append(time.time())
        time1 = uuid1()
        col1 = {time1: '0'}
        self.cf_time.insert(key, col1)
        time.sleep(0.1)

        timeline.append(time.time())
        time2 = uuid1()
        col2 = {time2: '1'}
        self.cf_time.insert(key, col2)
        time.sleep(0.1)

        timeline.append(time.time())

        cols = {time1: '0', time2: '1'}

        assert_equal(self.cf_time.get(key, column_start=timeline[0]), cols)
        assert_equal(self.cf_time.get(key, column_finish=timeline[2]), cols)
        assert_equal(
            self.cf_time.get(key,
                             column_start=timeline[0],
                             column_finish=timeline[2]), cols)
        assert_equal(
            self.cf_time.get(key,
                             column_start=timeline[0],
                             column_finish=timeline[1]), col1)
        assert_equal(
            self.cf_time.get(key,
                             column_start=timeline[1],
                             column_finish=timeline[2]), col2)

    def test_auto_time_to_uuid1(self):
        key = 'key1'
        t = time.time()
        col = {t: 'foo'}
        self.cf_time.insert(key, col)
        uuid_res = self.cf_time.get(key).keys()[0]
        timestamp = convert_uuid_to_time(uuid_res)
        assert_almost_equal(timestamp, t, places=3)
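The datetime and epoch-float slicing in the tests above works because pycassa converts those bounds into boundary TimeUUIDs; the same conversions are exposed in pycassa.util. A short sketch:

from pycassa.util import convert_time_to_uuid, convert_uuid_to_time

u = convert_time_to_uuid(time.time())   # lowest possible UUID for that timestamp
print convert_uuid_to_time(u)           # back to an epoch float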
Example #30

################################# COUNT #######################################
# Count the number of columns for the row key
count=author_cf.get_count("sacharya1")
print count 

count=author_cf.multiget_count(["sacharya1","sacharya2"])
print count
################################## REMOVE #####################################
# Remove the column for the row key and column key
print "Removing the column last_name for row key sacharya1"
author_cf.remove('sacharya1', columns=['last_name'])

time.sleep(5)

authors = author_cf.get('sacharya')
print authors

# REMOVE the entire row
author_cf.remove('sacharya')
try:
    time.sleep(5)
    print "Getting object already deleted"
    author_cf.get('sacharya')
except Exception as e:
    print e
Example #31
class DailyTemporalBloomFilter(object):
    """Long Range Temporal BloomFilter using a daily resolution.

    For really high value of expiration (like 60 days) with low requirement on precision.
    The actual error of this BF will the be native error of the BF + the error related
    to the coarse aspect of the expiration, since we no longer expires information precisely.
    Also, as opposed to a classic Bloom Filter, this one will aslo have false positive (reporting membership for a non-member)
    AND false negative (reporting non-membership for a member).

    The upper bound of the temporal_error can be theoricaly quite high. However, if the
    items of the set are uniformly distributed over time, the avg error will be something like 1.0 / expiration
    """
    def __init__(self, capacity, error_rate, expiration, name, snapshot_path,
                 cassandra_session):
        self.error_rate = error_rate
        self.capacity = capacity
        self._initialize_parameters()
        self.initialize_bitarray()
        self.count = 0
        self.hashed_values = []
        self.name = name
        self.snapshot_path = snapshot_path
        self.expiration = expiration
        self.initialize_period()
        self.snapshot_to_load = None
        self.ready = False
        self.warm_period = None
        self.next_snapshot_load = time.time()
        self.cassandra_session = cassandra_session
        self.cassandra_columns_family = "temporal_bf"
        self.keyspace = 'parsely'
        self.uncommited_keys = []
        self.commit_batch = 1000
        self.columnfamily = None
        self.ensure_cassandra_cf()

    def _initialize_parameters(self):
        self.nbr_slices = int(np.ceil(np.log2(1.0 / self.error_rate)))
        self.bits_per_slice = int(
            np.ceil((self.capacity * abs(np.log(self.error_rate))) /
                    (self.nbr_slices * (np.log(2)**2))))
        self.nbr_bits = self.nbr_slices * self.bits_per_slice
        self.hashes = generate_hashfunctions(self.bits_per_slice,
                                             self.nbr_slices)

    def ensure_cassandra_cf(self):
        s = SystemManager()
        if self.keyspace not in s.list_keyspaces():
            s.create_keyspace(self.keyspace, SIMPLE_STRATEGY,
                              {'replication_factor': '1'})
        if self.cassandra_columns_family not in s.get_keyspace_column_families(
                self.keyspace):
            s.create_column_family(self.keyspace,
                                   self.cassandra_columns_family)
        self.columnfamily = ColumnFamily(self.cassandra_session,
                                         self.cassandra_columns_family)

    def archive_bf_key(self, bf_key):
        self.uncommited_keys.append(bf_key)
        if len(self.uncommited_keys) >= self.commit_batch:
            current_period_hour = dt.datetime.now().strftime('%Y-%m-%d:%H')
            self.columnfamily.insert(
                '%s_%s' % (self.name, current_period_hour),
                {k: ''
                 for k in self.uncommited_keys})
            self.uncommited_keys = []

    def _hour_range(self, start, end, reverse=False, inclusive=True):
        """Generator that gives us all the hours between a start and end datetime
        (inclusive)."""
        def total_seconds(td):
            return (td.microseconds +
                    (td.seconds + td.days * 24.0 * 3600.0) * 10.0**6) / 10.0**6

        hours = int(math.ceil(total_seconds(end - start) / (60.0 * 60.0)))
        if inclusive:
            hours += 1
        for i in xrange(hours):
            if reverse:
                yield end - dt.timedelta(hours=i)
            else:
                yield start + dt.timedelta(hours=i)

    def resize(self, new_capacity):
        self.capacity = new_capacity
        self._initialize_parameters()
        self.rebuild_from_archive()

    def _drop_archive(self):
        last_period = self.current_period - dt.timedelta(days=self.expiration -
                                                         1)
        hours = self._hour_range(last_period, dt.datetime.now())
        for hour in hours:
            try:
                row = "%s_%s" % (self.name, hour.strftime('%Y-%m-%d:%H'))
                nbr_keys = self.columnfamily.get_count(row)
                keys = self.columnfamily.remove(row)
            except:
                pass

    def rebuild_from_archive(self):
        """Rebuild the BF using the archived items"""
        self.initialize_bitarray()
        last_period = self.current_period - dt.timedelta(days=self.expiration -
                                                         1)
        hours = self._hour_range(last_period, dt.datetime.now())
        rows = []
        for i, hour in enumerate(hours):
            row = "%s_%s" % (self.name, hour.strftime('%Y-%m-%d:%H'))
            rows.append(row)
        rows_content = self.columnfamily.multiget(rows, column_count=1E6)

        for row_content in rows_content.values():
            for k in row_content.keys():
                self.add(k, rebuild_mode=True)

    def initialize_bitarray(self):
        """Initialize both bitarray.

        This BF contain two bit arrays instead of single one like a plain BF. bitarray
        is the main bit array where all the historical items are stored. It's the one
        used for the membership query. The second one, current_day_bitarray is the one
        used for creating the daily snapshot.
        """
        self.bitarray = bitarray.bitarray(self.nbr_bits)
        self.current_day_bitarray = bitarray.bitarray(self.nbr_bits)
        self.bitarray.setall(False)
        self.current_day_bitarray.setall(False)

    def __contains__(self, key):
        """Check membership."""
        self.hashed_values = self.hashes(key)
        offset = 0
        for value in self.hashed_values:
            if not self.bitarray[offset + value]:
                return False
            offset += self.bits_per_slice
        return True

    def add(self, key, rebuild_mode=False):
        if not rebuild_mode:
            self.archive_bf_key(key)
        if key in self:
            return True
        offset = 0
        if not self.hashed_values:
            self.hashed_values = self.hashes(key)
        for value in self.hashed_values:
            self.bitarray[offset + value] = True
            self.current_day_bitarray[offset + value] = True
            offset += self.bits_per_slice
        self.count += 1
        return False

    def initialize_period(self, period=None):
        """Initialize the period of BF.

        :period: datetime.datetime for setting the period explicitly.
        """
        if not period:
            self.current_period = dt.datetime.now()
        else:
            self.current_period = period
        self.current_period = dt.datetime(self.current_period.year,
                                          self.current_period.month,
                                          self.current_period.day)
        self.date = self.current_period.strftime("%Y-%m-%d")

    def maintenance(self):
        """Expire the old element of the set.

        Initialize a new bitarray and load the previous snapshot. Execute this guy
        at the beginining of each day.
        """
        self.initialize_period()
        self.initialize_bitarray()
        self.restore_from_disk()

    def compute_refresh_period(self):
        self.warm_period = (60 * 60 * 24) // (self.expiration - 2)

    def _should_warm(self):
        return time.time() >= self.next_snapshot_load

    def warm(self, jittering_ratio=0.2):
        """Progressively load the previous snapshot during the day.

        Loading all the snapshots at once can takes a substantial amount of time. This method, if called
        periodically during the day will progressively load those snapshots one by one. Because many workers are
        going to use this method at the same time, we add a jittering to the period between load to avoid
        hammering the disk at the same time.
        """
        if self.snapshot_to_load == None:
            last_period = self.current_period - dt.timedelta(
                days=self.expiration - 1)
            self.compute_refresh_period()
            self.snapshot_to_load = []
            base_filename = "%s/%s_%s_*.dat" % (self.snapshot_path, self.name,
                                                self.expiration)
            availables_snapshots = glob.glob(base_filename)
            for filename in availables_snapshots:
                snapshot_period = dt.datetime.strptime(
                    filename.split('_')[-1].strip('.dat'), "%Y-%m-%d")
                if snapshot_period >= last_period:
                    self.snapshot_to_load.append(filename)
                    self.ready = False

        if self.snapshot_to_load and self._should_warm():
            filename = self.snapshot_to_load.pop()
            self._union_bf_from_file(filename)
            jittering = self.warm_period * (np.random.random() -
                                            0.5) * jittering_ratio
            self.next_snapshot_load = time.time(
            ) + self.warm_period + jittering
            if not self.snapshot_to_load:
                self.ready = True

    def _union_bf_from_file(self, filename, current=False):
        snapshot = cPickle.loads(zlib.decompress(open(filename, 'r').read()))
        if current:
            self.current_day_bitarray = self.current_day_bitarray | snapshot
        else:
            self.bitarray = self.bitarray | snapshot

    def restore_from_disk(self, clean_old_snapshot=False):
        """Restore the state of the BF using previous snapshots.

        :clean_old_snapshot: Delete the old snapshot on the disk (period < current - expiration)
        """
        base_filename = "%s/%s_%s_*.dat" % (self.snapshot_path, self.name,
                                            self.expiration)
        availables_snapshots = glob.glob(base_filename)
        last_period = self.current_period - dt.timedelta(days=self.expiration -
                                                         1)
        for filename in availables_snapshots:
            snapshot_period = dt.datetime.strptime(
                filename.split('_')[-1].strip('.dat'), "%Y-%m-%d")
            if snapshot_period < last_period and not clean_old_snapshot:
                continue
            else:
                self._union_bf_from_file(filename)
                if snapshot_period == self.current_period:
                    self._union_bf_from_file(filename, current=True)

            if snapshot_period < last_period and clean_old_snapshot:
                os.remove(filename)
        self.ready = True

    def save_snaphot(self):
        """Save the current state of the current day bitarray on disk.

        Save the internal representation (bitarray) into a binary file using this format:
            filename : name_expiration_2013-01-01.dat
        """
        filename = "%s/%s_%s_%s.dat" % (self.snapshot_path, self.name,
                                        self.expiration, self.date)
        with open(filename, 'w') as f:
            f.write(
                zlib.compress(
                    cPickle.dumps(self.current_day_bitarray,
                                  protocol=cPickle.HIGHEST_PROTOCOL)))

    def union_current_day(self, bf):
        """Union only the current_day of an other BF."""
        self.bitarray = self.bitarray | bf.current_day_bitarray
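Pieced together from the docstrings above, the intended daily cycle looks roughly like this (a sketch, assuming bf was constructed as in the earlier example; save_snaphot is the method's spelling in the source):

bf.save_snaphot()    # before midnight: persist the day's bitarray to name_expiration_YYYY-MM-DD.dat
bf.maintenance()     # at the start of a new day: roll the period, reset, restore_from_disk()
# or, to spread the reload out instead of loading every snapshot at once:
while not bf.ready:
    bf.warm()
    time.sleep(1)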
Example #32
        #key = 'rss:' + v[0] + ':' + tzids[tzid] + ':SUM'
        key = 'rss:' + v[0] + ':SUM'
        if (long(v[1]) < tt):
            delete[v[0]] = tzids.keys()[0]
        else:
            save[v[0]] = tzids.keys()[0]


filter(oids)


### delete rows in counter
#delete_rows_in_counter(delete)


### delete columns in meta
meta.remove('rss.All', columns = delete.keys())

oid = save.keys()[0]
counterkey = save[oid]
print counterkey

print "Start to get all counters ..."
counters_generator = countercf.xget(counterkey, column_reversed=True, include_timestamp=True)
print "A lot ...."
for counter in counters_generator:
    print counter

#print oids_generator
#counter.remove(key)
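For reference, a minimal sketch of the lazy iteration pattern used above; the keyspace, column family, and row key are illustrative. xget() streams a wide row page by page instead of materializing it, and with include_timestamp=True each item is a (column, (value, timestamp)) pair:

from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

pool = ConnectionPool('demo_keyspace')       # illustrative keyspace
counters = ColumnFamily(pool, 'counter_cf')  # illustrative column family

# Newest columns first, fetched lazily one page at a time.
for column, (value, ts) in counters.xget('rss:SUM',
                                         column_reversed=True,
                                         include_timestamp=True):
    print column, value, ts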
Example #33
0
class DailyTemporalBloomFilter(object):
    """Long Range Temporal BloomFilter using a daily resolution.

    For really high value of expiration (like 60 days) with low requirement on precision.
    The actual error of this BF will the be native error of the BF + the error related
    to the coarse aspect of the expiration, since we no longer expires information precisely.
    Also, as opposed to a classic Bloom Filter, this one will aslo have false positive (reporting membership for a non-member)
    AND false negative (reporting non-membership for a member).

    The upper bound of the temporal_error can be theoricaly quite high. However, if the
    items of the set are uniformly distributed over time, the avg error will be something like 1.0 / expiration
    """

    def __init__(self, capacity, error_rate, expiration, name, snapshot_path, cassandra_session):
        self.error_rate = error_rate
        self.capacity = capacity
        self._initialize_parameters()
        self.initialize_bitarray()
        self.count = 0
        self.hashed_values = []
        self.name = name
        self.snapshot_path = snapshot_path
        self.expiration = expiration
        self.initialize_period()
        self.snapshot_to_load = None
        self.ready = False
        self.warm_period = None
        self.next_snapshot_load = time.time()
        self.cassandra_session = cassandra_session
        self.cassandra_columns_family = "temporal_bf"
        self.keyspace = 'parsely'
        self.uncommited_keys = []
        self.commit_batch = 1000
        self.columnfamily = None
        self.ensure_cassandra_cf()

    def _initialize_parameters(self):
        self.nbr_slices = int(np.ceil(np.log2(1.0 / self.error_rate)))
        self.bits_per_slice = int(np.ceil((self.capacity * abs(np.log(self.error_rate))) / (self.nbr_slices * (np.log(2) ** 2))))
        self.nbr_bits = self.nbr_slices * self.bits_per_slice
        self.hashes = generate_hashfunctions(self.bits_per_slice, self.nbr_slices)

    def ensure_cassandra_cf(self):
        s = SystemManager()
        if self.keyspace not in s.list_keyspaces():
            s.create_keyspace(self.keyspace, SIMPLE_STRATEGY, {'replication_factor': '1'})
        if self.cassandra_columns_family not in s.get_keyspace_column_families(self.keyspace):
            s.create_column_family(self.keyspace, self.cassandra_columns_family)
        self.columnfamily = ColumnFamily(self.cassandra_session, self.cassandra_columns_family)

    def archive_bf_key(self, bf_key):
        self.uncommited_keys.append(bf_key)
        if len(self.uncommited_keys) >= self.commit_batch:
            current_period_hour = dt.datetime.now().strftime('%Y-%m-%d:%H')
            self.columnfamily.insert('%s_%s' % (self.name, current_period_hour), {k:'' for k in self.uncommited_keys})
            self.uncommited_keys = []

    def _hour_range(self, start, end, reverse=False, inclusive=True):
        """Generator that gives us all the hours between a start and end datetime
        (inclusive)."""

        def total_seconds(td):
            return (td.microseconds + (td.seconds + td.days * 24.0 * 3600.0) * 10.0**6) / 10.0**6

        hours = int(math.ceil(total_seconds(end - start) / (60.0 * 60.0)))
        if inclusive:
            hours += 1
        for i in xrange(hours):
            if reverse:
                yield end - dt.timedelta(hours=i)
            else:
                yield start + dt.timedelta(hours=i)

    def resize(self, new_capacity):
        self.capacity = new_capacity
        self._initialize_parameters()
        self.rebuild_from_archive()

    def _drop_archive(self):
        last_period = self.current_period - dt.timedelta(days=self.expiration-1)
        hours = self._hour_range(last_period, dt.datetime.now())
        for hour in hours:
            try:
                row = "%s_%s" % (self.name, hour.strftime('%Y-%m-%d:%H'))
                self.columnfamily.remove(row)
            except Exception:
                pass

    def rebuild_from_archive(self):
        """Rebuild the BF using the archived items"""
        self.initialize_bitarray()
        last_period = self.current_period - dt.timedelta(days=self.expiration-1)
        hours = self._hour_range(last_period, dt.datetime.now())
        rows = []
        for hour in hours:
            rows.append("%s_%s" % (self.name, hour.strftime('%Y-%m-%d:%H')))
        rows_content = self.columnfamily.multiget(rows, column_count=int(1e6))

        for row_content in rows_content.values():
            for k in row_content.keys():
                self.add(k, rebuild_mode=True)

    def initialize_bitarray(self):
        """Initialize both bitarray.

        This BF contain two bit arrays instead of single one like a plain BF. bitarray
        is the main bit array where all the historical items are stored. It's the one
        used for the membership query. The second one, current_day_bitarray is the one
        used for creating the daily snapshot.
        """
        self.bitarray = bitarray.bitarray(self.nbr_bits)
        self.current_day_bitarray = bitarray.bitarray(self.nbr_bits)
        self.bitarray.setall(False)
        self.current_day_bitarray.setall(False)

    def __contains__(self, key):
        """Check membership."""
        self.hashed_values = self.hashes(key)
        offset = 0
        for value in self.hashed_values:
            if not self.bitarray[offset + value]:
                return False
            offset += self.bits_per_slice
        return True

    def add(self, key, rebuild_mode=False):
        if not rebuild_mode:
            self.archive_bf_key(key)
        if key in self:
            return True
        offset = 0
        if not self.hashed_values:
            self.hashed_values = self.hashes(key)
        for value in self.hashed_values:
            self.bitarray[offset + value] = True
            self.current_day_bitarray[offset + value] = True
            offset += self.bits_per_slice
        self.count += 1
        return False

    def initialize_period(self, period=None):
        """Initialize the period of BF.

        :period: datetime.datetime for setting the period explicitly.
        """
        if not period:
            self.current_period = dt.datetime.now()
        else:
            self.current_period = period
        self.current_period = dt.datetime(self.current_period.year, self.current_period.month, self.current_period.day)
        self.date = self.current_period.strftime("%Y-%m-%d")

    def maintenance(self):
        """Expire the old element of the set.

        Initialize a new bitarray and load the previous snapshots. Execute this
        at the beginning of each day.
        """
        self.initialize_period()
        self.initialize_bitarray()
        self.restore_from_disk()

    def compute_refresh_period(self):
        self.warm_period = (60 * 60 * 24) // (self.expiration - 2)

    def _should_warm(self):
        return time.time() >= self.next_snapshot_load

    def warm(self, jittering_ratio=0.2):
        """Progressively load the previous snapshot during the day.

        Loading all the snapshots at once can take a substantial amount of time. This method,
        if called periodically during the day, will progressively load those snapshots one by
        one. Because many workers are going to use this method at the same time, we add jitter
        to the period between loads to avoid all of them hammering the disk simultaneously.
        """
        if self.snapshot_to_load is None:
            last_period = self.current_period - dt.timedelta(days=self.expiration-1)
            self.compute_refresh_period()
            self.snapshot_to_load = []
            base_filename = "%s/%s_%s_*.dat" % (self.snapshot_path, self.name, self.expiration)
            availables_snapshots = glob.glob(base_filename)
            for filename in availables_snapshots:
                snapshot_period = dt.datetime.strptime(filename.split('_')[-1].strip('.dat'), "%Y-%m-%d")
                if snapshot_period >= last_period:
                    self.snapshot_to_load.append(filename)
                    self.ready = False

        if self.snapshot_to_load and self._should_warm():
            filename = self.snapshot_to_load.pop()
            self._union_bf_from_file(filename)
            jittering = self.warm_period * (np.random.random()-0.5) * jittering_ratio
            self.next_snapshot_load = time.time() + self.warm_period + jittering
            if not self.snapshot_to_load:
                self.ready = True


    def _union_bf_from_file(self, filename, current=False):
        snapshot = cPickle.loads(zlib.decompress(open(filename, 'rb').read()))
        if current:
            self.current_day_bitarray = self.current_day_bitarray | snapshot
        else:
            self.bitarray = self.bitarray | snapshot

    def restore_from_disk(self, clean_old_snapshot=False):
        """Restore the state of the BF using previous snapshots.

        :clean_old_snapshot: Delete old snapshots from disk (period < current - expiration)
        """
        base_filename = "%s/%s_%s_*.dat" % (self.snapshot_path, self.name, self.expiration)
        availables_snapshots = glob.glob(base_filename)
        last_period = self.current_period - dt.timedelta(days=self.expiration-1)
        for filename in availables_snapshots:
            snapshot_period = dt.datetime.strptime(filename.split('_')[-1].strip('.dat'), "%Y-%m-%d")
            if snapshot_period < last_period and not clean_old_snapshot:
                continue
            else:
                self._union_bf_from_file(filename)
                if snapshot_period == self.current_period:
                    self._union_bf_from_file(filename, current=True)

            if snapshot_period < last_period and clean_old_snapshot:
                os.remove(filename)
        self.ready = True

    def save_snapshot(self):
        """Save the current state of the current day bitarray on disk.

        Save the internal representation (bitarray) into a binary file using this format:
            filename : name_expiration_2013-01-01.dat
        """
        filename = "%s/%s_%s_%s.dat" % (self.snapshot_path, self.name, self.expiration, self.date)
        with open(filename, 'wb') as f:
            f.write(zlib.compress(cPickle.dumps(self.current_day_bitarray, protocol=cPickle.HIGHEST_PROTOCOL)))

    def union_current_day(self, bf):
        """Union only the current_day of an other BF."""
        self.bitarray = self.bitarray | bf.current_day_bitarray
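To make the sizing in _initialize_parameters concrete, here is a worked example of the same formulas, assuming a capacity of one million items and a 1% error rate:

import numpy as np

capacity, error_rate = 1000000, 0.01
nbr_slices = int(np.ceil(np.log2(1.0 / error_rate)))              # 7 slices
bits_per_slice = int(np.ceil((capacity * abs(np.log(error_rate)))
                             / (nbr_slices * (np.log(2) ** 2))))  # ~1.37M bits
nbr_bits = nbr_slices * bits_per_slice                            # ~9.6M bits
print nbr_slices, bits_per_slice, nbr_bits  # roughly 1.2 MB per bitarray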
Example #34
0
def Remove(pool, columnFamily, key, val, *args, **kwargs):
    col_fam = ColumnFamily(pool, columnFamily)
    return col_fam.remove(key, columns=val, *args, **kwargs)
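A usage sketch for the helper above; the pool, column family, row key, and column names are illustrative. Passing columns=None removes the whole row, and extra keyword arguments such as write_consistency_level are forwarded to remove():

from pycassa.pool import ConnectionPool

pool = ConnectionPool('demo_keyspace')             # illustrative keyspace
Remove(pool, 'users', 'row1', ['email', 'phone'])  # delete two columns
Remove(pool, 'users', 'row1', None)                # delete the entire row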
Example #35
0
class TestTimeUUIDs(unittest.TestCase):

    def setUp(self):
        self.cf_time = ColumnFamily(pool, 'StdTimeUUID')

    def tearDown(self):
        self.cf_time.remove('key1')

    def test_datetime_to_uuid(self):
        key = 'key1'
        timeline = []

        timeline.append(datetime.now())
        time1 = uuid1()
        col1 = {time1:'0'}
        self.cf_time.insert(key, col1)
        time.sleep(1)

        timeline.append(datetime.now())
        time2 = uuid1()
        col2 = {time2:'1'}
        self.cf_time.insert(key, col2)
        time.sleep(1)

        timeline.append(datetime.now())

        cols = {time1:'0', time2:'1'}

        assert_equal(self.cf_time.get(key, column_start=timeline[0])                            , cols)
        assert_equal(self.cf_time.get(key,                           column_finish=timeline[2]) , cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0], column_finish=timeline[2]) , cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0], column_finish=timeline[2]) , cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0], column_finish=timeline[1]) , col1)
        assert_equal(self.cf_time.get(key, column_start=timeline[1], column_finish=timeline[2]) , col2)

    def test_time_to_uuid(self):
        key = 'key1'
        timeline = []

        timeline.append(time.time())
        time1 = uuid1()
        col1 = {time1:'0'}
        self.cf_time.insert(key, col1)
        time.sleep(0.1)

        timeline.append(time.time())
        time2 = uuid1()
        col2 = {time2:'1'}
        self.cf_time.insert(key, col2)
        time.sleep(0.1)

        timeline.append(time.time())

        cols = {time1:'0', time2:'1'}

        assert_equal(self.cf_time.get(key, column_start=timeline[0])                            , cols)
        assert_equal(self.cf_time.get(key,                           column_finish=timeline[2]) , cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0], column_finish=timeline[2]) , cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0], column_finish=timeline[2]) , cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0], column_finish=timeline[1]) , col1)
        assert_equal(self.cf_time.get(key, column_start=timeline[1], column_finish=timeline[2]) , col2)

    def test_auto_time_to_uuid1(self):
        key = 'key1'
        t = time.time()
        col = {t: 'foo'}
        self.cf_time.insert(key, col)
        uuid_res = self.cf_time.get(key).keys()[0]
        timestamp = convert_uuid_to_time(uuid_res)
        assert_almost_equal(timestamp, t, places=3)
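The automatic packing exercised above can also be done by hand with pycassa's utility converters; a small sketch:

import time
from pycassa.util import convert_time_to_uuid, convert_uuid_to_time

t = time.time()
u = convert_time_to_uuid(t)  # datetime objects are accepted as well
# TimeUUIDs carry 100 ns resolution, so the round trip is near-exact.
assert abs(convert_uuid_to_time(u) - t) < 1e-3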
Example #36
0
class CassandraDemo(object):
    def __init__(self, database, table):
        self.database = database
        self.table = table

    def create_connections(self):
        self.pool = ConnectionPool(self.database)
        self.cf = ColumnFamily(self.pool, self.table)

    def create_database_and_table(self):
        super_cf = False # consider super columns to be deprecated
        s = SystemManager()

        # create the keyspace if it doesn't exist
        if self.database not in s.list_keyspaces():
            s.create_keyspace(self.database, SIMPLE_STRATEGY, {'replication_factor': '1'})

        # drop the column family from the keyspace if it already exists
        if self.table in s.get_keyspace_column_families(self.database):
            s.drop_column_family(self.database, self.table)

        # create the column family in the keyspace
        if self.table not in s.get_keyspace_column_families(self.database):
            print("creating table...")
            s.create_column_family(self.database, self.table, super = super_cf, comparator_type = ASCII_TYPE)
        s.close()

        return True

    def insert_data(self):
        print '\ninserting employee data...'
        self.cf.insert('1', {'fn':'yogesh', 'ln':'kumar', 'ct': 'Ajmer', 'em': '*****@*****.**'})
        self.cf.insert('2', {'fn':'amit', 'ln':'pandita', 'ct': 'Delhi', 'em': '*****@*****.**'})
        self.cf.insert('3', {'fn':'sandeep', 'ln':'tak', 'ct': 'Ajmer', 'em': '*****@*****.**', 'mb': '8890467032'})


    def get_data(self):
        print '\nfetching employee data...'
        data1 = self.cf.get('1')
        data2 = self.cf.get('2', columns = ['fn', 'ln', 'em'])
        data3 = self.cf.get('3', column_start = 'ct', column_finish = 'fn')
        data4 = self.cf.get('1', column_reversed = False, column_count = 3)
        data5 = self.cf.get('1', column_reversed = True, column_count = 3)
        print data1
        print data2
        print data3
        print data4
        print data5

    def get_multiple_data(self):
        print '\ngetting multiple employees\' data...'
        row_keys = ['1','2','3']
        data = self.cf.multiget(row_keys)
        print data

    def get_data_by_range(self):
        '''
        If this raises an error, don't worry: key-range queries like this one
        require an order-preserving partitioner, which is a Cassandra limitation.
        '''
        print '\ngetting employees data by range...'
        start_row_key = '1'
        end_row_key = '3'
        data = self.cf.get_range(start = start_row_key, finish = end_row_key)
        for key, columns in data:
            print key, columns

    def get_count(self):
        print '\nget employee row\'s column count'
        print self.cf.get_count('1')
        print self.cf.get_count('1', columns = ['fn', 'ln'])
        print self.cf.get_count('1', column_start = 'em')

    def get_multi_count(self):
        print '\nget multiple employee rows\' column counts'
        row_keys = ['1','2','3']
        columns = ['fn', 'ln', 'mb']
        column_start = 'ct'
        column_finish = 'fn'
        print self.cf.multiget_count(row_keys)
        print self.cf.multiget_count(row_keys, columns = columns)
        print self.cf.multiget_count(row_keys, column_start = column_start, column_finish = column_finish)

    def update_data(self):
        print '\nupdating employee data...'
        self.cf.insert('1', {'pwd':'yoku@2010', 'ct':'Noida'})


    def delete_data(self):
        print '\ndelete data from employee'
        row = '2'
        self.cf.remove(row)

    def get_all_rows(self):
        print '\ngetting row keys...'
        print [v[0] for v in self.cf.get_range()]

    def get_all_columns_of_row(self):
        print '\ngetting column names of a row'
        row = '1'
        data = self.cf.get(row)
        print data.keys()
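A sketch of driving the demo end to end; the keyspace and table names are illustrative, and the keyspace must exist before the connection pool can open it, hence the call order:

if __name__ == '__main__':
    demo = CassandraDemo('demo_keyspace', 'employee')
    demo.create_database_and_table()  # creates keyspace/CF first
    demo.create_connections()
    demo.insert_data()
    demo.get_data()
    demo.get_multiple_data()
    demo.get_count()
    demo.update_data()
    demo.delete_data()
    demo.get_all_rows()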
Example #37
0
class DailyTemporalBloomFilter(DailyTemporalBase):
    """Long Range Temporal BloomFilter using a daily resolution.

    For really high value of expiration (like 60 days) with low requirement on precision.
    The actual error of this BF will the be native error of the BF + the error related
    to the coarse aspect of the expiration, since we no longer expires information precisely.
    Also, as opposed to a classic Bloom Filter, this one will aslo have false positive (reporting membership for a non-member)
    AND false negative (reporting non-membership for a member).

    The upper bound of the temporal_error can be theoricaly quite high. However, if the
    items of the set are uniformly distributed over time, the avg error will be something like 1.0 / expiration
    """
    def __new__(cls,
                capacity,
                error_rate,
                expiration,
                name,
                cassandra_session,
                snapshot_path='./'):
        return super(DailyTemporalBloomFilter,
                     cls).__new__(cls,
                                  capacity=capacity,
                                  error_rate=error_rate)

    def __init__(self,
                 capacity,
                 error_rate,
                 expiration,
                 name,
                 cassandra_session,
                 snapshot_path='./'):
        filename = ""
        super(DailyTemporalBloomFilter, self).__init__(capacity=capacity,
                                                       error_rate=error_rate)
        self.bf_name = name
        self.expiration = expiration
        self.initialize_period()
        self.cassandra_session = cassandra_session
        self.cassandra_columns_family = "temporal_bf"
        self.keyspace = 'parsely'
        self.uncommited_keys = []
        self.commit_batch = 1000
        self.columnfamily = None
        self.ensure_cassandra_cf()
        self.snapshot_path = snapshot_path

    def ensure_cassandra_cf(self):
        s = SystemManager()
        if self.keyspace not in s.list_keyspaces():
            s.create_keyspace(self.keyspace, SIMPLE_STRATEGY,
                              {'replication_factor': '1'})
        if self.cassandra_columns_family not in s.get_keyspace_column_families(
                self.keyspace):
            s.create_column_family(self.keyspace,
                                   self.cassandra_columns_family)
        self.columnfamily = ColumnFamily(self.cassandra_session,
                                         self.cassandra_columns_family)

    def archive_bf_key(self, bf_key):
        self.uncommited_keys.append(bf_key)
        if len(self.uncommited_keys) >= self.commit_batch:
            current_period_hour = dt.datetime.now().strftime('%Y-%m-%d:%H')
            self.columnfamily.insert(
                '%s_%s' % (self.bf_name, current_period_hour),
                {k: ''
                 for k in self.uncommited_keys})
            self.uncommited_keys = []

    def _hour_range(self, start, end, reverse=False, inclusive=True):
        """Generator that gives us all the hours between a start and end datetime
        (inclusive)."""
        def total_seconds(td):
            return (td.microseconds +
                    (td.seconds + td.days * 24.0 * 3600.0) * 10.0**6) / 10.0**6

        hours = int(math.ceil(total_seconds(end - start) / (60.0 * 60.0)))
        if inclusive:
            hours += 1
        for i in xrange(hours):
            if reverse:
                yield end - dt.timedelta(hours=i)
            else:
                yield start + dt.timedelta(hours=i)

    def _day_range(self, start, end, reverse=False, inclusive=True):
        """Generator that gives us all the days between a start and end datetime
        (inclusive)."""
        days = (end - start).days
        if inclusive:
            days += 1
        for i in xrange(days):
            if reverse:
                yield end - dt.timedelta(days=i)
            else:
                yield start + dt.timedelta(days=i)

    def _drop_archive(self):
        last_period = self.current_period - dt.timedelta(days=self.expiration -
                                                         1)
        hours = self._hour_range(last_period, dt.datetime.now())
        for hour in hours:
            try:
                row = "%s_%s" % (self.bf_name, hour.strftime('%Y-%m-%d:%H'))
                self.columnfamily.remove(row)
            except Exception:
                pass

    def rebuild_from_archive(self, rebuild_snapshot=True):
        """Rebuild the BF using the archived items"""
        self.initialize_bitarray()

        #if rebuild_snapshot:
        #    self.delete_snapshots()

        def multi_rows_itr(rows):
            for row in rows.values():
                for k in row.keys():
                    yield k

        last_period = self.current_period - dt.timedelta(days=self.expiration -
                                                         1)
        days = self._day_range(last_period, dt.datetime.now())
        for day in days:
            rows = [
                "%s_%s:%s" % (self.bf_name, day.strftime('%Y-%m-%d'), hour_str)
                for hour_str in ["%02d" % h for h in range(24)]
            ]
            rows_content = self.columnfamily.multiget(rows, column_count=int(1e6))
            update_current = day == self.current_period

            for k in multi_rows_itr(rows_content):
                self.add_rebuild(k, update_current)

            if rebuild_snapshot:
                self.save_snapshot(override_period=day)

            if not update_current:
                self.initialize_current_day_bitarray()

    def restore_from_disk(self, clean_old_snapshot=False):
        """Restore the state of the BF using previous snapshots.

        :clean_old_snapshot: Delete old snapshots from disk (period < current - expiration)
        """
        base_filename = "%s/%s_%s_*.dat" % (self.snapshot_path, self.bf_name,
                                            self.expiration)
        availables_snapshots = glob.glob(base_filename)
        last_period = self.current_period - dt.timedelta(days=self.expiration -
                                                         1)
        for filename in availables_snapshots:
            snapshot_period = dt.datetime.strptime(
                filename.split('_')[-1].strip('.dat'), "%Y-%m-%d")
            if snapshot_period < last_period and not clean_old_snapshot:
                continue
            else:
                self._union_bf_from_file(filename)
                if snapshot_period == self.current_period:
                    self._union_bf_from_file(filename, current=True)

            if snapshot_period < last_period and clean_old_snapshot:
                os.remove(filename)
        self.ready = True

    def add_rebuild(self, key, update_current=True):
        super(DailyTemporalBloomFilter, self).add(key, update_current)

    def add(self, key_string):
        if isinstance(key_string, unicode):
            key = key_string.encode('utf8')
        else:
            key = key_string

        self.archive_bf_key(key)
        result = super(DailyTemporalBloomFilter, self).add(key)

        return result

    def resize(self, new_capacity=None, new_error_rate=None):
        self._set_capacity(new_capacity or self.capacity)
        self._set_error_rate(new_error_rate or self.error_rate)
        self._initialize_parameters()
        self.initialize_bitarray()
        self.rebuild_from_archive(rebuild_snapshot=True)

    def initialize_period(self, period=None):
        """Initialize the period of BF.

        :period: datetime.datetime for setting the period explicitly.
        """
        if not period:
            self.current_period = dt.datetime.now()
        else:
            self.current_period = period
        self.current_period = dt.datetime(self.current_period.year,
                                          self.current_period.month,
                                          self.current_period.day)
        self.date = self.current_period.strftime("%Y-%m-%d")

    def save_snapshot(self, override_period=None):
        """Save the current state of the current day bitarray on disk.

        Save the internal representation (bitarray) into a binary file using this format:
            filename : name_expiration_2013-01-01.dat
        """
        period = override_period or self.current_period
        filename = "%s/%s_%s_%s.dat" % (self.snapshot_path, self.bf_name,
                                        self.expiration,
                                        period.strftime("%Y-%m-%d"))
        self._save_snapshot(filename)
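Finally, a construction sketch for this variant. A pycassa ConnectionPool stands in for cassandra_session, the 'parsely' keyspace is assumed to already exist, and DailyTemporalBase (not shown on this page) is assumed to supply the bitarray plumbing, including the membership test:

from pycassa.pool import ConnectionPool

pool = ConnectionPool('parsely')  # keyspace used by ensure_cassandra_cf()
bf = DailyTemporalBloomFilter(capacity=1000000,
                              error_rate=0.01,
                              expiration=60,
                              name='urls_seen',  # illustrative name
                              cassandra_session=pool,
                              snapshot_path='/tmp')
bf.add(u'http://example.com/a')     # unicode keys are utf-8 encoded first
print 'http://example.com/a' in bf  # membership test from the base class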