    def load_builders(self, o):
        cf = ColumnFamily(self._pool, 'builders')
        indices = ColumnFamily(self._pool, 'indices')

        batch = cf.batch()
        i_batch = indices.batch()

        for builder_id, params in o.items():
            cat = params['category']
            columns = {
                'category': cat,
                'master': unicode(params['master_id']),
                'name': params['name'],
            }

            batch.insert(builder_id, columns)
            i_batch.insert('builder_category_to_builder_ids',
                {cat: {builder_id: ''}})
            i_batch.insert('master_id_to_slave_ids', {columns['master']: {
                builder_id: ''}})

            if params['slaves']:
                i_batch.insert('builder_id_to_slave_ids', {builder_id: {
                    unicode(slave_id): '' for slave_id in params['slaves']}})

        batch.send()
        i_batch.send()

        return len(o)
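For context, here is a hedged sketch of how the index rows written above could be read back; the keyspace name and the 'release' category are assumptions, not part of the original.

from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

pool = ConnectionPool('build_data', ['localhost:9160'])  # keyspace name assumed
indices = ColumnFamily(pool, 'indices')

# Each index row holds one super column per category; the subcolumn names
# are the builder ids (their values are the empty strings written above).
builder_ids = indices.get('builder_category_to_builder_ids',
                          super_column='release').keys()  # 'release' is illustrative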
def main(filename):
    data = open(filename)
    # Set up the connection pool
    pool = ConnectionPool('tuitterdb', ['localhost:9160'])

    # CF connections
    user_family = ColumnFamily(pool, 'user')
    tweet_family = ColumnFamily(pool, 'tweet')
    user_tweets_family = ColumnFamily(pool, 'userTweets')
    followers = ColumnFamily(pool, 'followers')
    followerTweets = ColumnFamily(pool, 'followsTweets')

    # Batch Definitions
    user_batch = user_family.batch(queue_size=1000)
    followers_batch = followers.batch(queue_size=500)
    user_tweets_batch = user_tweets_family.batch(queue_size=500)
    followerTweets_batch = followerTweets.batch(queue_size=500)

    while True:
        line = data.readline()
        if line == "":  # readline() returns '' only at end of file
            break
        else:
            tweet = tweet_get(line)
            try:
                tweet_data = get_tweet_data(tweet)
                sender = get_sender(tweet)  # needed below even if the user row already exists
                if not check_user(tweet[u"from_user_id_str"]):
                    # Create the user row for the sender on first sight.
                    user_batch.insert(sender.user_id, {'user_name': sender.user_name, 'screen_name': sender.from_user})
                # File the whole tweet under the sender's userTweets row (tweet as column name).
                user_tweets_batch.insert(sender.user_id, {line: ''})

                if tweet[u"to_user"] is not None and not check_user(tweet[u"to_user_id"]):
                    to_user = get_to_user(tweet)
                    user_batch.insert(to_user.user_id, {'user_name': to_user.user_name, 'screen_name': to_user.from_user})
                    followers_batch.insert(to_user.user_id, {sender.user_id: 'follower_id'})
                    # File the whole tweet under the recipient's followerTweets row too.
                    followerTweets_batch.insert(to_user.user_id, {line: ''})


                # Iterate over any mentioned users and add them to users/followers as needed.
                if u"entities" in tweet:
                    if tweet[u"entities"][u"user_mentions"]:
                        user_mentions = get_mentions(tweet)
                        for obj in user_mentions:
                            if not check_user(obj.user_id):
                                user_batch.insert(obj.user_id, {'user_name': obj.user_name, 'screen_name': obj.from_user})
                            followers_batch.insert(obj.user_id, {'user_id': sender.user_id})
                            # File the whole tweet under the mentioned user's followerTweets row.
                            followerTweets_batch.insert(obj.user_id, {line: ''})

                tweet_family.insert(tweet_data.tweet_id, {'text': tweet_data.textbody, 'user_id': sender.user_id, 'timeanddate': tweet_data.timestamp})

            except Exception:
                # Print the exception with traceback and keep processing.
                traceback.print_exc()
                continue

    # Flush any queued mutations, then close the pool.
    user_batch.send()
    followers_batch.send()
    user_tweets_batch.send()
    followerTweets_batch.send()
    pool.dispose()
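The helpers tweet_get, get_tweet_data, get_sender, get_to_user, get_mentions and check_user are not shown in this excerpt. A minimal sketch of two of them, assuming one JSON-encoded tweet per line and a module-level handle on the 'user' column family:

import json
from pycassa import NotFoundException

def tweet_get(line):
    # One tweet per line, JSON-encoded (assumed input format).
    return json.loads(line)

def check_user(user_id):
    # True if a row for this user already exists in the 'user' column family.
    try:
        user_family.get(user_id, column_count=1)  # module-level handle assumed
        return True
    except NotFoundException:
        return False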
def main(filename):
    data = open(filename)
    # Set up the connection pool
    pool = ConnectionPool('tuitterdb', ['localhost:9160'])
    # CF connections
    user_family = ColumnFamily(pool, 'user')
    tweet_family = ColumnFamily(pool, 'tweet')
    user_tweets_family = ColumnFamily(pool, 'userTweets')
    #follows_tweets_family = ColumnFamily(pool, 'follows.tweets')
    followers = ColumnFamily(pool, 'followers')

    # Batch Definitions
    user_batch = user_family.batch(queue_size=1000)
    followers_batch = followers.batch(queue_size=500)
    user_tweets_batch = user_tweets_family.batch(queue_size=500)

    while True:
        line = data.readline()
        if line == "":  # readline() returns '' (never None) at end of file
            break
        else:
            tweet = tweet_get(line)
            try:
                tweet_data = get_tweet_data(tweet)
                sender = get_sender(tweet)  # needed below even if the user row already exists
                if not check_user(tweet[u"from_user_id_str"]):
                    user_batch.insert(sender.user_id, {'user_name': sender.user_name, 'screen_name': sender.from_user})
                # Index the tweet id under the sender, timestamp as the value.
                user_tweets_batch.insert(sender.user_id, {tweet_data.tweet_id: tweet_data.timestamp})

                if tweet[u"to_user"] is not None and not check_user(tweet[u"to_user_id"]):
                    to_user = get_to_user(tweet)
                    user_batch.insert(to_user.user_id, {'user_name': to_user.user_name, 'screen_name': to_user.from_user})
                    followers_batch.insert(to_user.user_id, {'user_id': sender.user_id})


                # Add any mentioned users to users/followers as needed.
                if u"entities" in tweet:
                    if tweet[u"entities"][u"user_mentions"]:
                        user_mentions = get_mentions(tweet)
                        for obj in user_mentions:
                            user_batch.insert(obj.user_id, {'user_name': obj.user_name, 'screen_name': obj.from_user})
                            followers_batch.insert(obj.user_id, {'user_id': sender.user_id})

                tweet_family.insert(tweet_data.tweet_id, {'text': tweet_data.textbody, 'user_id': sender.user_id, 'timeanddate': tweet_data.timestamp})

            except Exception:
                # Print the exception with traceback and keep processing.
                traceback.print_exc()
                continue

    # Flush any queued mutations, then close the pool.
    user_batch.send()
    followers_batch.send()
    user_tweets_batch.send()
    pool.dispose()

    def load_builds(self, o, builders):
        cf = ColumnFamily(self._pool, 'builds')
        batch = cf.batch()

        indices = ColumnFamily(self._pool, 'indices')
        i_batch = indices.batch()

        simple_indices = ColumnFamily(self._pool, 'simple_indices')
        si_batch = simple_indices.batch()

        # We defer counter increments until the end because Python
        # increments are cheaper than Cassandra increments (hopefully).
        counters = {
            'builder_number': Counter(),
            'builder_duration': Counter(),
            'builder_number_by_day': {},
            'builder_duration_by_day': {},
            'builder_number_by_category': {},
            'builder_duration_by_category': {},
            'builder_number_by_day_and_category': {},
            'builder_duration_by_day_and_category': {},
        }

        existing_filenames = set(self._connection.filenames())

        for build in o:
            self._load_build(batch, i_batch, si_batch, counters, build,
                builders, existing_filenames)

        batch.send()
        i_batch.send()
        si_batch.send()

        cf = ColumnFamily(self._pool, 'counters')
        for builder, count in counters['builder_number'].items():
            cf.add('builder_number', builder, count)

        for builder, count in counters['builder_duration'].items():
            cf.add('builder_duration', builder, count)

        cf = ColumnFamily(self._pool, 'super_counters')
        del counters['builder_number']
        del counters['builder_duration']

        for key, super_columns in counters.items():
            for super_column, counts in super_columns.items():
                for column, count in counts.items():
                    cf.add(key, column, count, super_column)

        return len(o)
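_load_build is not shown here; the following is a hedged sketch of the per-build bookkeeping it is assumed to do against the counters dict (the field names on build are made up):

from collections import Counter

def tally_build(counters, build, builders):
    # Accumulate in plain Python; load_builds later turns each final
    # total into a single Cassandra add() instead of one per build.
    builder_id = build['builder_id']
    duration = build['endtime'] - build['starttime']
    day = build['day']  # e.g. '2014-02-02'
    category = builders[builder_id]['category']

    counters['builder_number'][builder_id] += 1
    counters['builder_duration'][builder_id] += duration
    counters['builder_number_by_day'].setdefault(day, Counter())[builder_id] += 1
    counters['builder_duration_by_day'].setdefault(day, Counter())[builder_id] += duration
    counters['builder_number_by_category'].setdefault(category, Counter())[builder_id] += 1
    counters['builder_duration_by_category'].setdefault(category, Counter())[builder_id] += duration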
    def truncate_log_metadata(self):
        for name in ['build_timelines']:
            cf = ColumnFamily(self.pool, name)
            cf.truncate()

        cf = ColumnFamily(self.pool, 'indices')
        for key in LOG_METADATA_INDICES:
            cf.remove(key)

        cf = ColumnFamily(self.pool, 'counters')
        for key in LOG_METADATA_COUNTERS:
            cf.remove(key)

        cf = ColumnFamily(self.pool, 'super_counters')
        for key in LOG_METADATA_SUPER_COUNTERS:
            cf.remove(key)

        cf = ColumnFamily(self.pool, 'builds')
        batch = cf.batch()
        # Remove log parsing state from builds.
        for key, cols in cf.get_range(columns=['log_parsing_version']):
            if 'log_parsing_version' not in cols:
                continue

            batch.remove(key, ['log_parsing_version'])

        batch.send()
    def load_slaves(self, o):
        cf = ColumnFamily(self._pool, 'slaves')
        with cf.batch() as batch:
            for slave_id, name in o.items():
                batch.insert(slave_id, {'name': name})

        return len(o)
    def load_masters(self, o):
        cf = ColumnFamily(self._pool, 'masters')

        with cf.batch() as batch:
            for master_id, info in o.items():
                batch.insert(master_id, info)

        return len(o)
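pycassa's batch mutator doubles as a context manager: leaving the with block calls send() automatically, which is why load_slaves and load_masters never call it themselves. The explicit equivalent of load_masters:

batch = cf.batch()
for master_id, info in o.items():
    batch.insert(master_id, info)
batch.send()  # what the with block does on exit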
Example #8
    def undo(self):
        ## Assumes the command kind is stored on self.type.
        if self.type == InsertCommand.INS_BASIC:
            ## I know that data for a basic insert is of this tuple type
            domain, row_key, basic_type_dict = self.data

            client = db_connection.get_client()
            cf = ColumnFamily(client, domain)
            cf.remove(row_key)

        elif self.type == InsertCommand.INS_OBJECT:
            ## call the save operation for the object
            if self.data:
                self.data.delete(cascade=False)

        elif self.type == InsertCommand.INS_BATCH:
            domain, basic_type_item_dict = self.data
            client = db_connection.get_client()
            cf = ColumnFamily(client, domain)

            b = cf.batch()
            for row_key in basic_type_item_dict.keys():
                b.remove(row_key)
            b.send()
Example #9
    def update(self, values):
        """
        Changes an entity that already exists in the database.

        :param values: A list of (field, new-value) pairs.
        """

        pool = self.connection
        column_family_name = get_column_family()
        col_fam = CF(pool, column_family_name)

        pk_column = get_pk_column()

        pk_index = -1
        fields = self.get_fields()

        for index, field in enumerate(fields):
            if field.column == pk_column:
                pk_index = index
                break

        if pk_index == -1:
            raise DatabaseError('Invalid primary key column.')

        b = col_fam.batch(
            write_consistency_level=self.connection.write_consistency_level)
        row_count = 0
        for result in self.results_iter():
            row_count += 1
            key = result[pk_index]

            # Write the changed columns to this row's key.
            b.insert(key, dict(values))

        b.send()

        return row_count
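The write consistency level passed to batch() above applies to every mutation queued in that batch. The level constants ship with pycassa; a small sketch:

from pycassa import ConsistencyLevel

b = col_fam.batch(write_consistency_level=ConsistencyLevel.QUORUM)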
Example #11
    def do(self):
        ## Assumes the command kind is stored on self.type.
        if self.type == InsertCommand.INS_BASIC:
            ## I know that data for a basic insert is of this tuple type
            domain, row_key, basic_type_dict = self.data

            client = db_connection.get_client()
            cf = ColumnFamily(client, domain)
            cf.insert(row_key, basic_type_dict)

        elif self.type == InsertCommand.INS_OBJECT:
            ## call the save operation for the object
            if self.data:
                self.data.save()

        elif self.type == InsertCommand.INS_BATCH:
            ## Again, I know data for a batch insert will be of the following tuple type
            domain, basic_type_item_dict = self.data
            client = db_connection.get_client()
            cf = ColumnFamily(client, domain)
            b = cf.batch()

            for row_key, columns in basic_type_item_dict.items():
                b.insert(row_key, columns)
            b.send()
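A hedged usage sketch of the command pattern do() and undo() imply; the constructor signature is assumed, only the INS_BATCH tuple shape is taken from the code above:

# (domain, {row_key: column_dict}) is the tuple shape INS_BATCH unpacks.
cmd = InsertCommand(InsertCommand.INS_BATCH,
                    ('UserInfo', {'row1': {'name': 'mostafa'}}))
cmd.do()    # batch-inserts every row in the dict
cmd.undo()  # batch-removes the same row keys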
def add_data(opt):
    pool = ConnectionPool('CrashData',
                          ['localhost:9160'])

    col_fam = ColumnFamily(pool, 'CrashInfo2')
    col_fam.insert('7d625afa-ca2b-41e7-bcf3-e180d2140202',
                    {"useragent_locale": "en-US",
                    "AdapterVendorID": "0x10de",
                    "TotalVirtualMemory": "4294836224",
                    "BreakpadReserveAddress": "44826624",
                    "Theme": "classic/1.0",
                    "Version": "29.0a1",
                    "id": "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}",
                    "BIOS_Manufacturer": "stuff",
                    "Vendor": "Mozilla",
                    "uuid": "7d625afa-ca2b-41e7-bcf3-e180d2140202",
                    "EMCheckCompatibility": "true",
                    "Throttleable": "1",
                    "throttle_rate": "100",
                    "AvailablePageFile": "14036480000",
                    "version": "29.0a1",
                    "AdapterDeviceID": "0x1080",
                    "ReleaseChannel": "nightly",
                    "submitted_timestamp": "2014-02-02T23:32:59.584636+00:00",
                    "buildid": "20140202030204",
                    "Notes": "AdapterVendorID: 0x10de, AdapterDeviceID: 0x1080, AdapterSubsysID: 15803842, AdapterDriverVersion: 9.18.13.3158\nD2D? D2D+ DWrite? DWrite+ D3D10 Layers? D3D10 Layers+ D3D10 Layers- D3D9 Layers? D3D9 Layers- ",
                    "CrashTime": "1391383937",
                    "Winsock_LSP": "MSAFD Tcpip [TCP/IP] : 2 : 1 : %SystemRoot%\\system32\\mswsock.dll \n MSAFD Tcpip [UDP/IP] : 2 : 2 :  \n MSAFD Tcpip [RAW/IP] : 2 : 3 : %SystemRoot%\\system32\\mswsock.dll \n MSAFD Tcpip [TCP/IPv6] : 2 : 1 :  \n MSAFD Tcpip [UDP/IPv6] : 2 : 2 : %SystemRoot%\\system32\\mswsock.dll \n MSAFD Tcpip [RAW/IPv6] : 2 : 3 :  \n RSVP TCPv6 Service Provider : 2 : 1 : %SystemRoot%\\system32\\mswsock.dll \n RSVP TCP Service Provider : 2 : 1 :  \n RSVP UDPv6 Service Provider : 2 : 2 : %SystemRoot%\\system32\\mswsock.dll \n RSVP UDP Service Provider : 2 : 2 : ",
                    "FramePoisonBase": "00000000f0de0000",
                    "AvailablePhysicalMemory": "5240811520",
                    "FramePoisonSize": "65536",
                    "BreakpadReserveSize": "37748736",
                    "StartupTime": "1391382356",
                    "Add-ons": "%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:29.0a1",
                    "BuildID": "20140202030204",
                    "SecondsSinceLastCrash": "930758",
                    "ProductName": "Firefox",
                    "legacy_processing": "0",
                    "BlockedDllList": "",
                    "AvailableVirtualMemory": "3497549824",
                    "SystemMemoryUsePercentage": "38",
                    "ProductID": "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}"})

    crash_column_family = ColumnFamily(pool, opt.column_family_counter)
    crash_batch = crash_column_family.batch(queue_size=100)

    # Insert buckets of data hourly for crashes

    crash_seed_signatures = [
        "FakeSignature1",
        "FakeSignature2",
        "FakeSignature3",
        "FakeSignature4",
        "FakeSignature5",
        "FakeSignature6",
        "FakeSignature7",
        "FakeSignature8",
        "FakeSignature9"
    ]

    now = datetime.datetime.now()
    for i in xrange(100):
        current_time = now + datetime.timedelta(seconds=i)

        row_bucket = "h-%d" % int(current_time.strftime("%H"))
        print "Adding row_bucket %s" % row_bucket
        column_bucket = int(current_time.strftime("%M"))
        print "Adding to column_bucket %s" % column_bucket

        crash_batch.insert(row_bucket, {column_bucket: 1})

        signature = random.choice(crash_seed_signatures)
        #new_row_bucket = "%s-%s" % (row_bucket, signature)
        new_row_bucket = '{ "hour": "%d", "signature": "%s" }' % (
            int(current_time.strftime("%H")), signature
        )
        print "Adding row_bucket %s, column_bucket %s" % (new_row_bucket, column_bucket)
        crash_batch.insert(new_row_bucket, {column_bucket: 1})
        print "Just put %s into cassandra." % signature

    # Flush whatever is still queued in the batch.
    crash_batch.send()
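A hedged read-back sketch for the hourly buckets written above; the counter column family name comes from opt in the original, so it is assumed here:

crash_counts = ColumnFamily(pool, 'CrashCounts')  # name assumed (opt.column_family_counter)
hour_row = "h-%d" % datetime.datetime.now().hour
for minute, count in crash_counts.get(hour_row).items():
    print "minute %02d: %d crash(es)" % (minute, count)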
Example #13
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

pool = ConnectionPool("pykeyspace", ["localhost:9160"])
col_family = ColumnFamily(pool, "UserInfo")
col_family.insert("dosht2", {"email": "*****@*****.**", "name": "mostafa"})
# print col_family.get("dosht2", columns=["email"])['email']
print col_family.get("dosht2")
b = col_family.batch()
b.insert("dodo", {"email": "*****@*****.**"})
b.remove("dosht2", ["name"])
b.send()
print col_family.get("dosht2")
print col_family.multiget(["dosht", "dodo"])["dodo"]

from pycassa.types import *


class User(object):
    key = UTF8Type()  # a 'key' attribute is mandatory for ColumnFamilyMap
    email = AsciiType()
    age = IntegerType()

    def __repr__(self):
        return "User(key: %s, email: %s, age: %s)" % (self.key, self.email, self.age)


from pycassa.columnfamilymap import ColumnFamilyMap

cfmap = ColumnFamilyMap(User, pool, "UserInfo")
user = User()
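The excerpt stops right after constructing the mapped object; a minimal continuation, assuming the UserInfo column family accepts these columns:

user.key = "dosht3"
user.email = "*****@*****.**"  # redacted, like the addresses above
user.age = 30
cfmap.insert(user)           # maps the attributes onto columns
print cfmap.get("dosht3")    # comes back as a User instance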
Example #14
import pycassa
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

pool = ConnectionPool('ApplicationData',
                      ['localhost:9160'])
col_fam = ColumnFamily(pool, 'UserInfo')
col_fam.insert('Diego', {'email': '*****@*****.**'})

readData = col_fam.get('Diego', columns=['email'])

col_fam.remove('Diego', columns=['email'])

# Batch several mutations and send them in one round trip.

b = col_fam.batch(queue_size=10)

b.insert('John',
         {'email': '*****@*****.**',
          'state': 'IL',
          'gender': 'M'})

b.insert('Jane',
         {'email': '*****@*****.**',
          'state': 'CA'})

b.remove('John', ['gender'])
b.remove('Jane')
b.send()
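One detail worth knowing about queue_size: the mutator flushes itself whenever the queue fills, so the trailing send() only pushes whatever is left. A minimal sketch:

b = col_fam.batch(queue_size=2)
b.insert('UserA', {'email': 'a@example.invalid'})
b.insert('UserB', {'email': 'b@example.invalid'})  # queue full: auto-flushed here
b.insert('UserC', {'email': 'c@example.invalid'})
b.send()  # pushes the remaining mutation for 'UserC'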