예제 #1
0
def main(input_dir, keyspace, output_table):
    """Load gzipped web-server access logs into a Cassandra table.

    Every file in ``input_dir`` is opened as gzip text and read line by
    line. Lines matching the access-log pattern are inserted as
    ``(id, host, datetime, path, bytes)`` rows; other lines are ignored.
    The target table is truncated before loading starts.

    Args:
        input_dir: Directory whose entries are all opened with ``gzip.open``.
        keyspace: Keyspace the session connects to.
        output_table: Table that is truncated and then filled.
    """
    # A batch is sent as soon as it holds more than this many statements.
    flush_threshold = 300

    # Groups: host, timestamp (without UTC offset), request path, byte count.
    log_pattern = re.compile(r'^(\S+) - - \[(\S+) [+-]\d+\] \"[A-Z]+ (\S+) HTTP/\d\.\d\" \d+ (\d+)$')

    cluster = Cluster(['199.60.17.188', '199.60.17.216'])
    try:
        session = cluster.connect(keyspace)

        # Insert into the same table that is truncated below; table names
        # cannot be bound as statement parameters, so the name is spliced in.
        insert_row = session.prepare(
            "INSERT INTO " + output_table
            + " (id, host, datetime, path, bytes) VALUES (?, ?, ?, ?, ?)")
        batch = BatchStatement(consistency_level=ConsistencyLevel.ONE)

        session.execute("TRUNCATE " + keyspace + '.' + output_table + ';')

        for entry in os.listdir(input_dir):
            pending = 0
            with gzip.open(os.path.join(input_dir, entry), 'rt') as logfile:
                for line in logfile:
                    parsed = log_pattern.match(line)
                    if parsed is None:
                        continue

                    host, stamp, path, size = parsed.groups()
                    batch.add(insert_row, (
                        uuid.uuid1(),
                        host,
                        dt.datetime.strptime(stamp, '%d/%b/%Y:%H:%M:%S'),
                        path,
                        int(size),
                    ))
                    pending += 1

                    if pending > flush_threshold:
                        session.execute(batch)
                        batch.clear()
                        pending = 0

            # Send whatever is left for this file, then empty the batch so
            # the same statements are not sent again with the next file.
            if pending:
                session.execute(batch)
                batch.clear()
    finally:
        # Release the driver's connections and threads even on failure.
        cluster.shutdown()
예제 #2
0
def insert_batch(rec, session, table_name):
    """Insert log records into ``table_name`` at QUORUM consistency.

    Each row is sent to the server in its own single-statement batch. The
    prepared statement and the batch object are created once, on the first
    row, and reused for every later row.

    Args:
        rec: Iterable of iterables of rows. Each row is indexed as
            ``[host, timestamp, path, bytes]``, where ``timestamp`` is a
            string in ``%d/%b/%Y:%H:%M:%S`` format and ``bytes`` is
            convertible with ``int()``.
        session: Session used to prepare and execute the insert.
        table_name: Table to insert into; it is spliced into the statement
            text because table names cannot be bound as parameters.
    """
    insert_statement = None
    batch = None

    for each_list in rec:
        for list_content in each_list:
            host = list_content[0]
            timestamp = d.datetime.strptime(list_content[1],
                                            "%d/%b/%Y:%H:%M:%S")
            path = list_content[2]
            byte = int(list_content[3])

            # Preparing costs a server round-trip, so do it only once and
            # only when there is at least one row to write.
            if insert_statement is None:
                insert_statement = session.prepare(
                    "INSERT INTO %s(host,uid,datetime,path,bytes) VALUES (?,UUID(),?,?,?)"
                    % table_name)
                batch = BatchStatement(
                    consistency_level=ConsistencyLevel.QUORUM)

            batch.add(insert_statement, (host, timestamp, path, byte))
            session.execute(batch)
            batch.clear()
예제 #3
0
                    msg['event_type'] = 'event_profile_change'

                    msg['event'] = {"profile_field_id": 'extid'}
                    channel_basic_publish(msg)

                    msg['event'] = {
                        "profile_field_id": 'profile_proc_location_pred'
                    }
                    channel_basic_publish(msg)
                elif args.cassandra:
                    events.append(event)

            # send events to cassandra
            if args.cassandra:
                txt = json.dumps(events).decode()
                batch.add(query, (txt, extid, platform))
                if len(batch) == 100:
                    try:
                        casconn.session().execute(batch, timeout=60)
                    except (cassandra.OperationTimedOut,
                            cassandra.WriteTimeout) as e:
                        time.sleep(30)  # important, really useful
                        casconn = cassanda_connection()
                        casconn.session().execute(batch, timeout=60)

                    batch.clear()

        # finally
        if len(batch):
            casconn.session().execute(batch, timeout=60)