def getJoinCnt(session, params):
    """Run ``CNT_QUERY`` for a time window and return the rows as dicts.

    Args:
        session: Object whose ``execute`` method runs the query.
        params: Indexable pair; ``params[0]`` and ``params[1]`` are the
            lower and upper bounds of the window. Both must provide
            ``strftime``.

    Returns:
        A list with one dict per result row, keyed ``prodID``, ``consID``,
        ``topic`` and ``ts`` (``ts`` is always ``params[0]``, not a value
        read from the row), or ``None`` when the query raises
        ``ReadFailure``.
    """
    ts_format = '%Y-%m-%d %H:%M:%S'
    try:
        records = []
        lower_bound = params[0].strftime(ts_format)
        upper_bound = params[1].strftime(ts_format)
        result = session.execute(query=CNT_QUERY,
                                 parameters=(lower_bound, upper_bound),
                                 trace=True)
        # The query is executed with tracing on and the trace is dumped to
        # stdout.
        print(result.get_query_trace())
        for row in result:
            record = {
                'prodID': row.prodid,
                'consID': row.consid,
                'topic': row.topic,
                'ts': params[0],
            }
            records.append(record)
            log.info("cnt: {}, {}, {}".format(record['prodID'],
                                              record['consID'],
                                              record['topic']))
        return records
    except ReadFailure as rf:
        # The same message goes to the logger and to the project log file.
        message = "Error executing getJoinCnt to Cassandra: {}".format(
            rf.args)
        log.error(message)
        tools.logfile(message)
        return None
def updateQuarter():
    """Aggregate one 15-minute slice of ``cnt`` into ``quarter_cnt``.

    Uses and updates the module globals ``session`` (database session,
    created on demand via ``db.connect()``) and ``last_update_quarter``
    (start of the next slice to process).

    Returns:
        ``True`` once the slice has been handled and the cursor has moved
        forward by 15 minutes (this also happens when the slice is empty or
        when the delete step reports failure). ``False`` when no session
        could be obtained, no minimum timestamp is available, the slice has
        not fully elapsed yet, or reading the slice returned ``None``.
    """
    global last_update_quarter, session

    slice_length = datetime.timedelta(minutes=15)

    # First run: seed the cursor from the oldest timestamp in "cnt".
    if last_update_quarter is None:
        log.info("last_update_quarter is None")
        if session is None:
            session = db.connect()
        if session is None:
            return False
        last_update_quarter = db.getMinTimestamp(session, "cnt")
        if last_update_quarter is None:
            return False
        seed = last_update_quarter
        # Snap down to the enclosing quarter-hour boundary; seconds,
        # microseconds and tzinfo are dropped by rebuilding the datetime.
        # NOTE(review): the original author notes no timezone conversion is
        # needed because the host clock is UTC -- confirm for other hosts.
        last_update_quarter = datetime.datetime(
            seed.year, seed.month, seed.day, seed.hour,
            15 * (seed.minute // 15))

    window_start = last_update_quarter
    window_end = window_start + slice_length

    # Do nothing until the end of the slice is in the past (naive local
    # clock comparison).
    if window_start.replace(tzinfo=None) + slice_length > datetime.datetime.now():
        log.info("1hour window not complete")
        return False

    log.info("window complete, executing update quarter")
    if session is None:
        session = db.connect()
    if session is None:
        return False

    rows = db.getJoinCnt(session, [window_start, window_end])
    if rows is None:
        # The callee signals an error with None.
        return False

    df = pandas.DataFrame(rows)
    if not df.empty:
        # Count rows per (prodID, consID, topic); size() yields an unnamed
        # column (label 0) which is renamed to 'cnt'.
        df = (df.groupby(['prodID', 'consID', 'topic'])
                .size()
                .reset_index()
                .rename(columns={0: 'cnt'}))
        deleted = db.deleteFromCNT(session, [window_start, window_end])
        if not deleted:
            tools.logfile("Delete at update quarter failed at {} : {}\n".format(
                window_start, window_end))
        else:
            insert_statement = session.prepare(
                "INSERT INTO quarter_cnt (prodid, consid, topic, ts, cnt) "
                "VALUES (?, ?, ?, ?, ?)")
            db.insertCnt(session, insert_statement, df, window_start)

    # Move on to the next slice even if this one was empty.
    last_update_quarter = window_end
    return True
def connect():
    """Open a session on the configured Cassandra keyspace.

    Builds a ``Cluster`` from ``CASS_CONTACT_POINTS`` and connects to
    ``CASS_KEYSPACE``.

    Returns:
        The connected session, or ``None`` when the connection attempt
        fails. Failures are written to the logger and to the project log
        file.
    """
    # Imported locally so this function stays self-contained; NoHostAvailable
    # is what the driver raises when none of the contact points can be
    # reached. NOTE(review): assumes ``Cluster`` is the DataStax
    # ``cassandra.cluster.Cluster`` -- confirm against the file's imports.
    from cassandra.cluster import NoHostAvailable

    try:
        cluster = Cluster(CASS_CONTACT_POINTS)
        session = cluster.connect(CASS_KEYSPACE)
        log.info("Connected to Cassandra.")
        return session
    except (AttributeError, NoHostAvailable) as e:
        # AttributeError is kept for backward compatibility. Handling
        # NoHostAvailable makes an unreachable cluster produce the None
        # result callers already check for, instead of an uncaught exception.
        message = "Error connecting to Cassandra: {}".format(e.args)
        log.error(message)
        tools.logfile(message)
        return None
def updateHalf():
    """Aggregate one 30-minute slice of ``quarter_cnt`` into ``half_cnt``.

    Uses and updates the module globals ``session`` (database session,
    created on demand via ``db.connect()``) and ``last_update_half`` (start
    of the next slice to process).

    Returns:
        ``True`` once the slice has been handled and the cursor has moved
        forward by 30 minutes (this also happens when the slice is empty or
        when the delete step reports failure). ``False`` when no session
        could be obtained, no minimum timestamp is available, the waiting
        period has not elapsed, or reading the slice returned ``None``.
    """
    global last_update_half, session

    source_table = "quarter_cnt"
    slice_length = datetime.timedelta(minutes=30)

    # First run: seed the cursor from the oldest timestamp in the source
    # table.
    if last_update_half is None:
        log.info("last_update_half is None")
        if session is None:
            session = db.connect()
        if session is None:
            return False
        last_update_half = db.getMinTimestamp(session, source_table)
        if last_update_half is None:
            return False
        seed = last_update_half
        # Snap down to the enclosing half-hour boundary; seconds,
        # microseconds and tzinfo are dropped by rebuilding the datetime.
        last_update_half = datetime.datetime(
            seed.year, seed.month, seed.day, seed.hour,
            30 * (seed.minute // 30))

    window_start = last_update_half
    window_end = window_start + slice_length

    # A slice is only processed once three hours have passed since its
    # start (naive local clock comparison).
    # NOTE(review): the log text below says "6 hours" while the code waits
    # 3 hours -- confirm which one is intended.
    if window_start.replace(tzinfo=None) + datetime.timedelta(hours=3) > datetime.datetime.now():
        log.info("6 hours window not complete")
        return False

    log.info("window complete, executing update half")
    if session is None:
        session = db.connect()
    if session is None:
        return False

    rows = db.getJoinCntFromX(session, [window_start, window_end, source_table])
    if rows is None:
        # The callee signals an error with None.
        return False

    df = pandas.DataFrame(rows)
    if not df.empty:
        # Sum the remaining columns per (prodID, consID, topic, ts).
        grouped = df.groupby(['prodID', 'consID', 'topic', 'ts'])
        df = grouped.sum().reset_index()
        deleted = db.deleteFromCNTX(
            session, [window_start, window_end, source_table])
        if not deleted:
            tools.logfile("Delete at update half failed at {} : {} \n".format(
                window_start, window_end))
        else:
            insert_statement = session.prepare(
                "INSERT INTO half_cnt (prodid, consid, topic, ts, cnt) "
                "VALUES (?, ?, ?, ?, ?)")
            db.insertCnt(session, insert_statement, df, window_start)

    # Move on to the next slice even if this one was empty.
    last_update_half = window_end
    return True
def getMinTimestamp(session, tableName):
    """Run ``MINTS_QUERY`` against a table and return its ``mints`` value.

    Args:
        session: Object whose ``execute`` method runs the query.
        tableName: Substituted into ``MINTS_QUERY`` with ``str.format``.

    Returns:
        The ``mints`` attribute of the first result row, or ``None`` when
        the query raises ``ReadFailure``.
    """
    try:
        result = session.execute(query=MINTS_QUERY.format(tableName))
        earliest = result[0].mints
        log.info("Executed getMinTS query")
        return earliest
    except ReadFailure as rf:
        # The same message goes to the logger and to the project log file.
        message = "Error executing getMinTS to Cassandra: {}".format(rf.args)
        log.error(message)
        tools.logfile(message)
        return None
def updateHour():
    """Aggregate one 1-hour slice of ``half_cnt`` into ``hour_cnt``.

    Uses and updates the module globals ``session`` (database session,
    created on demand via ``db.connect()``) and ``last_update_hour`` (start
    of the next slice to process).

    Returns:
        ``True`` once the slice has been handled and the cursor has moved
        forward by one hour (this also happens when the slice is empty or
        when the delete step reports failure). ``False`` when no session
        could be obtained, no minimum timestamp is available, the waiting
        period has not elapsed, or reading the slice returned ``None``.
    """
    global last_update_hour, session

    source_table = "half_cnt"
    slice_length = datetime.timedelta(hours=1)

    # First run: seed the cursor from the oldest timestamp in the source
    # table.
    if last_update_hour is None:
        log.info("last_update_hour is None")
        if session is None:
            session = db.connect()
        if session is None:
            return False
        last_update_hour = db.getMinTimestamp(session, source_table)
        if last_update_hour is None:
            return False
        # Snap down to the top of the hour.
        last_update_hour = last_update_hour.replace(
            microsecond=0, second=0, minute=0)

    window_start = last_update_hour
    window_end = window_start + slice_length

    # A slice is only processed once twelve hours have passed since its
    # start (naive local clock comparison).
    if window_start.replace(tzinfo=None) + datetime.timedelta(hours=12) > datetime.datetime.now():
        log.info("12 hours window not complete")
        return False

    log.info("window complete, executing last_update_hour")
    if session is None:
        session = db.connect()
    if session is None:
        return False

    rows = db.getJoinCntFromX(session, [window_start, window_end, source_table])
    if rows is None:
        # The callee signals an error with None.
        return False

    df = pandas.DataFrame(rows)
    if not df.empty:
        # Sum the remaining columns per (prodID, consID, topic, ts).
        grouped = df.groupby(['prodID', 'consID', 'topic', 'ts'])
        df = grouped.sum().reset_index()
        deleted = db.deleteFromCNTX(
            session, [window_start, window_end, source_table])
        if not deleted:
            tools.logfile("Delete at UpdateHour Failed {} : {}\n".format(
                window_start, window_end))
        else:
            insert_statement = session.prepare(
                "INSERT INTO hour_cnt (prodid, consid, topic, ts, cnt) "
                "VALUES (?, ?, ?, ?, ?)")
            db.insertCnt(session, insert_statement, df, window_start)

    # Move on to the next slice even if this one was empty.
    last_update_hour = window_end
    return True