def bulkReadCurry(db, func):
    """Hand every recently-updated document of ``db[documentname]`` to ``func`` in pages.

    The window is ``start < statisticsUpdatedAt < end``, where ``start`` is
    the value returned by ``DwhMetaData.getLastTime(documentname)`` and
    ``end`` is ``getNowTimestamp() * 1000`` (presumably milliseconds --
    TODO confirm against ``getNowTimestamp``).  After all pages have been
    processed, ``end`` is stored via ``updateLastTime`` and a row is written
    through ``DwhLog.log``.

    Relies on module-level names defined outside this function:
    ``documentname``, ``max`` (the page size; it shadows the builtin),
    ``DESCENDING``, ``getNowTimestamp`` and ``timeStampTrans``.

    Args:
        db: Object indexable by collection name (presumably a PyMongo
            database -- verify against callers).
        func: Callable invoked once per page with the cursor for that page.

    Returns:
        None.
    """
    document = db[documentname]
    dma = dwh_meta_access.DwhMetaData()
    with dma:
        start = dma.getLastTime(documentname)

    end = getNowTimestamp() * 1000
    condition = {'statisticsUpdatedAt': {'$gt': start, '$lt': end}}
    total = document.count(condition)
    print(str(total)+" items data need to be update into datawarehouse!")
    print("start!")

    # Integer ceiling division.  The previous ``int(total/max) + 1`` issued
    # one extra, empty page (and an extra ``func`` call) whenever ``total``
    # was 0 or an exact multiple of ``max``, and went through float division.
    page_count = (total + max - 1) // max
    for i in range(page_count):
        userIt = (
            document.find(filter=condition)
            .batch_size(max)
            .skip(i * max)
            .limit(max)
            .sort("createdAt", DESCENDING)
        )
        func(userIt)
        print(str(i+1)+"k items in Pqs!")
    print("success!")

    # Only advance the stored time once every page has been handed to func,
    # so an exception above leaves the previous value in place.
    with dma:
        dma.updateLastTime(documentname, end)
    with dwh_log_access.DwhLog() as dla:
        dla.log(timeStampTrans(end), documentname, total, end)
def bulkReadCurry(db, func):
    """Page through the documents updated since the last recorded run.

    Selects documents of ``db[documentname]`` whose ``statisticsUpdatedAt``
    lies strictly between the value from
    ``DwhMetaData.getLastTime(documentname)`` and ``getNowTimestamp() * 1000``
    (presumably a millisecond timestamp -- TODO confirm), and passes them to
    ``func`` one page of ``max`` documents at a time, sorted by ``createdAt``
    descending.  On completion the upper bound is saved with
    ``updateLastTime`` and the run is recorded with ``DwhLog.log``.

    ``documentname``, ``max`` (page size, shadowing the builtin),
    ``DESCENDING``, ``getNowTimestamp`` and ``timeStampTrans`` are expected
    to be available at module level.

    Args:
        db: Object indexable by collection name (presumably a PyMongo
            database -- verify against callers).
        func: Callable that receives the cursor for each page.

    Returns:
        None.
    """
    document = db[documentname]
    dma = dwh_meta_access.DwhMetaData()
    with dma:
        start = dma.getLastTime(documentname)

    end = getNowTimestamp() * 1000
    condition = {'statisticsUpdatedAt': {'$gt': start, '$lt': end}}
    total = document.count(condition)
    print(
        str(total) + " items data need to be update into datawarehouse!")
    print("start!")

    # Count only pages that actually hold documents.  The earlier
    # ``int(total / max) + 1`` always added a page, so a count of 0 or an
    # exact multiple of ``max`` produced a trailing empty query and an extra
    # ``func`` call; it also relied on float division.
    full_pages, leftover = divmod(total, max)
    page_count = full_pages + 1 if leftover else full_pages
    for i in range(page_count):
        userIt = document.find(filter=condition).batch_size(max).skip(
            i * max).limit(max).sort("createdAt", DESCENDING)
        func(userIt)
        print(str(i + 1) + "k items in Pqs!")
    print("success!")

    # The stored time is advanced only after all pages were processed.
    with dma:
        dma.updateLastTime(documentname, end)
    with dwh_log_access.DwhLog() as dla:
        dla.log(timeStampTrans(end), documentname, total, end)
# Manual check script: print what each column_type_transform converter
# returns for a fixed list of sample inputs, one call per input, in order.

# arrayTrans: two lists of strings.
for sample in (['1a', '2a', '3a', '4a', '5a'], ['a', 'a', 'a', 'a', 'a']):
    print(column_type_transform.arrayTrans(sample))

# booleanTrans: real booleans plus several string spellings.
for sample in (True, 'True', False, 'False', 'aa', 'false'):
    print(column_type_transform.booleanTrans(sample))

# floatTrans: floats, an int, numeric strings, a non-numeric string, a dict.
for sample in (1.2, 1.0, 1, '1.3', '1.', '.3', 'd', {'a': 2}):
    print(column_type_transform.floatTrans(sample))

# integerTrans: floats, an int, a numeric string, a non-numeric string, a dict.
for sample in (1.0, 1.3, 1.9, 1, '1', 'd', {'a': 2}):
    print(column_type_transform.integerTrans(sample))

# timeStampTrans: a 10-digit int, the same as a string, and a 13-digit string.
for sample in (1450500062, '1450500062', '1450500062130'):
    print(column_type_transform.timeStampTrans(sample))

# Nested lookup using the dotted path 'b.b1'.
jsonObj = {'a': 'a', 'b': {'b1': 'b1', 'b2': 'b2'}}
encodedjson = json.dumps(jsonObj)  # not used by the visible code
print(column_type_transform.getValueFromJsonRecurs(jsonObj, 'b.b1', str))
# Manual check script for column_type_transform.  Each entry pairs a
# converter name with the sample inputs printed for it; entries and inputs
# are processed in the listed order, one call and one print per input.
_TRANSFORM_SAMPLES = (
    ('arrayTrans', (
        ['1a', '2a', '3a', '4a', '5a'],
        ['a', 'a', 'a', 'a', 'a'],
    )),
    ('booleanTrans', (True, 'True', False, 'False', 'aa', 'false')),
    ('floatTrans', (1.2, 1.0, 1, '1.3', '1.', '.3', 'd', {'a': 2})),
    ('integerTrans', (1.0, 1.3, 1.9, 1, '1', 'd', {'a': 2})),
    ('timeStampTrans', (1450500062, '1450500062', '1450500062130')),
)

for _trans_name, _samples in _TRANSFORM_SAMPLES:
    for _sample in _samples:
        # Resolve the converter at call time, as a direct attribute call does.
        print(getattr(column_type_transform, _trans_name)(_sample))

# Nested lookup using the dotted path 'b.b1'.
jsonObj = {'a': 'a', 'b': {'b1': 'b1', 'b2': 'b2'}}
encodedjson = json.dumps(jsonObj)  # not used by the visible code
print(column_type_transform.getValueFromJsonRecurs(jsonObj, 'b.b1', str))
__author__ = 'zhaoyifei'

from etl.postgresql import dwh_log_access
from etl.tools.column_type_transform import timeStampTrans


if __name__ == "__main__":
    # Manual check of DwhLog: write nine identical rows inside one ``with``
    # block, then enter the same DwhLog object a second time and write one
    # more row.

    def _write_sample_row(log_access):
        """Write the fixed sample row through ``log_access.log``."""
        log_access.log(timeStampTrans(1450667162), 'users', 30, 1450667162000)

    dla = dwh_log_access.DwhLog()

    with dla:
        for _ in range(9):
            _write_sample_row(dla)

    with dla:
        _write_sample_row(dla)
cur.execute("SELECT count(*) FROM log;") rows = cur.fetchall() # all rows in table for row in rows: print(row) with pgaccess.PgAccess() as pga: pga.execFunc(selectLog) insertSql = 'INSERT INTO test(\ m_id, price, birthday, "isDeletec", num, flo, tags, ints)\ VALUES (%s, %s, %s, %s, %s, %s, %s, %s);' sqlL = [] print(arrayTrans([123, 34, 43, 54, 12])) sqlL.append((insertSql, (None, str('1,000,000'), timeStampTrans(1450576800), booleanTrans(False), integerTrans(12), None, arrayTrans(['ad', 'dfd', 'dfs', 'fsd', 'ewrew']), arrayTrans([123, 34, 43, 54, 12]) ) ) ) sqlL.append((insertSql, (str('dfd'), None, timeStampTrans(1450576800), booleanTrans(False), integerTrans(12), floatTrans(1.23455), arrayTrans(['ad', 'dfd', 'dfs', 'fsd', 'ewrew']), None ) ) )