def test_toItem(ip, pixel_app):
    '''Checks the item produced by toItem() for the `pixel_app` value.'''
    added_at = datetime.datetime.now()
    pixel_browser = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    # Expected 'DateAdded' string: the timestamp with the microseconds cut
    # down to their first three digits, followed by 'Z'.
    seconds_part = added_at.strftime('%Y-%m-%dT%H:%M:%S.')
    millis_part = added_at.strftime('%f')[:3]
    expected_item = {
        'PK': {'S': f'VISITOR#{ ip }'},
        'SK': {'S': 'BROWSER#2020-01-01T00:00:00.000Z'},
        'Type': {'S': 'browser'},
        'App': {'S': pixel_app},
        'Width': {'N': '100'},
        'Height': {'N': '200'},
        'DateVisited': {'S': '2020-01-01T00:00:00.000Z'},
        'Device': {'S': 'Pixel 4 XL'},
        'DeviceType': {'S': 'mobile'},
        'Browser': {'S': 'chrome'},
        'OS': {'S': '11'},
        'Webkit': {'S': '537.36'},
        'Version': {'S': '86.0.4240.198'},
        'DateAdded': {'S': seconds_part + millis_part + 'Z'}
    }
    assert pixel_browser.toItem() == expected_item
def test_pk(ip, pixel_app):
    '''Checks that pk() returns the visitor partition key built from the ip.'''
    added_at = datetime.datetime.now()
    pixel_browser = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected_pk = {'S': f'VISITOR#{ ip }'}
    assert pixel_browser.pk() == expected_pk
def browsers():
    '''Returns a list of two Browser objects: an iPhone and a Pixel 4 XL.

    Both share the address '0.0.0.0', a 100x200 size and the same
    dateAdded; they differ in user agent and in the visit timestamp.
    '''
    iphone_agent = (
        'Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like Mac OS X) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.2 '
        'Mobile/15E148 Safari/604.1'
    )
    pixel_agent = (
        'Mozilla/5.0 (Linux; Android 11; Pixel 4 XL) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/86.0.4240.198 Mobile Safari/537.36'
    )
    iphone_browser = Browser(
        iphone_agent, '0.0.0.0', 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded='2020-01-01T00:00:00.000Z'
    )
    pixel_browser = Browser(
        pixel_agent, '0.0.0.0', 100, 200, '2020-01-01T00:01:00.000Z',
        dateAdded='2020-01-01T00:00:00.000Z'
    )
    return [iphone_browser, pixel_browser]
def test_key(ip, pixel_app):
    '''Checks that key() returns both the partition key and the sort key.'''
    added_at = datetime.datetime.now()
    pixel_browser = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected_key = {
        'PK': {'S': f'VISITOR#{ ip }'},
        'SK': {'S': 'BROWSER#2020-01-01T00:00:00.000Z'}
    }
    assert pixel_browser.key() == expected_key
def browser():
    '''Returns a single iPhone Browser at '0.0.0.0' with a 100x200 size.'''
    iphone_agent = (
        'Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like Mac OS X) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.2 '
        'Mobile/15E148 Safari/604.1'
    )
    return Browser(
        iphone_agent, '0.0.0.0', 100, 200, '2020-01-01T00:00:00.000Z'
    )
def test_itemToBrowser(ip, pixel_app):
    '''Checks that a Browser survives a toItem() / itemToBrowser() round trip.'''
    now = datetime.datetime.now()
    # dateAdded is passed as a string here: the timestamp with the
    # microseconds cut down to their first three digits, followed by 'Z'.
    date_added = (
        now.strftime('%Y-%m-%dT%H:%M:%S.') + now.strftime('%f')[:3] + 'Z'
    )
    original = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=date_added
    )
    restored = itemToBrowser(original.toItem())
    compared_attributes = (
        'app', 'ip', 'width', 'height', 'dateVisited', 'dateAdded',
        'device', 'deviceType', 'browser', 'os', 'webkit', 'version'
    )
    for attribute in compared_attributes:
        assert getattr(original, attribute) == getattr(restored, attribute)
def test_default_mac_safari_init(ip, mac_safari_app):
    '''Checks the attributes Browser sets for the `mac_safari_app` value.'''
    added_at = datetime.datetime.now()
    parsed = Browser(
        mac_safari_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected = {
        'app': mac_safari_app,
        'ip': ip,
        'width': 100,
        'height': 200,
        'dateVisited': datetime.datetime(2020, 1, 1, 0, 0, 0),
        'dateAdded': added_at,
        'device': 'mac',
        'deviceType': 'desktop',
        'browser': 'safari',
        'os': '10.15.6',
        'webkit': '605.1.15',
        'version': '14.0.2'
    }
    for attribute, value in expected.items():
        assert getattr(parsed, attribute) == value
def test_default_mac_chrome_init(ip, mac_chrome_app):
    '''Checks the attributes Browser sets for the `mac_chrome_app` value.'''
    added_at = datetime.datetime.now()
    parsed = Browser(
        mac_chrome_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected = {
        'app': mac_chrome_app,
        'ip': ip,
        'width': 100,
        'height': 200,
        'dateVisited': datetime.datetime(2020, 1, 1, 0, 0, 0),
        'dateAdded': added_at,
        'device': 'mac',
        'deviceType': 'desktop',
        'browser': 'chrome',
        'os': '11.1.0',
        'webkit': '537.36',
        'version': '87.0.4280.88'
    }
    for attribute, value in expected.items():
        assert getattr(parsed, attribute) == value
def test_default_pixel_init(ip, pixel_app):
    '''Checks the attributes Browser sets for the `pixel_app` value.'''
    added_at = datetime.datetime.now()
    parsed = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected = {
        'app': pixel_app,
        'ip': ip,
        'width': 100,
        'height': 200,
        'dateVisited': datetime.datetime(2020, 1, 1, 0, 0, 0),
        'dateAdded': added_at,
        'device': 'Pixel 4 XL',
        'deviceType': 'mobile',
        'browser': 'chrome',
        'os': '11',
        'webkit': '537.36',
        'version': '86.0.4240.198'
    }
    for attribute, value in expected.items():
        assert getattr(parsed, attribute) == value
def test_default_samsung_G981U1_init(ip, samsung_G981U1_app):
    '''Checks the attributes Browser sets for the `samsung_G981U1_app` value.'''
    added_at = datetime.datetime.now()
    parsed = Browser(
        samsung_G981U1_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected = {
        'app': samsung_G981U1_app,
        'ip': ip,
        'width': 100,
        'height': 200,
        'dateVisited': datetime.datetime(2020, 1, 1, 0, 0, 0),
        'dateAdded': added_at,
        'device': 'SAMSUNG SM-G981U1',
        'deviceType': 'mobile',
        'browser': 'samsung',
        'os': '10',
        'webkit': '537.36',
        'version': '13.0'
    }
    for attribute, value in expected.items():
        assert getattr(parsed, attribute) == value
def test_default_samsung_G950U_init(ip, samsung_G950U_app):
    '''Checks the attributes Browser sets for the `samsung_G950U_app` value.'''
    added_at = datetime.datetime.now()
    parsed = Browser(
        samsung_G950U_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected = {
        'app': samsung_G950U_app,
        'ip': ip,
        'width': 100,
        'height': 200,
        'dateVisited': datetime.datetime(2020, 1, 1, 0, 0, 0),
        'dateAdded': added_at,
        'device': 'SM-G950U',
        'deviceType': 'mobile',
        'browser': 'chrome',
        'os': '9',
        'webkit': '537.36',
        'version': '87.0.4280.101'
    }
    for attribute, value in expected.items():
        assert getattr(parsed, attribute) == value
def test_default_windows_chrome_init(windows_chrome_app):
    '''Checks the attributes Browser sets for the `windows_chrome_app` value.

    NOTE(review): `visitor_id` is not a parameter of this test, so it has to
    resolve at module level; its definition is not visible here - confirm.
    '''
    added_at = datetime.datetime.now()
    parsed = Browser(
        visitor_id, windows_chrome_app, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected = {
        'app': windows_chrome_app,
        'id': visitor_id,
        'width': 100,
        'height': 200,
        'dateVisited': datetime.datetime(2020, 1, 1, 0, 0, 0),
        'dateAdded': added_at,
        'device': 'windows',
        'deviceType': 'desktop',
        'browser': 'chrome',
        'os': '10.0',
        'webkit': '537.36',
        'version': '87.0.4280.88'
    }
    for attribute, value in expected.items():
        assert getattr(parsed, attribute) == value
def test_default_unknown_init(ip):
    '''Checks that the app string 'unknown' leaves every parsed field None.'''
    added_at = datetime.datetime.now()
    parsed = Browser(
        'unknown', ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected = {
        'app': 'unknown',
        'ip': ip,
        'width': 100,
        'height': 200,
        'dateVisited': datetime.datetime(2020, 1, 1, 0, 0, 0),
        'dateAdded': added_at
    }
    for attribute, value in expected.items():
        assert getattr(parsed, attribute) == value
    # These fields must be exactly None, so they are checked by identity.
    unset_attributes = (
        'device', 'deviceType', 'browser', 'os', 'webkit', 'version'
    )
    for attribute in unset_attributes:
        assert getattr(parsed, attribute) is None
def test_default_iphone_linkedin_init(ip, iphone_linkedin_app):
    '''Checks the attributes Browser sets for the `iphone_linkedin_app` value.'''
    added_at = datetime.datetime.now()
    parsed = Browser(
        iphone_linkedin_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected = {
        'app': iphone_linkedin_app,
        'ip': ip,
        'width': 100,
        'height': 200,
        'dateVisited': datetime.datetime(2020, 1, 1, 0, 0, 0),
        'dateAdded': added_at,
        'device': 'iphone',
        'deviceType': 'mobile',
        'browser': '[LinkedInApp]',
        'os': '14.2',
        'webkit': '605.1.15'
    }
    for attribute, value in expected.items():
        assert getattr(parsed, attribute) == value
    # No version is expected for this app string; checked by identity.
    assert parsed.version is None
def test_default_iphone_safari_init(iphone_safari_app):
    '''Checks the attributes Browser sets for the `iphone_safari_app` value.

    NOTE(review): `visitor_id` is not a parameter of this test, so it has to
    resolve at module level; its definition is not visible here - confirm.
    '''
    added_at = datetime.datetime.now()
    parsed = Browser(
        visitor_id, iphone_safari_app, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_at
    )
    expected = {
        'app': iphone_safari_app,
        'id': visitor_id,
        'width': 100,
        'height': 200,
        'dateVisited': datetime.datetime(2020, 1, 1, 0, 0, 0),
        'dateAdded': added_at,
        'device': 'iphone',
        'deviceType': 'mobile',
        'browser': 'safari',
        'os': '14.3',
        'webkit': '605.1.15',
        'version': '14.0.2'
    }
    for attribute, value in expected.items():
        assert getattr(parsed, attribute) == value
def test_repr(ip, pixel_app):
    '''Checks that repr() of a Pixel browser is '<ip> - chrome'.

    NOTE(review): a second `test_repr` is defined later in this file. Python
    keeps only the last definition of a name, so this one is shadowed.
    '''
    pixel_browser = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z'
    )
    expected_repr = f'{ ip } - chrome'
    assert repr(pixel_browser) == expected_repr
def processDF(key, s3_client):
    '''Reads a raw csv file from S3 and parses the browsers, visits, and session.

    Parameters
    ----------
    key : str
        The key of the raw csv file in the S3 bucket.
    s3_client : S3Client
        The S3 client used to get the file from. The result of its
        ``getObject( key )`` call is read through ``['Body'].read()``.

    Returns
    -------
    result : dict
        A dict with the keys ``'visits'`` (list of Visit), ``'session'``
        (Session), and ``'browsers'`` (list of Browser) parsed from the file.
    '''
    request = s3_client.getObject(key)
    # The object is parsed as headerless text with ',\t' between fields.
    # The 'process' column is named so the columns line up, then dropped
    # through ``usecols``; 'time' becomes the index.
    df = pd.read_csv(
        io.BytesIO(request['Body'].read()),
        sep=',\t',
        engine='python',
        names=[
            'process', 'id', 'time', 'title', 'slug', 'userAgent',
            'width', 'height', 'x', 'y'
        ],
        usecols=[
            'id', 'time', 'title', 'slug', 'userAgent',
            'width', 'height', 'x', 'y'
        ],
        index_col='time'
    )
    df = df.drop_duplicates().sort_index()
    # For every column, collect the index labels at which the value differs
    # from the previous row; the 'title' entry gives the start of each visit.
    index_change = df.ne(df.shift()).apply(
        lambda x: x.index[x].tolist()
    ).title
    # Pair each start with the label just before the next start; the last
    # visit runs to the final row. Positional lookups are kept as-is because
    # the container type of ``index_change`` depends on what ``apply`` returns.
    last_position = len(index_change) - 1
    indexes = [
        (index_change[index], index_change[index + 1] - 1)
        if index != last_position
        else (index_change[index], df.tail(1).index[0])
        for index in range(len(index_change))
    ]
    visits = []
    for (start, stop) in indexes:
        temp = df.loc[start:stop]
        first_time = temp.index[0]
        last_time = temp.index[-1]
        visits.append(
            Visit(
                temp.id.unique()[0],
                formatEpoch(first_time),
                '0',
                temp.title.unique()[0],
                temp.slug.unique()[0],
                formatEpoch(first_time),
                {
                    formatEpoch(index): {'x': row.x, 'y': row.y}
                    for index, row in temp.iterrows()
                },
                # presumably the index is epoch milliseconds, making this
                # seconds on the page - TODO confirm
                (last_time - first_time) / 1000
            )
        )
    # Every visit shares the date of the first visit as its session start.
    for visit in visits:
        visit.sessionStart = visits[0].date
    # Link neighbouring visits in both directions in a single pass.
    for earlier, later in zip(visits, visits[1:]):
        earlier.nextTitle = later.title
        earlier.nextSlug = later.slug
        later.prevTitle = earlier.title
        later.prevSlug = earlier.slug
    visitor_id = df.id.unique()[0]
    time_on_page = [visit.timeOnPage for visit in visits]
    session = Session(
        visits[0].sessionStart,
        visitor_id,
        np.mean(time_on_page),
        np.sum(time_on_page)
    )
    # One Browser per distinct (userAgent, height, width) combination.
    combinations = df.groupby(
        ['userAgent', 'height', 'width']
    ).size().reset_index().rename(columns={0: 'count'})
    # NOTE(review): the first-seen lookup below matches on height and width
    # only, not on userAgent - confirm that this is intended.
    browsers = [
        Browser(
            visitor_id,
            row.userAgent,
            row.width,
            row.height,
            formatEpoch(
                df.loc[
                    (df['height'] == row.height)
                    & (df['width'] == row.width)
                ].head(1).index[0]
            )
        )
        for _, row in combinations.iterrows()
    ]
    return {'visits': visits, 'session': session, 'browsers': browsers}
def test_repr(pixel_app):
    '''Checks that repr() of a Pixel browser is '<visitor_id> - chrome'.

    NOTE(review): an earlier `test_repr` exists in this file; this later
    definition replaces it. `visitor_id` is not a parameter, so it has to
    resolve at module level; its definition is not visible here - confirm.
    '''
    pixel_browser = Browser(
        visitor_id, pixel_app, 100, 200, '2020-01-01T00:00:00.000Z'
    )
    expected_repr = f'{ visitor_id } - chrome'
    assert repr(pixel_browser) == expected_repr