Ejemplo n.º 1
0
def test_processPages(table_name):
    """Check the summary returned by ``processPages`` for a seeded table.

    The visitor, visits, browsers, and sessions found in the event built
    by ``event(ip, table_name)`` are written to the table first. The
    expected message counts one added page per unique visit slug and one
    record per visit.
    """
    ip = randomIP()
    this_event = event(ip, table_name)

    def images_of(kind):
        # NewImage payloads of every record whose Type matches ``kind``.
        return [
            record['dynamodb']['NewImage']
            for record in this_event['Records']
            if record['dynamodb']['NewImage']['Type']['S'] == kind
        ]

    visits = [itemToVisit(image) for image in images_of('visit')]
    client = DynamoClient(table_name)
    client.addVisitor(Visitor(ip))
    client.addVisits(visits)
    client.addBrowsers(
        [itemToBrowser(image) for image in images_of('browser')]
    )
    sessions = [itemToSession(image) for image in images_of('session')]
    for session in sessions:
        client.addSession(session)

    message = processPages(client, this_event)
    unique_slugs = {visit.slug for visit in visits}
    expected = (
        f'Successfully added {len(unique_slugs)} pages and updated 0 '
        f'from {len(visits)} records.'
    )
    assert message == expected
Ejemplo n.º 2
0
 def test_incrementVisitorSessions(self, table_name, visitor):
     """Incrementing a stored visitor's sessions returns the bumped visitor."""
     client = DynamoClient(table_name)
     client.addVisitor(visitor)
     response = client.incrementVisitorSessions(visitor)
     # Apply the same increment locally so the fixture is the expected value.
     visitor.numberSessions += 1
     assert 'visitor' in response
     assert response['visitor'] == visitor
Ejemplo n.º 3
0
def test_getSessionDetails(dynamo_client, table_init, table_name, visitor,
                           browsers, visits, session):
    """Session details contain the stored visits and the matching session.

    After adding the visitor and a new session, ``getSessionDetails`` must
    return each fixture visit at the same position and a session whose ip
    and start match the visitor and the first visit.
    """
    client = DynamoClient(table_name)
    client.addVisitor(visitor)
    client.addNewSession(visitor, browsers, visits)
    details = client.getSessionDetails(session)

    assert 'visits' in details
    stored_visits = details['visits']
    # Compare position by position against the fixture visits.
    assert all([
        dict(stored_visits[position]) == dict(expected)
        for position, expected in enumerate(visits)
    ])

    assert 'session' in details
    stored_session = details['session']
    assert stored_session.ip == visitor.ip
    assert stored_session.sessionStart == visits[0].date
Ejemplo n.º 4
0
def test_addNewSession(dynamo_client, table_init, table_name, visitor,
                       browsers, visits):
    """Adding a new session echoes back the visitor, browsers, and visits.

    The returned session must carry the visitor's ip and start at the
    date of the first visit.
    """
    client = DynamoClient(table_name)
    client.addVisitor(visitor)
    response = client.addNewSession(visitor, browsers, visits)

    # Items that must come back exactly as they were passed in.
    echoed = (
        ('visitor', visitor),
        ('browsers', browsers),
        ('visits', visits),
    )
    for key, expected in echoed:
        assert key in response
        assert response[key] == expected

    assert 'session' in response
    new_session = response['session']
    assert new_session.ip == visitor.ip
    assert new_session.sessionStart == visits[0].date
Ejemplo n.º 5
0
 def test_duplicate_visitor_addNewVisitor(self, table_name, visitor,
                                          browsers, visits):
     """addNewVisitor reports an error when the visitor is already stored."""
     client = DynamoClient(table_name)
     # Store the visitor first so the second insert is a duplicate.
     client.addVisitor(visitor)
     response = client.addNewVisitor(visitor, location(), browsers, visits)
     expected_error = f'Visitor already in table { visitor }'
     assert 'error' in response
     assert response['error'] == expected_error
Ejemplo n.º 6
0
 def test_addVisitor(self, table_name, visitor):
     """Adding a visitor returns that visitor under the 'visitor' key."""
     response = DynamoClient(table_name).addVisitor(visitor)
     assert 'visitor' in response
     assert response['visitor'] == visitor
Ejemplo n.º 7
0
 def test_table_addVisitor(self, visitor):
     """Adding a visitor through a client for table 'no name' yields an error."""
     response = DynamoClient('no name').addVisitor(visitor)
     assert 'error' in response
     assert response['error'] == 'Could not add new visitor to table'
Ejemplo n.º 8
0
 def test_listVisitors(self, table_name, visitor):
     """Listing visitors after one insert returns a single-element list."""
     client = DynamoClient(table_name)
     client.addVisitor(visitor)
     listed = client.listVisitors()
     assert isinstance(listed, list)
     assert len(listed) == 1
Ejemplo n.º 9
0
 def test_parameter_addVisitor(self, table_name):
     """addVisitor raises ValueError when given something other than a Visitor."""
     client = DynamoClient(table_name)
     with pytest.raises(ValueError) as e:
         # Deliberately no ``assert`` on the call: it is expected to raise,
         # and asserting on a return value would replace pytest's
         # "DID NOT RAISE" failure with an unrelated AssertionError.
         client.addVisitor({})
     assert str(e.value) == 'Must pass a Visitor object'
Ejemplo n.º 10
0
 def test_duplicate_addVisitor(self, table_name, visitor):
     """Adding the same visitor twice returns an 'already in table' error."""
     client = DynamoClient(table_name)
     # First insert succeeds; only the second response is under test.
     client.addVisitor(visitor)
     response = client.addVisitor(visitor)
     expected_error = f'Visitor already in table { visitor }'
     assert 'error' in response
     assert response['error'] == expected_error
Ejemplo n.º 11
0
# Visits closer together than this many seconds are treated as one session.
_SESSION_GAP_SECONDS = 60 * 30
# Format of the timestamp strings used as scrollEvents keys.
_SCROLL_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'


def _merge_boundary_visit(previous, current):
  """Combine the stored session's last visit with the record's first visit.

  Identity fields (id, date, user, title, slug, sessionStart) and the
  "previous page" fields come from ``previous``; the "next page" fields
  come from ``current``. Scroll events of both visits are merged, with
  ``current`` winning on duplicate keys.

  Args:
      previous: The last visit of the session already in the table.
      current: The first visit of the newly parsed record.

  Returns:
      A new ``Visit`` spanning both visits.
  """
  # NOTE(review): assumes scrollEvents keys are in chronological order, so
  # the first/last key are the earliest/latest scroll events -- confirm.
  first_scroll = datetime.datetime.strptime(
    list(previous.scrollEvents.keys())[0], _SCROLL_TIME_FORMAT
  )
  last_scroll = datetime.datetime.strptime(
    list(current.scrollEvents.keys())[-1], _SCROLL_TIME_FORMAT
  )
  return Visit(
    previous.id,  # visitor_id
    previous.date,  # date
    previous.user,  # user
    previous.title,  # title
    previous.slug,  # slug
    previous.sessionStart,  # sessionStart
    {**previous.scrollEvents, **current.scrollEvents},  # scrollEvents
    (last_scroll - first_scroll).total_seconds(),  # timeOnPage
    previous.prevTitle,  # prevTitle
    previous.prevSlug,  # prevSlug
    current.nextTitle,  # nextTitle
    current.nextSlug  # nextSlug
  )


def s3_processor(event, context):
  """Store the session described by the S3 object named in ``event``.

  Only the first entry of ``event['Records']`` is processed. The object is
  parsed with ``processDF`` into a session, its visits, and its browsers,
  and then one of three things happens:

  * The visitor is not in the table: the visitor, session, visits, and
    browsers are all added ("new").
  * The visitor exists and the record starts within 30 minutes of the last
    visit of the visitor's last session: the record's visits are folded
    into that session and the session is updated ("updated").
  * Otherwise the record is added as another session ("additional").

  Args:
      event (dict): S3 event notification; the object key, bucket name,
        and region are read from ``event['Records'][0]``.
      context: Unused.

  Returns:
      dict: ``statusCode`` 200 and a JSON-encoded ``body`` string reporting
        the updated / new / additional counts.
  """
  new = 0
  updated = 0
  additional = 0
  # Get the necessary data from the S3 event.
  s3_record = event['Records'][0]
  key = urllib.parse.unquote_plus(
    s3_record['s3']['object']['key'], encoding='utf-8'
  )
  aws_region = s3_record['awsRegion']
  bucket_name = s3_record['s3']['bucket']['name']
  # Create the necessary clients.
  dynamo_client = DynamoClient(os.environ['TABLE_NAME'], aws_region)
  s3_client = S3Client(bucket_name, aws_region)
  # Parse the record to get the browsers, visits, and session.
  record = processDF(key, s3_client)
  # Get the visitor from the table.
  visitor_details = dynamo_client.getVisitorDetails(
    Visitor(record['session'].id)
  )
  if 'visitor' not in visitor_details:
    # Unknown visitor: add everything from the record.
    dynamo_client.addVisitor(Visitor(record['session'].id))
    dynamo_client.addSession(record['session'])
    dynamo_client.addVisits(record['visits'])
    dynamo_client.addBrowsers(record['browsers'])
    new += 1
  else:
    # NOTE(review): assumes 'sessions' and 'visits' are ordered oldest
    # first, so [-1] is the most recent entry -- confirm against
    # getVisitorDetails.
    sessions = visitor_details['sessions']
    last_session = sessions[-1] if sessions else None
    last_sessions_visits = [
      visit for visit in visitor_details['visits']
      if last_session is not None
      and visit.sessionStart == last_session.sessionStart
    ]
    # The record continues the last session only when that session has
    # visits and the gap between its last visit and the record's first
    # visit is under 30 minutes. The gap is measured as an absolute
    # distance: subtracting in the other direction is negative for every
    # record that arrives after the stored session, which made the check
    # always pass.
    continues_last_session = bool(last_sessions_visits) and abs(
      (
        record['visits'][0].date - last_sessions_visits[-1].date
      ).total_seconds()
    ) < _SESSION_GAP_SECONDS
    if continues_last_session:
      # Point all of the record's visits at the previous session start.
      for visit in record['visits']:
        visit.sessionStart = last_session.sessionStart
      # When the record's first visit is the page the previous session
      # ended on, merge the two visits into one.
      if last_sessions_visits[-1].title == record['visits'][0].title:
        updated_visit = _merge_boundary_visit(
          last_sessions_visits[-1], record['visits'][0]
        )
        visits_to_update = [updated_visit] + record['visits'][1:] + \
          last_sessions_visits[:-1]
      else:
        visits_to_update = record['visits'] + last_sessions_visits
      dynamo_client.updateVisits(visits_to_update)
      dynamo_client.addBrowsers(record['browsers'])
      times_on_page = [visit.timeOnPage for visit in visits_to_update]
      dynamo_client.updateSession(
        Session(
          last_session.sessionStart,  # Start date-time
          last_session.id,  # Visitor ID
          np.mean(times_on_page),  # avgTime
          np.sum(times_on_page)  # totalTime
        ),
        []
      )
      updated += 1
    else:
      # Too far from the last session (or no prior visits to merge
      # with): add the record as a separate session.
      dynamo_client.addSession(record['session'])
      dynamo_client.addVisits(record['visits'])
      dynamo_client.addBrowsers(record['browsers'])
      additional += 1

  return {
    'statusCode': 200,
    'body': json.dumps(
      f'updated {updated}\nnew {new}\nadditional {additional}'
    )
  }