def __init__(self, file_name):
    """Load a result file and register the topic, user and query it belongs to.

    The base ``DataFile`` initialiser is run first; the identifiers are then
    taken from the mapping returned by ``self._get_file_info()`` (keys
    ``'topic_id'``, ``'user_id'`` and ``'query_id'``). Finally the file
    contents are processed by the private parse step.

    Args:
        file_name: Passed unchanged to ``DataFile.__init__``.
    """
    DataFile.__init__(self, file_name)
    info = self._get_file_info()

    # Topic and user are created first because the query is linked to both.
    self.topic = Topic.create_or_update(info['topic_id'])
    self.user = User.create_or_update(info['user_id'])
    self.query = Query.create_or_update(
        info['query_id'],
        topic=self.topic,
        user=self.user,
    )

    self.__parse()
 def __parse(self):
     """Register the topic, user, condition and query described by each CSV row.

     The file at ``self.file_name`` is read with ``csv.DictReader`` (comma
     delimited, first line used as header). The columns read from every row
     are ``topic``, ``userid``, ``condition``, ``autocomplete_used``,
     ``queryid`` and ``terms``; the whole row is additionally handed to
     ``self.__build_precision_dict`` to produce the ``precision`` argument.

     Nothing is returned: the result of each ``create_or_update`` call is
     only used as an argument to the query registration.
     """
     with open(self.file_name, 'r') as result_file:
         result_reader = csv.DictReader(result_file, delimiter=',')
         for row in result_reader:
             topic = Topic.create_or_update(row['topic'])
             user = User.create_or_update(row['userid'])
             condition = Condition.create_or_update(row['condition'])
             # csv.DictReader yields every field as a string, so the flag has
             # to be compared with '1'; comparing with the integer 1 was
             # always False.
             # NOTE(review): assumes the column holds 0/1 - confirm against the data.
             autocomplete = row['autocomplete_used'] == '1'
             Query.create_or_update(
                 row['queryid'],
                 topic=topic,
                 user=user,
                 condition=condition,
                 autocomplete=autocomplete,
                 query_text=row['terms'],
                 precision=self.__build_precision_dict(row),
             )
Beispiel #3
0
 def __init__(self, file_name):
     """Load a log file and wire its actions into topic, user, query and session.

     After the base ``DataFile`` initialiser has run, the identifiers are
     taken from the mapping returned by ``self._get_file_info()`` (keys
     ``'topic_id'``, ``'user_id'``, ``'condition'`` and ``'query_id'``).
     The parsed actions are stored on ``self.actions`` and attached to each
     of the four owning objects; the query is then added to the session.

     Args:
         file_name: Passed unchanged to ``DataFile.__init__``.
     """
     DataFile.__init__(self, file_name)
     info = self._get_file_info()

     self.topic = Topic.create_or_update(info['topic_id'])
     self.user = User.create_or_update(info['user_id'])
     self.condition = Condition.create_or_update(info['condition'])

     # Must run before the query is created: the query is linked to
     # self.session (presumably set by this call - verify in the helper).
     self.__create_or_update_session()
     self.query = Query.create_or_update(
         info['query_id'],
         topic=self.topic,
         user=self.user,
         session=self.session,
     )

     self.actions = self.__parse()

     # Same order as before: topic, user, query, then session.
     for owner in (self.topic, self.user, self.query, self.session):
         owner.add_actions(self.actions)
     self.session.add_query(self.query)
 def __parse(self):
     """Walk the CSV result file and register the entities named in each row.

     ``self.file_name`` is opened for reading and parsed with
     ``csv.DictReader`` using ``,`` as delimiter. For every row a topic
     (``topic``), user (``userid``) and condition (``condition``) are
     created or updated, followed by the query (``queryid``) which receives
     those three objects, the autocomplete flag, the query text (``terms``)
     and the value of ``self.__build_precision_dict(row)``.

     The method has no return value.
     """
     with open(self.file_name, 'r') as result_file:
         result_reader = csv.DictReader(result_file, delimiter=',')
         for row in result_reader:
             topic = Topic.create_or_update(row['topic'])
             user = User.create_or_update(row['userid'])
             condition = Condition.create_or_update(row['condition'])
             # Fields read through csv.DictReader are strings; the former
             # comparison with the integer 1 could never be True, so the
             # flag was always stored as False.
             # NOTE(review): assumes the column holds 0/1 - confirm against the data.
             autocomplete = row['autocomplete_used'] == '1'
             Query.create_or_update(
                 row['queryid'],
                 topic=topic,
                 user=user,
                 condition=condition,
                 autocomplete=autocomplete,
                 query_text=row['terms'],
                 precision=self.__build_precision_dict(row),
             )
    def __parse(self):
        """Turn the log file into a list of ``Action`` objects ordered by timestamp.

        Each line of ``self.file_name`` is decoded with ``_parse_line``. Lines
        whose action is in the ignore list are skipped. For the remaining
        lines the topic, user, condition, session and query are created or
        updated, an ``Action`` is built and attached to all four of them, and
        the query is added to the session.

        ``self.query_counter`` is incremented every time a query (or query
        suggestion) is issued; its string form is used as the query id.

        Returns:
            The actions of this file, sorted by their ``timestamp`` attribute.
        """
        # These occur in weird places and have weird data, so they are
        # ignored to avoid trouble.
        ignored_actions = (
            'PERFORMANCE',
            'DEMOGRAPHICS_SURVEY_STARTED',
            'DEMOGRAPHICS_SURVEY_COMPLETED',
            'SELF_SEARCH_EFFICACY_SURVEY_STARTED',
            'SELF_SEARCH_EFFICACY_SURVEY_COMPLETED',
            'PRE_TASK_SURVEY_COMPLETED',
            'POST_TASK_SURVEY_COMPLETED',
            'SEARCH_TASK_VIEWED',
        )
        # Actions that start a new query.
        query_start_actions = ('QUERY_ISSUED', 'QUERY_SUGGESTION_ISSUED')
        # Actions logged before a query is issued; they belong with the next query.
        next_query_actions = ('SEARCH_TASK_COMMENCED', 'VIEW_SEARCH_BOX')

        collected = []
        current_query_text = None
        current_serp_page = None

        with open(self.file_name, 'r') as log_file:
            for raw_line in log_file:
                fields = _parse_line(raw_line)
                action_type = fields['action']

                if action_type in ignored_actions:
                    continue

                if action_type in query_start_actions:
                    # A new query begins: remember its text and forget the
                    # result page of the previous one.
                    self.query_counter += 1
                    current_query_text = fields.get('action_parameters', None)
                    current_serp_page = None

                topic = Topic.create_or_update(fields['topic_id'])
                user = User.create_or_update(fields['user_id'])
                condition = Condition.create_or_update(fields['condition'])
                session = self.__create_or_update_session(
                    user, topic, condition)

                if action_type in next_query_actions:
                    query_id = str(self.query_counter + 1)
                else:
                    query_id = str(self.query_counter)

                # NOTE(review): for "next query" actions the text passed here
                # is still that of the previous query - confirm this is intended.
                query = Query.create_or_update(
                    query_id,
                    topic=topic,
                    user=user,
                    session=session,
                    query_text=current_query_text,
                )

                when = _parse_datetime(fields['date'], fields['time'])
                action = Action(
                    timestamp=when,
                    session=session,
                    condition=condition,
                    action_type=action_type,
                    query=query,
                    serp_page_num=current_serp_page,
                    action_parameters=fields.get('action_parameters', None),
                )
                # The page number only applies to actions after the switch,
                # so it is updated once the action itself has been built.
                if action.is_serp_switch_event():
                    current_serp_page = int(action.result_page)
                collected.append(action)

                for owner in (topic, user, query, session):
                    owner.add_actions([action])
                session.add_query(query)

        collected.sort(key=lambda entry: entry.timestamp)
        return collected