def __init__(self, file_name):
    """Initialise the data file and register its topic, user and query.

    Delegates to ``DataFile.__init__`` and then reads the ``'topic_id'``,
    ``'user_id'`` and ``'query_id'`` entries of the mapping returned by
    ``self._get_file_info()``. The topic and user are created (or updated)
    first and then handed to ``Query.create_or_update``; finally the file
    contents are processed by ``self.__parse()``.

    :param file_name: forwarded unchanged to ``DataFile.__init__``.
    """
    DataFile.__init__(self, file_name)
    info = self._get_file_info()

    self.topic = Topic.create_or_update(info['topic_id'])
    self.user = User.create_or_update(info['user_id'])

    # The query is linked to the topic and user registered just above.
    owners = {'topic': self.topic, 'user': self.user}
    self.query = Query.create_or_update(info['query_id'], **owners)

    self.__parse()
def __parse( self ):
    """Read the CSV file at ``self.file_name`` and register one query per row.

    Each row must provide the columns ``topic``, ``userid``, ``condition``,
    ``autocomplete_used``, ``queryid`` and ``terms``. The row's topic, user
    and condition are created (or updated) and passed on to
    ``Query.create_or_update`` together with the query text and the result
    of ``self.__build_precision_dict( row )``.

    Returns nothing; the work happens through the ``create_or_update``
    calls.
    """
    with open( self.file_name, 'r' ) as result_file:
        result_reader = csv.DictReader( result_file, delimiter=',' )
        for row in result_reader:
            topic = Topic.create_or_update( row['topic'] )
            user = User.create_or_update( row['userid'] )
            condition = Condition.create_or_update( row['condition'] )
            # csv.DictReader yields every field as a string, so comparing
            # against the integer 1 could never be true; compare text.
            # NOTE(review): assumes the flag is written as 0/1 - confirm.
            autocomplete = str( row['autocomplete_used'] ).strip() == '1'
            Query.create_or_update(
                row['queryid'],
                topic = topic,
                user = user,
                condition = condition,
                autocomplete = autocomplete,
                query_text = row['terms'],
                precision = self.__build_precision_dict( row )
            )
def __init__(self, file_name):
    """Initialise the data file, then parse it and attach its actions.

    After delegating to ``DataFile.__init__``, the ``'topic_id'``,
    ``'user_id'``, ``'condition'`` and ``'query_id'`` entries of the mapping
    returned by ``self._get_file_info()`` are used to create (or update) the
    topic, user, condition and query. The list returned by
    ``self.__parse()`` is stored as ``self.actions`` and added to the topic,
    user, query and session, and the query is added to the session.

    :param file_name: forwarded unchanged to ``DataFile.__init__``.
    """
    DataFile.__init__(self, file_name)
    info = self._get_file_info()

    self.topic = Topic.create_or_update(info['topic_id'])
    self.user = User.create_or_update(info['user_id'])
    self.condition = Condition.create_or_update(info['condition'])

    # Runs before the query is built because ``self.session`` is read
    # below; presumably this call is what populates it - confirm.
    self.__create_or_update_session()
    self.query = Query.create_or_update(
        info['query_id'],
        topic=self.topic,
        user=self.user,
        session=self.session
    )

    self.actions = self.__parse()

    # Every owner receives the same parsed action list, in this order.
    for owner in (self.topic, self.user, self.query, self.session):
        owner.add_actions(self.actions)
    self.session.add_query(self.query)
def __parse(self):
    """Read the CSV file at ``self.file_name`` and register one query per row.

    Each row must provide the columns ``topic``, ``userid``, ``condition``,
    ``autocomplete_used``, ``queryid`` and ``terms``. The row's topic, user
    and condition are created (or updated) and passed on to
    ``Query.create_or_update`` together with the query text and the result
    of ``self.__build_precision_dict(row)``.

    Returns nothing; the work happens through the ``create_or_update``
    calls.
    """
    with open(self.file_name, 'r') as result_file:
        result_reader = csv.DictReader(result_file, delimiter=',')
        for row in result_reader:
            topic = Topic.create_or_update(row['topic'])
            user = User.create_or_update(row['userid'])
            condition = Condition.create_or_update(row['condition'])
            # csv.DictReader yields every field as a string, so comparing
            # against the integer 1 could never be true; compare text.
            # NOTE(review): assumes the flag is written as 0/1 - confirm.
            autocomplete = str(row['autocomplete_used']).strip() == '1'
            Query.create_or_update(
                row['queryid'],
                topic=topic,
                user=user,
                condition=condition,
                autocomplete=autocomplete,
                query_text=row['terms'],
                precision=self.__build_precision_dict(row))
def __parse(self):
    """Turn every relevant line of the log file into an ``Action``.

    The file at ``self.file_name`` is read line by line through
    ``_parse_line``. Lines whose action is one of the ignored survey or
    performance events are skipped. A ``QUERY_ISSUED`` or
    ``QUERY_SUGGESTION_ISSUED`` line increments ``self.query_counter``,
    replaces the remembered query text and resets the remembered result
    page number. Each remaining line creates (or updates) its topic, user,
    condition, session and query, builds an ``Action`` and adds it to all
    four of those objects; the query is also added to the session.

    :return: the created actions, ordered by their ``timestamp``.
    """
    # These occur in weird places and have weird data, so they are ignored
    # to avoid trouble.
    ignored_actions = (
        'PERFORMANCE',
        'DEMOGRAPHICS_SURVEY_STARTED',
        'DEMOGRAPHICS_SURVEY_COMPLETED',
        'SELF_SEARCH_EFFICACY_SURVEY_STARTED',
        'SELF_SEARCH_EFFICACY_SURVEY_COMPLETED',
        'PRE_TASK_SURVEY_COMPLETED',
        'POST_TASK_SURVEY_COMPLETED',
        'SEARCH_TASK_VIEWED',
    )
    query_start_actions = ('QUERY_ISSUED', 'QUERY_SUGGESTION_ISSUED')
    # These actions belong with the next query.
    next_query_actions = ('SEARCH_TASK_COMMENCED', 'VIEW_SEARCH_BOX')

    collected = []
    current_query_text = None
    current_serp_page = None

    with open(self.file_name, 'r') as log_file:
        for raw_line in log_file:
            entry = _parse_line(raw_line)
            action_type = entry['action']

            if action_type in ignored_actions:
                continue

            if action_type in query_start_actions:
                self.query_counter += 1
                current_query_text = entry.get('action_parameters', None)
                current_serp_page = None

            topic = Topic.create_or_update(entry['topic_id'])
            user = User.create_or_update(entry['user_id'])
            condition = Condition.create_or_update(entry['condition'])
            session = self.__create_or_update_session(user, topic, condition)

            if action_type in next_query_actions:
                query_id = str(self.query_counter + 1)
            else:
                query_id = str(self.query_counter)
            query = Query.create_or_update(
                query_id,
                topic=topic,
                user=user,
                session=session,
                query_text=current_query_text)

            when = _parse_datetime(entry['date'], entry['time'])
            action = Action(
                timestamp=when,
                session=session,
                condition=condition,
                action_type=action_type,
                query=query,
                serp_page_num=current_serp_page,
                action_parameters=entry.get('action_parameters', None))

            # A page switch only affects the actions that follow it; the
            # switch action itself was built with the previous page number.
            if action.is_serp_switch_event():
                current_serp_page = int(action.result_page)

            collected.append(action)
            for owner in (topic, user, query, session):
                owner.add_actions([action])
            session.add_query(query)

    collected.sort(key=lambda item: item.timestamp)
    return collected