def create_comment_item(self, response, suggestion_id):
    """
    Create a CommentItem, see :class:`~OnlineParticipationDataset.items.CommentItem`, from given response.

    The stored ``vote`` is derived from the vote text returned by
    ``get_comment_vote``: ``'misc'``, ``'approval'`` or ``'refusal'`` for the
    three known texts, ``'official'`` when the comment carries a
    ``Kommentarlabel`` field label, and ``'answer'`` otherwise.

    :param response: scrapy response/selector the comment data is extracted from
    :param suggestion_id: ID of the suggestion the comment belongs to
    :return: scrapy item
    """
    comment_item = items.CommentItem()
    comment_item['comment_id'] = self.get_comment_id(response)
    comment_item['suggestion_id'] = suggestion_id
    comment_item['date_time'] = self.get_comment_datetime(response)
    comment_item['author'] = self.get_comment_author(response)
    comment_item['title'] = self.get_comment_title(response)
    comment_item['content'] = self.get_comment_content(response)

    vote = self.get_comment_vote(response)
    if vote == 'sonstiges':
        comment_item['vote'] = 'misc'
    elif vote == 'unterstützt den Vorschlag':
        comment_item['vote'] = 'approval'
    elif vote == 'lehnt Vorschlag ab':
        comment_item['vote'] = 'refusal'
    elif (response.xpath('.//div[@class="field-label"]/text()').extract_first()
          == 'Kommentarlabel:\xa0'):
        # No recognised vote text, but a comment label is present.
        # The label was previously misspelled as 'offical'.
        comment_item['vote'] = 'official'
    else:
        # Neither a vote nor a comment label: treated as an answer.
        comment_item['vote'] = 'answer'
    return comment_item
def create_comment_item(self, response, suggestion_id, parent_id=None, level=1):
    """
    Create a CommentItem, see :class:`~OnlineParticipationDataset.items.CommentItem`, from given response.

    Replies are collected recursively and stored as a list of items under
    ``children``; each reply receives this comment's ID as ``parent_id`` and
    a ``level`` increased by one.

    :param response: scrapy response/selector of the comment element
    :param suggestion_id: ID of the suggestion the comment belongs to
    :param parent_id: ID of the parent comment; when ``None`` (default) the
        ``parent_id`` field is left unset
    :param level: nesting depth of the comment, 1 for a top-level comment
    :return: scrapy item
    """
    comment_item = items.CommentItem()
    comment_item['level'] = level
    comment_item['comment_id'] = self.get_comment_id(response)
    comment_item['suggestion_id'] = suggestion_id
    # Compare against None so a falsy but valid ID (e.g. 0) is still stored.
    if parent_id is not None:
        comment_item['parent_id'] = parent_id
    comment_item['date_time'] = self.get_comment_datetime(response)
    comment_item['author'] = self.get_comment_author(response)
    comment_item['title'] = self.get_comment_title(response)
    comment_item['content'] = self.get_comment_content(response)

    # For top-level comments the "indented" reply container is looked up one
    # div deeper than for nested comments.
    if level == 1:
        children_xpath = 'following-sibling::div/div[@class="indented"]/article'
    else:
        children_xpath = 'following-sibling::div[@class="indented"]/article'

    comment_item['children'] = [
        self.create_comment_item(
            child, suggestion_id, comment_item['comment_id'], level + 1)
        for child in response.xpath(children_xpath)
    ]
    return comment_item
def create_comment_item(self, response, suggestion_id):
    """
    Build a CommentItem, see :class:`~OnlineParticipationDataset.items.CommentItem`, from the given response.

    The element's ``class`` attribute is read with every ``kommentar_``
    occurrence removed and split on whitespace: the first token is the
    comment level, and a second token (if there are exactly two) is the
    vote marker.

    :param response: scrapy response/selector of the comment element
    :param suggestion_id: ID of the suggestion the comment belongs to
    :return: scrapy item
    """
    # Vote markers that translate directly into a stored vote label.
    plain_votes = {
        "ablehnung": "refusal",
        "zustimmung": "approval",
        "neutral": "neutral",
    }
    # Markers identifying official comments.
    official_markers = ("stellungnahme", "verwaltung")

    item = items.CommentItem()
    class_attr = response.xpath('@class').extract_first()
    tokens = class_attr.replace('kommentar_', '').split()
    level = int(tokens[0])

    item['level'] = level
    item['comment_id'] = self.get_comment_id(response)
    item['suggestion_id'] = suggestion_id
    if level > 1:
        item['parent_id'] = 0
    item['date_time'] = self.get_comment_datetime(response)
    item['author'] = self.get_comment_author(response)
    item['title'] = self.get_comment_title(response)
    item['content'] = self.get_comment_content(response)

    # Anything other than exactly two class tokens is treated as an answer.
    if len(tokens) != 2:
        item['vote'] = "answer"
        return item

    marker = tokens[1]
    if marker in official_markers:
        # For official comments the ID is located elsewhere, so the ID set
        # above is replaced.
        item['comment_id'] = self.get_comment_id_official(response)
        item['vote'] = "official"
    else:
        item['vote'] = plain_votes.get(marker, "misc")
    return item
def create_comment_item(self, response, suggestion_id, level=1):
    """
    Build a CommentItem, see :class:`~OnlineParticipationDataset.items.CommentItem`, from the given response.

    :param response: scrapy response/selector of the comment element
    :param suggestion_id: ID of the suggestion the comment belongs to
    :param level: level of the comment (default 1); for levels above 1 the
        ``parent_id`` field is set to 0
    :return: scrapy item
    """
    item = items.CommentItem()
    item['level'] = level
    item['comment_id'] = self.get_comment_id(response)
    item['suggestion_id'] = suggestion_id
    if level > 1:
        item['parent_id'] = 0

    # Remaining fields are each filled by a dedicated extractor, in this order.
    extractors = (
        ('date_time', self.get_comment_datetime),
        ('author', self.get_comment_author),
        ('title', self.get_comment_title),
        ('content', self.get_comment_content),
    )
    for field, extract in extractors:
        item[field] = extract(response)
    return item
def create_comments(self, comments, comment_sublists, parent_id):
    """
    Create comment items recursively from a list of comments (selectors) and
    a list of comment-sublists (selectors).

    Sublists are consumed in order: each comment that has children takes the
    next sublist. The result is a flat list in which the descendants of a
    comment precede the comment itself; the ``children`` field of every item
    is left as an empty list.

    :param comments: list of comments (selectors)
    :param comment_sublists: list of comment-sublists (selectors)
    :param parent_id: ID of parent comment (for top level comments the id of the suggestion)
    :return: list of items to be yielded
    """
    collected = []
    remaining_sublists = iter(comment_sublists)

    for comment in comments:
        self.comments_counter += 1

        # Populate the item for the current comment.
        item = items.CommentItem()
        item['author'] = self.get_author(comment)
        item['date_time'] = self.get_datetime(comment)
        item['id'] = next(self.id_counter)
        item['parent'] = parent_id
        item['content'] = self.get_content(comment)
        item['vote'] = self.get_voting(comment)
        item['children'] = []

        if not self.has_children(comment):
            collected.append(item)
            continue

        # The next unused sublist holds this comment's children.
        sublist = next(remaining_sublists)
        child_comments = self.get_children_comments(sublist)
        child_sublists = self.get_children_sublists(sublist)

        # Descendants go into the result first, then the comment itself.
        collected.extend(
            self.create_comments(child_comments, child_sublists, item['id']))
        collected.append(item)

    return collected