コード例 #1
0
    def test_delete(self):
        """Check delete() against an existing id, a malformed id, and an unknown id."""
        gql = GraphQLClient()

        # Arrange: store a sample speech and confirm it can be fetched.
        sample = self._build_sample_speech()
        gql.merge(sample)
        sample_id = sample.id
        gql.get(sample_id)  # should exist

        # Once deleted, fetching the same id must fail.
        gql.delete(sample_id)
        with pytest.raises(GraphQLException):
            gql.get(sample_id)  # should be deleted

        # An id with an unknown class prefix is rejected.
        with pytest.raises(GraphQLException):
            gql.delete('invalid:class')

        # don't raise exception when given non-existing id
        gql.delete('bill:invalid')
コード例 #2
0
class SpiderTemplate(scrapy.Spider):
    """Base spider bundling the clients, finders, and link helpers used by crawlers.

    Subclasses are expected to override ``parse`` (and presumably ``domain``).
    """

    domain = NotImplemented  # placeholder; presumably set by each concrete spider

    def __init__(self, *args, **kwargs):
        """Create the GraphQL/Elasticsearch clients and the resource finders."""
        super().__init__(*args, **kwargs)
        # Silence the chatty client libraries below WARNING.
        logging.getLogger('elasticsearch').setLevel(logging.WARNING)
        logging.getLogger('sgqlc').setLevel(logging.WARNING)
        self.gql_client = GraphQLClient()
        self.es_client = ElasticsearchClient()
        self.bill_finder = BillFinder()
        self.minutes_finder = MinutesFinder()
        self.committee_finder = CommitteeFinder()
        self.member_finder = MemberFinder()

    def parse(self, response):
        """Default callback; concrete spiders must override it.

        Raises:
            NotImplementedError: always, because the template has no parsing logic.
        """
        # A bare ``NotImplemented`` expression is a no-op that silently returns
        # None; raise instead so a missing override is detected.
        raise NotImplementedError(f'{type(self).__name__}.parse callback is not defined')

    def _bulk_link_by_id_fields(self, resources, id_fields):
        """Link each resource to the ids held in whichever of ``id_fields`` it has.

        For every resource and every attribute name in ``id_fields`` that the
        resource defines, one (resource.id -> attribute value) link is queued.
        All queued links are sent in a single ``bulk_link`` call; nothing is
        sent when no link was queued.
        """
        from_ids, to_ids = [], []
        for resource in resources:
            for id_field in id_fields:
                if hasattr(resource, id_field):
                    from_ids.append(resource.id)
                    to_ids.append(getattr(resource, id_field))
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)

    def link_urls(self, urls):
        """
        link Url to parent resource
        """

        self._bulk_link_by_id_fields(urls, ['to_id'])

    def link_activities(self, activities):
        """
        link Activity to Member, Bill, and Minutes
        """

        self._bulk_link_by_id_fields(activities, ['member_id', 'bill_id', 'minutes_id'])

    def link_bill_action(self, bill_action_lst):
        """
        link BillAction to Bill, Minutes, and Speech
        """

        self._bulk_link_by_id_fields(bill_action_lst, ['bill_id', 'minutes_id', 'speech_id'])

    def link_minutes(self, minutes):
        """
        link Minutes to Bill, Member, and Committee

        Bills come from ``minutes.topic_ids`` and members from
        ``minutes.speaker_ids`` when those attributes exist; falsy entries
        (e.g. the empty strings produced for unmatched topics/speakers) are
        skipped. The committee is looked up by ``minutes.name``; a failed
        lookup is logged as a warning and otherwise ignored.
        """

        if hasattr(minutes, 'topic_ids'):
            bill_ids = [bill_id for bill_id in minutes.topic_ids if bill_id]
            if bill_ids:
                # Direction: Minutes -> Bill
                self.gql_client.bulk_link([minutes.id] * len(bill_ids), bill_ids)
                LOGGER.info(f'linked {len(bill_ids)} bills to {minutes.id}')

        if hasattr(minutes, 'speaker_ids'):
            member_ids = [member_id for member_id in minutes.speaker_ids if member_id]
            if member_ids:
                # Direction: Member -> Minutes
                self.gql_client.bulk_link(member_ids, [minutes.id] * len(member_ids))
                LOGGER.info(f'linked {len(member_ids)} members to {minutes.id}')

        try:
            committee = self.committee_finder.find_one(minutes.name)
        except ValueError as e:
            LOGGER.warning(e)
        else:
            self.gql_client.link(minutes.id, committee.id)

    def link_speeches(self, speeches):
        """Link each Speech to its Minutes and, when present, its Member to the Speech."""
        from_ids, to_ids = [], []
        for speech in speeches:
            # Direction: Speech -> Minutes
            from_ids.append(speech.id)
            to_ids.append(speech.minutes_id)
            if hasattr(speech, 'member_id'):
                # Direction: Member -> Speech
                from_ids.append(speech.member_id)
                to_ids.append(speech.id)
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)

    def delete_old_urls(self, src_id, url_title):
        """Delete every Url attached to ``src_id`` whose title equals ``url_title``."""
        obj = self.gql_client.get(src_id, fields=['urls'])
        for url in obj.urls:
            if url.title == url_title:
                self.gql_client.delete(url.id)
                LOGGER.info(f'deleted {url.id}')

    def get_diet(self, diet_number=None):
        """Return the Diet with the given number, or the latest Diet if none is given.

        Any falsy ``diet_number`` (``None``, ``0``, ``''``) selects the latest Diet.
        """
        if diet_number:
            return self.gql_client.get(f'Diet:{diet_number}', ['id', 'number', 'start_date'])
        return self.get_latest_diet()

    def get_latest_diet(self):
        """Return the Diet with the highest ``number`` among all stored Diets."""
        diets = self.gql_client.get_all_diets(['id', 'number', 'start_date'])
        return sorted(diets, key=lambda x: x.number)[-1]

    def get_topic_ids(self, topics):
        """Map each topic string to a Bill id, or ``''`` when no Bill is found.

        The result has the same length and order as ``topics``.
        """

        def get_topic_id(topic):
            maybe_bill_number = extract_bill_number_or_none(topic)
            maybe_category = extract_bill_category_or_none(topic)
            try:
                # Prefer the most specific query available for the topic.
                if maybe_bill_number:
                    bill = self.bill_finder.find_one(maybe_bill_number)
                elif maybe_category:
                    bill = self.bill_finder.find_one(topic, category=maybe_category)
                else:
                    bill = self.bill_finder.find_one(topic)
                return bill.id
            except ValueError as e:
                LOGGER.debug(e)  # this is expected when topic does not include bill
            return ''

        return [get_topic_id(topic) for topic in topics]

    def get_speakers_ids(self, speakers):
        """Map each speaker name to a Member id, or ``''`` when no Member is found.

        The result has the same length and order as ``speakers``.
        """

        def get_speaker_id(speaker):
            try:
                member = self.member_finder.find_one(speaker)
                return member.id
            except ValueError as e:
                LOGGER.debug(e)  # this is expected when speaker is not member
            return ''

        return [get_speaker_id(speaker) for speaker in speakers]
コード例 #3
0
class SpiderTemplate(scrapy.Spider):
    """Base spider bundling the clients, finders, and link helpers used by crawlers.

    Subclasses are expected to override ``parse`` (and presumably ``domain``).
    """

    domain = NotImplemented  # placeholder; presumably set by each concrete spider

    def __init__(self, *args, **kwargs):
        """Create the GraphQL/Elasticsearch clients and the resource finders."""
        super().__init__(*args, **kwargs)
        # Silence the chatty client libraries below WARNING.
        logging.getLogger('elasticsearch').setLevel(logging.WARNING)
        logging.getLogger('sgqlc').setLevel(logging.WARNING)
        self.gql_client = GraphQLClient()
        self.es_client = ElasticsearchClient()
        self.bill_finder = BillFinder()
        self.minutes_finder = MinutesFinder()
        self.committee_finder = CommitteeFinder()
        self.member_finder = MemberFinder()

    def parse(self, response):
        """Default callback; concrete spiders must override it.

        Raises:
            NotImplementedError: always, because the template has no parsing logic.
        """
        # A bare ``NotImplemented`` expression is a no-op that silently returns
        # None; raise instead so a missing override is detected.
        raise NotImplementedError(f'{type(self).__name__}.parse callback is not defined')

    def _bulk_link_by_id_fields(self, resources, id_fields):
        """Link each resource to the ids held in whichever of ``id_fields`` it has.

        For every resource and every attribute name in ``id_fields`` that the
        resource defines, one (resource.id -> attribute value) link is queued.
        All queued links are sent in a single ``bulk_link`` call; nothing is
        sent when no link was queued.
        """
        from_ids, to_ids = [], []
        for resource in resources:
            for id_field in id_fields:
                if hasattr(resource, id_field):
                    from_ids.append(resource.id)
                    to_ids.append(getattr(resource, id_field))
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)

    def link_urls(self, urls):
        """
        link Url to parent resource
        """

        self._bulk_link_by_id_fields(urls, ['to_id'])

    def link_activities(self, activities):
        """
        link Activity to Member, Bill, and Minutes
        """

        self._bulk_link_by_id_fields(activities, ['member_id', 'bill_id', 'minutes_id'])

    def link_minutes(self, minutes):
        """
        link Minutes to Bill, Committee and Member

        Bills are resolved from ``minutes.topics``, the committee from
        ``minutes.name``, and members from ``minutes.speakers`` (when that
        attribute exists). Failed lookups are logged and skipped.
        """

        self.link_bills_by_topics(minutes)

        try:
            committee = self.committee_finder.find_one(minutes.name)
        except ValueError as e:
            LOGGER.warning(e)
        else:
            self.gql_client.link(minutes.id, committee.id)

        if hasattr(minutes, 'speakers'):
            from_ids = []
            to_ids = []
            for speaker in minutes.speakers:
                try:
                    member = self.member_finder.find_one(speaker)
                except ValueError as e:
                    LOGGER.debug(e)  # this is expected when speaker is not member
                else:
                    # Direction: Member -> Minutes
                    from_ids.append(member.id)
                    to_ids.append(minutes.id)
            if from_ids:
                self.gql_client.bulk_link(from_ids, to_ids)

    def link_speeches(self, speeches):
        """Link each Speech to the Minutes named by its ``minutes_id``."""
        from_ids, to_ids = [], []
        for speech in speeches:
            from_ids.append(speech.id)
            to_ids.append(speech.minutes_id)
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)

    def store_urls_for_bill(self, urls, bill_query):
        """Merge ``urls`` and link them to the Bill matching ``bill_query``.

        Does nothing when ``urls`` is empty. If no Bill can be resolved the
        failure is logged as a warning and nothing is stored.
        """
        if not urls:
            return
        try:
            bill = self.bill_finder.find_one(bill_query)
        except ValueError as e:
            LOGGER.warning(e)
        else:
            self.gql_client.bulk_merge(urls)
            # Pass a real list (not a one-shot ``map`` iterator) so bulk_link
            # receives the same kind of argument as at every other call site.
            url_ids = [url.id for url in urls]
            self.gql_client.bulk_link(url_ids, [bill.id] * len(urls))

    def delete_old_urls(self, src_id, url_title):
        """Delete every Url attached to ``src_id`` whose title equals ``url_title``."""
        obj = self.gql_client.get(src_id)
        for url in obj.urls:
            if url.title == url_title:
                self.gql_client.delete(url.id)
                LOGGER.info(f'deleted {url.id}')

    def link_bills_by_topics(self, minutes: Minutes):
        """Link ``minutes`` to every Bill that can be resolved from its topics.

        Does nothing when ``minutes`` has no ``topics`` attribute. Topics for
        which no Bill is found are logged at debug level and skipped.
        """
        if not hasattr(minutes, 'topics'):
            return

        from_ids, to_ids = [], []
        for topic in minutes.topics:
            try:
                bill = self.bill_finder.find_one(topic)
            except ValueError as e:
                LOGGER.debug(e)  # this is expected when topic does not include bill
            else:
                # Direction: Minutes -> Bill
                from_ids.append(minutes.id)
                to_ids.append(bill.id)
                LOGGER.debug(f'link {minutes.id} to {bill.id}')
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)
            LOGGER.info(f'linked {len(from_ids)} bills to {minutes.id}')