def test_delete(self):
    """Verify delete removes a merged object, rejects malformed ids,
    and silently ignores well-formed ids that do not exist."""
    gql = GraphQLClient()
    sample = self._build_sample_speech()
    gql.merge(sample)

    # the freshly merged speech must be retrievable
    gql.get(sample.id)

    gql.delete(sample.id)
    with pytest.raises(GraphQLException):
        gql.get(sample.id)  # retrieval after deletion must fail

    # an id with an unknown class prefix is rejected
    with pytest.raises(GraphQLException):
        gql.delete('invalid:class')

    # don't raise exception when given non-existing id
    gql.delete('bill:invalid')
class SpiderTemplate(scrapy.Spider):
    """Base class for spiders that scrape Diet data into the GraphQL store.

    Subclasses must set ``domain`` and implement :meth:`parse`; the shared
    ``link_*`` helpers create edges between already-merged resources.
    """

    # subclasses set the site domain they crawl
    domain = NotImplemented

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # silence chatty third-party loggers
        logging.getLogger('elasticsearch').setLevel(logging.WARNING)
        logging.getLogger('sgqlc').setLevel(logging.WARNING)
        self.gql_client = GraphQLClient()
        self.es_client = ElasticsearchClient()
        self.bill_finder = BillFinder()
        self.minutes_finder = MinutesFinder()
        self.committee_finder = CommitteeFinder()
        self.member_finder = MemberFinder()

    def parse(self, response):
        # Subclasses must override. The original body was the bare
        # expression `NotImplemented`, which evaluates to a constant and
        # silently returns None; raising makes the missing override loud
        # (and matches scrapy's own Spider.parse contract).
        raise NotImplementedError

    def _bulk_link_by_attrs(self, objects, id_fields):
        """Link each object to the target ids named by the attributes it has.

        For every object, each attribute in *id_fields* that is present
        contributes one (object.id -> attribute value) edge.
        """
        from_ids, to_ids = [], []
        for obj in objects:
            for id_field in id_fields:
                if hasattr(obj, id_field):
                    from_ids.append(obj.id)
                    to_ids.append(getattr(obj, id_field))
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)

    def link_urls(self, urls):
        """link Url to parent resource"""
        self._bulk_link_by_attrs(urls, ['to_id'])

    def link_activities(self, activities):
        """link Activity to Member, Bill, and Minutes"""
        self._bulk_link_by_attrs(activities, ['member_id', 'bill_id', 'minutes_id'])

    def link_bill_action(self, bill_action_lst):
        """link BillAction to Bill, Minutes, and Speech"""
        self._bulk_link_by_attrs(bill_action_lst, ['bill_id', 'minutes_id', 'speech_id'])

    def link_minutes(self, minutes):
        """link Minutes to Bill, Member, and Committee"""
        if hasattr(minutes, 'topic_ids'):
            # empty strings mark topics that matched no bill (see get_topic_ids)
            bill_ids = [x for x in minutes.topic_ids if x]
            if bill_ids:
                self.gql_client.bulk_link([minutes.id] * len(bill_ids), bill_ids)
                LOGGER.info(f'linked {len(bill_ids)} bills to {minutes.id}')
        if hasattr(minutes, 'speaker_ids'):
            # empty strings mark speakers who are not members (see get_speakers_ids)
            member_ids = [x for x in minutes.speaker_ids if x]
            if member_ids:
                self.gql_client.bulk_link(member_ids, [minutes.id] * len(member_ids))
                LOGGER.info(f'linked {len(member_ids)} members to {minutes.id}')
        try:
            committee = self.committee_finder.find_one(minutes.name)
        except ValueError as e:
            LOGGER.warning(e)
        else:
            self.gql_client.link(minutes.id, committee.id)

    def link_speeches(self, speeches):
        """link each Speech to its Minutes, and its Member (when known) to it"""
        from_ids, to_ids = [], []
        for speech in speeches:
            from_ids.append(speech.id)
            to_ids.append(speech.minutes_id)
            if hasattr(speech, 'member_id'):
                # edge direction is Member -> Speech
                from_ids.append(speech.member_id)
                to_ids.append(speech.id)
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)

    def delete_old_urls(self, src_id, url_title):
        """Delete every url of *src_id* whose title equals *url_title*."""
        obj = self.gql_client.get(src_id, fields=['urls'])
        for url in obj.urls:
            if url.title == url_title:
                self.gql_client.delete(url.id)
                LOGGER.info(f'deleted {url.id}')

    def get_diet(self, diet_number=None):
        """Return the Diet for *diet_number*, or the latest Diet when omitted."""
        if diet_number:
            return self.gql_client.get(f'Diet:{diet_number}', ['id', 'number', 'start_date'])
        return self.get_latest_diet()

    def get_latest_diet(self):
        """Return the Diet with the largest number."""
        diets = sorted(self.gql_client.get_all_diets(['id', 'number', 'start_date']),
                       key=lambda x: x.number)
        return diets[-1]

    def get_topic_ids(self, topics):
        """Map each topic string to a Bill id, or '' when no bill matches."""

        def get_topic_id(topic):
            maybe_bill_number = extract_bill_number_or_none(topic)
            maybe_category = extract_bill_category_or_none(topic)
            try:
                if maybe_bill_number:
                    bill = self.bill_finder.find_one(maybe_bill_number)
                elif maybe_category:
                    bill = self.bill_finder.find_one(topic, category=maybe_category)
                else:
                    bill = self.bill_finder.find_one(topic)
                return bill.id
            except ValueError as e:
                LOGGER.debug(e)  # this is expected when topic does not include bill
                return ''

        return [get_topic_id(topic) for topic in topics]

    def get_speakers_ids(self, speakers):
        """Map each speaker name to a Member id, or '' when not a member."""

        def get_speaker_id(speaker):
            try:
                member = self.member_finder.find_one(speaker)
                return member.id
            except ValueError as e:
                LOGGER.debug(e)  # this is expected when speaker is not member
                return ''

        return [get_speaker_id(speaker) for speaker in speakers]
class SpiderTemplate(scrapy.Spider):
    """Base class for spiders that scrape Diet data into the GraphQL store.

    Subclasses must set ``domain`` and implement :meth:`parse`; the shared
    ``link_*``/``store_*`` helpers connect already-merged resources.
    """

    # subclasses set the site domain they crawl
    domain = NotImplemented

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # silence chatty third-party loggers
        logging.getLogger('elasticsearch').setLevel(logging.WARNING)
        logging.getLogger('sgqlc').setLevel(logging.WARNING)
        self.gql_client = GraphQLClient()
        self.es_client = ElasticsearchClient()
        self.bill_finder = BillFinder()
        self.minutes_finder = MinutesFinder()
        self.committee_finder = CommitteeFinder()
        self.member_finder = MemberFinder()

    def parse(self, response):
        # Subclasses must override. The original body was the bare
        # expression `NotImplemented`, which evaluates to a constant and
        # silently returns None; raising makes the missing override loud
        # (and matches scrapy's own Spider.parse contract).
        raise NotImplementedError

    def _bulk_link_by_attrs(self, objects, id_fields):
        """Link each object to the target ids named by the attributes it has.

        For every object, each attribute in *id_fields* that is present
        contributes one (object.id -> attribute value) edge.
        """
        from_ids, to_ids = [], []
        for obj in objects:
            for id_field in id_fields:
                if hasattr(obj, id_field):
                    from_ids.append(obj.id)
                    to_ids.append(getattr(obj, id_field))
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)

    def link_urls(self, urls):
        """link Url to parent resource"""
        self._bulk_link_by_attrs(urls, ['to_id'])

    def link_activities(self, activities):
        """link Activity to Member, Bill, and Minutes"""
        self._bulk_link_by_attrs(activities, ['member_id', 'bill_id', 'minutes_id'])

    def link_minutes(self, minutes):
        """link Minutes to Bill, Committee and Member"""
        self.link_bills_by_topics(minutes)
        try:
            committee = self.committee_finder.find_one(minutes.name)
        except ValueError as e:
            LOGGER.warning(e)
        else:
            self.gql_client.link(minutes.id, committee.id)
        if hasattr(minutes, 'speakers'):
            from_ids, to_ids = [], []
            for speaker in minutes.speakers:
                try:
                    member = self.member_finder.find_one(speaker)
                except ValueError as e:
                    LOGGER.debug(e)  # this is expected when speaker is not member
                else:
                    # edge direction is Member -> Minutes
                    from_ids.append(member.id)
                    to_ids.append(minutes.id)
            if from_ids:
                self.gql_client.bulk_link(from_ids, to_ids)

    def link_speeches(self, speeches):
        """link each Speech to the Minutes it belongs to"""
        from_ids, to_ids = [], []
        for speech in speeches:
            from_ids.append(speech.id)
            to_ids.append(speech.minutes_id)
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)

    def store_urls_for_bill(self, urls, bill_query):
        """Merge *urls* and link them all to the bill matching *bill_query*.

        Does nothing when *urls* is empty or no bill matches the query.
        """
        if not urls:
            return
        try:
            bill = self.bill_finder.find_one(bill_query)
        except ValueError as e:
            LOGGER.warning(e)
        else:
            self.gql_client.bulk_merge(urls)
            self.gql_client.bulk_link([url.id for url in urls], [bill.id] * len(urls))

    def delete_old_urls(self, src_id, url_title):
        """Delete every url of *src_id* whose title equals *url_title*."""
        # NOTE(review): fetches the full object; presumably only `urls` is
        # needed — confirm whether get(src_id, fields=['urls']) would suffice.
        obj = self.gql_client.get(src_id)
        for url in obj.urls:
            if url.title == url_title:
                self.gql_client.delete(url.id)
                LOGGER.info(f'deleted {url.id}')

    def link_bills_by_topics(self, minutes: Minutes):
        """Link *minutes* to every Bill whose topic can be resolved."""
        if not hasattr(minutes, 'topics'):
            return
        from_ids, to_ids = [], []
        for topic in minutes.topics:
            try:
                bill = self.bill_finder.find_one(topic)
            except ValueError as e:
                LOGGER.debug(e)  # this is expected when topic does not include bill
            else:
                from_ids.append(minutes.id)
                to_ids.append(bill.id)
                LOGGER.debug(f'link {minutes.id} to {bill.id}')
        if from_ids:
            self.gql_client.bulk_link(from_ids, to_ids)
            LOGGER.info(f'linked {len(from_ids)} bills to {minutes.id}')