def init_app(app):
    """Register the Newsworthy feeding service and its webhook endpoint.

    Must run at app-init time (not import time) because ``SERVER_URL`` is
    only available once the application config has been loaded.

    :param app: application instance providing ``config`` and acting as the
        parent for the webhook resource.
    """
    # The URL field default can only be computed here, because config is
    # not available at class-parsing time.
    url = join(app.config['SERVER_URL'], 'newsworthy')
    url_field = NewsworthyFeedingService.fields[0]
    # Explicit check instead of a bare ``assert`` so the guard still runs
    # under ``python -O``; the exception type is unchanged for any caller
    # that catches it.
    if url_field['id'] != 'url':
        raise AssertionError(
            "expected first field of NewsworthyFeedingService to be 'url'")
    url_field['default_value'] = url
    # init_app can be called several times during tests, so the service may
    # already be registered; skip re-registration on AlreadyExistsError.
    try:
        register_feeding_service(NewsworthyFeedingService)
    except superdesk.errors.AlreadyExistsError:
        pass
    else:
        register_feeding_service_parser(NewsworthyFeedingService.NAME, 'ninjs')
    service = NewsworthyWebhookService()
    resource = NewsworthyWebhookResource("newsworthy", app=app, service=service)
    resource.authentication = NewsworthyFeedingServiceAuth
# we need to access config to set the URL, so we do it here field = next(f for f in cls.fields if f["type"] == "url_request") field["url"] = join(app.config["SERVER_URL"], "login", "google", "{URL_ID}") def _test(self, provider): self._update(provider, update=None, test=True) def authenticate(self, provider: dict, config: dict) -> imaplib.IMAP4_SSL: oauth2_token_service = superdesk.get_resource_service("oauth2_token") token = oauth2_token_service.find_one(req=None, _id=provider["url_id"]) if token is None: raise IngestEmailError.notConfiguredError(ValueError( l_("You need to log in first")), provider=provider) imap = imaplib.IMAP4_SSL("imap.gmail.com") if token["expires_at"].timestamp() < time.time() + 600: logger.info("Refreshing token for {provider_name}".format( provider_name=provider["name"])) token = oauth.refresh_google_token(token["_id"]) auth_string = "user={email}\x01auth=Bearer {token}\x01\x01".format( email=token["email"], token=token["access_token"]) imap.authenticate("XOAUTH2", lambda __: auth_string.encode()) return imap register_feeding_service(GMailFeedingService) register_feeding_service_parser(GMailFeedingService.NAME, "email_rfc822")
""" payload = {'id': id} tree = self._get_tree('item', payload) parser = self.get_feed_parser(self.provider, tree) items = parser.parse(tree, self.provider) return items def _fetch_items_in_package(self, item): """ Fetch remote assets for given item. """ items = [] for group in item.get('groups', []): for ref in group.get('refs', []): if 'residRef' in ref: items.extend(self._parse_items(ref.get('residRef'))) return items def prepare_href(self, href, mimetype=None): (scheme, netloc, path, params, query, fragment) = urlparse(href) new_href = urlunparse((scheme, netloc, path, '', '', '')) return '%s?auth_token=%s' % (new_href, self._get_auth_token(self.provider, update=True)) register_feeding_service(ReutersHTTPFeedingService) register_feeding_service_parser(ReutersHTTPFeedingService.NAME, 'newsml2')
"media": media_id, "filename": fileName, "title": 'attachment', "description": "email's attachment" }]) if ids: attachments.append( {'attachment': next(iter(ids), None)}) except Exception as ex: logger.error( "cannot add attachment for %s, %s" % (fileName, ex.args[0])) app.media.delete(media_id) if attachments: for item in items: if item['type'] == 'text': item['attachments'] = attachments item[ 'ednote'] = 'The story has %s attachment(s)' % str( len(attachments)) register_feeding_service(EmailBelgaFeedingService) register_feeding_service_parser(EmailBelgaFeedingService.NAME, EMailRFC822FeedParser.NAME)
:param items: dict with events, ntbId used as a key :type items: dict :return: a list of events """ req = ParsedRequest() req.projection = json.dumps({'ntb_id': 1, 'guid': 1, ITEM_STATE: 1}) req.max_results = len(items) existing_items = superdesk.get_resource_service('events').get_from_mongo( req, { 'ntb_id': { '$in': [ntb_id for ntb_id in items.keys()] } } ) for existing_item in existing_items: if existing_item.get(ITEM_STATE) == WORKFLOW_STATE.INGESTED: # update event items[existing_item['ntb_id']][GUID_FIELD] = existing_item[GUID_FIELD] else: # remove event when it has a state different from 'ingested' del items[existing_item['ntb_id']] return [items[i] for i in items.keys()] register_feeding_service(NTBEventsApiFeedingService) register_feeding_service_parser(NTBEventsApiFeedingService.NAME, 'ntb_events_api_xml')
for item in items: if embed: urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-;]|[\[\]?@_~]|' r'(?:%[0-9a-fA-F][0-9a-fA-F]))+', item.get('body_html', '')) for url in set(urls): embed_content = self._create_embed(url, key) if embed_content: item['body_html'] += '<!-- EMBED START Twitter -->' item['body_html'] += embed_content item['body_html'] += '<!-- EMBED END Twitter -->' return [items] def _create_embed(self, url, key): """ Get embed html from iframely service for provided url """ response = requests.get('https://iframe.ly/api/oembed?url={}&api_key={}'.format(url, key)) content = response.json() if response.status_code == 200: return content.get('html', '') elif response.status_code == 403: raise IngestTwitterBelgaError.TwitterInvalidIframelyKey() # when turn off setting: On URL errors, don't repeat it as HTTP status (use code 200 instead) # iframely will return 417 response on URL error return '' register_feeding_service(TwitterBelgaFeedingService) register_feeding_service_parser(TwitterBelgaFeedingService.NAME, None)
] def __init__(self): self.fields_cache = {} def _update(self, provider, update): user = provider['config']['wufoo_username'] wufoo_data = { "url": WUFOO_URL.format(subdomain=user), "user": user, "api_key": provider['config']['wufoo_api_key'], "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES, "update": update} try: parser = self.get_feed_parser(provider, None) except requests.exceptions.Timeout as ex: raise IngestApiError.apiTimeoutError(ex, provider) except requests.exceptions.TooManyRedirects as ex: raise IngestApiError.apiRedirectError(ex, provider) except requests.exceptions.RequestException as ex: raise IngestApiError.apiRequestError(ex, provider) except Exception as error: traceback.print_exc() raise IngestApiError.apiGeneralError(error, self.provider) items = parser.parse(wufoo_data, provider) return [items] register_feeding_service(WufooFeedingService) register_feeding_service_parser(WufooFeedingService.NAME, 'wufoo')
try: rv, data = imap.search(None, config.get('filter', '(UNSEEN)')) if rv != 'OK': raise IngestEmailError.emailFilterError() for num in data[0].split(): rv, data = imap.fetch(num, '(RFC822)') if rv == 'OK' and not test: try: parser = self.get_feed_parser(provider, data) new_items.append(parser.parse(data, provider)) rv, data = imap.store(num, '+FLAGS', '\\Seen') except IngestEmailError: continue finally: imap.close() finally: imap.logout() except IngestEmailError: raise except Exception as ex: raise IngestEmailError.emailError(ex, provider) return new_items def prepare_href(self, href, mimetype=None): return url_for_media(href, mimetype) register_feeding_service(EmailFeedingService) register_feeding_service_parser(EmailFeedingService.NAME, 'email_rfc822')
if not byline.startswith('By '): byline_prefix = 'By ' byline_found = elem_text.lower().startswith( '{}{}'.format(byline_prefix, byline).lower()) else: byline_found = elem_text.startswith('By ') if byline_found: item['byline'] = elem_text # remove the byline from the body text if not byline_found: elements.append('<%s>%s</%s>' % (tag, elem_text, tag)) line_counter += 1 content = dict() content['contenttype'] = tree.attrib['contenttype'] if len(elements) > 0: content['content'] = "\n".join(elements) elif body.text: content['content'] = '<pre>' + body.text + '</pre>' content['format'] = CONTENT_TYPE.PREFORMATTED return content register_feed_parser(ReutersNewsMLTwoFeedParser.NAME, ReutersNewsMLTwoFeedParser()) register_feeding_service_parser(ReutersHTTPFeedingService.NAME, ReutersNewsMLTwoFeedParser.NAME)
raise IngestEmailError.emailMailboxError() try: rv, data = imap.search(None, config.get('filter', '(UNSEEN)')) if rv != 'OK': raise IngestEmailError.emailFilterError() for num in data[0].split(): rv, data = imap.fetch(num, '(RFC822)') if rv == 'OK' and not test: try: parser = self.get_feed_parser(provider, data) new_items.append(parser.parse(data, provider)) rv, data = imap.store(num, '+FLAGS', '\\Seen') except IngestEmailError: continue finally: imap.close() finally: imap.logout() except IngestEmailError: raise except Exception as ex: raise IngestEmailError.emailError(ex, provider) return new_items def prepare_href(self, href, mimetype=None): return url_for_media(href, mimetype) register_feeding_service(EmailFeedingService) register_feeding_service_parser(EmailFeedingService.NAME, 'email_rfc822')
raise IngestApiError.apiRequestError(Exception('error while parsing the request answer')) try: if root_elt.xpath('(//error/text())[1]')[0] != '0': err_msg = root_elt.xpath('(//errormsg/text())[1]')[0] raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg))) except IndexError: raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found')) parser = self.get_feed_parser(provider) items = [] for elt in root_elt.xpath('//RBNews'): item = parser.parse(elt, provider) items.append(item) if not url_override: try: queue_id = elt.xpath('.//ServiceQueueId/text()')[0] except IndexError: raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element')) ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id} try: requests.get(URL_ACK, params=ack_params) except Exception: raise IngestApiError.apiRequestError(Exception('error while doing the request')) return [items] register_feeding_service(RitzauFeedingService) register_feeding_service_parser(RitzauFeedingService.NAME, 'ritzau')
# # This file is part of Superdesk. # # Copyright 2013, 2014 Sourcefabric z.u. and contributors. # # For the full copyright and license information, please see the # AUTHORS and LICENSE files distributed with this source code, or # at https://www.sourcefabric.org/superdesk/license from superdesk.io.feeding_services import FTPFeedingService from superdesk.io.registry import register_feeding_service, register_feeding_service_parser class NTBEventsFTPFeedingService(FTPFeedingService): """ Feeding Service class which can read events from NTB via FTP """ NAME = 'ntb_events_ftp' label = 'NTB Events FTP' service = 'events' register_feeding_service(NTBEventsFTPFeedingService) register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ntb_events_api_xml') register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ntb_event_xml') register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ics20') register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ntb_nifs')
item_details['media_link'] = media_link return item_details def _fetch_media_link(self, source_id, item_id): """ Fetch a list of all available renditions for an item and return a link to file :param source_id: :param item_id: :return str or None: link to the image or None """ # fetch media renditions media_renditions = self.get_url( url=self.HTTP_ITEM_MEDIA_LIST_URL.format(source_id=source_id, item_id=item_id) ) for rend in media_renditions: if rend.get('kind') in self.ALLOWED_MEDIA_KINDS and rend.get('mimeType') in self.ALLOWED_MEDIA_MIMETYPES: media_id = rend.get('id') if media_id: return self.HTTP_ITEM_MEDIA_DETAILS_URL.format( source_id=source_id, item_id=item_id, media_id=media_id ) return None register_feeding_service(ANPNewsApiFeedingService) register_feeding_service_parser(ANPNewsApiFeedingService.NAME, 'anp_news_api')
def init_app(_app):
    # Register the CNA feeding service at app-init time. ``_app`` is unused;
    # the signature matches the init_app(app) convention expected by the
    # module loader.
    register_feeding_service(CNAFeedingService)
    # Parser name ``None`` — no fixed parser is bound to this service;
    # NOTE(review): presumably the service resolves its parser itself —
    # confirm against superdesk.io.registry.
    register_feeding_service_parser(CNAFeedingService.NAME, None)
'firstcreated': text_item['firstcreated'], 'versioncreated': text_item['versioncreated'], 'headline': text_item.get('headline', ''), 'groups': [{ 'id': 'root', 'role': 'grpRole:NEP', 'refs': [{ 'idRef': 'main' }], }, { 'id': 'main', 'role': 'main', 'refs': [], }] } item_references = package['groups'][1]['refs'] item_references.append({'residRef': text_item['guid']}) for image in image_items: item_references.append({'residRef': image['guid']}) return package register_feeding_service(RSSFeedingService) register_feeding_service_parser(RSSFeedingService.NAME, None)
try: root_elt = etree.fromstring(r.text) except Exception: raise IngestApiError.apiRequestError(Exception('error while parsing the request answer')) try: if root_elt.xpath('(//error/text())[1]')[0] != '0': err_msg = root_elt.xpath('(//errormsg/text())[1]')[0] raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg))) except IndexError: raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found')) parser = self.get_feed_parser(provider) items = [] for elt in root_elt.xpath('//RBNews'): item = parser.parse(elt, provider) items.append(item) if not url_override: try: queue_id = elt.xpath('.//ServiceQueueId/text()')[0] except IndexError: raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element')) ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id} self.get_url(URL_ACK, params=ack_params) return [items] register_feeding_service(RitzauFeedingService) register_feeding_service_parser(RitzauFeedingService.NAME, 'ritzau')
location[0]['name'], 'address.line': location[0]['address']['line'], 'address.country': location[0]['address']['country'], })) if saved_location and status == 'UPDATED': location_service.patch( saved_location[0][superdesk.config.ID_FIELD], location[0]) elif not saved_location: _location = deepcopy(location) location_service.post(_location) item['location'][0]['qcode'] = _location[0]['guid'] old_item = events_service.find_one(guid=item[GUID_FIELD], req=None) if not old_item: if not status: item.setdefault('firstcreated', datetime.now()) item.setdefault('versioncreated', datetime.now()) list_items.append(item) else: old_item.update(item) list_items.append(old_item) return list_items register_feeding_service(SpreadsheetFeedingService) register_feeding_service_parser(SpreadsheetFeedingService.NAME, 'belgaspreadsheet')
""" payload = {"id": id} tree = self._get_tree("item", payload) parser = self.get_feed_parser(self.provider, tree) items = parser.parse(tree, self.provider) return items def _fetch_items_in_package(self, item): """ Fetch remote assets for given item. """ items = [] for group in item.get("groups", []): for ref in group.get("refs", []): if "residRef" in ref: items.extend(self._parse_items(ref.get("residRef"))) return items def prepare_href(self, href, mimetype=None): (scheme, netloc, path, params, query, fragment) = urlparse(href) new_href = urlunparse((scheme, netloc, path, "", "", "")) return "%s?auth_token=%s" % (new_href, self._get_auth_token(self.provider, update=True)) register_feeding_service(ReutersHTTPFeedingService) register_feeding_service_parser(ReutersHTTPFeedingService.NAME, "newsml2")
byline = item.get('byline') or '' if byline: byline_prefix = '' if not byline.startswith('By '): byline_prefix = 'By ' byline_found = elem_text.lower().startswith('{}{}'.format(byline_prefix, byline).lower()) else: byline_found = elem_text.startswith('By ') if byline_found: item['byline'] = elem_text # remove the byline from the body text if not byline_found: elements.append('<%s>%s</%s>' % (tag, elem_text, tag)) line_counter += 1 content = dict() content['contenttype'] = tree.attrib['contenttype'] if len(elements) > 0: content['content'] = "\n".join(elements) elif body.text: content['content'] = '<pre>' + body.text + '</pre>' content['format'] = CONTENT_TYPE.PREFORMATTED return content register_feed_parser(ReutersNewsMLTwoFeedParser.NAME, ReutersNewsMLTwoFeedParser()) register_feeding_service_parser(ReutersHTTPFeedingService.NAME, ReutersNewsMLTwoFeedParser.NAME)