def _transmit(self, queue_item, subscriber):
    """Push the queued item to its FTP destination.

    When ``push_associated`` is configured, associated media files are
    uploaded first (optionally into a dedicated ``associated_path``
    directory), then the working directory is restored before the item
    itself is stored.

    :raises PublishFtpError: on any transmission failure.
    """
    destination_config = queue_item.get("destination", {}).get("config", {})
    try:
        with ftp_connect(destination_config) as ftp:
            if destination_config.get("push_associated", False):
                # A truthy associated_path means media goes into its own folder.
                has_assoc_dir = ("associated_path" in destination_config
                                 and destination_config.get("associated_path"))
                if has_assoc_dir:
                    ftp.cwd("/" + destination_config.get("associated_path", "").lstrip("/"))
                published = self._get_published_item(queue_item)
                if published:
                    self._copy_published_media_files(published, ftp)
                # Restore the main output directory before storing the item.
                if has_assoc_dir:
                    ftp.cwd("/" + destination_config.get("path").lstrip("/"))
            target_name = get_publish_service().get_filename(queue_item)
            payload = queue_item.get(
                "encoded_item",
                queue_item.get("formatted_item").encode("UTF-8"))
            ftp.storbinary("STOR " + target_name, BytesIO(payload))
    except PublishFtpError:
        raise
    except Exception as ex:
        raise PublishFtpError.ftpError(ex, queue_item.get("destination"))
def _transmit(self, queue_item, subscriber):
    """Store the formatted queue item on the subscriber's FTP server,
    pushing associated media files beforehand when configured.

    :raises PublishFtpError: on any transmission failure.
    """
    config = queue_item.get('destination', {}).get('config', {})
    try:
        with ftp_connect(config) as ftp:
            if config.get('push_associated', False):
                # Truthy associated_path -> media is uploaded to its own folder.
                assoc_dir = config.get('associated_path')
                if assoc_dir:
                    ftp.cwd('/' + assoc_dir.lstrip('/'))
                published = self._get_published_item(queue_item)
                if published:
                    self._copy_published_media_files(published, ftp)
                # Switch back to the main path before storing the item itself.
                if assoc_dir:
                    ftp.cwd('/' + config.get('path').lstrip('/'))
            filename = get_publish_service().get_filename(queue_item)
            payload = queue_item.get(
                'encoded_item',
                queue_item.get('formatted_item').encode('UTF-8'))
            ftp.storbinary('STOR ' + filename, BytesIO(payload))
    except PublishFtpError:
        raise
    except Exception as ex:
        raise PublishFtpError.ftpError(ex, config)
def _update(self, provider, update):
    """Fetch new files from the provider's FTP server and parse them.

    Downloads every regular file matching ``FILE_SUFFIX`` that is newer than
    the provider's ``last_updated``, runs it through the registered feed
    parser, and records the newest modification time seen in
    ``update[LAST_UPDATED]``.

    :param provider: ingest provider document (``config``, ``last_updated``)
    :param update: dict mutated in place with LAST_UPDATED
    :return: list of parsed results (each entry is a list of items)
    :raises IngestFtpError: on any FTP-level failure
    """
    config = provider.get('config', {})
    last_updated = provider.get('last_updated')
    # Newest file modify time encountered this run; only tracked when
    # last_updated is set (first runs leave update[LAST_UPDATED] untouched).
    crt_last_updated = None
    if 'dest_path' not in config:
        # Local scratch directory for downloaded files.
        config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')
    try:
        with ftp_connect(config) as ftp:
            items = []
            for filename, facts in ftp.mlsd():
                # Skip directories and other non-file entries.
                if facts.get('type', '') != 'file':
                    continue
                if not filename.lower().endswith(self.FILE_SUFFIX):
                    continue
                if last_updated:
                    item_last_updated = datetime.strptime(facts['modify'], self.DATE_FORMAT).replace(tzinfo=utc)
                    if item_last_updated < last_updated:
                        continue
                    elif not crt_last_updated or item_last_updated > crt_last_updated:
                        crt_last_updated = item_last_updated
                local_file_path = os.path.join(config['dest_path'], filename)
                try:
                    # 'xb' fails with FileExistsError if the file was already fetched.
                    with open(local_file_path, 'xb') as f:
                        try:
                            ftp.retrbinary('RETR %s' % filename, f.write)
                        except ftplib.all_errors as ex:
                            # Partial download: remove it and move on to the next file.
                            os.remove(local_file_path)
                            logger.exception('Exception retrieving from FTP server')
                            continue
                except FileExistsError:
                    logger.exception('Exception retrieving from FTP server, file already exists')
                    continue
                registered_parser = self.get_feed_parser(provider)
                if isinstance(registered_parser, XMLFeedParser):
                    # XML parsers receive the parsed root element.
                    xml = etree.parse(local_file_path).getroot()
                    parser = self.get_feed_parser(provider, xml)
                    parsed = parser.parse(xml, provider)
                else:
                    # Other parsers receive the local file path.
                    parser = self.get_feed_parser(provider, local_file_path)
                    parsed = parser.parse(local_file_path, provider)
                if isinstance(parsed, dict):
                    parsed = [parsed]
                items.append(parsed)
            if crt_last_updated:
                update[LAST_UPDATED] = crt_last_updated
            return items
    except IngestFtpError:
        raise
    except Exception as ex:
        raise IngestFtpError.ftpError(ex, provider)
def _test(self, provider):
    """Check that the provider's FTP server is reachable by listing
    its directory.

    :raises IngestFtpError: when connecting or listing fails.
    """
    ftp_config = provider.get('config', {})
    try:
        with ftp_connect(ftp_config) as ftp:
            ftp.mlsd()
    except IngestFtpError:
        raise
    except Exception as ex:
        raise IngestFtpError.ftpError(ex, provider)
def _update(self, provider):
    """Download and parse new files from the provider's FTP server.

    Fetches every regular file matching ``FILE_SUFFIX`` modified after the
    provider's ``last_updated`` and feeds it through the registered parser.

    :param provider: ingest provider document (``config``, ``last_updated``)
    :return: list of parsed results (each entry is a list of items)
    :raises IngestFtpError: on any FTP-level failure
    """
    config = provider.get('config', {})
    last_updated = provider.get('last_updated')
    if 'dest_path' not in config:
        # Local scratch directory for downloaded files.
        config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')
    try:
        with ftp_connect(config) as ftp:
            items = []
            for filename, facts in ftp.mlsd():
                # Skip directories and other non-file entries.
                if facts.get('type', '') != 'file':
                    continue
                if not filename.lower().endswith(self.FILE_SUFFIX):
                    continue
                if last_updated:
                    item_last_updated = datetime.strptime(
                        facts['modify'], self.DATE_FORMAT).replace(tzinfo=utc)
                    if item_last_updated < last_updated:
                        continue
                local_file_path = os.path.join(config['dest_path'], filename)
                try:
                    # 'xb' fails with FileExistsError if already downloaded.
                    with open(local_file_path, 'xb') as f:
                        try:
                            ftp.retrbinary('RETR %s' % filename, f.write)
                        except ftplib.all_errors as ex:
                            # Partial download: discard it and continue.
                            os.remove(local_file_path)
                            logger.exception(
                                'Exception retrieving from FTP server')
                            continue
                except FileExistsError:
                    # Already fetched previously; silently skip.
                    continue
                registered_parser = self.get_feed_parser(provider)
                if isinstance(registered_parser, XMLFeedParser):
                    # XML parsers receive the parsed root element.
                    xml = etree.parse(local_file_path).getroot()
                    parser = self.get_feed_parser(provider, xml)
                    parsed = parser.parse(xml, provider)
                else:
                    # Other parsers receive the local file path.
                    parser = self.get_feed_parser(provider, local_file_path)
                    parsed = parser.parse(local_file_path, provider)
                if isinstance(parsed, dict):
                    parsed = [parsed]
                items.append(parsed)
            return items
    except IngestFtpError:
        raise
    except Exception as ex:
        raise IngestFtpError.ftpError(ex, provider)
def _test(self, provider):
    """Check that the provider's FTP server is reachable and listable.

    Tries MLSD first; servers that do not implement MLSD typically reply
    with a 500-series error, in which case a plain NLST listing is used
    as a fallback.

    :raises IngestFtpError: when the server cannot be reached or listed.

    BUGFIX: the NLST fallback previously ran in the outer ``except``
    handler — outside the ``with`` block, after the connection had been
    torn down, and with ``ftp`` unbound if ``ftp_connect`` itself failed
    (NameError). The fallback now runs on the live connection.
    """
    config = provider.get("config", {})
    try:
        with ftp_connect(config) as ftp:
            try:
                ftp.mlsd()
            except Exception as ex:
                if "500" in str(ex):
                    # MLSD unsupported on this server; NLST is the
                    # widely supported fallback listing command.
                    ftp.nlst()
                else:
                    raise
    except IngestFtpError:
        raise
    except Exception as ex:
        raise IngestFtpError.ftpError(ex, provider)
def _transmit(self, queue_item, subscriber):
    """Upload the already-encoded queue item to its FTP destination.

    :raises PublishFtpError: on any transmission failure.
    """
    config = queue_item.get('destination', {}).get('config', {})
    try:
        with ftp_connect(config) as ftp:
            target_name = PublishService.get_filename(queue_item)
            payload = BytesIO(queue_item['encoded_item'])
            ftp.storbinary("STOR " + target_name, payload)
    except PublishFtpError:
        raise
    except Exception as ex:
        raise PublishFtpError.ftpError(ex, config)
def _move_file(self, file_dir, filename, config):
    """Relocate a processed file: over FTP for FTP providers, on the
    local filesystem otherwise."""
    if self._provider.get('feeding_service') == 'ftp':
        with ftp_connect(config) as ftp:
            # Only relocate when the provider is configured to move files.
            if config.get('move', False):
                service = FTPFeedingService()
                success_path, _ = service._create_move_folders(config, ftp)
                source = os.path.join(file_dir, filename)
                destination = os.path.join(success_path, filename)
                service._move(ftp, source, destination, datetime.now(), False)
    else:
        # Move processed attachments to the same folder as the XML file.
        parent_dir = os.path.dirname(file_dir)
        FileFeedingService().move_file(
            parent_dir, 'attachments/' + filename, self._provider)
def _get_file(filename):
    """Download *filename* from the BOM weather FTP server, retrying up
    to three times before giving up.

    :param filename: name of the file in the server's ``fwo`` directory
    :return: the raw file contents as bytes
    :raises Exception: re-raises the last download error once retries
        are exhausted
    """
    retries = 0
    while True:
        try:
            raw = BytesIO()
            with ftp_connect({'username': app.config.get('BOM_WEATHER_FTP_USERNAME', ''),
                              'password': app.config.get('BOM_WEATHER_FTP_PASSWORD', ''),
                              'host': 'ftp.bom.gov.au',
                              'path': 'fwo'}) as ftp:
                ftp.retrbinary('RETR ' + filename, raw.write)
        # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception so shutdown signals propagate.
        except Exception:
            # logger.exception already records the traceback; the previous
            # explicit exc_info=True was redundant.
            logger.exception('Failed to download on attempt {} file: {}'.format(retries, filename))
            retries += 1
            if retries < 3:
                continue
            logger.exception('Retries exceeded downloading {}'.format(filename))
            raise
        break
    return raw.getvalue()
def _update(self, provider, update):
    """Generator-based FTP ingest with per-stage timing.

    Lists files on the provider's FTP server, selects up to
    ``FTP_INGEST_FILES_LIST_LIMIT`` new ones (ordering comes from
    ``_sort_files``), then yields each file's parse result via
    ``_retrieve_and_parse`` and optionally moves processed files into
    success/error folders.

    :param provider: ingest provider document
    :param update: dict mutated in place with ``private.last_processed_file_modify``
    :raises IngestFtpError: on any FTP-level failure
    """
    config = provider.get("config", {})
    do_move = config.get("move", False)
    # Modify timestamp of the last file processed in a previous run; used
    # as the resume point for incremental ingestion.
    last_processed_file_modify = provider.get(
        "private", {}).get("last_processed_file_modify")
    limit = app.config.get("FTP_INGEST_FILES_LIST_LIMIT", 100)
    registered_parser = self.get_feed_parser(provider)
    allowed_ext = getattr(registered_parser, "ALLOWED_EXT",
                          self.ALLOWED_EXT_DEFAULT)
    try:
        self._timer.start("ftp_connect")
        with ftp_connect(config) as ftp:
            self._log_msg(
                "Connected to FTP server. Exec time: {:.4f} secs.".format(
                    self._timer.stop("ftp_connect")))
            files_to_process = []
            files = self._sort_files(self._list_files(ftp, provider))
            if do_move:
                move_path, move_path_error = self._create_move_folders(
                    config, ftp)
            self._timer.start("files_to_process")
            for filename, modify in files:
                # filter by extension
                # NOTE(review): "(unknown)" looks like a lost "{filename}"
                # placeholder — the .format(filename=...) has nothing to fill.
                if not self._is_allowed(filename, allowed_ext):
                    logger.info(
                        "ignoring file (unknown) because of file extension"
                        .format(filename=filename))
                    continue
                # filter by modify datetime
                file_modify = datetime.strptime(
                    modify, self.DATE_FORMAT).replace(tzinfo=utc)
                if last_processed_file_modify:
                    # ignore limit and add files for processing
                    if last_processed_file_modify == file_modify:
                        files_to_process.append((filename, file_modify))
                    elif last_processed_file_modify < file_modify:
                        # even if we have reached a limit, we must add at least one file to increment
                        # a `last_processed_file_modify` in provider
                        files_to_process.append((filename, file_modify))
                        # limit amount of files to process per ingest update
                        if len(files_to_process) >= limit:
                            break
                else:
                    # limit amount of files to process per ingest update
                    if len(files_to_process) >= limit:
                        break
                    # add files for processing
                    files_to_process.append((filename, file_modify))
            self._log_msg(
                "Got {} file for processing. Exec time: {:.4f} secs.".
                format(len(files_to_process),
                       self._timer.stop("files_to_process")))
            # process files
            self._timer.start("start_processing")
            for filename, file_modify in files_to_process:
                try:
                    # Record progress before parsing so a crash resumes here.
                    update["private"] = {
                        "last_processed_file_modify": file_modify
                    }
                    # The consumer of this generator sends back the failure flag.
                    failed = yield self._retrieve_and_parse(
                        ftp, config, filename, provider, registered_parser)
                    if do_move:
                        move_dest_file_path = os.path.join(
                            move_path if not failed else move_path_error,
                            filename)
                        self._move(ftp, filename, move_dest_file_path,
                                   file_modify, failed=failed)
                except EmptyFile:
                    # Empty files are silently skipped (and not moved).
                    continue
                except Exception as e:
                    logger.error(
                        "Error while parsing (unknown): {msg}".format(
                            filename=filename, msg=e))
                    if do_move:
                        move_dest_file_path_error = os.path.join(
                            move_path_error, filename)
                        self._move(ftp, filename, move_dest_file_path_error,
                                   file_modify, failed=True)
            self._log_msg(
                "Processing finished. Exec time: {:.4f} secs.".format(
                    self._timer.stop("start_processing")))
    except IngestFtpError:
        raise
    except Exception as ex:
        raise IngestFtpError.ftpError(ex, provider)
def _update(self, provider, update):
    """FTP ingest with optional server-side move of processed files.

    Downloads each allowed file newer than ``last_updated``, parses it
    with the registered feed parser, and (when ``config['move']`` is set)
    moves files into a success or error folder on the server.

    :param provider: ingest provider document
    :param update: dict mutated in place with LAST_UPDATED on success
    :return: list of parsed results (each entry is a list of items)
    :raises IngestFtpError: on any FTP-level failure
    """
    config = provider.get('config', {})
    last_updated = provider.get('last_updated')
    registered_parser = self.get_feed_parser(provider)
    try:
        allowed_ext = registered_parser.ALLOWED_EXT
    except AttributeError:
        # Parser does not restrict extensions; use the service default.
        allowed_ext = self.ALLOWED_EXT_DEFAULT
    # Newest file modify time seen this run (only tracked when last_updated set).
    crt_last_updated = None
    if config.get('move', False):
        do_move = True
        if not config.get('move_path'):
            logger.debug('missing move_path, default will be used')
        move_dest_path = os.path.join(
            config.get('path', ''),
            config.get('move_path') or DEFAULT_SUCCESS_PATH)
        if not config.get('move_path_error'):
            logger.debug('missing move_path_error, default will be used')
        move_dest_path_error = os.path.join(
            config.get('path', ''),
            config.get('move_path_error') or DEFAULT_FAILURE_PATH)
    else:
        do_move = False
    if 'dest_path' not in config:
        # Local scratch directory for downloaded files.
        config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')
    try:
        with ftp_connect(config) as ftp:
            if do_move:
                try:
                    self._create_if_missing(ftp, move_dest_path)
                    self._create_if_missing(ftp, move_dest_path_error)
                except ftplib.all_errors as e:
                    # Degrade gracefully: keep ingesting but stop moving files.
                    logger.warning(
                        "Can't create move directory, files will not be moved: {reason}"
                        .format(reason=e))
                    do_move = False
            items = []
            for filename, facts in ftp.mlsd():
                # Skip directories and other non-file entries.
                if facts.get('type', '') != 'file':
                    continue
                try:
                    if not self._is_allowed(filename, allowed_ext):
                        # NOTE(review): "(unknown)" looks like a lost "{filename}"
                        # placeholder — the .format has nothing to fill.
                        logger.info(
                            'ignoring file (unknown) because of file extension'
                            .format(filename=filename))
                        continue
                    if last_updated:
                        item_last_updated = datetime.strptime(
                            facts['modify'], self.DATE_FORMAT).replace(tzinfo=utc)
                        if item_last_updated <= last_updated:
                            continue
                        elif not crt_last_updated or item_last_updated > crt_last_updated:
                            crt_last_updated = item_last_updated
                    local_file_path = os.path.join(config['dest_path'], filename)
                    with open(local_file_path, 'wb') as f:
                        try:
                            ftp.retrbinary('RETR %s' % filename, f.write)
                        except ftplib.all_errors:
                            # Partial download: discard and let the per-file
                            # handler log it and move the file to the error path.
                            os.remove(local_file_path)
                            raise Exception(
                                'Exception retrieving file from FTP server ((unknown))'
                                .format(filename=filename))
                    if isinstance(registered_parser, XMLFeedParser):
                        # XML parsers receive the parsed root element.
                        xml = etree.parse(local_file_path).getroot()
                        parser = self.get_feed_parser(provider, xml)
                        parsed = parser.parse(xml, provider)
                    else:
                        # Other parsers receive the local file path.
                        parser = self.get_feed_parser(
                            provider, local_file_path)
                        parsed = parser.parse(local_file_path, provider)
                    if isinstance(parsed, dict):
                        parsed = [parsed]
                    items.append(parsed)
                    if do_move:
                        move_dest_file_path = os.path.join(
                            move_dest_path, filename)
                        self._move(ftp, filename, move_dest_file_path)
                except Exception as e:
                    # Any per-file failure is logged and the file is routed
                    # to the error folder; ingestion continues.
                    logger.error(
                        "Error while parsing (unknown): {msg}".format(
                            filename=filename, msg=e))
                    if do_move:
                        move_dest_file_path_error = os.path.join(
                            move_dest_path_error, filename)
                        self._move(ftp, filename, move_dest_file_path_error)
            if crt_last_updated:
                update[LAST_UPDATED] = crt_last_updated
            return items
    except IngestFtpError:
        raise
    except Exception as ex:
        raise IngestFtpError.ftpError(ex, provider)
def _update(self, provider, update):
    """Poll the AP Media API for new items and push picture renditions
    to a configured FTP server.

    Uses the stored ``next_link`` for incremental paging when present,
    otherwise builds a search query from ``productList``/``recoverytime``.
    Pictures found in the response are downloaded and stored on the
    configured FTP destination, with filenames either taken from the
    original filename or generated from ``filenametemplate`` plus a
    rolling 4-digit sequence number.

    :param provider: ingest provider document (all settings under ``config``)
    :param update: dict mutated in place with the refreshed ``config``
    :return: always None (ingestion happens via FTP side effects)
    """
    self.HTTP_URL = provider.get('config', {}).get('api_url', '')
    self.provider = provider
    # Set the apikey parameter we're going to use it on all calls
    params = dict()
    params['apikey'] = provider.get('config', {}).get('apikey')
    # Use the next link if one is available in the config
    if provider.get('config', {}).get('next_link'):
        r = self.get_url(url=provider.get('config', {}).get('next_link'),
                         params=params, verify=False, allow_redirects=True)
        r.raise_for_status()
    else:
        id_list = provider.get('config', {}).get('productList', '').strip()
        recovery_time = provider.get('config', {}).get('recoverytime', '1').strip()
        if recovery_time == '':
            recovery_time = '1'
        # Search window starts `recovery_time` hours in the past (UTC, ISO-8601).
        start = (utcnow() - timedelta(hours=int(recovery_time))
                 ).isoformat()[:19] + 'Z'
        # If there has been a list of products defined then we format them for the request, if not all
        # allowed products will be returned.
        if id_list:
            # we remove spaces and empty values from id_list to do a clean list
            id_list = ' OR '.join(
                [id_.strip() for id_ in id_list.split(',') if id_.strip()])
            params[
                'q'] = 'productid:(' + id_list + ') AND mindate:>{}'.format(
                    start)
        else:
            params['q'] = 'mindate:>{}'.format(start)
        params['page_size'] = '100'
        r = self.get_url(params=params, verify=False, allow_redirects=True)
        r.raise_for_status()
    try:
        response = json.loads(r.text)
    except Exception:
        raise IngestApiError.apiRequestError(
            Exception('error parsing response'))
    nextLink = response.get('data', {}).get('next_page')
    # Got the same next link as last time so nothing new
    if nextLink == provider.get('config', {}).get('next_link'):
        logger.info('Nothing new from AP Media')
        return []
    if len(response.get('data', {}).get('items', [])) > 0:
        try:
            # Rolling sequence number persisted in provider config (mod 10000).
            sequence_number = int(
                provider.get('config', {}).get('sequence', 0))
            with ftp_connect({
                    'username': provider.get('config', {}).get('ftp_user', ''),
                    'password': provider.get('config', {}).get('ftp_password', ''),
                    'host': provider.get('config', {}).get('ftp_server', ''),
                    'path': provider.get('config', {}).get('ftp_path', '')
            }) as ftp:
                for item in response.get('data', {}).get('items', []):
                    try:
                        if item['item']['type'] == 'picture':
                            image_ref = item['item']['renditions']['main'][
                                'href']
                            if provider.get('config', {}).get(
                                    'filenametemplate', '') == '':
                                filename = to_ascii(
                                    item['item']['renditions']['main']
                                    ['originalfilename'])
                            else:
                                # The filename is generated by applying the date format string in the template
                                filename = datetime.now().strftime(
                                    provider.get('config', {}).get(
                                        'filenametemplate', ''))
                                # and appending the sequence number
                                filename += '-' + str(
                                    sequence_number).zfill(4) + '.jpg'
                                sequence_number = (sequence_number + 1) % 10000
                            logger.info(
                                'file: {} versioncreated: {}'.format(
                                    filename, item['item']['versioncreated']))
                            r = requests.get(url=image_ref,
                                             params={
                                                 'apikey': provider.get(
                                                     'config', {}).get('apikey')
                                             })
                            r.raise_for_status()
                            try:
                                ftp.storbinary('STOR {}'.format(filename),
                                               BytesIO(r.content))
                            except ftplib.all_errors as e:
                                logger.error(e)
                    # Any exception processing an indivisual item is swallowed
                    except Exception as ex:
                        logger.exception(ex)
        except Exception as ex:
            logger.exception(ex)
    # Save the link for next time
    # NOTE(review): sequence_number is only assigned inside the
    # `if len(...) > 0` branch above — when the response has no items this
    # line raises NameError. Looks like a latent bug; confirm and hoist the
    # initialisation if so.
    upd_provider = provider.get('config')
    upd_provider['next_link'] = nextLink
    upd_provider['recoverytime'] = ''
    upd_provider['sequence'] = str(sequence_number)
    update['config'] = upd_provider
    return None
def _update(self, provider, update):
    """FTP ingest with optional server-side move of processed files.

    Downloads each file with the expected ``FILE_SUFFIX`` that is newer
    than the provider's ``last_updated``, parses it with the registered
    feed parser, and (when ``config['move']`` is set) moves files into a
    success or error folder on the server.

    :param provider: ingest provider document
    :param update: dict mutated in place with LAST_UPDATED on success
    :return: list of parsed results (each entry is a list of items)
    :raises IngestFtpError: on any FTP-level failure

    BUGFIXES: the suffix check used a bare ``raise`` with no active
    exception (surfacing as ``RuntimeError: No active exception to
    re-raise``), and the file-exists message used a malformed format
    string ``'({filename])'`` that raised ValueError instead of producing
    the intended message. Both error paths now raise explicit, correctly
    formatted exceptions; control flow (log + move to error folder) is
    unchanged.
    """
    config = provider.get('config', {})
    last_updated = provider.get('last_updated')
    # Newest file modify time seen this run (only tracked when last_updated set).
    crt_last_updated = None
    if config.get('move', False):
        do_move = True
        if not config.get('move_path'):
            logger.debug('missing move_path, default will be used')
        move_dest_path = os.path.join(config.get('path', ''),
                                      config.get('move_path') or DEFAULT_SUCCESS_PATH)
        if not config.get('move_path_error'):
            logger.debug('missing move_path_error, default will be used')
        move_dest_path_error = os.path.join(config.get('path', ''),
                                            config.get('move_path_error') or DEFAULT_FAILURE_PATH)
    else:
        do_move = False
    if 'dest_path' not in config:
        # Local scratch directory for downloaded files.
        config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')
    try:
        with ftp_connect(config) as ftp:
            if do_move:
                try:
                    self._create_if_missing(ftp, move_dest_path)
                    self._create_if_missing(ftp, move_dest_path_error)
                except ftplib.all_errors as e:
                    # Degrade gracefully: keep ingesting but stop moving files.
                    logger.warning("Can't create move directory, files will not be moved: {reason}".format(
                        reason=e))
                    do_move = False
            items = []
            for filename, facts in ftp.mlsd():
                # Skip directories and other non-file entries.
                if facts.get('type', '') != 'file':
                    continue
                try:
                    if not filename.lower().endswith(self.FILE_SUFFIX):
                        # Route unexpected files through the per-file error
                        # handler so they are logged and moved to the error
                        # folder like any other failure.
                        raise Exception('file {filename} has an unexpected extension'.format(
                            filename=filename))
                    if last_updated:
                        item_last_updated = datetime.strptime(facts['modify'],
                                                              self.DATE_FORMAT).replace(tzinfo=utc)
                        if item_last_updated < last_updated:
                            continue
                        elif not crt_last_updated or item_last_updated > crt_last_updated:
                            crt_last_updated = item_last_updated
                    local_file_path = os.path.join(config['dest_path'], filename)
                    try:
                        # 'xb' fails with FileExistsError if already downloaded.
                        with open(local_file_path, 'xb') as f:
                            try:
                                ftp.retrbinary('RETR %s' % filename, f.write)
                            except ftplib.all_errors:
                                # Partial download: discard and report via the
                                # per-file error handler.
                                os.remove(local_file_path)
                                raise Exception('Exception retrieving file from FTP server ({filename})'.format(
                                    filename=filename))
                    except FileExistsError:
                        raise Exception('Exception retrieving from FTP server, file already exists ({filename})'
                                        .format(filename=local_file_path))
                    registered_parser = self.get_feed_parser(provider)
                    if isinstance(registered_parser, XMLFeedParser):
                        # XML parsers receive the parsed root element.
                        xml = etree.parse(local_file_path).getroot()
                        parser = self.get_feed_parser(provider, xml)
                        parsed = parser.parse(xml, provider)
                    else:
                        # Other parsers receive the local file path.
                        parser = self.get_feed_parser(provider, local_file_path)
                        parsed = parser.parse(local_file_path, provider)
                    if isinstance(parsed, dict):
                        parsed = [parsed]
                    items.append(parsed)
                    if do_move:
                        move_dest_file_path = os.path.join(move_dest_path, filename)
                        self._move(ftp, filename, move_dest_file_path)
                except Exception as e:
                    # Any per-file failure is logged and the file routed to
                    # the error folder; ingestion continues.
                    logger.error("Error while parsing {filename}: {msg}".format(filename=filename, msg=e))
                    if do_move:
                        move_dest_file_path_error = os.path.join(move_dest_path_error, filename)
                        self._move(ftp, filename, move_dest_file_path_error)
            if crt_last_updated:
                update[LAST_UPDATED] = crt_last_updated
            return items
    except IngestFtpError:
        raise
    except Exception as ex:
        raise IngestFtpError.ftpError(ex, provider)
def _download_file(self, filename, file_path, config):
    """Fetch *file_path* from the FTP server into a temp file named
    *filename* and return the local path of the downloaded copy."""
    local_path = os.path.join(gettempdir(), filename)
    with ftp_connect(config) as ftp:
        with open(local_path, 'wb') as destination:
            ftp.retrbinary('RETR ' + file_path, destination.write)
    return local_path
def _update(self, provider, update):
    """FTP ingest limited to a batch of new files per run.

    Lists files on the provider's FTP server, selects up to
    ``FTP_INGEST_FILES_LIST_LIMIT`` files newer than the last processed
    modify time, parses each via ``_retrieve_and_parse``, and optionally
    moves processed files into success/error folders.

    :param provider: ingest provider document
    :param update: dict mutated in place with ``private.last_processed_file_modify``
    :return: list of parsed items
    :raises IngestFtpError: on any FTP-level failure
    """
    config = provider.get('config', {})
    do_move = config.get('move', False)
    # Modify timestamp of the last file processed in a previous run; used
    # as the resume point for incremental ingestion.
    last_processed_file_modify = provider.get(
        'private', {}).get('last_processed_file_modify')
    limit = app.config.get('FTP_INGEST_FILES_LIST_LIMIT', 100)
    registered_parser = self.get_feed_parser(provider)
    allowed_ext = getattr(registered_parser, 'ALLOWED_EXT',
                          self.ALLOWED_EXT_DEFAULT)
    try:
        with ftp_connect(config) as ftp:
            items = []
            files_to_process = []
            files = self._sort_files(self._list_files(ftp, provider))
            if do_move:
                move_path, move_path_error = self._create_move_folders(
                    config, ftp)
            for filename, modify in files:
                # filter by extension
                # NOTE(review): "(unknown)" looks like a lost "{filename}"
                # placeholder — the .format(filename=...) has nothing to fill.
                if not self._is_allowed(filename, allowed_ext):
                    logger.info(
                        'ignoring file (unknown) because of file extension'
                        .format(filename=filename))
                    continue
                # filter by modify datetime
                file_modify = datetime.strptime(
                    modify, self.DATE_FORMAT).replace(tzinfo=utc)
                if last_processed_file_modify:
                    # ignore limit and add files for processing
                    if last_processed_file_modify == file_modify:
                        files_to_process.append((filename, file_modify))
                    elif last_processed_file_modify < file_modify:
                        # even if we have reached a limit, we must add at least one file to increment
                        # a `last_processed_file_modify` in provider
                        files_to_process.append((filename, file_modify))
                        # limit amount of files to process per ingest update
                        if len(files_to_process) >= limit:
                            break
                else:
                    # limit amount of files to process per ingest update
                    if len(files_to_process) >= limit:
                        break
                    # add files for processing
                    files_to_process.append((filename, file_modify))
            # process files
            for filename, file_modify in files_to_process:
                try:
                    items += self._retrieve_and_parse(
                        ftp, config, filename, provider, registered_parser)
                    # Record progress only after a successful parse.
                    update['private'] = {
                        'last_processed_file_modify': file_modify
                    }
                    if do_move:
                        move_dest_file_path = os.path.join(
                            move_path, filename)
                        self._move(ftp, filename, move_dest_file_path)
                except Exception as e:
                    # Per-file failure: log it and route the file to the
                    # error folder; ingestion continues.
                    logger.error(
                        "Error while parsing (unknown): {msg}".format(
                            filename=filename, msg=e))
                    if do_move:
                        move_dest_file_path_error = os.path.join(
                            move_path_error, filename)
                        self._move(ftp, filename, move_dest_file_path_error)
            return items
    except IngestFtpError:
        raise
    except Exception as ex:
        raise IngestFtpError.ftpError(ex, provider)
def _update(self, provider, update):
    """FTP ingest driven by ``_list_items`` with optional server-side moves.

    Retrieves and parses each allowed file newer than ``last_updated`` and
    tracks the newest modification time in ``update[LAST_UPDATED]``. When
    ``config['move']`` is set, processed files are moved into success or
    error folders on the server.

    :param provider: ingest provider document
    :param update: dict mutated in place with LAST_UPDATED on success
    :return: list of parsed items
    :raises IngestFtpError: on any FTP-level failure
    """
    config = provider.get('config', {})
    last_updated = provider.get('last_updated')
    registered_parser = self.get_feed_parser(provider)
    try:
        allowed_ext = registered_parser.ALLOWED_EXT
    except AttributeError:
        # Parser does not restrict extensions; use the service default.
        allowed_ext = self.ALLOWED_EXT_DEFAULT
    # Newest file modify time seen this run (only tracked when last_updated set).
    crt_last_updated = None
    if config.get('move', False):
        do_move = True
        # NOTE(review): this variant reads 'ftp_move_path' while the log
        # message (and sibling implementations) say 'move_path' — confirm
        # which config key this service actually documents.
        if not config.get('ftp_move_path'):
            logger.debug('missing move_path, default will be used')
        move_dest_path = os.path.join(config.get('path', ''),
                                      config.get('ftp_move_path') or DEFAULT_SUCCESS_PATH)
        if not config.get('move_path_error'):
            logger.debug('missing move_path_error, default will be used')
        move_dest_path_error = os.path.join(config.get('path', ''),
                                            config.get('move_path_error') or DEFAULT_FAILURE_PATH)
    else:
        do_move = False
    if 'dest_path' not in config:
        # Local scratch directory for downloaded files.
        config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')
    try:
        with ftp_connect(config) as ftp:
            if do_move:
                try:
                    self._create_if_missing(ftp, move_dest_path)
                    self._create_if_missing(ftp, move_dest_path_error)
                except ftplib.all_errors as e:
                    # Degrade gracefully: keep ingesting but stop moving files.
                    logger.warning("Can't create move directory, files will not be moved: {reason}".format(
                        reason=e))
                    do_move = False
            items = []
            # _list_items yields (filename, modify-timestamp) pairs.
            for filename, facts in self._list_items(ftp, provider):
                try:
                    if not self._is_allowed(filename, allowed_ext):
                        # NOTE(review): "(unknown)" looks like a lost
                        # "{filename}" placeholder — the .format has nothing
                        # to fill.
                        logger.info('ignoring file (unknown) because of file extension'.format(filename=filename))
                        continue
                    if last_updated:
                        item_last_updated = datetime.strptime(facts, self.DATE_FORMAT).replace(tzinfo=utc)
                        if item_last_updated <= last_updated:
                            continue
                        elif not crt_last_updated or item_last_updated > crt_last_updated:
                            crt_last_updated = item_last_updated
                    items += self._retrieve_and_parse(ftp, config, filename, provider, registered_parser)
                    if do_move:
                        move_dest_file_path = os.path.join(move_dest_path, filename)
                        self._move(ftp, filename, move_dest_file_path)
                except Exception as e:
                    # Per-file failure: log it and route the file to the
                    # error folder; ingestion continues.
                    logger.error("Error while parsing (unknown): {msg}".format(filename=filename, msg=e))
                    if do_move:
                        move_dest_file_path_error = os.path.join(move_dest_path_error, filename)
                        self._move(ftp, filename, move_dest_file_path_error)
            if crt_last_updated:
                update[LAST_UPDATED] = crt_last_updated
            return items
    except IngestFtpError:
        raise
    except Exception as ex:
        raise IngestFtpError.ftpError(ex, provider)