def __init__(self, plugin_path=None):
    """Remember the plugin location and create a files manager.

    :param plugin_path: plugin root path (may be supplied later to load())
    :type plugin_path: str|None
    """
    self.plugin_path = plugin_path
    self.files_manager = FilesManager()
class PluginLoaderBase(object):
    """Plugin loader.

    Loader deals with the file structure providing ability to load,
    combine and form the data tree from the plugin directory.

    If loading fails, an exception with the report attached is raised.
    """

    # name of the root data file, relative to the plugin folder
    _metadata_path = "metadata.yaml"
    # keys ending with this suffix point to files/dirs to be resolved
    _path_suffix = "_path"
    # path keys that must be left as-is (not loaded or checked)
    _dont_resolve_path_keys = {'repository_path', 'deployment_scripts_path'}

    # Mapping of output data keys to fixed file locations relative to the
    # plugin root.  NOTE(review): this is a mutable class attribute shared
    # between instances — subclasses are expected to override it, never
    # mutate it in place.
    paths_to_fields = {}

    def __init__(self, plugin_path=None):
        self.files_manager = FilesManager()
        self.plugin_path = plugin_path

    def _get_absolute_path(self, path):
        """Get absolute path from the relative to the plugins folder.

        :param path: relative path
        :type path: str

        :return: path string
        :rtype: str
        """
        return os.path.join(self.plugin_path, path)

    @property
    def _root_metadata_path(self):
        """Where is the root plugin data file located."""
        return self._get_absolute_path(self._metadata_path)

    def _recursive_process_paths(self, data, report):
        """Recursively process nested list/dict resolving ``*_path`` keys.

        :param data: data
        :type data: iterable
        :param report: report node
        :type report: utils.ReportNode

        :returns: data
        :rtype: list|dict
        """
        if isinstance(data, dict):
            new_data = {}
            for key in tuple(data):
                value = data[key]
                # if we have key with path we could do 3 things:
                #
                # * if it is pointing to directory, check dir existence and
                #   leave path intact
                #
                # * if the key is in _dont_resolve_path_keys, leave the
                #   value untouched
                #
                # * if it is file compatible with FileManager, read this
                #   file and remove _path suffix from key.
                if key.endswith(self._path_suffix) \
                        and isinstance(value, six.string_types):
                    if os.path.isdir(self._get_absolute_path(value)):
                        report.info(u"{} is valid directory".format(
                            value))
                        # leave directories as is
                        new_data[key] = value
                    elif key in self._dont_resolve_path_keys:
                        report.info(u"{}:{} was not checked".format(
                            key, value))
                        new_data[key] = value
                    else:
                        cleaned_key = key[:-len(self._path_suffix)]
                        try:
                            loaded_data = self.files_manager.load(
                                self._get_absolute_path(value)
                            )
                            new_data[cleaned_key] = loaded_data
                        except Exception as exc:
                            path_node = utils.ReportNode(data[key])
                            report.add_nodes(path_node.error(exc))
                            # keep path as is
                            new_data[key] = value
                else:
                    new_data[key] = self._recursive_process_paths(
                        data[key], report)
        elif isinstance(data, list):
            new_data = [
                self._recursive_process_paths(record, report)
                for record in data
            ]
        else:
            new_data = data
        return new_data

    def _load_root_metadata_file(self):
        """Get plugin root data (usually, it's metadata.yaml).

        :return: data and the loading report
        :rtype: tuple(dict|list, utils.ReportNode)
        """
        report = utils.ReportNode(u"Loading root metadata file:{}".format(
            self._root_metadata_path
        ))

        # todo(ikutukov): current loading schema and testing relies on case
        # when no metadata.yaml file is possible. So we are skipping all
        # exceptions.
        try:
            data = self.files_manager.load(self._root_metadata_path)
        except Exception as exc:
            report.warning(exc)
            return {}, report

        data = self._recursive_process_paths(data, report)
        return data, report

    def load(self, plugin_path=None):
        """Load data from the given plugin path producing a data tree.

        :param plugin_path: plugin root path
        :type plugin_path: str|basestring|None

        :return: data tree starting from the data in root metadata file
        :rtype: tuple(dict, utils.ReportNode)

        :raises errors.ParseError: if the aggregated report is failed
        """
        plugin_path = plugin_path or self.plugin_path
        report = utils.ReportNode(
            u"File structure validation: {}".format(plugin_path))
        data, root_report = self._load_root_metadata_file()
        report.add_nodes(root_report)

        # load files with fixed location
        for key, file_path in six.iteritems(self.paths_to_fields):
            file_report = utils.ReportNode(file_path)
            try:
                data[key] = self.files_manager.load(
                    self._get_absolute_path(file_path)
                )
            except errors.NoPluginFileFound as exc:
                # a missing optional file is not fatal
                data[key] = None
                file_report.warning(exc)
            except Exception as exc:
                file_report.error(exc)
            finally:
                report.add_nodes(file_report)

        if report.is_failed():
            raise errors.ParseError(report.render())
        return data, report
class PluginLoaderBase(object):
    """Plugin loader.

    Deals with the plugin file structure: loads, combines and builds the
    data tree from the plugin directory.  On failure an exception carrying
    the rendered report is raised.
    """

    _metadata_path = "metadata.yaml"
    _path_suffix = "_path"
    _dont_resolve_path_keys = {'repository_path', 'deployment_scripts_path'}

    def __init__(self, plugin_path=None):
        self.files_manager = FilesManager()
        self.plugin_path = plugin_path

    paths_to_fields = {}

    def _get_absolute_path(self, path):
        """Resolve a path relative to the plugins folder.

        :param path: relative path
        :type path: str

        :return: path string
        :rtype: str
        """
        return os.path.join(self.plugin_path, path)

    @property
    def _root_metadata_path(self):
        """Location of the root plugin data file."""
        return self._get_absolute_path(self._metadata_path)

    def _recursive_process_paths(self, data, report):
        """Recursively walk nested list/dict resolving path-like keys.

        :param data: data
        :type data: iterable
        :param report: report node
        :type report: utils.ReportNode

        :returns: data
        :rtype: list|dict
        """
        if isinstance(data, dict):
            result = {}
            for key, value in list(data.items()):
                # a "path key" ends with the suffix and holds a string
                is_path_key = (key.endswith(self._path_suffix)
                               and isinstance(value, six.string_types))
                if not is_path_key:
                    # plain key: just recurse into the value
                    result[key] = self._recursive_process_paths(
                        value, report)
                elif os.path.isdir(self._get_absolute_path(value)):
                    # existing directories are kept verbatim
                    report.info(u"{} is valid directory".format(value))
                    result[key] = value
                elif key in self._dont_resolve_path_keys:
                    # these keys are intentionally left unresolved
                    report.info(u"{}:{} was not checked".format(key, value))
                    result[key] = value
                else:
                    # a loadable file: read it and drop the suffix
                    stripped_key = key[:-len(self._path_suffix)]
                    try:
                        result[stripped_key] = self.files_manager.load(
                            self._get_absolute_path(value))
                    except Exception as error:
                        failed_node = utils.ReportNode(value)
                        report.add_nodes(failed_node.error(error))
                        # keep path as is
                        result[key] = value
            return result
        if isinstance(data, list):
            return [
                self._recursive_process_paths(item, report)
                for item in data
            ]
        return data

    def _load_root_metadata_file(self):
        """Get plugin root data (usually, it's metadata.yaml).

        :return: data and the loading report
        :rtype: tuple
        """
        report = utils.ReportNode(u"Loading root metadata file:{}".format(
            self._root_metadata_path))
        # todo(ikutukov): current loading schema and testing relies on case
        # when no metadata.yaml file is possible. So we are skipping all
        # exceptions.
        try:
            raw_data = self.files_manager.load(self._root_metadata_path)
        except Exception as error:
            report.warning(error)
            return {}, report
        return self._recursive_process_paths(raw_data, report), report

    def load(self, plugin_path=None):
        """Load data from the given plugin path producing a data tree.

        :param plugin_path: plugin root path
        :param plugin_path: str|basestring|None

        :return: data tree starting from the data in root metadata file
        :rtype: tuple(dict, utils.ReportNode)
        """
        plugin_path = plugin_path or self.plugin_path
        report = utils.ReportNode(
            u"File structure validation: {}".format(plugin_path))
        data, root_report = self._load_root_metadata_file()
        report.add_nodes(root_report)

        # load files with fixed location
        for key, file_path in six.iteritems(self.paths_to_fields):
            file_report = utils.ReportNode(file_path)
            try:
                data[key] = self.files_manager.load(
                    self._get_absolute_path(file_path))
            except errors.NoPluginFileFound as error:
                data[key] = None
                file_report.warning(error)
            except Exception as error:
                file_report.error(error)
            finally:
                report.add_nodes(file_report)

        if report.is_failed():
            raise errors.ParseError(report.render())
        return data, report
def __init__(self, prefix, output_size_mb, dbx):
    """Store the output naming prefix, the dropbox connector and set up
    the files manager with the desired chunk size.
    """
    self.prefix = prefix
    # initialize dropbox connector
    self.dbx = dbx
    self.files_manager = FilesManager(output_size_mb)
class Processor:
    """Downloads raw per-ticker CSV files from Dropbox, merges them into
    size-bounded chunks sorted by date and uploads the results back.
    """

    def __init__(self, prefix, output_size_mb, dbx):
        # initialize dropbox connector
        self.dbx = dbx
        self.prefix = prefix
        self.files_manager = FilesManager(output_size_mb)

    def set_log(self, log):
        """Attach a logger to this processor and its files manager."""
        self.log = log
        self.files_manager.set_log(log)

    def TL_data(self, data_folder_dropbox, dropbox_folder_upload,
                data_folder, result_folder, sufix, category):
        """Merge all CSV files of a category into size-capped chunks.

        :param data_folder_dropbox: Dropbox folder holding the raw files
        :param dropbox_folder_upload: Dropbox folder to upload results to
        :param data_folder: local working folder for downloads
        :param result_folder: local folder where merged chunks are written
        :param sufix: suffix used when naming the output files
        :param category: sub-folder / category name to process
        """
        filenames = self.dbx.list_files(data_folder_dropbox + category)
        final_df = pd.DataFrame()
        index = 0

        dropbox_path = dropbox_folder_upload + category
        if not self.dbx.folder_exists(dropbox_path):
            self.log.info('Create folder ' + dropbox_path)
            self.dbx.create_folder(dropbox_path)

        # for each file, append until reach threashold
        for filename in filenames:
            path = data_folder_dropbox + category + '/' + filename
            local_path = data_folder + filename
            self.dbx.download_file(path, local_path)
            self.log.info('Processing file:%s', local_path)
            try:
                df = pd.read_csv(local_path, dtype={'ticker': str})
                if df.shape[0] == 0:
                    # empty (header-only) file: discard and move on
                    os.remove(local_path)
                    continue
            # NOTE: pd.io.common.EmptyDataError was removed in modern
            # pandas; pd.errors is the stable public location.
            except pd.errors.EmptyDataError:
                os.remove(local_path)
                continue

            # the per-file count column is named after the ticker;
            # normalize it to 'count' so files can be concatenated
            ticker = df['ticker'][0]
            df.rename(index=str, columns={ticker: 'count'}, inplace=True)

            if self.files_manager.check_chunks(final_df, df):
                self.log.info('Uploading chunk:%d', index)
                # sort values by date before saving the file
                final_df.sort_values(by=['date'], inplace=True,
                                     ascending=False)
                index = self.files_manager.save_data(
                    result_folder, self.prefix, sufix,
                    dropbox_folder_upload + category + '/',
                    final_df, index, self.dbx)
                # start the next chunk with the current file's data
                final_df = df
            else:
                # NOTE: DataFrame.append was removed in pandas 2.0;
                # pd.concat is the supported way to accumulate rows.
                final_df = pd.concat([final_df, df])
            os.remove(local_path)

        # save last chunck
        if final_df.shape[0] > 0:
            self.log.info('Saving last chunck')
            final_df.sort_values(by=['date'], inplace=True, ascending=False)
            self.files_manager.save_data(
                result_folder, self.prefix, sufix,
                dropbox_folder_upload + category + '/',
                final_df, index, self.dbx)