def is_property_present_for_df(df, name):
    """
    Check if the property is present for the dataframe

    Args:
        df (pandas dataframe): Input dataframe
        name (str): Property name

    Returns:
        result (bool). Returns True if the property is present for the input
        dataframe

    Raises:
        AssertionError: If the input object is not a pandas dataframe
        KeyError: If the dataframe is not present in the catalog
    """
    catalog = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        logger.error('Input object is not of type pandas data frame')
        raise AssertionError('Input object is not of type pandas data frame')

    # Truthiness test instead of `is False`: any falsy answer from the catalog
    # means "not registered", matching the other functions in this module.
    if not catalog.is_df_info_present_in_catalog(df):
        logger.error('Dataframe information is not present in the catalog')
        raise KeyError('Dataframe information is not present in the catalog')

    return catalog.is_property_present_for_df(df, name)
def get_all_properties(df):
    """
    Retrieve every property stored in the catalog for a dataframe

    Args:
        df (pandas dataframe): Dataframe whose properties are requested

    Returns:
        Property dictionary (dict). The keys are property names (str) and
        the values are property values (pandas object)

    Raises:
        AssertionError: If the input object is not a pandas dataframe
        KeyError: If the information about the input dataframe is not
            present in the catalog
    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        type_msg = 'Input object is not of type pandas data frame'
        logger.error(type_msg)
        raise AssertionError(type_msg)

    df_known = registry.is_df_info_present_in_catalog(df)
    if not df_known:
        missing_msg = 'Dataframe information is not present in the catalog'
        logger.error(missing_msg)
        raise KeyError(missing_msg)

    return registry.get_all_properties(df)
def set_property(df, name, value):
    """
    Set property for a dataframe

    If the dataframe is not yet registered in the catalog, its property
    store is initialized first.

    Args:
        df (pandas dataframe): Dataframe for which the property has to be set
        name (str): Property name
        value (pandas object): Property value

    Returns:
        status (bool). Returns True if the property was set successfully

    Raises:
        AssertionError: If the input object is not a pandas dataframe, or
            the property name is not a string
    """
    catalog = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        logger.error('Input object is not of type pandas data frame')
        raise AssertionError('Input object is not of type pandas data frame')

    if not isinstance(name, six.string_types):
        logger.error('Property name is not of type string')
        raise AssertionError('Property name is not of type string')

    if not catalog.is_df_info_present_in_catalog(df):
        catalog.init_properties(df)

    catalog.set_property(df, name, value)
    # Return the documented status; previously the function fell off the end
    # and returned None, unlike set_properties which returns True.
    return True
def show_properties_for_id(obj_id):
    """
    Print the properties that the catalog holds for an object id

    String-valued properties are printed verbatim; for every other value
    only the identity (``id(value)``) of the stored object is printed.

    Args:
        obj_id: Identifier handed to the catalog's
            get_all_properties_for_id (presumably the id() of a dataframe;
            confirm against the catalog implementation)
    """
    registry = Catalog.Instance()
    props = registry.get_all_properties_for_id(obj_id)

    print('id: ' + str(obj_id))
    for key, val in six.iteritems(props):
        if not isinstance(val, six.string_types):
            line = key + "(obj.id): " + str(id(val))
        else:
            line = key + ": " + val
        print(line)
def del_catalog():
    """
    Remove all information held by the catalog

    Returns:
        status (bool). Returns True if the deletion was successful.
    """
    # Delegate straight to the catalog singleton.
    return Catalog.Instance().del_catalog()
def get_catalog_len():
    """
    Report how many entries the catalog currently holds

    Returns:
        length (int) of the catalog
    """
    # Delegate straight to the catalog singleton.
    return Catalog.Instance().get_catalog_len()
def is_catalog_empty():
    """
    Tell whether the catalog has no entries

    Returns:
        result (bool). Returns True if the catalog is empty, else returns
        False.
    """
    # Delegate straight to the catalog singleton.
    return Catalog.Instance().is_catalog_empty()
def get_catalog():
    """
    Fetch the catalog contents

    Returns:
        Catalog information in a dictionary format.
    """
    # Delegate straight to the catalog singleton.
    return Catalog.Instance().get_catalog()
def has_property(df, prop):
    """
    Check if a property is present for a dataframe in the catalog

    Args:
        df (pandas dataframe): Input dataframe
        prop (str): Property name

    Returns:
        result (bool). Returns True if the property name is among the
        properties stored for the dataframe, else returns False

    Raises:
        AssertionError: If the input object is not a pandas dataframe, or
            the property name is not a string
        KeyError: If the dataframe is not in the catalog
    """
    if not isinstance(df, pd.DataFrame):
        logger.error('Input object is not of type pandas data frame')
        raise AssertionError('Input object is not of type pandas data frame')

    if not isinstance(prop, six.string_types):
        logger.error('Property name is not of type string')
        raise AssertionError('Property name is not of type string')

    if not is_dfinfo_present(df):
        logger.error('Dataframe is not in the catalog')
        raise KeyError('Dataframe is not in the catalog')

    # Membership test on the property dictionary of the dataframe.
    return prop in get_all_properties(df)
def get_property(df, name):
    """
    Look up a single property of a dataframe in the catalog

    Args:
        df (pandas dataframe): Dataframe for which the property should be
            retrieved
        name (str): Name of the property that should be retrieved

    Returns:
        Property value (pandas object) for the given property name

    Raises:
        AssertionError: If the input object is not a pandas dataframe, or
            the property name is not a string
        KeyError: If the dataframe is not present in the catalog, or the
            requested property is not present in the catalog
    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        df_type_msg = 'Input object is not of type pandas data frame'
        logger.error(df_type_msg)
        raise AssertionError(df_type_msg)

    if not isinstance(name, six.string_types):
        name_type_msg = 'Property name is not of type string'
        logger.error(name_type_msg)
        raise AssertionError(name_type_msg)

    if not registry.is_df_info_present_in_catalog(df):
        no_df_msg = 'Dataframe information is not present in the catalog'
        logger.error(no_df_msg)
        raise KeyError(no_df_msg)

    if not registry.is_property_present_for_df(df, name):
        # Same text is logged and raised, so build it once.
        no_prop_msg = ('Requested metadata ( %s ) for the given dataframe is '
                       'not present in the catalog' % name)
        logger.error(no_prop_msg)
        raise KeyError(no_prop_msg)

    return registry.get_property(df, name)
def set_properties(df, prop_dict, replace=True):
    """
    Store a dictionary of properties for a dataframe in the catalog

    Args:
        df (pandas dataframe): Input dataframe
        prop_dict (dict): Property dictionary with keys as property names
            and values as python objects
        replace (bool): Flag to indicate whether the input properties can
            replace the properties in the catalog

    Returns:
        status (bool). Returns True if the properties were set. Returns
        False (after logging a warning) if the dataframe is already in the
        catalog and replace is False

    Raises:
        AssertionError: If the input object is not a pandas dataframe, or
            the properties are not given as a dictionary

    Notes:
        Each entry of the property dictionary is written to the catalog.
        The replace flag is a guard: when it is False, properties that
        already exist in the catalog for the dataframe are left untouched.
    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        df_type_msg = 'Input object is not of type pandas data frame'
        logger.error(df_type_msg)
        raise AssertionError(df_type_msg)

    if not isinstance(prop_dict, dict):
        dict_type_msg = 'The properties should be of type python dictionary'
        logger.error(dict_type_msg)
        raise AssertionError(dict_type_msg)

    # Refuse to touch an already-registered dataframe unless allowed to.
    if registry.is_df_info_present_in_catalog(df) and replace is False:
        logger.warning(
            'Properties already exists for df ( %s ). Not replacing it'
            % str(id(df)))
        return False

    # First-time registration of the dataframe.
    if not registry.is_df_info_present_in_catalog(df):
        registry.init_properties(df)

    for prop_name, prop_value in six.iteritems(prop_dict):
        registry.set_property(df, prop_name, prop_value)

    return True
def is_dfinfo_present(df):
    """
    Tell whether the catalog holds information about a dataframe

    Args:
        df (pandas dataframe): Input dataframe

    Returns:
        result (bool). Returns True if the dataframe information is present
        in the catalog, else returns False

    Raises:
        AssertionError: If the input object is not a pandas dataframe
    """
    registry = Catalog.Instance()

    if isinstance(df, pd.DataFrame):
        return registry.is_df_info_present_in_catalog(df)

    type_msg = 'Input object is not of type pandas data frame'
    logger.error(type_msg)
    raise AssertionError(type_msg)
def copy_properties(src, tar, update=True):
    """
    Copy properties from one dataframe to another

    Args:
        src (pandas dataframe): Dataframe from which the properties are
            copied
        tar (pandas dataframe): Dataframe to which the properties are copied
        update (bool): Flag to indicate whether the source properties can
            replace the target properties

    Returns:
        status (bool). The result of set_properties for the target: True if
        the properties were copied, False if the target already has catalog
        information and update is False

    Raises:
        AssertionError: If src or tar is not a pandas dataframe
        KeyError: If the source dataframe is not present in the catalog

    Notes:
        This function reads the source properties from the catalog and
        internally calls set_properties on the target
    """
    catalog = Catalog.Instance()

    if not isinstance(src, pd.DataFrame):
        logger.error('Input object (src) is not of type pandas data frame')
        raise AssertionError(
            'Input object (src) is not of type pandas data frame')

    if not isinstance(tar, pd.DataFrame):
        logger.error('Input object (tar) is not of type pandas data frame')
        raise AssertionError(
            'Input object (tar) is not of type pandas data frame')

    # Truthiness test instead of `is False`, so any falsy answer from the
    # catalog is treated as "source not registered".
    if not catalog.is_df_info_present_in_catalog(src):
        logger.error(
            'Dataframe information (src) is not present in the catalog')
        raise KeyError(
            'Dataframe information (src) is not present in the catalog')

    metadata = catalog.get_all_properties(src)
    # set_properties also initializes tar in the catalog when needed.
    return set_properties(tar, metadata, update)
def del_all_properties(df):
    """
    Remove every property stored in the catalog for a dataframe

    Args:
        df (pandas dataframe): Input dataframe for which all the properties
            must be deleted.

    Returns:
        status (bool). Returns True if the deletion was successful

    Raises:
        AssertionError: If the input object is not a pandas dataframe
        KeyError: If the dataframe information is not present in the catalog
    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        type_msg = 'Input object is not of type pandas data frame'
        logger.error(type_msg)
        raise AssertionError(type_msg)

    df_known = registry.is_df_info_present_in_catalog(df)
    if not df_known:
        missing_msg = 'Dataframe information is not present in the catalog'
        logger.error(missing_msg)
        raise KeyError(missing_msg)

    return registry.del_all_properties(df)
def del_property(df, name):
    """
    Delete a property from the catalog

    Args:
        df (pandas dataframe): Input dataframe for which a property must be
            deleted
        name (str): Property name

    Returns:
        status (bool). Returns True if the deletion was successful

    Raises:
        AssertionError: If the input object is not a pandas dataframe, or
            the property name is not a string
        KeyError: If the Dataframe info. is not present or the given
            property is not present for that dataframe in the catalog
    """
    catalog = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        logger.error('Input object is not of type pandas data frame')
        raise AssertionError('Input object is not of type pandas data frame')

    if not isinstance(name, six.string_types):
        logger.error('Property name is not of type string')
        raise AssertionError('Property name is not of type string')

    if not catalog.is_df_info_present_in_catalog(df):
        logger.error('Dataframe information is not present in the catalog')
        raise KeyError('Dataframe information is not present in the catalog')

    if not catalog.is_property_present_for_df(df, name):
        # Log the same message that is raised; the log line used to report
        # a missing dataframe here, which was misleading.
        missing_prop_msg = ('Requested metadata ( %s ) for the given '
                            'dataframe is not present in the catalog' % name)
        logger.error(missing_prop_msg)
        raise KeyError(missing_prop_msg)

    return catalog.del_property(df, name)
from PyQt4 import QtGui from magellan.catalog.catalog import Catalog __version__ = '0.1.0' _catalog = Catalog.Instance() # # import catalog related methods from magellan.catalog.catalog_manager import get_property, get_all_properties, \ set_property, del_property, del_all_properties from magellan.catalog.catalog_manager import get_catalog, del_catalog, \ get_catalog_len, show_properties, show_properties_for_id from magellan.catalog.catalog_manager import is_property_present_for_df, \ is_dfinfo_present, is_catalog_empty from magellan.catalog.catalog_manager import get_key, set_key # # # io related methods # from magellan.io.parsers import read_csv_metadata, to_csv_metadata from magellan.io.pickles import load_object, load_table, save_object, save_table # # # # blockers from magellan.blocker.attr_equiv_blocker import AttrEquivalenceBlocker from magellan.blocker.black_box_blocker import BlackBoxBlocker from magellan.blocker.overlap_blocker import OverlapBlocker from magellan.blocker.rule_based_blocker import RuleBasedBlocker # # blocker combiner from magellan.blockercombiner.blockercombiner import combine_blocker_outputs_via_union
def init_properties(df):
    """
    Initialize the catalog's property store for a dataframe

    Args:
        df (pandas dataframe): Dataframe to initialize in the catalog

    Raises:
        AssertionError: If the input object is not a pandas dataframe
    """
    registry = Catalog.Instance()

    if isinstance(df, pd.DataFrame):
        registry.init_properties(df)
        return

    type_msg = 'Input object is not of type pandas data frame'
    logger.error(type_msg)
    raise AssertionError(type_msg)