def test_metadata_split_to_dict():
    """Should split metadata into a dictionary.

    Reads the local ``14001.px`` fixture under ``data_path``, extracts its
    metadata elements and checks that splitting them yields a dictionary
    with 28 entries.
    """
    # NOTE(review): a second ``test_metadata_split_to_dict`` (the URL-based
    # variant) is defined later in this module; the later ``def`` rebinds
    # the name, so this local-file variant is presumably never collected
    # by pytest -- confirm and give one of them a distinct name.
    pc_axis = pyaxis.read(data_path + '14001.px', 'iso-8859-15')
    # Only the metadata half of the extraction result is needed here.
    metadata_elements, _ = pyaxis.metadata_extract(pc_axis)
    metadata = pyaxis.metadata_split_to_dict(metadata_elements)

    # isinstance() is the idiomatic type check; it also accepts dict
    # subclasses, which an exact ``type(...) ==`` comparison would reject.
    assert isinstance(metadata, dict)
    assert len(metadata) == 28
def test_metadata_extract():
    """Should extract pcaxis metadata into a list.

    Reads the local ``14001.px`` fixture under ``data_path`` and checks
    that the extraction returns a 28-element list of metadata elements
    plus the raw data as a string of 29441 characters.
    """
    # NOTE(review): a second ``test_metadata_extract`` (the URL-based
    # variant) is defined later in this module; the later ``def`` rebinds
    # the name, so this local-file variant is presumably never collected
    # by pytest -- confirm and give one of them a distinct name.
    pc_axis = pyaxis.read(data_path + '14001.px', 'iso-8859-15')
    metadata_elements, raw_data = pyaxis.metadata_extract(pc_axis)

    # isinstance() is the idiomatic type check; it also accepts subclasses,
    # which an exact ``type(...) ==`` comparison would reject.
    assert isinstance(metadata_elements, list)
    assert len(metadata_elements) == 28
    assert isinstance(raw_data, str)
    assert len(raw_data) == 29441
def test_metadata_split_to_dict():
    """Should split metadata into a dictionary.

    Reads ``14001.px`` from the INE URL below, extracts its metadata
    elements and checks that splitting them yields a dictionary with
    23 entries.
    """
    # NOTE(review): the source is a remote URL, so this test presumably
    # needs network access and depends on the published file -- confirm.
    pc_axis = pyaxis.read(
        'https://www.ine.es/jaxi/files/_px/es/px/t20/e301/matri/a2000/l0/'
        '14001.px?nocab=1',
        'iso-8859-15')
    # Only the metadata half of the extraction result is needed here.
    metadata_elements, _ = pyaxis.metadata_extract(pc_axis)
    metadata = pyaxis.metadata_split_to_dict(metadata_elements)

    # isinstance() is the idiomatic type check; it also accepts dict
    # subclasses, which an exact ``type(...) ==`` comparison would reject.
    assert isinstance(metadata, dict)
    assert len(metadata) == 23
def test_metadata_extract():
    """Should extract pcaxis metadata into a list.

    Reads ``14001.px`` from the INE URL below and checks that the
    extraction returns a 23-element list of metadata elements plus the
    raw data as a string of at least 40282 characters.
    """
    # NOTE(review): the source is a remote URL, so this test presumably
    # needs network access and depends on the published file -- confirm.
    pc_axis = pyaxis.read(
        'https://www.ine.es/jaxi/files/_px/es/px/t20/e301/matri/a2000/l0/'
        '14001.px?nocab=1',
        'iso-8859-15')
    metadata_elements, raw_data = pyaxis.metadata_extract(pc_axis)

    # isinstance() is the idiomatic type check; it also accepts subclasses,
    # which an exact ``type(...) ==`` comparison would reject.
    assert isinstance(metadata_elements, list)
    assert len(metadata_elements) == 23
    assert isinstance(raw_data, str)
    # Lower bound rather than an exact length, as in the original check.
    assert len(raw_data) >= 40282
def test_get_codes():
    """Should return two lists (dimension names with codes and codes).

    Uses the local ``14001.px`` fixture under ``data_path``: exactly one
    dimension is expected to carry codes, and two of its codes are
    spot-checked by position.
    """
    source = pyaxis.read(data_path + '14001.px', 'iso-8859-15')
    elements, _ = pyaxis.metadata_extract(source)
    meta = pyaxis.metadata_split_to_dict(elements)

    coded_names, codes = pyaxis.get_codes(meta)

    # Both returned lists hold a single entry.
    assert len(codes) == 1
    assert len(coded_names) == 1

    expected_name = 'Comunidad Autónoma de residencia del matrimonio'
    assert coded_names[0] == expected_name

    # Spot-check individual codes of the only coded dimension.
    first_codes = codes[0]
    assert first_codes[6] == 'CA06'
    assert first_codes[11] == 'CA11'
def test_get_dimensions():
    """Should return two lists (dimension names and members).

    Uses the local ``14001.px`` fixture under ``data_path`` and checks
    the number of dimensions plus the first and last names and one
    member of each of those two dimensions.
    """
    # NOTE(review): a second ``test_get_dimensions`` (the URL-based
    # variant) is defined later in this module; the later ``def`` rebinds
    # the name, so this local-file variant is presumably never collected
    # by pytest -- confirm and give one of them a distinct name.
    source = pyaxis.read(data_path + '14001.px', 'iso-8859-15')
    elements, _ = pyaxis.metadata_extract(source)
    meta = pyaxis.metadata_split_to_dict(elements)

    names, members = pyaxis.get_dimensions(meta)

    # Dimension names.
    assert len(names) == 4
    first_name = 'Comunidad Autónoma de residencia del matrimonio'
    last_name = 'estado civil anterior de los cónyuges'
    assert names[0] == first_name
    assert names[3] == last_name

    # Dimension members.
    assert len(members) == 4
    assert members[0][0] == 'Total'
    assert members[3][3] == 'Divorciados/as'
def test_get_dimensions():
    """Should return two lists (dimension names and members).

    Reads ``14001.px`` from the INE URL below and checks the number of
    dimensions plus the first and last names and one member of each of
    those two dimensions.
    """
    # NOTE(review): the source is a remote URL, so this test presumably
    # needs network access and depends on the published file -- confirm.
    url = (
        'https://www.ine.es/jaxi/files/_px/es/px/t20/e301/matri/a2000/l0/'
        '14001.px?nocab=1'
    )
    source = pyaxis.read(url, 'iso-8859-15')
    elements, _ = pyaxis.metadata_extract(source)
    meta = pyaxis.metadata_split_to_dict(elements)

    names, members = pyaxis.get_dimensions(meta)

    # Dimension names.
    assert len(names) == 4
    first_name = 'Comunidad Autónoma de residencia de los cónyuges'
    last_name = 'Estado civil anterior de los cónyuges'
    assert names[0] == first_name
    assert names[3] == last_name

    # Dimension members.
    assert len(members) == 4
    assert members[0][0] == 'Todas las comunidades'
    assert members[3][3] == 'Divorciados/as'
def test_build_dataframe():
    """Should return a dataframe with n+1 columns (dimensions + data).

    Uses the local ``14001.px`` fixture under ``data_path``: the frame is
    built from the dimension names/members and the whitespace-split raw
    data, then its shape and two ``DATA`` values are checked.
    """
    # NOTE(review): a second ``test_build_dataframe`` (the URL-based
    # variant) is defined later in this module; the later ``def`` rebinds
    # the name, so this local-file variant is presumably never collected
    # by pytest -- confirm and give one of them a distinct name.
    source = pyaxis.read(data_path + '14001.px', 'iso-8859-15')
    elements, raw = pyaxis.metadata_extract(source)
    meta = pyaxis.metadata_split_to_dict(elements)
    names, members = pyaxis.get_dimensions(meta)

    # Patterns handed to build_dataframe as null_values / sd_values.
    null_pattern = r'^"\."$'
    sd_pattern = r'"\.\."'

    values = Series(raw.split())
    df = pyaxis.build_dataframe(
        names,
        members,
        values,
        null_values=null_pattern,
        sd_values=sd_pattern)

    assert df.shape == (8064, 5)
    data_column = df['DATA']
    assert data_column[7] == '28138'
    assert data_column[159] == '422'
def test_build_dataframe():
    """Should return a dataframe with n+1 columns (dimensions + data).

    Reads ``14001.px`` from the INE URL below: the frame is built from
    the dimension names/members and the whitespace-split raw data, then
    its shape and two ``DATA`` values are checked.
    """
    # NOTE(review): the source is a remote URL, so this test presumably
    # needs network access and depends on the published file -- confirm.
    url = (
        'https://www.ine.es/jaxi/files/_px/es/px/t20/e301/matri/a2000/l0/'
        '14001.px?nocab=1'
    )
    source = pyaxis.read(url, 'iso-8859-15')
    elements, raw = pyaxis.metadata_extract(source)
    meta = pyaxis.metadata_split_to_dict(elements)
    names, members = pyaxis.get_dimensions(meta)

    # Patterns handed to build_dataframe as null_values / sd_values.
    null_pattern = r'^"\."$'
    sd_pattern = r'"\.\."'

    values = Series(raw.split())
    df = pyaxis.build_dataframe(
        names,
        members,
        values,
        null_values=null_pattern,
        sd_values=sd_pattern)

    assert df.shape == (8064, 5)
    data_column = df['DATA']
    assert data_column[7] == '10624.0'
    assert data_column[159] == '534.0'