def generate_itol_ltoi(labels):
    """Build the index-to-label and label-to-index mappings.

    Args:
        labels: List of labels.

    Returns:
        pair of dict: ``(itol, ltoi)`` where ``itol`` is the result of
        ``Utils.convert_list_to_dict(labels)`` and ``ltoi`` is that dict
        passed through ``Utils.reserve_dict``.
    """
    index_to_label = Utils.convert_list_to_dict(labels)
    # The label-to-index mapping is derived from the index-to-label one.
    label_to_index = Utils.reserve_dict(index_to_label)
    return index_to_label, label_to_index
def encode_from_index_files(root, label_col, files=None, file_type='json',
                            pattern=".*json", multi_label=False,
                            multi_label_delimiter=None):
    """Create the itol and ltoi mappings from dataset index files.

    Args:
        root: Root folder where the dataset index files are located.
        label_col: Column that contains the labels.
        files: List of dataset files. If this is not provided, any file in
            the root folder that matches ``pattern`` will be read.
        file_type: Type of index file. Default is json.
        pattern: Regex to match if the files list is not provided. If a
            file list is provided, it takes precedence and the pattern is
            ignored.
        multi_label: Indicates whether the labels are multi-label.
        multi_label_delimiter: If the labels are multi-label but are not
            represented as an array type, this parameter stores the
            delimiter.

    Returns:
        A pair of dicts, exactly as returned by
        ``Encode_Labels.generate_itol_ltoi``: ``(itol, ltoi)``, i.e. one
        going from int to label and another going from label to int.
    """
    index_paths = Utils.create_list_of_file_paths(root, files, pattern)
    read_index = Encode_Labels.determine_read_file_method(file_type)
    extract_labels = Encode_Labels.determine_label_extract_method(
        multi_label, multi_label_delimiter)

    # Read every index file and stack the results into a single frame.
    frames = [read_index(path) for path in index_paths]
    combined = pd.concat(frames)

    label_column = combined[label_col]
    labels = extract_labels(label_column, multi_label_delimiter)
    return Encode_Labels.generate_itol_ltoi(labels)
def default_labels():
    """Return the 'label' entry of the sample.json test data file."""
    sample = Utils.read_json("./deep_abyasa/tests/data/sample.json")
    return sample['label']
def multi_labels_delimiter():
    """Return the 'label' entry of the sample3.json test data file."""
    sample = Utils.read_json("./deep_abyasa/tests/data/sample3.json")
    return sample['label']
def test_create_list_of_file_paths():
    """Each explicitly listed file should appear under the root folder."""
    paths = Utils.create_list_of_file_paths("/Users/temp",
                                            files=['world', 'peace'])
    for expected_path in ('/Users/temp/world', '/Users/temp/peace'):
        assert expected_path in paths
def test_reserve_dict():
    """Each value of the input dict should map back to its original key."""
    label_to_index = Utils.reserve_dict(
        {0: "carbon", 1: "hydrogen", 2: "oxygen"})
    expected_pairs = (('carbon', 0), ('hydrogen', 1), ('oxygen', 2))
    for label, index in expected_pairs:
        assert label_to_index[label] == index
def test_convert_list_to_dict():
    """Indices 0..2 should map to the labels in alphabetical order here."""
    index_to_label = Utils.convert_list_to_dict(
        ['oxygen', 'carbon', 'hydrogen'])
    expected_order = ('carbon', 'hydrogen', 'oxygen')
    for index, label in enumerate(expected_order):
        assert index_to_label[index] == label
def test_create_list_of_file_paths_from_pattern():
    """With no file list, files matching the pattern should be found."""
    matched_paths = Utils.create_list_of_file_paths("./", pattern=".*md")
    assert './README.md' in matched_paths