def get_assigned_targets(config):
    """Load the targets CSV, derive its groups and assign a group id per target.

    The groups are also dumped as YAML into an ``.INFO`` file whose path is
    ``config["info_frequency_path"]`` with ``.txt`` replaced by ``.INFO``.

    :param config: config dictionary; the keys read here are
        ``base_csv_path``, ``sep``, ``index_cols`` and
        ``info_frequency_path`` (the whole dictionary is also forwarded to
        ``set_group_id_for_target``)
    :returns: (dict (targets_id -> group_id), dict (group_id -> group_label))
    """
    print(">> Opening csv file from %s" % (config["base_csv_path"],))
    # DataFrame.from_csv was a thin wrapper around the parser below (header=0,
    # parse_dates=True) and has been removed from pandas; call it directly.
    all_targets_df = pd.read_csv(
        config["base_csv_path"],
        header=0,
        sep=config["sep"],
        index_col=config["index_cols"],
        parse_dates=True,
    )

    print(">> Retriving groups")
    index_list_size = len(config["index_cols"])
    if index_list_size > 1:
        # Group on every index level at once.
        grouped_targets = all_targets_df.groupby(
            level=list(range(index_list_size)))
    else:
        grouped_targets = all_targets_df.groupby(level=index_list_size - 1)

    print(">> Setting multi-layer dictionary for groups")
    groups = regroup(grouped_targets)
    levels = get_length_per_level(groups)

    print(">> Setting groups ids")
    groups_label_id = set_groups_ids(groups, levels)

    info_file_path = config["info_frequency_path"].replace(".txt", ".INFO")
    print(">> Registering groups and their ids at")
    print(info_file_path)
    fileloader.createValidPath(info_file_path)
    # The context manager closes the file even if the YAML dump fails.
    with open(info_file_path, "w+") as diversity_info_file:
        diversity_info_file.write("#INFO: group_name: 'group_identifier'\n")
        diversity_info_file.write(yaml.dump(groups, default_flow_style=False))

    print(">> Assigning group id for each target")
    targets_group_id = set_group_id_for_target(grouped_targets, groups, config)
    return targets_group_id, groups_label_id
def set_groups_paths(global_path, groups):
    """Set and create the output path needed for each group.

    Every group gets ``<dir of global_path>/<label>/<file name>``, where
    spaces in the label are replaced by underscores. Each resulting path, and
    ``global_path`` itself, is handed to ``fileloader.createValidPath``
    (presumably creating the missing directories -- confirm against that
    helper).

    :param global_path: "/"-separated path of the global output file
    :param groups: mapping whose keys are the group labels
    :returns: dict (group_label -> path) plus a ``"global"`` entry holding
        ``global_path``; a group labelled ``"global"`` is overwritten by it
    """
    fileloader.createValidPath(global_path)

    # rpartition yields an empty separator when there is no directory part,
    # so a bare file name gives relative group paths instead of paths
    # anchored at the filesystem root ("/label/file").
    dir_part, separator, file_name = global_path.rpartition("/")
    base_dir = dir_part + separator

    output_paths = {}
    for group_label in groups:
        group_path = base_dir + group_label.replace(" ", "_") + "/" + file_name
        fileloader.createValidPath(group_path)
        output_paths[group_label] = group_path

    output_paths["global"] = global_path
    return output_paths
def set_all_paths(frequency_path, groups_label_id):
    """Return dict (group_id -> frequency file path) plus a "global" entry.

    Arguments:
        frequency_path = path which will hold all frequency files for each
                         group; its file name is reused inside every group
                         directory
        groups_label_id = dict ( group_id -> group_label ), ids being
                          "."-separated and labels "/"-separated

    Only groups whose label has fewer "/"-separated parts than the reference
    depth get a directory and an entry in the result. An empty mapping yields
    just the "global" entry.
    """
    fileloader.createValidPath(frequency_path)

    # An empty separator (no directory part) keeps group paths relative
    # rather than anchoring them at the filesystem root.
    dir_part, separator, file_name = frequency_path.rpartition("/")
    base_dir = dir_part + separator

    print(">> Setting paths for groups files")
    groups_files = {"global": frequency_path}
    if not groups_label_id:
        # Nothing to derive a depth from; max() would raise on no ids.
        return groups_files

    # NOTE(review): the depth comes from the lexicographically largest id,
    # which matches the deepest level only if all leaf ids share one depth
    # -- confirm against set_groups_ids.
    depth = len(max(groups_label_id.keys()).split("."))

    for group_id, group_label in groups_label_id.items():
        if len(group_label.split("/")) >= depth:
            continue
        dir_name = group_label.replace(" ", "_")
        # Decode byte strings only: decoding an already-unicode label forces
        # an implicit ASCII encode first and fails on non-ASCII text.
        if isinstance(dir_name, bytes):
            dir_name = dir_name.decode("utf-8")
        group_path = base_dir + dir_name + "/"
        fileloader.createValidPath(group_path)
        groups_files[group_id] = group_path + file_name

    return groups_files
def adjust_paths(configs, main_paths=None, dir_list=None):
    """Prefix the path entries of a config structure with their root path.

    The structure is modified in place. A dict entry is rewritten when the
    part of its key before the first "_" is listed in ``dir_list`` and the
    key is not itself a root entry (a name from ``dir_list``, optionally
    followed by ``_path``). Its new value is
    ``main_paths[<prefix> + "_path"] + value`` and is then passed to
    ``floader.createValidPath``. Nested dicts and lists are processed
    recursively.

    :param configs: Config dictionary stored at /src/config/ (or a nested
        dict/list of it)
    :param main_paths: root paths keyed ``<dir>_path``; defaults to
        ``configs["paths"]`` when falsy
    :param dir_list: root dir names; defaults to ``configs["dir_names"]``
        when falsy
    """
    if not main_paths:
        main_paths = configs["paths"]
    if not dir_list:
        dir_list = configs["dir_names"]
    # Defaults are resolved once here; the helper recurses without looking
    # them up again on nested nodes that have no "paths"/"dir_names" keys.
    _adjust_paths_in(configs, main_paths, dir_list)


def _adjust_paths_in(node, main_paths, dir_list):
    """Recursively rewrite the path entries of *node* (dict or list) in place."""
    suffix = "_path"
    if isinstance(node, dict):
        # Iterate over a snapshot because entries are reassigned in the loop.
        for attr, value in list(node.items()):
            dir_name = attr.split("_")[0]
            # Remove the literal "_path" suffix; str.rstrip("_path") strips
            # any trailing run of those characters ("data_path" -> "d"), so
            # root entries were not recognised and got prefixed themselves.
            if attr.endswith(suffix):
                root_name = attr[:-len(suffix)]
            else:
                root_name = attr
            if dir_name in dir_list and root_name not in dir_list:
                node[attr] = main_paths[dir_name + suffix] + value
                floader.createValidPath(node[attr])
            if isinstance(value, (list, dict)):
                _adjust_paths_in(value, main_paths, dir_list)
    elif isinstance(node, list):
        for inner_config in node:
            if isinstance(inner_config, (list, dict)):
                _adjust_paths_in(inner_config, main_paths, dir_list)