# Beispiel #1 (Example 1)
# 0
def numerical_anysis(root_dir, xlim, norm=False):
    """Summarize every ``test_rew_*seeds.csv`` curve found under ``root_dir``.

    For each curve truncated at ``xlim`` env steps, reports the final and
    maximum reward (with std) plus the integrals of reward and reward-std
    over env steps, printed as a table. With ``norm=True`` the rewards are
    first shifted to start at zero, then each norm group is rescaled by its
    per-metric maximum and aggregated per output group.

    :param root_dir: directory searched recursively for result csv files.
    :param xlim: env-step cutoff; curves that never reach it are skipped.
    :param norm: enable the normalized / grouped report described above.
    """
    file_pattern = re.compile(r".*/test_rew_\d+seeds.csv$")
    norm_group_pattern = re.compile(r"(/|^)\w+?\-v(\d|$)")
    output_group_pattern = re.compile(r".*?(?=(/|^)\w+?\-v\d)")
    csv_files = find_all_files(root_dir, file_pattern)
    norm_group = group_files(csv_files, norm_group_pattern)
    output_group = group_files(csv_files, output_group_pattern)
    # calculate numerical outcome for each csv_file (y/std integration max_y, final_y)
    results = defaultdict(list)
    for f in csv_files:
        result = csv2numpy(f)
        if norm:
            # shift every curve so it starts at reward 0
            result = np.stack([
                result['env_step'], result['rew'] - result['rew'][0],
                result['rew:shaded']
            ])
        else:
            result = np.stack(
                [result['env_step'], result['rew'], result['rew:shaded']])

        # skip curves that never reach the cutoff
        if result[0, -1] < xlim:
            continue

        final_rew = np.interp(xlim, result[0], result[1])
        final_rew_std = np.interp(xlim, result[0], result[2])
        result = result[:, result[0] <= xlim]

        # BUG FIX: the original tested ``len(result) == 0``, which is always
        # false (``result`` has 3 rows, so ``len`` is 3). The intended guard
        # is against an empty column dimension after truncation, which would
        # otherwise raise IndexError on ``result[0, -1]`` below.
        if result.shape[1] == 0:
            continue

        if result[0, -1] < xlim:
            # append the interpolated value exactly at the cutoff
            last_line = np.array([xlim, final_rew,
                                  final_rew_std]).reshape(3, 1)
            result = np.concatenate([result, last_line], axis=-1)

        max_id = np.argmax(result[1])
        results['name'].append(f)
        results['final_reward'].append(result[1, -1])
        results['final_reward_std'].append(result[2, -1])
        results['max_reward'].append(result[1, max_id])
        results['max_std'].append(result[2, max_id])
        results['reward_integration'].append(np.trapz(result[1], x=result[0]))
        results['reward_std_integration'].append(
            np.trapz(result[2], x=result[0]))

    results = {k: np.array(v) for k, v in results.items()}
    print(tabulate(results, headers="keys"))

    if norm:
        # calculate normalized numerical outcome for each csv_file group
        for _, fs in norm_group.items():
            mask = np.isin(results['name'], fs)
            for k, v in results.items():
                if k == 'name':
                    continue
                # NOTE(review): divides by the group max — presumably the
                # rewards are positive; a zero/negative max would misbehave.
                v[mask] = v[mask] / max(v[mask])
        # Add all numerical results for each outcome group
        group_results = defaultdict(list)
        for g, fs in output_group.items():
            group_results['name'].append(g)
            mask = np.isin(results['name'], fs)
            group_results['num'].append(sum(mask))
            for k in results.keys():
                if k == 'name':
                    continue
                group_results[k + ":norm"].append(results[k][mask].mean())
        # print all outputs for each csv_file and each outcome group
        print()
        print(tabulate(group_results, headers="keys"))
# Beispiel #2 (Example 2)
# 0
     type=str,
     default=r".*",
     help=
     'regular expression to extract legend from csv file path, default to '
     'using file path as legend name.')
 parser.add_argument('--show', action='store_true', help='show figure')
 parser.add_argument('--output-path',
                     type=str,
                     help='figure save path',
                     default="./figure.png")
 parser.add_argument('--dpi',
                     type=int,
                     default=200,
                     help='figure dpi (default: 200)')
 args = parser.parse_args()
 file_lists = find_all_files(args.root_dir, re.compile(args.file_pattern))
 file_lists = [os.path.relpath(f, args.root_dir) for f in file_lists]
 if args.style:
     plt.style.use(args.style)
 os.chdir(args.root_dir)
 plot_figure(file_lists,
             group_pattern=args.group_pattern,
             legend_pattern=args.legend_pattern,
             fig_length=args.fig_length,
             fig_width=args.fig_width,
             title=args.title,
             xlabel=args.xlabel,
             ylabel=args.ylabel,
             xkey=args.xkey,
             ykey=args.ykey,
             xlim=args.xlim,
# Beispiel #3 (Example 3)
# 0

def list_to_dict(pos_lines):
    """Group annotation records by their leading uid.

    Each element of ``pos_lines`` wraps a single record at index 0, where
    ``record[0]`` is the uid and ``record[6:]`` carries the annotation
    payload (malignancy + bounding box) — assumes the ``parse_xml`` output
    layout; TODO confirm against that helper.

    :param pos_lines: iterable of single-record wrappers.
    :return: dict mapping uid -> list of ``record[6:]`` payload slices.
    """
    pos_dict = {}
    for pos_line in pos_lines:
        record = pos_line[0]
        # setdefault replaces the original `in pos_dict.keys()` double
        # lookup; `[6:]` is equivalent to the original `[6::1]` slice.
        pos_dict.setdefault(record[0], []).append(record[6:])
    return pos_dict


def save_csv(pos_dict):
    """Write one annotation csv per uid into ``settings.EXTRACTED_IMAGE_DIR``.

    :param pos_dict: mapping uid -> list of
        ``[malignacy, x_min, y_min, x_max, y_max]`` rows.
    """
    columns = ["malignacy", "x_min", "y_min", "x_max", "y_max"]
    for uid, nodule_info in pos_dict.items():
        frame = pd.DataFrame(nodule_info, columns=columns)
        frame.to_csv(settings.EXTRACTED_IMAGE_DIR + uid + ".csv",
                     index=False)


if __name__ == "__main__":
    root = "./data/"
    xml_files = find_all_files(root, ".xml")

    # convert every xml annotation file into one csv per series uid
    for xml_file in xml_files:
        nodule_lines = parse_xml(xml_file)
        nodule_dict = list_to_dict(nodule_lines)
        save_csv(nodule_dict)

    # BUG FIX: user-facing message typo "parded" -> "parsed"
    print("All xml files are parsed!")