Example #1
    return list_group_prefix


if __name__ == "__main__":
    args = get_args()

    # get logger
    logger = h.get_logger(args.file_id, 'missing_values')

    # load proteomics data
    data_df = pd.read_csv(args.input_file, header=0, index_col=None)

    # get parameters
    rule_params = h.load_json_parameter(args.file_id)
    filename = h.filename(args.input_file)
    data_structure = h.load_json_data(args.file_id, filename,
                                      rule_params['all']['divide'])
    values_cols_prefix = rule_params['all']['values_cols_prefix']

    # NaN per protein and per group
    group_prefix = get_groups(data_structure, values_cols_prefix)

    result_df, stats_per_groups = fqc.na_per_group(data_df, group_prefix,
                                                   values_cols_prefix)

    result_df = fqc.flag_row_supp(
        result_df, stats_per_groups,
        rule_params['missing_values']['max_na_percent_proteins'], 'na')

    keep_specific = rule_params['all']['specific_proteins']['keep']
    col_name = rule_params['all']['specific_proteins']['column_name']
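
# The body of get_groups() is cut off in this snippet; only its final
# "return list_group_prefix" line survives above. A minimal sketch, assuming
# each entry of data_structure names a sample group whose columns start with
# values_cols_prefix (hypothetical reconstruction, not the author's code):
#
# def get_groups(data_structure, values_cols_prefix):
#     list_group_prefix = []
#     for group in data_structure:
#         # one regex-friendly column prefix per group, e.g. 'VAL_groupA'
#         list_group_prefix.append('{}_{}'.format(values_cols_prefix, group))
#     return list_group_prefix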
Example #2
        # get data for samples in group
        data_group = df.filter(regex=group)

        # compute reduction on this data
        reduced_data_group = compute_reduction(data_group, ddof)

        # add result to result df
        res = pd.concat([res, reduced_data_group], axis=1)
    return res
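
# compute_reduction() itself is not part of this snippet. A minimal sketch,
# assuming it scales each sample column of the group by its own standard
# deviation computed with the given ddof (hypothetical implementation):
def compute_reduction(data_group, ddof):
    # Column-wise division: each value is divided by its column's std.
    return data_group / data_group.std(ddof=ddof)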


if __name__ == "__main__":
    args = get_args()
    rule_params = h.load_json_parameter(args.file_id)
    filename = h.filename(args.input_file)
    data_structure = h.load_json_data(args.file_id, filename)

    # get logger
    logpath = os.path.join(paths.global_data_dir, args.file_id,
                           'log/reduce_line.log')
    logger = h.get_logger(logpath)

    # load data
    data_df = pd.read_csv(args.input_file, header=0, index_col=None)

    # get parameters
    ddof = rule_params['reduction']['ddof']
    values_cols_prefix = rule_params['all']['values_cols_prefix']
    metadata_col = rule_params['all']['metadata_col']
    depth = len(rule_params['reduction']['on']) + 1  # +1 for prefix
Example #3
def compute_protein_rank_per_subset(data_df):
    gmean_col = data_df.filter(regex='gmean_')

    for col in gmean_col:
        subset_name = '_'.join(col.split('_')[1:])
        data_df['rank_abundance_{}'.format(subset_name)] = data_df[
            'gmean_{}'.format(subset_name)].rank(ascending=False)

    return data_df
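
# A toy usage sketch (made-up column name and values, not from the source
# data) showing how the rank(ascending=False) call above assigns rank 1 to
# the highest geometric mean. The helper is never called at import time:
def _demo_rank_example():
    toy = pd.DataFrame({'gmean_groupA': [10.0, 30.0, 20.0]})
    toy = compute_protein_rank_per_subset(toy)
    # toy['rank_abundance_groupA'] is now [3.0, 1.0, 2.0]
    return toy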


if __name__ == "__main__":
    args = get_args()
    rule_params = h.load_json_parameter(args.project, args.version)
    filename = h.filename(args.input_file)
    data_structure = h.load_json_data(args.project, args.version, filename)

    logpath = os.path.join(rule_params['all']['logpath'], 'abundance_rank.log')
    logger = h.get_logger(logpath)

    data_df = pd.read_csv(args.input_file, header=0, index_col=0)
    logger.debug('Columns: %s', data_df.columns.values)
    logger.debug('Head of data:\n%s', data_df.head())
    logger.info('{} proteins to analyse.'.format(len(data_df)))

    ab_col = get_abundance_subset(data_structure, rule_params)
    logger.debug('Abundance subsets: %s', ab_col)
    data_df = compute_protein_gmean_per_subset(data_df, ab_col)
    result_df = compute_protein_rank_per_subset(data_df)

    try:
Example #4
def main() -> int:
    """
    :param debug: Set to True to enable debug
    :return:
    """
    # Parsing arguments
    arg_desc = sys.argv[0] + ': script that takes a JSON file as input and applies the configuration'
    parser = argparse.ArgumentParser(description=arg_desc)
    parser.add_argument('-n', help='JSON formatted file with node connectivity params', action='store',
                        required=True)
    parser.add_argument('-r', help='directory for reports', action='store', required=False)
    parser.add_argument('--log', help=f'set logging level. '
                                      f'Possible values: {log_levels}', action='store', required=False)
    args = parser.parse_args()
    if args.log and args.log.upper() in log_levels:
        user_level = getattr(logging, args.log.upper())
    else:
        user_level = logging.WARNING

    if args.r:
        user_report_directory = args.r
    else:
        user_report_directory = REPORTS_PATH
    # TODO: extend with capability to load several configuration params data files
    # Loading node attributes
    node_data = load_json_data(f"{NODE_DATA_PATH}/{args.n}")

    logger = enable_logging(name="l3topo", log_file=f"{LOG_PATH}/l3topo.log", level=user_level)
    logger.info(f"Logging level: {user_level}")
    logger.info(f"Directory for reporting: {user_report_directory}")
    logger.debug("%s", "=" * 20 + " Node connectivity data " + "=" * 20)
    logger.debug(msg=json.dumps(node_data, indent=4, skipkeys=True))

    # Check for proxy object presence
    fabric_proxy = None
    if node_data.get('fabric_proxy'):
        fabric_proxy = node_data.pop('fabric_proxy')
        # TODO: add checking for proxy attrs
        fp_ip = sroslib.extract_ipaddr(fabric_proxy["ip_address"])
        if fp_ip:
            fabric_proxy["ip_address"] = sroslib.extract_ipaddr(fabric_proxy["ip_address"])
            logger.debug("Fabric proxy configuration set: %s", fabric_proxy)
        else:
            fabric_proxy = None

    sros_node_obj = {}
    for k, v in node_data.items():
        ip = sroslib.extract_ipaddr(node_data[k]["ip_address"])
        if ip:
            node_data[k]["ip_address"] = ip
            if node_data[k].get("proxy"):
                if not fabric_proxy:
                    msg = "Proxy enabled, but proxy object is not correct or not specified."
                    raise ValueError(msg)
                node_data[k].pop("proxy")
                node_data[k]["fabric_proxy"] = fabric_proxy
            sros_node_obj[k] = sroslib.SROSNode(k, **node_data[k])
        else:
            msg = f"Node {k} IP@ is not correct. Please provide correct one."
            raise ValueError(msg)

    sros_ordered_list = list(sros_node_obj.keys())

    # Availability checking common DataFrame creation
    ping_res = []
    for node in sros_ordered_list:
        logger.info(f"Checking {node} availability ....")
        # Collecting results
        ping_res.append([sros_node_obj[node].ping_ones() for _ in range(3)])
    # Checking ping results and adding alert, if needed
    all_nodes = list(sros_ordered_list)
    for n, ping in enumerate(ping_res):
        if any(ping):
            ping.append(NO_ALERT)
        else:
            ping.append(ALERT)
            # Removing unavailable nodes from further processing
            sros_ordered_list.remove(all_nodes[n])
    data = np.array(ping_res)
    if not sros_ordered_list:
        logger.error("No nodes are available for processing.")
        return 1
    df_ping = pd.DataFrame(data, columns=["TRY#1", "TRY#2", "TRY#3", "IssueFound"], index=all_nodes)
    st_ping = df_ping.style.applymap(color_up_down)

    # Template handling
    env = load_j2_env(path_to_templ=f"./{J2_PATH}/")
    # TODO: l2 templates directory to be taken from params
    template = env.get_template('node_availability.html')
    logger.info(f"Rendering report for nodes availability ....")
    # Rendering
    html = template.render(ping_table=st_ping.render())
    # Write the HTML file
    with open(f'./{user_report_directory}/node_availability.html', mode='w') as fh:
        fh.write(html)

    # Processing nodes one by one and generating report for each
    for node in sros_ordered_list:

        html = ip_fabric_l3topo(sros_node_obj[node], logger)
        # Write the HTML file
        with open(f'./{user_report_directory}/{node}.html', mode='w') as fh:
            fh.write(html)
    return 0
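
# color_up_down() is used in the Styler.applymap() call above but is not part
# of this snippet. A minimal sketch, assuming it simply flags the ALERT marker
# in red and everything else in green (hypothetical helper):
def color_up_down(value):
    """Return a CSS style string for one cell of the availability table."""
    return 'background-color: red' if value == ALERT else 'background-color: green'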