Example #1
0
                            required=True,
                            help="list of .npy filenames for labels to be split randomly e.g. "
                                 "'--label_filenames training_y_0.csv validation_y_0.csv "
                                 "training_y_-2.csv validation_y_-2.csv'. "
                                 "Assumed to be grouped by threshold, in the same order as "
                                 "the thresholds parameter.")
# --thresholds: mandatory, one or more integers given on the command line.
required_named.add_argument(
    "--thresholds",
    nargs='+',
    type=int,
    required=True,
    help="list of thresholds to use to reduce dataset by similarity "
         "score e.g. --thresholds 0 -2",
)

args = parser.parse_args()
log_utils.setup_logging(args.verbosity)

# Pull the command-line values into module-level names; these names are
# kept unchanged because the rest of the script refers to them.
label_filenames = args.label_filenames
data_filenames = args.data_filenames
thresholds = args.thresholds

# Relative sizes of the two groups the data is divided into.
# NOTE(review): presumably a percentage split (e.g. training/validation) --
# confirm against where this is consumed further down.
group_proportions = [80, 20]

logging.info(f"Reading in data frame {args.input}")
data_frame = utils.read_bound_pairs(args.input)
logging.info(f"Read {len(data_frame)} rows")

# Keep only rows flagged as observed binding, then cap the count at
# --num_negatives.
# NOTE(review): the log message below says "random", but .iloc[:n] takes the
# first n matching rows in their existing order -- confirm the frame is
# already shuffled, or that this wording is intended.
is_positive = data_frame['binding_observed'] == 1
positives_df = data_frame[is_positive].iloc[:args.num_negatives]
logging.info(f"Chosen {len(positives_df)} random positive samples")

# Chunk the flat list of label filenames into consecutive pairs, one pair per
# threshold. Filenames beyond 2 * len(thresholds) are not included, and a
# shorter list yields short or empty trailing groups.
label_filenames_grouped = [
    label_filenames[start:start + 2]
    for start in range(0, 2 * len(thresholds), 2)
]
logging.info(f"{len(label_filenames)} label files grouped into "
"""Wrapper for snakemake to call the find_unique_bound_pairs script"""
import logging
import traceback

import peptidebinding.find_unique_bound_pairs as find_unique_bound_pairs
import peptidebinding.helper.log_utils as log_utils

# Route log output to the first log file snakemake assigned to this rule.
log_utils.setup_logging(3, logfile=snakemake.log[0])

try:
    # Resolve the entry point, then gather the rule's inputs and outputs under
    # the keyword names main() is called with. All of this stays inside the
    # try so that any failure here is logged as well.
    run_script = find_unique_bound_pairs.main
    script_kwargs = {
        "bound_pairs_tables": snakemake.input.bound_pairs,
        "output_file": snakemake.output.bound_pairs,
        "fragment_lengths_out": snakemake.output.fragment_lengths,
    }
    run_script(**script_kwargs)
except:
    # Deliberately catches everything: the traceback is written to the log
    # and the exception is then re-raised unchanged so the caller still sees
    # the failure.
    logging.error(f"Unexpected error:\n{traceback.format_exc()}")
    raise