Ejemplo n.º 1
0
def output_gender(args):
    """Print the predicted gender of a sample as ``male`` or ``female``.

    Loads the reference and sample archives, passes the sample's read data
    and the reference's ``trained_cutoff`` to ``predict_gender`` and prints
    ``male`` when the prediction is ``'M'``, ``female`` otherwise.

    Args:
        args: Object exposing ``reference`` and ``infile``, the paths handed
            to ``np.load`` (presumably .npz archives, since they are indexed
            by key -- confirm against the writer side).
    """
    # 'sample' is stored as a pickled object array (it is unwrapped with
    # .item()), and NumPy >= 1.16.3 refuses to load those unless
    # allow_pickle=True is passed explicitly.
    # NOTE(security): unpickling executes arbitrary code; only load archives
    # that come from a trusted source.
    load_opts = {'encoding': 'latin1', 'allow_pickle': True}

    # Context managers close the underlying archive handles deterministically.
    with np.load(args.reference, **load_opts) as ref_file, \
            np.load(args.infile, **load_opts) as sample_file:
        gender = predict_gender(sample_file['sample'].item(),
                                ref_file['trained_cutoff'])

    print('male' if gender == 'M' else 'female')
Ejemplo n.º 2
0
def _validate_test_args(args):
    """Abort with exit status 1 when the prediction parameters are unusable.

    Checks, in order: at least one of ``args.bed`` / ``args.plot`` is set,
    ``args.zscore`` is > 0, ``args.beta`` (when not None) lies in (0, 1] and
    ``args.alpha`` lies in (0, 1].

    Raises:
        SystemExit: With code 1, after logging the first failed check.
    """
    if not args.bed and not args.plot:
        logging.critical(
            'No output format selected. '
            'Select at least one of the supported output formats (--bed, --plot)'
        )
        sys.exit(1)

    if args.zscore <= 0:
        logging.critical(
            'Parameter --zscore should be a strictly positive number')
        sys.exit(1)

    # A value of exactly 1 is accepted, so the messages say "or equal to".
    if args.beta is not None and (args.beta <= 0 or args.beta > 1):
        logging.critical(
            'Parameter --beta should be a strictly positive number lower than '
            'or equal to 1'
        )
        sys.exit(1)

    if args.alpha <= 0 or args.alpha > 1:
        logging.critical(
            'Parameter --alpha should be a strictly positive number lower than '
            'or equal to 1'
        )
        sys.exit(1)


def tool_test(args):
    """Run the CNA prediction for one sample against a reference.

    Steps, as performed below: validate the parameters, load the reference
    and sample archives, rescale the sample to the reference bin size,
    determine the gender, normalize autosomes and gonosomes, post-process
    the combined results, optionally apply the blacklist, run the
    segmentation and write the requested outputs.

    Args:
        args: Parsed command-line namespace. Read here: ``bed``, ``plot``,
            ``zscore``, ``beta``, ``alpha``, ``reference``, ``infile``,
            ``gender`` and ``blacklist``; it is also forwarded unchanged to
            the helper functions.

    Raises:
        SystemExit: With code 1 when parameter validation fails.
    """
    logging.info('Starting CNA prediction')

    _validate_test_args(args)

    logging.info('Importing data ...')
    # 'sample' is stored as a pickled object array (it is unwrapped with
    # .item()); NumPy >= 1.16.3 only loads those with allow_pickle=True.
    # NOTE(security): unpickling executes arbitrary code; only load archives
    # that come from a trusted source.
    load_opts = {'encoding': 'latin1', 'allow_pickle': True}
    ref_file = np.load(args.reference, **load_opts)
    # Only two entries are needed from the sample archive, so it is closed
    # as soon as they have been read.
    with np.load(args.infile, **load_opts) as sample_file:
        sample = sample_file['sample'].item()
        sample_binsize = int(sample_file['binsize'].item())

    # Total read count, taken before rescaling.
    n_reads = sum(sum(counts) for counts in sample.values())

    sample = scale_sample(sample, sample_binsize, int(ref_file['binsize']))

    if not ref_file['is_nipt']:
        actual_gender = predict_gender(sample, ref_file['trained_cutoff'])
        # A user-supplied gender overrides the prediction before correction.
        if args.gender:
            actual_gender = args.gender
        sample = gender_correct(sample, actual_gender)
    else:
        actual_gender = 'F'

    # Also honour the override on the NIPT path, where 'F' is the default.
    if args.gender:
        actual_gender = args.gender

    ref_gender = actual_gender

    logging.info('Normalizing autosomes ...')

    results_r, results_z, results_w, ref_sizes, m_lr, m_z = normalize(
        args, sample, ref_file, 'A')

    # Fall back to the other gonosomal reference when the reference lacks
    # the sample's gender.
    if not ref_file['has_male'] and actual_gender == 'M':
        logging.warning(
            'This sample is male, whilst the reference is created with fewer than 5 males. '
            'The female gonosomal reference will be used for X predictions. Note that these might '
            'not be accurate. If the latter is desired, create a new reference and include more '
            'male samples.')
        ref_gender = 'F'

    elif not ref_file['has_female'] and actual_gender == 'F':
        logging.warning(
            'This sample is female, whilst the reference is created with fewer than 5 females. '
            'The male gonosomal reference will be used for XY predictions. Note that these might '
            'not be accurate. If the latter is desired, create a new reference and include more '
            'female samples.')
        ref_gender = 'M'

    logging.info('Normalizing gonosomes ...')

    null_ratios_aut_per_bin = ref_file['null_ratios']
    # The gender-specific array is sliced past the autosomal entries.
    null_ratios_gon_per_bin = ref_file['null_ratios.{}'.format(
        ref_gender)][len(null_ratios_aut_per_bin):]

    results_r_2, results_z_2, results_w_2, ref_sizes_2, _, _ = normalize(
        args, sample, ref_file, ref_gender)

    # Everything the downstream helpers need, gathered in one dict so the
    # reference archive can be released afterwards.
    rem_input = {
        'args': args,
        'wd': str(os.path.dirname(os.path.realpath(__file__))),
        'binsize': int(ref_file['binsize']),
        'n_reads': n_reads,
        'ref_gender': ref_gender,
        'actual_gender': actual_gender,
        'mask': ref_file['mask.{}'.format(ref_gender)],
        'bins_per_chr': ref_file['bins_per_chr.{}'.format(ref_gender)],
        'masked_bins_per_chr':
        ref_file['masked_bins_per_chr.{}'.format(ref_gender)],
        'masked_bins_per_chr_cum':
        ref_file['masked_bins_per_chr_cum.{}'.format(ref_gender)],
    }

    # The reference archive is not used past this point.
    del ref_file

    # Join the autosomal and gonosomal results into single arrays.
    results_r = np.append(results_r, results_r_2)
    results_z = np.append(results_z, results_z_2) - m_z
    # Each weight set is scaled by the other's median before joining, then
    # the joined weights are divided by their own median.
    results_w = np.append(results_w * np.nanmedian(results_w_2),
                          results_w_2 * np.nanmedian(results_w))
    results_w = results_w / np.nanmedian(results_w)
    ref_sizes = np.append(ref_sizes, ref_sizes_2)

    null_ratios_aut_per_sample = np.transpose(null_ratios_aut_per_bin)
    # True where the sample has a usable (non-NaN) ratio.
    part_mask = ~np.isnan(results_r)
    aut_mask = part_mask[:len(null_ratios_aut_per_bin)]
    null_m_lr_aut = np.array([
        np.nanmedian(per_sample[aut_mask])
        for per_sample in null_ratios_aut_per_sample
    ])

    null_ratios_aut_per_bin = null_ratios_aut_per_bin - null_m_lr_aut
    null_ratios = np.array([x.tolist() for x in null_ratios_aut_per_bin] +
                           [x.tolist() for x in null_ratios_gon_per_bin])

    results = {
        'results_r': results_r,
        'results_z': results_z,
        'results_w': results_w,
        'results_nr': null_ratios
    }

    # Only values are replaced, so iterating the dict directly is safe.
    for key in results:
        results[key] = get_post_processed_result(args, results[key],
                                                 ref_sizes, rem_input)

    log_trans(results, m_lr)

    if args.blacklist:
        logging.info('Applying blacklist ...')
        apply_blacklist(rem_input, results)

    logging.info('Executing circular binary segmentation ...')

    results['results_c'] = exec_cbs(rem_input, results)

    if args.bed:
        logging.info('Writing tables ...')
        generate_output_tables(rem_input, results)

    if args.plot:
        logging.info('Writing plots ...')
        exec_write_plots(rem_input, results)

    logging.info('Finished prediction')