def output_gender(args):
    """Print the predicted gender of a sample to standard output.

    Loads the reference archive (``args.reference``) and the sample archive
    (``args.infile``), hands the sample's ``'sample'`` entry and the
    reference's ``'trained_cutoff'`` entry to ``predict_gender`` and prints
    ``male`` when the prediction equals ``'M'``, ``female`` otherwise.

    Args:
        args: Parsed command-line arguments; the ``reference`` and ``infile``
            attributes are read here.
    """
    # allow_pickle=True is required: ``.item()`` is called on the 'sample'
    # entry, i.e. it is stored as a pickled Python object, and
    # NumPy >= 1.16.3 refuses to load those by default (ValueError).
    # NOTE(security): unpickling can execute arbitrary code, so only load
    # reference/sample files that come from a trusted source.
    with np.load(args.reference, encoding='latin1',
                 allow_pickle=True) as ref_file:
        with np.load(args.infile, encoding='latin1',
                     allow_pickle=True) as sample_file:
            gender = predict_gender(sample_file['sample'].item(),
                                    ref_file['trained_cutoff'])

    print('male' if gender == 'M' else 'female')
def _validate_test_args(args):
    """Exit with status 1 if the arguments of ``tool_test`` are unusable."""
    if not args.bed and not args.plot:
        logging.critical(
            'No output format selected. '
            'Select at least one of the supported output formats (--bed, --plot)'
        )
        sys.exit(1)

    if args.zscore <= 0:
        logging.critical(
            'Parameter --zscore should be a strictly positive number')
        sys.exit(1)

    if args.beta is not None and (args.beta <= 0 or args.beta > 1):
        logging.critical(
            'Parameter --beta should be a strictly positive number lower than 1'
        )
        sys.exit(1)

    if args.alpha <= 0 or args.alpha > 1:
        logging.critical(
            'Parameter --alpha should be a strictly positive number lower than 1'
        )
        sys.exit(1)


def tool_test(args):
    """Run the CNA prediction of one sample against a reference.

    Steps, in order: validate the arguments, load the reference and sample
    archives, rescale the sample to the reference bin size, normalize the
    autosomes and the gonosomes, merge both result sets, post-process them,
    optionally apply the blacklist, run circular binary segmentation and
    write the requested outputs (tables and/or plots).

    Args:
        args: Parsed command-line arguments. Read directly here: ``bed``,
            ``plot``, ``zscore``, ``beta``, ``alpha``, ``reference``,
            ``infile``, ``gender`` and ``blacklist``. The whole object is also
            forwarded to the helper functions.

    Raises:
        SystemExit: With exit status 1 when no output format is selected or
            when ``zscore``, ``beta`` or ``alpha`` is out of range.
    """
    logging.info('Starting CNA prediction')
    _validate_test_args(args)

    logging.info('Importing data ...')
    # allow_pickle=True is required: the 'sample' entry is a pickled mapping
    # (``.item()`` / ``.values()`` are used on it) and NumPy >= 1.16.3
    # refuses to load pickled objects by default (ValueError).
    # NOTE(security): unpickling can execute arbitrary code, so only load
    # reference/sample files that come from a trusted source.
    with np.load(args.reference, encoding='latin1',
                 allow_pickle=True) as ref_file:
        with np.load(args.infile, encoding='latin1',
                     allow_pickle=True) as sample_file:
            sample = sample_file['sample'].item()
            sample_binsize = int(sample_file['binsize'].item())

        ref_binsize = int(ref_file['binsize'])

        # Total read count, taken before the sample is rescaled.
        n_reads = sum(sum(bins) for bins in sample.values())
        sample = scale_sample(sample, sample_binsize, ref_binsize)

        if not ref_file['is_nipt']:
            actual_gender = predict_gender(sample, ref_file['trained_cutoff'])
            if args.gender:
                # An explicitly requested gender overrides the prediction.
                actual_gender = args.gender
            sample = gender_correct(sample, actual_gender)
        else:
            # NIPT references: no prediction; default to 'F' unless overridden.
            actual_gender = args.gender if args.gender else 'F'

        ref_gender = actual_gender

        logging.info('Normalizing autosomes ...')
        results_r, results_z, results_w, ref_sizes, m_lr, m_z = normalize(
            args, sample, ref_file, 'A')

        # Fall back to the other gonosomal reference when the matching one
        # is flagged as unavailable in the reference file.
        if not ref_file['has_male'] and actual_gender == 'M':
            logging.warning(
                'This sample is male, whilst the reference is created with fewer than 5 males. '
                'The female gonosomal reference will be used for X predictions. Note that these might '
                'not be accurate. If the latter is desired, create a new reference and include more '
                'male samples.')
            ref_gender = 'F'
        elif not ref_file['has_female'] and actual_gender == 'F':
            logging.warning(
                'This sample is female, whilst the reference is created with fewer than 5 females. '
                'The male gonosomal reference will be used for XY predictions. Note that these might '
                'not be accurate. If the latter is desired, create a new reference and include more '
                'female samples.')
            ref_gender = 'M'

        logging.info('Normalizing gonosomes ...')
        null_ratios_aut_per_bin = ref_file['null_ratios']
        # Only the rows located after the autosomal bins are kept from the
        # gender-specific null ratios.
        null_ratios_gon_per_bin = ref_file['null_ratios.{}'.format(
            ref_gender)][len(null_ratios_aut_per_bin):]

        results_r_2, results_z_2, results_w_2, ref_sizes_2, _, _ = normalize(
            args, sample, ref_file, ref_gender)

        rem_input = {
            'args': args,
            'wd': str(os.path.dirname(os.path.realpath(__file__))),
            'binsize': ref_binsize,
            'n_reads': n_reads,
            'ref_gender': ref_gender,
            'actual_gender': actual_gender,
            'mask': ref_file['mask.{}'.format(ref_gender)],
            'bins_per_chr': ref_file['bins_per_chr.{}'.format(ref_gender)],
            'masked_bins_per_chr':
                ref_file['masked_bins_per_chr.{}'.format(ref_gender)],
            'masked_bins_per_chr_cum':
                ref_file['masked_bins_per_chr_cum.{}'.format(ref_gender)],
        }
    # Both archives are closed here; everything needed was copied out above.

    # Merge autosomal and gonosomal results into single flat arrays.
    results_r = np.append(results_r, results_r_2)
    results_z = np.append(results_z, results_z_2) - m_z
    # Scale each weight set by the other's median, then renormalize so the
    # combined weights have a median of 1.
    results_w = np.append(results_w * np.nanmedian(results_w_2),
                          results_w_2 * np.nanmedian(results_w))
    results_w = results_w / np.nanmedian(results_w)
    ref_sizes = np.append(ref_sizes, ref_sizes_2)

    # Center every autosomal null sample on its median, computed over the
    # autosomal bins for which this sample has a non-NaN ratio.
    null_ratios_aut_per_sample = np.transpose(null_ratios_aut_per_bin)
    part_mask = ~np.isnan(results_r)
    aut_mask = part_mask[:len(null_ratios_aut_per_bin)]
    null_m_lr_aut = np.array([
        np.nanmedian(null_sample[aut_mask])
        for null_sample in null_ratios_aut_per_sample
    ])
    null_ratios_aut_per_bin = null_ratios_aut_per_bin - null_m_lr_aut

    # Built from Python lists (not concatenated) on purpose: this keeps the
    # original construction for the case where autosomal and gonosomal rows
    # do not have the same length.
    null_ratios = np.array(
        [row.tolist() for row in null_ratios_aut_per_bin] +
        [row.tolist() for row in null_ratios_gon_per_bin])

    results = {
        'results_r': results_r,
        'results_z': results_z,
        'results_w': results_w,
        'results_nr': null_ratios
    }

    for key in results:
        results[key] = get_post_processed_result(args, results[key],
                                                 ref_sizes, rem_input)

    log_trans(results, m_lr)

    if args.blacklist:
        logging.info('Applying blacklist ...')
        apply_blacklist(rem_input, results)

    logging.info('Executing circular binary segmentation ...')
    results['results_c'] = exec_cbs(rem_input, results)

    if args.bed:
        logging.info('Writing tables ...')
        generate_output_tables(rem_input, results)

    if args.plot:
        logging.info('Writing plots ...')
        exec_write_plots(rem_input, results)

    logging.info('Finished prediction')