def test_predictor(self):
    """
    Check that Predictor can score SNPs given in feature-vector format.

    Parameters are imported from "params.npz" in self.data_dir, the
    "test_dataset" file under the central test dataset directory is
    predicted, and the first predicted value (rounded to 4 decimals)
    must equal 0.2729.
    """
    self.individual_debug = True
    self.init_test("test_predictor")
    self.init_predictor_instance()

    model = Predictor()
    dataset_path = os.path.join(
        combivep_settings.COMBIVEP_CENTRAL_TEST_DATASET_DIR,
        "test_dataset",
    )
    samples = DataSet(dataset_path)
    trained_params = os.path.join(self.data_dir, "params.npz")
    model.import_parameters(params_file=trained_params)

    predictions = model.predict(samples)
    first_score = round(predictions[0][0], 4)
    self.assertEqual(first_score,
                     0.2729,
                     msg="Predictor does not functional properly")
def predict_deleterious_probability(SNPs_file,
                                    params_file=combivep_settings.USER_PARAMETERS_FILE,
                                    file_type=combivep_settings.FILE_TYPE_VCF,
                                    output_file=None,
                                    config_file=combivep_settings.COMBIVEP_CONFIGURATION_FILE,
                                    ):
    """
    Predict the deleterious probability of every SNP in SNPs_file and
    write a tab-separated report (one header line, then one line per SNP).

    CBV (CombiVEP format) is a parsed format intended to be used by CombiVEP.
    CBV has 5 fields, CHROM, POS, REF, ALT, EFFECT (1=deleterious, 0=neutral).
    All are tab separated.

    Required arguments
    - SNPs_file : list of SNPs to be predicted, can be either VCF or CBV
                  (default is VCF)

    Optional arguments
    - params_file : parameters file passed to Predictor.import_parameters
    - file_type   : format of SNPs_file, forwarded to DataSetManager.load_data
    - output_file : path of the report file; when None the report is
                    written to the current sys.stdout
    - config_file : configuration file passed to DataSetManager

    Returns None.

    The report is written through a local stream: sys.stdout is never
    rebound, and a file opened for output_file is always closed, even
    when writing a row fails.
    """

    # pre-processing test dataset
    sys.stderr.write('pre-processing dataset, this may take a while (around 750 SNPs/mins). . . . ' + '\n')
    dm = DataSetManager(config_file=config_file)
    dm.load_data(SNPs_file, file_type=file_type)
    dm.validate_data()
    dm.calculate_scores()

    # predict
    predictor = Predictor()
    predictor.import_parameters(params_file=params_file)
    # flatten the prediction result so that out[i] belongs to dm.dataset[i]
    out = np.array(predictor.predict(dm.dataset)).reshape(-1,)

    # print output
    # NOTE(review): "LRT_SCORT" looks like a typo of "LRT_SCORE"; it is kept
    # as-is because existing consumers may match on this header text.
    header = ("CHROM",
              "POS",
              "REF",
              "ALT",
              "ACTUAL_DELETERIOUS_EFFECT",
              "PREDICTED_DELETERIOUS_PROBABILITY",
              "PHYLOP_SCORE",
              "SIFT_SCORE",
              "PP2_SCORE",
              "LRT_SCORT",
              "MT_SCORE",
              "GERP_SCORE",
              )
    row_format = "%s\t%s\t%s\t%s\t%s\t%6.4f\t%s\t%s\t%s\t%s\t%s\t%s\n"

    write_to_file = output_file is not None
    if write_to_file:
        out_stream = open(output_file, 'w')
    else:
        out_stream = sys.stdout
    try:
        out_stream.write("#" + "\t".join(header) + "\n")
        # index-based access is kept: only len() and [] are known to be
        # supported by dm.dataset from this code
        for i in range(len(dm.dataset)):
            record = dm.dataset[i]
            snp_info = record[combivep_settings.KEY_SNP_INFO_SECTION]
            prediction = record[combivep_settings.KEY_PREDICTION_SECTION]
            scores = record[combivep_settings.KEY_SCORES_SECTION]
            out_stream.write(row_format % (snp_info[combivep_settings.KEY_CHROM],
                                           snp_info[combivep_settings.KEY_POS],
                                           snp_info[combivep_settings.KEY_REF],
                                           snp_info[combivep_settings.KEY_ALT],
                                           prediction[combivep_settings.KEY_TARGETS],
                                           out[i],
                                           scores[combivep_settings.KEY_PHYLOP_SCORE],
                                           scores[combivep_settings.KEY_SIFT_SCORE],
                                           scores[combivep_settings.KEY_PP2_SCORE],
                                           scores[combivep_settings.KEY_LRT_SCORE],
                                           scores[combivep_settings.KEY_MT_SCORE],
                                           scores[combivep_settings.KEY_GERP_SCORE],
                                           ))
    finally:
        # only close what this function opened; never close the caller's stdout
        if write_to_file:
            out_stream.close()
def test_predictor(self):
    """
    Verify Predictor output for SNPs supplied in feature-vector format.

    Imports parameters from 'params.npz' in self.data_dir, predicts the
    'test_dataset' sample file and expects the first predicted value,
    rounded to 4 decimals, to be 0.2729.
    """
    self.individual_debug = True
    self.init_test('test_predictor')
    self.init_predictor_instance()

    model = Predictor()
    samples = DataSet(
        os.path.join(cbv_const.CBV_SAMPLE_DATASET_DIR, 'test_dataset')
    )
    model.import_parameters(
        params_file=os.path.join(self.data_dir, 'params.npz')
    )
    scores = model.predict(samples)

    self.assertEqual(
        round(scores[0][0], 4),
        0.2729,
        msg='Predictor does not functional properly',
    )
def fast_predict(SNPs_file,
                 params_file=cbv_const.USER_PARAMS_FILE,
                 file_type=cbv_const.FILE_TYPE_VCF,
                 output_file=None,
                 cfg_file=cbv_const.CBV_CFG_FILE,
                 ):
    """
    Predict the deleterious probability of every SNP in SNPs_file using
    FastDataSetManager and write a tab-separated report (one header line,
    then one line per SNP).

    CBV (CombiVEP format) is a parsed format intended to be used by CombiVEP.
    CBV has 5 fields, CHROM, POS, REF, ALT, EFFECT (1=deleterious, 0=neutral).
    All are tab separated.

    Required arguments
    - SNPs_file : list of SNPs to be predicted

    Optional arguments
    - params_file : parameters file passed to Predictor.import_parameters
    - file_type   : accepted for signature compatibility but currently
                    ignored; the data is always loaded with
                    dev_const.FILE_TYPE_SCORES
    - output_file : path of the report file; when None the report is
                    written to the current sys.stdout
    - cfg_file    : configuration file passed to FastDataSetManager

    Returns None.

    The report is written through a local stream: sys.stdout is never
    rebound, and a file opened for output_file is always closed, even
    when writing a row fails.
    """

    # pre-processing test dataset
    info('pre-processing dataset, this may take a while (around 750 SNPs/mins). . .')
    dm = FastDataSetManager(cfg_file=cfg_file)
    dm.load_data(SNPs_file, file_type=dev_const.FILE_TYPE_SCORES)

    # predict
    predictor = Predictor()
    predictor.import_parameters(params_file=params_file)
    # flatten the prediction result so that out[i] belongs to dm.dataset[i]
    out = np.array(predictor.predict(dm.dataset)).reshape(-1,)

    # print output
    # NOTE(review): "LRT_SCORT" looks like a typo of "LRT_SCORE"; it is kept
    # as-is because existing consumers may match on this header text.
    header = ["CHROM",
              "POS",
              "REF",
              "ALT",
              "ACTUAL_DELETERIOUS_EFFECT",
              "PREDICTED_DELETERIOUS_PROBABILITY",
              "PHYLOP_SCORE",
              "SIFT_SCORE",
              "PP2_SCORE",
              "LRT_SCORT",
              "MT_SCORE",
              "GERP_SCORE",
              ]

    write_to_file = output_file is not None
    if write_to_file:
        out_stream = open(output_file, 'w')
    else:
        out_stream = sys.stdout
    try:
        out_stream.write("#" + "\t".join(header) + "\n")
        # index-based access is kept: only len() and [] are known to be
        # supported by dm.dataset from this code
        for i in range(len(dm.dataset)):
            record = dm.dataset[i]
            snp_data = record[cbv_const.KW_SNP_DATA]
            scores = record[cbv_const.KW_SCORES]
            # NOTE(review): str.join requires every field to be a string;
            # presumably the SNP fields and scores are stored as text --
            # confirm against FastDataSetManager
            row = [snp_data.chrom,
                   snp_data.pos,
                   snp_data.ref,
                   snp_data.alt,
                   snp_data.target,
                   "%6.4f" % out[i],
                   scores.phylop_score,
                   scores.sift_score,
                   scores.pp2_score,
                   scores.lrt_score,
                   scores.mt_score,
                   scores.gerp_score,
                   ]
            out_stream.write("\t".join(row) + "\n")
    finally:
        # only close what this function opened; never close the caller's stdout
        if write_to_file:
            out_stream.close()