def process(args):
    """
    Merge the mSINGS report and the mSINGS analysis in one MSIReport file.

    The first sample of the mSINGS report is completed with the records read
    from the analysis file, then it is serialised in MSIReport format.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    method = "MSINGS"
    # Only the first sample found in the mSINGS report is processed
    sample = list(MSINGSReport(args.input_report).samples.values())[0]
    with MSINGSAnalysis(args.input_analysis) as reader:
        for record in reader:
            if record.position not in sample.loci:
                # Locus absent from the report: the analysis record is added as is
                sample.addLocus(record)
                continue
            locus = sample.loci[record.position]
            locus.name = record.name
            if method in locus.results:
                locus.results[method]._class = "LocusResDistrib"
            else:
                locus.results[method] = LocusResDistrib(Status.none)
            # In both cases the data come from the analysis record
            locus.results[method].data = record.results[method].data
    # Write report
    MSIReport.write([sample], args.output_report)
def process(args):
    """
    Invalidate the loci with a too low support then re-evaluate the samples.

    For each sample, the loci results of the selected method that contain
    data but whose count is under the minimum support are set to undetermined
    with an empty score. The status and the score of the sample are then
    re-processed with the selected consensus method.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    reports = MSIReport.parse(args.input_reports)
    for spl in reports:
        # Filter loci status
        for locus in spl.loci.values():
            res_locus = locus.results[args.method_name]
            has_data = len(res_locus.data) != 0
            if has_data and res_locus.getCount() < args.min_distrib_support:
                res_locus.status = Status.undetermined
                res_locus.score = None
        # Re-process sample status (no status update for an unknown consensus)
        consensus = args.consensus_method
        if consensus == "majority":
            spl.setStatusByMajority(args.method_name, args.min_voting_loci)
        elif consensus == "ratio":
            spl.setStatusByInstabilityRatio(
                args.method_name,
                args.min_voting_loci,
                args.instability_ratio
            )
        elif consensus == "count":
            spl.setStatusByInstabilityCount(
                args.method_name,
                args.min_voting_loci,
                args.instability_count
            )
        spl.setScore(
            args.method_name,
            args.undetermined_weight,
            args.locus_weight_is_score
        )
    # Write report
    MSIReport.write(reports, args.output_reports)
def launchAddClf(args, models_path, reports_path):
    """
    Launch second classification with a list of classifiers and update reports file.

    The reports file is rewritten in place for each additional classifier:
    first to initialise the data of the new method from the default
    classifier, then by the submitted classification itself.

    :param args: Arguments of the script. The attributes add_classifiers and default_classifier are read here.
    :type args: argparse.NameSpace
    :param models_path: Path to the models file (format: MSIReport).
    :type models_path: str
    :param reports_path: Path to the report file obtained with first classification (format: MSIReport).
    :type reports_path: str
    """
    for clf_name in args.add_classifiers:
        # The method keeps the full name (e.g. "RandomForest:<n>") while the
        # classifier name and its parameters are split apart
        method_name = clf_name
        clf_params = None
        if clf_name.startswith("RandomForest:"):
            n_estimators = clf_name.split(":")[1]
            clf_name = "RandomForest"
            clf_params = '{"n_estimators": ' + n_estimators + '}'
        # Copy combination produced by MIAmS in data of the new method
        reports = MSIReport.parse(reports_path)
        lociInitData(reports, args.default_classifier, method_name)
        MSIReport.write(reports, reports_path)
        # Submit classification
        # NOTE(review): the reports path is given twice, presumably as input
        # and output of the classification - confirm against submitAddClf
        submitAddClf(
            models_path,
            reports_path,
            reports_path,
            args,
            method_name,
            clf_name,
            clf_params
        )
def process(args):
    """
    Add the loci annotations results in the samples of an MSIReport file.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    # Annotations are indexed by sample name
    annot_by_spl = getLocusAnnotDict(args.input_loci_annotations)
    samples = MSIReport.parse(args.input_report)
    for spl in samples:
        # A sample without entry in the annotations raises a KeyError
        spl_annot = annot_by_spl[spl.name]
        addLociResToSpl(spl, spl_annot)
    MSIReport.write(samples, args.output_report)
def process(args):
    """
    Tag stability for loci and sample from length distribution on loci.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    method = "PairsCombi"
    # Sample name: given by the user or deduced from the output filename
    if args.sample_name is not None:
        spl_name = args.sample_name
    else:
        spl_name = os.path.basename(args.output_report).split(".")[0]
        if spl_name.endswith("_report"):
            spl_name = spl_name[:-len("_report")]
    sample = MSISample(spl_name)
    # Parse lengths metrics by loci
    with HashedSVIO(args.input_combined_list) as reader:
        for row in reader:
            with open(row["Filepath"]) as handle:
                metrics = json.load(handle)
            locus_name = row["Locus_name"]
            locus_position = row["Locus_position"]
            locus_res = {
                "_class": "LocusResPairsCombi",
                "status": Status.none,
                "data": {
                    "nb_by_length": metrics["nb_by_length"],
                    # Aligned pairs are the sum of uncombined and combined pairs
                    "nb_pairs_aligned": metrics["nb_uncombined_pairs"] + metrics["nb_combined_pairs"]
                }
            }
            sample.addLocus(
                MSILocus.fromDict({
                    "name": locus_name,
                    "position": locus_position,
                    "results": {method: locus_res}
                })
            )
    # Process status: loci first, then the sample
    models = MSIReport.parse(args.input_models)
    for locus_id in sample.loci:
        PairsCombiProcessor(
            locus_id, models, [sample], args.min_support
        ).setLocusStatus()
    sample.setStatus(method)
    # Write report
    MSIReport.write([sample], args.output_report)
def process(args):
    """
    Create MSISample from loci metrics.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    # Sample name: given by the user or deduced from the output filename
    if args.sample_name is not None:
        spl_name = args.sample_name
    else:
        spl_name = os.path.basename(args.output_report).split(".")[0]
        if spl_name.endswith("_report"):
            spl_name = spl_name[:-len("_report")]
    sample = MSISample(spl_name)
    # Add result data by loci
    addLociDataFromFiles(
        sample,
        args.input_loci_metrics_list,
        args.method_name,
        args.result_keys,
        args.method_class_name
    )
    # Write report
    MSIReport.write([sample], args.output_report)
def process(args):
    """
    Create training data for MSI classifiers. These references are stored in MSIReport format.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    :raises ValueError: When the annotation file does not contain exactly one distinct value in method_id.
    """
    # Get method name from annotations file
    method_names = {
        record["method_id"] for record in MSIAnnot(args.input_loci_annot)
    }
    if len(method_names) != 1:
        # The reported file is the annotation file that has just been read
        raise ValueError(
            'The annotation file must contain only one value for method_id. The file "{}" contains {}.'
            .format(args.input_loci_annot, method_names))
    result_id = next(iter(method_names))
    # Get reference loci from targets file
    ref_loci = []
    with BEDIO(args.input_targets) as FH_in:
        for record in FH_in:
            # NOTE(review): start - 1 presumably converts the start to
            # 0-based - confirm against BEDIO
            ref_loci.append(
                MSILocus(
                    "{}:{}-{}".format(record.chrom, record.start - 1, record.end),
                    record.name
                )
            )
    # Aggregate samples
    msi_samples = getAggregatedSpl(args.inputs_report)
    # Add locus result info
    data_by_spl = getLocusAnnotDict(args.input_loci_annot)
    for curr_spl in msi_samples:
        addLociResToSpl(curr_spl, data_by_spl[curr_spl.name], LocusResPairsCombi)
    # Filter locus results
    populateLoci(msi_samples, ref_loci)
    pruneResults(msi_samples, result_id, args.min_support_fragments)
    # Display metrics
    writeStatusMetrics(msi_samples, result_id, args.output_info)
    # Write output
    MSIReport.write(msi_samples, args.output_references)
def process(args):
    """
    Predict classification (status and score) for all samples loci.

    Loci are classified one by one with a classifier fitted on the references,
    then the status and the score of each evaluated sample are processed with
    the selected consensus method.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    references = MSIReport.parse(args.input_references)
    evaluated = MSIReport.parse(args.input_evaluated)
    # Classification by locus: the list of loci comes from the first reference
    for locus_id in sorted(references[0].loci):
        # Select the samples with enough fragments to classify the distribution
        classifiable = []
        for spl in evaluated:
            locus_res = spl.loci[locus_id].results[args.method_name]
            if locus_res.getNbFrag() < args.min_support_fragments:
                locus_res.status = Status.undetermined
                locus_res.score = None
                continue
            classifiable.append(spl)
        # Classify
        if classifiable:
            clf = MIAmSClassifier(
                locus_id,
                args.method_name,
                "model",
                args.classifier,
                args.classifier_params
            )
            clf.fit(references)
            clf.set_status(classifiable)
    # Classification by sample (no status update for an unknown consensus)
    for spl in evaluated:
        consensus = args.consensus_method
        if consensus == "majority":
            spl.setStatusByMajority(args.method_name, args.min_voting_loci)
        elif consensus == "ratio":
            spl.setStatusByInstabilityRatio(
                args.method_name,
                args.min_voting_loci,
                args.instability_ratio
            )
        elif consensus == "count":
            spl.setStatusByInstabilityCount(
                args.method_name,
                args.min_voting_loci,
                args.instability_count
            )
        spl.setScore(
            args.method_name,
            args.undetermined_weight,
            args.locus_weight_is_score
        )
    MSIReport.write(evaluated, args.output_report)
test_samples = [ lib for lib in librairies if lib["spl_name"] in test_names ] # Select all libraries corresponding to the test samples # Process learn and tag train(train_samples, annotation_path, design_folder, baseline_path, models_path, learn_log_path, args) predict(test_samples, design_folder, baseline_path, models_path, out_folder, args) models = MSIReport.parse(models_path) reports = getMSISamples(os.path.join(out_folder, "data")) if len(args.add_classifiers) > 0: log.info( "Process {} additionnal classifiers on dataset {}/{} ({})." .format(len(args.add_classifiers), dataset_id, args.nb_tests - 1, dataset_md5)) MSIReport.write(reports, out_reports_path) launchAddClf(args, models_path, out_reports_path) reports = MSIReport.parse(out_reports_path) # Write results and dataset use_header = False out_mode = "a" if dataset_id == 0: use_header = True out_mode = "w" datasets_df_rows = [ getDatasetsInfo( dataset_id, dataset_md5, loci_id_by_name, models, reports, getLogInfo(learn_log_path), getLogInfo(os.path.join(out_folder, "MIAmSTag_log.txt")), status_by_spl) ]