예제 #1
0
def process(args):
    """
    Merge an mSINGS report and an mSINGS analysis file into one MSI report.

    The first sample found in the mSINGS report is used as base. Each record
    of the analysis file either updates the locus already present at the same
    position (name and "MSINGS" result data) or is added as a new locus. The
    resulting sample is written with MSIReport.write.

    :param args: The namespace extracted from the script arguments. The attributes input_report, input_analysis and output_report are read.
    :type args: Namespace
    """
    method = "MSINGS"
    # Only the first sample of the report is kept
    report_samples = list(MSINGSReport(args.input_report).samples.values())
    sample = report_samples[0]
    # Aggregate data
    with MSINGSAnalysis(args.input_analysis) as analysis_reader:
        for rec in analysis_reader:
            if rec.position not in sample.loci:
                # Locus unknown in the report: take it as is from the analysis
                sample.addLocus(rec)
                continue
            locus = sample.loci[rec.position]
            locus.name = rec.name
            if method in locus.results:
                # Existing result: only re-tag its class
                locus.results[method]._class = "LocusResDistrib"
            else:
                locus.results[method] = LocusResDistrib(Status.none)
            locus.results[method].data = rec.results[method].data
    # Write report
    MSIReport.write([sample], args.output_report)
예제 #2
0
def process(args):
    """
    Filter loci usable for instability status prediction.

    For every sample of the input reports, a locus result that contains data
    but whose count is lower than args.min_distrib_support is set to
    undetermined with no score. The sample status is then re-evaluated with
    the selected consensus method and the sample score is recomputed.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    samples = MSIReport.parse(args.input_reports)
    for sample in samples:
        method = args.method_name
        # Filter loci status
        for locus in sample.loci.values():
            result = locus.results[method]
            has_data = len(result.data) > 0
            if has_data and result.getCount() < args.min_distrib_support:
                result.status = Status.undetermined
                result.score = None
        # Re-process sample status
        consensus = args.consensus_method
        if consensus == "majority":
            sample.setStatusByMajority(method, args.min_voting_loci)
        elif consensus == "ratio":
            sample.setStatusByInstabilityRatio(
                method, args.min_voting_loci, args.instability_ratio
            )
        elif consensus == "count":
            sample.setStatusByInstabilityCount(
                method, args.min_voting_loci, args.instability_count
            )
        sample.setScore(
            method, args.undetermined_weight, args.locus_weight_is_score
        )
    # Write report
    MSIReport.write(samples, args.output_reports)
예제 #3
0
def launchAddClf(args, models_path, reports_path):
    """
    Launch second classification with a list of classifiers and update reports file.

    For each entry of args.add_classifiers, the reports file is parsed, passed
    to lociInitData with args.default_classifier and the new method name,
    written back to the same path, and then given to submitAddClf. The file at
    reports_path is therefore read and overwritten in place.

    :param args: Arguments of the script. The attributes add_classifiers and default_classifier are read here and the whole namespace is forwarded to submitAddClf.
    :type args: argparse.Namespace
    :param models_path: Path to the models file (format: MSIReport).
    :type models_path: str
    :param reports_path: Path to the report file obtained with first classification (format: MSIReport).
    :type reports_path: str
    """
    for method_name in args.add_classifiers:
        clf_name = method_name
        clf_params = None
        if method_name.startswith("RandomForest:"):
            # "RandomForest:<n>": the part after the colon is used as the
            # n_estimators parameter; the full string stays the method name.
            n_estimators = method_name.split(":")[1]
            clf_name = "RandomForest"
            clf_params = '{"n_estimators": ' + n_estimators + '}'
        # Copy combination produced by MIAmS in data of the new method.
        # The reports_path parameter is used rather than a module-level name,
        # so the function works whatever the caller's variable is called.
        reports = MSIReport.parse(reports_path)
        lociInitData(reports, args.default_classifier, method_name)
        MSIReport.write(reports, reports_path)
        # Submit classification (the same reports path is passed twice,
        # presumably as input and output of the classification)
        submitAddClf(models_path, reports_path, reports_path, args,
                     method_name, clf_name, clf_params)
예제 #4
0
def process(args):
    """
    Add loci annotations to the samples of an MSI report.

    The annotations are loaded with getLocusAnnotDict and indexed by sample
    name. Every sample of the input report receives its own annotations
    through addLociResToSpl, then the samples are written to the output
    report.

    :param args: The namespace extracted from the script arguments. The attributes input_loci_annotations, input_report and output_report are read.
    :type args: Namespace
    """
    annot_by_sample = getLocusAnnotDict(args.input_loci_annotations)
    samples = MSIReport.parse(args.input_report)
    for sample in samples:
        sample_annot = annot_by_sample[sample.name]
        addLociResToSpl(sample, sample_annot)
    MSIReport.write(samples, args.output_report)
예제 #5
0
def process(args):
    """
    Tag stability for loci and sample from length distribution on loci.

    A sample is built from the loci listed in args.input_combined_list: each
    row gives the locus name, its position and the path to a JSON file of
    metrics. Loci statuses are then set with PairsCombiProcessor against the
    models of args.input_models, the sample status is set for the
    "PairsCombi" method and the report is written.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    # Sample name: explicit argument, otherwise the output file name up to
    # its first dot and without a trailing "_report"
    sample_name = args.sample_name
    if sample_name is None:
        sample_name = os.path.basename(args.output_report).split(".")[0]
        if sample_name.endswith("_report"):
            sample_name = sample_name[:-len("_report")]
    sample = MSISample(sample_name)
    # Parse lengths metrics by loci
    with HashedSVIO(args.input_combined_list) as loci_reader:
        for row in loci_reader:
            with open(row["Filepath"]) as metrics_file:
                metrics = json.load(metrics_file)
            locus_name = row["Locus_name"]
            locus_position = row["Locus_position"]
            pairs_combi_res = {
                "_class": "LocusResPairsCombi",
                "status": Status.none,
                "data": {
                    "nb_by_length": metrics["nb_by_length"],
                    # Aligned pairs are the uncombined plus the combined ones
                    "nb_pairs_aligned": (
                        metrics["nb_uncombined_pairs"]
                        + metrics["nb_combined_pairs"]
                    ),
                },
            }
            locus = MSILocus.fromDict({
                "name": locus_name,
                "position": locus_position,
                "results": {"PairsCombi": pairs_combi_res},
            })
            sample.addLocus(locus)
    # Process status
    models = MSIReport.parse(args.input_models)
    for locus_id in sample.loci:
        PairsCombiProcessor(
            locus_id, models, [sample], args.min_support
        ).setLocusStatus()
    sample.setStatus("PairsCombi")
    # Write report
    MSIReport.write([sample], args.output_report)
예제 #6
0
def process(args):
    """
    Create MSISample from loci metrics.

    The sample is named after args.sample_name or, when it is None, after the
    output file name (text before the first dot, without a trailing
    "_report"). Loci data are loaded with addLociDataFromFiles and the sample
    is written as a single-sample report.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    sample_name = args.sample_name
    if sample_name is None:
        out_filename = os.path.basename(args.output_report)
        sample_name = out_filename.split(".")[0]
        if sample_name.endswith("_report"):
            sample_name = sample_name[:-len("_report")]
    sample = MSISample(sample_name)
    # Add result data by loci
    addLociDataFromFiles(
        sample,
        args.input_loci_metrics_list,
        args.method_name,
        args.result_keys,
        args.method_class_name,
    )
    # Write report
    MSIReport.write([sample], args.output_report)
예제 #7
0
def process(args):
    """
    Create training data for MSI classifiers. These references are stored in
    MSIReport format.

    Steps: read the single method_id used in the loci annotation file, build
    the reference loci from the targets BED file, aggregate the samples of the
    input reports, add the annotated locus results to each sample, filter the
    loci results, then write the status metrics and the references.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    :raises ValueError: If the annotation file does not contain exactly one distinct value for method_id.
    """
    # Get method name from annotations file
    method_names = set()
    for record in MSIAnnot(args.input_loci_annot):
        method_names.add(record["method_id"])
    if len(method_names) != 1:
        # The message is about the annotation file, so this is the path
        # reported (not the reports path).
        raise ValueError(
            'The annotation file must contain only one value for method_id. The file "{}" contains {}.'
            .format(args.input_loci_annot, method_names))
    result_id = list(method_names)[0]
    # Get reference loci from targets file
    ref_loci = []
    with BEDIO(args.input_targets) as FH_in:
        for record in FH_in:
            # The locus identifier uses start - 1 (presumably to switch from
            # a 1-based start to a 0-based one; verify against BEDIO)
            ref_loci.append(
                MSILocus(
                    "{}:{}-{}".format(record.chrom, record.start - 1,
                                      record.end), record.name))
    # Aggregate samples
    msi_samples = getAggregatedSpl(args.inputs_report)
    # Add locus result info
    data_by_spl = getLocusAnnotDict(args.input_loci_annot)
    for curr_spl in msi_samples:
        addLociResToSpl(curr_spl, data_by_spl[curr_spl.name],
                        LocusResPairsCombi)
    # Filter locus results
    populateLoci(msi_samples, ref_loci)
    pruneResults(msi_samples, result_id, args.min_support_fragments)
    # Display metrics
    writeStatusMetrics(msi_samples, result_id, args.output_info)
    # Write output
    MSIReport.write(msi_samples, args.output_references)
예제 #8
0
def process(args):
    """
    Predict classification (status and score) for all samples loci.

    For each locus of the first reference sample, evaluated samples with
    fewer fragments than args.min_support_fragments are set to undetermined
    with no score; the others are classified by a MIAmSClassifier fitted on
    the references. The status of each evaluated sample is then set with the
    selected consensus method, its score is computed and the report is
    written.

    :param args: The namespace extracted from the script arguments.
    :type args: Namespace
    """
    references = MSIReport.parse(args.input_references)
    evaluated = MSIReport.parse(args.input_evaluated)

    # Classification by locus
    for locus_id in sorted(references[0].loci.keys()):
        # Keep only the samples with enough fragments to classify the distribution
        classifiable = []
        for sample in evaluated:
            locus_res = sample.loci[locus_id].results[args.method_name]
            if locus_res.getNbFrag() < args.min_support_fragments:
                locus_res.status = Status.undetermined
                locus_res.score = None
                continue
            classifiable.append(sample)
        # Classify
        if classifiable:
            classifier = MIAmSClassifier(
                locus_id,
                args.method_name,
                "model",
                args.classifier,
                args.classifier_params,
            )
            classifier.fit(references)
            classifier.set_status(classifiable)

    # Classification by sample
    for sample in evaluated:
        consensus = args.consensus_method
        if consensus == "majority":
            sample.setStatusByMajority(args.method_name, args.min_voting_loci)
        elif consensus == "ratio":
            sample.setStatusByInstabilityRatio(
                args.method_name, args.min_voting_loci, args.instability_ratio
            )
        elif consensus == "count":
            sample.setStatusByInstabilityCount(
                args.method_name, args.min_voting_loci, args.instability_count
            )
        sample.setScore(
            args.method_name,
            args.undetermined_weight,
            args.locus_weight_is_score,
        )

    MSIReport.write(evaluated, args.output_report)
예제 #9
0
 # NOTE(review): fragment of a larger routine; names such as librairies,
 # test_names, train_samples, dataset_id, dataset_md5 and the *_path
 # variables are defined outside the visible lines.
 test_samples = [
     lib for lib in librairies if lib["spl_name"] in test_names
 ]  # Select all libraries corresponding to the test samples
 # Process learn and tag
 train(train_samples, annotation_path, design_folder, baseline_path,
       models_path, learn_log_path, args)
 predict(test_samples, design_folder, baseline_path, models_path,
         out_folder, args)
 # Load the models file and the samples found under <out_folder>/data
 models = MSIReport.parse(models_path)
 reports = getMSISamples(os.path.join(out_folder, "data"))
 # Optional additional classifiers: the reports are written to
 # out_reports_path, processed by launchAddClf on that same path, then
 # re-read so that "reports" holds what launchAddClf left in the file.
 if len(args.add_classifiers) > 0:
     log.info(
         "Process {} additionnal classifiers on dataset {}/{} ({})."
         .format(len(args.add_classifiers), dataset_id,
                 args.nb_tests - 1, dataset_md5))
     MSIReport.write(reports, out_reports_path)
     launchAddClf(args, models_path, out_reports_path)
     reports = MSIReport.parse(out_reports_path)
 # Write results and dataset
 # Dataset 0 selects mode "w" with a header; every other dataset selects
 # mode "a" without header (the use of these two values is outside this
 # fragment).
 use_header = False
 out_mode = "a"
 if dataset_id == 0:
     use_header = True
     out_mode = "w"
 # Single-row list built from the dataset identifiers, the models, the
 # reports and the information extracted from the learn and tag logs
 datasets_df_rows = [
     getDatasetsInfo(
         dataset_id, dataset_md5, loci_id_by_name, models, reports,
         getLogInfo(learn_log_path),
         getLogInfo(os.path.join(out_folder, "MIAmSTag_log.txt")),
         status_by_spl)
 ]