예제 #1
0
 def test_get_classes_by_partial_name(self):
     """Partial-name lookup for "Implanting" must yield exactly the three implanting classes."""
     found = ReflectionHandler.get_classes_by_partial_name(
         "Implanting", "simulation/signal_implanting_strategy/"
     )
     expected = {
         FullSequenceImplanting,
         HealthySequenceImplanting,
         ReceptorImplanting,
     }
     # Compare as sets: the order in which classes are returned is not checked.
     self.assertSetEqual(expected, set(found))
예제 #2
0
def parse_commandline_arguments(args):
    """Parse the command-line options of the immuneML Galaxy YAML builder.

    Args:
        args: Sequence of argument strings passed straight to
            ``ArgumentParser.parse_args`` (e.g. ``sys.argv[1:]``).

    Returns:
        The ``argparse.Namespace`` holding the parsed options. Options that
        take ``nargs="+"`` are returned as lists.

    Raises:
        SystemExit: Raised by argparse itself when a required option is
            missing or a value is not among the allowed choices.
    """
    # The return value is intentionally discarded.
    # NOTE(review): presumably this call is made for its side effect of
    # importing the modules under "ml_methods/" so that the subclass lookup
    # below can discover them -- confirm against ReflectionHandler.
    ReflectionHandler.get_classes_by_partial_name("", "ml_methods/")
    ml_method_names = [
        cl.__name__
        for cl in ReflectionHandler.all_nonabstract_subclasses(MLMethod)
    ] + ["SimpleLogisticRegression"]

    parser = argparse.ArgumentParser(
        description="tool for building immuneML Galaxy YAML from arguments")

    # --- Output location -------------------------------------------------
    parser.add_argument(
        "-o",
        "--output_path",
        required=True,
        help="Output location for the generated yaml file (directory).")
    parser.add_argument(
        "-f",
        "--file_name",
        default="specs.yaml",
        help=
        "Output file name for the yaml file. Default name is 'specs.yaml' if not specified."
    )

    # --- Learning setup --------------------------------------------------
    parser.add_argument(
        "-l",
        "--labels",
        required=True,
        help=
        "Which metadata labels should be predicted for the dataset (separated by comma)."
    )
    parser.add_argument(
        "-m",
        "--ml_methods",
        nargs="+",
        choices=ml_method_names,
        required=True,
        help="Which machine learning methods should be applied.")
    parser.add_argument("-t",
                        "--training_percentage",
                        type=float,
                        required=True,
                        help="The percentage of data used for training.")
    parser.add_argument(
        "-c",
        "--split_count",
        type=int,
        required=True,
        help=
        "The number of times to repeat the training process with a different random split of the data."
    )

    # --- Sequence encoding options (each accepts one or more values) -----
    parser.add_argument(
        "-s",
        "--sequence_type",
        choices=["complete", "subsequence"],
        default=["subsequence"],
        nargs="+",
        help="Whether complete CDR3 sequences are used, or k-mer subsequences."
    )
    parser.add_argument(
        "-p",
        "--position_type",
        choices=["invariant", "positional"],
        nargs="+",
        help=
        "Whether IMGT-positional information is used for k-mers, or the k-mer positions are position-invariant."
    )
    parser.add_argument("-g",
                        "--gap_type",
                        choices=["gapped", "ungapped"],
                        nargs="+",
                        help="Whether the k-mers contain gaps.")
    parser.add_argument("-k", "--k", type=int, nargs="+", help="K-mer size.")
    parser.add_argument("-kl",
                        "--k_left",
                        type=int,
                        nargs="+",
                        help="Length before gap when k-mers are used.")
    parser.add_argument("-kr",
                        "--k_right",
                        type=int,
                        nargs="+",
                        help="Length after gap when k-mers are used.")
    parser.add_argument("-gi",
                        "--min_gap",
                        type=int,
                        nargs="+",
                        help="Minimal gap length when gapped k-mers are used.")
    parser.add_argument("-ga",
                        "--max_gap",
                        type=int,
                        nargs="+",
                        help="Maximal gap length when gapped k-mers are used.")
    parser.add_argument(
        "-r",
        "--reads",
        choices=[ReadsType.UNIQUE.value, ReadsType.ALL.value],
        nargs="+",
        default=[ReadsType.UNIQUE.value],
        help=
        "Whether k-mer counts should be scaled by unique clonotypes or all observed receptor sequences"
    )

    return parser.parse_args(args)