Exemple #1
0
    def parse_outputs(self, chunks):
        """Parse existing binding-prediction files per chunk, allele and length.

        For every combination of chunk, allele and epitope length this looks
        in ``self.tmp_dir`` for prediction output files belonging to the
        configured algorithms and, when at least one exists, hands them to
        ``self.output_parser``. A combination whose parsed file is already on
        disk is not parsed again.

        :param chunks: iterable of ``(split_start, split_end)`` integer pairs.
        :return: list of parsed-output file paths, covering both files that
            already existed and files this call asked the parser to write.
        """
        parsed_paths = []
        for (split_start, split_end) in chunks:
            tsv_chunk = "%d-%d" % (split_start, split_end)
            # 'fasta' input reuses the TSV numbering; any other input type
            # uses the range (2*start - 1)-(2*end) for the FASTA chunk.
            fasta_chunk = tsv_chunk
            if self.input_file_type != 'fasta':
                fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
            chunk_suffix = "tsv_%s" % fasta_chunk

            for allele in self.alleles:
                for epitope_length in self.epitope_lengths:
                    prediction_files = []
                    status_message("Parsing binding predictions for Allele %s and Epitope Length %s - Entries %s" % (allele, epitope_length, fasta_chunk))

                    # Collect whichever per-algorithm prediction files exist.
                    for method in self.prediction_algorithms:
                        prediction = globals()[method]()
                        # Fall back to the class name when the prediction
                        # object does not define its own IEDB method name.
                        iedb_method = getattr(prediction, 'iedb_prediction_method', method)
                        supported = (
                            allele in prediction.valid_allele_names()
                            and epitope_length in prediction.valid_lengths_for_allele(allele)
                        )
                        if not supported:
                            continue
                        candidate = os.path.join(self.tmp_dir, ".".join([self.sample_name, iedb_method, allele, str(epitope_length), chunk_suffix]))
                        if os.path.exists(candidate):
                            prediction_files.append(candidate)

                    parsed_path = os.path.join(self.tmp_dir, ".".join([self.sample_name, allele, str(epitope_length), "parsed", chunk_suffix]))
                    if os.path.exists(parsed_path):
                        status_message("Parsed Output File for Allele %s and Epitope Length %s (Entries %s) already exists. Skipping" % (allele, epitope_length, fasta_chunk))
                        parsed_paths.append(parsed_path)
                        continue

                    if self.input_file_type == 'pvacvector_input_fasta':
                        fasta_path = "{}_1-2.{}.tsv".format(self.split_fasta_basename(), epitope_length)
                    else:
                        fasta_path = "%s_%s"%(self.split_fasta_basename(), fasta_chunk)
                    key_path = fasta_path + '.key'

                    # Nothing to parse for this combination.
                    if not prediction_files:
                        continue

                    status_message("Parsing prediction file for Allele %s and Epitope Length %s - Entries %s" % (allele, epitope_length, fasta_chunk))
                    tsv_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
                    params = {
                        'input_iedb_files'       : prediction_files,
                        'input_tsv_file'         : tsv_path,
                        'key_file'               : key_path,
                        'output_file'            : parsed_path,
                    }
                    # The sample name is only passed on when it was requested
                    # as an additional report column.
                    report_sample_name = self.additional_report_columns and 'sample_name' in self.additional_report_columns
                    params['sample_name'] = self.sample_name if report_sample_name else None
                    self.output_parser(params).execute()
                    status_message("Parsing prediction file for Allele %s and Epitope Length %s - Entries %s - Completed" % (allele, epitope_length, fasta_chunk))

                    parsed_paths.append(parsed_path)
        return parsed_paths
Exemple #2
0
    def call_iedb_and_parse_outputs(self, chunks):
        """Run IEDB for each chunk and allele, then parse the combined output.

        For each chunk/allele pair, every configured prediction algorithm
        that supports the allele is run through ``lib.call_iedb.main`` unless
        its output file already exists in ``self.tmp_dir``. The resulting
        files are then passed to ``lib.parse_output.main`` unless the parsed
        file already exists.

        :param chunks: iterable of ``(split_start, split_end)`` integer pairs.
        :return: list of parsed-output file paths (existing or newly written).
        """
        parsed_files = []
        for (split_start, split_end) in chunks:
            tsv_chunk = "%d-%d" % (split_start, split_end)
            fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
            chunk_suffix = "tsv_%s" % fasta_chunk

            for allele in self.alleles:
                fasta_path = "%s_%s"%(self.split_fasta_basename(), fasta_chunk)
                iedb_files = []
                status_message("Processing entries for Allele %s - Entries %s" % (allele, fasta_chunk))

                for method in self.prediction_algorithms:
                    prediction = globals()[method]()
                    iedb_method = prediction.iedb_prediction_method
                    if allele not in prediction.valid_allele_names():
                        status_message("Allele %s not valid for Method %s. Skipping." % (allele, method))
                        continue

                    iedb_file = os.path.join(self.tmp_dir, ".".join([self.sample_name, iedb_method, allele, chunk_suffix]))
                    if os.path.exists(iedb_file):
                        # Reuse the output of an earlier run.
                        status_message("IEDB file for Allele %s with Method %s (Entries %s) already exists. Skipping." % (allele, method, fasta_chunk))
                    else:
                        status_message("Running IEDB on Allele %s with Method %s - Entries %s" % (allele, method, fasta_chunk))
                        lib.call_iedb.main([
                            fasta_path,
                            iedb_file,
                            iedb_method,
                            allele,
                            '-r', str(self.iedb_retries),
                            '-e', self.iedb_executable,
                        ])
                        status_message("Completed")
                    iedb_files.append(iedb_file)

                parsed_file = os.path.join(self.tmp_dir, ".".join([self.sample_name, allele, "parsed", chunk_suffix]))
                if os.path.exists(parsed_file):
                    status_message("Parsed Output File for Allele %s (Entries %s) already exists. Skipping" % (allele, fasta_chunk))
                    parsed_files.append(parsed_file)
                    continue
                key_file = fasta_path + '.key'

                # No algorithm produced output for this allele.
                if not iedb_files:
                    continue

                status_message("Parsing IEDB Output for Allele %s - Entries %s" % (allele, fasta_chunk))
                tsv_file = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
                params = iedb_files + [
                    tsv_file,
                    key_file,
                    parsed_file,
                    '-m', self.top_score_metric,
                ]
                if self.top_result_per_mutation == True:
                    params.append('-t')
                lib.parse_output.main(params)
                status_message("Completed")
                parsed_files.append(parsed_file)

        return parsed_files
Exemple #3
0
def valid_alleles_per_algorithm(prediction_algorithms):
    """Map each requested prediction algorithm to its valid allele names.

    :param prediction_algorithms: comma-separated string of prediction class
        names; each name is looked up in this module's globals.
    :return: dict of algorithm name -> list of valid allele names.
    :raises KeyError: if a name is not defined in this module's globals.
    """
    # valid_allele_names() can return a dict_keys view rather than a list,
    # so every result is copied into a real list.
    return {
        name: list(globals()[name]().valid_allele_names())
        for name in prediction_algorithms.split(",")
    }
Exemple #4
0
def valid_alleles(prediction_algorithms):
    """Return the valid allele names for each named prediction algorithm.

    :param prediction_algorithms: comma-separated string of prediction class
        names; each name is resolved through this module's globals.
    :return: dict keyed by algorithm name whose values are lists of alleles.
    :raises KeyError: if a name is not defined in this module's globals.
    """
    algorithm_names = prediction_algorithms.split(",")
    # The allele collection may be a dict_keys view, hence the list() copy.
    return dict(
        (name, list(globals()[name]().valid_allele_names()))
        for name in algorithm_names
    )
Exemple #5
0
def main(args_input = sys.argv[1:]):
    """Print the valid allele names, sorted, one per line.

    Without a prediction algorithm the names come from
    ``PredictionClass.all_valid_allele_names()``; otherwise from the named
    prediction class, which is looked up in this module's globals.

    :param args_input: list of command-line argument strings to parse.
    """
    args = define_parser().parse_args(args_input)
    algorithm = args.prediction_algorithm

    if algorithm is None:
        allele_names = PredictionClass.all_valid_allele_names()
    else:
        allele_names = globals()[algorithm]().valid_allele_names()
    print('\n'.join(sorted(allele_names)))
Exemple #6
0
def main(args_input=sys.argv[1:]):
    """Write the sorted list of valid allele names to standard output.

    :param args_input: list of command-line argument strings. When the parsed
        ``prediction_algorithm`` is None, alleles of all prediction classes
        are listed; otherwise only those of the named class (resolved through
        this module's globals).
    """
    args = define_parser().parse_args(args_input)
    chosen = args.prediction_algorithm
    names = (
        PredictionClass.all_valid_allele_names()
        if chosen is None
        else globals()[chosen]().valid_allele_names()
    )
    print("\n".join(sorted(names)))
Exemple #7
0
def main(args_input=sys.argv[1:]):
    """Run one binding prediction and write its result to ``output_file``.

    Parses the command line, validates the allele and epitope length against
    the chosen prediction class, runs the prediction on the input FASTA file
    and writes the response to ``<output_file>.tmp`` before moving it onto
    ``output_file`` with ``os.replace``.

    :param args_input: list of command-line argument strings to parse.
    :raises SystemExit: if the prediction class needs an epitope length and
        none was given (argparse also exits on invalid arguments).
    """
    parser = argparse.ArgumentParser(
        'pvacseq call_iedb',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_file',
                        type=argparse.FileType('r'),
                        help="Input FASTA file")
    parser.add_argument('output_file', help="Output file from iedb")
    parser.add_argument('method',
                        choices=PredictionClass.prediction_methods(),
                        help="The iedb analysis method to use")
    parser.add_argument('allele', help="Allele for which to make prediction")
    parser.add_argument('-l',
                        '--epitope-length',
                        type=int,
                        choices=[8, 9, 10, 11, 12, 13, 14, 15],
                        help="Length of subpeptides (epitopes) to predict")
    parser.add_argument(
        "-r",
        "--iedb-retries",
        type=int,
        default=5,
        help=
        "Number of retries when making requests to the IEDB RESTful web interface. Must be less than or equal to 100."
    )
    parser.add_argument("-e",
                        "--iedb-executable-path",
                        help="The executable path of the local IEDB install")
    args = parser.parse_args(args_input)

    # argparse.FileType has already opened the input file; the try/finally
    # guarantees it is closed even when validation or prediction fails.
    try:
        PredictionClass.check_alleles_valid([args.allele])
        prediction_class = getattr(sys.modules[__name__], args.method)
        prediction_class_object = prediction_class()
        prediction_class_object.check_allele_valid(args.allele)

        prediction_class_object.check_length_valid_for_allele(
            args.epitope_length, args.allele)

        if args.epitope_length is None and prediction_class_object.needs_epitope_length:
            sys.exit("Epitope length is required for class I binding predictions")

        (response_text, output_mode) = prediction_class_object.predict(
            args.input_file, args.allele, args.epitope_length,
            args.iedb_executable_path, args.iedb_retries)

        # Write to a temporary sibling first so the final path is only
        # replaced once the write has finished.
        tmp_output_file = args.output_file + '.tmp'
        if output_mode == 'pandas':
            response_text.to_csv(tmp_output_file, index=False, sep="\t")
        else:
            # Any other output_mode is used directly as the open() mode.
            with open(tmp_output_file, output_mode) as tmp_output_filehandle:
                tmp_output_filehandle.write(response_text)
        os.replace(tmp_output_file, args.output_file)
    finally:
        args.input_file.close()
Exemple #8
0
def main(args_input=sys.argv[1:]):
    """Validate the command line and run the class I / class II pipelines.

    The requested prediction algorithms and alleles are split into MHC class
    I and class II groups. A ``Pipeline`` is executed for each class that has
    at least one algorithm and one allele, and combined reports are created
    when both classes ran.

    :param args_input: list of command-line argument strings to parse.
    :raises SystemExit: on an invalid sample name, fasta size, retry count or
        downstream sequence length, on a missing epitope length for class I,
        or when a configured IEDB executable path does not exist.
    """
    args = define_parser().parse_args(args_input)

    # --- argument validation -------------------------------------------
    if "." in args.sample_name:
        sys.exit("Sample name cannot contain '.'")

    if args.fasta_size % 2 != 0:
        sys.exit("The fasta size needs to be an even number")

    if args.iedb_retries > 100:
        sys.exit(
            "The number of IEDB retries must be less than or equal to 100")

    requested_length = args.downstream_sequence_length
    if requested_length == 'full':
        # 'full' is passed on to the pipeline as None.
        downstream_sequence_length = None
    elif requested_length.isdigit():
        downstream_sequence_length = int(requested_length)
    else:
        sys.exit(
            "The downstream sequence length needs to be a positive integer or 'full'"
        )

    # if args.iedb_install_directory:
    #     lib.call_iedb.setup_iedb_conda_env()

    input_file_type = 'bedpe'
    base_output_dir = os.path.abspath(args.output_dir)

    # --- split algorithms and alleles by MHC class ---------------------
    class_i_prediction_algorithms = []
    class_ii_prediction_algorithms = []
    for algorithm_name in sorted(args.prediction_algorithms):
        algorithm_instance = globals()[algorithm_name]()
        if isinstance(algorithm_instance, MHCI):
            class_i_prediction_algorithms.append(algorithm_name)
        elif isinstance(algorithm_instance, MHCII):
            class_ii_prediction_algorithms.append(algorithm_name)

    class_i_alleles = []
    class_ii_alleles = []
    for allele in sorted(set(args.allele)):
        # An allele may be valid for both classes; it is reported as invalid
        # only when neither class accepts it.
        is_class_i = allele in MHCI.all_valid_allele_names()
        if is_class_i:
            class_i_alleles.append(allele)
        is_class_ii = allele in MHCII.all_valid_allele_names()
        if is_class_ii:
            class_ii_alleles.append(allele)
        if not (is_class_i or is_class_ii):
            print("Allele %s not valid. Skipping." % allele)

    shared_arguments = {
        'input_file': args.input_file,
        'input_file_type': input_file_type,
        'sample_name': args.sample_name,
        'top_score_metric': args.top_score_metric,
        'binding_threshold': args.binding_threshold,
        'allele_specific_cutoffs': args.allele_specific_binding_thresholds,
        'net_chop_method': args.net_chop_method,
        'net_chop_threshold': args.net_chop_threshold,
        'additional_report_columns': args.additional_report_columns,
        'fasta_size': args.fasta_size,
        'iedb_retries': args.iedb_retries,
        'downstream_sequence_length': downstream_sequence_length,
        'keep_tmp_files': args.keep_tmp_files,
        'n_threads': args.n_threads,
    }

    run_class_i = bool(class_i_prediction_algorithms) and bool(class_i_alleles)
    run_class_ii = bool(class_ii_prediction_algorithms) and bool(class_ii_alleles)

    # --- MHC class I ---------------------------------------------------
    if run_class_i:
        if args.epitope_length is None:
            sys.exit(
                "Epitope length is required for class I binding predictions")

        iedb_mhc_i_executable = None
        if args.iedb_install_directory:
            iedb_mhc_i_executable = os.path.join(args.iedb_install_directory,
                                                 'mhc_i', 'src',
                                                 'predict_binding.py')
            if not os.path.exists(iedb_mhc_i_executable):
                sys.exit("IEDB MHC I executable path doesn't exist %s" %
                         iedb_mhc_i_executable)

        print("Executing MHC Class I predictions")

        output_dir = os.path.join(base_output_dir, 'MHC_Class_I')
        os.makedirs(output_dir, exist_ok=True)

        class_i_arguments = {
            **shared_arguments,
            'alleles': class_i_alleles,
            'peptide_sequence_length': args.peptide_sequence_length,
            'iedb_executable': iedb_mhc_i_executable,
            'epitope_lengths': args.epitope_length,
            'prediction_algorithms': class_i_prediction_algorithms,
            'output_dir': output_dir,
            'netmhc_stab': args.netmhc_stab,
        }
        Pipeline(**class_i_arguments).execute()
    elif not class_i_prediction_algorithms:
        print(
            "No MHC class I prediction algorithms chosen. Skipping MHC class I predictions."
        )
    elif not class_i_alleles:
        print(
            "No MHC class I alleles chosen. Skipping MHC class I predictions.")

    # --- MHC class II --------------------------------------------------
    if run_class_ii:
        iedb_mhc_ii_executable = None
        if args.iedb_install_directory:
            iedb_mhc_ii_executable = os.path.join(args.iedb_install_directory,
                                                  'mhc_ii',
                                                  'mhc_II_binding.py')
            if not os.path.exists(iedb_mhc_ii_executable):
                sys.exit("IEDB MHC II executable path doesn't exist %s" %
                         iedb_mhc_ii_executable)

        print("Executing MHC Class II predictions")

        output_dir = os.path.join(base_output_dir, 'MHC_Class_II')
        os.makedirs(output_dir, exist_ok=True)

        # Class II runs use fixed peptide/epitope lengths and no netmhc_stab.
        class_ii_arguments = {
            **shared_arguments,
            'alleles': class_ii_alleles,
            'prediction_algorithms': class_ii_prediction_algorithms,
            'peptide_sequence_length': 31,
            'iedb_executable': iedb_mhc_ii_executable,
            'epitope_lengths': [15],
            'output_dir': output_dir,
            'netmhc_stab': False,
        }
        Pipeline(**class_ii_arguments).execute()
    elif not class_ii_prediction_algorithms:
        print(
            "No MHC class II prediction algorithms chosen. Skipping MHC class II predictions."
        )
    elif not class_ii_alleles:
        print(
            "No MHC class II alleles chosen. Skipping MHC class II predictions."
        )

    # --- combined reports, only when both classes were executed --------
    if run_class_i and run_class_ii:
        print("Creating combined reports")
        create_combined_reports(base_output_dir, args)
Exemple #9
0
    def call_iedb_and_parse_outputs(self, chunks):
        """Run predictions for each chunk and allele, then parse the output.

        Chunk/allele pairs whose split FASTA file is empty are skipped. For
        the rest, each configured algorithm that supports the allele is run
        through ``lib.call_iedb.main`` unless its output already exists, and
        the collected files are parsed with ``self.output_parser`` unless the
        parsed file already exists.

        When ``self.iedb_executable`` is not set and the ``TEST_FLAG``
        environment variable is unset, empty or ``'0'``, consecutive
        prediction calls are spaced at least 60 seconds apart.

        :param chunks: iterable of ``(split_start, split_end)`` integer pairs.
        :return: list of parsed-output file paths (existing or newly written).
        """
        parsed_files = []
        # Completion time of the most recent prediction call made by this
        # invocation; None until the first call has finished.
        last_execute_timestamp = None

        for (split_start, split_end) in chunks:
            tsv_chunk = "%d-%d" % (split_start, split_end)
            fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
            chunk_suffix = "tsv_%s" % fasta_chunk

            for allele in self.alleles:
                fasta_path = "%s_%s"%(self.split_fasta_basename(), fasta_chunk)
                iedb_files = []
                status_message("Processing entries for Allele %s - Entries %s" % (allele, fasta_chunk))
                if os.path.getsize(fasta_path) == 0:
                    status_message("Fasta file is empty. Skipping")
                    continue

                for method in self.prediction_algorithms:
                    prediction = globals()[method]()
                    # Fall back to the class name when the prediction object
                    # does not define its own IEDB method name.
                    iedb_method = getattr(prediction, 'iedb_prediction_method', method)
                    if allele not in prediction.valid_allele_names():
                        status_message("Allele %s not valid for Method %s. Skipping." % (allele, method))
                        continue

                    iedb_file = os.path.join(self.tmp_dir, ".".join([self.sample_name, iedb_method, allele, chunk_suffix]))
                    if os.path.exists(iedb_file):
                        status_message("IEDB file for Allele %s with Method %s (Entries %s) already exists. Skipping." % (allele, method, fasta_chunk))
                        iedb_files.append(iedb_file)
                        continue
                    status_message("Running IEDB on Allele %s with Method %s - Entries %s" % (allele, method, fasta_chunk))

                    # Throttle: wait until 60 seconds have passed since the
                    # previous call, except in test mode or with a local
                    # IEDB executable.
                    test_flag = os.environ.get('TEST_FLAG')
                    throttling_enabled = not test_flag or test_flag == '0'
                    if throttling_enabled and last_execute_timestamp is not None and not self.iedb_executable:
                        elapsed_time = ( datetime.datetime.now() - last_execute_timestamp ).total_seconds()
                        wait_time = 60 - elapsed_time
                        if wait_time > 0:
                            time.sleep(wait_time)

                    # Note: the algorithm's class name (not iedb_method) is
                    # what gets passed to call_iedb here.
                    lib.call_iedb.main([
                        fasta_path,
                        iedb_file,
                        method,
                        allele,
                        '-r', str(self.iedb_retries),
                        '-e', self.iedb_executable,
                    ])
                    last_execute_timestamp = datetime.datetime.now()
                    status_message("Completed")
                    iedb_files.append(iedb_file)

                parsed_file = os.path.join(self.tmp_dir, ".".join([self.sample_name, allele, "parsed", chunk_suffix]))
                if os.path.exists(parsed_file):
                    status_message("Parsed Output File for Allele %s (Entries %s) already exists. Skipping" % (allele, fasta_chunk))
                    parsed_files.append(parsed_file)
                    continue
                key_file = fasta_path + '.key'

                # No algorithm produced output for this allele.
                if not iedb_files:
                    continue

                status_message("Parsing IEDB Output for Allele %s - Entries %s" % (allele, fasta_chunk))
                tsv_file = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
                params = {
                    'input_iedb_files'       : iedb_files,
                    'input_tsv_file'         : tsv_file,
                    'key_file'               : key_file,
                    'output_file'            : parsed_file,
                }
                # The sample name is only passed on when it was requested as
                # an additional report column.
                report_sample_name = self.additional_report_columns and 'sample_name' in self.additional_report_columns
                params['sample_name'] = self.sample_name if report_sample_name else None
                self.output_parser(params).execute()
                status_message("Completed")
                parsed_files.append(parsed_file)

        return parsed_files
Exemple #10
0
    def call_iedb_and_parse_outputs(self, chunks):
        """Run IEDB for each chunk and allele, then parse the combined output.

        Each configured algorithm that supports the allele is run through
        ``lib.call_iedb.main`` unless its output file already exists in
        ``self.tmp_dir``. The collected files are then passed to
        ``lib.parse_output.main`` unless the parsed file already exists.

        :param chunks: iterable of chunk labels; each is formatted with
            ``%s`` into file names and progress messages.
        :return: list of parsed-output file paths (existing or newly written).
        """
        parsed_files = []
        for chunk in chunks:
            chunk_suffix = "tsv_%s" % chunk
            for allele in self.alleles:
                fasta_path = "%s_%s" % (self.split_fasta_basename(), chunk)
                iedb_files = []
                print("Processing entries for Allele %s - Entries %s" %
                      (allele, chunk))

                for method in self.prediction_algorithms:
                    prediction = globals()[method]()
                    iedb_method = prediction.iedb_prediction_method
                    if allele not in prediction.valid_allele_names():
                        print("Allele %s not valid for Method %s. Skipping." %
                              (allele, method))
                        continue

                    iedb_file = os.path.join(
                        self.tmp_dir,
                        ".".join([self.sample_name, iedb_method, allele,
                                  chunk_suffix]))
                    if os.path.exists(iedb_file):
                        # Reuse the output of an earlier run.
                        print(
                            "IEDB file for Allele %s with Method %s (Entries %s) already exists. Skipping."
                            % (allele, method, chunk))
                    else:
                        print(
                            "Running IEDB on Allele %s with Method %s - Entries %s"
                            % (allele, method, chunk))
                        lib.call_iedb.main([
                            fasta_path,
                            iedb_file,
                            iedb_method,
                            allele,
                        ])
                        print("Completed")
                    iedb_files.append(iedb_file)

                parsed_file = os.path.join(
                    self.tmp_dir,
                    ".".join([self.sample_name, allele, "parsed",
                              chunk_suffix]))
                if os.path.exists(parsed_file):
                    print(
                        "Parsed Output File for Allele %s (Entries %s) already exists. Skipping"
                        % (allele, chunk))
                    parsed_files.append(parsed_file)
                    continue
                key_file = fasta_path + '.key'

                # No algorithm produced output for this allele.
                if not iedb_files:
                    continue

                print("Parsing IEDB Output for Allele %s - Entries %s" %
                      (allele, chunk))
                params = iedb_files + [
                    self.tsv_file_path(),
                    key_file,
                    parsed_file,
                    '-m',
                    self.top_score_metric,
                ]
                if self.top_result_per_mutation == True:
                    params.append('-t')
                lib.parse_output.main(params)
                print("Completed")
                parsed_files.append(parsed_file)

        return parsed_files
Exemple #11
0
    def call_iedb_and_parse_outputs(self, chunks):
        """Run IEDB per chunk, allele and epitope length, then parse output.

        Each configured algorithm that supports the allele and epitope length
        is run through ``lib.call_iedb.main`` unless its output file already
        exists in ``self.tmp_dir``. The collected files are then passed to
        ``lib.parse_output.main`` unless the parsed file already exists.

        When ``self.iedb_executable`` is not set and the ``TEST_FLAG``
        environment variable is unset, empty or ``'0'``, consecutive IEDB
        calls are spaced at least 60 seconds apart.

        :param chunks: iterable of ``(split_start, split_end)`` integer pairs.
        :return: list of parsed-output file paths (existing or newly written).
        """
        parsed_files = []
        # Completion time of the most recent IEDB call made by this
        # invocation; None until the first call has finished.
        last_execute_timestamp = None

        for (split_start, split_end) in chunks:
            tsv_chunk = "%d-%d" % (split_start, split_end)
            fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
            chunk_suffix = "tsv_%s" % fasta_chunk

            for allele in self.alleles:
                for epitope_length in self.epitope_lengths:
                    fasta_path = "%s_%s"%(self.split_fasta_basename(), fasta_chunk)
                    iedb_files = []
                    status_message("Processing entries for Allele %s and Epitope Length %s - Entries %s" % (allele, epitope_length, fasta_chunk))

                    for method in self.prediction_algorithms:
                        prediction = globals()[method]()
                        iedb_method = prediction.iedb_prediction_method
                        if allele not in prediction.valid_allele_names():
                            status_message("Allele %s not valid for Method %s. Skipping." % (allele, method))
                            continue
                        if epitope_length not in prediction.valid_lengths_for_allele(allele):
                            status_message("Epitope Length %s is not valid for Method %s and Allele %s. Skipping." % (epitope_length, method, allele))
                            continue

                        iedb_file = os.path.join(self.tmp_dir, ".".join([self.sample_name, iedb_method, allele, str(epitope_length), chunk_suffix]))
                        if os.path.exists(iedb_file):
                            status_message("IEDB file for Allele %s and Epitope Length %s with Method %s (Entries %s) already exists. Skipping." % (allele, epitope_length, method, fasta_chunk))
                            iedb_files.append(iedb_file)
                            continue
                        status_message("Running IEDB on Allele %s and Epitope Length %s with Method %s - Entries %s" % (allele, epitope_length, method, fasta_chunk))

                        # Throttle: wait until 60 seconds have passed since
                        # the previous call, except in test mode or with a
                        # local IEDB executable.
                        test_flag = os.environ.get('TEST_FLAG')
                        throttling_enabled = not test_flag or test_flag == '0'
                        if throttling_enabled and last_execute_timestamp is not None and not self.iedb_executable:
                            elapsed_time = ( datetime.datetime.now() - last_execute_timestamp ).total_seconds()
                            wait_time = 60 - elapsed_time
                            if wait_time > 0:
                                time.sleep(wait_time)

                        lib.call_iedb.main([
                            fasta_path,
                            iedb_file,
                            iedb_method,
                            allele,
                            '-l', str(epitope_length),
                            '-r', str(self.iedb_retries),
                            '-e', self.iedb_executable,
                        ])
                        last_execute_timestamp = datetime.datetime.now()
                        status_message("Completed")
                        iedb_files.append(iedb_file)

                    parsed_file = os.path.join(self.tmp_dir, ".".join([self.sample_name, allele, str(epitope_length), "parsed", chunk_suffix]))
                    if os.path.exists(parsed_file):
                        status_message("Parsed Output File for Allele %s and Epitope Length %s (Entries %s) already exists. Skipping" % (allele, epitope_length, fasta_chunk))
                        parsed_files.append(parsed_file)
                        continue
                    key_file = fasta_path + '.key'

                    # No algorithm produced output for this combination.
                    if not iedb_files:
                        continue

                    status_message("Parsing IEDB Output for Allele %s and Epitope Length %s - Entries %s" % (allele, epitope_length, fasta_chunk))
                    tsv_file = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
                    params = iedb_files + [
                        tsv_file,
                        key_file,
                        parsed_file,
                        '-m', self.top_score_metric,
                    ]
                    if self.top_result_per_mutation == True:
                        params.append('-t')
                    lib.parse_output.main(params)
                    status_message("Completed")
                    parsed_files.append(parsed_file)
        return parsed_files
Exemple #12
0
    def call_iedb(self, chunks, length):
        """Run the binding predictions for one epitope length over all chunks.

        Works in three phases:

        1. For every (chunk, allele, prediction algorithm) combination build
           the argument list for lib.call_iedb.main, unless the split fasta
           file is empty, the allele or length is not valid for the
           algorithm, or the prediction output file already exists.
        2. Report every distinct "Skipping" notice once through
           status_message, in the order it was first produced.
        3. Execute the collected calls inside a pymp.Parallel region sized by
           self.n_threads.

        :param chunks: iterable of (split_start, split_end) entry ranges
        :param length: epitope length the predictions are made for
        """
        allele_names = self.alleles
        method_names = self.prediction_algorithms
        pending_calls = []
        notices = []

        def remember(notice):
            # Keep each distinct notice only once, in first-seen order.
            if notice not in notices:
                notices.append(notice)

        for (first_entry, last_entry) in chunks:
            entry_range = "%d-%d" % (first_entry, last_entry)
            # Non-fasta inputs address the fasta entries with a doubled range.
            fasta_chunk = (
                entry_range if self.input_file_type == 'fasta' else
                "%d-%d" % (first_entry * 2 - 1, last_entry * 2))
            for allele in allele_names:
                fasta_path = "%s_%s" % (
                    self.split_fasta_basename(length), fasta_chunk)
                if os.path.getsize(fasta_path) == 0:
                    remember(
                        "Fasta file {} is empty. Skipping".format(fasta_path))
                    continue
                for method in method_names:
                    # The algorithm name doubles as the name of its class.
                    predictor = globals()[method]()
                    iedb_method = getattr(
                        predictor, 'iedb_prediction_method', method)
                    if allele not in predictor.valid_allele_names():
                        remember(
                            "Allele %s not valid for Method %s. Skipping." %
                            (allele, method))
                        continue
                    if length not in predictor.valid_lengths_for_allele(
                            allele):
                        remember(
                            "Epitope Length %s is not valid for Method %s and Allele %s. Skipping."
                            % (length, method, allele))
                        continue

                    iedb_out_name = ".".join([
                        self.sample_name, iedb_method, allele,
                        str(length),
                        "tsv_%s" % fasta_chunk
                    ])
                    iedb_out_path = os.path.join(self.tmp_dir, iedb_out_name)
                    if os.path.exists(iedb_out_path):
                        remember(
                            "Prediction file for Allele %s and Epitope Length %s with Method %s (Entries %s) already exists. Skipping."
                            % (allele, length, method, fasta_chunk))
                        continue

                    call_args = [
                        fasta_path,
                        iedb_out_path,
                        method,
                        allele,
                        '-r',
                        str(self.iedb_retries),
                        '-e',
                        self.iedb_executable,
                    ]
                    # The length option is only passed for predictors that
                    # are not IEDBMHCII instances.
                    if not isinstance(predictor, IEDBMHCII):
                        call_args += ['-l', str(length)]
                    pending_calls.append(call_args)

        for notice in notices:
            status_message(notice)

        with pymp.Parallel(self.n_threads) as p:
            for index in p.range(len(pending_calls)):
                call_args = pending_calls[index]
                output_path, method, allele = call_args[1:4]
                # Ten arguments means '-l <length>' was appended above; the
                # remaining calls are reported with a length of 15.
                epl = call_args[9] if len(call_args) == 10 else 15
                details = (allele, epl, method, output_path)
                p.print(
                    "Making binding predictions on Allele %s and Epitope Length %s with Method %s - File %s"
                    % details)
                lib.call_iedb.main(call_args)
                p.print(
                    "Making binding predictions on Allele %s and Epitope Length %s with Method %s - File %s - Completed"
                    % details)
Exemple #13
0
    def call_iedb_and_parse_outputs(self, chunks):
        """Run IEDB predictions and parse their outputs for every chunk.

        Iterates over chunks, alleles, epitope lengths and prediction
        algorithms in four nested pymp.Parallel regions. For each
        (allele, epitope length, algorithm) combination that passes the
        validity checks it calls lib.call_iedb.main, unless the IEDB output
        file already exists. All IEDB output files of one
        (chunk, allele, epitope length) are then parsed into a single file
        through self.output_parser, unless that parsed file already exists.

        :param chunks: sequence of (split_start, split_end) tuples
        :return: list of parsed output file paths that either already existed
                 or were written by this call
        """
        # Enable nested pymp parallel regions; four levels are opened below.
        pymp.config.nested = True
        alleles = self.alleles
        epitope_lengths = self.epitope_lengths
        prediction_algorithms = self.prediction_algorithms
        # Per-level workload description. Every level starts with a single
        # thread handling all of its iterations.
        iteration_info = {
            'file': {
                'total_iterations': len(chunks),
                'iterations_per_thread': len(chunks),
                'threads': 1,
            },
            'allele': {
                'total_iterations': len(alleles),
                'iterations_per_thread': len(alleles),
                'threads': 1,
            },
            'length': {
                'total_iterations': len(epitope_lengths),
                'iterations_per_thread': len(epitope_lengths),
                'threads': 1,
            },
            'algorithm': {
                'total_iterations': len(prediction_algorithms),
                'iterations_per_thread': len(prediction_algorithms),
                'threads': 1,
            },
        }
        # The returned structure supplies the 'threads' value used for each
        # of the nested parallel regions below.
        iteration_info = self.balance_multithreads(iteration_info)

        # NOTE(review): this is a plain Python list that is appended to from
        # inside the pymp.Parallel regions - confirm that entries added by
        # pymp workers are actually visible to the caller when more than one
        # thread is configured.
        split_parsed_output_files = []
        # Passed to status_message_with_lock for every status message.
        lock = Lock()
        with pymp.Parallel(iteration_info['file']['threads']) as p:
            for i in p.range(len(chunks)):
                (split_start, split_end) = chunks[i]
                tsv_chunk = "%d-%d" % (split_start, split_end)
                # The fasta range is the doubled TSV range - presumably two
                # fasta entries per TSV entry; TODO confirm.
                fasta_chunk = "%d-%d" % (split_start * 2 - 1, split_end * 2)
                with pymp.Parallel(iteration_info['allele']['threads']) as p2:
                    for j in p2.range(len(alleles)):
                        a = alleles[j]
                        with pymp.Parallel(
                                iteration_info['length']['threads']) as p3:
                            for k in p3.range(len(epitope_lengths)):
                                epl = epitope_lengths[k]
                                # pvacvector fasta input uses a fixed "1-2"
                                # chunk name that includes the epitope length.
                                if self.input_file_type == 'pvacvector_input_fasta':
                                    split_fasta_file_path = "{}_1-2.{}.tsv".format(
                                        self.split_fasta_basename(), epl)
                                else:
                                    split_fasta_file_path = "%s_%s" % (
                                        self.split_fasta_basename(),
                                        fasta_chunk)
                                # IEDB output files (existing or newly
                                # written) to be parsed together below.
                                split_iedb_output_files = []
                                status_message_with_lock(
                                    "Processing entries for Allele %s and Epitope Length %s - Entries %s"
                                    % (a, epl, fasta_chunk), lock)
                                if os.path.getsize(split_fasta_file_path) == 0:
                                    status_message_with_lock(
                                        "Fasta file is empty. Skipping", lock)
                                    continue
                                #begin of per-algorithm processing
                                with pymp.Parallel(iteration_info['algorithm']
                                                   ['threads']) as p4:
                                    for m in p4.range(
                                            len(prediction_algorithms)):
                                        method = prediction_algorithms[m]
                                        # The algorithm name is looked up as a
                                        # class in the module globals.
                                        prediction_class = globals()[method]
                                        prediction = prediction_class()
                                        if hasattr(prediction,
                                                   'iedb_prediction_method'):
                                            iedb_method = prediction.iedb_prediction_method
                                        else:
                                            iedb_method = method
                                        valid_alleles = prediction.valid_allele_names(
                                        )
                                        if a not in valid_alleles:
                                            status_message_with_lock(
                                                "Allele %s not valid for Method %s. Skipping."
                                                % (a, method), lock)
                                            continue
                                        valid_lengths = prediction.valid_lengths_for_allele(
                                            a)
                                        if epl not in valid_lengths:
                                            status_message_with_lock(
                                                "Epitope Length %s is not valid for Method %s and Allele %s. Skipping."
                                                % (epl, method, a), lock)
                                            continue

                                        split_iedb_out = os.path.join(
                                            self.tmp_dir, ".".join([
                                                self.sample_name, iedb_method,
                                                a,
                                                str(epl),
                                                "tsv_%s" % fasta_chunk
                                            ]))
                                        # Reuse an existing IEDB output file
                                        # instead of predicting again.
                                        if os.path.exists(split_iedb_out):
                                            status_message_with_lock(
                                                "IEDB file for Allele %s and Epitope Length %s with Method %s (Entries %s) already exists. Skipping."
                                                %
                                                (a, epl, method, fasta_chunk),
                                                lock)
                                            split_iedb_output_files.append(
                                                split_iedb_out)
                                            continue
                                        status_message_with_lock(
                                            "Running IEDB on Allele %s and Epitope Length %s with Method %s - Entries %s"
                                            % (a, epl, method, fasta_chunk),
                                            lock)

                                        # Throttle: when TEST_FLAG is unset or
                                        # '0' and no IEDB executable is
                                        # configured, sleep until 60 seconds
                                        # have passed since the previous call
                                        # recorded in this scope. The locals()
                                        # check is needed because the
                                        # timestamp is only assigned after the
                                        # first call below. Presumably this
                                        # rate-limits a remote IEDB service -
                                        # TODO confirm.
                                        if not os.environ.get(
                                                'TEST_FLAG') or os.environ.get(
                                                    'TEST_FLAG') == '0':
                                            if 'last_execute_timestamp' in locals(
                                            ) and not self.iedb_executable:
                                                elapsed_time = (
                                                    datetime.datetime.now() -
                                                    last_execute_timestamp
                                                ).total_seconds()
                                                wait_time = 60 - elapsed_time
                                                if wait_time > 0:
                                                    time.sleep(wait_time)

                                        arguments = [
                                            split_fasta_file_path,
                                            split_iedb_out,
                                            method,
                                            a,
                                            '-r',
                                            str(self.iedb_retries),
                                            '-e',
                                            self.iedb_executable,
                                        ]
                                        # The length option is only passed for
                                        # predictors that are not IEDBMHCII
                                        # instances.
                                        if not isinstance(
                                                prediction, IEDBMHCII):
                                            arguments.extend([
                                                '-l',
                                                str(epl),
                                            ])
                                        lib.call_iedb.main(arguments)
                                        last_execute_timestamp = datetime.datetime.now(
                                        )
                                        status_message_with_lock(
                                            "Running IEDB on Allele %s and Epitope Length %s with Method %s - Entries %s - Completed"
                                            % (a, epl, method, fasta_chunk),
                                            lock)
                                        split_iedb_output_files.append(
                                            split_iedb_out)
                                #end of per-algorithm processing

                                #parse all output files for one allele, epitope, and file chunk over all algorithms into one file
                                split_parsed_file_path = os.path.join(
                                    self.tmp_dir, ".".join([
                                        self.sample_name, a,
                                        str(epl), "parsed",
                                        "tsv_%s" % fasta_chunk
                                    ]))
                                # An existing parsed file is reported as a
                                # result without being parsed again.
                                if os.path.exists(split_parsed_file_path):
                                    status_message_with_lock(
                                        "Parsed Output File for Allele %s and Epitope Length %s (Entries %s) already exists. Skipping"
                                        % (a, epl, fasta_chunk), lock)
                                    split_parsed_output_files.append(
                                        split_parsed_file_path)
                                    continue
                                split_fasta_key_file_path = split_fasta_file_path + '.key'

                                # Nothing is parsed (and nothing is added to
                                # the result) when no IEDB output was
                                # collected for this combination.
                                if len(split_iedb_output_files) > 0:
                                    status_message_with_lock(
                                        "Parsing IEDB Output for Allele %s and Epitope Length %s - Entries %s"
                                        % (a, epl, fasta_chunk), lock)
                                    split_tsv_file_path = "%s_%s" % (
                                        self.tsv_file_path(), tsv_chunk)
                                    params = {
                                        'input_iedb_files':
                                        split_iedb_output_files,
                                        'input_tsv_file': split_tsv_file_path,
                                        'key_file': split_fasta_key_file_path,
                                        'output_file': split_parsed_file_path,
                                    }
                                    # The sample name is only handed to the
                                    # parser when it was requested as an
                                    # additional report column.
                                    if self.additional_report_columns and 'sample_name' in self.additional_report_columns:
                                        params[
                                            'sample_name'] = self.sample_name
                                    else:
                                        params['sample_name'] = None
                                    parser = self.output_parser(params)
                                    parser.execute()
                                    status_message_with_lock(
                                        "Parsing IEDB Output for Allele %s and Epitope Length %s - Entries %s - Completed"
                                        % (a, epl, fasta_chunk), lock)
                                    split_parsed_output_files.append(
                                        split_parsed_file_path)
        return split_parsed_output_files
Exemple #14
0
def main(args_input=sys.argv[1:]):
    """Validate the command line and run the MHC class I/II pipelines.

    The requested prediction algorithms are split into MHC class I and
    class II algorithms by instantiating the class of the same name and
    checking its type. A pipeline is executed for each class that has at
    least one algorithm, writing into the 'MHC_Class_I' / 'MHC_Class_II'
    sub-directory of the output directory.

    Terminates through sys.exit when the sample name contains a '.', the
    fasta size is odd, the downstream sequence length is neither 'full' nor
    made of digits, or class I algorithms are requested without an epitope
    length.

    :param args_input: list of command line arguments; the default is
                       sys.argv[1:] as captured when the function is defined
    """
    args = define_parser().parse_args(args_input)

    PredictionClass.check_alleles_valid(args.allele)

    if "." in args.sample_name:
        sys.exit("Sample name cannot contain '.'")

    if args.fasta_size % 2 != 0:
        sys.exit("The fasta size needs to be an even number")

    # 'full' is passed on as None; a digit string is passed on unchanged.
    downstream_sequence_length = args.downstream_sequence_length
    if downstream_sequence_length == 'full':
        downstream_sequence_length = None
    elif not downstream_sequence_length.isdigit():
        sys.exit(
            "The downstream sequence length needs to be a positive integer or 'full'"
        )

    output_root = os.path.abspath(args.output_dir)

    # Each algorithm name is also the name of its class in this module.
    mhc_i_methods, mhc_ii_methods = [], []
    for method_name in args.prediction_algorithms:
        instance = globals()[method_name]()
        if isinstance(instance, MHCI):
            mhc_i_methods.append(method_name)
        elif isinstance(instance, MHCII):
            mhc_ii_methods.append(method_name)

    shared_arguments = {
        'input_file': args.input_file,
        'sample_name': args.sample_name,
        'alleles': args.allele,
        'top_result_per_mutation': args.top_result_per_mutation,
        'top_score_metric': args.top_score_metric,
        'binding_threshold': args.binding_threshold,
        'minimum_fold_change': args.minimum_fold_change,
        'net_chop_method': args.net_chop_method,
        'net_chop_threshold': args.net_chop_threshold,
        'normal_cov': args.normal_cov,
        'normal_vaf': args.normal_vaf,
        'tdna_cov': args.tdna_cov,
        'tdna_vaf': args.tdna_vaf,
        'trna_cov': args.trna_cov,
        'trna_vaf': args.trna_vaf,
        'expn_val': args.expn_val,
        'fasta_size': args.fasta_size,
        'downstream_sequence_length': downstream_sequence_length,
        'keep_tmp_files': args.keep_tmp_files,
    }
    shared_arguments.update(
        parse_additional_input_file_list(args.additional_input_file_list))

    if mhc_i_methods:
        if args.epitope_length is None:
            sys.exit(
                "Epitope length is required for class I binding predictions")

        print("Executing MHC Class I predictions")

        class_i_dir = os.path.join(output_root, 'MHC_Class_I')
        os.makedirs(class_i_dir, exist_ok=True)

        class_i_arguments = dict(shared_arguments)
        class_i_arguments.update({
            'peptide_sequence_length': args.peptide_sequence_length,
            'epitope_lengths': args.epitope_length,
            'prediction_algorithms': mhc_i_methods,
            'output_dir': class_i_dir,
            'netmhc_stab': args.netmhc_stab,
        })
        MHCIPipeline(**class_i_arguments).execute()

    if mhc_ii_methods:
        print("Executing MHC Class II predictions")

        class_ii_dir = os.path.join(output_root, 'MHC_Class_II')
        os.makedirs(class_ii_dir, exist_ok=True)

        class_ii_arguments = dict(shared_arguments)
        class_ii_arguments.update({
            'prediction_algorithms': mhc_ii_methods,
            'output_dir': class_ii_dir,
            'netmhc_stab': False,
        })
        MHCIIPipeline(**class_ii_arguments).execute()
 def print_valid_alleles(self):
     """Print the valid allele names in sorted order, one per line.

     Without a prediction algorithm every allele name known to
     PredictionClass is listed; otherwise only the names reported by an
     instance of the class named by self.prediction_algorithm.
     """
     algorithm = self.prediction_algorithm
     if algorithm is not None:
         # The algorithm name is also the name of its class in this module.
         allele_names = globals()[algorithm]().valid_allele_names()
     else:
         allele_names = PredictionClass.all_valid_allele_names()
     print("\n".join(sorted(allele_names)))
Exemple #16
0
def main(args_input = sys.argv[1:]):
    """Validate the command line and run the MHC class I/II pipelines.

    Prediction algorithms and alleles are each split into an MHC class I and
    an MHC class II group. A pipeline is executed for every class that has
    at least one algorithm and at least one allele, writing into the
    'MHC_Class_I' / 'MHC_Class_II' sub-directory of the output directory.
    When an IEDB install directory is given, the class specific executable
    inside it must exist.

    Terminates through sys.exit when the sample name contains a '.', the
    fasta size is odd, more than 100 IEDB retries are requested, the
    downstream sequence length is neither 'full' nor made of digits, class I
    predictions are requested without an epitope length, or an expected IEDB
    executable is missing.

    :param args_input: list of command line arguments; the default is
                       sys.argv[1:] as captured when the function is defined
    """
    args = define_parser().parse_args(args_input)

    PredictionClass.check_alleles_valid(args.allele)

    if "." in args.sample_name:
        sys.exit("Sample name cannot contain '.'")

    if args.fasta_size % 2 != 0:
        sys.exit("The fasta size needs to be an even number")

    if args.iedb_retries > 100:
        sys.exit("The number of IEDB retries must be less than or equal to 100")

    # 'full' is passed on as None; a digit string is passed on unchanged.
    downstream_sequence_length = args.downstream_sequence_length
    if downstream_sequence_length == 'full':
        downstream_sequence_length = None
    elif not downstream_sequence_length.isdigit():
        sys.exit("The downstream sequence length needs to be a positive integer or 'full'")

    output_root = os.path.abspath(args.output_dir)

    # Each algorithm name is also the name of its class in this module.
    mhc_i_methods, mhc_ii_methods = [], []
    for method_name in sorted(args.prediction_algorithms):
        instance = globals()[method_name]()
        if isinstance(instance, MHCI):
            mhc_i_methods.append(method_name)
        elif isinstance(instance, MHCII):
            mhc_ii_methods.append(method_name)

    # Duplicates are dropped; an allele may end up in both groups.
    mhc_i_alleles, mhc_ii_alleles = [], []
    for allele in sorted(set(args.allele)):
        if allele in MHCI.all_valid_allele_names():
            mhc_i_alleles.append(allele)
        if allele in MHCII.all_valid_allele_names():
            mhc_ii_alleles.append(allele)

    shared_arguments = {
        'input_file': args.input_file,
        'sample_name': args.sample_name,
        'top_result_per_mutation': args.top_result_per_mutation,
        'top_score_metric': args.top_score_metric,
        'binding_threshold': args.binding_threshold,
        'minimum_fold_change': args.minimum_fold_change,
        'net_chop_method': args.net_chop_method,
        'net_chop_threshold': args.net_chop_threshold,
        'normal_cov': args.normal_cov,
        'normal_vaf': args.normal_vaf,
        'tdna_cov': args.tdna_cov,
        'tdna_vaf': args.tdna_vaf,
        'trna_cov': args.trna_cov,
        'trna_vaf': args.trna_vaf,
        'expn_val': args.expn_val,
        'fasta_size': args.fasta_size,
        'iedb_retries': args.iedb_retries,
        'downstream_sequence_length': downstream_sequence_length,
        'keep_tmp_files': args.keep_tmp_files,
    }
    shared_arguments.update(
        parse_additional_input_file_list(args.additional_input_file_list))

    if mhc_i_methods and mhc_i_alleles:
        if args.epitope_length is None:
            sys.exit("Epitope length is required for class I binding predictions")

        # None means that no local IEDB installation was configured.
        iedb_mhc_i_executable = None
        if args.iedb_install_directory:
            iedb_mhc_i_executable = os.path.join(
                args.iedb_install_directory, 'mhc_i', 'src', 'predict_binding.py')
            if not os.path.exists(iedb_mhc_i_executable):
                sys.exit("IEDB MHC I executable path doesn't exist %s" % iedb_mhc_i_executable)

        print("Executing MHC Class I predictions")

        class_i_dir = os.path.join(output_root, 'MHC_Class_I')
        os.makedirs(class_i_dir, exist_ok=True)

        class_i_arguments = dict(shared_arguments)
        class_i_arguments.update({
            'alleles': mhc_i_alleles,
            'peptide_sequence_length': args.peptide_sequence_length,
            'iedb_executable': iedb_mhc_i_executable,
            'epitope_lengths': args.epitope_length,
            'prediction_algorithms': mhc_i_methods,
            'output_dir': class_i_dir,
            'netmhc_stab': args.netmhc_stab,
        })
        MHCIPipeline(**class_i_arguments).execute()

    if mhc_ii_methods and mhc_ii_alleles:
        # None means that no local IEDB installation was configured.
        iedb_mhc_ii_executable = None
        if args.iedb_install_directory:
            iedb_mhc_ii_executable = os.path.join(
                args.iedb_install_directory, 'mhc_ii', 'mhc_II_binding.py')
            if not os.path.exists(iedb_mhc_ii_executable):
                sys.exit("IEDB MHC II executable path doesn't exist %s" % iedb_mhc_ii_executable)

        print("Executing MHC Class II predictions")

        class_ii_dir = os.path.join(output_root, 'MHC_Class_II')
        os.makedirs(class_ii_dir, exist_ok=True)

        class_ii_arguments = dict(shared_arguments)
        class_ii_arguments.update({
            'alleles': mhc_ii_alleles,
            'prediction_algorithms': mhc_ii_methods,
            'iedb_executable': iedb_mhc_ii_executable,
            'output_dir': class_ii_dir,
            'netmhc_stab': False,
        })
        MHCIIPipeline(**class_ii_arguments).execute()
Exemple #17
0
def main(args_input=sys.argv[1:]):
    """Validate the command line and run the MHC class I/II pipelines.

    The input file type ('vcf' or 'bedpe') is derived from the file
    extension. Prediction algorithms and alleles are each split into an MHC
    class I and an MHC class II group; alleles valid for neither class are
    reported and skipped. A pipeline is executed for every class that has at
    least one algorithm and at least one allele, writing into the
    'MHC_Class_I' / 'MHC_Class_II' sub-directory of the output directory.
    When an IEDB install directory is given, the class specific executable
    inside it must exist.

    Terminates through sys.exit when the input file extension is unknown,
    the sample name contains a '.', the fasta size is odd, more than 100
    IEDB retries are requested, the downstream sequence length is neither
    'full' nor made of digits, class I predictions are requested without an
    epitope length, or an expected IEDB executable is missing.

    :param args_input: list of command line arguments; the default is
                       sys.argv[1:] as captured when the function is defined
    """
    parser = define_parser()
    args = parser.parse_args(args_input)

    if args.input_file.endswith('.vcf'):
        input_file_type = 'vcf'
    elif args.input_file.endswith('.bedpe'):
        input_file_type = 'bedpe'
    else:
        # The file name lives on args; a bare `input_file` is not defined in
        # this function and would raise NameError instead of exiting with the
        # intended message.
        sys.exit(
            "Unknown input file type for file (%s). Input file must be either a VCF (.vcf) or a bedpe (.bedpe) file."
            % args.input_file)

    if "." in args.sample_name:
        sys.exit("Sample name cannot contain '.'")

    if args.fasta_size % 2 != 0:
        sys.exit("The fasta size needs to be an even number")

    if args.iedb_retries > 100:
        sys.exit(
            "The number of IEDB retries must be less than or equal to 100")

    # 'full' is passed on as None; a digit string is converted to int.
    if args.downstream_sequence_length == 'full':
        downstream_sequence_length = None
    elif args.downstream_sequence_length.isdigit():
        downstream_sequence_length = int(args.downstream_sequence_length)
    else:
        sys.exit(
            "The downstream sequence length needs to be a positive integer or 'full'"
        )

    base_output_dir = os.path.abspath(args.output_dir)

    # Each algorithm name is also the name of its class in this module.
    class_i_prediction_algorithms = []
    class_ii_prediction_algorithms = []
    for prediction_algorithm in sorted(args.prediction_algorithms):
        prediction_class = globals()[prediction_algorithm]
        prediction_class_object = prediction_class()
        if isinstance(prediction_class_object, MHCI):
            class_i_prediction_algorithms.append(prediction_algorithm)
        elif isinstance(prediction_class_object, MHCII):
            class_ii_prediction_algorithms.append(prediction_algorithm)

    # Duplicates are dropped; an allele may end up in both groups.
    class_i_alleles = []
    class_ii_alleles = []
    for allele in sorted(set(args.allele)):
        is_class_i = allele in MHCI.all_valid_allele_names()
        if is_class_i:
            class_i_alleles.append(allele)
        is_class_ii = allele in MHCII.all_valid_allele_names()
        if is_class_ii:
            class_ii_alleles.append(allele)
        if not (is_class_i or is_class_ii):
            print("Allele %s not valid. Skipping." % allele)

    shared_arguments = {
        'input_file': args.input_file,
        'input_file_type': input_file_type,
        'sample_name': args.sample_name,
        'top_result_per_mutation': args.top_result_per_mutation,
        'top_score_metric': args.top_score_metric,
        'binding_threshold': args.binding_threshold,
        'minimum_fold_change': args.minimum_fold_change,
        'net_chop_method': args.net_chop_method,
        'net_chop_threshold': args.net_chop_threshold,
        'normal_cov': args.normal_cov,
        'normal_vaf': args.normal_vaf,
        'tdna_cov': args.tdna_cov,
        'tdna_vaf': args.tdna_vaf,
        'trna_cov': args.trna_cov,
        'trna_vaf': args.trna_vaf,
        'expn_val': args.expn_val,
        'additional_report_columns': args.additional_report_columns,
        'fasta_size': args.fasta_size,
        'iedb_retries': args.iedb_retries,
        'downstream_sequence_length': downstream_sequence_length,
        'keep_tmp_files': args.keep_tmp_files,
    }
    additional_input_files = parse_additional_input_file_list(
        args.additional_input_file_list)
    shared_arguments.update(additional_input_files)

    if class_i_prediction_algorithms and class_i_alleles:
        if args.epitope_length is None:
            sys.exit(
                "Epitope length is required for class I binding predictions")

        # None means that no local IEDB installation was configured.
        if args.iedb_install_directory:
            iedb_mhc_i_executable = os.path.join(args.iedb_install_directory,
                                                 'mhc_i', 'src',
                                                 'predict_binding.py')
            if not os.path.exists(iedb_mhc_i_executable):
                sys.exit("IEDB MHC I executable path doesn't exist %s" %
                         iedb_mhc_i_executable)
        else:
            iedb_mhc_i_executable = None

        print("Executing MHC Class I predictions")

        output_dir = os.path.join(base_output_dir, 'MHC_Class_I')
        os.makedirs(output_dir, exist_ok=True)

        class_i_arguments = shared_arguments.copy()
        class_i_arguments['alleles'] = class_i_alleles
        class_i_arguments[
            'peptide_sequence_length'] = args.peptide_sequence_length
        class_i_arguments['iedb_executable'] = iedb_mhc_i_executable
        class_i_arguments['epitope_lengths'] = args.epitope_length
        class_i_arguments[
            'prediction_algorithms'] = class_i_prediction_algorithms
        class_i_arguments['output_dir'] = output_dir
        class_i_arguments['netmhc_stab'] = args.netmhc_stab
        pipeline = MHCIPipeline(**class_i_arguments)
        pipeline.execute()

    if class_ii_prediction_algorithms and class_ii_alleles:
        # None means that no local IEDB installation was configured.
        if args.iedb_install_directory:
            iedb_mhc_ii_executable = os.path.join(args.iedb_install_directory,
                                                  'mhc_ii',
                                                  'mhc_II_binding.py')
            if not os.path.exists(iedb_mhc_ii_executable):
                sys.exit("IEDB MHC II executable path doesn't exist %s" %
                         iedb_mhc_ii_executable)
        else:
            iedb_mhc_ii_executable = None

        print("Executing MHC Class II predictions")

        output_dir = os.path.join(base_output_dir, 'MHC_Class_II')
        os.makedirs(output_dir, exist_ok=True)

        class_ii_arguments = shared_arguments.copy()
        class_ii_arguments['alleles'] = class_ii_alleles
        class_ii_arguments[
            'prediction_algorithms'] = class_ii_prediction_algorithms
        class_ii_arguments['iedb_executable'] = iedb_mhc_ii_executable
        class_ii_arguments['output_dir'] = output_dir
        class_ii_arguments['netmhc_stab'] = False
        pipeline = MHCIIPipeline(**class_ii_arguments)
        pipeline.execute()
Exemple #18
0
    def call_iedb_and_parse_outputs(self, chunks):
        split_parsed_output_files = []
        for (split_start, split_end) in chunks:
            tsv_chunk = "%d-%d" % (split_start, split_end)
            fasta_chunk = "%d-%d" % (split_start * 2 - 1, split_end * 2)
            for a in self.alleles:
                for epl in self.epitope_lengths:
                    split_fasta_file_path = "%s_%s" % (
                        self.split_fasta_basename(), fasta_chunk)
                    split_iedb_output_files = []
                    status_message(
                        "Processing entries for Allele %s and Epitope Length %s - Entries %s"
                        % (a, epl, fasta_chunk))
                    for method in self.prediction_algorithms:
                        prediction_class = globals()[method]
                        prediction = prediction_class()
                        iedb_method = prediction.iedb_prediction_method
                        valid_alleles = prediction.valid_allele_names()
                        if a not in valid_alleles:
                            status_message(
                                "Allele %s not valid for Method %s. Skipping."
                                % (a, method))
                            continue
                        valid_lengths = prediction.valid_lengths_for_allele(a)
                        if epl not in valid_lengths:
                            status_message(
                                "Epitope Length %s is not valid for Method %s and Allele %s. Skipping."
                                % (epl, method, a))
                            continue

                        split_iedb_out = os.path.join(
                            self.tmp_dir, ".".join([
                                self.sample_name, iedb_method, a,
                                str(epl),
                                "tsv_%s" % fasta_chunk
                            ]))
                        if os.path.exists(split_iedb_out):
                            status_message(
                                "IEDB file for Allele %s and Epitope Length %s with Method %s (Entries %s) already exists. Skipping."
                                % (a, epl, method, fasta_chunk))
                            split_iedb_output_files.append(split_iedb_out)
                            continue
                        status_message(
                            "Running IEDB on Allele %s and Epitope Length %s with Method %s - Entries %s"
                            % (a, epl, method, fasta_chunk))
                        lib.call_iedb.main([
                            split_fasta_file_path,
                            split_iedb_out,
                            iedb_method,
                            a,
                            '-l',
                            str(epl),
                            '-r',
                            str(self.iedb_retries),
                            '-e',
                            self.iedb_executable,
                        ])
                        status_message("Completed")
                        split_iedb_output_files.append(split_iedb_out)

                    split_parsed_file_path = os.path.join(
                        self.tmp_dir, ".".join([
                            self.sample_name, a,
                            str(epl), "parsed",
                            "tsv_%s" % fasta_chunk
                        ]))
                    if os.path.exists(split_parsed_file_path):
                        status_message(
                            "Parsed Output File for Allele %s and Epitope Length %s (Entries %s) already exists. Skipping"
                            % (a, epl, fasta_chunk))
                        split_parsed_output_files.append(
                            split_parsed_file_path)
                        continue
                    split_fasta_key_file_path = split_fasta_file_path + '.key'

                    if len(split_iedb_output_files) > 0:
                        status_message(
                            "Parsing IEDB Output for Allele %s and Epitope Length %s - Entries %s"
                            % (a, epl, fasta_chunk))
                        split_tsv_file_path = "%s_%s" % (self.tsv_file_path(),
                                                         tsv_chunk)
                        params = [
                            *split_iedb_output_files,
                            split_tsv_file_path,
                            split_fasta_key_file_path,
                            split_parsed_file_path,
                            '-m',
                            self.top_score_metric,
                        ]
                        if self.top_result_per_mutation == True:
                            params.append('-t')
                        lib.parse_output.main(params)
                        status_message("Completed")
                        split_parsed_output_files.append(
                            split_parsed_file_path)
        return split_parsed_output_files